SplitPeriod
ListMonth
ListNewsgroups
+ ReadGroupList
OutputData
FormatOutput
SQLHierarchies
SQLGroupList
- GetMaxLenght
+ GetMaxLength
);
%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)],
Output => [qw(OutputData FormatOutput)],
- SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]);
+ SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]);
$VERSION = '0.01';
our $PackageVersion = '0.01';
### explode a (scalar) list of newsgroup names to a list of newsgroup and
### hierarchy names where every newsgroup and hierarchy appears only once:
### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
-### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header)
-### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys
- my ($Newsgroups) = @_;
+### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
+### $TLH : top level hierarchy (all other newsgroups are ignored)
+### $ValidGroupsR: reference to a hash containing all valid newsgroups
+### as keys
+### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
+ my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
+ my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
my %Newsgroups;
chomp($Newsgroups);
# remove whitespace from contents of Newsgroups:
$Newsgroups =~ s/\s//;
# call &HierarchyCount for each newsgroup in $Newsgroups:
for (split /,/, $Newsgroups) {
+ # don't count newsgroup/hierarchy in wrong TLH
+ next if($TLH and !/^$TLH/);
+ # don't count invalid newsgroups
+ if(%ValidGroups and !defined($ValidGroups{$_})) {
+ warn (sprintf("DROPPED: %s\n",$_));
+ next;
+ }
# add original newsgroup to %Newsgroups
$Newsgroups{$_} = 1;
# add all hierarchy elements to %Newsgroups, amended by '.ALL',
return @Hierarchies;
};
+################################################################################
+sub ReadGroupList {
+################################################################################
+### read a list of valid newsgroups from file (each group on one line,
+### ignoring everything after the first whitespace and so accepting files
+### in checkgroups format as well as (parts of) an INN active file);
+### lines that do not start with a group name (empty lines, lines with
+### leading whitespace) are skipped instead of being stored as bogus keys
+### IN : $Filename    : file to read
+### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
+###                    as keys (each with the value '1')
+### dies if $Filename cannot be opened for reading
+  my ($Filename) = @_;
+  my %ValidGroups;
+  # three-argument open: $Filename is never interpreted as a mode or a pipe
+  open (my $LIST, '<', $Filename)
+    or die "$MySelf: E: Cannot read $Filename: $!\n";
+  # read into a lexical so the caller's $_ is left untouched
+  while (my $Line = <$LIST>) {
+    # the group name is the leading run of non-whitespace characters
+    next unless $Line =~ /^(\S+)/;
+    $ValidGroups{$1} = '1';
+  };
+  close $LIST;
+  return \%ValidGroups;
+};
+
################################################################################
#####----------------------------- TimePeriods ----------------------------#####
sub OutputData {
################################################################################
### read database query results from DBHandle and print results with formatting
-### IN : $Format : format specifier
-### $DBQuery: database query handle with executed query,
-### containing $Month, $Key, $Value
+### IN : $Format : format specifier
+### $FileName: file name template (-f): filename-YYYY-MM
+### $DBQuery : database query handle with executed query,
+### containing $Month, $Key, $Value
### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
+### without $FileName all rows are printed to STDOUT; with $FileName a new
+### output file "$FileName-$Month" is opened whenever $Month changes
+### dies if an output file cannot be opened
- my ($Format, $DBQuery,$PadGroup) = @_;
+  my ($Format, $FileName, $DBQuery, $PadGroup) = @_;
+  my ($Handle, $OUT);
+  # lexical on purpose: a value left over from an earlier call must not
+  # suppress opening the first output file of this call
+  my $LastIteration;
while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
- print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
+    # set output file handle
+    if (!$FileName) {
+      $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
+    } elsif (!defined($LastIteration) or $LastIteration ne $Month) {
+      # month changed: finish the previous file (if any) and start a new one
+      close $OUT if defined($OUT);
+      my $OutFile = sprintf('%s-%s',$FileName,$Month);
+      # three-argument open: the file name can never be taken as a mode
+      open ($OUT, '>', $OutFile)
+        or die "$MySelf: E: Cannot open output file '$OutFile': $!\n";
+      $Handle = $OUT;
+    };
+    print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
+    $LastIteration = $Month;
};
+  # $OUT is still undefined if the query returned no rows
+  close $OUT if ($FileName and defined($OUT));
};
################################################################################
};
################################################################################
-sub GetMaxLenght {
+sub GetMaxLength {
################################################################################
### get length of longest field in future query result
### IN : $DBHandle : database handle