X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=NewsStats.pm;h=781272d51453c51694cfd2c3e1b7a5b0d8c74685;hp=67b0dbc8a4c46495cd700eae237d04cac43e6688;hb=89db2f904dc9ddd07bfce9c3fe1fe81b58c1aa8b;hpb=4bbd46d3363f6e7b1ab99db24a954f61bf427f21 diff --git a/NewsStats.pm b/NewsStats.pm index 67b0dbc..781272d 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -31,6 +31,7 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ReadGroupList OutputData FormatOutput SQLHierarchies @@ -154,15 +155,26 @@ sub ListNewsgroups { ### explode a (scalar) list of newsgroup names to a list of newsgroup and ### hierarchy names where every newsgroup and hierarchy appears only once: ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin -### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header) -### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys - my ($Newsgroups) = @_; +### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) +### $TLH : top level hierarchy (all other newsgroups are ignored) +### $ValidGroupsR: reference to a hash containing all valid newsgroups +### as keys +### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups,$TLH,$ValidGroupsR) = @_; + my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; my %Newsgroups; chomp($Newsgroups); # remove whitespace from contents of Newsgroups: $Newsgroups =~ s/\s//; # call &HierarchyCount for each newsgroup in $Newsgroups: for (split /,/, $Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if($TLH and !/^$TLH/); + # don't count invalid newsgroups + if(%ValidGroups and !defined($ValidGroups{$_})) { + warn (sprintf("DROPPED: %s\n",$_)); + next; + } # add original newsgroup to %Newsgroups $Newsgroups{$_} = 1; # add all hierarchy elements to %Newsgroups, amended by '.ALL', @@ -194,6 +206,26 @@ sub ParseHierarchies { return @Hierarchies; }; 
################################################################################
sub ReadGroupList {
################################################################################
### read a list of valid newsgroups from file (each group on one line,
### ignoring everything after the first whitespace and so accepting files
### in checkgroups format as well as (parts of) an INN active file);
### lines that do not start with a group name (empty lines, lines starting
### with whitespace) are skipped
### IN : $Filename    : file to read
### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
###                     as keys (value is always '1')
### dies if $Filename cannot be opened for reading
  my ($Filename) = @_;
  my %ValidGroups;
  # three-argument open: $Filename is always taken literally as a file name,
  # never as a mode or a command
  open (my $LIST, '<', $Filename)
    or die "$MySelf: E: Cannot read $Filename: $!\n";
  while (my $Line = <$LIST>) {
    # the group name is the leading run of non-whitespace characters;
    # this also drops the line ending (LF as well as CRLF)
    my ($Group) = $Line =~ /^(\S+)/;
    # no group name on this line, so don't create a bogus (empty) key
    next if !defined($Group);
    $ValidGroups{$Group} = '1';
  };
  close $LIST;
  return \%ValidGroups;
};

################################################################################
#####----------------------------- TimePeriods ----------------------------#####
# NOTE(review): the subs between this banner and OutputData (the patch context
# names ListMonth) are not part of this excerpt.

sub OutputData {
################################################################################
### read database query results from DBHandle and print results with formatting
### IN : $Format  : format specifier
###      $FileName: file name template (-f): filename-YYYY-MM
###                 (if false, all output goes to STDOUT)
###      $DBQuery : database query handle with executed query,
###                 containing $Month, $Key, $Value
###      $PadGroup: padding length for newsgroups field (optional) for 'pretty'
### OUT: nothing
### dies if an output file cannot be opened or closed
  my ($Format, $FileName, $DBQuery, $PadGroup) = @_;
  my ($Handle, $OUT, $OutName);
  # month of the output file that is currently open; a lexical on purpose,
  # so a value left over from an earlier call can never suppress the open
  my $FileMonth;
  # package variable, still updated after every row as before
  # NOTE(review): presumably read by code outside this sub - confirm
  our $LastIteration;
  while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
    # set output file handle
    if (!$FileName) {
      $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
    } elsif (!defined($FileMonth) or $FileMonth ne $Month) {
      # month has changed: finish the previous file (if any) ...
      if (defined($OUT)) {
        close $OUT
          or die "$MySelf: E: Cannot close output file '$OutName': $!\n";
      };
      # ... and start a new one named "$FileName-$Month"
      $OutName = sprintf('%s-%s',$FileName,$Month);
      open ($OUT, '>', $OutName)
        or die "$MySelf: E: Cannot open output file '$OutName': $!\n";
      $Handle    = $OUT;
      $FileMonth = $Month;
    };
    print {$Handle} FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
    $LastIteration = $Month;
  };
  # only close what was actually opened (the query may have had no rows);
  # buffered write errors show up here, so check the result
  if (defined($OUT)) {
    close $OUT
      or die "$MySelf: E: Cannot close output file '$OutName': $!\n";
  };
  return;
};

################################################################################