X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=NewsStats.pm;h=2cb1be2ebf66a62bb7e7edafee890d9af8c38be9;hp=3efc14b84f28a7bc39fcb4ab2ace05faa470c6b0;hb=ad6097927113a27587ed8b1785f18c4b18db58e8;hpb=f6d15ca78e16217b709d69f2977b28dd1eeabc02;ds=sidebyside diff --git a/NewsStats.pm b/NewsStats.pm index 3efc14b..2cb1be2 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -31,6 +31,7 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ReadGroupList OutputData FormatOutput SQLHierarchies @@ -40,8 +41,8 @@ require Exporter; %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], Output => [qw(OutputData FormatOutput)], SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); -$VERSION = '0.1'; -our $PackageVersion = '0.1'; +$VERSION = '0.01'; +our $PackageVersion = '0.01'; use Data::Dumper; use File::Basename; @@ -154,15 +155,23 @@ sub ListNewsgroups { ### explode a (scalar) list of newsgroup names to a list of newsgroup and ### hierarchy names where every newsgroup and hierarchy appears only once: ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin -### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header) -### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys - my ($Newsgroups) = @_; +### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) +### $ValidGroupsR: reference to a hash containing all valid newsgroups +### as keys +### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups,$ValidGroupsR) = @_; + my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; my %Newsgroups; chomp($Newsgroups); # remove whitespace from contents of Newsgroups: $Newsgroups =~ s/\s//; # call &HierarchyCount for each newsgroup in $Newsgroups: for (split /,/, $Newsgroups) { + # don't count invalid newsgroups + if(%ValidGroups and !defined($ValidGroups{$_})) { + warn (sprintf("DROPPED: %s\n",$_)); + next; + } # add original newsgroup to %Newsgroups $Newsgroups{$_} = 1; # add all hierarchy elements to %Newsgroups, amended by '.ALL', @@ -194,6 +203,26 @@ sub ParseHierarchies { return @Hierarchies; }; +################################################################################ +sub ReadGroupList { +################################################################################ +### read a list of valid newsgroups from file (each group on one line, +### ignoring everything after the first whitespace and so accepting files +### in checkgroups format as well as (parts of) an INN active file) +### IN : $Filename : file to read +### OUT: \%ValidGroups: hash containing all valid newsgroups + my ($Filename) = @_; + my %ValidGroups; + open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; + while (<$LIST>) { + s/^(\S+).*$/$1/; + chomp; + $ValidGroups{$_} = '1'; + }; + close $LIST; + return \%ValidGroups; +}; + ################################################################################ #####----------------------------- TimePeriods ----------------------------##### @@ -303,14 +332,27 @@ sub ListMonth { sub OutputData { ################################################################################ ### read database query results from DBHandle and print results with formatting -### IN : $Format : format specifier -### $DBQuery: database query handle with executed query, -### containing $Month, $Key, $Value +### IN : $Format : format specifier +### $FileName: file name template (-f): filename-YYYY-MM +### $DBQuery : database query handle with executed query, +### containing $Month, $Key, $Value ### $PadGroup: padding length for newsgroups field (optional) for 'pretty' - my ($Format, $DBQuery,$PadGroup) = @_; + my ($Format, $FileName, $DBQuery, $PadGroup) = @_; + my ($Handle, $OUT); + our $LastIteration; while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { - print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); + # set output file handle + if (!$FileName) { + $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT + } elsif (!defined($LastIteration) or $LastIteration ne $Month) { + close $OUT if ($LastIteration); + open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month); + $Handle = $OUT; + }; + print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); + $LastIteration = $Month; }; + close $OUT if ($FileName); }; ################################################################################