X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=NewsStats.pm;h=781272d51453c51694cfd2c3e1b7a5b0d8c74685;hp=34635c16cfa7af8f9604ea1f647fe81132415495;hb=89db2f904dc9ddd07bfce9c3fe1fe81b58c1aa8b;hpb=b221278d97330299db38f7f179c8d4553e75c42a diff --git a/NewsStats.pm b/NewsStats.pm index 34635c1..781272d 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -18,6 +18,7 @@ require Exporter; @EXPORT = qw( $MySelf $MyVersion + $PackageVersion ReadOptions ReadConfig OverrideConfig @@ -30,6 +31,7 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ReadGroupList OutputData FormatOutput SQLHierarchies @@ -39,7 +41,8 @@ require Exporter; %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], Output => [qw(OutputData FormatOutput)], SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); -$VERSION = '0.1'; +$VERSION = '0.01'; +our $PackageVersion = '0.01'; use Data::Dumper; use File::Basename; @@ -58,8 +61,8 @@ our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; ################################################################################ sub ReadOptions { ################################################################################ -### read commandline options and act on standard options -### IN : $Params: containing list of commandline paramaters (without -h and -V) +### read commandline options and act on standard options -h and -V +### IN : $Params: list of legal commandline paramaters (without -h and -V) ### OUT: a hash containing the commandline options $Getopt::Std::STANDARD_HELP_VERSION = 1; @@ -82,7 +85,7 @@ sub ReadOptions { sub ShowVersion { ################################################################################ ### display version and exit - print "$MyVersion\nCopyright (c) 2010 Thomas Hochstein \n"; + print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein \n"; print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; exit(100); }; @@ -116,7 +119,9 @@ sub OverrideConfig { ### $OverrideR: reference to a hash containing overrides my ($ConfigR,$OverrideR) = @_; my %Override = %$OverrideR; + # Config hash empty? warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); + # return if no overrides return if (keys %Override < 1 or keys %$ConfigR < 1); foreach my $Key (keys %Override) { $$ConfigR{$Key} = $Override{$Key}; @@ -129,7 +134,7 @@ sub InitDB { ################################################################################ ### initialise database connection ### IN : $ConfigR: reference to configuration hash -### $Die : if TRUE, die if connection failed +### $Die : if TRUE, die if connection fails ### OUT: DBHandle my ($ConfigR,$Die) = @_; my %Conf = %$ConfigR; @@ -147,16 +152,29 @@ sub InitDB { ################################################################################ sub ListNewsgroups { ################################################################################ -### count each newsgroup and each hierarchy level, but only once -### IN : $Newsgroups: a list of newsgroups (content of Newsgroups:) -### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys - my ($Newsgroups) = @_; +### explode a (scalar) list of newsgroup names to a list of newsgroup and +### hierarchy names where every newsgroup and hierarchy appears only once: +### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin +### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) +### $TLH : top level hierarchy (all other newsgroups are ignored) +### $ValidGroupsR: reference to a hash containing all valid newsgroups +### as keys +### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups,$TLH,$ValidGroupsR) = @_; + my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; my %Newsgroups; chomp($Newsgroups); # remove whitespace from contents of Newsgroups: $Newsgroups =~ s/\s//; # call &HierarchyCount for each newsgroup in $Newsgroups: for (split /,/, $Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if($TLH and !/^$TLH/); + # don't count invalid newsgroups + if(%ValidGroups and !defined($ValidGroups{$_})) { + warn (sprintf("DROPPED: %s\n",$_)); + next; + } # add original newsgroup to %Newsgroups $Newsgroups{$_} = 1; # add all hierarchy elements to %Newsgroups, amended by '.ALL', @@ -171,7 +189,8 @@ sub ListNewsgroups { ################################################################################ sub ParseHierarchies { ################################################################################ -### get all hierarchies a newsgroup belongs to +### return a list of all hierarchy levels a newsgroup belongs to +### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) ### IN : $Newsgroup : a newsgroup name ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to my ($Newsgroup) = @_; @@ -187,6 +206,26 @@ sub ParseHierarchies { return @Hierarchies; }; +################################################################################ +sub ReadGroupList { +################################################################################ +### read a list of valid newsgroups from file (each group on one line, +### ignoring everything after the first whitespace and so accepting files +### in checkgroups format as well as (parts of) an INN active file) +### IN : $Filename : file to read +### OUT: \%ValidGroups: hash containing all valid newsgroups + my ($Filename) = @_; + my %ValidGroups; + open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; + while (<$LIST>) { + s/^(\S+).*$/$1/; + chomp; + $ValidGroups{$_} = '1'; + }; + close $LIST; + return \%ValidGroups; +}; + ################################################################################ #####----------------------------- TimePeriods ----------------------------##### @@ -194,9 +233,11 @@ sub ParseHierarchies { ################################################################################ sub GetTimePeriod { ################################################################################ -### get time period using -m / -p +### get a time period to act on, in order of preference: by default the +### last month; or a month submitted by -m YYYY-MM; or a time period submitted +### by -p YYYY-MM:YYYY-MM ### IN : $Month,$Period: contents of -m and -p -### OUT: $StartMonth, $EndMonth +### OUT: $StartMonth, $EndMonth (identical if period is just one month) my ($Month,$Period) = @_; # exit if -m is set and not like YYYY-MM die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); @@ -220,7 +261,7 @@ sub GetTimePeriod { ################################################################################ sub LastMonth { ################################################################################ -### get last month from today in YYYY-MM format +### get last month from todays date in YYYY-MM format ### OUT: last month as YYYY-MM # get today's date my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); @@ -237,7 +278,7 @@ sub LastMonth { ################################################################################ sub CheckMonth { ################################################################################ -### check for valid month +### check if input is a valid month in YYYY-MM form ### IN : $Month: month ### OUT: TRUE / FALSE my ($Month) = @_; @@ -248,7 +289,7 @@ sub CheckMonth { ################################################################################ sub SplitPeriod { ################################################################################ -### split a time period YYYY-MM:YYYY-MM into start and end month +### split a time period denoted by YYYY-MM:YYYY-MM into start and end month ### IN : $Period: time period ### OUT: $StartMonth, Â$EndMonth my ($Period) = @_; @@ -265,7 +306,7 @@ sub SplitPeriod { ################################################################################ sub ListMonth { ################################################################################ -### return a list of month (YYYY-MM) between start and end month +### return a list of months (YYYY-MM) between start and end month ### IN : $StartMonth, $EndMonth ### OUT: @Months: array containing all months from $StartMonth to $EndMonth my ($StartMonth, $EndMonth) = @_; @@ -293,27 +334,39 @@ sub ListMonth { ################################################################################ sub OutputData { ################################################################################ -### output information with formatting from DBHandle -### IN : $Format : format specifier -### $DBQuery: database query handle with executed query, -### containing $Month, $Key, $Value +### read database query results from DBHandle and print results with formatting +### IN : $Format : format specifier +### $FileName: file name template (-f): filename-YYYY-MM +### $DBQuery : database query handle with executed query, +### containing $Month, $Key, $Value ### $PadGroup: padding length for newsgroups field (optional) for 'pretty' -### OUT: $Output: formatted output - my ($Format, $DBQuery,$PadGroup) = @_; + my ($Format, $FileName, $DBQuery, $PadGroup) = @_; + my ($Handle, $OUT); + our $LastIteration; while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { - print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); + # set output file handle + if (!$FileName) { + $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT + } elsif (!defined($LastIteration) or $LastIteration ne $Month) { + close $OUT if ($LastIteration); + open ($OUT,sprintf('>%s-%s',$FileName,$Month)) or die sprintf("$MySelf: E: Cannot open output file '%s-%s': $!\n",$FileName,$Month); + $Handle = $OUT; + }; + print $Handle &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); + $LastIteration = $Month; }; + close $OUT if ($FileName); }; ################################################################################ sub FormatOutput { ################################################################################ -### format information for output +### format information for output according to format specifier ### IN : $Format : format specifier -### $PadGroup: padding length for newsgroups field (optional) for 'pretty' ### $Month : month (as YYYY-MM) ### $Key : newsgroup, client, ... ### $Value : number of postings with that attribute +### $PadGroup: padding length for key field (optional) for 'pretty' ### OUT: $Output: formatted output my ($Format, $Month, $Key, $Value, $PadGroup) = @_; @@ -324,6 +377,7 @@ sub FormatOutput { die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format}); my ($Output); + # keep last month in mind our ($LastIteration); if ($Format eq 'dump') { # output as dump (ng nnnnn) @@ -348,7 +402,9 @@ sub FormatOutput { ################################################################################ sub SQLHierarchies { ################################################################################ -### amend WHERE clause to include hierarchies +### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by +### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is +### true, accordingly) ### IN : $ShowHierarchies: boolean value ### OUT: SQL code my ($ShowHierarchies) = @_; @@ -358,7 +414,7 @@ sub SQLHierarchies { ################################################################################ sub GetMaxLenght { ################################################################################ -### get length of longest field in query +### get length of longest field in future query result ### IN : $DBHandle : database handel ### $Table : table to query ### $Field : field to check @@ -375,7 +431,8 @@ sub GetMaxLenght { ################################################################################ sub SQLGroupList { ################################################################################ -### create part of WHERE clause for list of newsgroups separated by : +### explode list of newsgroups separated by : (with wildcards) to a SQL WHERE +### clause ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) ### OUT: SQL code, list of newsgroups my ($Newsgroups) = @_;