X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=groupstats.pl;h=c5b8b7d66af2922fe39534227d8fd3eb8104e798;hp=b4ebecaec7a54a80acb1829d85e6bbfc7787c2c2;hb=78389b28e945178cad3f43a63e5d1091334e8dab;hpb=404c1acdc817c4f5dfece1d76f8644a54e4e56d8 diff --git a/groupstats.pl b/groupstats.pl index b4ebeca..c5b8b7d 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -26,7 +26,7 @@ use DBI; ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); +my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:'); ### read configuration my %Conf = %{ReadConfig('newsstats.conf')}; @@ -39,7 +39,13 @@ $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; ### check for incompatible command line options # you can't mix '-t', '-b' and '-l' # -b/-l take preference over -t, and -b takes preference over -l +# you can't use '-f' with '-b' or '-l' if ($Options{'b'} or $Options{'l'}) { + if ($Options{'f'}) { + # drop -f + warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n"); + undef($Options{'f'}); + }; if ($Options{'t'}) { # drop -t warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). 
Threshold '-t $Options{'t'}' was ignored.\n"); @@ -71,17 +77,36 @@ if ($Options{'l'}) { warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); }; -### get time period -my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); -# reset to one month for 'dump' output type -if ($Options{'o'} eq 'dump' and $Options{'p'}) { - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); - $Options{'o'} = 'pretty'; -}; - ### init database my $DBHandle = InitDB(\%Conf,1); +### get time period +my ($StartMonth,$EndMonth); +# if '-a' is set, set start/end month from database +# FIXME - it doesn't make that much sense to get first/last month from database to query it +# with a time period that equals no time period ... +if ($Options{'a'}) { + undef($Options{'m'}); + undef($Options{'p'}); + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); + $DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); + ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array; +} else { + ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); +}; +# if time period is more than one month: set output type to '-o pretty' or '-o dumpgroup' +if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) { + if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) { + # just one newsgroup is defined + warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n"); + $Options{'o'} = 'dumpgroup'; + } elsif (!defined($Options{'f'})) { + # more than one newsgroup - and no file output + warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); + $Options{'o'} = 'pretty'; + } +}; + ### create report # get list of newsgroups (-n) my 
($QueryGroupList,$QueryThreshold,@GroupList,@Params); @@ -155,13 +180,17 @@ if (!defined($Options{'b'}) and !defined($Options{'l'})) { } else { $Level = '>'; }; - # push level and $StartMonth,$EndMonth - again - to Params - # FIXME -- together with the query (see below) - push @Params,$Options{'l'}; - push @Params,$StartMonth,$EndMonth; - # prepare query: get number of postings per group from groups table for given months and - # FIXME -- this query is ... in dire need of impromevent - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level,$WhereClause)); + # prepare and execute query: get list of newsgroups meeting level condition + $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level)); + $DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'}) + or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); + # add newsgroups to a comma-separated list ready for IN(...) 
query + my $GroupList; + while (my ($Newsgroup) = $DBQuery->fetchrow_array) { + $GroupList .= ',' if (defined($GroupList) and $GroupList ne ''); + $GroupList .= "'$Newsgroup'"; + }; + $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause)); }; # execute query @@ -169,30 +198,37 @@ $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); # output results +# reset caption (-c) if -f is set +undef($Options{'c'}) if $Options{'f'}; # print caption (-c) with time period if -m or -p is set -# FIXME - month or period should handled differently -printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); +if ($Options{'c'}) { + if ($Options{'p'}) { + printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth); + } elsif ($Options{'m'}) { + printf ("----- Report for %s\n",$StartMonth); + }; +}; # print caption (-c) with newsgroup list if -n is set printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; # print caption (-c) with threshold if -t is set, taking -i in account printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; if (!defined($Options{'b'}) and !defined($Options{'l'})) { # default: neither -b nor -l - &OutputData($Options{'o'},$DBQuery,$MaxLength); + &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength); } elsif ($Options{'b'}) { # -b is set (then -l can't be!) 
# we have to read in the query results ourselves, as they do not have standard layout while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just assign "top x" or "bottom x" instead of a month for the caption - # FIXME - print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); + # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty + print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); }; } else { # -l must be set now, as all other cases have been taken care of + # print caption (-c) with level, taking -i in account + printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'}; # we have to read in the query results ourselves, as they do not have standard layout while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { # we just switch $Newsgroups and $Month for output generation - # FIXME print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); }; }; @@ -210,7 +246,7 @@ groupstats - create reports on newsgroup usage =head1 SYNOPSIS -B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<n>] [B<-o> I<output type>] [B<-g> I<database table>] +B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<n>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>] =head1 REQUIREMENTS @@ -268,7 +304,7 @@ below). Captions can be added by setting the B<-c> switch. =head2 Configuration -F<groupstats> will read its configuration from F<newsstats.conf> +B<groupstats> will read its configuration from F<newsstats.conf> which should be present in the same directory via Config::Auto. See doc/INSTALL for an overview of possible configuration options. @@ -290,12 +326,18 @@ Print this man page and exit. =item B<-m> I<YYYY-MM> (month) Set processing period to a month in YYYY-MM format. Ignored if B<-p> -is set. +or B<-a> is set. 
=item B<-p> I<YYYY-MM:YYYY-MM> (period) Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. +YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if +B<-a> is set. + +=item B<-a> (all) + +Set no processing period (process whole database). Overrides B<-m> +and B<-p>. =item B<-n> I<newsgroup(s)> (newsgroups) @@ -323,7 +365,7 @@ postings every single month will be included. Output will be ordered by newsgroup name, followed by month. This setting will be ignored if B<-b> is set. Overrides B<-t> and -can't be used together with B<-q> or B<-d>. +can't be used together with B<-q>, B<-d> or B<-f>. =item B<-b> I<n> (best of) @@ -332,8 +374,8 @@ whole reporting period. Can be inverted by the B<-i> switch so that a list of the I<n> newsgroups with the least postings over the whole period is generated. Output will be ordered by sum of postings. -Overrides B<-t> and B<-l> and can't be used together with B<-q> or -B<-d>. Output format is set to I<pretty> (see below). +Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d> +or B<-f>. Output format is set to I<pretty> (see below). =item B<-i> (invert) @@ -372,7 +414,10 @@ format. =item B<-c> (captions) -Add captions to output (reporting period, newsgroups list, threshold). +Add captions to output (reporting period, newsgroups list, threshold +and so on). + +This setting will be ignored if B<-f> is set. =item B<-q> (quantity of postings) @@ -386,6 +431,20 @@ Change sort order to descending. Cannot be used with B<-l> or B<-b>. +=item B<-f> I<filename template> (output file) + +Save output to file instead of dumping it to STDOUT. B<groupstats> +will create one file for each month, with filenames composed by +adding year and month to the I<filename template>, for example +with B<-f> I<stats>: + + stats-2010-01 + stats-2010-02 + ... and so on + +This setting will be ignored if B<-l> or B<-b> is set. Output format +is set to I (see above). + =item B<-g> I<database table> (postings per group table) Override I<DBTableGrps> from F<newsstats.conf>.