X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=groupstats.pl;h=14d19c6391cd629a0060a2a650b40110e28b28b1;hp=19c54e1339b93c5823d5743c6041b7995ec8fcd0;hb=c3973e7d0d943c1b4eb042d79c6e477c98a689a3;hpb=628a183c15adf643575f00b92b9ad6d98b584310 diff --git a/groupstats.pl b/groupstats.pl index 19c54e1..14d19c6 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -3,11 +3,11 @@ # groupstats.pl # # This script will get statistical data on newgroup usage -# form a database. +# from a database. # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -19,224 +19,229 @@ BEGIN { } use strict; -use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); +use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:'); +my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, + $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, + $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound); +GetOptions ('b|boundary=s' => \$OptBoundType, + 'c|captions!' => \$OptCaptions, + 'checkgroups=s' => \$OptCheckgroupsFile, + 'comments!' => \$OptComments, + 'filetemplate=s' => \$OptFileTemplate, + 'f|format=s' => \$OptFormat, + 'g|group-by=s' => \$OptGroupBy, + 'groupsdb=s' => \$OptGroupsDB, + 'l|lower=i' => \$LowBound, + 'm|month=s' => \$OptMonth, + 'n|newsgroups=s' => \$OptNewsgroups, + 'o|order-by=s' => \$OptOrderBy, + 'r|report=s' => \$OptReportType, + 's|sums!' => \$OptSums, + 'u|upper=i' => \$UppBound, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; +# parse parameters +# $OptComments defaults to TRUE +$OptComments = 1 if (!defined($OptComments)); +# force --nocomments when --filetemplate is used +$OptComments = 0 if ($OptFileTemplate); +# parse $OptBoundType +if ($OptBoundType) { + if ($OptBoundType =~ /level/i) { + $OptBoundType = 'level'; + } elsif ($OptBoundType =~ /av(era)?ge?/i) { + $OptBoundType = 'average'; + } elsif ($OptBoundType =~ /sums?/i) { + $OptBoundType = 'sum'; + } else { + $OptBoundType = 'default'; + } +} +# parse $OptReportType +if ($OptReportType) { + if ($OptReportType =~ /av(era)?ge?/i) { + $OptReportType = 'average'; + } elsif ($OptReportType =~ /sums?/i) { + $OptReportType = 'sum'; + } else { + $OptReportType = 'default'; + } +} +# read list of newsgroups from --checkgroups +# into a hash reference +my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile; ### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ### override configuration via commandline options my %ConfOverride; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; &OverrideConfig(\%Conf,\%ConfOverride); -### check for incompatible command line options -# you can't mix '-t', '-b' and '-l' -# -b/-l take preference over -t, and -b takes preference over -l -# you can't use '-f' with '-b' or '-l' -if ($Options{'b'} or $Options{'l'}) { - if ($Options{'f'}) { - # drop -f - warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n"); - undef($Options{'f'}); - }; - if ($Options{'t'}) { - # drop -t - warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); - undef($Options{'t'}); - }; - if ($Options{'b'} and $Options{'l'}) { - # drop -l - warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); - undef($Options{'l'}); - }; - # -q/-d don't work with -b or -l - warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; - warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; -}; - -### check output type -# default output type to 'pretty' -$Options{'o'} = 'pretty' if !$Options{'o'}; -# fail if more than one newsgroup is combined with 'dumpgroup' type -die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); -# accept 'dumpgroup' only with -n -if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { - $Options{'o'} = 'dump'; - warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); -}; -# set output type to 'pretty' for -l -if ($Options{'l'} and $Options{'o'} ne 'pretty') { - $Options{'o'} = 'pretty'; - warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); -}; -# set output type to 'dump' for -f -if ($Options{'f'} and $Options{'o'} ne 'dump') { - $Options{'o'} = 'dump'; - warn ("$MySelf: W: Output type forced to '-o dump' due to usage of '-f'.\n"); -}; - ### init database my $DBHandle = InitDB(\%Conf,1); -### get time period -my ($StartMonth,$EndMonth); -# if '-a' is set, set start/end month from database -# FIXME - it doesn't make that much sense to get first/last month from database to query it -# with a time period that equals no time period ... -if ($Options{'a'}) { - undef($Options{'m'}); - undef($Options{'p'}); - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); - ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array; +### get time period and newsgroups, prepare SQL 'WHERE' clause +# get time period +# and set caption for output and expression for SQL 'WHERE' clause +my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); +# bail out if --month is invalid +&Bleat(2,"--month option has an invalid format - ". + "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; +# get list of newsgroups and set expression for SQL 'WHERE' clause +# with placeholders as well as a list of newsgroup to bind to them +my ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups) + if $OptNewsgroups;; + +### build SQL WHERE clause (and HAVING clause, if needed) +my ($SQLWhereClause,$SQLHavingClause); +# $OptBoundType 'level' +if ($OptBoundType and $OptBoundType ne 'default') { + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); + $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, + $LowBound,$UppBound)); +# $OptBoundType 'threshold' / 'default' or none } else { - ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); -}; -# if time period is more than one month: force output type to '-o pretty' or '-o dumpgroup' -if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) { - if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) { - # just one newsgroup is defined - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n"); - $Options{'o'} = 'dumpgroup'; - } elsif (!defined($Options{'f'})) { - # more than one newsgroup - and no file output - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); - $Options{'o'} = 'pretty'; - } -}; + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums), + &SQLSetBounds('default',$LowBound,$UppBound)); +} -### create report -# get list of newsgroups (-n) -my ($QueryGroupList,$QueryThreshold,@GroupList,@Params); -my $Newsgroups = $Options{'n'}; -if ($Newsgroups) { - # explode list of newsgroups for WHERE clause - ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups); -} else { - # set to dummy value (always true) - $QueryGroupList = 1; +### get sort order and build SQL 'ORDER BY' clause +# default to 'newsgroup' for $OptBoundType 'level' or 'average' +$OptGroupBy = 'newsgroup' if (!$OptGroupBy and + $OptBoundType and $OptBoundType ne 'default'); +# force to 'month' for $OptReportType 'average' or 'sum' +$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); +# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause +my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); +# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) +# set it to 'month' or 'key' for OutputData() +$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; + +### get report type and build SQL 'SELECT' query +my $SQLSelect; +my $SQLGroupClause = ''; +my $Precision = 0; # number of digits right of decimal point for output +if ($OptReportType and $OptReportType ne 'default') { + $SQLGroupClause = 'GROUP BY newsgroup'; + # change $SQLOrderClause: replace everything before 'postings' + $SQLOrderClause =~ s/BY.+postings/BY postings/; + if ($OptReportType eq 'average') { + $SQLSelect = "'All months',newsgroup,AVG(postings)"; + $Precision = 2; + # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' + $SQLOrderClause =~ s/postings/AVG(postings)/; + } elsif ($OptReportType eq 'sum') { + $SQLSelect = "'All months',newsgroup,SUM(postings)"; + # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' + $SQLOrderClause =~ s/postings/SUM(postings)/; + } + } else { + $SQLSelect = 'month,newsgroup,postings'; }; -# manage thresholds -if (defined($Options{'t'})) { - if ($Options{'i'}) { - # -i: list groups below threshold - $QueryThreshold .= ' postings < ?'; - } else { - # default: list groups above threshold - $QueryThreshold .= ' postings > ?'; - }; - # push threshold to Params - push @Params,$Options{'t'}; -} else { - # set to dummy value (always true) - $QueryThreshold = 1; -} - -# construct WHERE clause -# $QueryGroupList is "list of newsgroup" (or 1), -# $QueryThreshold is threshold definition (or 1), -# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) -# according to setting of -s -my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'})); - -# get length of longest newsgroup delivered by query for formatting purposes -# FIXME -my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params)); - -my ($OrderClause,$DBQuery); -# -b (best of / top list) defined? -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" - # according to -q and -d - $OrderClause = 'newsgroup'; - $OrderClause = 'postings' if $Options{'q'}; - $OrderClause .= ' DESC' if $Options{'d'}; - # prepare query: get number of postings per group from groups table for given months and newsgroups - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) - # set sorting order (-i): top or flop list? - if ($Options{'i'}) { - $OrderClause = 'postings'; - } else { - $OrderClause = 'postings DESC'; - }; - # set -b to 10 if < 1 (Top 10) - $Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1; - # push LIMIT to Params - push @Params,$Options{'b'}; - # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT - $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} else { - # -l must be set now, as all other cases have been taken care of - # which kind of level (-i): more than -l x or less than -l x? - my ($Level); - if ($Options{'i'}) { - $Level = '<'; - } else { - $Level = '>'; - }; - # prepare and execute query: get list of newsgroups meeting level condition - $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level)); - $DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'}) - or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); +### get length of longest newsgroup name delivered by query +### for formatting purposes +my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; +my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, + $Field,$SQLWhereClause,$SQLHavingClause, + @SQLBindNewsgroups); + +### build and execute SQL query +my ($DBQuery); +# special query preparation for $OptBoundType 'level', 'average' or 'sums' +if ($OptBoundType and $OptBoundType ne 'default') { + # prepare and execute first query: + # get list of newsgroups meeting level conditions + $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. + 'GROUP BY newsgroup %s', + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLHavingClause)); + $DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); # add newsgroups to a comma-seperated list ready for IN(...) query my $GroupList; while (my ($Newsgroup) = $DBQuery->fetchrow_array) { - $GroupList .= ',' if (defined($GroupList) and $GroupList ne ''); + $GroupList .= ',' if $GroupList; $GroupList .= "'$Newsgroup'"; }; - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause)); -}; + # enhance $WhereClause + if ($GroupList) { + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, + sprintf('newsgroup IN (%s)',$GroupList)); + } else { + # condition cannot be satisfied; + # force query to fail by adding '0=1' + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); + } +} + +# prepare query +$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', + $SQLSelect, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLGroupClause,$ + SQLOrderClause)); # execute query -$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) - or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); - -# output results -# reset caption (-c) if -f is set -undef($Options{'c'}) if $Options{'f'}; -# print caption (-c) with time period if -m or -p is set -if ($Options{'c'}) { - if ($Options{'p'}) { - printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth); - } elsif ($Options{'m'}) { - printf ("----- Report for %s\n",$StartMonth); - }; -}; -# print caption (-c) with newsgroup list if -n is set -printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; -# print caption (-c) with threshold if -t is set, taking -i in account -printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty - print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); - }; -} else { - # -l must be set now, as all other cases have been taken care of - # print caption (-c) with level, taking -i in account - printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'}; - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just switch $Newsgroups and $Month for output generation - print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); - }; -}; +$DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); + +### output results +# set default to 'pretty' +$OptFormat = 'pretty' if !$OptFormat; +# print captions if --caption is set +if ($OptCaptions && $OptComments) { + # print time period with report type + my $CaptionReportType= '(number of postings for each month)'; + if ($OptReportType and $OptReportType ne 'default') { + $CaptionReportType= '(average number of postings for each month)' + if $OptReportType eq 'average'; + $CaptionReportType= '(number of all postings for that time period)' + if $OptReportType eq 'sum'; + } + printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); + # print newsgroup list if --newsgroups is set + printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) + if $OptNewsgroups; + # print boundaries, if set + my $CaptionBoundary= '(counting only month fulfilling this condition)'; + if ($OptBoundType and $OptBoundType ne 'default') { + $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; + $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; + $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; + } + printf("# ----- Threshold: %s %s x %s %s %s\n", + $LowBound ? $LowBound : '',$LowBound ? '=>' : '', + $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) + if ($LowBound or $UppBound); + # print primary and secondary sort order + printf("# ----- Grouped by %s (%s), sorted %s%s\n", + ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', + ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', + ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', + ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); +} + +# output data +&OutputData($OptFormat,$OptComments,$GroupBy,$Precision, + $OptCheckgroupsFile ? $ValidGroups : '', + $OptFileTemplate,$DBQuery,$MaxLength); ### close handles $DBHandle->disconnect; @@ -251,60 +256,67 @@ groupstats - create reports on newsgroup usage =head1 SYNOPSIS -B [B<-Vhiscqd>] [B<-m> I | B<-p> I | B<-a>] [B<-n> I] [B<-t> I] [B<-l> I] [B<-b> I] [B<-o> I] [B<-f> I] [B<-g> I] +B [B<-Vhcs> B<--comments>] [B<-m> I[:I] | I] [B<-n> I] [B<--checkgroups> I] [B<-r> I] [B<-l> I] [B<-u> I] [B<-b> I] [B<-g> I] [B<-o> I] [B<-f> I] [B<--filetemplate> I] [B<--groupsdb> I] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION This script create reports on newsgroup usage (number of postings per group per month) taken from result tables created by -F. - -The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. - -B will process all newsgroups by default; you can limit -that to only some newsgroups by supplying a list of those groups via -B<-n> (see below). You can include hierarchy levels in the output by -adding the B<-s> switch (see below). +B. -Furthermore you can set a threshold via B<-t> so that only newsgroups -with more postings per month will be included in the report. You can -invert that by the B<-i> switch so only newsgroups with less than -I postings per month will be included. +=head2 Features and options -You can sort the output by number of postings per month instead of the -default (alphabetical list of newsgroups) by using B<-q>; you can -reverse the sorting order (from highest to lowest or in reversed -alphabetical order) by using B<-d>. +=head3 Time period and newsgroups -Furthermore, you can create a list of newsgroups that had consistently -more (or less) than x postings per month during the whole report -period by using B<-l> (together with B as needed). +The time period to act on defaults to last month; you can assign another +time period or a single month (or drop all time constraints) via the +B<--month> option (see below). -Last but not least you can create a "best of" list of the top x -newsgroups via B<-b> (or a "worst of" list by adding B). - -By default, B will dump an alphabetical list of newsgroups, -one per line, followed by the number of postings in that group, for -every month. You can change the output format by using B<-o> (see -below). Captions can be added by setting the B<-c> switch. +B will process all newsgroups by default; you can limit +processing to only some newsgroups by supplying a list of those groups via +B<--newsgroups> option (see below). You can include hierarchy levels in +the output by adding the B<--sums> switch (see below). Optionally +newsgroups not present in a checkgroups file can be excluded from output, +sse B<--checkgroups> below. + +=head3 Report type + +You can choose between different B<--report> types: postings per month, +average postings per month or all postings summed up; for details, see +below. + +=head3 Upper and lower boundaries + +Furthermore you can set an upper and/or lower boundary to exclude some +results from output via the B<--lower> and B<--upper> options, +respectively. By default, all newsgroups with more and/or less postings +per month will be excluded from the result set (i.e. not shown and not +considered for average and sum reports). You can change the meaning of +those boundaries with the B<--boundary> option. For details, please see +below. + +=head3 Sorting and formatting the output + +By default, all results are grouped by month; you can group results by +newsgroup instead via the B<--groupy-by> option. Within those groups, the +list of newsgroups (or months) is sorted alphabetically (or +chronologically, respectively) ascending. You can change that order (and +sort by number of postings) with the B<--order-by> option. For details and +exceptions, please see below. + +The results will be formatted as a kind of table; you can change the +output format to a simple list or just a list of newsgroups and number of +postings with the B<--format> option. Captions will be added by means of +the B<--caption> option; all comments (and captions) can be supressed by +using B<--nocomments>. + +Last but not least you can redirect all output to a number of files, e.g. +one for each month, by submitting the B<--filetemplate> option, see below. +Captions and comments are automatically disabled in this case. =head2 Configuration @@ -313,37 +325,28 @@ which should be present in the same directory via Config::Auto. See doc/INSTALL for an overview of possible configuration options. -You can override configuration options via the B<-g> switch. +You can override some configuration options via the B<--groupsdb> option. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. -=item B<-m> I (month) - -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -or B<-a> is set. +=item B<-m>, B<--month> I -=item B<-p> I (period) +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). By using the keyword I instead, you can set no +processing period to process the whole database. -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if -B<-a> is set. - -=item B<-a> (all) - -Set no processing period (process whole database). Overrides B<-m> -and B<-p>. - -=item B<-n> I (newsgroups) +=item B<-n>, B<--newsgroups> I Limit processing to a certain set of newsgroups. I can be a single newsgroup name (de.alt.test), a newsgroup hierarchy @@ -352,107 +355,230 @@ example de.test:de.alt.test:de.newusers.* -=item B<-t> I (threshold) +=item B<-s>, B<--sums|--nosums> (sum per hierarchy level) -Only include newsgroups with more than I postings per -month. Can be inverted by the B<-i> switch so that only newsgroups -with less than I postings will be included. +Include "virtual" groups for every hierarchy level in output, for +example: -This setting will be ignored if B<-l> or B<-b> is set. + de.alt.ALL 10 + de.alt.test 5 + de.alt.admin 7 -=item B<-l> I (level) +See the B man page for details. -Only include newsgroups with more than I postings per -month, every month during the whole reporting period. Can be inverted -by the B<-i> switch so that only newsgroups with less than I -postings every single month will be included. Output will be ordered -by newsgroup name, followed by month. +=item B<--checkgroups> I -This setting will be ignored if B<-b> is set. Overrides B<-t> and -can't be used together with B<-q>, B<-d> or B<-f>. +Restrict output to those newgroups present in a file in checkgroups format +(one newgroup name per line; everything after the first whitespace on each +line is ignored). All other newsgroups will be removed from output. -=item B<-b> I (best of) +=item B<-r>, B<--report> I -Create a list of the I newsgroups with the most postings over the -whole reporting period. Can be inverted by the B<-i> switch so that a -list of the I newsgroups with the least postings over the whole -period is generated. Output will be ordered by sum of postings. +Choose the report type: I, I or I -Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d> -or B<-f>. Output format is set to I (see below). +By default, B will report the number of postings for each +newsgroup in each month. But it can also report the average number of +postings per group for all months or the total sum of postings per group +for all months. -=item B<-i> (invert) +For report types I and I, the B option has no +meaning and will be silently ignored (see below). -Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower -threshold or level or generate a "bottom list" instead of a top list. +=item B<-l>, B<--lower> I -=item B<-s> (sum per hierarchy level) +Set the lower boundary. See B<--boundary> below. -Include "virtual" groups for every hierarchy level in output, for -example: +=item B<-l>, B<--upper> I - de.alt.ALL 10 - de.alt.test 5 - de.alt.admin 7 +Set the upper boundary. See B<--boundary> below. -See the B man page for details. +=item B<-b>, B<--boundary> I + +Set the boundary type to one of I, I, I or +I. + +By default, all newsgroups with more postings per month than the upper +boundary and/or less postings per month than the lower boundary will be +excluded from further processing. For the default report that means each +month only newsgroups with a number of postings between the boundaries +will be displayed. For the other report types, newsgroups with a number of +postings exceeding the boundaries in all (!) months will not be +considered. + +For example, lets take a list of newsgroups like this: + + ----- 2012-01: + de.comp.datenbanken.misc 6 + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.misc 8 + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-03: + de.comp.datenbanken.misc 24 + de.comp.datenbanken.ms-access 83 + de.comp.datenbanken.mysql 36 + +With C, +you'll get the following result: -=item B<-o> I (output format) + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 124 -Set output format. Default is I, which will print a header for -each new month, followed by an alphabetical list of newsgroups, each -on a new line, followed by the number of postings in that month. -B will try to align newsgroup names and posting counts. -Usage of B<-b> will force this format; it cannot be used together with -B<-f>. +de.comp.datenbanken.misc has not been considered even though it has 38 +postings in total, because it has less than 25 postings in every single +month. If you want to list all newsgroups with more than 25 postings U, you'll have to set the boundary type to I, see below. -I format is used to create an easily parsable output consisting -of an alphabetical list of newsgroups, each on a new line, followed by -the number of postings in that month, without any alignment. This -default format can't be used with time periods of more than one month. -Usage of B<-f> will force this format. +A boundary type of I will show only those newsgroups - at all - +that satisfy the boundaries in each and every single month. With the above +list of newsgroups and +C, +you'll get this result: -I format is like I, but will print the month in front of -the newsgroup name. + ----- All months: + de.comp.datenbanken.ms-access 293 -I format can only be use with a group list (see B<-n>) of -exactly one newsgroup and is like I, but will output months, -followed by the number of postings. +de.comp.datenbanken.mysql has not been considered because it had less than +25 postings in 2012-02. -=item B<-c> (captions) +You can use that to get a list of newsgroups that have more (or less) then +x postings during the whole reporting period. -Add captions to output (reporting period, newsgroups list, threshold -and so on). +A boundary type of I will show only those newsgroups - at all -that +satisfy the boundaries on average. With the above list of newsgroups and +C, +you'll get this result: -This setting will be ignored if B<-f> is set. + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 -=item B<-q> (quantity of postings) +The average number of postings in the three groups is: -Sort by number of postings instead of by newsgroup names. + de.comp.datenbanken.misc 12.67 + de.comp.datenbanken.ms-access 97.67 + de.comp.datenbanken.mysql 48.33 -Cannot be used with B<-l> or B<-b>. +Last but not least, a boundary type of I will show only those +newsgroups - at all - that satisfy the boundaries with the total sum of +all postings during the reporting period. With the above list of +newsgroups and +C, +you'll finally get this result: -=item B<-d> (descending) + ----- All months: + de.comp.datenbanken.misc 38 + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 -Change sort order to descending. -Cannot be used with B<-l> or B<-b>. +=item B<-g>, B<--group-by> I -=item B<-f> I (output file) +By default, all results are grouped by month, sorted chronologically in +ascending order, like this: -Save output to file instead of dumping it to STDOUT. B -will create one file for each month, with filenames composed by -adding year and month to the I, for example -with B<-f> I: + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 - stats-2010-01 - stats-2010-02 +The results can be grouped by newsgroups instead via +B<--group-by> I: + + ----- de.comp.datenbanken.ms-access: + 2012-01 84 + 2012-02 126 + ----- de.comp.datenbanken.mysql: + 2012-01 88 + 2012-02 21 + +By appending I<-desc> to the group-by option parameter, you can reverse +the sort order - e.g. B<--group-by> I will give: + + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + +Average and sums reports (see above) will always be grouped by months; +this option will therefore be ignored. + +=item B<-o>, B<--order-by> I + +Within each group (a single month or single newsgroup, see above), the +report will be sorted by newsgroup names in ascending alphabetical order +by default. You can change the sort order to descending or sort by number +of postings instead. + +=item B<-f>, B<--format> I + +Select the output format, I being the default: + + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +I format looks like this: + + 2012-01 de.comp.datenbanken.ms-access 84 + 2012-01 de.comp.datenbanken.mysql 88 + 2012-02 de.comp.datenbanken.ms-access 126 + 2012-02 de.comp.datenbanken.mysql 21 + +And I format looks like this: + + # 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + # 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +You can remove the comments by using B<--nocomments>, see below. + +=item B<-c>, B<--captions|--nocaptions> + +Add captions to output, like this: + + ----- Report for 2012-01 to 2012-02 (number of postings for each month) + ----- Newsgroups: de.comp.datenbanken.* + ----- Threshold: 10 => x <= 20 (on average) + ----- Grouped by Newsgroups (ascending), sorted by number of postings descending + +False by default. + +=item B<--comments|--nocomments> + +Add comments (group headers) to I and I output. True by default. + +Use I<--nocomments> to suppress anything except newsgroup names/months and +numbers of postings. This is enforced when using B<--filetemplate>, see below. + +=item B<--filetemplate> I + +Save output to file(s) instead of dumping it to STDOUT. B will +create one file for each month (or each newsgroup, accordant to the +setting of B<--group-by>, see above), with filenames composed by adding +year and month (or newsgroup names) to the I, for +example with B<--filetemplate> I: + + stats-2012-01 + stats-2012-02 ... and so on -This setting will be ignored if B<-l> or B<-b> is set. Output format -is set to I (see above). +B<--nocomments> is enforced, see above. -=item B<-g> I (postings per group table) +=item B<--groupsdb> I Override I from F. @@ -460,36 +586,40 @@ Override I from F. =head1 INSTALLATION -See doc/INSTALL. +See L. =head1 EXAMPLES -Show number of postings per group for lasth month in I format: +Show number of postings per group for lasth month in I format: groupstats Show that report for January of 2010 and de.alt.* plus de.test, including display of hierarchy levels: - groupstats -m 2010-01 -n de.alt.*:de.test -s + groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums -Show that report for the year of 2010 in I format: +Only show newsgroups with 30 postings or less last month, ordered +by number of postings, descending, in I format: - groupstats -p 2010-01:2010-12 -o pretty + groupstats --upper 30 --order-by postings-desc -Only show newsgroups with less than 30 postings last month, ordered -by number of postings, descending, in I format: +Show the total of all postings for the year of 2010 for all groups that +had 30 postings or less in every single month in that year, ordered by +number of postings in descending order: + + groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc - groupstats -iqdt 30 -o pretty +The same for the average number of postings in the year of 2010: -Show top 10 for the first half-year of of 2010 in I format: + groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc - groupstats -p 2010-01:2010-06 -b 10 -o pretty +List number of postings per group for eacht month of 2010 and redirect +output to one file for each month, namend stats-2010-01 and so on, in +machine-readable form (without formatting): -Report all groups that had less than 30 postings every singele month -in the year of 2010 (I format is forced) + groupstats -m 2010-01:2010-12 -f dump --filetemplate stats - groupstats -p 2010-01:2010-12 -il 30 =head1 FILES @@ -505,7 +635,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -520,11 +650,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +l>doc/INSTALL> =item - @@ -540,7 +670,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.