################################# Main program #################################
### read commandline options
-my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
+my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:');
### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};
### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take precedence over -t, and -b takes precedence over -l
+# you can't use '-f' with '-b' or '-l'
if ($Options{'b'} or $Options{'l'}) {
+ if ($Options{'f'}) {
+ # drop -f
+ warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n");
+ undef($Options{'f'});
+ };
if ($Options{'t'}) {
# drop -t
warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};
-### get time period
-my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
-# reset to one month for 'dump' output type
-if ($Options{'o'} eq 'dump' and $Options{'p'}) {
- $StartMonth = $EndMonth;
- warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
-};
-
### init database
my $DBHandle = InitDB(\%Conf,1);
+### get time period
+my ($StartMonth,$EndMonth);
+# if '-a' is set, set start/end month from database
+# FIXME - it doesn't make that much sense to get first/last month from database to query it
+# with a time period that equals no time period ...
+if ($Options{'a'}) {
+ undef($Options{'m'});
+ undef($Options{'p'});
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
+ $DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
+ ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array;
+} else {
+ ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
+};
+# if time period is more than one month: set output type to '-o pretty' or '-o dumpgroup'
+if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) {
+ if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
+ # just one newsgroup is defined
+ warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
+ $Options{'o'} = 'dumpgroup';
+ } elsif (!defined($Options{'f'})) {
+ # more than one newsgroup - and no file output
+ warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
+ $Options{'o'} = 'pretty';
+ }
+};
+
### create report
# get list of newsgroups (-n)
-my ($QueryPart,@GroupList);
+my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);
my $Newsgroups = $Options{'n'};
if ($Newsgroups) {
# explode list of newsgroups for WHERE clause
- ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
+ ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups);
} else {
# set to dummy value (always true)
- $QueryPart = 1;
+ $QueryGroupList = 1;
};
# manage thresholds
if (defined($Options{'t'})) {
if ($Options{'i'}) {
# -i: list groups below threshold
- $QueryPart .= ' AND postings < ?';
+ $QueryThreshold .= ' postings < ?';
} else {
# default: list groups above threshold
- $QueryPart .= ' AND postings > ?';
+ $QueryThreshold .= ' postings > ?';
};
- # push threshold to GroupList to match number of binding vars for DBQuery->execute
- push @GroupList,$Options{'t'};
+ # push threshold to Params
+ push @Params,$Options{'t'};
+} else {
+ # set to dummy value (always true)
+ $QueryThreshold = 1;
}
# construct WHERE clause
-# $QueryPart is "list of newsgroup" (or 1),
+# $QueryGroupList is "list of newsgroup" (or 1),
+# $QueryThreshold is threshold definition (or 1),
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
-my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
+my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'}));
-# get lenght of longest newsgroup delivered by query for formatting purposes
+# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
-my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
+my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params));
my ($OrderClause,$DBQuery);
# -b (best of / top list) defined?
$DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif ($Options{'b'}) {
# -b is set (then -l can't be!)
- # set sorting order (-i)
+ # set sorting order (-i): top or flop list?
if ($Options{'i'}) {
$OrderClause = 'postings';
} else {
$OrderClause = 'postings DESC';
};
- # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
- push @GroupList,$Options{'b'};
+ # set -b to 10 if < 1 (Top 10)
+ $Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1;
+ # push LIMIT to Params
+ push @Params,$Options{'b'};
# prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
$DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
# -l must be set now, as all other cases have been taken care of
- # set sorting order (-i)
+ # which kind of level (-i): more than -l x or less than -l x?
+ my ($Level);
if ($Options{'i'}) {
- $OrderClause = '<';
+ $Level = '<';
} else {
- $OrderClause = '>';
+ $Level = '>';
};
- # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute
- # FIXME -- together with the query (see below)
- push @GroupList,$Options{'l'};
- push @GroupList,$StartMonth,$EndMonth;
- # prepare query: get number of postings per group from groups table for given months and
- # FIXME -- this query is ... in dire need of impromevent
- $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
+ # prepare and execute query: get list of newsgroups meeting level condition
+ $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level));
+ $DBQuery->execute($StartMonth,$EndMonth,@GroupList,$Options{'l'})
+ or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
+ # add newsgroups to a comma-separated list ready for IN(...) query
+ my $GroupList;
+ while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
+ $GroupList .= ',' if (defined($GroupList) and $GroupList ne '');
+ $GroupList .= "'$Newsgroup'";
+ };
+ $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause));
};
# execute query
-$DBQuery->execute($StartMonth,$EndMonth,@GroupList)
+$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
# output results
+# reset caption (-c) if -f is set
+undef($Options{'c'}) if $Options{'f'};
# print caption (-c) with time period if -m or -p is set
-# FIXME - month or period should handled differently
-printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
+if ($Options{'c'}) {
+ if ($Options{'p'}) {
+ printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
+ } elsif ($Options{'m'}) {
+ printf ("----- Report for %s\n",$StartMonth);
+ };
+};
# print caption (-c) with newsgroup list if -n is set
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
# print caption (-c) with threshold if -t is set, taking -i into account
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
# default: neither -b nor -l
- &OutputData($Options{'o'},$DBQuery,$MaxLength);
+ &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
# -b is set (then -l can't be!)
# we have to read in the query results ourselves, as they do not have standard layout
while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
- # we just assign "top x" or "bottom x" instead of a month for the caption
- # FIXME
- print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
+ # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty
+ print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
};
} else {
# -l must be set now, as all other cases have been taken care of
+ # print caption (-c) with level, taking -i into account
+ printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'};
# we have to read in the query results ourselves, as they do not have standard layout
while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
# we just switch $Newsgroups and $Month for output generation
- # FIXME
print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
};
};
=head1 SYNOPSIS
-B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
+B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>]
=head1 REQUIREMENTS
=head2 Configuration
-F<groupstats.pl> will read its configuration from F<newsstats.conf>
+B<groupstats> will read its configuration from F<newsstats.conf>
using Config::Auto; the file should be present in the same directory.
See doc/INSTALL for an overview of possible configuration options.
=item B<-m> I<YYYY-MM> (month)
Set processing period to a month in YYYY-MM format. Ignored if B<-p>
-is set.
+or B<-a> is set.
=item B<-p> I<YYYY-MM:YYYY-MM> (period)
Set processing period to a time period between two months, each in
-YYYY-MM format, separated by a colon. Overrides B<-m>.
+YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
+B<-a> is set.
+
+=item B<-a> (all)
+
+Do not restrict the processing period (process the whole database).
+Overrides B<-m> and B<-p>.
=item B<-n> I<newsgroup(s)> (newsgroups)
by newsgroup name, followed by month.
This setting will be ignored if B<-b> is set. Overrides B<-t> and
-can't be used together with B<-q> or B<-d>.
+can't be used together with B<-q>, B<-d> or B<-f>.
=item B<-b> I<n> (best of)
list of the I<n> newsgroups with the least postings over the whole
period is generated. Output will be ordered by sum of postings.
-Overrides B<-t> and B<-l> and can't be used together with B<-q> or
-B<-d>. Output format is set to I<pretty> (see below).
+Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d>
+or B<-f>. Output format is set to I<pretty> (see below).
=item B<-i> (invert)
=item B<-c> (captions)
-Add captions to output (reporting period, newsgroups list, threshold).
+Add captions to output (reporting period, newsgroups list, threshold
+and so on).
+
+This setting will be ignored if B<-f> is set.
=item B<-q> (quantity of postings)
Cannot be used with B<-l> or B<-b>.
+=item B<-f> I<filename template> (output file)
+
+Save output to file instead of dumping it to STDOUT. B<groupstats>
+will create one file for each month, with filenames composed by
+adding year and month to the I<filename template>, for example
+with B<-f> I<stats>:
+
+ stats-2010-01
+ stats-2010-02
+ ... and so on
+
+This setting will be ignored if B<-l> or B<-b> is set. Output format
+is set to I<dump> (see above).
+
=item B<-g> I<table> (postings per group table)
Override I<DBTableGrps> from F<newsstats.conf>.