5 # This script will get statistical data on newsgroup usage
8 # It is part of the NewsStats package.
10 # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
12 # It can be redistributed and/or modified under the same terms under
13 # which Perl itself is published.
16 our $VERSION = "0.01";
18 push(@INC, dirname($0));
22 use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
26 ################################# Main program #################################
# --- Option and configuration handling ---------------------------------------
# Parses the command line, loads newsstats.conf, applies the -g override and
# then normalises conflicting option combinations (warning or dying) before
# the reporting period is determined.
28 ### read commandline options
# NOTE(review): the option spec looks getopt-style (':' = option takes a
# value) -- confirm against ReadOptions.
29 my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
31 ### read configuration
# ReadConfig returns a hash reference; it is dereferenced and copied into %Conf
32 my %Conf = %{ReadConfig('newsstats.conf')};
34 ### override configuration via commandline options
# only the groups table name (DBTableGrps) is overridden here, and only if -g
# was given with a true value
36 $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
37 &OverrideConfig(\%Conf,\%ConfOverride);
39 ### check for incompatible command line options
40 # you can't mix '-t', '-b' and '-l'
41 # -b/-l take preference over -t, and -b takes preference over -l
42 if ($Options{'b'} or $Options{'l'}) {
# warn that the threshold (-t) is dropped in top list / level mode
45 warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
# both -b and -l given: -b wins, warn that the level (-l) is dropped
48 if ($Options{'b'} and $Options{'l'}) {
50 warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
53 # -q/-d don't work with -b or -l
54 warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
55 warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
59 # default output type to 'dump' (applies whenever -o is unset or false)
60 $Options{'o'} = 'dump' if !$Options{'o'};
61 # fail if more than one newsgroup is combined with 'dumpgroup' type
# a ':' (list separator) or a '*' (wildcard) in -n indicates more than one group
62 die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
63 # accept 'dumpgroup' only with -n; without -n fall back to 'dump' and warn
64 if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
65 $Options{'o'} = 'dump';
66 warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
68 # set output type to 'pretty' for -l
70 $Options{'o'} = 'pretty';
71 warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
# determine first and last month of the reporting period from -m and -p
75 my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
76 # 'dump' output cannot be combined with a time period (-p):
# the period is kept and the output type is switched to 'pretty' instead
77 if ($Options{'o'} eq 'dump' and $Options{'p'}) {
78 warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
79 $Options{'o'} = 'pretty';
# --- Query construction and execution ----------------------------------------
# Builds the WHERE and ORDER BY parts from the options, prepares one of three
# queries (default listing, -b top list, -l level list) and executes it.
# Bind values are positional: execute() passes $StartMonth and $EndMonth first
# (for 'month BETWEEN ? AND ?'), followed by @GroupList, to which the
# threshold / limit / level values are appended below in placeholder order.
# NOTE(review): the meaning of the second InitDB argument (1) is defined in
# InitDB -- confirm there.
83 my $DBHandle = InitDB(\%Conf,1);
86 # get list of newsgroups (-n)
87 my ($QueryPart,@GroupList);
88 my $Newsgroups = $Options{'n'};
90 # explode list of newsgroups for WHERE clause
# SQLGroupList returns the SQL fragment first, followed by the list of values
# that is later handed to execute()
91 ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
93 # set to dummy value (always true)
# a threshold (-t) adds one more condition and one more bind value
98 if (defined($Options{'t'})) {
100 # -i: list groups below threshold
101 $QueryPart .= ' AND postings < ?';
103 # default: list groups above threshold
104 $QueryPart .= ' AND postings > ?';
106 # push threshold to GroupList to match number of binding vars for DBQuery->execute
107 push @GroupList,$Options{'t'};
110 # construct WHERE clause
111 # $QueryPart is the "list of newsgroups" condition (or 1),
112 # &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
113 # according to setting of -s
114 my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
116 # get length of longest newsgroup delivered by query for formatting purposes
# (the helper really is spelled GetMaxLenght)
118 my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
120 my ($OrderClause,$DBQuery);
121 # -b (best of / top list) defined?
122 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
123 # default: neither -b nor -l
124 # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
125 # according to -q and -d
126 $OrderClause = 'newsgroup';
127 $OrderClause = 'postings' if $Options{'q'};
128 $OrderClause .= ' DESC' if $Options{'d'};
129 # prepare query: get number of postings per group from groups table for given months and newsgroups
# rows are always ordered by month first, then by $OrderClause
130 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
131 } elsif ($Options{'b'}) {
132 # -b is set (then -l can't be!)
133 # set sorting order (-i)
135 $OrderClause = 'postings';
137 $OrderClause = 'postings DESC';
139 # set -b to 10 (Top 10) if it is not made up of digits only or is < 1
140 $Options{'b'} = 10 if $Options{'b'} !~ /^\d*$/ or $Options{'b'} < 1;
141 # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
142 push @GroupList,$Options{'b'};
143 # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
# groups with equal sums are ordered by newsgroup name
144 $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
146 # -l must be set now, as all other cases have been taken care of
147 # set sorting order (-i)
153 # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute
154 # FIXME -- together with the query (see below)
155 push @GroupList,$Options{'l'};
156 push @GroupList,$StartMonth,$EndMonth;
157 # prepare query: get number of postings per group from groups table for given months,
# restricted to the newsgroups selected by the inner query (HAVING MAX(postings) ...)
# in this branch $OrderClause is interpolated after MAX(postings), i.e. it is
# used as the comparison operator and not as an ORDER BY expression
# NOTE(review): $WhereClause is interpolated twice, but the values returned by
# SQLGroupList are bound only once (before the level); if $QueryPart contains
# placeholders (-n), the second copy presumably lacks bind values -- verify.
158 # FIXME -- this query is ... in dire need of improvement
159 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
# run the prepared query; abort with the DBI error text on failure
163 $DBQuery->execute($StartMonth,$EndMonth,@GroupList)
164 or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
# --- Output ------------------------------------------------------------------
# Prints the optional captions (-c) and then the query results, either through
# OutputData (default mode) or row by row through FormatOutput (-b / -l).
167 # print caption (-c) with time period if -m or -p is set
168 # FIXME - month or period should be handled differently
169 printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
170 # print caption (-c) with newsgroup list if -n is set
# the colon-separated -n value is shown comma-separated
171 printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
172 # print caption (-c) with threshold if -t is set, taking -i in account
# the value is tested for truth, so no caption is printed for a threshold of 0
173 printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
174 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
175 # default: neither -b nor -l
# OutputData fetches the rows from the statement handle itself
176 &OutputData($Options{'o'},$DBQuery,$MaxLength);
177 } elsif ($Options{'b'}) {
178 # -b is set (then -l can't be!)
179 # we have to read in the query results ourselves, as they do not have standard layout
# (this query returns two columns only: newsgroup and summed postings)
180 while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
181 # we just assign "top x" or "bottom x" instead of a month for the caption
183 print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
186 # -l must be set now, as all other cases have been taken care of
187 # we have to read in the query results ourselves, as they do not have standard layout
188 while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
189 # we just switch $Newsgroup and $Month for output generation
# NOTE(review): a fixed width of 7 is passed instead of $MaxLength --
# presumably the width of a YYYY-MM month; confirm against FormatOutput.
191 print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
# close the database connection
196 $DBHandle->disconnect;
200 ################################ Documentation #################################
204 groupstats - create reports on newsgroup usage
208 B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
212 See doc/README: Perl 5.8.x itself and the following modules from CPAN:
228 This script creates reports on newsgroup usage (number of postings per
229 group per month) taken from result tables created by
232 The time period to act on defaults to last month; you can assign
233 another month via the B<-m> switch or a time period via the B<-p>
234 switch; the latter takes preference.
236 B<groupstats> will process all newsgroups by default; you can limit
237 that to only some newsgroups by supplying a list of those groups via
238 B<-n> (see below). You can include hierarchy levels in the output by
239 adding the B<-s> switch (see below).
241 Furthermore you can set a threshold via B<-t> so that only newsgroups
242 with more postings per month will be included in the report. You can
243 invert that by the B<-i> switch so only newsgroups with less than
244 I<threshold> postings per month will be included.
246 You can sort the output by number of postings per month instead of the
247 default (alphabetical list of newsgroups) by using B<-q>; you can
248 reverse the sorting order (from highest to lowest or in reversed
249 alphabetical order) by using B<-d>.
251 Furthermore, you can create a list of newsgroups that had consistently
252 more (or less) than x postings per month during the whole report
253 period by using B<-l> (together with B<-i> as needed).
255 Last but not least you can create a "best of" list of the top x
256 newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
258 By default, B<groupstats> will dump a very simple alphabetical list of
259 newsgroups, one per line, followed by the number of postings in that
260 month. This output format of course cannot sensibly be combined with
261 time periods, so you can set the output format by using B<-o> (see
262 below). Captions can be added by setting the B<-c> switch.
266 F<groupstats.pl> will read its configuration via Config::Auto from
267 F<newsstats.conf>, which should be present in the same directory.
269 See doc/INSTALL for an overview of possible configuration options.
271 You can override configuration options via the B<-g> switch.
277 =item B<-V> (version)
279 Print out version and copyright information on B<groupstats> and exit.
283 Print this man page and exit.
285 =item B<-m> I<YYYY-MM> (month)
287 Set processing period to a month in YYYY-MM format. Ignored if B<-p>
290 =item B<-p> I<YYYY-MM:YYYY-MM> (period)
292 Set processing period to a time period between two months, each in
293 YYYY-MM format, separated by a colon. Overrides B<-m>.
295 =item B<-n> I<newsgroup(s)> (newsgroups)
297 Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
298 be a single newsgroup name (de.alt.test), a newsgroup hierarchy
299 (de.alt.*) or a list of either of these, separated by colons, for
302 de.test:de.alt.test:de.newusers.*
304 =item B<-t> I<threshold> (threshold)
306 Only include newsgroups with more than I<threshold> postings per
307 month. Can be inverted by the B<-i> switch so that only newsgroups
308 with less than I<threshold> postings will be included.
310 This setting will be ignored if B<-l> or B<-b> is set.
312 =item B<-l> I<level> (level)
314 Only include newsgroups with more than I<level> postings per
315 month, every month during the whole reporting period. Can be inverted
316 by the B<-i> switch so that only newsgroups with less than I<level>
317 postings every single month will be included. Output will be ordered
318 by newsgroup name, followed by month.
320 This setting will be ignored if B<-b> is set. Overrides B<-t> and
321 can't be used together with B<-q> or B<-d>.
323 =item B<-b> I<n> (best of)
325 Create a list of the I<n> newsgroups with the most postings over the
326 whole reporting period. Can be inverted by the B<-i> switch so that a
327 list of the I<n> newsgroups with the least postings over the whole
328 period is generated. Output will be ordered by sum of postings.
330 Overrides B<-t> and B<-l> and can't be used together with B<-q> or
331 B<-d>. Output format is set to I<pretty> (see below).
335 Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
336 threshold or level or generate a "bottom list" instead of a top list.
338 =item B<-s> (sum per hierarchy level)
340 Include "virtual" groups for every hierarchy level in output, for
347 See the B<gatherstats> man page for details.
349 =item B<-o> I<output type> (output format)
351 Set output format. Default is I<dump>, consisting of an alphabetical
352 list of newsgroups, each on a new line, followed by the number of
353 postings in that month. This default format can't be used with time
354 periods of more than one month.
356 I<list> format is like I<dump>, but will print the month in front of
359 I<dumpgroup> format can only be used with a group list (see B<-n>) of
360 exactly one newsgroup and is like I<dump>, but will output months,
361 followed by the number of postings.
363 If you don't need easily parsable output, you'll mostly use I<pretty>
364 format, which will print a header for each new month and try to align
365 newsgroup names and posting counts. Usage of B<-l> will force this
368 =item B<-c> (captions)
370 Add captions to output (reporting period, newsgroups list, threshold).
372 =item B<-q> (quantity of postings)
374 Sort by number of postings instead of by newsgroup names.
376 Cannot be used with B<-l> or B<-b>.
378 =item B<-d> (descending)
380 Change sort order to descending.
382 Cannot be used with B<-l> or B<-b>.
384 =item B<-g> I<table> (postings per group table)
386 Override I<DBTableGrps> from F<newsstats.conf>.
396 Show number of postings per group for last month in I<dump> format:
400 Show that report for January of 2010 and de.alt.* plus de.test,
401 including display of hierarchy levels:
403 groupstats -m 2010-01 -n de.alt.*:de.test -s
405 Show that report for the year of 2010 in I<pretty> format:
407 groupstats -p 2010-01:2010-12 -o pretty
409 Only show newsgroups with less than 30 postings last month, ordered
410 by number of postings, descending, in I<pretty> format:
412 groupstats -iqdt 30 -o pretty
414 Show top 10 for the first half-year of 2010 in I<pretty> format:
416 groupstats -p 2010-01:2010-06 -b 10 -o pretty
418 Report all groups that had less than 30 postings every single month
419 in the year of 2010 (I<pretty> format is forced)
421 groupstats -p 2010-01:2010-12 -il 30
427 =item F<groupstats.pl>
431 =item F<NewsStats.pm>
433 Library functions for the NewsStats package.
435 =item F<newsstats.conf>
437 Runtime configuration file for B<groupstats>.
443 Please report any bugs or feature requests to the author or use the
444 bug tracker at L<http://bugs.th-h.de/>!
464 This script is part of the B<NewsStats> package.
468 Thomas Hochstein <thh@inter.net>
470 =head1 COPYRIGHT AND LICENSE
472 Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
474 This program is free software; you may redistribute it and/or modify it
475 under the same terms as Perl itself.