5 # This script will get statistical data on newgroup usage
8 # It is part of the NewsStats package.
10 # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
12 # It can be redistributed and/or modified under the same terms under
13 # which Perl itself is published.
16 our $VERSION = "0.01";
18 push(@INC, dirname($0));
22 use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
26 ################################# Main program #################################
28 ### read commandline options
29 my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
31 ### read configuration
32 my %Conf = %{ReadConfig('newsstats.conf')};
34 ### override configuration via commandline options
36 $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
37 &OverrideConfig(\%Conf,\%ConfOverride);
39 ### check for incompatible command line options
40 # you can't mix '-t', '-b' and '-l'
41 # -b/-l take preference over -t, and -b takes preference over -l
42 if ($Options{'b'} or $Options{'l'}) {
45 warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
48 if ($Options{'b'} and $Options{'l'}) {
50 warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
53 # -q/-d don't work with -b or -l
54 warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
55 warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
59 # default output type to 'dump'
60 $Options{'o'} = 'dump' if !$Options{'o'};
61 # fail if more than one newsgroup is combined with 'dumpgroup' type
62 die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
63 # accept 'dumpgroup' only with -n
64 if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
65 $Options{'o'} = 'dump';
66 warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
68 # set output type to 'pretty' for -l
70 $Options{'o'} = 'pretty';
71 warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
75 my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
76 # reset to one month for 'dump' output type
77 if ($Options{'o'} eq 'dump' and $Options{'p'}) {
78 $StartMonth = $EndMonth;
79 warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
83 my $DBHandle = InitDB(\%Conf,1);
86 # get list of newsgroups (-n)
87 my ($QueryPart,@GroupList);
88 my $Newsgroups = $Options{'n'};
90 # explode list of newsgroups for WHERE clause
91 ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
93 # set to dummy value (always true)
98 if (defined($Options{'t'})) {
100 # -i: list groups below threshold
101 $QueryPart .= ' AND postings < ?';
103 # default: list groups above threshold
104 $QueryPart .= ' AND postings > ?';
106 # push threshold to GroupList to match number of binding vars for DBQuery->execute
107 push @GroupList,$Options{'t'};
110 # construct WHERE clause
111 # $QueryPart is "list of newsgroup" (or 1),
112 # &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
113 # according to setting of -s
114 my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
116 # get lenght of longest newsgroup delivered by query for formatting purposes
118 my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
120 my ($OrderClause,$DBQuery);
121 # -b (best of / top list) defined?
122 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
123 # default: neither -b nor -l
124 # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
125 # according to -q and -d
126 $OrderClause = 'newsgroup';
127 $OrderClause = 'postings' if $Options{'q'};
128 $OrderClause .= ' DESC' if $Options{'d'};
129 # prepare query: get number of postings per group from groups table for given months and newsgroups
130 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
131 } elsif ($Options{'b'}) {
132 # -b is set (then -l can't be!)
133 # set sorting order (-i)
135 $OrderClause = 'postings';
137 $OrderClause = 'postings DESC';
139 # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
140 push @GroupList,$Options{'b'};
141 # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
142 $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
144 # -l must be set now, as all other cases have been taken care of
145 # set sorting order (-i)
151 # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute
152 # FIXME -- together with the query (see below)
153 push @GroupList,$Options{'l'};
154 push @GroupList,$StartMonth,$EndMonth;
155 # prepare query: get number of postings per group from groups table for given months and
156 # FIXME -- this query is ... in dire need of impromevent
157 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
161 $DBQuery->execute($StartMonth,$EndMonth,@GroupList)
162 or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
165 # print caption (-c) with time period if -m or -p is set
166 # FIXME - month or period should handled differently
167 printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
168 # print caption (-c) with newsgroup list if -n is set
169 printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
170 # print caption (-c) with threshold if -t is set, taking -i in account
171 printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
172 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
173 # default: neither -b nor -l
174 &OutputData($Options{'o'},$DBQuery,$MaxLength);
175 } elsif ($Options{'b'}) {
176 # -b is set (then -l can't be!)
177 # we have to read in the query results ourselves, as they do not have standard layout
178 while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
179 # we just assign "top x" or "bottom x" instead of a month for the caption
181 print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
184 # -l must be set now, as all other cases have been taken care of
185 # we have to read in the query results ourselves, as they do not have standard layout
186 while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
187 # we just switch $Newsgroups and $Month for output generation
189 print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
194 $DBHandle->disconnect;
198 ################################ Documentation #################################
202 groupstats - create reports on newsgroup usage
206 B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
210 See doc/README: Perl 5.8.x itself and the following modules from CPAN:
226 This script create reports on newsgroup usage (number of postings per
227 group per month) taken from result tables created by
230 The time period to act on defaults to last month; you can assign
231 another month via the B<-m> switch or a time period via the B<-p>
232 switch; the latter takes preference.
234 B<groupstats> will process all newsgroups by default; you can limit
235 that to only some newsgroups by supplying a list of those groups via
236 B<-n> (see below). You can include hierarchy levels in the output by
237 adding the B<-s> switch (see below).
239 Furthermore you can set a threshold via B<-t> so that only newsgroups
240 with more postings per month will be included in the report. You can
241 invert that by the B<-i> switch so only newsgroups with less than
242 I<threshold> postings per month will be included.
244 You can sort the output by number of postings per month instead of the
245 default (alphabetical list of newsgroups) by using B<-q>; you can
246 reverse the sorting order (from highest to lowest or in reversed
247 alphabetical order) by using B<-d>.
249 Furthermore, you can create a list of newsgroups that had consistently
250 more (or less) than x postings per month during the whole report
251 period by using B<-l> (together with B<i> as needed).
253 Last but not least you can create a "best of" list of the top x
254 newsgroups via B<-b> (or a "worst of" list by adding B<i>).
256 By default, B<groupstats> will dump a very simple alphabetical list of
257 newsgroups, one per line, followed by the number of postings in that
258 month. This output format of course cannot sensibly be combined with
259 time periods, so you can set the output format by using B<-o> (see
260 below). Captions can be added by setting the B<-c> switch.
264 F<groupstats.pl> will read its configuration from F<newsstats.conf>
265 which should be present in the same directory via Config::Auto.
267 See doc/INSTALL for an overview of possible configuration options.
269 You can override configuration options via the B<-g> switch.
275 =item B<-V> (version)
277 Print out version and copyright information on B<yapfaq> and exit.
281 Print this man page and exit.
283 =item B<-m> I<YYYY-MM> (month)
285 Set processing period to a month in YYYY-MM format. Ignored if B<-p>
288 =item B<-p> I<YYYY-MM:YYYY-MM> (period)
290 Set processing period to a time period between two month, each in
291 YYYY-MM format, separated by a colon. Overrides B<-m>.
293 =item B<-n> I<newsgroup(s)> (newsgroups)
295 Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
296 be a single newsgroup name (de.alt.test), a newsgroup hierarchy
297 (de.alt.*) or a list of either of these, separated by colons, for
300 de.test:de.alt.test:de.newusers.*
302 =item B<-t> I<threshold> (threshold)
304 Only include newsgroups with more than I<threshold> postings per
305 month. Can be inverted by the B<-i> switch so that only newsgroups
306 with less than I<threshold> postings will be included.
308 This setting will be ignored if B<-l> or B<-b> is set.
310 =item B<-l> I<level> (level)
312 Only include newsgroups with more than I<level> postings per
313 month, every month during the whole reporting period. Can be inverted
314 by the B<-i> switch so that only newsgroups with less than I<level>
315 postings every single month will be included. Output will be ordered
316 by newsgroup name, followed by month.
318 This setting will be ignored if B<-b> is set. Overrides B<-t> and
319 can't be used together with B<-q> or B<-d>.
321 =item B<-b> I<n> (best of)
323 Create a list of the I<n> newsgroups with the most postings over the
324 whole reporting period. Can be inverted by the B<-i> switch so that a
325 list of the I<n> newsgroups with the least postings over the whole
326 period is generated. Output will be ordered by sum of postings.
328 Overrides B<-t> and B<-l> and can't be used together with B<-q> or
329 B<-d>. Output format is set to I<pretty> (see below).
333 Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
334 threshold or level or generate a "bottom list" instead of a top list.
336 =item B<-s> (sum per hierarchy level)
338 Include "virtual" groups for every hierarchy level in output, for
345 See the B<gatherstats> man page for details.
347 =item B<-o> I<output type> (output format)
349 Set output format. Default is I<dump>, consisting of an alphabetical
350 list of newsgroups, each on a new line, followed by the number of
351 postings in that month. This default format can't be used with time
352 periods of more than one month.
354 I<list> format is like I<dump>, but will print the month in front of
357 I<dumpgroup> format can only be use with a group list (see B<-n>) of
358 exactly one newsgroup and is like I<dump>, but will output months,
359 followed by the number of postings.
361 If you don't need easily parsable output, you'll mostly use I<pretty>
362 format, which will print a header for each new month and try to align
363 newsgroup names and posting counts. Usage of B<-b> will force this
366 =item B<-c> (captions)
368 Add captions to output (reporting period, newsgroups list, threshold).
370 =item B<-q> (quantity of postings)
372 Sort by number of postings instead of by newsgroup names.
374 Cannot be used with B<-l> or B<-b>.
376 =item B<-d> (descending)
378 Change sort order to descending.
380 Cannot be used with B<-l> or B<-b>.
382 =item B<-g> I<table> (postings per group table)
384 Override I<DBTableGrps> from F<newsstats.conf>.
394 Show number of postings per group for lasth month in I<dump> format:
398 Show that report for January of 2010 and de.alt.* plus de.test,
399 including display of hierarchy levels:
401 groupstats -m 2010-01 -n de.alt.*:de.test -s
403 Show that report for the year of 2010 in I<pretty> format:
405 groupstats -p 2010-01:2010-12 -o pretty
407 Only show newsgroups with less than 30 postings last month, ordered
408 by number of postings, descending, in I<pretty> format:
410 groupstats -iqdt 30 -o pretty
412 Show top 10 for the first half-year of of 2010 in I<pretty> format:
414 groupstats -p 2010-01:2010-06 -b 10 -o pretty
416 Report all groups that had less than 30 postings every singele month
417 in the year of 2010 (I<pretty> format is forced)
419 groupstats -p 2010-01:2010-12 -il 30
425 =item F<groupstats.pl>
429 =item F<NewsStats.pm>
431 Library functions for the NewsStats package.
433 =item F<newsstats.conf>
435 Runtime configuration file for B<yapfaq>.
441 Please report any bugs or feature requests to the author or use the
442 bug tracker at L<http://bugs.th-h.de/>!
462 This script is part of the B<NewsStats> package.
466 Thomas Hochstein <thh@inter.net>
468 =head1 COPYRIGHT AND LICENSE
470 Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
472 This program is free software; you may redistribute it and/or modify it
473 under the same terms as Perl itself.