groupstats.pl: Add '-a' option.
[usenet/newsstats.git] / groupstats.pl
... / ...
CommitLineData
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
# Compile-time setup: declare the script version and make modules that are
# stored next to the script loadable.
BEGIN {
  use File::Basename;
  our $VERSION = "0.01";
  # append the directory this script was started from to the module search path
  my $ScriptDir = dirname($0);
  push @INC, $ScriptDir;
}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### parse the command line
# option letters followed by a colon take an argument
my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdg:');

### load settings from the configuration file
my $ConfRef = ReadConfig('newsstats.conf');
my %Conf    = %{$ConfRef};

### let command line options take precedence over the configuration file
# -g replaces the configured name of the groups table (DBTableGrps)
my %ConfOverride;
if ($Options{'g'}) {
  $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf,\%ConfOverride);
38
### check for incompatible command line options
# '-t', '-b' and '-l' are mutually exclusive:
# -b and -l both win over -t, and -b wins over -l
if ($Options{'b'} || $Options{'l'}) {
  # a threshold cannot be honoured in top list or level mode: discard -t
  # (the warning is built first, as it quotes the value being dropped)
  if ($Options{'t'}) {
    warn "$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n";
    $Options{'t'} = undef;
  }
  # top list and level were both requested: discard -l
  if ($Options{'b'} && $Options{'l'}) {
    warn "$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n";
    $Options{'l'} = undef;
  }
  # the sorting switches -q and -d have no effect together with -b or -l
  if ($Options{'q'}) {
    warn "$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n";
  }
  if ($Options{'d'}) {
    warn "$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n";
  }
}
57
### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a colon separates list entries, an asterisk marks a hierarchy wildcard)
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
  $Options{'o'} = 'dump';
  warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# -l always produces 'pretty' output; only tell the user about it when that
# really changes the output type in effect (no warning for '-l x -o pretty')
if ($Options{'l'} and $Options{'o'} ne 'pretty') {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};
73
### init database
my $DBHandle = InitDB(\%Conf,1);

### get time period
my ($StartMonth,$EndMonth);
# if '-a' is set, set start/end month from database
# FIXME - it doesn't make that much sense to get first/last month from database to query it
#         with a time period that equals no time period ...
if ($Options{'a'}) {
  # -a overrides -m and -p, so both are dropped here
  undef($Options{'m'});
  undef($Options{'p'});
  # first and last month present in the groups table
  # (both are undefined if the table is empty)
  my $DBQuery = $DBHandle->prepare(sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
  $DBQuery->execute or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array;
} else {
  ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
};
# if time period is more than one month: set output type to '-o pretty' or '-o dumpgroup'
if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) {
  # name the option the user really gave in the warning (-p was dropped above if -a is set)
  my $PeriodOption = $Options{'a'} ? 'processing of the whole database (-a)' : 'time periods (-p)';
  # 'dumpgroup' is only possible for exactly one newsgroup (no list, no wildcard)
  my $NewType = (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) ? 'dumpgroup' : 'pretty';
  warn ("$MySelf: W: You cannot combine $PeriodOption with '-o dump', changing output type to '-o $NewType'.\n");
  $Options{'o'} = $NewType;
};
101
### create report
# both parts of the WHERE clause start out as a dummy value (always true)
my (@GroupList,@Params);
my $QueryGroupList = 1;
my $QueryThreshold = 1;

# get list of newsgroups (-n) and explode it for the WHERE clause
my $Newsgroups = $Options{'n'};
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups) if $Newsgroups;

# manage thresholds
if (defined($Options{'t'})) {
  # -i: list groups below threshold, default: list groups above threshold
  $QueryThreshold = $Options{'i'} ? ' postings < ?' : ' postings > ?';
  # the threshold itself is bound to the placeholder later on
  push @Params,$Options{'t'};
}

# construct WHERE clause
# $QueryGroupList is "list of newsgroup" (or 1),
# $QueryThreshold is threshold definition (or 1),
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
my $WhereClause = sprintf(
  'month BETWEEN ? AND ? AND %s AND %s %s',
  $QueryGroupList,
  $QueryThreshold,
  &SQLHierarchies($Options{'s'})
);

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght(
  $DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,
  $StartMonth,$EndMonth,@GroupList,@Params
);
140
# Build the report query for one of three modes - plain list, top list (-b)
# or level (-l) - and execute it with the values for all placeholders.
my ($OrderClause,$DBQuery);
# -b (best of / top list) defined?
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set (it takes preference over -l)
  # test with defined() so that '-b 0' ends up here and gets the default below
  # set sorting order (-i): top or flop list?
  if ($Options{'i'}) {
    $OrderClause = 'postings';
  } else {
    $OrderClause = 'postings DESC';
  };
  # set -b to 10 if it is not a number or < 1 (Top 10)
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # which kind of level (-i): more than -l x or less than -l x?
  # "more than x in every month" means the weakest month is above x (MIN),
  # "less than x in every month" means the strongest month is below x (MAX)
  # NOTE(review): months for which a group has no row at all are not seen by
  # MIN()/MAX() - confirm whether gatherstats writes rows with 0 postings
  my ($Aggregate,$Level) = $Options{'i'} ? ('MAX','<') : ('MIN','>');
  # prepare and execute query: get list of newsgroups meeting level condition
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s(postings) %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Aggregate,$Level));
  # @Params carries the values for any placeholder inside $WhereClause
  $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  # collect the newsgroups, quoted by the database driver, for the IN(...) query
  my @LevelGroups;
  while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
    push @LevelGroups,$DBHandle->quote($Newsgroup);
  };
  # 'IN ()' is a syntax error, so fall back to 'IN (NULL)', which never
  # matches, if no newsgroup meets the level condition
  my $GroupList = @LevelGroups ? join(',',@LevelGroups) : 'NULL';
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause));
};

# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
191
# output results
# print caption (-c) with time period if -m, -p or -a is set
if ($Options{'c'}) {
  # -a drops -p, so it has to be checked on its own; the months taken from
  # the database are undefined if the table is empty
  if ($Options{'p'} or ($Options{'a'} and defined($StartMonth) and defined($EndMonth))) {
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
  } elsif ($Options{'m'}) {
    printf ("----- Report for %s\n",$StartMonth);
  };
};
# print caption (-c) with newsgroup list if -n is set
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
# print caption (-c) with threshold if -t is set, taking -i in account
# (defined() instead of truth, as a threshold of 0 is applied to the query, too)
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and defined($Options{'t'});
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # we have to read in the query results ourselves, as they do not have standard layout
  while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty
    print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
  };
} else {
  # -l must be set now, as all other cases have been taken care of
  # print caption (-c) with level, taking -i in account
  printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'};
  # we have to read in the query results ourselves, as they do not have standard layout
  while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just switch $Newsgroups and $Month for output generation
    # NOTE(review): the last argument is presumably the column width; 7 equals
    # the length of a month in YYYY-MM format
    print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
  };
};

### close handles
$DBHandle->disconnect;
228
229__END__
230
231################################ Documentation #################################
232
233=head1 NAME
234
235groupstats - create reports on newsgroup usage
236
237=head1 SYNOPSIS
238
239B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
240
241=head1 REQUIREMENTS
242
243See doc/README: Perl 5.8.x itself and the following modules from CPAN:
244
245=over 2
246
247=item -
248
249Config::Auto
250
251=item -
252
253DBI
254
255=back
256
257=head1 DESCRIPTION
258
259This script creates reports on newsgroup usage (number of postings per
260group per month) taken from result tables created by
261F<gatherstats.pl>.
262
263The time period to act on defaults to last month; you can assign
264another month via the B<-m> switch or a time period via the B<-p>
265switch; the latter takes preference. B<-a> processes the whole database and overrides both.
266
267B<groupstats> will process all newsgroups by default; you can limit
268that to only some newsgroups by supplying a list of those groups via
269B<-n> (see below). You can include hierarchy levels in the output by
270adding the B<-s> switch (see below).
271
272Furthermore you can set a threshold via B<-t> so that only newsgroups
273with more postings per month will be included in the report. You can
274invert that by the B<-i> switch so only newsgroups with less than
275I<threshold> postings per month will be included.
276
277You can sort the output by number of postings per month instead of the
278default (alphabetical list of newsgroups) by using B<-q>; you can
279reverse the sorting order (from highest to lowest or in reversed
280alphabetical order) by using B<-d>.
281
282Furthermore, you can create a list of newsgroups that had consistently
283more (or less) than x postings per month during the whole report
284period by using B<-l> (together with B<-i> as needed).
285
286Last but not least you can create a "best of" list of the top x
287newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
288
289By default, B<groupstats> will dump a very simple alphabetical list of
290newsgroups, one per line, followed by the number of postings in that
291month. This output format of course cannot sensibly be combined with
292time periods, so you can set the output format by using B<-o> (see
293below). Captions can be added by setting the B<-c> switch.
294
295=head2 Configuration
296
297B<groupstats> will read its configuration via Config::Auto from
298F<newsstats.conf>, which should be present in the same directory.
299
300See doc/INSTALL for an overview of possible configuration options.
301
302You can override configuration options via the B<-g> switch.
303
304=head1 OPTIONS
305
306=over 3
307
308=item B<-V> (version)
309
310Print out version and copyright information on B<groupstats> and exit.
311
312=item B<-h> (help)
313
314Print this man page and exit.
315
316=item B<-m> I<YYYY-MM> (month)
317
318Set processing period to a month in YYYY-MM format. Ignored if B<-p>
319or B<-a> is set.
320
321=item B<-p> I<YYYY-MM:YYYY-MM> (period)
322
323Set processing period to a time period between two months, each in
324YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
325B<-a> is set.
326
327=item B<-a> (all)
328
329Set no processing period (process whole database). Overrides B<-m>
330and B<-p>.
331
332=item B<-n> I<newsgroup(s)> (newsgroups)
333
334Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
335be a single newsgroup name (de.alt.test), a newsgroup hierarchy
336(de.alt.*) or a list of either of these, separated by colons, for
337example
338
339 de.test:de.alt.test:de.newusers.*
340
341=item B<-t> I<threshold> (threshold)
342
343Only include newsgroups with more than I<threshold> postings per
344month. Can be inverted by the B<-i> switch so that only newsgroups
345with less than I<threshold> postings will be included.
346
347This setting will be ignored if B<-l> or B<-b> is set.
348
349=item B<-l> I<level> (level)
350
351Only include newsgroups with more than I<level> postings per
352month, every month during the whole reporting period. Can be inverted
353by the B<-i> switch so that only newsgroups with less than I<level>
354postings every single month will be included. Output will be ordered
355by newsgroup name, followed by month.
356
357This setting will be ignored if B<-b> is set. Overrides B<-t> and
358can't be used together with B<-q> or B<-d>.
359
360=item B<-b> I<n> (best of)
361
362Create a list of the I<n> newsgroups with the most postings over the
363whole reporting period. Can be inverted by the B<-i> switch so that a
364list of the I<n> newsgroups with the least postings over the whole
365period is generated. Output will be ordered by sum of postings.
366
367Overrides B<-t> and B<-l> and can't be used together with B<-q> or
368B<-d>. Output format is set to I<pretty> (see below).
369
370=item B<-i> (invert)
371
372Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
373threshold or level or generate a "bottom list" instead of a top list.
374
375=item B<-s> (sum per hierarchy level)
376
377Include "virtual" groups for every hierarchy level in output, for
378example:
379
380 de.alt.ALL 10
381 de.alt.test 5
382 de.alt.admin 7
383
384See the B<gatherstats> man page for details.
385
386=item B<-o> I<output type> (output format)
387
388Set output format. Default is I<dump>, consisting of an alphabetical
389list of newsgroups, each on a new line, followed by the number of
390postings in that month. This default format can't be used with time
391periods of more than one month.
392
393I<list> format is like I<dump>, but will print the month in front of
394the newsgroup name.
395
396I<dumpgroup> format can only be used with a group list (see B<-n>) of
397exactly one newsgroup and is like I<dump>, but will output months,
398followed by the number of postings.
399
400If you don't need easily parsable output, you'll mostly use I<pretty>
401format, which will print a header for each new month and try to align
402newsgroup names and posting counts. Usage of B<-b> or B<-l> will force this
403format.
404
405=item B<-c> (captions)
406
407Add captions to output (reporting period, newsgroups list, threshold).
408
409=item B<-q> (quantity of postings)
410
411Sort by number of postings instead of by newsgroup names.
412
413Cannot be used with B<-l> or B<-b>.
414
415=item B<-d> (descending)
416
417Change sort order to descending.
418
419Cannot be used with B<-l> or B<-b>.
420
421=item B<-g> I<table> (postings per group table)
422
423Override I<DBTableGrps> from F<newsstats.conf>.
424
425=back
426
427=head1 INSTALLATION
428
429See doc/INSTALL.
430
431=head1 EXAMPLES
432
433Show number of postings per group for last month in I<dump> format:
434
435 groupstats
436
437Show that report for January of 2010 and de.alt.* plus de.test,
438including display of hierarchy levels:
439
440 groupstats -m 2010-01 -n de.alt.*:de.test -s
441
442Show that report for the year of 2010 in I<pretty> format:
443
444 groupstats -p 2010-01:2010-12 -o pretty
445
446Only show newsgroups with less than 30 postings last month, ordered
447by number of postings, descending, in I<pretty> format:
448
449 groupstats -iqdt 30 -o pretty
450
451Show top 10 for the first half-year of 2010 in I<pretty> format:
452
453 groupstats -p 2010-01:2010-06 -b 10 -o pretty
454
455Report all groups that had less than 30 postings every single month
456in the year of 2010 (I<pretty> format is forced):
457
458 groupstats -p 2010-01:2010-12 -il 30
459
460=head1 FILES
461
462=over 4
463
464=item F<groupstats.pl>
465
466The script itself.
467
468=item F<NewsStats.pm>
469
470Library functions for the NewsStats package.
471
472=item F<newsstats.conf>
473
474Runtime configuration file for B<groupstats>.
475
476=back
477
478=head1 BUGS
479
480Please report any bugs or feature requests to the author or use the
481bug tracker at L<http://bugs.th-h.de/>!
482
483=head1 SEE ALSO
484
485=over 2
486
487=item -
488
489doc/README
490
491=item -
492
493doc/INSTALL
494
495=item -
496
497gatherstats -h
498
499=back
500
501This script is part of the B<NewsStats> package.
502
503=head1 AUTHOR
504
505Thomas Hochstein <thh@inter.net>
506
507=head1 COPYRIGHT AND LICENSE
508
509Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
510
511This program is free software; you may redistribute it and/or modify it
512under the same terms as Perl itself.
513
514=cut
This page took 0.013112 seconds and 4 git commands to generate.