groupstats.pl: Add '-f' option.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### parse the command line
# the option specification is handed to ReadOptions() as is
my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:');

### load the configuration file
my $ConfRef = ReadConfig('newsstats.conf');
my %Conf = %{$ConfRef};

### let command line options take precedence over the configuration
# -g replaces the configured name of the groups table
my %ConfOverride;
if ($Options{'g'}) {
  $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf,\%ConfOverride);
38
### sort out option combinations that do not work together
# -t, -b and -l exclude each other: -b and -l both win over -t,
# and -b wins over -l
# -f is not available in top list (-b) or level (-l) mode
if ($Options{'b'} or $Options{'l'}) {
  # discard -f
  if ($Options{'f'}) {
    warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n");
    $Options{'f'} = undef;
  }
  # discard -t
  if ($Options{'t'}) {
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    $Options{'t'} = undef;
  }
  # discard -l if -b is present, too
  if ($Options{'b'} and $Options{'l'}) {
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    $Options{'l'} = undef;
  }
  # the sorting switches -q/-d have no effect in these modes; just tell the user
  if ($Options{'q'}) {
    warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n");
  }
  if ($Options{'d'}) {
    warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n");
  }
}
63
### settle on an output type
# fall back to 'dump' if -o was not given
$Options{'o'} = 'dump' unless $Options{'o'};
if ($Options{'o'} eq 'dumpgroup') {
  if (defined($Options{'n'})) {
    # 'dumpgroup' needs exactly one group: neither a list (:) nor a wildcard (*)
    die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if $Options{'n'} =~ /:|\*/;
  } else {
    # 'dumpgroup' without -n makes no sense, so downgrade to 'dump'
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
  }
}
# level reports (-l) are always printed in 'pretty' format
if ($Options{'l'}) {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
}
79
f2ddfd8a
TH
### connect to the database
my $DBHandle = InitDB(\%Conf,1);

### determine the reporting period
my ($StartMonth,$EndMonth);
if (!$Options{'a'}) {
  # regular case: derive the period from -m / -p
  ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
} else {
  # -a: ignore -m / -p and use the first and last month found in the database
  # FIXME - it doesn't make that much sense to get first/last month from database to query it
  #         with a time period that equals no time period ...
  $Options{'m'} = undef;
  $Options{'p'} = undef;
  my $DBQuery = $DBHandle->prepare(
    sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",
            $Conf{'DBDatabase'},$Conf{'DBTableGrps'})
  );
  $DBQuery->execute
    or die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",
                   $Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  ($StartMonth,$EndMonth) = $DBQuery->fetchrow_array;
}

# 'dump' cannot show more than one month, so for -p / -a switch to
# 'dumpgroup' (exactly one newsgroup) or 'pretty' (anything else);
# 'dump' is kept if the report is saved to monthly files (-f)
if ($Options{'o'} eq 'dump' and ($Options{'p'} or $Options{'a'})) {
  if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
    # a single newsgroup was requested
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
    $Options{'o'} = 'dumpgroup';
  } elsif (!defined($Options{'f'})) {
    # several newsgroups and no output to files
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
    $Options{'o'} = 'pretty';
  }
}
109
2832c235
TH
### create report
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);

# newsgroup condition (-n): SQL fragment plus bind values from SQLGroupList(),
# or the always-true dummy value 1 if no newsgroups were requested
my $Newsgroups = $Options{'n'};
if (!$Newsgroups) {
  $QueryGroupList = 1;
} else {
  ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups);
}

# threshold condition (-t): groups above the threshold by default,
# groups below it with -i; the threshold itself becomes a bind value
if (!defined($Options{'t'})) {
  # no threshold: always-true dummy value
  $QueryThreshold = 1;
} else {
  $QueryThreshold = $Options{'i'} ? ' postings < ?' : ' postings > ?';
  push @Params,$Options{'t'};
}

# assemble the WHERE clause from
# - the time period (two placeholders),
# - the newsgroup condition (or 1),
# - the threshold condition (or 1) and
# - whatever &SQLHierarchies() returns to handle the hierarchy
#   levels (.ALL) according to -s
my $WhereClause = sprintf(
  'month BETWEEN ? AND ? AND %s AND %s %s',
  $QueryGroupList,
  $QueryThreshold,
  &SQLHierarchies($Options{'s'})
);

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList,@Params);
2832c235
TH
148
my ($OrderClause,$DBQuery);
# There are three report modes: the plain report (default), a top list (-b)
# and a level report (-l). Every branch leaves a prepared statement in
# $DBQuery whose placeholders are served, in this order, by
# $StartMonth, $EndMonth, @GroupList and @Params.
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroup" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set; test with defined() so that '-b 0' ends up here (and is
  # normalised to 10 below) instead of falling through to the -l branch
  # set sorting order (-i): top or flop list?
  $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
  # set -b to 10 (Top 10) if it is not a number or < 1;
  # \d+ keeps an empty value away from the numeric comparison
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # A group qualifies only if *every* month of the period meets the level:
  # - default: even the weakest month is above the level -> MIN(postings) > ?
  # - -i:      even the best month is below the level    -> MAX(postings) < ?
  my $Level = $Options{'i'} ? 'MAX(postings) < ?' : 'MIN(postings) > ?';
  # prepare and execute query: get list of newsgroups meeting level condition
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level));
  # @Params serves any threshold placeholder in the WHERE clause (normally empty here)
  $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  # collect the matching newsgroups; they are bound as values instead of
  # being quoted into the SQL string by hand
  my @LevelGroups;
  while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
    push @LevelGroups,$Newsgroup;
  }
  # one placeholder per newsgroup; 'IN (NULL)' matches nothing and keeps
  # the statement valid if no newsgroup met the level
  my $InList = @LevelGroups ? join(',',('?') x @LevelGroups) : 'NULL';
  # the IN list comes last in the statement, so its values go to the end of @Params
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s AND newsgroup IN (%s) ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$InList));
  push @Params,@LevelGroups;
}

# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
199
# output results
# reset caption (-c) if -f is set
undef($Options{'c'}) if $Options{'f'};
if ($Options{'c'}) {
  # caption with time period if -p or -m is set
  if ($Options{'p'}) {
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
  } elsif ($Options{'m'}) {
    printf ("----- Report for %s\n",$StartMonth);
  }
  # caption with newsgroup list if -n is set
  printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'n'};
  # caption with threshold if -t is set, taking -i in account; test with
  # defined() as the query does, so a threshold of 0 gets its caption, too
  printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if defined($Options{'t'});
}
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # OutputData() gets the output type, the filename template (-f), the
  # executed statement and the length used for alignment
  &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # we have to read in the query results ourselves, as they do not have standard layout
  while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty
    print &FormatOutput('pretty', ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
  }
} else {
  # -l must be set now, as all other cases have been taken care of
  # print caption (-c) with level, taking -i in account
  printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'};
  # we have to read in the query results ourselves, as they do not have standard layout
  while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    # we just switch $Newsgroup and $Month for output generation;
    # 7 is passed in place of $MaxLength (presumably the length of a 'YYYY-MM' month - confirm)
    print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
  }
}

### close handles
$DBHandle->disconnect;
238
239__END__
240
241################################ Documentation #################################
242
243=head1 NAME
244
245groupstats - create reports on newsgroup usage
246
247=head1 SYNOPSIS
248
78389b28 249B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>]
2832c235
TH
250
251=head1 REQUIREMENTS
252
253See doc/README: Perl 5.8.x itself and the following modules from CPAN:
254
255=over 2
256
257=item -
258
259Config::Auto
260
261=item -
262
263DBI
264
265=back
266
267=head1 DESCRIPTION
268
269This script creates reports on newsgroup usage (number of postings per
270group per month) taken from result tables created by
271F<gatherstats.pl>.
272
273The time period to act on defaults to last month; you can assign
274another month via the B<-m> switch or a time period via the B<-p>
275switch; the latter takes preference.
276
277B<groupstats> will process all newsgroups by default; you can limit
278that to only some newsgroups by supplying a list of those groups via
279B<-n> (see below). You can include hierarchy levels in the output by
280adding the B<-s> switch (see below).
281
282Furthermore you can set a threshold via B<-t> so that only newsgroups
283with more postings per month will be included in the report. You can
284invert that by the B<-i> switch so only newsgroups with less than
285I<threshold> postings per month will be included.
286
287You can sort the output by number of postings per month instead of the
288default (alphabetical list of newsgroups) by using B<-q>; you can
289reverse the sorting order (from highest to lowest or in reversed
290alphabetical order) by using B<-d>.
291
292Furthermore, you can create a list of newsgroups that had consistently
293more (or less) than x postings per month during the whole report
294period by using B<-l> (together with B<-i> as needed).
295
296Last but not least you can create a "best of" list of the top x
297newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
298
299By default, B<groupstats> will dump a very simple alphabetical list of
300newsgroups, one per line, followed by the number of postings in that
301month. This output format of course cannot sensibly be combined with
302time periods, so you can set the output format by using B<-o> (see
303below). Captions can be added by setting the B<-c> switch.
304
305=head2 Configuration
306
f2ddfd8a 307B<groupstats> will read its configuration from F<newsstats.conf>
2832c235
TH
308which should be present in the same directory via Config::Auto.
309
310See doc/INSTALL for an overview of possible configuration options.
311
312You can override configuration options via the B<-g> switch.
313
314=head1 OPTIONS
315
316=over 3
317
318=item B<-V> (version)
319
320Print out version and copyright information on B<groupstats> and exit.
321
322=item B<-h> (help)
323
324Print this man page and exit.
325
326=item B<-m> I<YYYY-MM> (month)
327
328Set processing period to a month in YYYY-MM format. Ignored if B<-p>
f2ddfd8a 329or B<-a> is set.
2832c235
TH
330
331=item B<-p> I<YYYY-MM:YYYY-MM> (period)
332
333Set processing period to a time period between two months, each in
f2ddfd8a
TH
334YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
335B<-a> is set.
336
337=item B<-a> (all)
338
339Set no processing period (process whole database). Overrides B<-m>
340and B<-p>.
2832c235
TH
341
342=item B<-n> I<newsgroup(s)> (newsgroups)
343
344Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
345be a single newsgroup name (de.alt.test), a newsgroup hierarchy
346(de.alt.*) or a list of either of these, separated by colons, for
347example
348
349 de.test:de.alt.test:de.newusers.*
350
351=item B<-t> I<threshold> (threshold)
352
353Only include newsgroups with more than I<threshold> postings per
354month. Can be inverted by the B<-i> switch so that only newsgroups
355with less than I<threshold> postings will be included.
356
357This setting will be ignored if B<-l> or B<-b> is set.
358
359=item B<-l> I<level> (level)
360
361Only include newsgroups with more than I<level> postings per
362month, every month during the whole reporting period. Can be inverted
363by the B<-i> switch so that only newsgroups with less than I<level>
364postings every single month will be included. Output will be ordered
365by newsgroup name, followed by month.
366
367This setting will be ignored if B<-b> is set. Overrides B<-t> and
78389b28 368can't be used together with B<-q>, B<-d> or B<-f>.
2832c235
TH
369
370=item B<-b> I<n> (best of)
371
372Create a list of the I<n> newsgroups with the most postings over the
373whole reporting period. Can be inverted by the B<-i> switch so that a
374list of the I<n> newsgroups with the least postings over the whole
375period is generated. Output will be ordered by sum of postings.
376
78389b28
TH
377Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d>
378or B<-f>. Output format is set to I<pretty> (see below).
2832c235
TH
379
380=item B<-i> (invert)
381
382Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
383threshold or level or generate a "bottom list" instead of a top list.
384
385=item B<-s> (sum per hierarchy level)
386
387Include "virtual" groups for every hierarchy level in output, for
388example:
389
390 de.alt.ALL 10
391 de.alt.test 5
392 de.alt.admin 7
393
394See the B<gatherstats> man page for details.
395
396=item B<-o> I<output type> (output format)
397
398Set output format. Default is I<dump>, consisting of an alphabetical
399list of newsgroups, each on a new line, followed by the number of
400postings in that month. This default format can't be used with time
401periods of more than one month.
402
403I<list> format is like I<dump>, but will print the month in front of
404the newsgroup name.
405
406I<dumpgroup> format can only be used with a group list (see B<-n>) of
407exactly one newsgroup and is like I<dump>, but will output months,
408followed by the number of postings.
409
410If you don't need easily parsable output, you'll mostly use I<pretty>
411format, which will print a header for each new month and try to align
412newsgroup names and posting counts. Usage of B<-b> will force this
413format.
414
415=item B<-c> (captions)
416
78389b28
TH
417Add captions to output (reporting period, newsgroups list, threshold
418and so on).
419
420This setting will be ignored if B<-f> is set.
2832c235
TH
421
422=item B<-q> (quantity of postings)
423
424Sort by number of postings instead of by newsgroup names.
425
426Cannot be used with B<-l> or B<-b>.
427
428=item B<-d> (descending)
429
430Change sort order to descending.
431
432Cannot be used with B<-l> or B<-b>.
433
78389b28
TH
434=item B<-f> I<filename template> (output file)
435
436Save output to file instead of dumping it to STDOUT. B<groupstats>
437will create one file for each month, with filenames composed by
438adding year and month to the I<filename template>, for example
439with B<-f> I<stats>:
440
441 stats-2010-01
442 stats-2010-02
443 ... and so on
444
445This setting will be ignored if B<-l> or B<-b> is set. Output format
446is set to I<dump> (see above).
447
2832c235
TH
448=item B<-g> I<table> (postings per group table)
449
450Override I<DBTableGrps> from F<newsstats.conf>.
451
452=back
453
454=head1 INSTALLATION
455
456See doc/INSTALL.
457
458=head1 EXAMPLES
459
460Show number of postings per group for last month in I<dump> format:
461
462 groupstats
463
464Show that report for January of 2010 and de.alt.* plus de.test,
465including display of hierarchy levels:
466
467 groupstats -m 2010-01 -n de.alt.*:de.test -s
468
469Show that report for the year of 2010 in I<pretty> format:
470
471 groupstats -p 2010-01:2010-12 -o pretty
472
473Only show newsgroups with less than 30 postings last month, ordered
474by number of postings, descending, in I<pretty> format:
475
476 groupstats -iqdt 30 -o pretty
477
478Show top 10 for the first half-year of 2010 in I<pretty> format:
479
480 groupstats -p 2010-01:2010-06 -b 10 -o pretty
481
482Report all groups that had less than 30 postings every single month
483in the year of 2010 (I<pretty> format is forced)
484
485 groupstats -p 2010-01:2010-12 -il 30
486
487=head1 FILES
488
489=over 4
490
491=item F<groupstats.pl>
492
493The script itself.
494
495=item F<NewsStats.pm>
496
497Library functions for the NewsStats package.
498
499=item F<newsstats.conf>
500
501Runtime configuration file for B<groupstats>.
502
503=back
504
505=head1 BUGS
506
507Please report any bugs or feature requests to the author or use the
508bug tracker at L<http://bugs.th-h.de/>!
509
510=head1 SEE ALSO
511
512=over 2
513
514=item -
515
516doc/README
517
518=item -
519
520doc/INSTALL
521
522=item -
523
524gatherstats -h
525
526=back
527
528This script is part of the B<NewsStats> package.
529
530=head1 AUTHOR
531
532Thomas Hochstein <thh@inter.net>
533
534=head1 COPYRIGHT AND LICENSE
535
536Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
537
538This program is free software; you may redistribute it and/or modify it
539under the same terms as Perl itself.
540
541=cut
This page took 0.037877 seconds and 4 git commands to generate.