Merge branch 'tools' into pu
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
d3b6810d 6# from a database.
2832c235
TH
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
BEGIN {
  # dirname() is needed to locate the directory this script lives in
  use File::Basename;
  # version of this script
  our $VERSION = "0.01";
  # append the script's own directory to the module search path
  push @INC, dirname($0);
}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### read commandline options
# the option specification is handed to ReadOptions() as-is
my %Options = &ReadOptions('m:p:an:o:t:l:b:iscqdf:g:');

### read configuration
# ReadConfig() returns a hash reference that is copied into %Conf
my %Conf = %{ ReadConfig('newsstats.conf') };

### override configuration via commandline options
# only -g (groups table) can override a configuration value here
my %ConfOverride;
if ($Options{'g'}) {
  $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf, \%ConfOverride);
38
### check for incompatible command line options
# '-t', '-b' and '-l' are mutually exclusive:
# -b/-l win over -t, and -b wins over -l
# '-f' is not available together with '-b' or '-l'
if ($Options{'b'} or $Options{'l'}) {
  # drop -f
  if ($Options{'f'}) {
    warn ("$MySelf: W: You cannot save the report to monthly files when using top lists (-b) or levels (-l). Filename template '-f $Options{'f'}' was ignored.\n");
    $Options{'f'} = undef;
  }
  # drop -t
  if ($Options{'t'}) {
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    $Options{'t'} = undef;
  }
  # drop -l if -b is set, too
  if ($Options{'b'} and $Options{'l'}) {
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    $Options{'l'} = undef;
  }
  # -q/-d have no effect with -b or -l; just tell the user
  if ($Options{'q'}) {
    warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n");
  }
  if ($Options{'d'}) {
    warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n");
  }
}
63
### check output type
# fall back to 'pretty' if no output type was given
$Options{'o'} ||= 'pretty';
# 'dumpgroup' needs exactly one newsgroup
if ($Options{'o'} eq 'dumpgroup') {
  if (defined($Options{'n'})) {
    # a colon or an asterisk in -n means more than one newsgroup: fail
    die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n")
      if $Options{'n'} =~ /:|\*/;
  } else {
    # no -n at all: fall back to 'dump'
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
  }
}
# -l always reports in 'pretty' format
if ($Options{'l'}) {
  unless ($Options{'o'} eq 'pretty') {
    $Options{'o'} = 'pretty';
    warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
  }
}
# -f always reports in 'dump' format
if ($Options{'f'}) {
  unless ($Options{'o'} eq 'dump') {
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: Output type forced to '-o dump' due to usage of '-f'.\n");
  }
}
2832c235 84
f2ddfd8a
TH
### init database
my $DBHandle = InitDB(\%Conf,1);

### get time period
my ($StartMonth,$EndMonth);
if (!$Options{'a'}) {
  # regular case: derive start/end month from -m / -p
  ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
} else {
  # -a: take first and last month from the database
  # FIXME - it doesn't make that much sense to get first/last month from database to query it
  #         with a time period that equals no time period ...
  $Options{'m'} = undef;
  $Options{'p'} = undef;
  my $RangeQuery = $DBHandle->prepare(
    sprintf("SELECT MIN(month),MAX(month) FROM %s.%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})
  );
  unless ($RangeQuery->execute) {
    die sprintf("$MySelf: E: Can't get MIN/MAX month from %s.%s: %s\n",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  }
  ($StartMonth,$EndMonth) = $RangeQuery->fetchrow_array;
}
if ($Options{'p'} or $Options{'a'}) {
  # a time period (-p) or the whole database (-a) was requested: drop -m
  undef $Options{'m'};
  # '-o dump' cannot be used with a time period:
  # force output type to '-o dumpgroup' or '-o pretty'
  if ($Options{'o'} eq 'dump') {
    if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
      # exactly one newsgroup was submitted
      warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
      $Options{'o'} = 'dumpgroup';
    } elsif (!defined($Options{'f'})) {
      # more than one newsgroup - and no file output
      warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
      $Options{'o'} = 'pretty';
    }
  }
}
116
2832c235
TH
### create report
# $QueryGroupList / $QueryThreshold become parts of the WHERE clause,
# @GroupList / @Params hold the values bound to their placeholders
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);
my $Newsgroups = $Options{'n'};

# list of newsgroups (-n):
# start with a dummy value (always true) ...
$QueryGroupList = 1;
# ... and explode the list of newsgroups for the WHERE clause, if there is one
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups) if $Newsgroups;

# thresholds (-t):
# start with a dummy value (always true) ...
$QueryThreshold = 1;
if (defined($Options{'t'})) {
  # ... and replace it: groups above threshold by default, below with -i
  $QueryThreshold = $Options{'i'} ? ' postings < ?' : ' postings > ?';
  # push threshold to Params
  push @Params,$Options{'t'};
}

# construct WHERE clause from
# - the time period (two placeholders),
# - $QueryGroupList, the "list of newsgroups" (or 1),
# - $QueryThreshold, the threshold definition (or 1),
# - whatever &SQLHierarchies() returns for the exclusion of hierarchy
#   levels (.ALL) according to the setting of -s
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',
                          $QueryGroupList,
                          $QueryThreshold,
                          &SQLHierarchies($Options{'s'}));

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,
                              $StartMonth,$EndMonth,@GroupList,@Params);
2832c235
TH
155
# Build and execute the report query. There are three modes:
# - default (neither -b nor -l): postings per month and newsgroup
# - -b: top (or, with -i, bottom) list by sum of postings
# - -l: all months of those newsgroups that stayed above (or, with -i,
#       below) a level in every month of the reporting period
# After this section $DBQuery is an executed statement handle.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set; test for definedness, not truth, so that '-b 0' ends up
  # here (and is changed to a Top 10 below) instead of being mistaken
  # for a level report without a level
  # set sorting order (-i): top or flop list?
  $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
  # set -b to 10 (Top 10) if it is not a positive integer
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # which kind of level (-i)?
  # - default: more than -l x in every month, so even the weakest
  #   month (MIN) has to be above the level
  # - -i: less than -l x in every month, so even the strongest
  #   month (MAX) has to be below the level
  my $Having = $Options{'i'} ? 'MAX(postings) <' : 'MIN(postings) >';
  # prepare and execute query: get list of newsgroups meeting level condition
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Having));
  # @Params belongs to placeholders inside of the WHERE clause, so it
  # has to be bound before the level; it is empty unless a threshold
  # is still active
  $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  # collect newsgroups, quoted by the database driver, for the IN(...) query
  my @QuotedGroups;
  while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
    push @QuotedGroups,$DBHandle->quote($Newsgroup);
  };
  # "IN ()" is a syntax error, so fall back to "IN (NULL)", which never
  # matches, if no newsgroup meets the level condition
  my $GroupList = @QuotedGroups ? join(',',@QuotedGroups) : 'NULL';
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause));
};

# execute query
# bind values in order of their placeholders: time period, list of
# newsgroups, then threshold and/or LIMIT
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
206
# output results
# captions (-c) are not available for file output (-f)
$Options{'c'} = undef if $Options{'f'};
if ($Options{'c'}) {
  # caption with time period: a single month (-m) or a period
  if ($Options{'m'}) {
    printf ("----- Report for %s\n",$StartMonth);
  } else {
    my $AllMonths = $Options{'a'} ? '(all months)' : '';
    printf ("----- Report from %s to %s %s\n",$StartMonth,$EndMonth,$AllMonths);
  }
  # caption with newsgroup list if -n is set
  if ($Options{'n'}) {
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups)));
  }
  # caption with threshold if -t is set, taking -i in account
  if ($Options{'t'}) {
    my $Comparison = $Options{'i'} ? '<' : '>';
    printf ("----- Threshold: %s %u\n",$Comparison,$Options{'t'});
  }
}
if (defined($Options{'b'}) or defined($Options{'l'})) {
  # the query results do not have standard layout for -b and -l,
  # so they are read in here
  if ($Options{'b'}) {
    # -b is set (then -l can't be!)
    # "top x" or "bottom x" takes the place of the month in the caption,
    # and the output type is forced to pretty
    my $ListCaption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
    while (my ($Group,$Count) = $DBQuery->fetchrow_array) {
      print &FormatOutput('pretty', $ListCaption, $Group, $Count, $MaxLength);
    }
  } else {
    # -l must be set now, as all other cases have been taken care of
    # caption (-c) with level, taking -i in account
    if ($Options{'c'}) {
      my $Direction = $Options{'i'} ? 'less' : 'more';
      printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Direction,$Options{'l'});
    }
    # newsgroup and month switch places for output generation
    while (my ($Month,$Group,$Count) = $DBQuery->fetchrow_array) {
      print &FormatOutput($Options{'o'}, $Group, $Month, $Count, 7);
    }
  }
} else {
  # default: neither -b nor -l
  &OutputData($Options{'o'},$Options{'f'},$DBQuery,$MaxLength);
}

### close handles
$DBHandle->disconnect;
245
246__END__
247
248################################ Documentation #################################
249
250=head1 NAME
251
252groupstats - create reports on newsgroup usage
253
254=head1 SYNOPSIS
255
78389b28 256B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM> | B<-p> I<YYYY-MM:YYYY-MM> | B<-a>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-f> I<filename template>] [B<-g> I<database table>]
2832c235
TH
257
258=head1 REQUIREMENTS
259
260See doc/README: Perl 5.8.x itself and the following modules from CPAN:
261
262=over 2
263
264=item -
265
266Config::Auto
267
268=item -
269
270DBI
271
272=back
273
274=head1 DESCRIPTION
275
276This script creates reports on newsgroup usage (number of postings per
277group per month) taken from result tables created by
278F<gatherstats.pl>.
279
280The time period to act on defaults to last month; you can assign
281another month via the B<-m> switch or a time period via the B<-p>
282switch; the latter takes preference.
283
284B<groupstats> will process all newsgroups by default; you can limit
285that to only some newsgroups by supplying a list of those groups via
286B<-n> (see below). You can include hierarchy levels in the output by
287adding the B<-s> switch (see below).
288
289Furthermore you can set a threshold via B<-t> so that only newsgroups
290with more postings per month will be included in the report. You can
291invert that by the B<-i> switch so only newsgroups with less than
292I<threshold> postings per month will be included.
293
294You can sort the output by number of postings per month instead of the
295default (alphabetical list of newsgroups) by using B<-q>; you can
296reverse the sorting order (from highest to lowest or in reversed
297alphabetical order) by using B<-d>.
298
299Furthermore, you can create a list of newsgroups that had consistently
300more (or less) than x postings per month during the whole report
301period by using B<-l> (together with B<-i> as needed).
302
303Last but not least you can create a "best of" list of the top x
304newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
305
628a183c
TH
306By default, B<groupstats> will dump an alphabetical list of newsgroups,
307one per line, followed by the number of postings in that group, for
308every month. You can change the output format by using B<-o> (see
2832c235
TH
309below). Captions can be added by setting the B<-c> switch.
310
311=head2 Configuration
312
f2ddfd8a 313B<groupstats> will read its configuration from F<newsstats.conf>
2832c235
TH
314which should be present in the same directory via Config::Auto.
315
316See doc/INSTALL for an overview of possible configuration options.
317
318You can override configuration options via the B<-g> switch.
319
320=head1 OPTIONS
321
322=over 3
323
324=item B<-V> (version)
325
326Print out version and copyright information on B<groupstats> and exit.
327
328=item B<-h> (help)
329
330Print this man page and exit.
331
332=item B<-m> I<YYYY-MM> (month)
333
334Set processing period to a month in YYYY-MM format. Ignored if B<-p>
f2ddfd8a 335or B<-a> is set.
2832c235
TH
336
337=item B<-p> I<YYYY-MM:YYYY-MM> (period)
338
339Set processing period to a time period between two months, each in
f2ddfd8a
TH
340YYYY-MM format, separated by a colon. Overrides B<-m>. Ignored if
341B<-a> is set.
342
343=item B<-a> (all)
344
345Set no processing period (process whole database). Overrides B<-m>
346and B<-p>.
2832c235
TH
347
348=item B<-n> I<newsgroup(s)> (newsgroups)
349
350Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
351be a single newsgroup name (de.alt.test), a newsgroup hierarchy
352(de.alt.*) or a list of either of these, separated by colons, for
353example
354
355 de.test:de.alt.test:de.newusers.*
356
357=item B<-t> I<threshold> (threshold)
358
359Only include newsgroups with more than I<threshold> postings per
360month. Can be inverted by the B<-i> switch so that only newsgroups
361with less than I<threshold> postings will be included.
362
363This setting will be ignored if B<-l> or B<-b> is set.
364
365=item B<-l> I<level> (level)
366
367Only include newsgroups with more than I<level> postings per
368month, every month during the whole reporting period. Can be inverted
369by the B<-i> switch so that only newsgroups with less than I<level>
370postings every single month will be included. Output will be ordered
371by newsgroup name, followed by month.
372
373This setting will be ignored if B<-b> is set. Overrides B<-t> and
78389b28 374can't be used together with B<-q>, B<-d> or B<-f>.
2832c235
TH
375
376=item B<-b> I<n> (best of)
377
378Create a list of the I<n> newsgroups with the most postings over the
379whole reporting period. Can be inverted by the B<-i> switch so that a
380list of the I<n> newsgroups with the least postings over the whole
381period is generated. Output will be ordered by sum of postings.
382
78389b28
TH
383Overrides B<-t> and B<-l> and can't be used together with B<-q>, B<-d>
384or B<-f>. Output format is set to I<pretty> (see below).
2832c235
TH
385
386=item B<-i> (invert)
387
388Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
389threshold or level or generate a "bottom list" instead of a top list.
390
391=item B<-s> (sum per hierarchy level)
392
393Include "virtual" groups for every hierarchy level in output, for
394example:
395
396 de.alt.ALL 10
397 de.alt.test 5
398 de.alt.admin 7
399
400See the B<gatherstats> man page for details.
401
402=item B<-o> I<output type> (output format)
403
628a183c
TH
404Set output format. Default is I<pretty>, which will print a header for
405each new month, followed by an alphabetical list of newsgroups, each
406on a new line, followed by the number of postings in that month.
407B<groupstats> will try to align newsgroup names and posting counts.
408Usage of B<-b> will force this format; it cannot be used together with
409B<-f>.
410
411I<dump> format is used to create an easily parsable output consisting
412of an alphabetical list of newsgroups, each on a new line, followed by
413the number of postings in that month, without any alignment. This
414format can't be used with time periods of more than one month.
415Usage of B<-f> will force this format.
2832c235
TH
416
417I<list> format is like I<dump>, but will print the month in front of
418the newsgroup name.
419
420I<dumpgroup> format can only be used with a group list (see B<-n>) of
421exactly one newsgroup and is like I<dump>, but will output months,
422followed by the number of postings.
423
2832c235
TH
424=item B<-c> (captions)
425
78389b28
TH
426Add captions to output (reporting period, newsgroups list, threshold
427and so on).
428
429This setting will be ignored if B<-f> is set.
2832c235
TH
430
431=item B<-q> (quantity of postings)
432
433Sort by number of postings instead of by newsgroup names.
434
435Cannot be used with B<-l> or B<-b>.
436
437=item B<-d> (descending)
438
439Change sort order to descending.
440
441Cannot be used with B<-l> or B<-b>.
442
78389b28
TH
443=item B<-f> I<filename template> (output file)
444
445Save output to file instead of dumping it to STDOUT. B<groupstats>
446will create one file for each month, with filenames composed by
447adding year and month to the I<filename template>, for example
448with B<-f> I<stats>:
449
450 stats-2010-01
451 stats-2010-02
452 ... and so on
453
454This setting will be ignored if B<-l> or B<-b> is set. Output format
455is set to I<dump> (see above).
456
2832c235
TH
457=item B<-g> I<table> (postings per group table)
458
459Override I<DBTableGrps> from F<newsstats.conf>.
460
461=back
462
463=head1 INSTALLATION
464
465See doc/INSTALL.
466
467=head1 EXAMPLES
468
469Show number of postings per group for last month in I<pretty> format (the default):
470
471 groupstats
472
473Show that report for January of 2010 and de.alt.* plus de.test,
474including display of hierarchy levels:
475
476 groupstats -m 2010-01 -n de.alt.*:de.test -s
477
478Show that report for the year of 2010 in I<pretty> format:
479
480 groupstats -p 2010-01:2010-12 -o pretty
481
482Only show newsgroups with less than 30 postings last month, ordered
483by number of postings, descending, in I<pretty> format:
484
485 groupstats -iqdt 30 -o pretty
486
487Show top 10 for the first half-year of 2010 in I<pretty> format:
488
489 groupstats -p 2010-01:2010-06 -b 10 -o pretty
490
491Report all groups that had less than 30 postings every single month
492in the year of 2010 (I<pretty> format is forced):
493
494 groupstats -p 2010-01:2010-12 -il 30
495
496=head1 FILES
497
498=over 4
499
500=item F<groupstats.pl>
501
502The script itself.
503
504=item F<NewsStats.pm>
505
506Library functions for the NewsStats package.
507
508=item F<newsstats.conf>
509
510Runtime configuration file for B<groupstats>.
511
512=back
513
514=head1 BUGS
515
516Please report any bugs or feature requests to the author or use the
517bug tracker at L<http://bugs.th-h.de/>!
518
519=head1 SEE ALSO
520
521=over 2
522
523=item -
524
525doc/README
526
527=item -
528
529doc/INSTALL
530
531=item -
532
533gatherstats -h
534
535=back
536
537This script is part of the B<NewsStats> package.
538
539=head1 AUTHOR
540
541Thomas Hochstein <thh@inter.net>
542
543=head1 COPYRIGHT AND LICENSE
544
545Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
546
547This program is free software; you may redistribute it and/or modify it
548under the same terms as Perl itself.
549
550=cut
This page took 0.040724 seconds and 4 git commands to generate.