groupstats.pl: Improve formatting of output.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### parse the command line
# the option specification is handed to ReadOptions() from NewsStats
# (presumably Getopt::Std style, ':' marking options that take a value -
# confirm in NewsStats.pm)
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### load the configuration file
my %Conf = %{ ReadConfig('newsstats.conf') };

### command line settings win over the configuration file
# only -g (table holding the postings per group) can be overridden here
my %ConfOverride;
if ($Options{'g'}) {
  $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf, \%ConfOverride);
38
### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take preference over -t, and -b takes preference over -l

# Normalize -b first: anything that is not a positive integer (including
# '-b 0') means "Top 10". Doing this before the tests below keeps a literal
# 0 from being mistaken for "-b was not given" by later truth tests.
if (defined($Options{'b'}) and ($Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1)) {
  $Options{'b'} = 10;
}

# Test with defined() instead of truthiness so that a value of 0
# ('-l 0', '-t 0') still counts as "option was given".
if (defined($Options{'b'}) or defined($Options{'l'})) {
  if (defined($Options{'t'})) {
    # drop -t
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    undef($Options{'t'});
  };
  if (defined($Options{'b'}) and defined($Options{'l'})) {
    # drop -l
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    undef($Options{'l'});
  };
  # -q/-d don't work with -b or -l
  warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
  warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
};
57
### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a ':' separates list entries, a '*' is a hierarchy wildcard)
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
  $Options{'o'} = 'dump';
  warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# set output type to 'pretty' for -l
# - defined() so that a level of 0 ('-l 0') is recognized as well
# - only warn if the output type is actually changed
if (defined($Options{'l'}) and $Options{'o'} ne 'pretty') {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};
73
### determine the reporting period
my ($StartMonth, $EndMonth) = &GetTimePeriod($Options{'m'}, $Options{'p'});

# '-o dump' cannot be combined with a time period (-p), so pick a
# replacement output type: 'dumpgroup' if -n names exactly one newsgroup
# (no ':' list separator, no '*' wildcard), 'pretty' in every other case
if ($Options{'p'} and $Options{'o'} eq 'dump') {
  if (!defined($Options{'n'}) or $Options{'n'} =~ /:|\*/) {
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
    $Options{'o'} = 'pretty';
  } else {
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
    $Options{'o'} = 'dumpgroup';
  }
};
86
### init database
my $DBHandle = InitDB(\%Conf,1);

### create report
# Query state shared with the code below:
#   $QueryGroupList  SQL condition for the newsgroup list (or 1)
#   $QueryThreshold  SQL condition for the threshold (or 1)
#   @GroupList       bind values belonging to $QueryGroupList
#   @Params          all further bind values (threshold, later LIMIT)
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);

# newsgroup restriction (-n)
my $Newsgroups = $Options{'n'};
if (!$Newsgroups) {
  # no -n: use a dummy condition that is always true
  $QueryGroupList = 1;
} else {
  # let SQLGroupList() turn the -n argument into a condition for the
  # WHERE clause plus the matching bind values
  ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups);
};

# threshold restriction (-t)
if (!defined($Options{'t'})) {
  # no -t: use a dummy condition that is always true
  $QueryThreshold = 1;
} else {
  # -i lists groups below the threshold, the default is above
  $QueryThreshold = ($Options{'i'} ? ' postings < ?' : ' postings > ?');
  # the threshold itself is bound at execute time
  push @Params,$Options{'t'};
};

# construct WHERE clause
# the two leading placeholders take the first and last month of the period;
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',
                          $QueryGroupList,
                          $QueryThreshold,
                          &SQLHierarchies($Options{'s'}));

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME (marker left by the original author, no details given)
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,
                              $StartMonth,$EndMonth,@GroupList,@Params);
2832c235
TH
128
# Build and execute the report query. There are three report modes:
#   default  one row per month and newsgroup
#   -b       top (or, with -i, bottom) list over the whole period
#   -l       only newsgroups above (or, with -i, below) a level every month
# Bind values always follow the placeholder order of $WhereClause:
# first month, last month, @GroupList, then @Params.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroup" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = $Options{'q'} ? 'postings' : 'newsgroup';
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set (it takes preference over -l)
  # tested with defined() so that '-b 0' reaches the fallback below instead
  # of dropping into the -l branch
  # set sorting order (-i): top or flop list?
  $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
  # fall back to a Top 10 unless -b is a positive integer
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # A group qualifies only if *every* month meets the level:
  #   -i:      all months below the level  <=>  MAX(postings) < level
  #   default: all months above the level  <=>  MIN(postings) > level
  # NOTE(review): a month without any row for a group is invisible to
  # MIN()/MAX() - confirm whether gatherstats.pl writes rows for such months.
  my $Having = $Options{'i'} ? 'MAX(postings) <' : 'MIN(postings) >';
  # prepare and execute query: get list of newsgroups meeting level condition
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Having));
  # @Params is normally empty here (-t is dropped when -l is used); passing
  # it anyway keeps the bind values aligned with $WhereClause in any case
  $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  # collect the newsgroups as a comma-separated list ready for IN(...);
  # names are quoted by the database driver, not by hand
  my @Matching;
  while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
    push @Matching,$DBHandle->quote($Newsgroup);
  };
  # an empty "IN ()" is a syntax error; "IN (NULL)" is valid SQL and
  # matches no row, which yields the empty report we want in that case
  my $GroupList = @Matching ? join(',',@Matching) : 'NULL';
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$GroupList,$WhereClause));
};

# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
179
# output results
# all captions are only printed if -c is set
if ($Options{'c'}) {
  # time period, if -p or -m is set (-p first, as it overrides -m)
  if ($Options{'p'}) {
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
  } elsif ($Options{'m'}) {
    printf ("----- Report for %s\n",$StartMonth);
  };
  # newsgroup list, if -n is set (shown comma-separated instead of colon-separated)
  if ($Options{'n'}) {
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups)));
  };
  # threshold, if -t is set, taking -i in account
  if ($Options{'t'}) {
    printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'});
  };
};

if (defined($Options{'b'}) or defined($Options{'l'})) {
  # for -b and -l we have to read in the query results ourselves,
  # as they do not have standard layout
  if ($Options{'b'}) {
    # -b is set (then -l can't be!)
    # "Top x" / "Bottom x" takes the place of the month in the caption,
    # and the output type is always 'pretty'
    my $ListCaption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
    while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
      print &FormatOutput('pretty', $ListCaption, $Newsgroup, $Postings, $MaxLength);
    };
  } else {
    # -l must be set now, as all other cases have been taken care of
    # print caption (-c) with level, taking -i in account
    if ($Options{'c'}) {
      printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'});
    };
    while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
      # newsgroup and month swap places in the argument list, so the
      # newsgroup is passed where the month normally goes and vice versa
      print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
    };
  };
} else {
  # default: neither -b nor -l, OutputData() handles the whole result set
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
};

### close handles
$DBHandle->disconnect;
216
217__END__
218
219################################ Documentation #################################
220
221=head1 NAME
222
223groupstats - create reports on newsgroup usage
224
225=head1 SYNOPSIS
226
227B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
228
229=head1 REQUIREMENTS
230
231See doc/README: Perl 5.8.x itself and the following modules from CPAN:
232
233=over 2
234
235=item -
236
237Config::Auto
238
239=item -
240
241DBI
242
243=back
244
245=head1 DESCRIPTION
246
247This script creates reports on newsgroup usage (number of postings per
248group per month) taken from result tables created by
249F<gatherstats.pl>.
250
251The time period to act on defaults to last month; you can assign
252another month via the B<-m> switch or a time period via the B<-p>
253switch; the latter takes preference.
254
255B<groupstats> will process all newsgroups by default; you can limit
256that to only some newsgroups by supplying a list of those groups via
257B<-n> (see below). You can include hierarchy levels in the output by
258adding the B<-s> switch (see below).
259
260Furthermore you can set a threshold via B<-t> so that only newsgroups
261with more postings per month will be included in the report. You can
262invert that by the B<-i> switch so only newsgroups with less than
263I<threshold> postings per month will be included.
264
265You can sort the output by number of postings per month instead of the
266default (alphabetical list of newsgroups) by using B<-q>; you can
267reverse the sorting order (from highest to lowest or in reversed
268alphabetical order) by using B<-d>.
269
270Furthermore, you can create a list of newsgroups that had consistently
271more (or less) than x postings per month during the whole report
272period by using B<-l> (together with B<-i> as needed).
273
274Last but not least you can create a "best of" list of the top x
275newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
276
277By default, B<groupstats> will dump a very simple alphabetical list of
278newsgroups, one per line, followed by the number of postings in that
279month. This output format of course cannot sensibly be combined with
280time periods, so you can set the output format by using B<-o> (see
281below). Captions can be added by setting the B<-c> switch.
282
283=head2 Configuration
284
285F<groupstats.pl> will read its configuration via Config::Auto from
286F<newsstats.conf>, which should be present in the same directory.
287
288See doc/INSTALL for an overview of possible configuration options.
289
290You can override configuration options via the B<-g> switch.
291
292=head1 OPTIONS
293
294=over 3
295
296=item B<-V> (version)
297
298Print out version and copyright information on B<groupstats> and exit.
299
300=item B<-h> (help)
301
302Print this man page and exit.
303
304=item B<-m> I<YYYY-MM> (month)
305
306Set processing period to a month in YYYY-MM format. Ignored if B<-p>
307is set.
308
309=item B<-p> I<YYYY-MM:YYYY-MM> (period)
310
311Set processing period to a time period between two months, each in
312YYYY-MM format, separated by a colon. Overrides B<-m>.
313
314=item B<-n> I<newsgroup(s)> (newsgroups)
315
316Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
317be a single newsgroup name (de.alt.test), a newsgroup hierarchy
318(de.alt.*) or a list of either of these, separated by colons, for
319example
320
321 de.test:de.alt.test:de.newusers.*
322
323=item B<-t> I<threshold> (threshold)
324
325Only include newsgroups with more than I<threshold> postings per
326month. Can be inverted by the B<-i> switch so that only newsgroups
327with less than I<threshold> postings will be included.
328
329This setting will be ignored if B<-l> or B<-b> is set.
330
331=item B<-l> I<level> (level)
332
333Only include newsgroups with more than I<level> postings per
334month, every month during the whole reporting period. Can be inverted
335by the B<-i> switch so that only newsgroups with less than I<level>
336postings every single month will be included. Output will be ordered
337by newsgroup name, followed by month.
338
339This setting will be ignored if B<-b> is set. Overrides B<-t> and
340can't be used together with B<-q> or B<-d>.
341
342=item B<-b> I<n> (best of)
343
344Create a list of the I<n> newsgroups with the most postings over the
345whole reporting period. Can be inverted by the B<-i> switch so that a
346list of the I<n> newsgroups with the least postings over the whole
347period is generated. Output will be ordered by sum of postings.
348
349Overrides B<-t> and B<-l> and can't be used together with B<-q> or
350B<-d>. Output format is set to I<pretty> (see below).
351
352=item B<-i> (invert)
353
354Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
355threshold or level or generate a "bottom list" instead of a top list.
356
357=item B<-s> (sum per hierarchy level)
358
359Include "virtual" groups for every hierarchy level in output, for
360example:
361
362 de.alt.ALL 10
363 de.alt.test 5
364 de.alt.admin 7
365
366See the B<gatherstats> man page for details.
367
368=item B<-o> I<output type> (output format)
369
370Set output format. Default is I<dump>, consisting of an alphabetical
371list of newsgroups, each on a new line, followed by the number of
372postings in that month. This default format can't be used with time
373periods of more than one month.
374
375I<list> format is like I<dump>, but will print the month in front of
376the newsgroup name.
377
378I<dumpgroup> format can only be used with a group list (see B<-n>) of
379exactly one newsgroup and is like I<dump>, but will output months,
380followed by the number of postings.
381
382If you don't need easily parsable output, you'll mostly use I<pretty>
383format, which will print a header for each new month and try to align
384newsgroup names and posting counts. Usage of B<-b> will force this
385format.
386
387=item B<-c> (captions)
388
389Add captions to output (reporting period, newsgroups list, threshold).
390
391=item B<-q> (quantity of postings)
392
393Sort by number of postings instead of by newsgroup names.
394
395Cannot be used with B<-l> or B<-b>.
396
397=item B<-d> (descending)
398
399Change sort order to descending.
400
401Cannot be used with B<-l> or B<-b>.
402
403=item B<-g> I<table> (postings per group table)
404
405Override I<DBTableGrps> from F<newsstats.conf>.
406
407=back
408
409=head1 INSTALLATION
410
411See doc/INSTALL.
412
413=head1 EXAMPLES
414
415Show number of postings per group for last month in I<dump> format:
416
417 groupstats
418
419Show that report for January of 2010 and de.alt.* plus de.test,
420including display of hierarchy levels:
421
422 groupstats -m 2010-01 -n de.alt.*:de.test -s
423
424Show that report for the year of 2010 in I<pretty> format:
425
426 groupstats -p 2010-01:2010-12 -o pretty
427
428Only show newsgroups with less than 30 postings last month, ordered
429by number of postings, descending, in I<pretty> format:
430
431 groupstats -iqdt 30 -o pretty
432
433Show top 10 for the first half-year of 2010 in I<pretty> format:
434
435 groupstats -p 2010-01:2010-06 -b 10 -o pretty
436
437Report all groups that had less than 30 postings every single month
438in the year of 2010 (I<pretty> format is forced):
439
440 groupstats -p 2010-01:2010-12 -il 30
441
442=head1 FILES
443
444=over 4
445
446=item F<groupstats.pl>
447
448The script itself.
449
450=item F<NewsStats.pm>
451
452Library functions for the NewsStats package.
453
454=item F<newsstats.conf>
455
456Runtime configuration file for B<groupstats>.
457
458=back
459
460=head1 BUGS
461
462Please report any bugs or feature requests to the author or use the
463bug tracker at L<http://bugs.th-h.de/>!
464
465=head1 SEE ALSO
466
467=over 2
468
469=item -
470
471doc/README
472
473=item -
474
475doc/INSTALL
476
477=item -
478
479gatherstats -h
480
481=back
482
483This script is part of the B<NewsStats> package.
484
485=head1 AUTHOR
486
487Thomas Hochstein <thh@inter.net>
488
489=head1 COPYRIGHT AND LICENSE
490
491Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
492
493This program is free software; you may redistribute it and/or modify it
494under the same terms as Perl itself.
495
496=cut
This page took 0.037007 seconds and 4 git commands to generate.