groupstats.pl: Improve formatting of output.
[usenet/newsstats.git] / groupstats.pl
... / ...
CommitLineData
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
# Compile-time setup: record the script version and append the directory
# this script lives in to @INC, so that modules stored next to it
# (NewsStats.pm, see FILES) can be found by the 'use' statements below.
BEGIN {
    use File::Basename;
    our $VERSION = "0.01";
    push @INC, dirname($0);
}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### read commandline options
# the option specification is handed to ReadOptions(); a trailing colon
# marks an option that takes an argument
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### read configuration
# ReadConfig() returns a hash reference which is copied into %Conf
my %Conf = %{ ReadConfig('newsstats.conf') };

### override configuration via commandline options
# -g replaces the configured name of the groups table (DBTableGrps)
my %ConfOverride;
if ($Options{'g'}) {
    $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf, \%ConfOverride);
38
### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take preference over -t, and -b takes preference over -l
#
# All three options take numeric arguments, so they are tested with
# defined() instead of plain truth: otherwise a value of 0 ('-b 0',
# '-l 0', '-t 0') would be treated as "option not given" here while
# later code that checks defined() treats it as given.
if (defined($Options{'b'})) {
    # normalize -b right away: anything that is not a positive integer
    # becomes 10 (Top 10), so that later truth tests on -b succeed
    $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
}
if (defined($Options{'b'}) or defined($Options{'l'})) {
    if (defined($Options{'t'})) {
        # drop -t
        warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
        undef($Options{'t'});
    }
    if (defined($Options{'b'}) and defined($Options{'l'})) {
        # drop -l
        warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
        undef($Options{'l'});
    }
    # -q/-d don't work with -b or -l
    warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
    warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
}
57
### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a ':' means a list of groups, a '*' means a hierarchy wildcard)
if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/) {
    die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n");
}
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
}
# set output type to 'pretty' for -l
# - defined() so that a level of 0 ('-l 0') is handled, too
# - no warning if the user already asked for 'pretty'
if (defined($Options{'l'}) and $Options{'o'} ne 'pretty') {
    $Options{'o'} = 'pretty';
    warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
}
73
### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# 'dump' cannot be combined with a time period (-p): switch to
# '-o dumpgroup' if exactly one newsgroup was requested via -n (no list
# separator ':' and no wildcard '*'), to '-o pretty' otherwise
if ($Options{'p'} and $Options{'o'} eq 'dump') {
    my $IsSingleGroup = (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/);
    if (!$IsSingleGroup) {
        warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
        $Options{'o'} = 'pretty';
    } else {
        warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
        $Options{'o'} = 'dumpgroup';
    }
}
86
### init database
my $DBHandle = InitDB(\%Conf,1);

### create report
# $QueryGroupList / $QueryThreshold are the SQL fragments for the WHERE
# clause, @GroupList / @Params the bind values belonging to them
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);

# get list of newsgroups (-n)
my $Newsgroups = $Options{'n'};
# dummy value (always true) unless a list of newsgroups was given ...
$QueryGroupList = 1;
# ... in which case the list is exploded for the WHERE clause
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups) if $Newsgroups;

# manage thresholds
# dummy value (always true) unless -t is set
$QueryThreshold = 1;
if (defined($Options{'t'})) {
    # default: list groups above threshold; -i: list groups below threshold
    $QueryThreshold = $Options{'i'} ? ' postings < ?' : ' postings > ?';
    # push threshold to Params
    push @Params, $Options{'t'};
}

# construct WHERE clause
# $QueryGroupList is "list of newsgroup" (or 1),
# $QueryThreshold is threshold definition (or 1),
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',
                          $QueryGroupList,
                          $QueryThreshold,
                          &SQLHierarchies($Options{'s'}));
124
# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
# (the function name is spelled "GetMaxLenght" at this call site; it is not
# defined in this file, so the spelling is left untouched)
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params));

# Build the report query in $DBQuery. Three modes:
#   default   one row per month and newsgroup, ordered according to -q/-d
#   -b n      top (or, with -i, bottom) n newsgroups by sum of postings
#   -l x      all months of those newsgroups that stayed above (or, with
#             -i, below) x postings in every month of the period
# Whatever mode is chosen, the bind values for $DBQuery are
# ($StartMonth,$EndMonth,@GroupList,@Params), in that order.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
    # default: neither -b nor -l
    # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
    # according to -q and -d
    $OrderClause  = $Options{'q'} ? 'postings' : 'newsgroup';
    $OrderClause .= ' DESC' if $Options{'d'};
    # prepare query: get number of postings per group from groups table for given months and newsgroups
    $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
    # -b is set (then -l can't be!)
    # defined() instead of truth, so that '-b 0' ends up here and not in
    # the -l branch below
    # set sorting order (-i): top or flop list?
    $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
    # set -b to 10 (Top 10) if it is not a positive integer
    $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
    # push LIMIT to Params
    push @Params,$Options{'b'};
    # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
    $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
    # -l must be set now, as all other cases have been taken care of
    # which kind of level (-i): more than -l x or less than -l x?
    # "more than x in every month" means the *lowest* monthly value has to
    # exceed x, "less than x in every month" means the *highest* monthly
    # value has to stay below x
    my $Level = $Options{'i'} ? 'MAX(postings) < ?' : 'MIN(postings) > ?';
    # prepare and execute query: get list of newsgroups meeting level condition
    my $LevelQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level));
    # @Params carries the bind values for placeholders inside $WhereClause
    $LevelQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
        or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
    # collect the matching newsgroups
    my @LevelGroups;
    while (my ($Newsgroup) = $LevelQuery->fetchrow_array) {
        push @LevelGroups, $Newsgroup;
    }
    # one placeholder per newsgroup for the IN(...) list, so that group
    # names never get interpolated into the SQL string; an empty list
    # becomes IN (NULL), which is valid SQL and matches no row
    my $InList = @LevelGroups ? join(',', ('?') x scalar(@LevelGroups)) : 'NULL';
    # the IN(...) list comes last, so its bind values can simply be
    # appended to @Params
    $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE (%s) AND newsgroup IN (%s) ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$InList));
    push @Params, @LevelGroups;
}

# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
179
# output results
if ($Options{'c'}) {
    # print caption (-c) with time period if -m or -p is set
    if ($Options{'p'}) {
        printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
    } elsif ($Options{'m'}) {
        printf ("----- Report for %s\n",$StartMonth);
    }
    # print caption (-c) with newsgroup list if -n is set
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'n'};
    # print caption (-c) with threshold if -t is set, taking -i in account
    # (defined(), so that a threshold of 0 gets its caption, too)
    printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if defined($Options{'t'});
}
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
    # default: neither -b nor -l
    # OutputData() reads the query results itself
    &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif (defined($Options{'b'})) {
    # -b is set (then -l can't be!)
    # we just assign "top x" or "bottom x" instead of a month for the caption and force an output type of pretty
    my $Caption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
    # we have to read in the query results ourselves, as they do not have standard layout
    while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
        print &FormatOutput('pretty', $Caption, $Newsgroup, $Postings, $MaxLength);
    }
} else {
    # -l must be set now, as all other cases have been taken care of
    # print caption (-c) with level, taking -i in account
    printf ("----- Newsgroups with %s than %u postings over the whole time period\n",$Options{'i'} ? 'less' : 'more',$Options{'l'}) if $Options{'c'};
    # we have to read in the query results ourselves, as they do not have standard layout
    while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
        # we just switch $Newsgroup and $Month for output generation, so
        # the newsgroup takes the place of the month and vice versa;
        # 7 is the column width passed for the month (YYYY-MM)
        print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
    }
}

### close handles
$DBHandle->disconnect;
216
217__END__
218
219################################ Documentation #################################
220
221=head1 NAME
222
223groupstats - create reports on newsgroup usage
224
225=head1 SYNOPSIS
226
227B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
228
229=head1 REQUIREMENTS
230
231See doc/README: Perl 5.8.x itself and the following modules from CPAN:
232
233=over 2
234
235=item -
236
237Config::Auto
238
239=item -
240
241DBI
242
243=back
244
245=head1 DESCRIPTION
246
247This script creates reports on newsgroup usage (number of postings per
248group per month) taken from result tables created by
249F<gatherstats.pl>.
250
251The time period to act on defaults to last month; you can assign
252another month via the B<-m> switch or a time period via the B<-p>
253switch; the latter takes preference.
254
255B<groupstats> will process all newsgroups by default; you can limit
256that to only some newsgroups by supplying a list of those groups via
257B<-n> (see below). You can include hierarchy levels in the output by
258adding the B<-s> switch (see below).
259
260Furthermore you can set a threshold via B<-t> so that only newsgroups
261with more postings per month will be included in the report. You can
262invert that by the B<-i> switch so only newsgroups with less than
263I<threshold> postings per month will be included.
264
265You can sort the output by number of postings per month instead of the
266default (alphabetical list of newsgroups) by using B<-q>; you can
267reverse the sorting order (from highest to lowest or in reversed
268alphabetical order) by using B<-d>.
269
270Furthermore, you can create a list of newsgroups that had consistently
271more (or less) than x postings per month during the whole report
272period by using B<-l> (together with B<-i> as needed).
273
274Last but not least you can create a "best of" list of the top x
275newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
276
277By default, B<groupstats> will dump a very simple alphabetical list of
278newsgroups, one per line, followed by the number of postings in that
279month. This output format of course cannot sensibly be combined with
280time periods, so you can set the output format by using B<-o> (see
281below). Captions can be added by setting the B<-c> switch.
282
283=head2 Configuration
284
285F<groupstats.pl> will read its configuration from F<newsstats.conf>
286which should be present in the same directory via Config::Auto.
287
288See doc/INSTALL for an overview of possible configuration options.
289
290You can override configuration options via the B<-g> switch.
291
292=head1 OPTIONS
293
294=over 3
295
296=item B<-V> (version)
297
298Print out version and copyright information on B<groupstats> and exit.
299
300=item B<-h> (help)
301
302Print this man page and exit.
303
304=item B<-m> I<YYYY-MM> (month)
305
306Set processing period to a month in YYYY-MM format. Ignored if B<-p>
307is set.
308
309=item B<-p> I<YYYY-MM:YYYY-MM> (period)
310
311Set processing period to a time period between two months, each in
312YYYY-MM format, separated by a colon. Overrides B<-m>.
313
314=item B<-n> I<newsgroup(s)> (newsgroups)
315
316Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
317be a single newsgroup name (de.alt.test), a newsgroup hierarchy
318(de.alt.*) or a list of either of these, separated by colons, for
319example
320
321 de.test:de.alt.test:de.newusers.*
322
323=item B<-t> I<threshold> (threshold)
324
325Only include newsgroups with more than I<threshold> postings per
326month. Can be inverted by the B<-i> switch so that only newsgroups
327with less than I<threshold> postings will be included.
328
329This setting will be ignored if B<-l> or B<-b> is set.
330
331=item B<-l> I<level> (level)
332
333Only include newsgroups with more than I<level> postings per
334month, every month during the whole reporting period. Can be inverted
335by the B<-i> switch so that only newsgroups with less than I<level>
336postings every single month will be included. Output will be ordered
337by newsgroup name, followed by month.
338
339This setting will be ignored if B<-b> is set. Overrides B<-t> and
340can't be used together with B<-q> or B<-d>.
341
342=item B<-b> I<n> (best of)
343
344Create a list of the I<n> newsgroups with the most postings over the
345whole reporting period. Can be inverted by the B<-i> switch so that a
346list of the I<n> newsgroups with the least postings over the whole
347period is generated. Output will be ordered by sum of postings.
348
349Overrides B<-t> and B<-l> and can't be used together with B<-q> or
350B<-d>. Output format is set to I<pretty> (see below).
351
352=item B<-i> (invert)
353
354Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
355threshold or level or generate a "bottom list" instead of a top list.
356
357=item B<-s> (sum per hierarchy level)
358
359Include "virtual" groups for every hierarchy level in output, for
360example:
361
362 de.alt.ALL 10
363 de.alt.test 5
364 de.alt.admin 7
365
366See the B<gatherstats> man page for details.
367
368=item B<-o> I<output type> (output format)
369
370Set output format. Default is I<dump>, consisting of an alphabetical
371list of newsgroups, each on a new line, followed by the number of
372postings in that month. This default format can't be used with time
373periods of more than one month.
374
375I<list> format is like I<dump>, but will print the month in front of
376the newsgroup name.
377
378I<dumpgroup> format can only be used with a group list (see B<-n>) of
379exactly one newsgroup and is like I<dump>, but will output months,
380followed by the number of postings.
381
382If you don't need easily parsable output, you'll mostly use I<pretty>
383format, which will print a header for each new month and try to align
384newsgroup names and posting counts. Usage of B<-b> will force this
385format.
386
387=item B<-c> (captions)
388
389Add captions to output (reporting period, newsgroups list, threshold).
390
391=item B<-q> (quantity of postings)
392
393Sort by number of postings instead of by newsgroup names.
394
395Cannot be used with B<-l> or B<-b>.
396
397=item B<-d> (descending)
398
399Change sort order to descending.
400
401Cannot be used with B<-l> or B<-b>.
402
403=item B<-g> I<table> (postings per group table)
404
405Override I<DBTableGrps> from F<newsstats.conf>.
406
407=back
408
409=head1 INSTALLATION
410
411See doc/INSTALL.
412
413=head1 EXAMPLES
414
415Show number of postings per group for last month in I<dump> format:
416
417 groupstats
418
419Show that report for January of 2010 and de.alt.* plus de.test,
420including display of hierarchy levels:
421
422 groupstats -m 2010-01 -n de.alt.*:de.test -s
423
424Show that report for the year of 2010 in I<pretty> format:
425
426 groupstats -p 2010-01:2010-12 -o pretty
427
428Only show newsgroups with less than 30 postings last month, ordered
429by number of postings, descending, in I<pretty> format:
430
431 groupstats -iqdt 30 -o pretty
432
433Show top 10 for the first half-year of 2010 in I<pretty> format:
434
435 groupstats -p 2010-01:2010-06 -b 10 -o pretty
436
437Report all groups that had less than 30 postings every single month
438in the year of 2010 (I<pretty> format is forced):
439
440 groupstats -p 2010-01:2010-12 -il 30
441
442=head1 FILES
443
444=over 4
445
446=item F<groupstats.pl>
447
448The script itself.
449
450=item F<NewsStats.pm>
451
452Library functions for the NewsStats package.
453
454=item F<newsstats.conf>
455
456Runtime configuration file for B<groupstats>.
457
458=back
459
460=head1 BUGS
461
462Please report any bugs or feature requests to the author or use the
463bug tracker at L<http://bugs.th-h.de/>!
464
465=head1 SEE ALSO
466
467=over 2
468
469=item -
470
471doc/README
472
473=item -
474
475doc/INSTALL
476
477=item -
478
479gatherstats -h
480
481=back
482
483This script is part of the B<NewsStats> package.
484
485=head1 AUTHOR
486
487Thomas Hochstein <thh@inter.net>
488
489=head1 COPYRIGHT AND LICENSE
490
491Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
492
493This program is free software; you may redistribute it and/or modify it
494under the same terms as Perl itself.
495
496=cut
This page took 0.011759 seconds and 4 git commands to generate.