groupstats.pl: Improve automatic adaption of output type.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### parse command line switches
# the option string is handed to ReadOptions() (imported from NewsStats)
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### load settings from the configuration file
my %Conf = %{ReadConfig('newsstats.conf')};

### let command line options take precedence over the configuration
# -g replaces the configured name of the groups table (DBTableGrps)
my %ConfOverride = $Options{'g'} ? ('DBTableGrps' => $Options{'g'}) : ();
&OverrideConfig(\%Conf,\%ConfOverride);
38
### check for incompatible command line options
# you can't mix '-t', '-b' and '-l'
# -b/-l take preference over -t, and -b takes preference over -l
#
# The options are tested with defined() rather than for truth, because the
# report selection further down also decides on defined(-b)/defined(-l):
# a value of '0' must be treated as "option was given" here as well,
# otherwise a conflicting -t would survive into a top list / level query.
if (defined($Options{'b'}) or defined($Options{'l'})) {
  if (defined($Options{'t'})) {
    # drop -t
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    undef($Options{'t'});
  };
  if (defined($Options{'b'}) and defined($Options{'l'})) {
    # drop -l
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    undef($Options{'l'});
  };
  # -q/-d don't work with -b or -l (these are plain switches, so truth is enough)
  warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
  warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
};
57
### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a colon separates list entries, an asterisk denotes a whole hierarchy)
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
# accept 'dumpgroup' only with -n
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
  $Options{'o'} = 'dump';
  warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
};
# set output type to 'pretty' for -l
# - defined() so that '-l 0' is recognized as a level, too
# - only warn if the output type is actually changed; a user who already
#   asked for '-o pretty' should not be told that it was "forced"
if (defined($Options{'l'}) and $Options{'o'} ne 'pretty') {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
};

### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# if a time period (-p) was given, 'dump' output is not usable:
# switch to '-o dumpgroup' for exactly one newsgroup, to '-o pretty' otherwise
if ($Options{'o'} eq 'dump' and $Options{'p'}) {
  if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) {
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n");
    $Options{'o'} = 'dumpgroup';
  } else {
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
    $Options{'o'} = 'pretty';
  }
};
86
### connect to the database
my $DBHandle = InitDB(\%Conf,1);

### create report
# building blocks for the query:
#   $QueryGroupList - SQL condition selecting the newsgroups (-n)
#   $QueryThreshold - SQL condition for the threshold (-t), set later on
#   @GroupList      - bind values belonging to $QueryGroupList
#   @Params         - further bind values (threshold, LIMIT)
my ($QueryGroupList,$QueryThreshold,@GroupList,@Params);
my $Newsgroups = $Options{'n'};
# without -n the condition is a dummy value (always true) ...
$QueryGroupList = 1;
# ... otherwise explode the list of newsgroups for the WHERE clause
($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups) if $Newsgroups;
101
# manage thresholds
if (!defined($Options{'t'})) {
  # no threshold: use a dummy value (always true)
  $QueryThreshold = 1;
} else {
  # -i: list groups below threshold, default: list groups above threshold
  $QueryThreshold .= $Options{'i'} ? ' postings < ?' : ' postings > ?';
  # the threshold itself is passed as a bind value
  push @Params,$Options{'t'};
}

# construct WHERE clause from
# - the time period (two placeholders),
# - $QueryGroupList, the "list of newsgroups" condition (or 1),
# - $QueryThreshold, the threshold condition (or 1),
# - the result of &SQLHierarchies(), which takes care of the exclusion of
#   hierarchy levels (.ALL) according to setting of -s
my $WhereClause = sprintf(
  'month BETWEEN ? AND ? AND %s AND %s %s',
  $QueryGroupList,
  $QueryThreshold,
  &SQLHierarchies($Options{'s'})
);

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght(
  $DBHandle,
  $Conf{'DBTableGrps'},
  'newsgroup',
  $WhereClause,
  $StartMonth,
  $EndMonth,
  @GroupList,
  @Params
);
2832c235
TH
128
my ($OrderClause,$DBQuery);
# fully qualified name of the groups table, used by every query below
my $GroupsTable = sprintf('%s.%s',$Conf{'DBDatabase'},$Conf{'DBTableGrps'});

# Three kinds of report, selected by -b / -l:
#   neither : postings per month and group
#   -b      : top (or, with -i, bottom) list over the whole period
#   -l      : groups above (or, with -i, below) a level in every month
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s WHERE %s ORDER BY month,%s",$GroupsTable,$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set (then -l can't be!)
  # defined() instead of a truth test: '-b 0' must end up here (and be
  # replaced by the default below), not in the -l branch
  # set sorting order (-i): top or flop list?
  $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
  # set -b to 10 (Top 10) if it is not a number or < 1
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$GroupsTable,$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # which kind of level (-i): more than -l x or less than -l x?
  # "every month below x" means the group's maximum is below x;
  # "every month above x" means the group's minimum is above x
  my $LevelCondition = $Options{'i'} ? 'MAX(postings) <' : 'MIN(postings) >';
  # prepare and execute query: get list of newsgroups meeting level condition
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s WHERE %s GROUP BY newsgroup HAVING %s ?",$GroupsTable,$WhereClause,$LevelCondition));
  # bind values follow the order of the placeholders: period, newsgroup list,
  # whatever is in @Params for the WHERE clause, and the level last
  $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'})
    or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  # collect the newsgroups, quoted by the database driver, for a
  # comma-separated list ready for an IN(...) query
  my @MatchingGroups;
  while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
    push @MatchingGroups,$DBHandle->quote($Newsgroup);
  };
  # "IN ()" is not valid SQL; "IN (NULL)" matches nothing, so an empty
  # result of the first query leads to an empty report instead of an error
  my $GroupList = @MatchingGroups ? join(',',@MatchingGroups) : 'NULL';
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$GroupsTable,$GroupList,$WhereClause));
};

# execute query
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
179
# output results
# captions are only printed on request (-c)
if ($Options{'c'}) {
  # time period, if -m or -p is set
  # FIXME - month or period should handled differently
  if ($Options{'m'} or $Options{'p'}) {
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
  }
  # newsgroup list, if -n is set (shown comma-separated)
  if ($Options{'n'}) {
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups)));
  }
  # threshold, if -t is set, taking -i in account
  if ($Options{'t'}) {
    printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'});
  }
}

if (defined($Options{'b'}) or defined($Options{'l'})) {
  # -b or -l: we have to read in the query results ourselves,
  # as they do not have standard layout
  if ($Options{'b'}) {
    # -b is set (then -l can't be!)
    # we just assign "top x" or "bottom x" instead of a month for the caption
    # FIXME
    my $Caption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
    while (my @Row = $DBQuery->fetchrow_array) {
      my ($Newsgroup,$Postings) = @Row;
      print &FormatOutput($Options{'o'}, $Caption, $Newsgroup, $Postings, $MaxLength);
    };
  } else {
    # -l must be set now, as all other cases have been taken care of
    while (my @Row = $DBQuery->fetchrow_array) {
      my ($Month,$Newsgroup,$Postings) = @Row;
      # newsgroup and month are passed in swapped positions for output generation
      # FIXME
      print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
    };
  }
} else {
  # default: neither -b nor -l, OutputData() handles the result set
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
};

### close handles
$DBHandle->disconnect;
211
212__END__
213
214################################ Documentation #################################
215
216=head1 NAME
217
218groupstats - create reports on newsgroup usage
219
220=head1 SYNOPSIS
221
222B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
223
224=head1 REQUIREMENTS
225
226See doc/README: Perl 5.8.x itself and the following modules from CPAN:
227
228=over 2
229
230=item -
231
232Config::Auto
233
234=item -
235
236DBI
237
238=back
239
240=head1 DESCRIPTION
241
242This script creates reports on newsgroup usage (number of postings per
243group per month) taken from result tables created by
244F<gatherstats.pl>.
245
246The time period to act on defaults to last month; you can assign
247another month via the B<-m> switch or a time period via the B<-p>
248switch; the latter takes preference.
249
250B<groupstats> will process all newsgroups by default; you can limit
251that to only some newsgroups by supplying a list of those groups via
252B<-n> (see below). You can include hierarchy levels in the output by
253adding the B<-s> switch (see below).
254
255Furthermore you can set a threshold via B<-t> so that only newsgroups
256with more postings per month will be included in the report. You can
257invert that by the B<-i> switch so only newsgroups with less than
258I<threshold> postings per month will be included.
259
260You can sort the output by number of postings per month instead of the
261default (alphabetical list of newsgroups) by using B<-q>; you can
262reverse the sorting order (from highest to lowest or in reversed
263alphabetical order) by using B<-d>.
264
265Furthermore, you can create a list of newsgroups that had consistently
266more (or less) than x postings per month during the whole report
267period by using B<-l> (together with B<-i> as needed).
268
269Last but not least you can create a "best of" list of the top x
270newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
271
272By default, B<groupstats> will dump a very simple alphabetical list of
273newsgroups, one per line, followed by the number of postings in that
274month. This output format of course cannot sensibly be combined with
275time periods, so you can set the output format by using B<-o> (see
276below). Captions can be added by setting the B<-c> switch.
277
278=head2 Configuration
279
280F<groupstats.pl> will read its configuration from F<newsstats.conf>
281which should be present in the same directory via Config::Auto.
282
283See doc/INSTALL for an overview of possible configuration options.
284
285You can override configuration options via the B<-g> switch.
286
287=head1 OPTIONS
288
289=over 3
290
291=item B<-V> (version)
292
293Print out version and copyright information on B<groupstats> and exit.
294
295=item B<-h> (help)
296
297Print this man page and exit.
298
299=item B<-m> I<YYYY-MM> (month)
300
301Set processing period to a month in YYYY-MM format. Ignored if B<-p>
302is set.
303
304=item B<-p> I<YYYY-MM:YYYY-MM> (period)
305
306Set processing period to a time period between two months, each in
307YYYY-MM format, separated by a colon. Overrides B<-m>.
308
309=item B<-n> I<newsgroup(s)> (newsgroups)
310
311Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
312be a single newsgroup name (de.alt.test), a newsgroup hierarchy
313(de.alt.*) or a list of either of these, separated by colons, for
314example
315
316 de.test:de.alt.test:de.newusers.*
317
318=item B<-t> I<threshold> (threshold)
319
320Only include newsgroups with more than I<threshold> postings per
321month. Can be inverted by the B<-i> switch so that only newsgroups
322with less than I<threshold> postings will be included.
323
324This setting will be ignored if B<-l> or B<-b> is set.
325
326=item B<-l> I<level> (level)
327
328Only include newsgroups with more than I<level> postings per
329month, every month during the whole reporting period. Can be inverted
330by the B<-i> switch so that only newsgroups with less than I<level>
331postings every single month will be included. Output will be ordered
332by newsgroup name, followed by month.
333
334This setting will be ignored if B<-b> is set. Overrides B<-t> and
335can't be used together with B<-q> or B<-d>.
336
337=item B<-b> I<n> (best of)
338
339Create a list of the I<n> newsgroups with the most postings over the
340whole reporting period. Can be inverted by the B<-i> switch so that a
341list of the I<n> newsgroups with the least postings over the whole
342period is generated. Output will be ordered by sum of postings.
343
344Overrides B<-t> and B<-l> and can't be used together with B<-q> or
345B<-d>. Output format is set to I<pretty> (see below).
346
347=item B<-i> (invert)
348
349Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
350threshold or level or generate a "bottom list" instead of a top list.
351
352=item B<-s> (sum per hierarchy level)
353
354Include "virtual" groups for every hierarchy level in output, for
355example:
356
357 de.alt.ALL 10
358 de.alt.test 5
359 de.alt.admin 7
360
361See the B<gatherstats> man page for details.
362
363=item B<-o> I<output type> (output format)
364
365Set output format. Default is I<dump>, consisting of an alphabetical
366list of newsgroups, each on a new line, followed by the number of
367postings in that month. This default format can't be used with time
368periods of more than one month.
369
370I<list> format is like I<dump>, but will print the month in front of
371the newsgroup name.
372
373I<dumpgroup> format can only be used with a group list (see B<-n>) of
374exactly one newsgroup and is like I<dump>, but will output months,
375followed by the number of postings.
376
377If you don't need easily parsable output, you'll mostly use I<pretty>
378format, which will print a header for each new month and try to align
379newsgroup names and posting counts. Usage of B<-b> will force this
380format.
381
382=item B<-c> (captions)
383
384Add captions to output (reporting period, newsgroups list, threshold).
385
386=item B<-q> (quantity of postings)
387
388Sort by number of postings instead of by newsgroup names.
389
390Cannot be used with B<-l> or B<-b>.
391
392=item B<-d> (descending)
393
394Change sort order to descending.
395
396Cannot be used with B<-l> or B<-b>.
397
398=item B<-g> I<table> (postings per group table)
399
400Override I<DBTableGrps> from F<newsstats.conf>.
401
402=back
403
404=head1 INSTALLATION
405
406See doc/INSTALL.
407
408=head1 EXAMPLES
409
410Show number of postings per group for last month in I<dump> format:
411
412 groupstats
413
414Show that report for January of 2010 and de.alt.* plus de.test,
415including display of hierarchy levels:
416
417 groupstats -m 2010-01 -n de.alt.*:de.test -s
418
419Show that report for the year of 2010 in I<pretty> format:
420
421 groupstats -p 2010-01:2010-12 -o pretty
422
423Only show newsgroups with less than 30 postings last month, ordered
424by number of postings, descending, in I<pretty> format:
425
426 groupstats -iqdt 30 -o pretty
427
428Show top 10 for the first half-year of 2010 in I<pretty> format:
429
430 groupstats -p 2010-01:2010-06 -b 10 -o pretty
431
432Report all groups that had less than 30 postings every single month
433in the year of 2010 (I<pretty> format is forced):
434
435 groupstats -p 2010-01:2010-12 -il 30
436
437=head1 FILES
438
439=over 4
440
441=item F<groupstats.pl>
442
443The script itself.
444
445=item F<NewsStats.pm>
446
447Library functions for the NewsStats package.
448
449=item F<newsstats.conf>
450
451Runtime configuration file for B<groupstats>.
452
453=back
454
455=head1 BUGS
456
457Please report any bugs or feature requests to the author or use the
458bug tracker at L<http://bugs.th-h.de/>!
459
460=head1 SEE ALSO
461
462=over 2
463
464=item -
465
466doc/README
467
468=item -
469
470doc/INSTALL
471
472=item -
473
474gatherstats -h
475
476=back
477
478This script is part of the B<NewsStats> package.
479
480=head1 AUTHOR
481
482Thomas Hochstein <thh@inter.net>
483
484=head1 COPYRIGHT AND LICENSE
485
486Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
487
488This program is free software; you may redistribute it and/or modify it
489under the same terms as Perl itself.
490
491=cut
This page took 0.034373 seconds and 4 git commands to generate.