groupstats.pl: More input validation.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
# Compile-time setup: publish the script version and extend the module
# search path so that modules located next to the script can be found.
BEGIN {
    use File::Basename;
    our $VERSION = "0.01";
    # append the directory this script was started from to @INC
    push @INC, dirname($0);
}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### read commandline options
# NOTE(review): the option spec looks like a getopts-style string (letters
# followed by ':' take a value) - confirm against ReadOptions in NewsStats.
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### read configuration
my %Conf = %{ ReadConfig('newsstats.conf') };

### override configuration via commandline options
# -g, when given, replaces the configured groups table (DBTableGrps)
my %ConfOverride = $Options{'g'} ? ('DBTableGrps' => $Options{'g'}) : ();
&OverrideConfig(\%Conf, \%ConfOverride);
38
### check for incompatible command line options
# Normalise -b first: every value that is not a positive integer selects
# the default "Top 10". This includes '0', which is defined but false in
# Perl and would otherwise be treated as "-b not given" by truth tests
# while still counting as "given" wherever defined() is used.
if (defined($Options{'b'}) and ($Options{'b'} !~ /^\d+\z/ or $Options{'b'} < 1)) {
    $Options{'b'} = 10;
}
# you can't mix '-t', '-b' and '-l'
# -b/-l take preference over -t, and -b takes preference over -l
# (tested with defined() so that a value of '0' still counts as "set")
if (defined($Options{'b'}) or defined($Options{'l'})) {
    if (defined($Options{'t'})) {
        # drop -t
        warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
        undef($Options{'t'});
    }
    if (defined($Options{'b'}) and defined($Options{'l'})) {
        # drop -l
        warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
        undef($Options{'l'});
    }
    # -q/-d don't work with -b or -l
    warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
    warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
}
57
### check output type
# default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};
# 'dumpgroup' needs exactly one newsgroup from -n
if ($Options{'o'} eq 'dumpgroup') {
    if (!defined($Options{'n'})) {
        # no -n at all: fall back to 'dump'
        $Options{'o'} = 'dump';
        warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
    } elsif ($Options{'n'} =~ /:|\*/) {
        # a colon (list separator) or a wildcard means more than one group: fail
        die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n");
    }
}
# set output type to 'pretty' for -l
# tested with defined() so that '-l 0' is treated as level mode here, too,
# just as it is when the query is selected
if (defined($Options{'l'})) {
    $Options{'o'} = 'pretty';
    warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
}
73
### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# 'dump' output cannot be combined with a time period (-p):
# switch the output type to 'pretty' in that case
if ($Options{'p'} and $Options{'o'} eq 'dump') {
    $Options{'o'} = 'pretty';
    warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
}
81
### init database
my $DBHandle = InitDB(\%Conf,1);

### create report
# get list of newsgroups (-n)
my $Newsgroups = $Options{'n'};
my ($QueryPart,@GroupList);
if (!$Newsgroups) {
    # no group list: use a dummy condition that is always true
    $QueryPart = 1;
} else {
    # explode list of newsgroups into a condition for the WHERE clause
    # plus the values to bind to it
    ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
}

# manage thresholds
if (defined($Options{'t'})) {
    # -i: list groups below threshold; default: list groups above threshold
    $QueryPart .= $Options{'i'} ? ' AND postings < ?' : ' AND postings > ?';
    # the threshold is one more bind value for DBQuery->execute
    push @GroupList, $Options{'t'};
}
109
# construct WHERE clause
# the first part is the newsgroup condition built above (or 1); the second
# part comes from &SQLHierarchies(), which takes care of the exclusion of
# hierarchy levels (.ALL) according to the setting of -s
my @ClauseParts = ($QueryPart, &SQLHierarchies($Options{'s'}));
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s', @ClauseParts);

# get length of longest newsgroup name delivered by the query, for
# formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght(
    $DBHandle,
    $Conf{'DBTableGrps'},
    'newsgroup',
    $WhereClause,
    $StartMonth,
    $EndMonth,
    @GroupList
);
119
# Prepare the report query. There are three modes:
#   default - postings per group and month
#   -b      - top (or, with -i, bottom) list over the whole period
#   -l      - groups above (or, with -i, below) a level in every month
# @GroupList is extended as needed so that it matches the placeholders of
# the prepared statement when it is executed.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
    # default: neither -b nor -l
    # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
    # according to -q and -d
    $OrderClause = 'newsgroup';
    $OrderClause = 'postings' if $Options{'q'};
    $OrderClause .= ' DESC' if $Options{'d'};
    # prepare query: get number of postings per group from groups table for given months and newsgroups
    $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
    # -b is set; tested with defined() because '-b 0' is defined but false
    # and must end up here to get the default below instead of falling
    # through to the -l branch
    # set sorting order (-i)
    $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
    # set -b to 10 if it is not a positive integer (Top 10)
    $Options{'b'} = 10 if $Options{'b'} !~ /^\d+\z/ or $Options{'b'} < 1;
    # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
    push @GroupList,$Options{'b'};
    # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
    $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
    # -l must be set now, as all other cases have been taken care of
    # "more than level in every month" means the weakest month is above the
    # level (MIN > level); with -i, "less than level in every month" means
    # the strongest month is below it (MAX < level)
    my $Aggregate;
    if ($Options{'i'}) {
        ($Aggregate,$OrderClause) = ('MAX','<');
    } else {
        ($Aggregate,$OrderClause) = ('MIN','>');
    };
    # $WhereClause is used twice in the query (subquery and outer query), so
    # all of its bind values are needed twice: after the level, repeat
    # $StartMonth,$EndMonth and the values collected in @GroupList so far
    # FIXME -- together with the query (see below)
    my @WhereBinds = @GroupList;
    push @GroupList,$Options{'l'};
    push @GroupList,$StartMonth,$EndMonth,@WhereBinds;
    # prepare query: get number of postings per group and month for all groups selected by the subquery
    # FIXME -- this query is ... in dire need of improvement
    $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Aggregate,$OrderClause,$WhereClause));
};
161
# execute query
# bind values: start and end month first, then everything collected in @GroupList
unless ($DBQuery->execute($StartMonth,$EndMonth,@GroupList)) {
    die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
}
165
# output results
# captions are only printed if -c is set
if ($Options{'c'}) {
    # time period, if -m or -p is set
    # FIXME - month or period should handled differently
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'m'} or $Options{'p'};
    # newsgroup list, if -n is set
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'n'};
    # threshold, if -t is set, taking -i in account
    # (tested with defined() so that an active threshold of 0 gets its caption, too)
    printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if defined($Options{'t'});
}
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
    # default: neither -b nor -l
    &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
    # -b is set (then -l can't be!)
    # we have to read in the query results ourselves, as they do not have standard layout
    while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
        # we just assign "top x" or "bottom x" instead of a month for the caption
        # FIXME
        print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
    };
} else {
    # -l must be set now, as all other cases have been taken care of
    # we have to read in the query results ourselves, as they do not have standard layout
    while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
        # we just switch $Newsgroup and $Month for output generation, so the
        # newsgroup takes the place of the caption and the month is listed below it
        # FIXME
        print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
    };
};
194
### close handles
# disconnect the database handle obtained from InitDB
$DBHandle->disconnect;
197
198__END__
199
200################################ Documentation #################################
201
202=head1 NAME
203
204groupstats - create reports on newsgroup usage
205
206=head1 SYNOPSIS
207
208B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
209
210=head1 REQUIREMENTS
211
212See doc/README: Perl 5.8.x itself and the following modules from CPAN:
213
214=over 2
215
216=item -
217
218Config::Auto
219
220=item -
221
222DBI
223
224=back
225
226=head1 DESCRIPTION
227
228This script creates reports on newsgroup usage (number of postings per
229group per month) taken from result tables created by
230F<gatherstats.pl>.
231
232The time period to act on defaults to last month; you can assign
233another month via the B<-m> switch or a time period via the B<-p>
234switch; the latter takes preference.
235
236B<groupstats> will process all newsgroups by default; you can limit
237that to only some newsgroups by supplying a list of those groups via
238B<-n> (see below). You can include hierarchy levels in the output by
239adding the B<-s> switch (see below).
240
241Furthermore you can set a threshold via B<-t> so that only newsgroups
242with more postings per month will be included in the report. You can
243invert that by the B<-i> switch so only newsgroups with less than
244I<threshold> postings per month will be included.
245
246You can sort the output by number of postings per month instead of the
247default (alphabetical list of newsgroups) by using B<-q>; you can
248reverse the sorting order (from highest to lowest or in reversed
249alphabetical order) by using B<-d>.
250
251Furthermore, you can create a list of newsgroups that had consistently
252more (or less) than x postings per month during the whole report
253period by using B<-l> (together with B<-i> as needed).
254
255Last but not least you can create a "best of" list of the top x
256newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
257
258By default, B<groupstats> will dump a very simple alphabetical list of
259newsgroups, one per line, followed by the number of postings in that
260month. This output format of course cannot sensibly be combined with
261time periods, so you can set the output format by using B<-o> (see
262below). Captions can be added by setting the B<-c> switch.
263
264=head2 Configuration
265
266F<groupstats.pl> will read its configuration from F<newsstats.conf>
267which should be present in the same directory via Config::Auto.
268
269See doc/INSTALL for an overview of possible configuration options.
270
271You can override configuration options via the B<-g> switch.
272
273=head1 OPTIONS
274
275=over 3
276
277=item B<-V> (version)
278
279Print out version and copyright information on B<groupstats> and exit.
280
281=item B<-h> (help)
282
283Print this man page and exit.
284
285=item B<-m> I<YYYY-MM> (month)
286
287Set processing period to a month in YYYY-MM format. Ignored if B<-p>
288is set.
289
290=item B<-p> I<YYYY-MM:YYYY-MM> (period)
291
292Set processing period to a time period between two months, each in
293YYYY-MM format, separated by a colon. Overrides B<-m>.
294
295=item B<-n> I<newsgroup(s)> (newsgroups)
296
297Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
298be a single newsgroup name (de.alt.test), a newsgroup hierarchy
299(de.alt.*) or a list of either of these, separated by colons, for
300example
301
302 de.test:de.alt.test:de.newusers.*
303
304=item B<-t> I<threshold> (threshold)
305
306Only include newsgroups with more than I<threshold> postings per
307month. Can be inverted by the B<-i> switch so that only newsgroups
308with less than I<threshold> postings will be included.
309
310This setting will be ignored if B<-l> or B<-b> is set.
311
312=item B<-l> I<level> (level)
313
314Only include newsgroups with more than I<level> postings per
315month, every month during the whole reporting period. Can be inverted
316by the B<-i> switch so that only newsgroups with less than I<level>
317postings every single month will be included. Output will be ordered
318by newsgroup name, followed by month.
319
320This setting will be ignored if B<-b> is set. Overrides B<-t> and
321can't be used together with B<-q> or B<-d>.
322
323=item B<-b> I<n> (best of)
324
325Create a list of the I<n> newsgroups with the most postings over the
326whole reporting period. Can be inverted by the B<-i> switch so that a
327list of the I<n> newsgroups with the least postings over the whole
328period is generated. Output will be ordered by sum of postings.
329
330Overrides B<-t> and B<-l> and can't be used together with B<-q> or
331B<-d>. Output format is set to I<pretty> (see below).
332
333=item B<-i> (invert)
334
335Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
336threshold or level or generate a "bottom list" instead of a top list.
337
338=item B<-s> (sum per hierarchy level)
339
340Include "virtual" groups for every hierarchy level in output, for
341example:
342
343 de.alt.ALL 10
344 de.alt.test 5
345 de.alt.admin 7
346
347See the B<gatherstats> man page for details.
348
349=item B<-o> I<output type> (output format)
350
351Set output format. Default is I<dump>, consisting of an alphabetical
352list of newsgroups, each on a new line, followed by the number of
353postings in that month. This default format can't be used with time
354periods of more than one month.
355
356I<list> format is like I<dump>, but will print the month in front of
357the newsgroup name.
358
359I<dumpgroup> format can only be used with a group list (see B<-n>) of
360exactly one newsgroup and is like I<dump>, but will output months,
361followed by the number of postings.
362
363If you don't need easily parsable output, you'll mostly use I<pretty>
364format, which will print a header for each new month and try to align
365newsgroup names and posting counts. Usage of B<-b> will force this
366format.
367
368=item B<-c> (captions)
369
370Add captions to output (reporting period, newsgroups list, threshold).
371
372=item B<-q> (quantity of postings)
373
374Sort by number of postings instead of by newsgroup names.
375
376Cannot be used with B<-l> or B<-b>.
377
378=item B<-d> (descending)
379
380Change sort order to descending.
381
382Cannot be used with B<-l> or B<-b>.
383
384=item B<-g> I<table> (postings per group table)
385
386Override I<DBTableGrps> from F<newsstats.conf>.
387
388=back
389
390=head1 INSTALLATION
391
392See doc/INSTALL.
393
394=head1 EXAMPLES
395
396Show number of postings per group for last month in I<dump> format:
397
398 groupstats
399
400Show that report for January of 2010 and de.alt.* plus de.test,
401including display of hierarchy levels:
402
403 groupstats -m 2010-01 -n de.alt.*:de.test -s
404
405Show that report for the year of 2010 in I<pretty> format:
406
407 groupstats -p 2010-01:2010-12 -o pretty
408
409Only show newsgroups with less than 30 postings last month, ordered
410by number of postings, descending, in I<pretty> format:
411
412 groupstats -iqdt 30 -o pretty
413
414Show top 10 for the first half-year of 2010 in I<pretty> format:
415
416 groupstats -p 2010-01:2010-06 -b 10 -o pretty
417
418Report all groups that had less than 30 postings every single month
419in the year of 2010 (I<pretty> format is forced)
420
421 groupstats -p 2010-01:2010-12 -il 30
422
423=head1 FILES
424
425=over 4
426
427=item F<groupstats.pl>
428
429The script itself.
430
431=item F<NewsStats.pm>
432
433Library functions for the NewsStats package.
434
435=item F<newsstats.conf>
436
437Runtime configuration file for B<groupstats>.
438
439=back
440
441=head1 BUGS
442
443Please report any bugs or feature requests to the author or use the
444bug tracker at L<http://bugs.th-h.de/>!
445
446=head1 SEE ALSO
447
448=over 2
449
450=item -
451
452doc/README
453
454=item -
455
456doc/INSTALL
457
458=item -
459
460gatherstats -h
461
462=back
463
464This script is part of the B<NewsStats> package.
465
466=head1 AUTHOR
467
468Thomas Hochstein <thh@inter.net>
469
470=head1 COPYRIGHT AND LICENSE
471
472Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
473
474This program is free software; you may redistribute it and/or modify it
475under the same terms as Perl itself.
476
477=cut
This page took 0.032785 seconds and 4 git commands to generate.