groupstats.pl: Improve comments.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newsgroup usage
6# from a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
### read commandline options
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### read configuration
my %Conf = %{ ReadConfig('newsstats.conf') };

### override configuration via commandline options
# -g replaces the groups table name (DBTableGrps) taken from the configuration
my %ConfOverride = $Options{'g'} ? ('DBTableGrps' => $Options{'g'}) : ();
&OverrideConfig(\%Conf,\%ConfOverride);

### check for incompatible command line options
# -t, -b and -l exclude each other:
# -b and -l both win over -t, and -b wins over -l
if ($Options{'b'} or $Options{'l'}) {
  # thresholds are meaningless for top lists and levels: drop -t
  if ($Options{'t'}) {
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    $Options{'t'} = undef;
  }
  # top list and level requested at the same time: drop -l
  if ($Options{'b'} and $Options{'l'}) {
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    $Options{'l'} = undef;
  }
  # the sorting switches -q and -d are not honoured in these modes;
  # the options stay set, the user is only told that they are ignored
  for my $Ignored (
    ['q', "$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n"],
    ['d', "$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n"],
  ) {
    my ($Switch,$Warning) = @$Ignored;
    warn ($Warning) if $Options{$Switch};
  }
}

### check output type
# no (or empty) -o: use 'dump'
$Options{'o'} ||= 'dump';
if ($Options{'o'} eq 'dumpgroup') {
  if (defined($Options{'n'})) {
    # 'dumpgroup' is for a single group only: a list (:) or a wildcard (*)
    # in -n is a fatal error
    die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if $Options{'n'} =~ /:|\*/;
  } else {
    # no group given at all: fall back to plain 'dump'
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
  }
}
# levels (-l) are always reported in 'pretty' format
if ($Options{'l'}) {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
}

### get time period
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# 'dump' cannot be combined with a time period (-p):
# keep the period and switch the output type to 'pretty' instead
if ($Options{'p'} and $Options{'o'} eq 'dump') {
  warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n");
  $Options{'o'} = 'pretty';
}
81
### init database
my $DBHandle = InitDB(\%Conf,1);

### create report
# get list of newsgroups (-n):
# with -n, &SQLGroupList() delivers the SQL fragment for the WHERE clause
# followed by the values to bind to it; without -n the fragment is the
# dummy value 1 (always true) and there is nothing to bind
my $Newsgroups = $Options{'n'};
my ($QueryGroupList,@GroupList) = $Newsgroups ? &SQLGroupList($Newsgroups) : (1);

# manage thresholds:
# default is the dummy value 1 (always true) with no bind values;
# with -t, compare postings against the threshold, which is bound via @Params
# (-i: list groups below instead of above the threshold)
my ($QueryThreshold,@Params) = (1);
if (defined($Options{'t'})) {
  $QueryThreshold = $Options{'i'} ? ' postings < ?' : ' postings > ?';
  @Params = ($Options{'t'});
}

# construct WHERE clause
# $QueryGroupList is "list of newsgroup" (or 1),
# $QueryThreshold is threshold definition (or 1),
# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
# according to setting of -s
my $WhereClause = sprintf(
  'month BETWEEN ? AND ? AND %s AND %s %s',
  $QueryGroupList,
  $QueryThreshold,
  &SQLHierarchies($Options{'s'})
);

# get length of longest newsgroup delivered by query for formatting purposes
# FIXME
my $MaxLength = &GetMaxLenght(
  $DBHandle,
  $Conf{'DBTableGrps'},
  'newsgroup',
  $WhereClause,
  $StartMonth,
  $EndMonth,
  (@GroupList,@Params)
);
2832c235
TH
123
# Prepare the report query for one of three modes:
# default listing, top list (-b) or level list (-l).
# All values are passed as bind values; @Params is extended here so that
# ($StartMonth,$EndMonth,@GroupList,@Params) matches the placeholders of the
# prepared statement in order.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroup" or "postings" (-q),
  # "ASC" or "DESC" (-d)
  $OrderClause = $Options{'q'} ? 'postings' : 'newsgroup';
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: get number of postings per group from groups table
  # for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif (defined($Options{'b'})) {
  # -b is set; test with defined() so that '-b 0' or an empty value ends up
  # here (and gets the default below) instead of falling through to the -l
  # branch with an undefined level
  # set sorting order (-i): top or flop list?
  $OrderClause = $Options{'i'} ? 'postings' : 'postings DESC';
  # default to a Top 10 if -b is not a positive integer
  # (\d+ rather than \d* so that an empty value is rejected, too)
  $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1;
  # push LIMIT to Params
  push @Params,$Options{'b'};
  # prepare query: get sum of postings per group from groups table
  # for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  # which kind of level (-i): more than -l x or less than -l x,
  # every single month?
  # "more than x every month" means the worst month is still above x
  # (MIN), "less than x every month" means the best month is still
  # below x (MAX)
  my ($Level,$Aggregate) = $Options{'i'} ? ('<','MAX') : ('>','MIN');
  # $WhereClause is used twice in the query below (sub-select and outer
  # select), so ALL of its bind values have to be supplied twice: months,
  # newsgroup list and - if present - threshold. The level goes in between.
  # NOTE(review): assumes @GroupList holds exactly the bind values for the
  # placeholders in the newsgroup part of $WhereClause - confirm against
  # SQLGroupList() in NewsStats.pm
  # FIXME -- together with the query (see below)
  my @WhereParams = ($StartMonth,$EndMonth,@GroupList,@Params);
  push @Params,$Options{'l'},@WhereParams;
  # prepare query: get number of postings per group and month from groups
  # table for all groups that meet the level in the given months
  # FIXME -- this query is ... in dire need of improvement
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Aggregate,$Level,$WhereClause));
}
166
# execute query
# bind values: reporting period first, then newsgroup list, then everything
# collected in @Params
$DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params)
  or die sprintf(
    "$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",
    $StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr
  );

# output results
# captions are printed only with -c
if ($Options{'c'}) {
  # time period, if -m or -p is set
  # FIXME - month or period should be handled differently
  printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'m'} or $Options{'p'};
  # newsgroup list, if -n is set (colons are shown as commas)
  printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'n'};
  # threshold, if -t is set, taking -i into account
  printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'t'};
}

if (defined($Options{'b'}) or defined($Options{'l'})) {
  # -b or -l: we have to read in the query results ourselves, as they do
  # not have standard layout
  if ($Options{'b'}) {
    # -b is set (then -l can't be!)
    # we just assign "top x" or "bottom x" instead of a month for the caption
    # FIXME
    my $Caption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
    while (my @Row = $DBQuery->fetchrow_array) {
      my ($Newsgroup,$Postings) = @Row;
      print &FormatOutput($Options{'o'}, $Caption, $Newsgroup, $Postings, $MaxLength);
    }
  } else {
    # -l must be set now, as all other cases have been taken care of
    # we just switch newsgroup and month for output generation
    # FIXME
    while (my @Row = $DBQuery->fetchrow_array) {
      my ($Month,$Newsgroup,$Postings) = @Row;
      print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
    }
  }
} else {
  # default: neither -b nor -l
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
}

### close handles
$DBHandle->disconnect;
202
203__END__
204
205################################ Documentation #################################
206
207=head1 NAME
208
209groupstats - create reports on newsgroup usage
210
211=head1 SYNOPSIS
212
213B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
214
215=head1 REQUIREMENTS
216
217See doc/README: Perl 5.8.x itself and the following modules from CPAN:
218
219=over 2
220
221=item -
222
223Config::Auto
224
225=item -
226
227DBI
228
229=back
230
231=head1 DESCRIPTION
232
233This script creates reports on newsgroup usage (number of postings per
234group per month) taken from result tables created by
235F<gatherstats.pl>.
236
237The time period to act on defaults to last month; you can assign
238another month via the B<-m> switch or a time period via the B<-p>
239switch; the latter takes preference.
240
241B<groupstats> will process all newsgroups by default; you can limit
242that to only some newsgroups by supplying a list of those groups via
243B<-n> (see below). You can include hierarchy levels in the output by
244adding the B<-s> switch (see below).
245
246Furthermore you can set a threshold via B<-t> so that only newsgroups
247with more postings per month will be included in the report. You can
248invert that by the B<-i> switch so only newsgroups with less than
249I<threshold> postings per month will be included.
250
251You can sort the output by number of postings per month instead of the
252default (alphabetical list of newsgroups) by using B<-q>; you can
253reverse the sorting order (from highest to lowest or in reversed
254alphabetical order) by using B<-d>.
255
256Furthermore, you can create a list of newsgroups that had consistently
257more (or less) than x postings per month during the whole report
258period by using B<-l> (together with B<-i> as needed).
259
260Last but not least you can create a "best of" list of the top x
261newsgroups via B<-b> (or a "worst of" list by adding B<-i>).
262
263By default, B<groupstats> will dump a very simple alphabetical list of
264newsgroups, one per line, followed by the number of postings in that
265month. This output format of course cannot sensibly be combined with
266time periods, so you can set the output format by using B<-o> (see
267below). Captions can be added by setting the B<-c> switch.
268
269=head2 Configuration
270
271F<groupstats.pl> will read its configuration from F<newsstats.conf>
272which should be present in the same directory via Config::Auto.
273
274See doc/INSTALL for an overview of possible configuration options.
275
276You can override configuration options via the B<-g> switch.
277
278=head1 OPTIONS
279
280=over 3
281
282=item B<-V> (version)
283
284Print out version and copyright information on B<groupstats> and exit.
285
286=item B<-h> (help)
287
288Print this man page and exit.
289
290=item B<-m> I<YYYY-MM> (month)
291
292Set processing period to a month in YYYY-MM format. Ignored if B<-p>
293is set.
294
295=item B<-p> I<YYYY-MM:YYYY-MM> (period)
296
297Set processing period to a time period between two months, each in
298YYYY-MM format, separated by a colon. Overrides B<-m>.
299
300=item B<-n> I<newsgroup(s)> (newsgroups)
301
302Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
303be a single newsgroup name (de.alt.test), a newsgroup hierarchy
304(de.alt.*) or a list of either of these, separated by colons, for
305example
306
307 de.test:de.alt.test:de.newusers.*
308
309=item B<-t> I<threshold> (threshold)
310
311Only include newsgroups with more than I<threshold> postings per
312month. Can be inverted by the B<-i> switch so that only newsgroups
313with less than I<threshold> postings will be included.
314
315This setting will be ignored if B<-l> or B<-b> is set.
316
317=item B<-l> I<level> (level)
318
319Only include newsgroups with more than I<level> postings per
320month, every month during the whole reporting period. Can be inverted
321by the B<-i> switch so that only newsgroups with less than I<level>
322postings every single month will be included. Output will be ordered
323by newsgroup name, followed by month.
324
325This setting will be ignored if B<-b> is set. Overrides B<-t> and
326can't be used together with B<-q> or B<-d>.
327
328=item B<-b> I<n> (best of)
329
330Create a list of the I<n> newsgroups with the most postings over the
331whole reporting period. Can be inverted by the B<-i> switch so that a
332list of the I<n> newsgroups with the least postings over the whole
333period is generated. Output will be ordered by sum of postings.
334
335Overrides B<-t> and B<-l> and can't be used together with B<-q> or
336B<-d>. Output format is set to I<pretty> (see below).
337
338=item B<-i> (invert)
339
340Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
341threshold or level or generate a "bottom list" instead of a top list.
342
343=item B<-s> (sum per hierarchy level)
344
345Include "virtual" groups for every hierarchy level in output, for
346example:
347
348 de.alt.ALL 10
349 de.alt.test 5
350 de.alt.admin 7
351
352See the B<gatherstats> man page for details.
353
354=item B<-o> I<output type> (output format)
355
356Set output format. Default is I<dump>, consisting of an alphabetical
357list of newsgroups, each on a new line, followed by the number of
358postings in that month. This default format can't be used with time
359periods of more than one month.
360
361I<list> format is like I<dump>, but will print the month in front of
362the newsgroup name.
363
364I<dumpgroup> format can only be used with a group list (see B<-n>) of
365exactly one newsgroup and is like I<dump>, but will output months,
366followed by the number of postings.
367
368If you don't need easily parsable output, you'll mostly use I<pretty>
369format, which will print a header for each new month and try to align
370newsgroup names and posting counts. Usage of B<-b> will force this
371format.
372
373=item B<-c> (captions)
374
375Add captions to output (reporting period, newsgroups list, threshold).
376
377=item B<-q> (quantity of postings)
378
379Sort by number of postings instead of by newsgroup names.
380
381Cannot be used with B<-l> or B<-b>.
382
383=item B<-d> (descending)
384
385Change sort order to descending.
386
387Cannot be used with B<-l> or B<-b>.
388
389=item B<-g> I<table> (postings per group table)
390
391Override I<DBTableGrps> from F<newsstats.conf>.
392
393=back
394
395=head1 INSTALLATION
396
397See doc/INSTALL.
398
399=head1 EXAMPLES
400
401Show number of postings per group for last month in I<dump> format:
402
403 groupstats
404
405Show that report for January of 2010 and de.alt.* plus de.test,
406including display of hierarchy levels:
407
408 groupstats -m 2010-01 -n de.alt.*:de.test -s
409
410Show that report for the year of 2010 in I<pretty> format:
411
412 groupstats -p 2010-01:2010-12 -o pretty
413
414Only show newsgroups with less than 30 postings last month, ordered
415by number of postings, descending, in I<pretty> format:
416
417 groupstats -iqdt 30 -o pretty
418
419Show top 10 for the first half-year of 2010 in I<pretty> format:
420
421 groupstats -p 2010-01:2010-06 -b 10 -o pretty
422
423Report all groups that had less than 30 postings every single month
424in the year of 2010 (I<pretty> format is forced)
425
426 groupstats -p 2010-01:2010-12 -il 30
427
428=head1 FILES
429
430=over 4
431
432=item F<groupstats.pl>
433
434The script itself.
435
436=item F<NewsStats.pm>
437
438Library functions for the NewsStats package.
439
440=item F<newsstats.conf>
441
442Runtime configuration file for the B<NewsStats> package.
443
444=back
445
446=head1 BUGS
447
448Please report any bugs or feature requests to the author or use the
449bug tracker at L<http://bugs.th-h.de/>!
450
451=head1 SEE ALSO
452
453=over 2
454
455=item -
456
457doc/README
458
459=item -
460
461doc/INSTALL
462
463=item -
464
465gatherstats -h
466
467=back
468
469This script is part of the B<NewsStats> package.
470
471=head1 AUTHOR
472
473Thomas Hochstein <thh@inter.net>
474
475=head1 COPYRIGHT AND LICENSE
476
477Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
478
479This program is free software; you may redistribute it and/or modify it
480under the same terms as Perl itself.
481
482=cut
This page took 0.034476 seconds and 4 git commands to generate.