Introduce end-of-line normalization and add .gitattributes.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# groupstats.pl
4#
5# This script will get statistical data on newgroup usage
6# form a database.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
23
24use DBI;
25
26################################# Main program #################################
27
28### read commandline options
29my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
30
31### read configuration
32my %Conf = %{ReadConfig('newsstats.conf')};
33
34### override configuration via commandline options
35my %ConfOverride;
36$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
37&OverrideConfig(\%Conf,\%ConfOverride);
38
39### check for incompatible command line options
40# you can't mix '-t', '-b' and '-l'
41# -b/-l take preference over -t, and -b takes preference over -l
42if ($Options{'b'} or $Options{'l'}) {
43 if ($Options{'t'}) {
44 # drop -t
45 warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
46 undef($Options{'t'});
47 };
48 if ($Options{'b'} and $Options{'l'}) {
49 # drop -l
50 warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
51 undef($Options{'l'});
52 };
53 # -q/-d don't work with -b or -l
54 warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
55 warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
56};
57
58### check output type
59# default output type to 'dump'
60$Options{'o'} = 'dump' if !$Options{'o'};
61# fail if more than one newsgroup is combined with 'dumpgroup' type
62die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
63# accept 'dumpgroup' only with -n
64if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
65 $Options{'o'} = 'dump';
66 warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
67};
68# set output type to 'pretty' for -l
69if ($Options{'l'}) {
70 $Options{'o'} = 'pretty';
71 warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
72};
73
74### get time period
75my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
76# reset to one month for 'dump' output type
77if ($Options{'o'} eq 'dump' and $Options{'p'}) {
78 $StartMonth = $EndMonth;
79 warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
80};
81
82### init database
83my $DBHandle = InitDB(\%Conf,1);
84
85### create report
86# get list of newsgroups (-n)
87my ($QueryPart,@GroupList);
88my $Newsgroups = $Options{'n'};
89if ($Newsgroups) {
90 # explode list of newsgroups for WHERE clause
91 ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
92} else {
93 # set to dummy value (always true)
94 $QueryPart = 1;
95};
96
97# manage thresholds
98if (defined($Options{'t'})) {
99 if ($Options{'i'}) {
100 # -i: list groups below threshold
101 $QueryPart .= ' AND postings < ?';
102 } else {
103 # default: list groups above threshold
104 $QueryPart .= ' AND postings > ?';
105 };
106 # push threshold to GroupList to match number of binding vars for DBQuery->execute
107 push @GroupList,$Options{'t'};
108}
109
110# construct WHERE clause
111# $QueryPart is "list of newsgroup" (or 1),
112# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL)
113# according to setting of -s
114my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
115
116# get lenght of longest newsgroup delivered by query for formatting purposes
117# FIXME
118my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
119
120my ($OrderClause,$DBQuery);
121# -b (best of / top list) defined?
122if (!defined($Options{'b'}) and !defined($Options{'l'})) {
123 # default: neither -b nor -l
124 # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC"
125 # according to -q and -d
126 $OrderClause = 'newsgroup';
127 $OrderClause = 'postings' if $Options{'q'};
128 $OrderClause .= ' DESC' if $Options{'d'};
129 # prepare query: get number of postings per group from groups table for given months and newsgroups
130 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
131} elsif ($Options{'b'}) {
132 # -b is set (then -l can't be!)
133 # set sorting order (-i)
134 if ($Options{'i'}) {
135 $OrderClause = 'postings';
136 } else {
137 $OrderClause = 'postings DESC';
138 };
139 # push LIMIT to GroupList to match number of binding vars for DBQuery->execute
140 push @GroupList,$Options{'b'};
141 # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
142 $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
143} else {
144 # -l must be set now, as all other cases have been taken care of
145 # set sorting order (-i)
146 if ($Options{'i'}) {
147 $OrderClause = '<';
148 } else {
149 $OrderClause = '>';
150 };
151 # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute
152 # FIXME -- together with the query (see below)
153 push @GroupList,$Options{'l'};
154 push @GroupList,$StartMonth,$EndMonth;
155 # prepare query: get number of postings per group from groups table for given months and
156 # FIXME -- this query is ... in dire need of impromevent
157 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
158};
159
160# execute query
161$DBQuery->execute($StartMonth,$EndMonth,@GroupList)
162 or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
163
164# output results
165# print caption (-c) with time period if -m or -p is set
166# FIXME - month or period should handled differently
167printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
168# print caption (-c) with newsgroup list if -n is set
169printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
170# print caption (-c) with threshold if -t is set, taking -i in account
171printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
172if (!defined($Options{'b'}) and !defined($Options{'l'})) {
173 # default: neither -b nor -l
174 &OutputData($Options{'o'},$DBQuery,$MaxLength);
175} elsif ($Options{'b'}) {
176 # -b is set (then -l can't be!)
177 # we have to read in the query results ourselves, as they do not have standard layout
178 while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
179 # we just assign "top x" or "bottom x" instead of a month for the caption
180 # FIXME
181 print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
182 };
183} else {
184 # -l must be set now, as all other cases have been taken care of
185 # we have to read in the query results ourselves, as they do not have standard layout
186 while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
187 # we just switch $Newsgroups and $Month for output generation
188 # FIXME
189 print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
190 };
191};
192
193### close handles
194$DBHandle->disconnect;
195
196__END__
197
198################################ Documentation #################################
199
200=head1 NAME
201
202groupstats - create reports on newsgroup usage
203
204=head1 SYNOPSIS
205
206B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]
207
208=head1 REQUIREMENTS
209
210See doc/README: Perl 5.8.x itself and the following modules from CPAN:
211
212=over 2
213
214=item -
215
216Config::Auto
217
218=item -
219
220DBI
221
222=back
223
224=head1 DESCRIPTION
225
226This script create reports on newsgroup usage (number of postings per
227group per month) taken from result tables created by
228F<gatherstats.pl>.
229
230The time period to act on defaults to last month; you can assign
231another month via the B<-m> switch or a time period via the B<-p>
232switch; the latter takes preference.
233
234B<groupstats> will process all newsgroups by default; you can limit
235that to only some newsgroups by supplying a list of those groups via
236B<-n> (see below). You can include hierarchy levels in the output by
237adding the B<-s> switch (see below).
238
239Furthermore you can set a threshold via B<-t> so that only newsgroups
240with more postings per month will be included in the report. You can
241invert that by the B<-i> switch so only newsgroups with less than
242I<threshold> postings per month will be included.
243
244You can sort the output by number of postings per month instead of the
245default (alphabetical list of newsgroups) by using B<-q>; you can
246reverse the sorting order (from highest to lowest or in reversed
247alphabetical order) by using B<-d>.
248
249Furthermore, you can create a list of newsgroups that had consistently
250more (or less) than x postings per month during the whole report
251period by using B<-l> (together with B<i> as needed).
252
253Last but not least you can create a "best of" list of the top x
254newsgroups via B<-b> (or a "worst of" list by adding B<i>).
255
256By default, B<groupstats> will dump a very simple alphabetical list of
257newsgroups, one per line, followed by the number of postings in that
258month. This output format of course cannot sensibly be combined with
259time periods, so you can set the output format by using B<-o> (see
260below). Captions can be added by setting the B<-c> switch.
261
262=head2 Configuration
263
264F<groupstats.pl> will read its configuration from F<newsstats.conf>
265which should be present in the same directory via Config::Auto.
266
267See doc/INSTALL for an overview of possible configuration options.
268
269You can override configuration options via the B<-g> switch.
270
271=head1 OPTIONS
272
273=over 3
274
275=item B<-V> (version)
276
277Print out version and copyright information on B<yapfaq> and exit.
278
279=item B<-h> (help)
280
281Print this man page and exit.
282
283=item B<-m> I<YYYY-MM> (month)
284
285Set processing period to a month in YYYY-MM format. Ignored if B<-p>
286is set.
287
288=item B<-p> I<YYYY-MM:YYYY-MM> (period)
289
290Set processing period to a time period between two month, each in
291YYYY-MM format, separated by a colon. Overrides B<-m>.
292
293=item B<-n> I<newsgroup(s)> (newsgroups)
294
295Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
296be a single newsgroup name (de.alt.test), a newsgroup hierarchy
297(de.alt.*) or a list of either of these, separated by colons, for
298example
299
300 de.test:de.alt.test:de.newusers.*
301
302=item B<-t> I<threshold> (threshold)
303
304Only include newsgroups with more than I<threshold> postings per
305month. Can be inverted by the B<-i> switch so that only newsgroups
306with less than I<threshold> postings will be included.
307
308This setting will be ignored if B<-l> or B<-b> is set.
309
310=item B<-l> I<level> (level)
311
312Only include newsgroups with more than I<level> postings per
313month, every month during the whole reporting period. Can be inverted
314by the B<-i> switch so that only newsgroups with less than I<level>
315postings every single month will be included. Output will be ordered
316by newsgroup name, followed by month.
317
318This setting will be ignored if B<-b> is set. Overrides B<-t> and
319can't be used together with B<-q> or B<-d>.
320
321=item B<-b> I<n> (best of)
322
323Create a list of the I<n> newsgroups with the most postings over the
324whole reporting period. Can be inverted by the B<-i> switch so that a
325list of the I<n> newsgroups with the least postings over the whole
326period is generated. Output will be ordered by sum of postings.
327
328Overrides B<-t> and B<-l> and can't be used together with B<-q> or
329B<-d>. Output format is set to I<pretty> (see below).
330
331=item B<-i> (invert)
332
333Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower
334threshold or level or generate a "bottom list" instead of a top list.
335
336=item B<-s> (sum per hierarchy level)
337
338Include "virtual" groups for every hierarchy level in output, for
339example:
340
341 de.alt.ALL 10
342 de.alt.test 5
343 de.alt.admin 7
344
345See the B<gatherstats> man page for details.
346
347=item B<-o> I<output type> (output format)
348
349Set output format. Default is I<dump>, consisting of an alphabetical
350list of newsgroups, each on a new line, followed by the number of
351postings in that month. This default format can't be used with time
352periods of more than one month.
353
354I<list> format is like I<dump>, but will print the month in front of
355the newsgroup name.
356
357I<dumpgroup> format can only be use with a group list (see B<-n>) of
358exactly one newsgroup and is like I<dump>, but will output months,
359followed by the number of postings.
360
361If you don't need easily parsable output, you'll mostly use I<pretty>
362format, which will print a header for each new month and try to align
363newsgroup names and posting counts. Usage of B<-b> will force this
364format.
365
366=item B<-c> (captions)
367
368Add captions to output (reporting period, newsgroups list, threshold).
369
370=item B<-q> (quantity of postings)
371
372Sort by number of postings instead of by newsgroup names.
373
374Cannot be used with B<-l> or B<-b>.
375
376=item B<-d> (descending)
377
378Change sort order to descending.
379
380Cannot be used with B<-l> or B<-b>.
381
382=item B<-g> I<table> (postings per group table)
383
384Override I<DBTableGrps> from F<newsstats.conf>.
385
386=back
387
388=head1 INSTALLATION
389
390See doc/INSTALL.
391
392=head1 EXAMPLES
393
394Show number of postings per group for lasth month in I<dump> format:
395
396 groupstats
397
398Show that report for January of 2010 and de.alt.* plus de.test,
399including display of hierarchy levels:
400
401 groupstats -m 2010-01 -n de.alt.*:de.test -s
402
403Show that report for the year of 2010 in I<pretty> format:
404
405 groupstats -p 2010-01:2010-12 -o pretty
406
407Only show newsgroups with less than 30 postings last month, ordered
408by number of postings, descending, in I<pretty> format:
409
410 groupstats -iqdt 30 -o pretty
411
412Show top 10 for the first half-year of of 2010 in I<pretty> format:
413
414 groupstats -p 2010-01:2010-06 -b 10 -o pretty
415
416Report all groups that had less than 30 postings every singele month
417in the year of 2010 (I<pretty> format is forced)
418
419 groupstats -p 2010-01:2010-12 -il 30
420
421=head1 FILES
422
423=over 4
424
425=item F<groupstats.pl>
426
427The script itself.
428
429=item F<NewsStats.pm>
430
431Library functions for the NewsStats package.
432
433=item F<newsstats.conf>
434
435Runtime configuration file for B<yapfaq>.
436
437=back
438
439=head1 BUGS
440
441Please report any bugs or feature requests to the author or use the
442bug tracker at L<http://bugs.th-h.de/>!
443
444=head1 SEE ALSO
445
446=over 2
447
448=item -
449
450doc/README
451
452=item -
453
454doc/INSTALL
455
456=item -
457
458gatherstats -h
459
460=back
461
462This script is part of the B<NewsStats> package.
463
464=head1 AUTHOR
465
466Thomas Hochstein <thh@inter.net>
467
468=head1 COPYRIGHT AND LICENSE
469
470Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
471
472This program is free software; you may redistribute it and/or modify it
473under the same terms as Perl itself.
474
475=cut
This page took 0.032468 seconds and 4 git commands to generate.