Add comments and POD.
[usenet/newsstats.git] / groupstats.pl
CommitLineData
741336c2
TH
1#! /usr/bin/perl -W\r
2#\r
3# groupstats.pl\r
4#\r
5# This script will get statistical data on newsgroup usage\r
6# from a database.\r
7# \r
8# It is part of the NewsStats package.\r
9#\r
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
11#\r
12# It can be redistributed and/or modified under the same terms under \r
13# which Perl itself is published.\r
14\r
15BEGIN {\r
16 our $VERSION = "0.01";\r
17 use File::Basename;\r
18 push(@INC, dirname($0));\r
19}\r
20use strict;\r
21\r
22use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);\r
23\r
24use DBI;\r
25\r
741336c2
TH
################################# Main program #################################

### Parse the command line.
# ReadOptions() comes from the NewsStats module; the argument looks like a
# getopt-style option specification (NOTE(review): confirm against NewsStats.pm).
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### Load the configuration file.
# ReadConfig() hands back a hash reference, which is copied into %Conf.
my $ConfRef = ReadConfig('newsstats.conf');
my %Conf = %{$ConfRef};

### Let command line options take priority over the configuration file.
# Only the name of the groups table (-g) can be overridden here.
my %ConfOverride;
if ($Options{'g'}) {
  $ConfOverride{'DBTableGrps'} = $Options{'g'};
}
&OverrideConfig(\%Conf,\%ConfOverride);
13c5a175
TH
### Resolve conflicting command line options.
# Thresholds (-t), top lists (-b) and levels (-l) are mutually exclusive.
# Order of precedence: -b beats -l, and both beat -t.
if ($Options{'b'} or $Options{'l'}) {
  # a threshold has no meaning in either list mode: discard it
  if ($Options{'t'}) {
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
    $Options{'t'} = undef;
  }
  # both list modes requested: keep -b and discard -l
  if ($Options{'b'} and $Options{'l'}) {
    warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
    $Options{'l'} = undef;
  }
  # the sort switches (-q/-d) have no effect in list modes; the options are
  # left untouched, the user is only told that they are ignored
  if ($Options{'q'}) {
    warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n");
  }
  if ($Options{'d'}) {
    warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n");
  }
}
57\r
### Validate the output type (-o).
# Fall back to 'dump' when no output type was requested.
$Options{'o'} ||= 'dump';

# 'dumpgroup' needs exactly one newsgroup given via -n.
if ($Options{'o'} eq 'dumpgroup') {
  if (!defined($Options{'n'})) {
    # no newsgroup at all: degrade to plain 'dump' and carry on
    $Options{'o'} = 'dump';
    warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
  } elsif ($Options{'n'} =~ /:|\*/) {
    # a colon (list separator) or a wildcard means more than one group: give up
    die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n");
  }
}

# Level reports (-l) are always rendered in 'pretty' format.
if ($Options{'l'}) {
  $Options{'o'} = 'pretty';
  warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
}
73\r
741336c2
TH
### Determine the reporting period.
# GetTimePeriod() (from NewsStats) turns -m / -p into first and last month.
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});

# 'dump' output has no room for a month column, so a period given via -p is
# collapsed to its last month.
if ($Options{'p'} and $Options{'o'} eq 'dump') {
  $StartMonth = $EndMonth;
  warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
}

### Connect to the database.
# NOTE(review): the meaning of the second argument (1) is defined by InitDB()
# in NewsStats.pm -- confirm there.
my $DBHandle = InitDB(\%Conf,1);
84\r
3430c898 85### create report\r
741336c2
TH
# Restrict the report to the newsgroups given via -n.
# $QueryPart holds the SQL condition, @GroupList the values for its
# placeholders. Without -n the condition is the constant 1 (always true) and
# there is nothing to bind.
my $Newsgroups = $Options{'n'};
my $QueryPart = 1;
my @GroupList;
# explode the list of newsgroups into a condition plus its bind values
($QueryPart,@GroupList) = &SQLGroupList($Newsgroups) if $Newsgroups;

# Apply the threshold (-t): by default only groups above it are listed,
# with -i only groups below it.
if (defined($Options{'t'})) {
  $QueryPart .= $Options{'i'} ? ' AND postings < ?' : ' AND postings > ?';
  # the threshold is one more bind value for DBQuery->execute
  push @GroupList,$Options{'t'};
}
109\r
# Assemble the WHERE clause:
#  - the reporting period (two placeholders, bound to start and end month),
#  - $QueryPart, i.e. the newsgroup condition (or 1) plus optional threshold,
#  - whatever &SQLHierarchies() returns for -s, which deals with the
#    hierarchy level entries (.ALL).
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));

# Length of the longest newsgroup name the query will deliver; needed later
# to align the output columns.
# FIXME
my $MaxLength = &GetMaxLenght(
  $DBHandle,
  $Conf{'DBTableGrps'},
  'newsgroup',
  $WhereClause,
  $StartMonth,$EndMonth,@GroupList
);
119\r
# Prepare the report query for the selected mode: default listing, top/bottom
# list (-b) or level report (-l).
#
# The statement is later executed with ($StartMonth,$EndMonth,@GroupList) as
# bind values. Every branch that adds placeholders therefore has to append the
# matching values to @GroupList, in the order the placeholders appear.
my ($OrderClause,$DBQuery);
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l
  # set ordering (ORDER BY) to "newsgroup" or "postings", "ASC" or "DESC"
  # according to -q and -d
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # prepare query: number of postings per group and month for the given
  # months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # sorting order: most postings first, or fewest first with -i
  if ($Options{'i'}) {
    $OrderClause = 'postings';
  } else {
    $OrderClause = 'postings DESC';
  }
  # the LIMIT placeholder needs its bind value
  push @GroupList,$Options{'b'};
  # prepare query: sum of postings per group over the whole period, cut off
  # after the requested number of groups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # -l must be set now, as all other cases have been taken care of
  #
  # A group qualifies if it stayed above the level in EVERY month, i.e. its
  # lowest monthly count exceeds the level (MIN > level). With -i it must have
  # stayed below the level in every month, i.e. its highest monthly count is
  # below the level (MAX < level).
  # NOTE(review): months for which a group has no row at all cannot be seen
  # by this query -- confirm whether gatherstats.pl stores zero counts.
  my $Aggregate;
  if ($Options{'i'}) {
    $OrderClause = '<';
    $Aggregate   = 'MAX';
  } else {
    $OrderClause = '>';
    $Aggregate   = 'MIN';
  }
  # $WhereClause is used twice (subquery and outer query), so ALL of its bind
  # values are needed twice: the months and the newsgroup values from -n.
  # Bind order: [months, groups] (subquery), level, [months, groups] (outer).
  # (-t cannot be set together with -l, so @GroupList holds at most the
  # newsgroup values at this point.)
  my @WhereBinds = @GroupList;
  push @GroupList,$Options{'l'};
  push @GroupList,$StartMonth,$EndMonth,@WhereBinds;
  # prepare query: postings per month for all groups that pass the level test
  # FIXME -- this query is still in need of improvement
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Aggregate,$OrderClause,$WhereClause));
}
159\r
# Run the prepared statement. The period is bound to the first two
# placeholders, @GroupList supplies the values for all remaining ones.
if (!$DBQuery->execute($StartMonth,$EndMonth,@GroupList)) {
  die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
}
741336c2 163\r
3430c898
TH
# Print the report.
# Captions are only printed on request (-c).
# FIXME - month or period should be handled differently
if ($Options{'c'}) {
  # reporting period, if -m or -p was given
  if ($Options{'m'} or $Options{'p'}) {
    printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth);
  }
  # list of newsgroups, if -n was given
  if ($Options{'n'}) {
    printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups)));
  }
  # threshold, if -t was given; -i decides the direction
  if ($Options{'t'}) {
    printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'});
  }
}

if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # default: neither -b nor -l -- the library routine prints the result set
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
  # -b is set (then -l can't be!)
  # The rows are (newsgroup, postings) without a month, so they are read
  # here. "Top x" / "Bottom x" is passed in place of the month.
  # FIXME
  my $ListCaption = ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'};
  while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    print &FormatOutput($Options{'o'}, $ListCaption, $Newsgroup, $Postings, $MaxLength);
  }
} else {
  # -l must be set now, as all other cases have been taken care of
  # The rows are read here as well; newsgroup and month swap places in the
  # call so that the output is grouped by newsgroup.
  # FIXME
  while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
  }
}

### Close the database connection.
$DBHandle->disconnect;
195\r
3430c898
TH
196__END__\r
197\r
198################################ Documentation #################################\r
199\r
200=head1 NAME\r
201\r
202groupstats - create reports on newsgroup usage\r
203\r
204=head1 SYNOPSIS\r
205\r
206B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>]\r
207\r
208=head1 REQUIREMENTS\r
209\r
210See doc/README: Perl 5.8.x itself and the following modules from CPAN:\r
211\r
212=over 2\r
213\r
214=item -\r
215\r
216Config::Auto\r
217\r
218=item -\r
219\r
220DBI\r
221\r
222=back\r
223\r
224=head1 DESCRIPTION\r
225\r
226This script creates reports on newsgroup usage (number of postings per\r
227group per month) taken from result tables created by\r
228F<gatherstats.pl>.\r
229\r
230The time period to act on defaults to last month; you can assign\r
231another month via the B<-m> switch or a time period via the B<-p>\r
232switch; the latter takes precedence.\r
233\r
234B<groupstats> will process all newsgroups by default; you can limit\r
235that to only some newsgroups by supplying a list of those groups via\r
236B<-n> (see below). You can include hierarchy levels in the output by\r
237adding the B<-s> switch (see below).\r
238\r
239Furthermore you can set a threshold via B<-t> so that only newsgroups\r
240with more postings per month will be included in the report. You can\r
241invert that by the B<-i> switch so only newsgroups with less than\r
242I<threshold> postings per month will be included.\r
243\r
244You can sort the output by number of postings per month instead of the\r
245default (alphabetical list of newsgroups) by using B<-q>; you can\r
246reverse the sorting order (from highest to lowest or in reversed\r
247alphabetical order) by using B<-d>.\r
248\r
249Furthermore, you can create a list of newsgroups that had consistently\r
250more (or less) than x postings per month during the whole report\r
251period by using B<-l> (together with B<-i> as needed).\r
252\r
253Last but not least you can create a "best of" list of the top x\r
254newsgroups via B<-b> (or a "worst of" list by adding B<-i>).\r
255\r
256By default, B<groupstats> will dump a very simple alphabetical list of\r
257newsgroups, one per line, followed by the number of postings in that\r
258month. This output format of course cannot sensibly be combined with\r
259time periods, so you can set the output format by using B<-o> (see\r
260below). Captions can be added by setting the B<-c> switch.\r
261\r
262=head2 Configuration\r
263\r
264F<groupstats.pl> will read its configuration from F<newsstats.conf>\r
265which should be present in the same directory via Config::Auto.\r
266\r
267See doc/INSTALL for an overview of possible configuration options.\r
268\r
269You can override configuration options via the B<-g> switch.\r
270\r
271=head1 OPTIONS\r
272\r
273=over 3\r
274\r
275=item B<-V> (version)\r
276\r
277Print out version and copyright information on B<groupstats> and exit.\r
278\r
279=item B<-h> (help)\r
280\r
281Print this man page and exit.\r
282\r
283=item B<-m> I<YYYY-MM> (month)\r
284\r
285Set processing period to a month in YYYY-MM format. Ignored if B<-p>\r
286is set.\r
287\r
288=item B<-p> I<YYYY-MM:YYYY-MM> (period)\r
289\r
290Set processing period to a time period between two months, each in\r
291YYYY-MM format, separated by a colon. Overrides B<-m>.\r
292\r
293=item B<-n> I<newsgroup(s)> (newsgroups)\r
294\r
295Limit processing to a certain set of newsgroups. I<newsgroup(s)> can\r
296be a single newsgroup name (de.alt.test), a newsgroup hierarchy\r
297(de.alt.*) or a list of either of these, separated by colons, for\r
298example\r
299\r
300 de.test:de.alt.test:de.newusers.*\r
301\r
302=item B<-t> I<threshold> (threshold)\r
303\r
304Only include newsgroups with more than I<threshold> postings per\r
305month. Can be inverted by the B<-i> switch so that only newsgroups\r
306with less than I<threshold> postings will be included.\r
307\r
308This setting will be ignored if B<-l> or B<-b> is set.\r
309\r
310=item B<-l> I<level> (level)\r
311\r
312Only include newsgroups with more than I<level> postings per\r
313month, every month during the whole reporting period. Can be inverted\r
314by the B<-i> switch so that only newsgroups with less than I<level>\r
315postings every single month will be included. Output will be ordered\r
316by newsgroup name, followed by month.\r
317\r
318This setting will be ignored if B<-b> is set. Overrides B<-t> and\r
319can't be used together with B<-q> or B<-d>.\r
320\r
321=item B<-b> I<n> (best of)\r
322\r
323Create a list of the I<n> newsgroups with the most postings over the\r
324whole reporting period. Can be inverted by the B<-i> switch so that a\r
325list of the I<n> newsgroups with the least postings over the whole\r
326period is generated. Output will be ordered by sum of postings.\r
327\r
328Overrides B<-t> and B<-l> and can't be used together with B<-q> or\r
329B<-d>. Output format is set to I<pretty> (see below).\r
330\r
331=item B<-i> (invert)\r
332\r
333Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower\r
334threshold or level or generate a "bottom list" instead of a top list.\r
335\r
336=item B<-s> (sum per hierarchy level)\r
337\r
338Include "virtual" groups for every hierarchy level in output, for\r
339example:\r
340\r
341 de.alt.ALL 10\r
342 de.alt.test 5\r
343 de.alt.admin 7\r
344\r
345See the B<gatherstats> man page for details.\r
346\r
347=item B<-o> I<output type> (output format)\r
348\r
349Set output format. Default is I<dump>, consisting of an alphabetical\r
350list of newsgroups, each on a new line, followed by the number of\r
351postings in that month. This default format can't be used with time\r
352periods of more than one month.\r
353\r
354I<list> format is like I<dump>, but will print the month in front of\r
355the newsgroup name.\r
356\r
357I<dumpgroup> format can only be used with a group list (see B<-n>) of\r
358exactly one newsgroup and is like I<dump>, but will output months,\r
359followed by the number of postings.\r
360\r
361If you don't need easily parsable output, you'll mostly use I<pretty>\r
362format, which will print a header for each new month and try to align\r
363newsgroup names and posting counts. Usage of B<-b> will force this\r
364format.\r
365\r
366=item B<-c> (captions)\r
367\r
368Add captions to output (reporting period, newsgroups list, threshold).\r
369\r
370=item B<-q> (quantity of postings)\r
371\r
372Sort by number of postings instead of by newsgroup names.\r
373\r
374Cannot be used with B<-l> or B<-b>.\r
375\r
376=item B<-d> (descending)\r
377\r
378Change sort order to descending.\r
379\r
380Cannot be used with B<-l> or B<-b>.\r
381\r
382=item B<-g> I<table> (postings per group table)\r
383\r
384Override I<DBTableGrps> from F<newsstats.conf>.\r
385\r
386=back\r
387\r
388=head1 INSTALLATION\r
389\r
390See doc/INSTALL.\r
391\r
392=head1 EXAMPLES\r
393\r
394Show number of postings per group for last month in I<dump> format:\r
395\r
396 groupstats\r
397\r
398Show that report for January of 2010 and de.alt.* plus de.test,\r
399including display of hierarchy levels:\r
400\r
401 groupstats -m 2010-01 -n de.alt.*:de.test -s\r
402\r
403Show that report for the year of 2010 in I<pretty> format:\r
404\r
405 groupstats -p 2010-01:2010-12 -o pretty\r
406\r
407Only show newsgroups with less than 30 postings last month, ordered\r
408by number of postings, descending, in I<pretty> format:\r
409\r
410 groupstats -iqdt 30 -o pretty\r
411\r
412Show top 10 for the first half-year of 2010 in I<pretty> format:\r
413\r
414 groupstats -p 2010-01:2010-06 -b 10 -o pretty\r
415\r
416Report all groups that had less than 30 postings every single month\r
417in the year of 2010 (I<pretty> format is forced):\r
418\r
419 groupstats -p 2010-01:2010-12 -il 30\r
420\r
421=head1 FILES\r
422\r
423=over 4\r
424\r
425=item F<groupstats.pl>\r
426\r
427The script itself.\r
428\r
429=item F<NewsStats.pm>\r
430\r
431Library functions for the NewsStats package.\r
432\r
433=item F<newsstats.conf>\r
434\r
435Runtime configuration file for B<groupstats>.\r
436\r
437=back\r
438\r
439=head1 BUGS\r
440\r
441Please report any bugs or feature requests to the author or use the\r
442bug tracker at L<http://bugs.th-h.de/>!\r
443\r
444=head1 SEE ALSO\r
445\r
446=over 2\r
447\r
448=item -\r
449\r
450doc/README\r
451\r
452=item -\r
453\r
454doc/INSTALL\r
455\r
456=item -\r
457\r
458gatherstats -h\r
459\r
460=back\r
461\r
462This script is part of the B<NewsStats> package.\r
463\r
464=head1 AUTHOR\r
465\r
466Thomas Hochstein <thh@inter.net>\r
467\r
468=head1 COPYRIGHT AND LICENSE\r
469\r
470Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
471\r
472This program is free software; you may redistribute it and/or modify it\r
473under the same terms as Perl itself.\r
474\r
475=cut\r
This page took 0.034879 seconds and 4 git commands to generate.