Commit | Line | Data |
---|---|---|
2832c235 TH |
1 | #! /usr/bin/perl -W |
2 | # | |
3 | # groupstats.pl | |
4 | # | |
5 | # This script will get statistical data on newsgroup usage | |
6 | # from a database. | |
7 | # | |
8 | # It is part of the NewsStats package. | |
9 | # | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
11 | # | |
12 | # It can be redistributed and/or modified under the same terms under | |
13 | # which Perl itself is published. | |
14 | ||
15 | BEGIN { | |
16 | our $VERSION = "0.01"; | |
17 | use File::Basename; | |
18 | push(@INC, dirname($0)); | |
19 | } | |
20 | use strict; | |
21 | ||
22 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); | |
23 | ||
24 | use DBI; | |
25 | ||
26 | ################################# Main program ################################# | |
27 | ||
28 | ### read commandline options (letters followed by ':' are the options documented with a value in the POD below) | |
29 | my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); | |
30 | ||
31 | ### read configuration (the value returned by ReadConfig is dereferenced as a hash reference) | |
32 | my %Conf = %{ReadConfig('newsstats.conf')}; | |
33 | ||
34 | ### override configuration via commandline options (-g replaces 'DBTableGrps', but only if it has a true value) | |
35 | my %ConfOverride; | |
36 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; | |
37 | &OverrideConfig(\%Conf,\%ConfOverride); | |
38 | ||
39 | ### check for incompatible command line options | |
40 | # you can't mix '-t', '-b' and '-l' (tested by truth value, so '-b 0' / '-l 0' do not trigger these checks) | |
41 | # -b/-l take preference over -t, and -b takes preference over -l | |
42 | if ($Options{'b'} or $Options{'l'}) { | |
43 | if ($Options{'t'}) { | |
44 | # drop -t (set to undef, so the defined() test on -t further down is false) | |
45 | warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); | |
46 | undef($Options{'t'}); | |
47 | }; | |
48 | if ($Options{'b'} and $Options{'l'}) { | |
49 | # drop -l (set to undef, so later defined() tests treat -l as not given) | |
50 | warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); | |
51 | undef($Options{'l'}); | |
52 | }; | |
53 | # -q/-d don't work with -b or -l: only warn here, the options themselves are left untouched | |
54 | warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; | |
55 | warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; | |
56 | }; | |
57 | ||
58 | ### check output type: normalise -o so that it is compatible with -n and -l | |
59 | # default output type to 'dump' | |
60 | $Options{'o'} = 'dump' if !$Options{'o'}; | |
61 | # fail if more than one newsgroup (a ':' separated list or a '*' wildcard) is combined with 'dumpgroup' type | |
62 | die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); | |
63 | # accept 'dumpgroup' only with -n; otherwise fall back to 'dump' with a warning | |
64 | if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { | |
65 | $Options{'o'} = 'dump'; | |
66 | warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); | |
67 | }; | |
68 | # set output type to 'pretty' for -l; stay silent if 'pretty' was requested anyway (nothing is forced then) | |
69 | if ($Options{'l'} and $Options{'o'} ne 'pretty') { | |
70 | $Options{'o'} = 'pretty'; | |
71 | warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); | |
72 | }; | |
73 | ||
74 | ### get time period | |
75 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); | |
e742bcf5 | 76 | # if a time period (-p) was given: 'dump' is replaced by '-o dumpgroup' (exactly one group via -n, no ':' or '*') or by '-o pretty' |
2832c235 | 77 | if ($Options{'o'} eq 'dump' and $Options{'p'}) {
e742bcf5 TH |
78 | if (defined($Options{'n'}) and $Options{'n'} !~ /:|\*/) { | |
79 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o dumpgroup'.\n"); | |
80 | $Options{'o'} = 'dumpgroup'; | |
81 | } else { | |
82 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); | |
83 | $Options{'o'} = 'pretty'; | |
84 | } | |
2832c235 TH |
85 | }; |
86 | ||
87 | ### init database | |
88 | my $DBHandle = InitDB(\%Conf,1); | |
89 | ||
90 | ### create report | |
91 | # get list of newsgroups (-n); @GroupList and @Params collect the values passed to execute() for the '?' placeholders | |
6b95accb | 92 | my ($QueryGroupList,$QueryThreshold,@GroupList,@Params); |
2832c235 TH |
93 | my $Newsgroups = $Options{'n'}; |
94 | if ($Newsgroups) { | |
95 | # explode list of newsgroups for WHERE clause (presumably: SQL fragment first, then its bind values - confirm in NewsStats.pm) | |
6b95accb | 96 | ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups); |
2832c235 TH |
97 | } else { |
98 | # set to dummy value (always true) so the WHERE clause stays syntactically complete | |
6b95accb | 99 | $QueryGroupList = 1; |
2832c235 TH |
100 | }; |
101 | ||
102 | # manage thresholds (-t); the value itself is bound via a '?' placeholder, not interpolated | |
103 | if (defined($Options{'t'})) { | |
104 | if ($Options{'i'}) { | |
105 | # -i: list groups below threshold | |
6b95accb | 106 | $QueryThreshold .= ' postings < ?'; |
2832c235 TH |
107 | } else { |
108 | # default: list groups above threshold | |
6b95accb | 109 | $QueryThreshold .= ' postings > ?'; |
2832c235 | 110 | }; |
6b95accb TH |
111 | # push threshold to Params |
112 | push @Params,$Options{'t'}; | |
113 | } else { | |
114 | # set to dummy value (always true) | |
115 | $QueryThreshold = 1; | |
2832c235 TH |
116 | } |
117 | ||
118 | # construct WHERE clause; the two leading '?' are bound to $StartMonth and $EndMonth when the query is executed | |
6b95accb TH |
119 | # $QueryGroupList is "list of newsgroups" (or 1), |
120 | # $QueryThreshold is threshold definition (or 1), | |
2832c235 TH |
121 | # &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) |
122 | # according to setting of -s | |
6b95accb | 123 | my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'}));
2832c235 | 124 |
404c1acd | 125 | # get length of longest newsgroup delivered by query for formatting purposes
2832c235 | 126 | # FIXME (note: 'GetMaxLenght' is the spelling used for this helper; bind values are passed in WHERE-clause order)
6b95accb | 127 | my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params));
2832c235 TH |
128 | |
129 | my ($OrderClause,$DBQuery); | |
130 | # prepare the report query for one of three modes: default listing, -b (top/bottom list) or -l (level) | |
131 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
132 | # default: neither -b nor -l | |
133 | # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" | |
134 | # according to -q and -d | |
135 | $OrderClause = 'newsgroup'; | |
136 | $OrderClause = 'postings' if $Options{'q'}; | |
137 | $OrderClause .= ' DESC' if $Options{'d'}; | |
138 | # prepare query: get number of postings per group from groups table for given months and newsgroups | |
139 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
140 | } elsif ($Options{'b'} or !defined($Options{'l'})) { | |
141 | # -b is set (then -l can't be!); this also catches a false -b like '-b 0' without -l, which becomes a Top 10 below | |
404c1acd | 142 | # set sorting order (-i): top or flop list? |
2832c235 TH |
143 | if ($Options{'i'}) { |
144 | $OrderClause = 'postings'; | |
145 | } else { | |
146 | $OrderClause = 'postings DESC'; | |
147 | }; | |
d8695b1c TH |
148 | # set -b to 10 if not a number or < 1 (Top 10); \d+ keeps an empty value away from the numeric comparison |
149 | $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1; | |
6b95accb TH |
150 | # push LIMIT to Params |
151 | push @Params,$Options{'b'}; | |
2832c235 TH |
152 | # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT |
153 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
154 | } else { | |
155 | # -l must be set now, as all other cases have been taken care of | |
404c1acd | 156 | # which kind of level (-i)? every month more than -l x (MIN > x) or every month less than -l x (MAX < x)
6b95accb | 157 | my ($Level);
2832c235 | 158 | if ($Options{'i'}) {
6b95accb | 159 | $Level = 'MAX(postings) <';
2832c235 | 160 | } else {
6b95accb | 161 | $Level = 'MIN(postings) >';
2832c235 | 162 | };
b802bc3d TH |
163 | # prepare and execute query: get list of newsgroups meeting level condition (@Params stays in WHERE-clause order before the level) |
164 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level)); | |
165 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params,$Options{'l'}) | |
166 | or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); | |
167 | # add newsgroups to a comma-separated list ready for IN(...) query; names are escaped by the database driver | |
168 | my $GroupList; | |
169 | while (my ($Newsgroup) = $DBQuery->fetchrow_array) { | |
170 | $GroupList .= ',' if (defined($GroupList) and $GroupList ne ''); | |
171 | $GroupList .= $DBHandle->quote($Newsgroup); | |
172 | }; | |
173 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (%s) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},(defined($GroupList) ? $GroupList : 'NULL'),$WhereClause)); | |
2832c235 TH |
174 | }; |
175 | ||
176 | # execute query; bind values follow placeholder order: months, newsgroup list, then threshold/LIMIT from @Params | |
6b95accb | 177 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) |
2832c235 TH |
178 | or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); |
179 | ||
180 | # output results | |
181 | # print caption (-c) with time period if -m or -p is set | |
182 | # FIXME - month or period should be handled differently | |
183 | printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); | |
184 | # print caption (-c) with newsgroup list if -n is set | |
185 | printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; | |
186 | # print caption (-c) with threshold if -t is set, taking -i in account; defined() so that '-t 0' is captioned, too | |
187 | printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and defined($Options{'t'}); | |
188 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
189 | # default: neither -b nor -l | |
190 | &OutputData($Options{'o'},$DBQuery,$MaxLength); | |
191 | } elsif ($Options{'b'}) { | |
192 | # -b is set (then -l can't be!) | |
193 | # we have to read in the query results ourselves, as they do not have standard layout | |
194 | while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
195 | # we just assign "top x" or "bottom x" instead of a month for the caption | |
196 | # FIXME | |
197 | print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); | |
198 | }; | |
199 | } else { | |
200 | # -l must be set now, as all other cases have been taken care of | |
201 | # we have to read in the query results ourselves, as they do not have standard layout | |
202 | while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
203 | # we just switch $Newsgroup and $Month for output generation (fixed width 7 is passed in place of $MaxLength) | |
204 | # FIXME | |
205 | print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); | |
206 | }; | |
207 | }; | |
208 | ||
209 | ### close handles | |
210 | $DBHandle->disconnect; | |
211 | ||
212 | __END__ | |
213 | ||
214 | ################################ Documentation ################################# | |
215 | ||
216 | =head1 NAME | |
217 | ||
218 | groupstats - create reports on newsgroup usage | |
219 | ||
220 | =head1 SYNOPSIS | |
221 | ||
222 | B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>] | |
223 | ||
224 | =head1 REQUIREMENTS | |
225 | ||
226 | See doc/README: Perl 5.8.x itself and the following modules from CPAN: | |
227 | ||
228 | =over 2 | |
229 | ||
230 | =item - | |
231 | ||
232 | Config::Auto | |
233 | ||
234 | =item - | |
235 | ||
236 | DBI | |
237 | ||
238 | =back | |
239 | ||
240 | =head1 DESCRIPTION | |
241 | ||
242 | This script creates reports on newsgroup usage (number of postings per | |
243 | group per month) taken from result tables created by | |
244 | F<gatherstats.pl>. | |
245 | ||
246 | The time period to act on defaults to last month; you can assign | |
247 | another month via the B<-m> switch or a time period via the B<-p> | |
248 | switch; the latter takes preference. | |
249 | ||
250 | B<groupstats> will process all newsgroups by default; you can limit | |
251 | that to only some newsgroups by supplying a list of those groups via | |
252 | B<-n> (see below). You can include hierarchy levels in the output by | |
253 | adding the B<-s> switch (see below). | |
254 | ||
255 | Furthermore you can set a threshold via B<-t> so that only newsgroups | |
256 | with more postings per month will be included in the report. You can | |
257 | invert that by the B<-i> switch so only newsgroups with less than | |
258 | I<threshold> postings per month will be included. | |
259 | ||
260 | You can sort the output by number of postings per month instead of the | |
261 | default (alphabetical list of newsgroups) by using B<-q>; you can | |
262 | reverse the sorting order (from highest to lowest or in reversed | |
263 | alphabetical order) by using B<-d>. | |
264 | ||
265 | Furthermore, you can create a list of newsgroups that had consistently | |
266 | more (or less) than x postings per month during the whole report | |
267 | period by using B<-l> (together with B<-i> as needed). | |
268 | ||
269 | Last but not least you can create a "best of" list of the top x | |
270 | newsgroups via B<-b> (or a "worst of" list by adding B<-i>). | |
271 | ||
272 | By default, B<groupstats> will dump a very simple alphabetical list of | |
273 | newsgroups, one per line, followed by the number of postings in that | |
274 | month. This output format of course cannot sensibly be combined with | |
275 | time periods, so you can set the output format by using B<-o> (see | |
276 | below). Captions can be added by setting the B<-c> switch. | |
277 | ||
278 | =head2 Configuration | |
279 | ||
280 | F<groupstats.pl> will read its configuration from F<newsstats.conf> | |
281 | which should be present in the same directory via Config::Auto. | |
282 | ||
283 | See doc/INSTALL for an overview of possible configuration options. | |
284 | ||
285 | You can override configuration options via the B<-g> switch. | |
286 | ||
287 | =head1 OPTIONS | |
288 | ||
289 | =over 3 | |
290 | ||
291 | =item B<-V> (version) | |
292 | ||
293 | Print out version and copyright information on B<groupstats> and exit. | |
294 | ||
295 | =item B<-h> (help) | |
296 | ||
297 | Print this man page and exit. | |
298 | ||
299 | =item B<-m> I<YYYY-MM> (month) | |
300 | ||
301 | Set processing period to a month in YYYY-MM format. Ignored if B<-p> | |
302 | is set. | |
303 | ||
304 | =item B<-p> I<YYYY-MM:YYYY-MM> (period) | |
305 | ||
306 | Set processing period to a time period between two months, each in | |
307 | YYYY-MM format, separated by a colon. Overrides B<-m>. | |
308 | ||
309 | =item B<-n> I<newsgroup(s)> (newsgroups) | |
310 | ||
311 | Limit processing to a certain set of newsgroups. I<newsgroup(s)> can | |
312 | be a single newsgroup name (de.alt.test), a newsgroup hierarchy | |
313 | (de.alt.*) or a list of either of these, separated by colons, for | |
314 | example | |
315 | ||
316 | de.test:de.alt.test:de.newusers.* | |
317 | ||
318 | =item B<-t> I<threshold> (threshold) | |
319 | ||
320 | Only include newsgroups with more than I<threshold> postings per | |
321 | month. Can be inverted by the B<-i> switch so that only newsgroups | |
322 | with less than I<threshold> postings will be included. | |
323 | ||
324 | This setting will be ignored if B<-l> or B<-b> is set. | |
325 | ||
326 | =item B<-l> I<level> (level) | |
327 | ||
328 | Only include newsgroups with more than I<level> postings per | |
329 | month, every month during the whole reporting period. Can be inverted | |
330 | by the B<-i> switch so that only newsgroups with less than I<level> | |
331 | postings every single month will be included. Output will be ordered | |
332 | by newsgroup name, followed by month. | |
333 | ||
334 | This setting will be ignored if B<-b> is set. Overrides B<-t> and | |
335 | can't be used together with B<-q> or B<-d>. | |
336 | ||
337 | =item B<-b> I<n> (best of) | |
338 | ||
339 | Create a list of the I<n> newsgroups with the most postings over the | |
340 | whole reporting period. Can be inverted by the B<-i> switch so that a | |
341 | list of the I<n> newsgroups with the least postings over the whole | |
342 | period is generated. Output will be ordered by sum of postings. | |
343 | ||
344 | Overrides B<-t> and B<-l> and can't be used together with B<-q> or | |
345 | B<-d>. Output format is set to I<pretty> (see below). | |
346 | ||
347 | =item B<-i> (invert) | |
348 | ||
349 | Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower | |
350 | threshold or level or generate a "bottom list" instead of a top list. | |
351 | ||
352 | =item B<-s> (sum per hierarchy level) | |
353 | ||
354 | Include "virtual" groups for every hierarchy level in output, for | |
355 | example: | |
356 | ||
357 | de.alt.ALL 10 | |
358 | de.alt.test 5 | |
359 | de.alt.admin 7 | |
360 | ||
361 | See the B<gatherstats> man page for details. | |
362 | ||
363 | =item B<-o> I<output type> (output format) | |
364 | ||
365 | Set output format. Default is I<dump>, consisting of an alphabetical | |
366 | list of newsgroups, each on a new line, followed by the number of | |
367 | postings in that month. This default format can't be used with time | |
368 | periods of more than one month. | |
369 | ||
370 | I<list> format is like I<dump>, but will print the month in front of | |
371 | the newsgroup name. | |
372 | ||
373 | I<dumpgroup> format can only be used with a group list (see B<-n>) of | |
374 | exactly one newsgroup and is like I<dump>, but will output months, | |
375 | followed by the number of postings. | |
376 | ||
377 | If you don't need easily parsable output, you'll mostly use I<pretty> | |
378 | format, which will print a header for each new month and try to align | |
379 | newsgroup names and posting counts. Usage of B<-b> will force this | |
380 | format. | |
381 | ||
382 | =item B<-c> (captions) | |
383 | ||
384 | Add captions to output (reporting period, newsgroups list, threshold). | |
385 | ||
386 | =item B<-q> (quantity of postings) | |
387 | ||
388 | Sort by number of postings instead of by newsgroup names. | |
389 | ||
390 | Cannot be used with B<-l> or B<-b>. | |
391 | ||
392 | =item B<-d> (descending) | |
393 | ||
394 | Change sort order to descending. | |
395 | ||
396 | Cannot be used with B<-l> or B<-b>. | |
397 | ||
398 | =item B<-g> I<table> (postings per group table) | |
399 | ||
400 | Override I<DBTableGrps> from F<newsstats.conf>. | |
401 | ||
402 | =back | |
403 | ||
404 | =head1 INSTALLATION | |
405 | ||
406 | See doc/INSTALL. | |
407 | ||
408 | =head1 EXAMPLES | |
409 | ||
410 | Show number of postings per group for last month in I<dump> format: | |
411 | ||
412 | groupstats | |
413 | ||
414 | Show that report for January of 2010 and de.alt.* plus de.test, | |
415 | including display of hierarchy levels: | |
416 | ||
417 | groupstats -m 2010-01 -n de.alt.*:de.test -s | |
418 | ||
419 | Show that report for the year of 2010 in I<pretty> format: | |
420 | ||
421 | groupstats -p 2010-01:2010-12 -o pretty | |
422 | ||
423 | Only show newsgroups with less than 30 postings last month, ordered | |
424 | by number of postings, descending, in I<pretty> format: | |
425 | ||
426 | groupstats -iqdt 30 -o pretty | |
427 | ||
428 | Show top 10 for the first half-year of 2010 in I<pretty> format: | |
429 | ||
430 | groupstats -p 2010-01:2010-06 -b 10 -o pretty | |
431 | ||
432 | Report all groups that had less than 30 postings every single month | |
433 | in the year of 2010 (I<pretty> format is forced): | |
434 | ||
435 | groupstats -p 2010-01:2010-12 -il 30 | |
436 | ||
437 | =head1 FILES | |
438 | ||
439 | =over 4 | |
440 | ||
441 | =item F<groupstats.pl> | |
442 | ||
443 | The script itself. | |
444 | ||
445 | =item F<NewsStats.pm> | |
446 | ||
447 | Library functions for the NewsStats package. | |
448 | ||
449 | =item F<newsstats.conf> | |
450 | ||
451 | Runtime configuration file for B<groupstats>. | |
452 | ||
453 | =back | |
454 | ||
455 | =head1 BUGS | |
456 | ||
457 | Please report any bugs or feature requests to the author or use the | |
458 | bug tracker at L<http://bugs.th-h.de/>! | |
459 | ||
460 | =head1 SEE ALSO | |
461 | ||
462 | =over 2 | |
463 | ||
464 | =item - | |
465 | ||
466 | doc/README | |
467 | ||
468 | =item - | |
469 | ||
470 | doc/INSTALL | |
471 | ||
472 | =item - | |
473 | ||
474 | gatherstats -h | |
475 | ||
476 | =back | |
477 | ||
478 | This script is part of the B<NewsStats> package. | |
479 | ||
480 | =head1 AUTHOR | |
481 | ||
482 | Thomas Hochstein <thh@inter.net> | |
483 | ||
484 | =head1 COPYRIGHT AND LICENSE | |
485 | ||
486 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
487 | ||
488 | This program is free software; you may redistribute it and/or modify it | |
489 | under the same terms as Perl itself. | |
490 | ||
491 | =cut |