Commit | Line | Data |
---|---|---|
2832c235 TH |
1 | #! /usr/bin/perl -W |
2 | # | |
3 | # groupstats.pl | |
4 | # | |
5 | # This script will get statistical data on newsgroup usage | |
6 | # from a database. | |
7 | # | |
8 | # It is part of the NewsStats package. | |
9 | # | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
11 | # | |
12 | # It can be redistributed and/or modified under the same terms under | |
13 | # which Perl itself is published. | |
14 | ||
15 | BEGIN { | |
16 | our $VERSION = "0.01"; | |
17 | use File::Basename; | |
18 | push(@INC, dirname($0)); | |
19 | } | |
20 | use strict; | |
21 | ||
22 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); | |
23 | ||
24 | use DBI; | |
25 | ||
26 | ################################# Main program ################################# | |
27 | ||
28 | ### read commandline options into %Options (keyed by option letter) | |
29 | my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); | |
30 | ||
31 | ### read configuration from newsstats.conf into %Conf | |
32 | my %Conf = %{ReadConfig('newsstats.conf')}; | |
33 | ||
34 | ### override configuration via commandline options (-g replaces DBTableGrps) | |
35 | my %ConfOverride; | |
36 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; | |
37 | &OverrideConfig(\%Conf,\%ConfOverride); | |
38 | ||
39 | ### check for incompatible command line options | |
40 | # you can't mix '-t', '-b' and '-l': -b/-l take precedence over -t, and -b | |
41 | # over -l; defined() is used so that an argument of 0 still counts as set | |
42 | if (defined($Options{'b'}) or defined($Options{'l'})) { | |
43 | if (defined($Options{'t'})) { | |
44 | # drop -t | |
45 | warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); | |
46 | undef($Options{'t'}); | |
47 | }; | |
48 | if (defined($Options{'b'}) and defined($Options{'l'})) { | |
49 | # drop -l | |
50 | warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); | |
51 | undef($Options{'l'}); | |
52 | }; | |
53 | # -q/-d don't work with -b or -l | |
54 | warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; | |
55 | warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; | |
56 | }; | |
57 | ||
58 | ### check output type | |
59 | # default output type to 'dump' | |
60 | $Options{'o'} = 'dump' if !$Options{'o'}; | |
61 | # fail if more than one newsgroup is combined with 'dumpgroup' type | |
62 | die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); | |
63 | # accept 'dumpgroup' only with -n | |
64 | if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { | |
65 | $Options{'o'} = 'dump'; | |
66 | warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); | |
67 | }; | |
68 | # set output type to 'pretty' for -l (also for '-l 0'); no warning if it already is 'pretty' | |
69 | if (defined($Options{'l'}) and $Options{'o'} ne 'pretty') { | |
70 | $Options{'o'} = 'pretty'; | |
71 | warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); | |
72 | }; | |
73 | ||
74 | ### get time period (start and end month) from -m / -p | |
75 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); | |
76 | # 'dump' output cannot be combined with a time period (-p): switch to 'pretty' | |
77 | if ($Options{'o'} eq 'dump' and $Options{'p'}) { | |
d8695b1c TH |
78 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump', changing output type to '-o pretty'.\n"); |
79 | $Options{'o'} = 'pretty'; | |
2832c235 TH |
80 | }; |
81 | ||
82 | ### init database (the handle is used to prepare the report query) | |
83 | my $DBHandle = InitDB(\%Conf,1); | |
84 | ||
85 | ### create report | |
86 | # get list of newsgroups (-n) | |
6b95accb | 87 | my ($QueryGroupList,$QueryThreshold,@GroupList,@Params); |
2832c235 TH |
88 | my $Newsgroups = $Options{'n'}; |
89 | if ($Newsgroups) { | |
90 | # explode list of newsgroups for WHERE clause; @GroupList holds the bind values | |
6b95accb | 91 | ($QueryGroupList,@GroupList) = &SQLGroupList($Newsgroups); |
2832c235 TH |
92 | } else { |
93 | # set to dummy value (always true) | |
6b95accb | 94 | $QueryGroupList = 1; |
2832c235 TH |
95 | }; |
96 | ||
97 | # manage thresholds | |
98 | if (defined($Options{'t'})) { | |
99 | if ($Options{'i'}) { | |
100 | # -i: list groups below threshold | |
6b95accb | 101 | $QueryThreshold = ' postings < ?'; |
2832c235 TH |
102 | } else { |
103 | # default: list groups above threshold | |
6b95accb | 104 | $QueryThreshold = ' postings > ?'; |
2832c235 | 105 | }; |
6b95accb TH |
106 | # push threshold to Params |
107 | push @Params,$Options{'t'}; | |
108 | } else { | |
109 | # set to dummy value (always true) | |
110 | $QueryThreshold = 1; | |
2832c235 TH |
111 | } |
112 | ||
113 | # construct WHERE clause | |
6b95accb TH |
114 | # $QueryGroupList is "list of newsgroups" (or 1), |
115 | # $QueryThreshold is threshold definition (or 1), | |
2832c235 TH |
116 | # &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) |
117 | # according to setting of -s | |
6b95accb | 118 | my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s AND %s %s',$QueryGroupList,$QueryThreshold,&SQLHierarchies($Options{'s'})); |
2832c235 TH |
119 |
120 | # get length of longest newsgroup delivered by query for formatting purposes | |
121 | # FIXME | |
6b95accb | 122 | my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,(@GroupList,@Params)); |
2832c235 TH |
123 |
124 | my ($OrderClause,$DBQuery); | |
125 | # -b (best of / top list) defined? | |
126 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
127 | # default: neither -b nor -l | |
128 | # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" | |
129 | # according to -q and -d | |
130 | $OrderClause = 'newsgroup'; | |
131 | $OrderClause = 'postings' if $Options{'q'}; | |
132 | $OrderClause .= ' DESC' if $Options{'d'}; | |
133 | # prepare query: get number of postings per group from groups table for given months and newsgroups | |
134 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
135 | } elsif (defined($Options{'b'})) { | |
136 | # -b is set (tested with defined() so that '-b 0' ends up here and gets the default below) | |
137 | # set sorting order (-i) | |
138 | if ($Options{'i'}) { | |
139 | $OrderClause = 'postings'; | |
140 | } else { | |
141 | $OrderClause = 'postings DESC'; | |
142 | }; | |
d8695b1c TH |
143 | # set -b to 10 (Top 10) if it is not a number or < 1 |
144 | $Options{'b'} = 10 if $Options{'b'} !~ /^\d+$/ or $Options{'b'} < 1; | |
6b95accb TH |
145 | # push LIMIT to Params |
146 | push @Params,$Options{'b'}; | |
2832c235 TH |
147 | # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT |
148 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
149 | } else { | |
150 | # -l must be set now, as all other cases have been taken care of | |
151 | # set HAVING condition (-i): "every month below level" means MAX < level, "every month above level" means MIN > level | |
6b95accb | 152 | my ($Level); |
2832c235 | 153 | if ($Options{'i'}) { |
6b95accb | 154 | $Level = 'MAX(postings) <'; |
2832c235 | 155 | } else { |
6b95accb | 156 | $Level = 'MIN(postings) >'; |
2832c235 | 157 | }; |
6b95accb | 158 | # push level and - again, as $WhereClause is used twice - $StartMonth,$EndMonth, group list and threshold to Params |
2832c235 | 159 | # FIXME -- together with the query (see below) |
6b95accb TH |
160 | my @Threshold = @Params; |
161 | push @Params,$Options{'l'},$StartMonth,$EndMonth,@GroupList,@Threshold; | |
2832c235 TH |
162 | # prepare query: get number of postings per group from groups table for given months and newsgroups, limited to groups meeting the level in every month |
163 | # FIXME -- this query is ... in dire need of improvement | |
6b95accb | 164 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$Level,$WhereClause)); |
2832c235 TH |
165 | }; |
166 | ||
167 | # execute query | |
6b95accb | 168 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList,@Params) |
2832c235 TH |
169 | or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); |
170 | ||
171 | # output results | |
172 | # print caption (-c) with time period if -m or -p is set | |
173 | # FIXME - month or period should be handled differently | |
174 | printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); | |
175 | # print caption (-c) with newsgroup list if -n is set | |
176 | printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; | |
177 | # print caption (-c) with threshold if -t is set (including '-t 0'), taking -i in account | |
178 | printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and defined($Options{'t'}); | |
179 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
180 | # default: neither -b nor -l | |
181 | &OutputData($Options{'o'},$DBQuery,$MaxLength); | |
182 | } elsif ($Options{'b'}) { | |
183 | # -b is set (then -l can't be!) | |
184 | # we have to read in the query results ourselves, as they do not have standard layout | |
185 | while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
186 | # we just assign "top x" or "bottom x" instead of a month for the caption | |
187 | # FIXME | |
188 | print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); | |
189 | }; | |
190 | } else { | |
191 | # -l must be set now, as all other cases have been taken care of | |
192 | # we have to read in the query results ourselves, as they do not have standard layout | |
193 | while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
194 | # we just switch $Newsgroup and $Month for output generation | |
195 | # FIXME | |
196 | print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); | |
197 | }; | |
198 | }; | |
199 | ||
200 | ### close handles | |
201 | $DBHandle->disconnect; | |
202 | ||
203 | __END__ | |
204 | ||
205 | ################################ Documentation ################################# | |
206 | ||
207 | =head1 NAME | |
208 | ||
209 | groupstats - create reports on newsgroup usage | |
210 | ||
211 | =head1 SYNOPSIS | |
212 | ||
213 | B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>] | |
214 | ||
215 | =head1 REQUIREMENTS | |
216 | ||
217 | See doc/README: Perl 5.8.x itself and the following modules from CPAN: | |
218 | ||
219 | =over 2 | |
220 | ||
221 | =item - | |
222 | ||
223 | Config::Auto | |
224 | ||
225 | =item - | |
226 | ||
227 | DBI | |
228 | ||
229 | =back | |
230 | ||
231 | =head1 DESCRIPTION | |
232 | ||
233 | This script creates reports on newsgroup usage (number of postings per | |
234 | group per month) taken from result tables created by | |
235 | F<gatherstats.pl>. | |
236 | ||
237 | The time period to act on defaults to last month; you can assign | |
238 | another month via the B<-m> switch or a time period via the B<-p> | |
239 | switch; the latter takes precedence. | |
240 | ||
241 | B<groupstats> will process all newsgroups by default; you can limit | |
242 | that to only some newsgroups by supplying a list of those groups via | |
243 | B<-n> (see below). You can include hierarchy levels in the output by | |
244 | adding the B<-s> switch (see below). | |
245 | ||
246 | Furthermore you can set a threshold via B<-t> so that only newsgroups | |
247 | with more postings per month will be included in the report. You can | |
248 | invert that by the B<-i> switch so only newsgroups with less than | |
249 | I<threshold> postings per month will be included. | |
250 | ||
251 | You can sort the output by number of postings per month instead of the | |
252 | default (alphabetical list of newsgroups) by using B<-q>; you can | |
253 | reverse the sorting order (from highest to lowest or in reversed | |
254 | alphabetical order) by using B<-d>. | |
255 | ||
256 | Furthermore, you can create a list of newsgroups that had consistently | |
257 | more (or less) than x postings per month during the whole report | |
258 | period by using B<-l> (together with B<-i> as needed). | |
259 | ||
260 | Last but not least you can create a "best of" list of the top x | |
261 | newsgroups via B<-b> (or a "worst of" list by adding B<-i>). | |
262 | ||
263 | By default, B<groupstats> will dump a very simple alphabetical list of | |
264 | newsgroups, one per line, followed by the number of postings in that | |
265 | month. This output format of course cannot sensibly be combined with | |
266 | time periods, so you can set the output format by using B<-o> (see | |
267 | below). Captions can be added by setting the B<-c> switch. | |
268 | ||
269 | =head2 Configuration | |
270 | ||
271 | F<groupstats.pl> will read its configuration from F<newsstats.conf> | |
272 | which should be present in the same directory via Config::Auto. | |
273 | ||
274 | See doc/INSTALL for an overview of possible configuration options. | |
275 | ||
276 | You can override configuration options via the B<-g> switch. | |
277 | ||
278 | =head1 OPTIONS | |
279 | ||
280 | =over 3 | |
281 | ||
282 | =item B<-V> (version) | |
283 | ||
284 | Print out version and copyright information on B<groupstats> and exit. | |
285 | ||
286 | =item B<-h> (help) | |
287 | ||
288 | Print this man page and exit. | |
289 | ||
290 | =item B<-m> I<YYYY-MM> (month) | |
291 | ||
292 | Set processing period to a month in YYYY-MM format. Ignored if B<-p> | |
293 | is set. | |
294 | ||
295 | =item B<-p> I<YYYY-MM:YYYY-MM> (period) | |
296 | ||
297 | Set processing period to a time period between two months, each in | |
298 | YYYY-MM format, separated by a colon. Overrides B<-m>. | |
299 | ||
300 | =item B<-n> I<newsgroup(s)> (newsgroups) | |
301 | ||
302 | Limit processing to a certain set of newsgroups. I<newsgroup(s)> can | |
303 | be a single newsgroup name (de.alt.test), a newsgroup hierarchy | |
304 | (de.alt.*) or a list of either of these, separated by colons, for | |
305 | example | |
306 | ||
307 | de.test:de.alt.test:de.newusers.* | |
308 | ||
309 | =item B<-t> I<threshold> (threshold) | |
310 | ||
311 | Only include newsgroups with more than I<threshold> postings per | |
312 | month. Can be inverted by the B<-i> switch so that only newsgroups | |
313 | with less than I<threshold> postings will be included. | |
314 | ||
315 | This setting will be ignored if B<-l> or B<-b> is set. | |
316 | ||
317 | =item B<-l> I<level> (level) | |
318 | ||
319 | Only include newsgroups with more than I<level> postings per | |
320 | month, every month during the whole reporting period. Can be inverted | |
321 | by the B<-i> switch so that only newsgroups with less than I<level> | |
322 | postings every single month will be included. Output will be ordered | |
323 | by newsgroup name, followed by month. | |
324 | ||
325 | This setting will be ignored if B<-b> is set. Overrides B<-t> and | |
326 | can't be used together with B<-q> or B<-d>. | |
327 | ||
328 | =item B<-b> I<n> (best of) | |
329 | ||
330 | Create a list of the I<n> newsgroups with the most postings over the | |
331 | whole reporting period. Can be inverted by the B<-i> switch so that a | |
332 | list of the I<n> newsgroups with the least postings over the whole | |
333 | period is generated. Output will be ordered by sum of postings. | |
334 | ||
335 | Overrides B<-t> and B<-l> and can't be used together with B<-q> or | |
336 | B<-d>. Output format is set to I<pretty> (see below). | |
337 | ||
338 | =item B<-i> (invert) | |
339 | ||
340 | Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower | |
341 | threshold or level or generate a "bottom list" instead of a top list. | |
342 | ||
343 | =item B<-s> (sum per hierarchy level) | |
344 | ||
345 | Include "virtual" groups for every hierarchy level in output, for | |
346 | example: | |
347 | ||
348 | de.alt.ALL 10 | |
349 | de.alt.test 5 | |
350 | de.alt.admin 7 | |
351 | ||
352 | See the B<gatherstats> man page for details. | |
353 | ||
354 | =item B<-o> I<output type> (output format) | |
355 | ||
356 | Set output format. Default is I<dump>, consisting of an alphabetical | |
357 | list of newsgroups, each on a new line, followed by the number of | |
358 | postings in that month. This default format can't be used with time | |
359 | periods of more than one month. | |
360 | ||
361 | I<list> format is like I<dump>, but will print the month in front of | |
362 | the newsgroup name. | |
363 | ||
364 | I<dumpgroup> format can only be used with a group list (see B<-n>) of | |
365 | exactly one newsgroup and is like I<dump>, but will output months, | |
366 | followed by the number of postings. | |
367 | ||
368 | If you don't need easily parsable output, you'll mostly use I<pretty> | |
369 | format, which will print a header for each new month and try to align | |
370 | newsgroup names and posting counts. Usage of B<-b> will force this | |
371 | format. | |
372 | ||
373 | =item B<-c> (captions) | |
374 | ||
375 | Add captions to output (reporting period, newsgroups list, threshold). | |
376 | ||
377 | =item B<-q> (quantity of postings) | |
378 | ||
379 | Sort by number of postings instead of by newsgroup names. | |
380 | ||
381 | Cannot be used with B<-l> or B<-b>. | |
382 | ||
383 | =item B<-d> (descending) | |
384 | ||
385 | Change sort order to descending. | |
386 | ||
387 | Cannot be used with B<-l> or B<-b>. | |
388 | ||
389 | =item B<-g> I<table> (postings per group table) | |
390 | ||
391 | Override I<DBTableGrps> from F<newsstats.conf>. | |
392 | ||
393 | =back | |
394 | ||
395 | =head1 INSTALLATION | |
396 | ||
397 | See doc/INSTALL. | |
398 | ||
399 | =head1 EXAMPLES | |
400 | ||
401 | Show number of postings per group for last month in I<dump> format: | |
402 | ||
403 | groupstats | |
404 | ||
405 | Show that report for January of 2010 and de.alt.* plus de.test, | |
406 | including display of hierarchy levels: | |
407 | ||
408 | groupstats -m 2010-01 -n de.alt.*:de.test -s | |
409 | ||
410 | Show that report for the year of 2010 in I<pretty> format: | |
411 | ||
412 | groupstats -p 2010-01:2010-12 -o pretty | |
413 | ||
414 | Only show newsgroups with less than 30 postings last month, ordered | |
415 | by number of postings, descending, in I<pretty> format: | |
416 | ||
417 | groupstats -iqdt 30 -o pretty | |
418 | ||
419 | Show top 10 for the first half-year of 2010 in I<pretty> format: | |
420 | ||
421 | groupstats -p 2010-01:2010-06 -b 10 -o pretty | |
422 | ||
423 | Report all groups that had less than 30 postings every single month | |
424 | in the year of 2010 (I<pretty> format is forced) | |
425 | ||
426 | groupstats -p 2010-01:2010-12 -il 30 | |
427 | ||
428 | =head1 FILES | |
429 | ||
430 | =over 4 | |
431 | ||
432 | =item F<groupstats.pl> | |
433 | ||
434 | The script itself. | |
435 | ||
436 | =item F<NewsStats.pm> | |
437 | ||
438 | Library functions for the NewsStats package. | |
439 | ||
440 | =item F<newsstats.conf> | |
441 | ||
442 | Runtime configuration file for B<groupstats>. | |
443 | ||
444 | =back | |
445 | ||
446 | =head1 BUGS | |
447 | ||
448 | Please report any bugs or feature requests to the author or use the | |
449 | bug tracker at L<http://bugs.th-h.de/>! | |
450 | ||
451 | =head1 SEE ALSO | |
452 | ||
453 | =over 2 | |
454 | ||
455 | =item - | |
456 | ||
457 | doc/README | |
458 | ||
459 | =item - | |
460 | ||
461 | doc/INSTALL | |
462 | ||
463 | =item - | |
464 | ||
465 | gatherstats -h | |
466 | ||
467 | =back | |
468 | ||
469 | This script is part of the B<NewsStats> package. | |
470 | ||
471 | =head1 AUTHOR | |
472 | ||
473 | Thomas Hochstein <thh@inter.net> | |
474 | ||
475 | =head1 COPYRIGHT AND LICENSE | |
476 | ||
477 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
478 | ||
479 | This program is free software; you may redistribute it and/or modify it | |
480 | under the same terms as Perl itself. | |
481 | ||
482 | =cut |