Commit | Line | Data |
---|---|---|
2832c235 TH |
1 | #! /usr/bin/perl -W |
2 | # | |
3 | # groupstats.pl | |
4 | # | |
5 | # This script will get statistical data on newgroup usage | |
6 | # form a database. | |
7 | # | |
8 | # It is part of the NewsStats package. | |
9 | # | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
11 | # | |
12 | # It can be redistributed and/or modified under the same terms under | |
13 | # which Perl itself is published. | |
14 | ||
15 | BEGIN { | |
16 | our $VERSION = "0.01"; | |
17 | use File::Basename; | |
18 | push(@INC, dirname($0)); | |
19 | } | |
20 | use strict; | |
21 | ||
22 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); | |
23 | ||
24 | use DBI; | |
25 | ||
26 | ################################# Main program ################################# | |
27 | ||
28 | ### read commandline options | |
29 | my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); | |
30 | ||
31 | ### read configuration | |
32 | my %Conf = %{ReadConfig('newsstats.conf')}; | |
33 | ||
34 | ### override configuration via commandline options | |
35 | my %ConfOverride; | |
36 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; | |
37 | &OverrideConfig(\%Conf,\%ConfOverride); | |
38 | ||
39 | ### check for incompatible command line options | |
40 | # you can't mix '-t', '-b' and '-l' | |
41 | # -b/-l take preference over -t, and -b takes preference over -l | |
42 | if ($Options{'b'} or $Options{'l'}) { | |
43 | if ($Options{'t'}) { | |
44 | # drop -t | |
45 | warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); | |
46 | undef($Options{'t'}); | |
47 | }; | |
48 | if ($Options{'b'} and $Options{'l'}) { | |
49 | # drop -l | |
50 | warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n"); | |
51 | undef($Options{'l'}); | |
52 | }; | |
53 | # -q/-d don't work with -b or -l | |
54 | warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; | |
55 | warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; | |
56 | }; | |
57 | ||
58 | ### check output type | |
59 | # default output type to 'dump' | |
60 | $Options{'o'} = 'dump' if !$Options{'o'}; | |
61 | # fail if more than one newsgroup is combined with 'dumpgroup' type | |
62 | die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); | |
63 | # accept 'dumpgroup' only with -n | |
64 | if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { | |
65 | $Options{'o'} = 'dump'; | |
66 | warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); | |
67 | }; | |
68 | # set output type to 'pretty' for -l | |
69 | if ($Options{'l'}) { | |
70 | $Options{'o'} = 'pretty'; | |
71 | warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); | |
72 | }; | |
73 | ||
74 | ### get time period | |
75 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); | |
76 | # reset to one month for 'dump' output type | |
77 | if ($Options{'o'} eq 'dump' and $Options{'p'}) { | |
78 | $StartMonth = $EndMonth; | |
79 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n"); | |
80 | }; | |
81 | ||
82 | ### init database | |
83 | my $DBHandle = InitDB(\%Conf,1); | |
84 | ||
85 | ### create report | |
86 | # get list of newsgroups (-n) | |
87 | my ($QueryPart,@GroupList); | |
88 | my $Newsgroups = $Options{'n'}; | |
89 | if ($Newsgroups) { | |
90 | # explode list of newsgroups for WHERE clause | |
91 | ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups); | |
92 | } else { | |
93 | # set to dummy value (always true) | |
94 | $QueryPart = 1; | |
95 | }; | |
96 | ||
97 | # manage thresholds | |
98 | if (defined($Options{'t'})) { | |
99 | if ($Options{'i'}) { | |
100 | # -i: list groups below threshold | |
101 | $QueryPart .= ' AND postings < ?'; | |
102 | } else { | |
103 | # default: list groups above threshold | |
104 | $QueryPart .= ' AND postings > ?'; | |
105 | }; | |
106 | # push threshold to GroupList to match number of binding vars for DBQuery->execute | |
107 | push @GroupList,$Options{'t'}; | |
108 | } | |
109 | ||
110 | # construct WHERE clause | |
111 | # $QueryPart is "list of newsgroup" (or 1), | |
112 | # &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) | |
113 | # according to setting of -s | |
114 | my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'})); | |
115 | ||
116 | # get lenght of longest newsgroup delivered by query for formatting purposes | |
117 | # FIXME | |
118 | my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList); | |
119 | ||
120 | my ($OrderClause,$DBQuery); | |
121 | # -b (best of / top list) defined? | |
122 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
123 | # default: neither -b nor -l | |
124 | # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" | |
125 | # according to -q and -d | |
126 | $OrderClause = 'newsgroup'; | |
127 | $OrderClause = 'postings' if $Options{'q'}; | |
128 | $OrderClause .= ' DESC' if $Options{'d'}; | |
129 | # prepare query: get number of postings per group from groups table for given months and newsgroups | |
130 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
131 | } elsif ($Options{'b'}) { | |
132 | # -b is set (then -l can't be!) | |
133 | # set sorting order (-i) | |
134 | if ($Options{'i'}) { | |
135 | $OrderClause = 'postings'; | |
136 | } else { | |
137 | $OrderClause = 'postings DESC'; | |
138 | }; | |
139 | # push LIMIT to GroupList to match number of binding vars for DBQuery->execute | |
140 | push @GroupList,$Options{'b'}; | |
141 | # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT | |
142 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); | |
143 | } else { | |
144 | # -l must be set now, as all other cases have been taken care of | |
145 | # set sorting order (-i) | |
146 | if ($Options{'i'}) { | |
147 | $OrderClause = '<'; | |
148 | } else { | |
149 | $OrderClause = '>'; | |
150 | }; | |
151 | # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute | |
152 | # FIXME -- together with the query (see below) | |
153 | push @GroupList,$Options{'l'}; | |
154 | push @GroupList,$StartMonth,$EndMonth; | |
155 | # prepare query: get number of postings per group from groups table for given months and | |
156 | # FIXME -- this query is ... in dire need of impromevent | |
157 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause)); | |
158 | }; | |
159 | ||
160 | # execute query | |
161 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList) | |
162 | or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); | |
163 | ||
164 | # output results | |
165 | # print caption (-c) with time period if -m or -p is set | |
166 | # FIXME - month or period should handled differently | |
167 | printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); | |
168 | # print caption (-c) with newsgroup list if -n is set | |
169 | printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; | |
170 | # print caption (-c) with threshold if -t is set, taking -i in account | |
171 | printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; | |
172 | if (!defined($Options{'b'}) and !defined($Options{'l'})) { | |
173 | # default: neither -b nor -l | |
174 | &OutputData($Options{'o'},$DBQuery,$MaxLength); | |
175 | } elsif ($Options{'b'}) { | |
176 | # -b is set (then -l can't be!) | |
177 | # we have to read in the query results ourselves, as they do not have standard layout | |
178 | while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
179 | # we just assign "top x" or "bottom x" instead of a month for the caption | |
180 | # FIXME | |
181 | print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); | |
182 | }; | |
183 | } else { | |
184 | # -l must be set now, as all other cases have been taken care of | |
185 | # we have to read in the query results ourselves, as they do not have standard layout | |
186 | while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { | |
187 | # we just switch $Newsgroups and $Month for output generation | |
188 | # FIXME | |
189 | print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); | |
190 | }; | |
191 | }; | |
192 | ||
193 | ### close handles | |
194 | $DBHandle->disconnect; | |
195 | ||
196 | __END__ | |
197 | ||
198 | ################################ Documentation ################################# | |
199 | ||
200 | =head1 NAME | |
201 | ||
202 | groupstats - create reports on newsgroup usage | |
203 | ||
204 | =head1 SYNOPSIS | |
205 | ||
206 | B<groupstats> [B<-Vhiscqd>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-n> I<newsgroup(s)>] [B<-t> I<threshold>] [B<-l> I<level>] [B<-b> I<number>] [B<-o> I<output type>] [B<-g> I<database table>] | |
207 | ||
208 | =head1 REQUIREMENTS | |
209 | ||
210 | See doc/README: Perl 5.8.x itself and the following modules from CPAN: | |
211 | ||
212 | =over 2 | |
213 | ||
214 | =item - | |
215 | ||
216 | Config::Auto | |
217 | ||
218 | =item - | |
219 | ||
220 | DBI | |
221 | ||
222 | =back | |
223 | ||
224 | =head1 DESCRIPTION | |
225 | ||
226 | This script create reports on newsgroup usage (number of postings per | |
227 | group per month) taken from result tables created by | |
228 | F<gatherstats.pl>. | |
229 | ||
230 | The time period to act on defaults to last month; you can assign | |
231 | another month via the B<-m> switch or a time period via the B<-p> | |
232 | switch; the latter takes preference. | |
233 | ||
234 | B<groupstats> will process all newsgroups by default; you can limit | |
235 | that to only some newsgroups by supplying a list of those groups via | |
236 | B<-n> (see below). You can include hierarchy levels in the output by | |
237 | adding the B<-s> switch (see below). | |
238 | ||
239 | Furthermore you can set a threshold via B<-t> so that only newsgroups | |
240 | with more postings per month will be included in the report. You can | |
241 | invert that by the B<-i> switch so only newsgroups with less than | |
242 | I<threshold> postings per month will be included. | |
243 | ||
244 | You can sort the output by number of postings per month instead of the | |
245 | default (alphabetical list of newsgroups) by using B<-q>; you can | |
246 | reverse the sorting order (from highest to lowest or in reversed | |
247 | alphabetical order) by using B<-d>. | |
248 | ||
249 | Furthermore, you can create a list of newsgroups that had consistently | |
250 | more (or less) than x postings per month during the whole report | |
251 | period by using B<-l> (together with B<i> as needed). | |
252 | ||
253 | Last but not least you can create a "best of" list of the top x | |
254 | newsgroups via B<-b> (or a "worst of" list by adding B<i>). | |
255 | ||
256 | By default, B<groupstats> will dump a very simple alphabetical list of | |
257 | newsgroups, one per line, followed by the number of postings in that | |
258 | month. This output format of course cannot sensibly be combined with | |
259 | time periods, so you can set the output format by using B<-o> (see | |
260 | below). Captions can be added by setting the B<-c> switch. | |
261 | ||
262 | =head2 Configuration | |
263 | ||
264 | F<groupstats.pl> will read its configuration from F<newsstats.conf> | |
265 | which should be present in the same directory via Config::Auto. | |
266 | ||
267 | See doc/INSTALL for an overview of possible configuration options. | |
268 | ||
269 | You can override configuration options via the B<-g> switch. | |
270 | ||
271 | =head1 OPTIONS | |
272 | ||
273 | =over 3 | |
274 | ||
275 | =item B<-V> (version) | |
276 | ||
277 | Print out version and copyright information on B<yapfaq> and exit. | |
278 | ||
279 | =item B<-h> (help) | |
280 | ||
281 | Print this man page and exit. | |
282 | ||
283 | =item B<-m> I<YYYY-MM> (month) | |
284 | ||
285 | Set processing period to a month in YYYY-MM format. Ignored if B<-p> | |
286 | is set. | |
287 | ||
288 | =item B<-p> I<YYYY-MM:YYYY-MM> (period) | |
289 | ||
290 | Set processing period to a time period between two month, each in | |
291 | YYYY-MM format, separated by a colon. Overrides B<-m>. | |
292 | ||
293 | =item B<-n> I<newsgroup(s)> (newsgroups) | |
294 | ||
295 | Limit processing to a certain set of newsgroups. I<newsgroup(s)> can | |
296 | be a single newsgroup name (de.alt.test), a newsgroup hierarchy | |
297 | (de.alt.*) or a list of either of these, separated by colons, for | |
298 | example | |
299 | ||
300 | de.test:de.alt.test:de.newusers.* | |
301 | ||
302 | =item B<-t> I<threshold> (threshold) | |
303 | ||
304 | Only include newsgroups with more than I<threshold> postings per | |
305 | month. Can be inverted by the B<-i> switch so that only newsgroups | |
306 | with less than I<threshold> postings will be included. | |
307 | ||
308 | This setting will be ignored if B<-l> or B<-b> is set. | |
309 | ||
310 | =item B<-l> I<level> (level) | |
311 | ||
312 | Only include newsgroups with more than I<level> postings per | |
313 | month, every month during the whole reporting period. Can be inverted | |
314 | by the B<-i> switch so that only newsgroups with less than I<level> | |
315 | postings every single month will be included. Output will be ordered | |
316 | by newsgroup name, followed by month. | |
317 | ||
318 | This setting will be ignored if B<-b> is set. Overrides B<-t> and | |
319 | can't be used together with B<-q> or B<-d>. | |
320 | ||
321 | =item B<-b> I<n> (best of) | |
322 | ||
323 | Create a list of the I<n> newsgroups with the most postings over the | |
324 | whole reporting period. Can be inverted by the B<-i> switch so that a | |
325 | list of the I<n> newsgroups with the least postings over the whole | |
326 | period is generated. Output will be ordered by sum of postings. | |
327 | ||
328 | Overrides B<-t> and B<-l> and can't be used together with B<-q> or | |
329 | B<-d>. Output format is set to I<pretty> (see below). | |
330 | ||
331 | =item B<-i> (invert) | |
332 | ||
333 | Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower | |
334 | threshold or level or generate a "bottom list" instead of a top list. | |
335 | ||
336 | =item B<-s> (sum per hierarchy level) | |
337 | ||
338 | Include "virtual" groups for every hierarchy level in output, for | |
339 | example: | |
340 | ||
341 | de.alt.ALL 10 | |
342 | de.alt.test 5 | |
343 | de.alt.admin 7 | |
344 | ||
345 | See the B<gatherstats> man page for details. | |
346 | ||
347 | =item B<-o> I<output type> (output format) | |
348 | ||
349 | Set output format. Default is I<dump>, consisting of an alphabetical | |
350 | list of newsgroups, each on a new line, followed by the number of | |
351 | postings in that month. This default format can't be used with time | |
352 | periods of more than one month. | |
353 | ||
354 | I<list> format is like I<dump>, but will print the month in front of | |
355 | the newsgroup name. | |
356 | ||
357 | I<dumpgroup> format can only be use with a group list (see B<-n>) of | |
358 | exactly one newsgroup and is like I<dump>, but will output months, | |
359 | followed by the number of postings. | |
360 | ||
361 | If you don't need easily parsable output, you'll mostly use I<pretty> | |
362 | format, which will print a header for each new month and try to align | |
363 | newsgroup names and posting counts. Usage of B<-b> will force this | |
364 | format. | |
365 | ||
366 | =item B<-c> (captions) | |
367 | ||
368 | Add captions to output (reporting period, newsgroups list, threshold). | |
369 | ||
370 | =item B<-q> (quantity of postings) | |
371 | ||
372 | Sort by number of postings instead of by newsgroup names. | |
373 | ||
374 | Cannot be used with B<-l> or B<-b>. | |
375 | ||
376 | =item B<-d> (descending) | |
377 | ||
378 | Change sort order to descending. | |
379 | ||
380 | Cannot be used with B<-l> or B<-b>. | |
381 | ||
382 | =item B<-g> I<table> (postings per group table) | |
383 | ||
384 | Override I<DBTableGrps> from F<newsstats.conf>. | |
385 | ||
386 | =back | |
387 | ||
388 | =head1 INSTALLATION | |
389 | ||
390 | See doc/INSTALL. | |
391 | ||
392 | =head1 EXAMPLES | |
393 | ||
394 | Show number of postings per group for lasth month in I<dump> format: | |
395 | ||
396 | groupstats | |
397 | ||
398 | Show that report for January of 2010 and de.alt.* plus de.test, | |
399 | including display of hierarchy levels: | |
400 | ||
401 | groupstats -m 2010-01 -n de.alt.*:de.test -s | |
402 | ||
403 | Show that report for the year of 2010 in I<pretty> format: | |
404 | ||
405 | groupstats -p 2010-01:2010-12 -o pretty | |
406 | ||
407 | Only show newsgroups with less than 30 postings last month, ordered | |
408 | by number of postings, descending, in I<pretty> format: | |
409 | ||
410 | groupstats -iqdt 30 -o pretty | |
411 | ||
412 | Show top 10 for the first half-year of of 2010 in I<pretty> format: | |
413 | ||
414 | groupstats -p 2010-01:2010-06 -b 10 -o pretty | |
415 | ||
416 | Report all groups that had less than 30 postings every singele month | |
417 | in the year of 2010 (I<pretty> format is forced) | |
418 | ||
419 | groupstats -p 2010-01:2010-12 -il 30 | |
420 | ||
421 | =head1 FILES | |
422 | ||
423 | =over 4 | |
424 | ||
425 | =item F<groupstats.pl> | |
426 | ||
427 | The script itself. | |
428 | ||
429 | =item F<NewsStats.pm> | |
430 | ||
431 | Library functions for the NewsStats package. | |
432 | ||
433 | =item F<newsstats.conf> | |
434 | ||
435 | Runtime configuration file for B<yapfaq>. | |
436 | ||
437 | =back | |
438 | ||
439 | =head1 BUGS | |
440 | ||
441 | Please report any bugs or feature requests to the author or use the | |
442 | bug tracker at L<http://bugs.th-h.de/>! | |
443 | ||
444 | =head1 SEE ALSO | |
445 | ||
446 | =over 2 | |
447 | ||
448 | =item - | |
449 | ||
450 | doc/README | |
451 | ||
452 | =item - | |
453 | ||
454 | doc/INSTALL | |
455 | ||
456 | =item - | |
457 | ||
458 | gatherstats -h | |
459 | ||
460 | =back | |
461 | ||
462 | This script is part of the B<NewsStats> package. | |
463 | ||
464 | =head1 AUTHOR | |
465 | ||
466 | Thomas Hochstein <thh@inter.net> | |
467 | ||
468 | =head1 COPYRIGHT AND LICENSE | |
469 | ||
470 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
471 | ||
472 | This program is free software; you may redistribute it and/or modify it | |
473 | under the same terms as Perl itself. | |
474 | ||
475 | =cut |