Commit | Line | Data |
---|---|---|
3f817eb4 | 1 | #! /usr/bin/perl |
2832c235 TH |
2 | # |
3 | # groupstats.pl | |
4 | # | |
5 | # This script will get statistical data on newsgroup usage | |
d3b6810d | 6 | # from a database. |
dfc2b81c | 7 | # |
2832c235 TH |
8 | # It is part of the NewsStats package. |
9 | # | |
07c0b258 | 10 | # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
2832c235 | 11 | # |
dfc2b81c | 12 | # It can be redistributed and/or modified under the same terms under |
2832c235 TH |
13 | # which Perl itself is published. |
14 | ||
15 | BEGIN { | |
24d2011f | 16 | our $VERSION = "0.02"; |
2832c235 | 17 | use File::Basename; |
2ad99c20 TH |
18 | # we're in .../bin, so our module is in ../lib |
19 | push(@INC, dirname($0).'/../lib'); | |
2832c235 TH |
20 | } |
21 | use strict; | |
3f817eb4 | 22 | use warnings; |
2832c235 | 23 | |
edd250f2 | 24 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); |
2832c235 TH |
25 | |
26 | use DBI; | |
edd250f2 TH |
27 | use Getopt::Long qw(GetOptions); |
28 | Getopt::Long::config ('bundling'); | |
2832c235 TH |
29 | |
30 | ################################# Main program ################################# | |
31 | ||
32 | ### read commandline options | |
edd250f2 TH |
33 | my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, |
34 | $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, | |
23ab67a0 | 35 | $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound,$OptConfFile); |
edd250f2 TH |
36 | GetOptions ('b|boundary=s' => \$OptBoundType, |
37 | 'c|captions!' => \$OptCaptions, | |
38 | 'checkgroups=s' => \$OptCheckgroupsFile, | |
39 | 'comments!' => \$OptComments, | |
40 | 'filetemplate=s' => \$OptFileTemplate, | |
41 | 'f|format=s' => \$OptFormat, | |
42 | 'g|group-by=s' => \$OptGroupBy, | |
43 | 'groupsdb=s' => \$OptGroupsDB, | |
44 | 'l|lower=i' => \$LowBound, | |
45 | 'm|month=s' => \$OptMonth, | |
46 | 'n|newsgroups=s' => \$OptNewsgroups, | |
47 | 'o|order-by=s' => \$OptOrderBy, | |
48 | 'r|report=s' => \$OptReportType, | |
49 | 's|sums!' => \$OptSums, | |
50 | 'u|upper=i' => \$UppBound, | |
23ab67a0 | 51 | 'conffile=s' => \$OptConfFile, |
edd250f2 TH |
52 | 'h|help' => \&ShowPOD, |
53 | 'V|version' => \&ShowVersion) or exit 1; | |
54 | # post-process options: apply defaults and normalize keyword values | |
55 | # $OptComments defaults to TRUE (only an explicit --nocomments disables it) | |
56 | $OptComments = 1 if (!defined($OptComments)); | |
57 | # force --nocomments when --filetemplate is used | |
58 | $OptComments = 0 if ($OptFileTemplate); | |
59 | # normalize $OptBoundType to 'level', 'average' or 'sum'; anything else becomes 'default' | |
60 | if ($OptBoundType) { | |
61 | if ($OptBoundType =~ /level/i) { | |
62 | $OptBoundType = 'level'; | |
63 | } elsif ($OptBoundType =~ /av(era)?ge?/i) { | |
64 | $OptBoundType = 'average'; | |
65 | } elsif ($OptBoundType =~ /sums?/i) { | |
66 | $OptBoundType = 'sum'; | |
67 | } else { | |
68 | $OptBoundType = 'default'; | |
69 | } | |
70 | } | |
71 | # normalize $OptReportType to 'average' or 'sum'; anything else becomes 'default' | |
72 | if ($OptReportType) { | |
73 | if ($OptReportType =~ /av(era)?ge?/i) { | |
74 | $OptReportType = 'average'; | |
75 | } elsif ($OptReportType =~ /sums?/i) { | |
76 | $OptReportType = 'sum'; | |
77 | } else { | |
78 | $OptReportType = 'default'; | |
79 | } | |
80 | } | |
17ef4408 TH |
81 | # honor $OptCheckgroupsFile; |
82 | # warn if $OptSums is set at the same time | |
83 | my $ValidGroups; | |
84 | if ($OptCheckgroupsFile) { | |
85 | # read list of newsgroups from --checkgroups | |
86 | # into a hash reference | |
87 | $ValidGroups = &ReadGroupList($OptCheckgroupsFile); | |
88 | &Bleat(1,"--sums option can't possibly work with --checkgroups option set") | |
89 | if $OptSums; | |
90 | } | |
2832c235 TH |
91 | |
92 | ### read configuration (from the file given via --conffile, if any) | |
23ab67a0 | 93 | my %Conf = %{ReadConfig($OptConfFile)}; |
2832c235 TH |
94 | 
95 | ### override configuration via commandline options (only DBTableGrps, via --groupsdb) | |
96 | my %ConfOverride; | |
edd250f2 | 97 | $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; |
2832c235 TH |
98 | &OverrideConfig(\%Conf,\%ConfOverride); |
99 | ||
f2ddfd8a TH |
100 | ### init database |
101 | my $DBHandle = InitDB(\%Conf,1); | |
102 | ||
edd250f2 TH |
103 | ### get time period and newsgroups, prepare SQL 'WHERE' clause |
104 | # get time period | |
105 | # and set caption for output and expression for SQL 'WHERE' clause | |
106 | my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); | |
107 | # bail out if --month is invalid (no caption returned) | |
108 | &Bleat(2,"--month option has an invalid format - ". | |
109 | "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; | |
110 | # get list of newsgroups and set expression for SQL 'WHERE' clause | |
111 | # with placeholders as well as a list of newsgroups to bind to them | |
c30822b4 TH |
112 | my ($SQLWhereNewsgroups,@SQLBindNewsgroups); |
113 | if ($OptNewsgroups) { | |
114 | ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups); | |
115 | # bail out if --newsgroups is invalid (no expression returned) | |
116 | &Bleat(2,"--newsgroups option has an invalid format!") | |
117 | if !$SQLWhereNewsgroups; | |
118 | } | |
edd250f2 TH |
119 | |
120 | ### build SQL WHERE clause (and HAVING clause, if needed) | |
121 | my ($SQLWhereClause,$SQLHavingClause); | |
122 | # $OptBoundType 'level', 'average' or 'sum': bounds go into the HAVING clause | |
123 | if ($OptBoundType and $OptBoundType ne 'default') { | |
124 | $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, | |
125 | $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); | |
126 | $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, | |
127 | $LowBound,$UppBound)); | |
128 | # $OptBoundType 'threshold' / 'default' or none: bounds go into the WHERE clause | |
f2ddfd8a | 129 | } else { |
edd250f2 TH |
130 | $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, |
131 | $SQLWhereNewsgroups,&SQLHierarchies($OptSums), | |
132 | &SQLSetBounds('default',$LowBound,$UppBound)); | |
133 | } | |
2832c235 | 134 | 
edd250f2 | 135 | ### get sort order and build SQL 'ORDER BY' clause |
ea91003a TH |
136 | # force to 'month' for $OptReportType 'average' or 'sum' |
137 | $OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); | |
edd250f2 TH |
138 | # default to 'newsgroup' for $OptBoundType 'level', 'average' or 'sum' |
139 | $OptGroupBy = 'newsgroup' if (!$OptGroupBy and | |
140 | $OptBoundType and $OptBoundType ne 'default'); | |
ea91003a TH |
141 | # default to 'newsgroup' if $OptGroupBy is not set and |
142 | # just one newsgroup is requested, but more than one month | |
22d3d70a TH |
143 | $OptGroupBy = 'newsgroup' if (!$OptGroupBy and $OptMonth and $OptMonth =~ /:/ |
144 | and $OptNewsgroups and $OptNewsgroups !~ /[:*%]/); | |
edd250f2 | 145 | # parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause |
ea91003a | 146 | # if $OptGroupBy is still not set, SQLSortOrder() will default to 'month' |
edd250f2 TH |
147 | my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); |
148 | # $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) | |
149 | # set it to 'month' or 'key' for OutputData() | |
150 | $GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; | |
151 | ||
152 | ### get report type and build SQL 'SELECT' query | |
153 | my $SQLSelect; | |
154 | my $SQLGroupClause = ''; | |
155 | my $Precision = 0; # number of digits right of decimal point for output | |
156 | if ($OptReportType and $OptReportType ne 'default') { | |
157 | $SQLGroupClause = 'GROUP BY newsgroup'; | |
158 | # change $SQLOrderClause: replace everything before 'postings' | |
159 | $SQLOrderClause =~ s/BY.+postings/BY postings/; | |
160 | if ($OptReportType eq 'average') { | |
161 | $SQLSelect = "'All months',newsgroup,AVG(postings)"; | |
162 | $Precision = 2; | |
163 | # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' | |
164 | $SQLOrderClause =~ s/postings/AVG(postings)/; | |
165 | } elsif ($OptReportType eq 'sum') { | |
166 | $SQLSelect = "'All months',newsgroup,SUM(postings)"; | |
167 | # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' | |
168 | $SQLOrderClause =~ s/postings/SUM(postings)/; | |
169 | } | |
170 | } else { | |
171 | $SQLSelect = 'month,newsgroup,postings'; | |
2832c235 TH |
172 | }; |
173 | ||
edd250f2 TH |
174 | ### get length of the longest entry delivered by query (newsgroup name when |
175 | ### grouping by month, otherwise month) for formatting purposes; $MaxValLength is presumably the widest 'postings' value | |
176 | my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; | |
b342fcf0 TH |
177 | my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, |
178 | $Field,'postings',$SQLWhereClause, | |
179 | $SQLHavingClause, | |
180 | @SQLBindNewsgroups); | |
edd250f2 TH |
181 | |
182 | ### build and execute SQL query | |
183 | my ($DBQuery); | |
184 | # special query preparation for $OptBoundType 'level', 'average' or 'sum' | |
185 | if ($OptBoundType and $OptBoundType ne 'default') { | |
186 | # prepare and execute first query: | |
187 | # get list of newsgroups meeting the boundary conditions (HAVING clause) | |
188 | $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. | |
189 | 'GROUP BY newsgroup %s', | |
190 | $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, | |
191 | $SQLWhereClause,$SQLHavingClause)); | |
192 | $DBQuery->execute(@SQLBindNewsgroups) | |
193 | or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", | |
194 | $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, | |
195 | $DBI::errstr)); | |
b802bc3d TH |
196 | # add newsgroups to a comma-separated list ready for IN(...) query |
197 | my $GroupList; | |
198 | while (my ($Newsgroup) = $DBQuery->fetchrow_array) { | |
edd250f2 | 199 | $GroupList .= ',' if $GroupList; |
b802bc3d TH |
200 | $GroupList .= $DBHandle->quote($Newsgroup); # let the driver quote/escape the name instead of hand-written '...' |
201 | }; | |
edd250f2 TH |
202 | # enhance $SQLWhereClause: restrict the main query to the groups found |
203 | if ($GroupList) { | |
204 | $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, | |
205 | sprintf('newsgroup IN (%s)',$GroupList)); | |
206 | } else { | |
207 | # condition cannot be satisfied; | |
208 | # force an empty result by adding '0=1' | |
209 | $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); | |
210 | } | |
211 | } | |
212 | ||
213 | # prepare main query from the SELECT list, WHERE, GROUP BY and ORDER BY clauses built above | |
214 | $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', | |
215 | $SQLSelect, | |
216 | $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, | |
c30822b4 TH |
217 | $SQLWhereClause,$SQLGroupClause, |
218 | $SQLOrderClause)); | |
2832c235 TH |
219 | 
220 | # execute query, binding the newsgroup list to the placeholders; report via Bleat(2,...) on failure | |
edd250f2 TH |
221 | $DBQuery->execute(@SQLBindNewsgroups) |
222 | or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", | |
223 | $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, | |
224 | $DBI::errstr)); | |
225 | ||
226 | ### output results | |
227 | # set default to 'pretty' | |
228 | $OptFormat = 'pretty' if !$OptFormat; | |
229 | # print captions if --captions is set and comments are not suppressed | |
230 | if ($OptCaptions && $OptComments) { | |
231 | # print time period with report type | |
232 | my $CaptionReportType= '(number of postings for each month)'; | |
233 | if ($OptReportType and $OptReportType ne 'default') { | |
234 | $CaptionReportType= '(average number of postings for each month)' | |
235 | if $OptReportType eq 'average'; | |
236 | $CaptionReportType= '(number of all postings for that time period)' | |
237 | if $OptReportType eq 'sum'; | |
238 | } | |
239 | printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); | |
240 | # print newsgroup list if --newsgroups is set | |
241 | printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) | |
242 | if $OptNewsgroups; | |
243 | # print boundaries, if set, with a note on how the boundary type applies them | |
244 | my $CaptionBoundary= '(counting only month fulfilling this condition)'; | |
245 | if ($OptBoundType and $OptBoundType ne 'default') { | |
246 | $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; | |
247 | $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; | |
248 | $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; | |
249 | } | |
250 | printf("# ----- Threshold: %s %s x %s %s %s\n", | |
251 | $LowBound ? $LowBound : '',$LowBound ? '=>' : '', | |
252 | $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) | |
253 | if ($LowBound or $UppBound); | |
254 | # print primary and secondary sort order | |
255 | printf("# ----- Grouped by %s (%s), sorted %s%s\n", | |
256 | ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', | |
257 | ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', | |
258 | ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', | |
259 | ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); | |
260 | } | |
23ab67a0 | 261 | 
edd250f2 TH |
262 | # output data; the list of valid groups is only passed if --checkgroups was given |
263 | &OutputData($OptFormat,$OptComments,$GroupBy,$Precision, | |
264 | $OptCheckgroupsFile ? $ValidGroups : '', | |
b342fcf0 | 265 | $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength); |
2832c235 TH |
266 | 
267 | ### close handles | |
268 | $DBHandle->disconnect; | |
269 | ||
270 | __END__ | |
271 | ||
272 | ################################ Documentation ################################# | |
273 | ||
274 | =head1 NAME | |
275 | ||
276 | groupstats - create reports on newsgroup usage | |
277 | ||
278 | =head1 SYNOPSIS | |
279 | ||
e39d4207 | 280 | B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>] [B<--conffile> I<filename>] |
2832c235 TH |
281 | |
282 | =head1 REQUIREMENTS | |
283 | ||
edd250f2 | 284 | See L<doc/README>. |
2832c235 TH |
285 | |
286 | =head1 DESCRIPTION | |
287 | ||
288 | This script creates reports on newsgroup usage (number of postings per | |
289 | group per month) taken from result tables created by | |
edd250f2 | 290 | B<gatherstats.pl>. |
2832c235 | 291 | |
edd250f2 | 292 | =head2 Features and options |
2832c235 | 293 | |
edd250f2 | 294 | =head3 Time period and newsgroups |
2832c235 | 295 | |
edd250f2 TH |
296 | The time period to act on defaults to last month; you can assign another |
297 | time period or a single month (or drop all time constraints) via the | |
298 | B<--month> option (see below). | |
2832c235 | 299 | |
edd250f2 TH |
300 | B<groupstats> will process all newsgroups by default; you can limit |
301 | processing to only some newsgroups by supplying a list of those groups via | |
302 | B<--newsgroups> option (see below). You can include hierarchy levels in | |
303 | the output by adding the B<--sums> switch (see below). Optionally | |
304 | newsgroups not present in a checkgroups file can be excluded from output, | |
305 | see B<--checkgroups> below. |
306 | ||
307 | =head3 Report type | |
308 | ||
309 | You can choose between different B<--report> types: postings per month, | |
310 | average postings per month or all postings summed up; for details, see | |
311 | below. | |
312 | ||
313 | =head3 Upper and lower boundaries | |
314 | ||
315 | Furthermore you can set an upper and/or lower boundary to exclude some | |
316 | results from output via the B<--lower> and B<--upper> options, | |
317 | respectively. By default, all newsgroups with more and/or less postings | |
318 | per month will be excluded from the result set (i.e. not shown and not | |
319 | considered for average and sum reports). You can change the meaning of | |
320 | those boundaries with the B<--boundary> option. For details, please see | |
321 | below. | |
322 | ||
323 | =head3 Sorting and formatting the output | |
324 | ||
325 | By default, all results are grouped by month; you can group results by | |
326 | newsgroup instead via the B<--group-by> option. Within those groups, the | |
327 | list of newsgroups (or months) is sorted alphabetically (or | |
328 | chronologically, respectively) ascending. You can change that order (and | |
329 | sort by number of postings) with the B<--order-by> option. For details and | |
330 | exceptions, please see below. | |
331 | ||
332 | The results will be formatted as a kind of table; you can change the | |
333 | output format to a simple list or just a list of newsgroups and number of | |
334 | postings with the B<--format> option. Captions will be added by means of | |
335 | the B<--captions> option; all comments (and captions) can be suppressed by | |
336 | using B<--nocomments>. | |
337 | ||
338 | Last but not least you can redirect all output to a number of files, e.g. | |
339 | one for each month, by submitting the B<--filetemplate> option, see below. | |
340 | Captions and comments are automatically disabled in this case. | |
2832c235 TH |
341 | |
342 | =head2 Configuration | |
343 | ||
f2ddfd8a | 344 | B<groupstats> will read its configuration from F<newsstats.conf> |
44c19709 TH |
345 | which should be present in etc/ via Config::Auto or from a configuration file |
346 | submitted by the B<--conffile> option. | |
2832c235 TH |
347 | |
348 | See doc/INSTALL for an overview of possible configuration options. | |
349 | ||
edd250f2 | 350 | You can override some configuration options via the B<--groupsdb> option. |
2832c235 TH |
351 | |
352 | =head1 OPTIONS | |
353 | ||
354 | =over 3 | |
355 | ||
edd250f2 | 356 | =item B<-V>, B<--version> |
2832c235 | 357 | |
edd250f2 | 358 | Print out version and copyright information and exit. |
2832c235 | 359 | |
edd250f2 | 360 | =item B<-h>, B<--help> |
2832c235 TH |
361 | |
362 | Print this man page and exit. | |
363 | ||
dfc2b81c | 364 | =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all> |
2832c235 | 365 | |
edd250f2 TH |
366 | Set processing period to a single month in YYYY-MM format or to a time |
367 | period between two months in YYYY-MM:YYYY-MM format (two months, separated | |
368 | by a colon). By using the keyword I<all> instead, you can set no | |
369 | processing period to process the whole database. | |
2832c235 | 370 | |
edd250f2 | 371 | =item B<-n>, B<--newsgroups> I<newsgroup(s)> |
2832c235 TH |
372 | |
373 | Limit processing to a certain set of newsgroups. I<newsgroup(s)> can | |
374 | be a single newsgroup name (de.alt.test), a newsgroup hierarchy | |
375 | (de.alt.*) or a list of either of these, separated by colons, for | |
376 | example | |
377 | ||
378 | de.test:de.alt.test:de.newusers.* | |
379 | ||
edd250f2 | 380 | =item B<-s>, B<--sums|--nosums> (sum per hierarchy level) |
2832c235 | 381 | |
edd250f2 TH |
382 | Include "virtual" groups for every hierarchy level in output, for |
383 | example: | |
2832c235 | 384 | |
edd250f2 TH |
385 | de.alt.ALL 10 |
386 | de.alt.test 5 | |
387 | de.alt.admin 7 | |
2832c235 | 388 | |
edd250f2 | 389 | See the B<gatherstats> man page for details. |
2832c235 | 390 | |
17ef4408 TH |
391 | This option does not work together with the B<--checkgroups> option as |
392 | all "virtual" groups will not be present in the checkgroups file. | |
393 | ||
edd250f2 | 394 | =item B<--checkgroups> I<filename> |
2832c235 | 395 | |
edd250f2 TH |
396 | Restrict output to those newsgroups present in a file in checkgroups format |
397 | (one newsgroup name per line; everything after the first whitespace on each | |
398 | line is ignored). All other newsgroups will be removed from output. | |
2832c235 | 399 | |
95d9fe2c TH |
400 | Contrary to B<gatherstats>, I<filename> is not a template, but refers to |
401 | a single file in checkgroups format. | |
402 | ||
17ef4408 TH |
403 | The B<--sums> option will not work together with this option as "virtual" |
404 | groups will not be present in the checkgroups file. | |
405 | ||
edd250f2 | 406 | =item B<-r>, B<--report> I<default|average|sums> |
2832c235 | 407 | |
edd250f2 | 408 | Choose the report type: I<default>, I<average> or I<sums> |
2832c235 | 409 | |
edd250f2 TH |
410 | By default, B<groupstats> will report the number of postings for each |
411 | newsgroup in each month. But it can also report the average number of | |
412 | postings per group for all months or the total sum of postings per group | |
413 | for all months. | |
2832c235 | 414 | |
edd250f2 TH |
415 | For report types I<average> and I<sums>, the B<group-by> option has no |
416 | meaning and will be silently ignored (see below). | |
2832c235 | 417 | |
edd250f2 | 418 | =item B<-l>, B<--lower> I<lower boundary> |
2832c235 | 419 | |
edd250f2 | 420 | Set the lower boundary. See B<--boundary> below. |
2832c235 | 421 | |
edd250f2 | 422 | =item B<-u>, B<--upper> I<upper boundary> |
2832c235 | 423 | |
edd250f2 | 424 | Set the upper boundary. See B<--boundary> below. |
2832c235 | 425 | |
edd250f2 TH |
426 | =item B<-b>, B<--boundary> I<boundary type> |
427 | ||
428 | Set the boundary type to one of I<default>, I<level>, I<average> or | |
429 | I<sums>. | |
430 | ||
431 | By default, all newsgroups with more postings per month than the upper | |
432 | boundary and/or less postings per month than the lower boundary will be | |
433 | excluded from further processing. For the default report that means each | |
434 | month only newsgroups with a number of postings between the boundaries | |
435 | will be displayed. For the other report types, newsgroups with a number of | |
436 | postings exceeding the boundaries in all (!) months will not be | |
437 | considered. | |
438 | ||
439 | For example, let's take a list of newsgroups like this: | |
440 | ||
441 | ----- 2012-01: | |
442 | de.comp.datenbanken.misc 6 | |
443 | de.comp.datenbanken.ms-access 84 | |
444 | de.comp.datenbanken.mysql 88 | |
445 | ----- 2012-02: | |
446 | de.comp.datenbanken.misc 8 | |
447 | de.comp.datenbanken.ms-access 126 | |
448 | de.comp.datenbanken.mysql 21 | |
449 | ----- 2012-03: | |
450 | de.comp.datenbanken.misc 24 | |
451 | de.comp.datenbanken.ms-access 83 | |
452 | de.comp.datenbanken.mysql 36 | |
453 | ||
454 | With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>, | |
455 | you'll get the following result: | |
2832c235 | 456 | |
edd250f2 TH |
457 | ----- All months: |
458 | de.comp.datenbanken.ms-access 293 | |
459 | de.comp.datenbanken.mysql 124 | |
2832c235 | 460 | |
edd250f2 TH |
461 | de.comp.datenbanken.misc has not been considered even though it has 38 |
462 | postings in total, because it has less than 25 postings in every single | |
95d9fe2c TH |
463 | month. If you want to list all newsgroups with more than 25 postings |
464 | I<in total>, you'll have to set the boundary type to I<sum>, see below. | |
628a183c | 465 | |
edd250f2 TH |
466 | A boundary type of I<level> will show only those newsgroups - at all - |
467 | that satisfy the boundaries in each and every single month. With the above | |
468 | list of newsgroups and | |
469 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>, | |
470 | you'll get this result: | |
2832c235 | 471 | |
edd250f2 TH |
472 | ----- All months: |
473 | de.comp.datenbanken.ms-access 293 | |
2832c235 | 474 | |
edd250f2 | 475 | de.comp.datenbanken.mysql has not been considered because it had less than |
95d9fe2c | 476 | 25 postings in 2012-02 (only). |
2832c235 | 477 | |
edd250f2 | 478 | You can use that to get a list of newsgroups that have more (or less) than |
95d9fe2c | 479 | x postings in every month during the whole reporting period. |
2832c235 | 480 | |
edd250f2 TH |
481 | A boundary type of I<average> will show only those newsgroups - at all - that |
482 | satisfy the boundaries on average. With the above list of newsgroups and | |
483 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>, | |
484 | you'll get this result: | |
78389b28 | 485 | |
edd250f2 TH |
486 | ----- All months: |
487 | de.comp.datenbanken.ms-access 293 | |
488 | de.comp.datenbanken.mysql 145 | |
2832c235 | 489 | |
edd250f2 | 490 | The average number of postings in the three groups is: |
2832c235 | 491 | |
edd250f2 TH |
492 | de.comp.datenbanken.misc 12.67 |
493 | de.comp.datenbanken.ms-access 97.67 | |
494 | de.comp.datenbanken.mysql 48.33 | |
2832c235 | 495 | |
edd250f2 TH |
496 | Last but not least, a boundary type of I<sums> will show only those |
497 | newsgroups - at all - that satisfy the boundaries with the total sum of | |
498 | all postings during the reporting period. With the above list of | |
499 | newsgroups and | |
500 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>, | |
501 | you'll finally get this result: | |
2832c235 | 502 | |
edd250f2 TH |
503 | ----- All months: |
504 | de.comp.datenbanken.misc 38 | |
505 | de.comp.datenbanken.ms-access 293 | |
506 | de.comp.datenbanken.mysql 145 | |
2832c235 | 507 | |
2832c235 | 508 | |
edd250f2 | 509 | =item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]> |
2832c235 | 510 | |
edd250f2 TH |
511 | By default, all results are grouped by month, sorted chronologically in |
512 | ascending order, like this: | |
78389b28 | 513 | |
edd250f2 TH |
514 | ----- 2012-01: |
515 | de.comp.datenbanken.ms-access 84 | |
516 | de.comp.datenbanken.mysql 88 | |
517 | ----- 2012-02: | |
518 | de.comp.datenbanken.ms-access 126 | |
519 | de.comp.datenbanken.mysql 21 | |
78389b28 | 520 | |
edd250f2 TH |
521 | The results can be grouped by newsgroups instead via |
522 | B<--group-by> I<newsgroup>: | |
523 | ||
524 | ----- de.comp.datenbanken.ms-access: | |
525 | 2012-01 84 | |
526 | 2012-02 126 | |
527 | ----- de.comp.datenbanken.mysql: | |
528 | 2012-01 88 | |
529 | 2012-02 21 | |
530 | ||
531 | By appending I<-desc> to the group-by option parameter, you can reverse | |
532 | the sort order - e.g. B<--group-by> I<month-desc> will give: | |
533 | ||
534 | ----- 2012-02: | |
535 | de.comp.datenbanken.ms-access 126 | |
536 | de.comp.datenbanken.mysql 21 | |
537 | ----- 2012-01: | |
538 | de.comp.datenbanken.ms-access 84 | |
539 | de.comp.datenbanken.mysql 88 | |
540 | ||
541 | Average and sums reports (see above) will always be grouped by months; | |
542 | this option will therefore be ignored. | |
543 | ||
544 | =item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]> | |
545 | ||
546 | Within each group (a single month or single newsgroup, see above), the | |
547 | report will be sorted by newsgroup names in ascending alphabetical order | |
548 | by default. You can change the sort order to descending or sort by number | |
549 | of postings instead. | |
550 | ||
551 | =item B<-f>, B<--format> I<pretty|list|dump> | |
552 | ||
553 | Select the output format, I<pretty> being the default: | |
554 | ||
555 | ----- 2012-01: | |
556 | de.comp.datenbanken.ms-access 84 | |
557 | de.comp.datenbanken.mysql 88 | |
558 | ----- 2012-02: | |
559 | de.comp.datenbanken.ms-access 126 | |
560 | de.comp.datenbanken.mysql 21 | |
561 | ||
562 | I<list> format looks like this: | |
563 | ||
564 | 2012-01 de.comp.datenbanken.ms-access 84 | |
565 | 2012-01 de.comp.datenbanken.mysql 88 | |
566 | 2012-02 de.comp.datenbanken.ms-access 126 | |
567 | 2012-02 de.comp.datenbanken.mysql 21 | |
568 | ||
569 | And I<dump> format looks like this: | |
570 | ||
571 | # 2012-01: | |
572 | de.comp.datenbanken.ms-access 84 | |
573 | de.comp.datenbanken.mysql 88 | |
574 | # 2012-02: | |
575 | de.comp.datenbanken.ms-access 126 | |
576 | de.comp.datenbanken.mysql 21 | |
577 | ||
578 | You can remove the comments by using B<--nocomments>, see below. | |
579 | ||
580 | =item B<-c>, B<--captions|--nocaptions> | |
581 | ||
582 | Add captions to output, like this: | |
583 | ||
584 | ----- Report for 2012-01 to 2012-02 (number of postings for each month) | |
585 | ----- Newsgroups: de.comp.datenbanken.* | |
586 | ----- Threshold: 10 => x <= 20 (on average) | |
587 | ----- Grouped by Newsgroups (ascending), sorted by number of postings descending | |
588 | ||
589 | False by default. | |
590 | ||
591 | =item B<--comments|--nocomments> | |
592 | ||
593 | Add comments (group headers) to I<dump> and I<pretty> output. True by default. | |
594 | ||
595 | Use I<--nocomments> to suppress anything except newsgroup names/months and | |
596 | numbers of postings. This is enforced when using B<--filetemplate>, see below. | |
597 | ||
598 | =item B<--filetemplate> I<filename template> | |
599 | ||
600 | Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will | |
601 | create one file for each month (or each newsgroup, according to the | |
602 | setting of B<--group-by>, see above), with filenames composed by adding | |
603 | year and month (or newsgroup names) to the I<filename template>, for | |
604 | example with B<--filetemplate> I<stats>: | |
605 | ||
606 | stats-2012-01 | |
607 | stats-2012-02 | |
78389b28 TH |
608 | ... and so on |
609 | ||
edd250f2 | 610 | B<--nocomments> is enforced, see above. |
78389b28 | 611 | |
edd250f2 | 612 | =item B<--groupsdb> I<database table> |
2832c235 TH |
613 | |
614 | Override I<DBTableGrps> from F<newsstats.conf>. | |
615 | ||
23ab67a0 TH |
616 | =item B<--conffile> I<filename> |
617 | ||
618 | Load configuration from I<filename> instead of F<newsstats.conf>. | |
619 | ||
2832c235 TH |
620 | =back |
621 | ||
622 | =head1 INSTALLATION | |
623 | ||
edd250f2 | 624 | See L<doc/INSTALL>. |
2832c235 TH |
625 | |
626 | =head1 EXAMPLES | |
627 | ||
edd250f2 | 628 | Show number of postings per group for last month in I<pretty> format: |
2832c235 TH |
629 | |
630 | groupstats | |
631 | ||
632 | Show that report for January of 2010 and de.alt.* plus de.test, | |
633 | including display of hierarchy levels: | |
634 | ||
edd250f2 | 635 | groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums |
2832c235 | 636 | |
edd250f2 TH |
637 | Only show newsgroups with 30 postings or less last month, ordered |
638 | by number of postings, descending, in I<pretty> format: | |
2832c235 | 639 | |
edd250f2 | 640 | groupstats --upper 30 --order-by postings-desc |
2832c235 | 641 | |
edd250f2 TH |
642 | Show the total of all postings for the year of 2010 for all groups that |
643 | had 30 postings or less in every single month in that year, ordered by | |
644 | number of postings in descending order: | |
645 | ||
646 | groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc | |
2832c235 | 647 | |
edd250f2 | 648 | The same for the average number of postings in the year of 2010: |
2832c235 | 649 | |
edd250f2 | 650 | groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc |
2832c235 | 651 | |
edd250f2 TH |
652 | List number of postings per group for each month of 2010 and redirect |
653 | output to one file for each month, named stats-2010-01 and so on, in | |
654 | machine-readable form (without formatting): | |
2832c235 | 655 | |
edd250f2 | 656 | groupstats -m 2010-01:2010-12 -f dump --filetemplate stats |
2832c235 | 657 | |
2832c235 TH |
658 | |
659 | =head1 FILES | |
660 | ||
661 | =over 4 | |
662 | ||
2ad99c20 | 663 | =item F<bin/groupstats.pl> |
2832c235 TH |
664 | |
665 | The script itself. | |
666 | ||
2ad99c20 | 667 | =item F<lib/NewsStats.pm> |
2832c235 TH |
668 | |
669 | Library functions for the NewsStats package. | |
670 | ||
2ad99c20 | 671 | =item F<etc/newsstats.conf> |
2832c235 | 672 | |
edd250f2 | 673 | Runtime configuration file. |
2832c235 TH |
674 | |
675 | =back | |
676 | ||
677 | =head1 BUGS | |
678 | ||
679 | Please report any bugs or feature requests to the author or use the | |
680 | bug tracker at L<http://bugs.th-h.de/>! | |
681 | ||
682 | =head1 SEE ALSO | |
683 | ||
684 | =over 2 | |
685 | ||
686 | =item - | |
687 | ||
edd250f2 | 688 | L<doc/README> |
2832c235 TH |
689 | |
690 | =item - | |
691 | ||
edd250f2 | 692 | L<doc/INSTALL> |
2832c235 TH |
693 | |
694 | =item - | |
695 | ||
696 | gatherstats -h | |
697 | ||
698 | =back | |
699 | ||
700 | This script is part of the B<NewsStats> package. | |
701 | ||
702 | =head1 AUTHOR | |
703 | ||
704 | Thomas Hochstein <thh@inter.net> | |
705 | ||
706 | =head1 COPYRIGHT AND LICENSE | |
707 | ||
28717921 | 708 | Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
2832c235 TH |
709 | |
710 | This program is free software; you may redistribute it and/or modify it | |
711 | under the same terms as Perl itself. | |
712 | ||
713 | =cut |