| 1 | #! /usr/bin/perl |
| 2 | # |
| 3 | # groupstats.pl |
| 4 | # |
| 5 | # This script will get statistical data on newgroup usage |
| 6 | # from a database. |
| 7 | # |
| 8 | # It is part of the NewsStats package. |
| 9 | # |
| 10 | # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
| 11 | # |
| 12 | # It can be redistributed and/or modified under the same terms under |
| 13 | # which Perl itself is published. |
| 14 | |
| 15 | BEGIN { |
| 16 | our $VERSION = "0.01"; |
| 17 | use File::Basename; |
| 18 | # we're in .../bin, so our module is in ../lib |
| 19 | push(@INC, dirname($0).'/../lib'); |
| 20 | } |
| 21 | use strict; |
| 22 | use warnings; |
| 23 | |
| 24 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); |
| 25 | |
| 26 | use DBI; |
| 27 | use Getopt::Long qw(GetOptions); |
| 28 | Getopt::Long::config ('bundling'); |
| 29 | |
| 30 | ################################# Main program ################################# |
| 31 | |
| 32 | ### read commandline options |
| 33 | my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, |
| 34 | $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, |
| 35 | $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound,$OptConfFile); |
| 36 | GetOptions ('b|boundary=s' => \$OptBoundType, |
| 37 | 'c|captions!' => \$OptCaptions, |
| 38 | 'checkgroups=s' => \$OptCheckgroupsFile, |
| 39 | 'comments!' => \$OptComments, |
| 40 | 'filetemplate=s' => \$OptFileTemplate, |
| 41 | 'f|format=s' => \$OptFormat, |
| 42 | 'g|group-by=s' => \$OptGroupBy, |
| 43 | 'groupsdb=s' => \$OptGroupsDB, |
| 44 | 'l|lower=i' => \$LowBound, |
| 45 | 'm|month=s' => \$OptMonth, |
| 46 | 'n|newsgroups=s' => \$OptNewsgroups, |
| 47 | 'o|order-by=s' => \$OptOrderBy, |
| 48 | 'r|report=s' => \$OptReportType, |
| 49 | 's|sums!' => \$OptSums, |
| 50 | 'u|upper=i' => \$UppBound, |
| 51 | 'conffile=s' => \$OptConfFile, |
| 52 | 'h|help' => \&ShowPOD, |
| 53 | 'V|version' => \&ShowVersion) or exit 1; |
| 54 | # parse parameters |
| 55 | # $OptComments defaults to TRUE |
| 56 | $OptComments = 1 if (!defined($OptComments)); |
| 57 | # force --nocomments when --filetemplate is used |
| 58 | $OptComments = 0 if ($OptFileTemplate); |
| 59 | # parse $OptBoundType |
| 60 | if ($OptBoundType) { |
| 61 | if ($OptBoundType =~ /level/i) { |
| 62 | $OptBoundType = 'level'; |
| 63 | } elsif ($OptBoundType =~ /av(era)?ge?/i) { |
| 64 | $OptBoundType = 'average'; |
| 65 | } elsif ($OptBoundType =~ /sums?/i) { |
| 66 | $OptBoundType = 'sum'; |
| 67 | } else { |
| 68 | $OptBoundType = 'default'; |
| 69 | } |
| 70 | } |
| 71 | # parse $OptReportType |
| 72 | if ($OptReportType) { |
| 73 | if ($OptReportType =~ /av(era)?ge?/i) { |
| 74 | $OptReportType = 'average'; |
| 75 | } elsif ($OptReportType =~ /sums?/i) { |
| 76 | $OptReportType = 'sum'; |
| 77 | } else { |
| 78 | $OptReportType = 'default'; |
| 79 | } |
| 80 | } |
| 81 | # read list of newsgroups from --checkgroups |
| 82 | # into a hash reference |
| 83 | my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile; |
| 84 | |
| 85 | ### read configuration |
| 86 | my %Conf = %{ReadConfig($OptConfFile)}; |
| 87 | |
| 88 | ### override configuration via commandline options |
| 89 | my %ConfOverride; |
| 90 | $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; |
| 91 | &OverrideConfig(\%Conf,\%ConfOverride); |
| 92 | |
| 93 | ### init database |
| 94 | my $DBHandle = InitDB(\%Conf,1); |
| 95 | |
| 96 | ### get time period and newsgroups, prepare SQL 'WHERE' clause |
| 97 | # get time period |
| 98 | # and set caption for output and expression for SQL 'WHERE' clause |
| 99 | my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); |
| 100 | # bail out if --month is invalid |
| 101 | &Bleat(2,"--month option has an invalid format - ". |
| 102 | "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; |
| 103 | # get list of newsgroups and set expression for SQL 'WHERE' clause |
| 104 | # with placeholders as well as a list of newsgroup to bind to them |
| 105 | my ($SQLWhereNewsgroups,@SQLBindNewsgroups); |
| 106 | if ($OptNewsgroups) { |
| 107 | ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups); |
| 108 | # bail out if --newsgroups is invalid |
| 109 | &Bleat(2,"--newsgroups option has an invalid format!") |
| 110 | if !$SQLWhereNewsgroups; |
| 111 | } |
| 112 | |
| 113 | ### build SQL WHERE clause (and HAVING clause, if needed) |
| 114 | my ($SQLWhereClause,$SQLHavingClause); |
| 115 | # $OptBoundType 'level' |
| 116 | if ($OptBoundType and $OptBoundType ne 'default') { |
| 117 | $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, |
| 118 | $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); |
| 119 | $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, |
| 120 | $LowBound,$UppBound)); |
| 121 | # $OptBoundType 'threshold' / 'default' or none |
| 122 | } else { |
| 123 | $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, |
| 124 | $SQLWhereNewsgroups,&SQLHierarchies($OptSums), |
| 125 | &SQLSetBounds('default',$LowBound,$UppBound)); |
| 126 | } |
| 127 | |
| 128 | ### get sort order and build SQL 'ORDER BY' clause |
| 129 | # default to 'newsgroup' for $OptBoundType 'level' or 'average' |
| 130 | $OptGroupBy = 'newsgroup' if (!$OptGroupBy and |
| 131 | $OptBoundType and $OptBoundType ne 'default'); |
| 132 | # force to 'month' for $OptReportType 'average' or 'sum' |
| 133 | $OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); |
| 134 | # parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause |
| 135 | my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); |
| 136 | # $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) |
| 137 | # set it to 'month' or 'key' for OutputData() |
| 138 | $GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; |
| 139 | |
| 140 | ### get report type and build SQL 'SELECT' query |
| 141 | my $SQLSelect; |
| 142 | my $SQLGroupClause = ''; |
| 143 | my $Precision = 0; # number of digits right of decimal point for output |
| 144 | if ($OptReportType and $OptReportType ne 'default') { |
| 145 | $SQLGroupClause = 'GROUP BY newsgroup'; |
| 146 | # change $SQLOrderClause: replace everything before 'postings' |
| 147 | $SQLOrderClause =~ s/BY.+postings/BY postings/; |
| 148 | if ($OptReportType eq 'average') { |
| 149 | $SQLSelect = "'All months',newsgroup,AVG(postings)"; |
| 150 | $Precision = 2; |
| 151 | # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' |
| 152 | $SQLOrderClause =~ s/postings/AVG(postings)/; |
| 153 | } elsif ($OptReportType eq 'sum') { |
| 154 | $SQLSelect = "'All months',newsgroup,SUM(postings)"; |
| 155 | # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' |
| 156 | $SQLOrderClause =~ s/postings/SUM(postings)/; |
| 157 | } |
| 158 | } else { |
| 159 | $SQLSelect = 'month,newsgroup,postings'; |
| 160 | }; |
| 161 | |
| 162 | ### get length of longest newsgroup name delivered by query |
| 163 | ### for formatting purposes |
| 164 | my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; |
| 165 | my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, |
| 166 | $Field,'postings',$SQLWhereClause, |
| 167 | $SQLHavingClause, |
| 168 | @SQLBindNewsgroups); |
| 169 | |
| 170 | ### build and execute SQL query |
| 171 | my ($DBQuery); |
| 172 | # special query preparation for $OptBoundType 'level', 'average' or 'sums' |
| 173 | if ($OptBoundType and $OptBoundType ne 'default') { |
| 174 | # prepare and execute first query: |
| 175 | # get list of newsgroups meeting level conditions |
| 176 | $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. |
| 177 | 'GROUP BY newsgroup %s', |
| 178 | $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, |
| 179 | $SQLWhereClause,$SQLHavingClause)); |
| 180 | $DBQuery->execute(@SQLBindNewsgroups) |
| 181 | or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", |
| 182 | $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, |
| 183 | $DBI::errstr)); |
| 184 | # add newsgroups to a comma-seperated list ready for IN(...) query |
| 185 | my $GroupList; |
| 186 | while (my ($Newsgroup) = $DBQuery->fetchrow_array) { |
| 187 | $GroupList .= ',' if $GroupList; |
| 188 | $GroupList .= "'$Newsgroup'"; |
| 189 | }; |
| 190 | # enhance $WhereClause |
| 191 | if ($GroupList) { |
| 192 | $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, |
| 193 | sprintf('newsgroup IN (%s)',$GroupList)); |
| 194 | } else { |
| 195 | # condition cannot be satisfied; |
| 196 | # force query to fail by adding '0=1' |
| 197 | $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | # prepare query |
| 202 | $DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', |
| 203 | $SQLSelect, |
| 204 | $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, |
| 205 | $SQLWhereClause,$SQLGroupClause, |
| 206 | $SQLOrderClause)); |
| 207 | |
| 208 | # execute query |
| 209 | $DBQuery->execute(@SQLBindNewsgroups) |
| 210 | or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", |
| 211 | $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, |
| 212 | $DBI::errstr)); |
| 213 | |
| 214 | ### output results |
| 215 | # set default to 'pretty' |
| 216 | $OptFormat = 'pretty' if !$OptFormat; |
| 217 | # print captions if --caption is set |
| 218 | if ($OptCaptions && $OptComments) { |
| 219 | # print time period with report type |
| 220 | my $CaptionReportType= '(number of postings for each month)'; |
| 221 | if ($OptReportType and $OptReportType ne 'default') { |
| 222 | $CaptionReportType= '(average number of postings for each month)' |
| 223 | if $OptReportType eq 'average'; |
| 224 | $CaptionReportType= '(number of all postings for that time period)' |
| 225 | if $OptReportType eq 'sum'; |
| 226 | } |
| 227 | printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); |
| 228 | # print newsgroup list if --newsgroups is set |
| 229 | printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) |
| 230 | if $OptNewsgroups; |
| 231 | # print boundaries, if set |
| 232 | my $CaptionBoundary= '(counting only month fulfilling this condition)'; |
| 233 | if ($OptBoundType and $OptBoundType ne 'default') { |
| 234 | $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; |
| 235 | $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; |
| 236 | $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; |
| 237 | } |
| 238 | printf("# ----- Threshold: %s %s x %s %s %s\n", |
| 239 | $LowBound ? $LowBound : '',$LowBound ? '=>' : '', |
| 240 | $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) |
| 241 | if ($LowBound or $UppBound); |
| 242 | # print primary and secondary sort order |
| 243 | printf("# ----- Grouped by %s (%s), sorted %s%s\n", |
| 244 | ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', |
| 245 | ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', |
| 246 | ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', |
| 247 | ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); |
| 248 | } |
| 249 | |
| 250 | # output data |
| 251 | &OutputData($OptFormat,$OptComments,$GroupBy,$Precision, |
| 252 | $OptCheckgroupsFile ? $ValidGroups : '', |
| 253 | $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength); |
| 254 | |
| 255 | ### close handles |
| 256 | $DBHandle->disconnect; |
| 257 | |
| 258 | __END__ |
| 259 | |
| 260 | ################################ Documentation ################################# |
| 261 | |
| 262 | =head1 NAME |
| 263 | |
| 264 | groupstats - create reports on newsgroup usage |
| 265 | |
| 266 | =head1 SYNOPSIS |
| 267 | |
| 268 | B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>] [--conffile I<filename>] |
| 269 | |
| 270 | =head1 REQUIREMENTS |
| 271 | |
| 272 | See L<doc/README>. |
| 273 | |
| 274 | =head1 DESCRIPTION |
| 275 | |
| 276 | This script create reports on newsgroup usage (number of postings per |
| 277 | group per month) taken from result tables created by |
| 278 | B<gatherstats.pl>. |
| 279 | |
| 280 | =head2 Features and options |
| 281 | |
| 282 | =head3 Time period and newsgroups |
| 283 | |
| 284 | The time period to act on defaults to last month; you can assign another |
| 285 | time period or a single month (or drop all time constraints) via the |
| 286 | B<--month> option (see below). |
| 287 | |
| 288 | B<groupstats> will process all newsgroups by default; you can limit |
| 289 | processing to only some newsgroups by supplying a list of those groups via |
| 290 | B<--newsgroups> option (see below). You can include hierarchy levels in |
| 291 | the output by adding the B<--sums> switch (see below). Optionally |
| 292 | newsgroups not present in a checkgroups file can be excluded from output, |
| 293 | sse B<--checkgroups> below. |
| 294 | |
| 295 | =head3 Report type |
| 296 | |
| 297 | You can choose between different B<--report> types: postings per month, |
| 298 | average postings per month or all postings summed up; for details, see |
| 299 | below. |
| 300 | |
| 301 | =head3 Upper and lower boundaries |
| 302 | |
| 303 | Furthermore you can set an upper and/or lower boundary to exclude some |
| 304 | results from output via the B<--lower> and B<--upper> options, |
| 305 | respectively. By default, all newsgroups with more and/or less postings |
| 306 | per month will be excluded from the result set (i.e. not shown and not |
| 307 | considered for average and sum reports). You can change the meaning of |
| 308 | those boundaries with the B<--boundary> option. For details, please see |
| 309 | below. |
| 310 | |
| 311 | =head3 Sorting and formatting the output |
| 312 | |
| 313 | By default, all results are grouped by month; you can group results by |
| 314 | newsgroup instead via the B<--groupy-by> option. Within those groups, the |
| 315 | list of newsgroups (or months) is sorted alphabetically (or |
| 316 | chronologically, respectively) ascending. You can change that order (and |
| 317 | sort by number of postings) with the B<--order-by> option. For details and |
| 318 | exceptions, please see below. |
| 319 | |
| 320 | The results will be formatted as a kind of table; you can change the |
| 321 | output format to a simple list or just a list of newsgroups and number of |
| 322 | postings with the B<--format> option. Captions will be added by means of |
| 323 | the B<--caption> option; all comments (and captions) can be supressed by |
| 324 | using B<--nocomments>. |
| 325 | |
| 326 | Last but not least you can redirect all output to a number of files, e.g. |
| 327 | one for each month, by submitting the B<--filetemplate> option, see below. |
| 328 | Captions and comments are automatically disabled in this case. |
| 329 | |
| 330 | =head2 Configuration |
| 331 | |
| 332 | B<groupstats> will read its configuration from F<newsstats.conf> |
| 333 | which should be present in the same directory via Config::Auto. |
| 334 | |
| 335 | See doc/INSTALL for an overview of possible configuration options. |
| 336 | |
| 337 | You can override some configuration options via the B<--groupsdb> option. |
| 338 | |
| 339 | =head1 OPTIONS |
| 340 | |
| 341 | =over 3 |
| 342 | |
| 343 | =item B<-V>, B<--version> |
| 344 | |
| 345 | Print out version and copyright information and exit. |
| 346 | |
| 347 | =item B<-h>, B<--help> |
| 348 | |
| 349 | Print this man page and exit. |
| 350 | |
| 351 | =item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all> |
| 352 | |
| 353 | Set processing period to a single month in YYYY-MM format or to a time |
| 354 | period between two month in YYYY-MM:YYYY-MM format (two month, separated |
| 355 | by a colon). By using the keyword I<all> instead, you can set no |
| 356 | processing period to process the whole database. |
| 357 | |
| 358 | =item B<-n>, B<--newsgroups> I<newsgroup(s)> |
| 359 | |
| 360 | Limit processing to a certain set of newsgroups. I<newsgroup(s)> can |
| 361 | be a single newsgroup name (de.alt.test), a newsgroup hierarchy |
| 362 | (de.alt.*) or a list of either of these, separated by colons, for |
| 363 | example |
| 364 | |
| 365 | de.test:de.alt.test:de.newusers.* |
| 366 | |
| 367 | =item B<-s>, B<--sums|--nosums> (sum per hierarchy level) |
| 368 | |
| 369 | Include "virtual" groups for every hierarchy level in output, for |
| 370 | example: |
| 371 | |
| 372 | de.alt.ALL 10 |
| 373 | de.alt.test 5 |
| 374 | de.alt.admin 7 |
| 375 | |
| 376 | See the B<gatherstats> man page for details. |
| 377 | |
| 378 | =item B<--checkgroups> I<filename> |
| 379 | |
| 380 | Restrict output to those newgroups present in a file in checkgroups format |
| 381 | (one newgroup name per line; everything after the first whitespace on each |
| 382 | line is ignored). All other newsgroups will be removed from output. |
| 383 | |
| 384 | Contrary to B<gatherstats>, I<filename> is not a template, but refers to |
| 385 | a single file in checkgroups format. |
| 386 | |
| 387 | =item B<-r>, B<--report> I<default|average|sums> |
| 388 | |
| 389 | Choose the report type: I<default>, I<average> or I<sums> |
| 390 | |
| 391 | By default, B<groupstats> will report the number of postings for each |
| 392 | newsgroup in each month. But it can also report the average number of |
| 393 | postings per group for all months or the total sum of postings per group |
| 394 | for all months. |
| 395 | |
| 396 | For report types I<average> and I<sums>, the B<group-by> option has no |
| 397 | meaning and will be silently ignored (see below). |
| 398 | |
| 399 | =item B<-l>, B<--lower> I<lower boundary> |
| 400 | |
| 401 | Set the lower boundary. See B<--boundary> below. |
| 402 | |
| 403 | =item B<-l>, B<--upper> I<upper boundary> |
| 404 | |
| 405 | Set the upper boundary. See B<--boundary> below. |
| 406 | |
| 407 | =item B<-b>, B<--boundary> I<boundary type> |
| 408 | |
| 409 | Set the boundary type to one of I<default>, I<level>, I<average> or |
| 410 | I<sums>. |
| 411 | |
| 412 | By default, all newsgroups with more postings per month than the upper |
| 413 | boundary and/or less postings per month than the lower boundary will be |
| 414 | excluded from further processing. For the default report that means each |
| 415 | month only newsgroups with a number of postings between the boundaries |
| 416 | will be displayed. For the other report types, newsgroups with a number of |
| 417 | postings exceeding the boundaries in all (!) months will not be |
| 418 | considered. |
| 419 | |
| 420 | For example, lets take a list of newsgroups like this: |
| 421 | |
| 422 | ----- 2012-01: |
| 423 | de.comp.datenbanken.misc 6 |
| 424 | de.comp.datenbanken.ms-access 84 |
| 425 | de.comp.datenbanken.mysql 88 |
| 426 | ----- 2012-02: |
| 427 | de.comp.datenbanken.misc 8 |
| 428 | de.comp.datenbanken.ms-access 126 |
| 429 | de.comp.datenbanken.mysql 21 |
| 430 | ----- 2012-03: |
| 431 | de.comp.datenbanken.misc 24 |
| 432 | de.comp.datenbanken.ms-access 83 |
| 433 | de.comp.datenbanken.mysql 36 |
| 434 | |
| 435 | With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>, |
| 436 | you'll get the following result: |
| 437 | |
| 438 | ----- All months: |
| 439 | de.comp.datenbanken.ms-access 293 |
| 440 | de.comp.datenbanken.mysql 124 |
| 441 | |
| 442 | de.comp.datenbanken.misc has not been considered even though it has 38 |
| 443 | postings in total, because it has less than 25 postings in every single |
| 444 | month. If you want to list all newsgroups with more than 25 postings |
| 445 | I<in total>, you'll have to set the boundary type to I<sum>, see below. |
| 446 | |
| 447 | A boundary type of I<level> will show only those newsgroups - at all - |
| 448 | that satisfy the boundaries in each and every single month. With the above |
| 449 | list of newsgroups and |
| 450 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>, |
| 451 | you'll get this result: |
| 452 | |
| 453 | ----- All months: |
| 454 | de.comp.datenbanken.ms-access 293 |
| 455 | |
| 456 | de.comp.datenbanken.mysql has not been considered because it had less than |
| 457 | 25 postings in 2012-02 (only). |
| 458 | |
| 459 | You can use that to get a list of newsgroups that have more (or less) then |
| 460 | x postings in every month during the whole reporting period. |
| 461 | |
| 462 | A boundary type of I<average> will show only those newsgroups - at all -that |
| 463 | satisfy the boundaries on average. With the above list of newsgroups and |
| 464 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>, |
| 465 | you'll get this result: |
| 466 | |
| 467 | ----- All months: |
| 468 | de.comp.datenbanken.ms-access 293 |
| 469 | de.comp.datenbanken.mysql 145 |
| 470 | |
| 471 | The average number of postings in the three groups is: |
| 472 | |
| 473 | de.comp.datenbanken.misc 12.67 |
| 474 | de.comp.datenbanken.ms-access 97.67 |
| 475 | de.comp.datenbanken.mysql 48.33 |
| 476 | |
| 477 | Last but not least, a boundary type of I<sums> will show only those |
| 478 | newsgroups - at all - that satisfy the boundaries with the total sum of |
| 479 | all postings during the reporting period. With the above list of |
| 480 | newsgroups and |
| 481 | C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>, |
| 482 | you'll finally get this result: |
| 483 | |
| 484 | ----- All months: |
| 485 | de.comp.datenbanken.misc 38 |
| 486 | de.comp.datenbanken.ms-access 293 |
| 487 | de.comp.datenbanken.mysql 145 |
| 488 | |
| 489 | |
| 490 | =item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]> |
| 491 | |
| 492 | By default, all results are grouped by month, sorted chronologically in |
| 493 | ascending order, like this: |
| 494 | |
| 495 | ----- 2012-01: |
| 496 | de.comp.datenbanken.ms-access 84 |
| 497 | de.comp.datenbanken.mysql 88 |
| 498 | ----- 2012-02: |
| 499 | de.comp.datenbanken.ms-access 126 |
| 500 | de.comp.datenbanken.mysql 21 |
| 501 | |
| 502 | The results can be grouped by newsgroups instead via |
| 503 | B<--group-by> I<newsgroup>: |
| 504 | |
| 505 | ----- de.comp.datenbanken.ms-access: |
| 506 | 2012-01 84 |
| 507 | 2012-02 126 |
| 508 | ----- de.comp.datenbanken.mysql: |
| 509 | 2012-01 88 |
| 510 | 2012-02 21 |
| 511 | |
| 512 | By appending I<-desc> to the group-by option parameter, you can reverse |
| 513 | the sort order - e.g. B<--group-by> I<month-desc> will give: |
| 514 | |
| 515 | ----- 2012-02: |
| 516 | de.comp.datenbanken.ms-access 126 |
| 517 | de.comp.datenbanken.mysql 21 |
| 518 | ----- 2012-01: |
| 519 | de.comp.datenbanken.ms-access 84 |
| 520 | de.comp.datenbanken.mysql 88 |
| 521 | |
| 522 | Average and sums reports (see above) will always be grouped by months; |
| 523 | this option will therefore be ignored. |
| 524 | |
| 525 | =item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]> |
| 526 | |
| 527 | Within each group (a single month or single newsgroup, see above), the |
| 528 | report will be sorted by newsgroup names in ascending alphabetical order |
| 529 | by default. You can change the sort order to descending or sort by number |
| 530 | of postings instead. |
| 531 | |
| 532 | =item B<-f>, B<--format> I<pretty|list|dump> |
| 533 | |
| 534 | Select the output format, I<pretty> being the default: |
| 535 | |
| 536 | ----- 2012-01: |
| 537 | de.comp.datenbanken.ms-access 84 |
| 538 | de.comp.datenbanken.mysql 88 |
| 539 | ----- 2012-02: |
| 540 | de.comp.datenbanken.ms-access 126 |
| 541 | de.comp.datenbanken.mysql 21 |
| 542 | |
| 543 | I<list> format looks like this: |
| 544 | |
| 545 | 2012-01 de.comp.datenbanken.ms-access 84 |
| 546 | 2012-01 de.comp.datenbanken.mysql 88 |
| 547 | 2012-02 de.comp.datenbanken.ms-access 126 |
| 548 | 2012-02 de.comp.datenbanken.mysql 21 |
| 549 | |
| 550 | And I<dump> format looks like this: |
| 551 | |
| 552 | # 2012-01: |
| 553 | de.comp.datenbanken.ms-access 84 |
| 554 | de.comp.datenbanken.mysql 88 |
| 555 | # 2012-02: |
| 556 | de.comp.datenbanken.ms-access 126 |
| 557 | de.comp.datenbanken.mysql 21 |
| 558 | |
| 559 | You can remove the comments by using B<--nocomments>, see below. |
| 560 | |
| 561 | =item B<-c>, B<--captions|--nocaptions> |
| 562 | |
| 563 | Add captions to output, like this: |
| 564 | |
| 565 | ----- Report for 2012-01 to 2012-02 (number of postings for each month) |
| 566 | ----- Newsgroups: de.comp.datenbanken.* |
| 567 | ----- Threshold: 10 => x <= 20 (on average) |
| 568 | ----- Grouped by Newsgroups (ascending), sorted by number of postings descending |
| 569 | |
| 570 | False by default. |
| 571 | |
| 572 | =item B<--comments|--nocomments> |
| 573 | |
| 574 | Add comments (group headers) to I<dump> and I<pretty> output. True by default. |
| 575 | |
| 576 | Use I<--nocomments> to suppress anything except newsgroup names/months and |
| 577 | numbers of postings. This is enforced when using B<--filetemplate>, see below. |
| 578 | |
| 579 | =item B<--filetemplate> I<filename template> |
| 580 | |
| 581 | Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will |
| 582 | create one file for each month (or each newsgroup, accordant to the |
| 583 | setting of B<--group-by>, see above), with filenames composed by adding |
| 584 | year and month (or newsgroup names) to the I<filename template>, for |
| 585 | example with B<--filetemplate> I<stats>: |
| 586 | |
| 587 | stats-2012-01 |
| 588 | stats-2012-02 |
| 589 | ... and so on |
| 590 | |
| 591 | B<--nocomments> is enforced, see above. |
| 592 | |
| 593 | =item B<--groupsdb> I<database table> |
| 594 | |
| 595 | Override I<DBTableGrps> from F<newsstats.conf>. |
| 596 | |
| 597 | =item B<--conffile> I<filename> |
| 598 | |
| 599 | Load configuration from I<filename> instead of F<newsstats.conf>. |
| 600 | |
| 601 | =back |
| 602 | |
| 603 | =head1 INSTALLATION |
| 604 | |
| 605 | See L<doc/INSTALL>. |
| 606 | |
| 607 | =head1 EXAMPLES |
| 608 | |
| 609 | Show number of postings per group for lasth month in I<pretty> format: |
| 610 | |
| 611 | groupstats |
| 612 | |
| 613 | Show that report for January of 2010 and de.alt.* plus de.test, |
| 614 | including display of hierarchy levels: |
| 615 | |
| 616 | groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums |
| 617 | |
| 618 | Only show newsgroups with 30 postings or less last month, ordered |
| 619 | by number of postings, descending, in I<pretty> format: |
| 620 | |
| 621 | groupstats --upper 30 --order-by postings-desc |
| 622 | |
| 623 | Show the total of all postings for the year of 2010 for all groups that |
| 624 | had 30 postings or less in every single month in that year, ordered by |
| 625 | number of postings in descending order: |
| 626 | |
| 627 | groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc |
| 628 | |
| 629 | The same for the average number of postings in the year of 2010: |
| 630 | |
| 631 | groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc |
| 632 | |
| 633 | List number of postings per group for eacht month of 2010 and redirect |
| 634 | output to one file for each month, namend stats-2010-01 and so on, in |
| 635 | machine-readable form (without formatting): |
| 636 | |
| 637 | groupstats -m 2010-01:2010-12 -f dump --filetemplate stats |
| 638 | |
| 639 | |
| 640 | =head1 FILES |
| 641 | |
| 642 | =over 4 |
| 643 | |
| 644 | =item F<bin/groupstats.pl> |
| 645 | |
| 646 | The script itself. |
| 647 | |
| 648 | =item F<lib/NewsStats.pm> |
| 649 | |
| 650 | Library functions for the NewsStats package. |
| 651 | |
| 652 | =item F<etc/newsstats.conf> |
| 653 | |
| 654 | Runtime configuration file. |
| 655 | |
| 656 | =back |
| 657 | |
| 658 | =head1 BUGS |
| 659 | |
| 660 | Please report any bugs or feature requests to the author or use the |
| 661 | bug tracker at L<http://bugs.th-h.de/>! |
| 662 | |
| 663 | =head1 SEE ALSO |
| 664 | |
| 665 | =over 2 |
| 666 | |
| 667 | =item - |
| 668 | |
| 669 | L<doc/README> |
| 670 | |
| 671 | =item - |
| 672 | |
| 673 | l>doc/INSTALL> |
| 674 | |
| 675 | =item - |
| 676 | |
| 677 | gatherstats -h |
| 678 | |
| 679 | =back |
| 680 | |
| 681 | This script is part of the B<NewsStats> package. |
| 682 | |
| 683 | =head1 AUTHOR |
| 684 | |
| 685 | Thomas Hochstein <thh@inter.net> |
| 686 | |
| 687 | =head1 COPYRIGHT AND LICENSE |
| 688 | |
| 689 | Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net> |
| 690 | |
| 691 | This program is free software; you may redistribute it and/or modify it |
| 692 | under the same terms as Perl itself. |
| 693 | |
| 694 | =cut |