tmp/
tmp/*
-newsstats.conf
+etc/newsstats.conf
+++ /dev/null
-# NewsStats.pm
-#
-# Library functions for the NewsStats package.
-#
-# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
-#
-# This module can be redistributed and/or modified under the same terms under
-# which Perl itself is published.
-
-package NewsStats;
-
-use strict;
-use warnings;
-our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
-
-require Exporter;
-@ISA = qw(Exporter);
-@EXPORT = qw(
- $MyVersion
- $PackageVersion
- $FullPath
- $HomePath
- ShowVersion
- ShowPOD
- ReadConfig
- OverrideConfig
- InitDB
- Bleat
-);
-@EXPORT_OK = qw(
- GetTimePeriod
- LastMonth
- SplitPeriod
- ListMonth
- ListNewsgroups
- ParseHierarchies
- ReadGroupList
- OutputData
- FormatOutput
- SQLHierarchies
- SQLSortOrder
- SQLGroupList
- SQLSetBounds
- SQLBuildClause
- GetMaxLength
-);
-%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
- ListMonth)],
- Output => [qw(OutputData FormatOutput)],
- SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
- SQLSetBounds SQLBuildClause GetMaxLength)]);
-$VERSION = '0.01';
-our $PackageVersion = '0.01';
-
-use Data::Dumper;
-use File::Basename;
-
-use Config::Auto;
-use DBI;
-
-#####-------------------------------- Vars --------------------------------#####
-
-# trim the path
-our $FullPath = $0;
-our $HomePath = dirname($0);
-$0 =~ s%.*/%%;
-# set version string
-our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
-
-#####------------------------------- Basics -------------------------------#####
-
-################################################################################
-
-################################################################################
-sub ShowVersion {
-################################################################################
-### display version and exit
- print "NewsStats v$PackageVersion\n$MyVersion\n";
- print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
- print "This program is free software; you may redistribute it ".
- "and/or modify it under the same terms as Perl itself.\n";
- exit(100);
-};
-################################################################################
-
-################################################################################
-sub ShowPOD {
-################################################################################
-### feed myself to perldoc and exit
- exec('perldoc', $FullPath);
- exit(100);
-};
-################################################################################
-
-################################################################################
-sub ReadConfig {
-################################################################################
-### read config via Config::Auto
-### IN : $ConfFile: config filename
-### OUT: reference to a hash containing the configuration
- my ($ConfFile) = @_;
- # mandatory configuration options
- my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
- 'DBTableRaw','DBTableGrps');
- # read config via Config::Auto
- my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
- my %Conf = %{$ConfR};
- # check for mandatory options
- foreach (@Mandatory) {
- &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
- if (!defined($Conf{$_}));
- }
- # $Conf{'TLH'} is checked in gatherstats.pl
- return $ConfR;
-};
-################################################################################
-
-################################################################################
-sub OverrideConfig {
-################################################################################
-### override configuration values
-### IN : $ConfigR : reference to configuration hash
-### $OverrideR: reference to a hash containing overrides
- my ($ConfigR,$OverrideR) = @_;
- my %Override = %$OverrideR;
- # Config hash empty?
- &Bleat(1,"Empty configuration hash passed to OverrideConfig()")
- if ( keys %$ConfigR < 1);
- # return if no overrides
- return if (keys %Override < 1 or keys %$ConfigR < 1);
- foreach my $Key (keys %Override) {
- $$ConfigR{$Key} = $Override{$Key};
- };
-};
-################################################################################
-
-################################################################################
-sub InitDB {
-################################################################################
-### initialise database connection
-### IN : $ConfigR: reference to configuration hash
-### $Die : if TRUE, die if connection fails
-### OUT: DBHandle
- my ($ConfigR,$Die) = @_;
- my %Conf = %$ConfigR;
- my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
- $Conf{'DBDriver'},$Conf{'DBDatabase'},
- $Conf{'DBHost'}), $Conf{'DBUser'},
- $Conf{'DBPw'}, { PrintError => 0 });
- if (!$DBHandle) {
- &Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
- &Bleat(1,$DBI::errstr);
- };
- return $DBHandle;
-};
-################################################################################
-
-################################################################################
-sub Bleat {
-################################################################################
-### print warning or error messages and terminate in case of error
-### IN : $Level : 1 = warning, 2 = error
-### $Message: warning or error message
- my ($Level,$Message) = @_;
- if ($Level == 1) {
- warn "$0 W: $Message\n"
- } elsif ($Level == 2) {
- die "$0 E: $Message\n"
- } else {
- print "$0: $Message\n"
- }
-};
-################################################################################
-
-#####------------------------------ GetStats ------------------------------#####
-
-################################################################################
-sub ListNewsgroups {
-################################################################################
-### explode a (scalar) list of newsgroup names to a list of newsgroup and
-### hierarchy names where every newsgroup and hierarchy appears only once:
-### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
-### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
-### $TLH : top level hierarchy (all other newsgroups are ignored)
-### $ValidGroupsR: reference to a hash containing all valid newsgroups
-### as keys
-### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
- my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
- my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
- my %Newsgroups;
- chomp($Newsgroups);
- # remove whitespace from contents of Newsgroups:
- $Newsgroups =~ s/\s//;
- # call &HierarchyCount for each newsgroup in $Newsgroups:
- for (split /,/, $Newsgroups) {
- # don't count newsgroup/hierarchy in wrong TLH
- next if($TLH and !/^$TLH/);
- # don't count invalid newsgroups
- if(%ValidGroups and !defined($ValidGroups{$_})) {
- warn (sprintf("DROPPED: %s\n",$_));
- next;
- }
- # add original newsgroup to %Newsgroups
- $Newsgroups{$_} = 1;
- # add all hierarchy elements to %Newsgroups, amended by '.ALL',
- # i.e. de.alt.ALL and de.ALL
- foreach (ParseHierarchies($_)) {
- $Newsgroups{$_.'.ALL'} = 1;
- }
- };
- return %Newsgroups;
-};
-
-################################################################################
-sub ParseHierarchies {
-################################################################################
-### return a list of all hierarchy levels a newsgroup belongs to
-### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
-### IN : $Newsgroup : a newsgroup name
-### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
- my ($Newsgroup) = @_;
- my @Hierarchies;
- # strip trailing dots
- $Newsgroup =~ s/(.+)\.+$/$1/;
- # butcher newsgroup name by "." and add each hierarchy to @Hierarchies
- # i.e. de.alt.test: "de.alt" and "de"
- while ($Newsgroup =~ /\./) {
- $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
- push @Hierarchies, $Newsgroup;
- };
- return @Hierarchies;
-};
-
-################################################################################
-sub ReadGroupList {
-################################################################################
-### read a list of valid newsgroups from file (each group on one line,
-### ignoring everything after the first whitespace and so accepting files
-### in checkgroups format as well as (parts of) an INN active file)
-### IN : $Filename : file to read
-### OUT: \%ValidGroups: hash containing all valid newsgroups
- my ($Filename) = @_;
- my %ValidGroups;
- open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
- while (<$LIST>) {
- s/^\s*(\S+).*$/$1/;
- chomp;
- next if /^$/;
- $ValidGroups{$_} = '1';
- };
- close $LIST;
- return \%ValidGroups;
-};
-
-################################################################################
-
-#####----------------------------- TimePeriods ----------------------------#####
-
-################################################################################
-sub GetTimePeriod {
-################################################################################
-### get a time period to act on from --month option;
-### if empty, default to last month
-### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
-### OUT: $Verbal,$SQL: verbal description and WHERE-clause
-### of the chosen time period
- my ($Month) = @_;
- # define result variables
- my ($Verbal, $SQL);
- # define a regular expression for a month
- my $REMonth = '\d{4}-\d{2}';
-
- # default to last month if option is not set
- if(!$Month) {
- $Month = &LastMonth;
- }
-
- # check for valid input
- if ($Month =~ /^$REMonth$/) {
- # single month (YYYY-MM)
- ($Month) = &CheckMonth($Month);
- $Verbal = $Month;
- $SQL = sprintf("month = '%s'",$Month);
- } elsif ($Month =~ /^$REMonth:$REMonth$/) {
- # time period (YYYY-MM:YYYY-MM)
- $Verbal = sprintf('%s to %s',&SplitPeriod($Month));
- $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
- } elsif ($Month =~ /^all$/i) {
- # special case: ALL
- $Verbal = 'all time';
- $SQL = '';
- } else {
- # invalid input
- return (undef,undef);
- }
-
- return ($Verbal,$SQL);
-};
-
-################################################################################
-sub LastMonth {
-################################################################################
-### get last month from todays date in YYYY-MM format
-### OUT: last month as YYYY-MM
- # get today's date
- my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
- # $Month is already defined from 0 to 11, so no need to decrease it by 1
- $Year += 1900;
- if ($Month < 1) {
- $Month = 12;
- $Year--;
- };
- # return last month
- return sprintf('%4d-%02d',$Year,$Month);
-};
-
-################################################################################
-sub CheckMonth {
-################################################################################
-### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
-### otherwise, fix it
-### IN : @Month: array of month
-### OUT: @Month: a valid month
- my (@Month) = @_;
- foreach my $Month (@Month) {
- my ($OldMonth) = $Month;
- my ($CalMonth) = substr ($Month, -2);
- if ($CalMonth < 1 or $CalMonth > 12) {
- $CalMonth = '12' if $CalMonth > 12;
- $CalMonth = '01' if $CalMonth < 1;
- substr($Month, -2) = $CalMonth;
- &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
- "and '12'), set to '%s'.",$OldMonth,$Month));
- }
- }
- return @Month;
-};
-
-################################################################################
-sub SplitPeriod {
-################################################################################
-### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
-### IN : $Period: time period
-### OUT: $StartMonth, $EndMonth
- my ($Period) = @_;
- my ($StartMonth, $EndMonth) = split /:/, $Period;
- ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
- # switch parameters as necessary
- if ($EndMonth gt $StartMonth) {
- return ($StartMonth, $EndMonth);
- } else {
- return ($EndMonth, $StartMonth);
- };
-};
-
-################################################################################
-sub ListMonth {
-################################################################################
-### return a list of months (YYYY-MM) between start and end month
-### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
-### OUT: @Months: array containing all months from $MonthExpression enumerated
- my ($MonthExpression )= @_;
- # return if single month
- return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
- # parse $MonthExpression
- my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
- # set $Year, $Month from $StartMonth
- my ($Year, $Month) = split /-/, $StartMonth;
- # define @Months
- my (@Months);
- until ("$Year-$Month" gt $EndMonth) {
- push @Months, "$Year-$Month";
- $Month = "$Month"; # force string context
- $Month++;
- if ($Month > 12) {
- $Month = '01';
- $Year++;
- };
- };
- return @Months;
-};
-
-#####---------------------------- OutputFormats ---------------------------#####
-
-################################################################################
-sub OutputData {
-################################################################################
-### read database query results from DBHandle and print results with formatting
-### IN : $Format : format specifier
-### $Comments : print or suppress all comments for machine-readable output
-### $GroupBy : primary sorting order (month or key)
-### $Precision: number of digits right of decimal point (0 or 2)
-### $ValidKeys: reference to a hash containing all valid keys
-### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
-### $DBQuery : database query handle with executed query,
-### containing $Month, $Key, $Value
-### $PadField : padding length for key field (optional) for 'pretty'
-### $PadValue : padding length for value field (optional) for 'pretty'
- my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
- $DBQuery, $PadField, $PadValue) = @_;
- my %ValidKeys = %{$ValidKeys} if $ValidKeys;
- my ($FileName, $Handle, $OUT);
- our $LastIteration;
-
- # define output types
- my %LegalOutput;
- @LegalOutput{('dump','list','pretty')} = ();
- # bail out if format is unknown
- &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
-
- while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
- # don't display invalid keys
- if(%ValidKeys and !defined($ValidKeys{$Key})) {
- # FIXME
- # &Bleat(1,sprintf("DROPPED: %s",$Key));
- next;
- };
- # care for correct sorting order and abstract from month and keys:
- # $Caption will be $Month or $Key, according to sorting order,
- # and $Key will be $Key or $Month, respectively
- my $Caption;
- if ($GroupBy eq 'key') {
- $Caption = $Key;
- $Key = $Month;
- } else {
- $Caption = $Month;
- }
- # set output file handle
- if (!$FileTempl) {
- $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
- } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
- close $OUT if ($LastIteration);
- # safeguards for filename creation:
- # replace potential problem characters with '_'
- $FileName = sprintf('%s-%s',$FileTempl,$Caption);
- $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
- open ($OUT,">$FileName")
- or &Bleat(2,sprintf("Cannot open output file '%s': $!",
- $FileName));
- $Handle = $OUT;
- };
- print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
- $Precision, $PadField, $PadValue);
- $LastIteration = $Caption;
- };
- close $OUT if ($FileTempl);
-};
-
-################################################################################
-sub FormatOutput {
-################################################################################
-### format information for output according to format specifier
-### IN : $Format : format specifier
-### $Comments : print or suppress all comments for machine-readable output
-### $Caption : month (as YYYY-MM) or $Key, according to sorting order
-### $Key : newsgroup, client, ... or $Month, as above
-### $Value : number of postings with that attribute
-### $Precision: number of digits right of decimal point (0 or 2)
-### $PadField : padding length for key field (optional) for 'pretty'
-### $PadValue : padding length for value field (optional) for 'pretty'
-### OUT: $Output: formatted output
- my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
- $PadValue) = @_;
- my ($Output);
- # keep last caption in mind
- our ($LastIteration);
- # create one line of output
- if ($Format eq 'dump') {
- # output as dump (key value)
- $Output = sprintf ("# %s:\n",$Caption)
- if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
- $Output .= sprintf ("%s %u\n",$Key,$Value);
- } elsif ($Format eq 'list') {
- # output as list (caption key value)
- $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
- } elsif ($Format eq 'pretty') {
- # output as a table
- $Output = sprintf ("# ----- %s:\n",$Caption)
- if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
- # increase $PadValue for numbers with decimal point
- $PadValue += $Precision+1 if $Precision;
- # add padding if $PadField is set; $PadValue HAS to be set then
- $Output .= sprintf ($PadField ?
- sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
- "%s%s %.*f\n",$Key,$Comments ? ':' : '',
- $Precision,$Value);
- };
- return $Output;
-};
-
-#####------------------------- QueryModifications -------------------------#####
-
-################################################################################
-sub SQLHierarchies {
-################################################################################
-### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
-### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
-### true, accordingly)
-### IN : $ShowHierarchies: boolean value
-### OUT: SQL code
- my ($ShowHierarchies) = @_;
- return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
-};
-
-################################################################################
-sub GetMaxLength {
-################################################################################
-### get length of longest fields in future query result
-### IN : $DBHandle : database handle
-### $Table : table to query
-### $Field : field (key!, i.e. month, newsgroup, ...) to check
-### $Value : field (value!, i.e. postings) to check
-### $WhereClause : WHERE clause
-### $HavingClause: HAVING clause
-### @BindVars : bind variables for WHERE clause
-### OUT: $FieldLength : length of longest instance of $Field
-### $ValueLength : length of longest instance of $Value
- my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
- my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
- "MAX(%s) ".
- "FROM %s %s %s",$Field,,$Value,
- $Table,$WhereClause,$HavingClause ?
- 'GROUP BY newsgroup' . $HavingClause .
- ' ORDER BY LENGTH(newsgroup) '.
- 'DESC LIMIT 1': ''));
- $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
- "for '%s' from table '%s': ".
- "$DBI::errstr",$Field,$Table));
- my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
- my $ValueLength = length($ValueMax) if ($ValueMax);
- return ($FieldLength,$ValueLength);
-};
-
-################################################################################
-sub SQLSortOrder {
-################################################################################
-### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
-### $OptOrderBy (secondary sorting), both ascending or descending;
-### descending sorting order is done by adding '-desc'
-### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
-### $OrderBy: secondary sort by month/newsgroups (default)
-### or number of 'postings'
-### OUT: a SQL ORDER BY clause
- my ($GroupBy,$OrderBy) = @_;
- my ($GroupSort,$OrderSort) = ('','');
- # $GroupBy (primary sorting)
- if (!$GroupBy) {
- $GroupBy = 'month';
- } else {
- ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
- if ($GroupBy =~ /group/i) {
- $GroupBy = 'newsgroup';
- } else {
- $GroupBy = 'month';
- }
- }
- my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
- # $OrderBy (secondary sorting)
- if (!$OrderBy) {
- $OrderBy = $Secondary;
- } else {
- ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
- if ($OrderBy =~ /posting/i) {
- $OrderBy = "postings $OrderSort, $Secondary";
- } else {
- $OrderBy = "$Secondary $OrderSort";
- }
- }
- return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
-};
-
-################################################################################
-sub SQLParseOrder {
-################################################################################
-### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
-### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
-### IN : $OrderOption: order option (see above)
-### OUT: parameter to sort by,
-### sort order ('DESC' or nothing, meaning 'ASC')
- my ($OrderOption) = @_;
- my $SortOrder = '';
- if ($OrderOption =~ s/-?desc$//i) {
- $SortOrder = 'DESC';
- } else {
- $OrderOption =~ s/-?asc$//i
- }
- return ($OrderOption,$SortOrder);
-};
-
-################################################################################
-sub SQLGroupList {
-################################################################################
-### explode list of newsgroups separated by : (with wildcards)
-### to a SQL 'WHERE' expression
-### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
-### OUT: SQL code to become part of a 'WHERE' clause,
-### list of newsgroups for SQL bindings
- my ($Newsgroups) = @_;
- # substitute '*' wildcard with SQL wildcard character '%'
- $Newsgroups =~ s/\*/%/g;
- return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
- # just one newsgroup?
- return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
- my ($SQL,@WildcardGroups,@NoWildcardGroups);
- # list of newsgroups separated by ':'
- my @GroupList = split /:/, $Newsgroups;
- foreach (@GroupList) {
- if ($_ !~ /%/) {
- # add to list of newsgroup names WITHOUT wildcard
- push (@NoWildcardGroups,$_);
- } else {
- # add to list of newsgroup names WITH wildcard
- push (@WildcardGroups,$_);
- # add wildcard to SQL clause
- # 'OR' if SQL clause is not empty
- $SQL .= ' OR ' if $SQL;
- $SQL .= 'newsgroup LIKE ?'
- }
- };
- if (scalar(@NoWildcardGroups)) {
- # add 'OR' if SQL clause is not empty
- $SQL .= ' OR ' if $SQL;
- if (scalar(@NoWildcardGroups) < 2) {
- # special case: just one newsgroup without wildcard
- $SQL .= 'newsgroup = ?';
- } else {
- # create list of newsgroups to include: 'newsgroup IN (...)'
- $SQL .= 'newsgroup IN (';
- my $SQLin;
- foreach (@NoWildcardGroups) {
- $SQLin .= ',' if $SQLin;
- $SQLin .= '?';
- }
- # add list to SQL clause
- $SQL .= $SQLin .= ')';
- }
- }
- # add brackets '()' to SQL clause as needed (more than one wildcard group)
- if (scalar(@WildcardGroups)) {
- $SQL = '(' . $SQL .')';
- }
- # rebuild @GroupList in (now) correct order
- @GroupList = (@WildcardGroups,@NoWildcardGroups);
- return ($SQL,@GroupList);
-};
-
-################################################################################
-sub SQLGroupWildcard {
-################################################################################
-### build a valid SQL 'WHERE' expression with or without wildcards
-### IN : $Newsgroup: newsgroup expression, probably with wildcard
-### (group.name or group.name.%)
-### OUT: SQL code to become part of a 'WHERE' clause
- my ($Newsgroup) = @_;
- if ($Newsgroup !~ /%/) {
- return 'newsgroup = ?';
- } else {
- return 'newsgroup LIKE ?';
- }
-};
-
-################################################################################
-sub SQLSetBounds {
-################################################################################
-### set upper and/or lower boundary (number of postings)
-### IN : $Type: 'level', 'average', 'sum' or 'default'
-### $LowBound,$UppBound: lower/upper boundary, respectively
-### OUT: SQL code to become part of a WHERE or HAVING clause
- my ($Type,$LowBound,$UppBound) = @_;
- ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
- if($LowBound and $UppBound and $LowBound > $UppBound) {
- &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
- "$UppBound, exchanging boundaries.");
- ($LowBound,$UppBound) = ($UppBound,$LowBound);
- }
- # default to 'default'
- my $WhereHavingFunction = 'postings';
- # set $LowBound to SQL statement:
- # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
- if ($Type eq 'level') {
- $WhereHavingFunction = 'MIN(postings)'
- } elsif ($Type eq 'average') {
- $WhereHavingFunction = 'AVG(postings)'
- } elsif ($Type eq 'sum') {
- $WhereHavingFunction = 'SUM(postings)'
- }
- $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
- # set $LowBound to SQL statement:
- # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
- if ($Type eq 'level') {
- $WhereHavingFunction = 'MAX(postings)'
- } elsif ($Type eq 'average') {
- $WhereHavingFunction = 'AVG(postings)'
- } elsif ($Type eq 'sum') {
- $WhereHavingFunction = 'SUM(postings)'
- }
- $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
- return ($LowBound,$UppBound);
-};
-
-################################################################################
-sub SQLCheckNumber {
-################################################################################
-### check if input is a valid positive integer; otherwise, make it one
-### IN : @Numbers: array of parameters
-### OUT: @Numbers: a valid positive integer
- my (@Numbers) = @_;
- foreach my $Number (@Numbers) {
- if ($Number and $Number < 0) {
- &Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
- $Number = -$Number;
- }
- $Number = '' if ($Number and $Number !~ /^\d+$/);
- }
- return @Numbers;
-};
-
-################################################################################
-sub SQLBuildClause {
-################################################################################
-### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
-### from multiple expressions which *may* be empty
-### IN : $Type: 'where', 'having', 'group' or 'order'
-### @Expressions: array of expressions
-### OUT: $SQLClause: a SQL clause
- my ($Type,@Expressions) = @_;
- my ($SQLClause,$Separator,$Statement);
- # set separator ('AND' or ',')
- if ($Type eq 'where' or $Type eq 'having') {
- $Separator = 'AND';
- } else {
- $Separator = ',';
- }
- # set statement
- if ($Type eq 'where') {
- $Statement = 'WHERE';
- } elsif ($Type eq 'order') {
- $Statement = 'ORDER BY';
- } elsif ($Type eq 'having') {
- $Statement = 'HAVING';
- } else {
- $Statement = 'GROUP BY';
- }
- # build query from expressions with separators
- foreach my $Expression (@Expressions) {
- if ($Expression) {
- $SQLClause .= " $Separator " if ($SQLClause);
- $SQLClause .= $Expression;
- }
- }
- # add statement in front if not already present
- $SQLClause = " $Statement " . $SQLClause
- if ($SQLClause and $SQLClause !~ /$Statement/);
- return $SQLClause;
-};
-
-#####--------------------------- Verifications ----------------------------#####
-
-################################################################################
-sub CheckValidNewsgroups {
-################################################################################
-### syntax check of newgroup list
-### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
-### OUT: boolean
- my ($Newsgroups) = @_;
- my $InvalidCharRegExp = ',; ';
- return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
-};
-
-
-#####------------------------------- done ---------------------------------#####
-1;
-
-
--- /dev/null
+#! /usr/bin/perl
+#
+# feedlog.pl
+#
+# This script will log headers and other data to a database
+# for further analysis by parsing a feed from INN.
+#
+# It is part of the NewsStats package.
+#
+# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+#
+# It can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+BEGIN {
+ our $VERSION = "0.01";
+ use File::Basename;
+ # we're in .../bin, so our module is in ../lib
+ push(@INC, dirname($0).'/../lib');
+}
+use strict;
+use warnings;
+
+use NewsStats;
+
+use Sys::Syslog qw(:standard :macros);
+
+use Date::Format;
+use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
+
+################################# Subroutines ##################################
+
+sub PrepareDB {
+### initialise database connection, prepare statement
+### and catch errors
+### IN : \%Conf : reference to configuration hash
+### OUT: $DBHandle: database handle
+### $DBQuery : prepared statement
+ our ($DBHandle, $DBQuery, $OptQuiet);
+ my ($ConfigR) = @_;
+ my %Conf = %$ConfigR;
+ # drop current database connection - hard, if necessary
+ if ($DBHandle) {
+ $DBHandle->disconnect;
+ undef $DBHandle;
+ };
+ # connect to database; try again every 5 seconds
+ while (!$DBHandle) {
+ $DBHandle = InitDB($ConfigR,0);
+ if (!$DBHandle) {
+ syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
+ sleep(5);
+ } else {;
+ syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet;
+ }
+ };
+ $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,
+ timestamp,token,size,peer,path,
+ newsgroups,headers)
+ VALUES (?,?,?,?,?,?,?,?,?,?)",
+ $Conf{'DBDatabase'},
+ $Conf{'DBTableRaw'}));
+ return ($DBHandle,$DBQuery);
+}
+
+
+################################# Main program #################################
+
+### read commandline options
+my ($OptDebug,$OptQuiet);
+GetOptions ('d|debug!' => \$OptDebug,
+ 'q|test!' => \$OptQuiet,
+ 'h|help' => \&ShowPOD,
+ 'V|version' => \&ShowVersion) or exit 1;
+
+### read configuration
+my %Conf = %{ReadConfig('')};
+
+### init syslog
+openlog($0, 'nofatal,pid', LOG_NEWS);
+syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
+
+### init database
+my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
+
+### main loop
+while (<>) {
+ chomp;
+ # catch empty lines trailing or leading
+ if ($_ eq '') {
+ next;
+ }
+ # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
+ my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
+ # remaining lines contain headers
+ my $Headers = "";
+ while (<>) {
+ chomp;
+ # empty line terminates this article
+ if ($_ eq '') {
+ last;
+ }
+ # collect headers
+ $Headers .= $_."\n" ;
+ }
+
+ # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
+ my $Day = time2str("%Y-%m-%d", $Timestamp);
+ my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
+
+ # write to database
+ if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
+ $Path, $Newsgroups, $Headers)) {
+ syslog(LOG_ERR, 'Database error %s while processing %s: %s',
+ $DBI::err, $Mid, $DBI::errstr);
+ # if "MySQL server has gone away", try to recover
+ if ($DBI::err == 2006) {
+ # try to reconnect to database
+ ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
+ # try to repeat the write attempt as before
+ if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
+ $Path, $Newsgroups, $Headers)) {
+ syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
+ };
+ # otherwise log missing posting
+ } else {
+ syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
+ };
+ };
+ $DBQuery->finish;
+
+ warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
+ "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
+ $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
+ $Newsgroups, $Headers) if $OptDebug;
+}
+
+### close handles
+$DBHandle->disconnect;
+syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
+closelog();
+
+__END__
+
+################################ Documentation #################################
+
+=head1 NAME
+
+feedlog - log data from an INN feed to a database
+
+=head1 SYNOPSIS
+
+B<feedlog> [B<-Vhdq>]
+
+=head1 REQUIREMENTS
+
+See L<doc/README>.
+
+=head1 DESCRIPTION
+
+This script will log overview data and complete headers to a database
+table for further examination by parsing a feed from INN. It will
+parse that information and write it to a mysql database table in real
+time.
+
+All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
+fails to initiate a database connection at startup, it will log to
+I<syslog> with I<CRIT> priority and go in an endless loop, as
+terminating would only result in a rapid respawn.
+
+=head2 Configuration
+
+B<feedlog> will read its configuration from F<newsstats.conf> which
+should be present in the same directory via Config::Auto.
+
+See L<doc/INSTALL> for an overview of possible configuration options.
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-V>, B<--version>
+
+Print out version and copyright information and exit.
+
+=item B<-h>, B<--help>
+
+Print this man page and exit.
+
+=item B<-d>, B<--debug>
+
+Output debugging information to STDERR while parsing STDIN. You'll
+find that information most probably in your B<INN> F<errlog> file.
+
+=item B<-q>, B<--quiet>
+
+Suppress logging to syslog.
+
+=back
+
+=head1 INSTALLATION
+
+See L<doc/INSTALL>.
+
+=head1 EXAMPLES
+
+Set up a feed like that in your B<INN> F<newsfeeds> file:
+
+ ## gather statistics for NewsStats
+ newsstats!
+ :!*,de.*
+ :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
+
+See L<doc/INSTALL> for further information.
+
+=head1 FILES
+
+=over 4
+
+=item F<bin/feedlog.pl>
+
+The script itself.
+
+=item F<lib/NewsStats.pm>
+
+Library functions for the NewsStats package.
+
+=item F<etc/newsstats.conf>
+
+Runtime configuration file.
+
+=back
+
+=head1 BUGS
+
+Please report any bugs or feature requests to the author or use the
+bug tracker at L<http://bugs.th-h.de/>!
+
+=head1 SEE ALSO
+
+=over 2
+
+=item -
+
+L<doc/README>
+
+=item -
+
+L<doc/INSTALL>
+
+=back
+
+This script is part of the B<NewsStats> package.
+
+=head1 AUTHOR
+
+Thomas Hochstein <thh@inter.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
--- /dev/null
+#! /usr/bin/perl
+#
+# gatherstats.pl
+#
+# This script will gather statistical information from a database
+# containing headers and other information from a INN feed.
+#
+# It is part of the NewsStats package.
+#
+# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+#
+# It can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+BEGIN {
+ our $VERSION = "0.01";
+ use File::Basename;
+ # we're in .../bin, so our module is in ../lib
+ push(@INC, dirname($0).'/../lib');
+}
+use strict;
+use warnings;
+
+use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
+
+use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
+
+################################# Definitions ##################################
+
+# define types of information that can be gathered
+# all / groups (/ clients / hosts)
+my %LegalStats;
+@LegalStats{('all','groups')} = ();
+
+################################# Main program #################################
+
+### read commandline options
+my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
+ $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
+GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
+ 'clientsdb=s' => \$OptClientsDB,
+ 'd|debug!' => \$OptDebug,
+ 'groupsdb=s' => \$OptGroupsDB,
+ 'hierarchy=s' => \$OptTLH,
+ 'hostsdb=s' => \$OptHostsDB,
+ 'm|month=s' => \$OptMonth,
+ 'rawdb=s' => \$OptRawDB,
+ 's|stats=s' => \$OptStatsType,
+ 't|test!' => \$OptTest,
+ 'h|help' => \&ShowPOD,
+ 'V|version' => \&ShowVersion) or exit 1;
+
+### read configuration
+my %Conf = %{ReadConfig('')};
+
+### override configuration via commandline options
+my %ConfOverride;
+$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
+$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
+$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
+$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
+$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
+&OverrideConfig(\%Conf,\%ConfOverride);
+
+### get type of information to gather, defaulting to 'all'
+$OptStatsType = 'all' if !$OptStatsType;
+&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
+ if !exists($LegalStats{$OptStatsType});
+
+### get time period from --month
+# get verbal description of time period, drop SQL code
+my ($Period) = &GetTimePeriod($OptMonth);
+&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
+ "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
+
+### reformat $Conf{'TLH'}
+my $TLH;
+if ($Conf{'TLH'}) {
+ # $Conf{'TLH'} is parsed as an array by Config::Auto;
+ # make a flat list again, separated by :
+ if (ref($Conf{'TLH'}) eq 'ARRAY') {
+ $TLH = join(':',@{$Conf{'TLH'}});
+ } else {
+ $TLH = $Conf{'TLH'};
+ }
+ # strip whitespace
+ $TLH =~ s/\s//g;
+ # add trailing dots if none are present yet
+ # (using negative look-behind assertions)
+ $TLH =~ s/(?<!\.):/.:/g;
+ $TLH =~ s/(?<!\.)$/./;
+ # check for illegal characters
+ &Bleat(2,'Config error - illegal characters in TLH definition!')
+ if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
+ # escape dots
+ $TLH =~ s/\./\\./g;
+ if ($TLH =~ /:/) {
+ # reformat $TLH from a:b to (a)|(b),
+ # e.g. replace ':' by ')|('
+ $TLH =~ s/:/)|(/g;
+ $TLH = '(' . $TLH . ')';
+ };
+};
+
+### init database
+my $DBHandle = InitDB(\%Conf,1);
+
+### get data for each month
+&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
+foreach my $Month (&ListMonth($Period)) {
+
+ print "---------- $Month ----------\n" if $OptDebug;
+
+ if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
+ # read list of newsgroups from --checkgroups
+ # into a hash
+ my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
+ if $OptCheckgroupsFile;
+
+ ### ----------------------------------------------
+ ### get groups data (number of postings per group)
+ # get groups data from raw table for given month
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
+ "WHERE day LIKE ? AND NOT disregard",
+ $Conf{'DBDatabase'},
+ $Conf{'DBTableRaw'}));
+ $DBQuery->execute($Month.'-%')
+ or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
+ "$DBI::errstr\n",$Month,
+ $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
+
+ # count postings per group
+ my %Postings;
+ while (($_) = $DBQuery->fetchrow_array) {
+ # get list of newsgroups and hierarchies from Newsgroups:
+ my %Newsgroups = ListNewsgroups($_,$TLH,
+ $OptCheckgroupsFile ? \%ValidGroups : '');
+ # count each newsgroup and hierarchy once
+ foreach (sort keys %Newsgroups) {
+ $Postings{$_}++;
+ };
+ };
+
+ # add valid but empty groups if --checkgroups is set
+ if (%ValidGroups) {
+ foreach (sort keys %ValidGroups) {
+ if (!defined($Postings{$_})) {
+ # add current newsgroup as empty group
+ $Postings{$_} = 0;
+ warn (sprintf("ADDED: %s as empty group\n",$_));
+ # add empty hierarchies for current newsgroup as needed
+ foreach (ParseHierarchies($_)) {
+ my $Hierarchy = $_ . '.ALL';
+ if (!defined($Postings{$Hierarchy})) {
+ $Postings{$Hierarchy} = 0;
+ warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
+ };
+ };
+ }
+ };
+ };
+
+ # delete old data for that month
+ if (!$OptTest) {
+ $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
+ undef,$Month)
+ or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
+ "$DBI::errstr\n",$Month,
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
+ };
+
+ print "----- GroupStats -----\n" if $OptDebug;
+ foreach my $Newsgroup (sort keys %Postings) {
+ print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
+ if (!$OptTest) {
+ # write to database
+ $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
+ "(month,newsgroup,postings) ".
+ "VALUES (?, ?, ?)",
+ $Conf{'DBDatabase'},
+ $Conf{'DBTableGrps'}));
+ $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
+ or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
+ "$DBI::errstr\n",$Month,$Newsgroup,
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
+ $DBQuery->finish;
+ };
+ };
+ } else {
+ # other types of information go here - later on
+ };
+};
+
+### close handles
+$DBHandle->disconnect;
+
+__END__
+
+################################ Documentation #################################
+
+=head1 NAME
+
+gatherstats - process statistical data from a raw source
+
+=head1 SYNOPSIS
+
+B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
+
+=head1 REQUIREMENTS
+
+See L<doc/README>.
+
+=head1 DESCRIPTION
+
+This script will extract and process statistical information from a
+database table which is fed from F<feedlog.pl> for a given time period
+and write its results to (an)other database table(s). Entries marked
+with I<'disregard'> in the database will be ignored; currently, you
+have to set this flag yourself, using your database management tools.
+You can exclude erroneous entries that way (e.g. automatic reposts
+(think of cancels flood and resurrectors); spam; ...).
+
+The time period to act on defaults to last month; you can assign
+another time period or a single month via the B<--month> option (see
+below).
+
+By default B<gatherstats> will process all types of information; you
+can change that using the B<--stats> option and assigning the type of
+information to process. Currently that doesn't matter yet as only
+processing of the number of postings per group per month is
+implemented anyway.
+
+Possible information types include:
+
+=over 3
+
+=item B<groups> (postings per group per month)
+
+B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
+counted for each single group they appear in. Groups not in I<TLH>
+will be ignored.
+
+B<gatherstats> will also add up the number of postings for each
+hierarchy level, but only count each posting once. A posting to
+de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
+respectively. A crossposting to de.alt.test and de.alt.admin, on the
+other hand, will be counted for de.alt.test and de.alt.admin each, but
+only once for de.alt.ALL and de.ALL.
+
+Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
+override that default through the B<--groupsdb> option.
+
+=back
+
+=head2 Configuration
+
+B<gatherstats> will read its configuration from F<newsstats.conf>
+which should be present in the same directory via Config::Auto.
+
+See L<doc/INSTALL> for an overview of possible configuration options.
+
+You can override configuration options via the B<--hierarchy>,
+B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
+respectively.
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-V>, B<--version>
+
+Print out version and copyright information and exit.
+
+=item B<-h>, B<--help>
+
+Print this man page and exit.
+
+=item B<-d>, B<--debug>
+
+Output debugging information to STDOUT while processing (number of
+postings per group).
+
+=item B<-t>, B<--test>
+
+Do not write results to database. You should use B<--debug> in
+conjunction with B<--test> ... everything else seems a bit pointless.
+
+=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
+
+Set processing period to a single month in YYYY-MM format or to a time
+period between two month in YYYY-MM:YYYY-MM format (two month, separated
+by a colon).
+
+=item B<-s>, B<--stats> I<type>
+
+Set processing type to one of I<all> and I<groups>. Defaults to all
+(and is currently rather pointless as only I<groups> has been
+implemented).
+
+=item B<-c>, B<--checkgroups> I<filename template>
+
+Check each group against a list of valid newsgroups read from a file,
+one group on each line and ignoring everything after the first
+whitespace (so you can use a file in checkgroups format or (part of)
+your INN active file).
+
+The filename is taken from I<filename template>, amended by each
+B<--month> B<gatherstats> is processing in the form of I<template-YYYY-MM>,
+so that
+
+ gatherstats -m 2010-01:2010-12 -c checkgroups
+
+will check against F<checkgroups-2010-01> for January 2010, against
+F<checkgroups-2010-02> for February 2010 and so on.
+
+Newsgroups not found in the checkgroups file will be dropped (and
+logged to STDERR), and newsgroups found there but having no postings
+will be added with a count of 0 (and logged to STDERR).
+
+=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
+
+Override I<TLH> from F<newsstats.conf>.
+
+=item B<--rawdb> I<table> (raw data table)
+
+Override I<DBTableRaw> from F<newsstats.conf>.
+
+=item B<--groupsdb> I<table> (postings per group table)
+
+Override I<DBTableGrps> from F<newsstats.conf>.
+
+=item B<--clientsdb> I<table> (client data table)
+
+Override I<DBTableClnts> from F<newsstats.conf>.
+
+=item B<--hostsdb> I<table> (host data table)
+
+Override I<DBTableHosts> from F<newsstats.conf>.
+
+=back
+
+=head1 INSTALLATION
+
+See L<doc/INSTALL>.
+
+=head1 EXAMPLES
+
+Process all types of information for lasth month:
+
+ gatherstats
+
+Do a dry run, showing results of processing:
+
+ gatherstats --debug --test
+
+Process all types of information for January of 2010:
+
+ gatherstats --month 2010-01
+
+Process only number of postings for the year of 2010,
+checking against checkgroups-*:
+
+ gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
+
+=head1 FILES
+
+=over 4
+
+=item F<bin/gatherstats.pl>
+
+The script itself.
+
+=item F<lib/NewsStats.pm>
+
+Library functions for the NewsStats package.
+
+=item F<etc/newsstats.conf>
+
+Runtime configuration file.
+
+=back
+
+=head1 BUGS
+
+Please report any bugs or feature requests to the author or use the
+bug tracker at L<http://bugs.th-h.de/>!
+
+=head1 SEE ALSO
+
+=over 2
+
+=item -
+
+L<doc/README>
+
+=item -
+
+L<doc/INSTALL>
+
+=back
+
+This script is part of the B<NewsStats> package.
+
+=head1 AUTHOR
+
+Thomas Hochstein <thh@inter.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
--- /dev/null
+#! /usr/bin/perl
+#
+# groupstats.pl
+#
+# This script will get statistical data on newgroup usage
+# from a database.
+#
+# It is part of the NewsStats package.
+#
+# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+#
+# It can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+BEGIN {
+ our $VERSION = "0.01";
+ use File::Basename;
+ # we're in .../bin, so our module is in ../lib
+ push(@INC, dirname($0).'/../lib');
+}
+use strict;
+use warnings;
+
+use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
+
+use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
+
+################################# Main program #################################
+
+### read commandline options
+my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
+ $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
+ $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
+GetOptions ('b|boundary=s' => \$OptBoundType,
+ 'c|captions!' => \$OptCaptions,
+ 'checkgroups=s' => \$OptCheckgroupsFile,
+ 'comments!' => \$OptComments,
+ 'filetemplate=s' => \$OptFileTemplate,
+ 'f|format=s' => \$OptFormat,
+ 'g|group-by=s' => \$OptGroupBy,
+ 'groupsdb=s' => \$OptGroupsDB,
+ 'l|lower=i' => \$LowBound,
+ 'm|month=s' => \$OptMonth,
+ 'n|newsgroups=s' => \$OptNewsgroups,
+ 'o|order-by=s' => \$OptOrderBy,
+ 'r|report=s' => \$OptReportType,
+ 's|sums!' => \$OptSums,
+ 'u|upper=i' => \$UppBound,
+ 'h|help' => \&ShowPOD,
+ 'V|version' => \&ShowVersion) or exit 1;
+# parse parameters
+# $OptComments defaults to TRUE
+$OptComments = 1 if (!defined($OptComments));
+# force --nocomments when --filetemplate is used
+$OptComments = 0 if ($OptFileTemplate);
+# parse $OptBoundType
+if ($OptBoundType) {
+ if ($OptBoundType =~ /level/i) {
+ $OptBoundType = 'level';
+ } elsif ($OptBoundType =~ /av(era)?ge?/i) {
+ $OptBoundType = 'average';
+ } elsif ($OptBoundType =~ /sums?/i) {
+ $OptBoundType = 'sum';
+ } else {
+ $OptBoundType = 'default';
+ }
+}
+# parse $OptReportType
+if ($OptReportType) {
+ if ($OptReportType =~ /av(era)?ge?/i) {
+ $OptReportType = 'average';
+ } elsif ($OptReportType =~ /sums?/i) {
+ $OptReportType = 'sum';
+ } else {
+ $OptReportType = 'default';
+ }
+}
+# read list of newsgroups from --checkgroups
+# into a hash reference
+my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
+
+### read configuration
+my %Conf = %{ReadConfig('')};
+
+### override configuration via commandline options
+my %ConfOverride;
+$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
+&OverrideConfig(\%Conf,\%ConfOverride);
+
+### init database
+my $DBHandle = InitDB(\%Conf,1);
+
+### get time period and newsgroups, prepare SQL 'WHERE' clause
+# get time period
+# and set caption for output and expression for SQL 'WHERE' clause
+my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
+# bail out if --month is invalid
+&Bleat(2,"--month option has an invalid format - ".
+ "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
+# get list of newsgroups and set expression for SQL 'WHERE' clause
+# with placeholders as well as a list of newsgroup to bind to them
+my ($SQLWhereNewsgroups,@SQLBindNewsgroups);
+if ($OptNewsgroups) {
+ ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups);
+ # bail out if --newsgroups is invalid
+ &Bleat(2,"--newsgroups option has an invalid format!")
+ if !$SQLWhereNewsgroups;
+}
+
+### build SQL WHERE clause (and HAVING clause, if needed)
+my ($SQLWhereClause,$SQLHavingClause);
+# $OptBoundType 'level'
+if ($OptBoundType and $OptBoundType ne 'default') {
+ $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
+ $SQLWhereNewsgroups,&SQLHierarchies($OptSums));
+ $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType,
+ $LowBound,$UppBound));
+# $OptBoundType 'threshold' / 'default' or none
+} else {
+ $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
+ $SQLWhereNewsgroups,&SQLHierarchies($OptSums),
+ &SQLSetBounds('default',$LowBound,$UppBound));
+}
+
+### get sort order and build SQL 'ORDER BY' clause
+# default to 'newsgroup' for $OptBoundType 'level' or 'average'
+$OptGroupBy = 'newsgroup' if (!$OptGroupBy and
+ $OptBoundType and $OptBoundType ne 'default');
+# force to 'month' for $OptReportType 'average' or 'sum'
+$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
+# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
+my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
+# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
+# set it to 'month' or 'key' for OutputData()
+$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
+
+### get report type and build SQL 'SELECT' query
+my $SQLSelect;
+my $SQLGroupClause = '';
+my $Precision = 0; # number of digits right of decimal point for output
+if ($OptReportType and $OptReportType ne 'default') {
+ $SQLGroupClause = 'GROUP BY newsgroup';
+ # change $SQLOrderClause: replace everything before 'postings'
+ $SQLOrderClause =~ s/BY.+postings/BY postings/;
+ if ($OptReportType eq 'average') {
+ $SQLSelect = "'All months',newsgroup,AVG(postings)";
+ $Precision = 2;
+ # change $SQLOrderClause: replace 'postings' with 'AVG(postings)'
+ $SQLOrderClause =~ s/postings/AVG(postings)/;
+ } elsif ($OptReportType eq 'sum') {
+ $SQLSelect = "'All months',newsgroup,SUM(postings)";
+ # change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
+ $SQLOrderClause =~ s/postings/SUM(postings)/;
+ }
+ } else {
+ $SQLSelect = 'month,newsgroup,postings';
+};
+
+### get length of longest newsgroup name delivered by query
+### for formatting purposes
+my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
+my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},
+ $Field,'postings',$SQLWhereClause,
+ $SQLHavingClause,
+ @SQLBindNewsgroups);
+
+### build and execute SQL query
+my ($DBQuery);
+# special query preparation for $OptBoundType 'level', 'average' or 'sums'
+if ($OptBoundType and $OptBoundType ne 'default') {
+ # prepare and execute first query:
+ # get list of newsgroups meeting level conditions
+ $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '.
+ 'GROUP BY newsgroup %s',
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'},
+ $SQLWhereClause,$SQLHavingClause));
+ $DBQuery->execute(@SQLBindNewsgroups)
+ or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
+ $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
+ $DBI::errstr));
+ # add newsgroups to a comma-seperated list ready for IN(...) query
+ my $GroupList;
+ while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
+ $GroupList .= ',' if $GroupList;
+ $GroupList .= "'$Newsgroup'";
+ };
+ # enhance $WhereClause
+ if ($GroupList) {
+ $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,
+ sprintf('newsgroup IN (%s)',$GroupList));
+ } else {
+ # condition cannot be satisfied;
+ # force query to fail by adding '0=1'
+ $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1');
+ }
+}
+
+# prepare query
+$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
+ $SQLSelect,
+ $Conf{'DBDatabase'},$Conf{'DBTableGrps'},
+ $SQLWhereClause,$SQLGroupClause,
+ $SQLOrderClause));
+
+# execute query
+$DBQuery->execute(@SQLBindNewsgroups)
+ or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
+ $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
+ $DBI::errstr));
+
+### output results
+# set default to 'pretty'
+$OptFormat = 'pretty' if !$OptFormat;
+# print captions if --caption is set
+if ($OptCaptions && $OptComments) {
+ # print time period with report type
+ my $CaptionReportType= '(number of postings for each month)';
+ if ($OptReportType and $OptReportType ne 'default') {
+ $CaptionReportType= '(average number of postings for each month)'
+ if $OptReportType eq 'average';
+ $CaptionReportType= '(number of all postings for that time period)'
+ if $OptReportType eq 'sum';
+ }
+ printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
+ # print newsgroup list if --newsgroups is set
+ printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
+ if $OptNewsgroups;
+ # print boundaries, if set
+ my $CaptionBoundary= '(counting only month fulfilling this condition)';
+ if ($OptBoundType and $OptBoundType ne 'default') {
+ $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level';
+ $CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
+ $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
+ }
+ printf("# ----- Threshold: %s %s x %s %s %s\n",
+ $LowBound ? $LowBound : '',$LowBound ? '=>' : '',
+ $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
+ if ($LowBound or $UppBound);
+ # print primary and secondary sort order
+ printf("# ----- Grouped by %s (%s), sorted %s%s\n",
+ ($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
+ ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
+ ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
+ ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
+}
+
+# output data
+&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
+ $OptCheckgroupsFile ? $ValidGroups : '',
+ $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
+
+### close handles
+$DBHandle->disconnect;
+
+__END__
+
+################################ Documentation #################################
+
+=head1 NAME
+
+groupstats - create reports on newsgroup usage
+
+=head1 SYNOPSIS
+
+B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
+
+=head1 REQUIREMENTS
+
+See L<doc/README>.
+
+=head1 DESCRIPTION
+
+This script create reports on newsgroup usage (number of postings per
+group per month) taken from result tables created by
+B<gatherstats.pl>.
+
+=head2 Features and options
+
+=head3 Time period and newsgroups
+
+The time period to act on defaults to last month; you can assign another
+time period or a single month (or drop all time constraints) via the
+B<--month> option (see below).
+
+B<groupstats> will process all newsgroups by default; you can limit
+processing to only some newsgroups by supplying a list of those groups via
+B<--newsgroups> option (see below). You can include hierarchy levels in
+the output by adding the B<--sums> switch (see below). Optionally
+newsgroups not present in a checkgroups file can be excluded from output,
+sse B<--checkgroups> below.
+
+=head3 Report type
+
+You can choose between different B<--report> types: postings per month,
+average postings per month or all postings summed up; for details, see
+below.
+
+=head3 Upper and lower boundaries
+
+Furthermore you can set an upper and/or lower boundary to exclude some
+results from output via the B<--lower> and B<--upper> options,
+respectively. By default, all newsgroups with more and/or less postings
+per month will be excluded from the result set (i.e. not shown and not
+considered for average and sum reports). You can change the meaning of
+those boundaries with the B<--boundary> option. For details, please see
+below.
+
+=head3 Sorting and formatting the output
+
+By default, all results are grouped by month; you can group results by
+newsgroup instead via the B<--groupy-by> option. Within those groups, the
+list of newsgroups (or months) is sorted alphabetically (or
+chronologically, respectively) ascending. You can change that order (and
+sort by number of postings) with the B<--order-by> option. For details and
+exceptions, please see below.
+
+The results will be formatted as a kind of table; you can change the
+output format to a simple list or just a list of newsgroups and number of
+postings with the B<--format> option. Captions will be added by means of
+the B<--caption> option; all comments (and captions) can be supressed by
+using B<--nocomments>.
+
+Last but not least you can redirect all output to a number of files, e.g.
+one for each month, by submitting the B<--filetemplate> option, see below.
+Captions and comments are automatically disabled in this case.
+
+=head2 Configuration
+
+B<groupstats> will read its configuration from F<newsstats.conf>
+which should be present in the same directory via Config::Auto.
+
+See doc/INSTALL for an overview of possible configuration options.
+
+You can override some configuration options via the B<--groupsdb> option.
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-V>, B<--version>
+
+Print out version and copyright information and exit.
+
+=item B<-h>, B<--help>
+
+Print this man page and exit.
+
+=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
+
+Set processing period to a single month in YYYY-MM format or to a time
+period between two month in YYYY-MM:YYYY-MM format (two month, separated
+by a colon). By using the keyword I<all> instead, you can set no
+processing period to process the whole database.
+
+=item B<-n>, B<--newsgroups> I<newsgroup(s)>
+
+Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
+be a single newsgroup name (de.alt.test), a newsgroup hierarchy
+(de.alt.*) or a list of either of these, separated by colons, for
+example
+
+ de.test:de.alt.test:de.newusers.*
+
+=item B<-s>, B<--sums|--nosums> (sum per hierarchy level)
+
+Include "virtual" groups for every hierarchy level in output, for
+example:
+
+ de.alt.ALL 10
+ de.alt.test 5
+ de.alt.admin 7
+
+See the B<gatherstats> man page for details.
+
+=item B<--checkgroups> I<filename>
+
+Restrict output to those newgroups present in a file in checkgroups format
+(one newgroup name per line; everything after the first whitespace on each
+line is ignored). All other newsgroups will be removed from output.
+
+Contrary to B<gatherstats>, I<filename> is not a template, but refers to
+a single file in checkgroups format.
+
+=item B<-r>, B<--report> I<default|average|sums>
+
+Choose the report type: I<default>, I<average> or I<sums>
+
+By default, B<groupstats> will report the number of postings for each
+newsgroup in each month. But it can also report the average number of
+postings per group for all months or the total sum of postings per group
+for all months.
+
+For report types I<average> and I<sums>, the B<group-by> option has no
+meaning and will be silently ignored (see below).
+
+=item B<-l>, B<--lower> I<lower boundary>
+
+Set the lower boundary. See B<--boundary> below.
+
+=item B<-l>, B<--upper> I<upper boundary>
+
+Set the upper boundary. See B<--boundary> below.
+
+=item B<-b>, B<--boundary> I<boundary type>
+
+Set the boundary type to one of I<default>, I<level>, I<average> or
+I<sums>.
+
+By default, all newsgroups with more postings per month than the upper
+boundary and/or less postings per month than the lower boundary will be
+excluded from further processing. For the default report that means each
+month only newsgroups with a number of postings between the boundaries
+will be displayed. For the other report types, newsgroups with a number of
+postings exceeding the boundaries in all (!) months will not be
+considered.
+
+For example, lets take a list of newsgroups like this:
+
+ ----- 2012-01:
+ de.comp.datenbanken.misc 6
+ de.comp.datenbanken.ms-access 84
+ de.comp.datenbanken.mysql 88
+ ----- 2012-02:
+ de.comp.datenbanken.misc 8
+ de.comp.datenbanken.ms-access 126
+ de.comp.datenbanken.mysql 21
+ ----- 2012-03:
+ de.comp.datenbanken.misc 24
+ de.comp.datenbanken.ms-access 83
+ de.comp.datenbanken.mysql 36
+
+With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>,
+you'll get the following result:
+
+ ----- All months:
+ de.comp.datenbanken.ms-access 293
+ de.comp.datenbanken.mysql 124
+
+de.comp.datenbanken.misc has not been considered even though it has 38
+postings in total, because it has less than 25 postings in every single
+month. If you want to list all newsgroups with more than 25 postings
+I<in total>, you'll have to set the boundary type to I<sum>, see below.
+
+A boundary type of I<level> will show only those newsgroups - at all -
+that satisfy the boundaries in each and every single month. With the above
+list of newsgroups and
+C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
+you'll get this result:
+
+ ----- All months:
+ de.comp.datenbanken.ms-access 293
+
+de.comp.datenbanken.mysql has not been considered because it had less than
+25 postings in 2012-02 (only).
+
+You can use that to get a list of newsgroups that have more (or less) then
+x postings in every month during the whole reporting period.
+
+A boundary type of I<average> will show only those newsgroups - at all -that
+satisfy the boundaries on average. With the above list of newsgroups and
+C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
+you'll get this result:
+
+ ----- All months:
+ de.comp.datenbanken.ms-access 293
+ de.comp.datenbanken.mysql 145
+
+The average number of postings in the three groups is:
+
+ de.comp.datenbanken.misc 12.67
+ de.comp.datenbanken.ms-access 97.67
+ de.comp.datenbanken.mysql 48.33
+
+Last but not least, a boundary type of I<sums> will show only those
+newsgroups - at all - that satisfy the boundaries with the total sum of
+all postings during the reporting period. With the above list of
+newsgroups and
+C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
+you'll finally get this result:
+
+ ----- All months:
+ de.comp.datenbanken.misc 38
+ de.comp.datenbanken.ms-access 293
+ de.comp.datenbanken.mysql 145
+
+
+=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
+
+By default, all results are grouped by month, sorted chronologically in
+ascending order, like this:
+
+ ----- 2012-01:
+ de.comp.datenbanken.ms-access 84
+ de.comp.datenbanken.mysql 88
+ ----- 2012-02:
+ de.comp.datenbanken.ms-access 126
+ de.comp.datenbanken.mysql 21
+
+The results can be grouped by newsgroups instead via
+B<--group-by> I<newsgroup>:
+
+ ----- de.comp.datenbanken.ms-access:
+ 2012-01 84
+ 2012-02 126
+ ----- de.comp.datenbanken.mysql:
+ 2012-01 88
+ 2012-02 21
+
+By appending I<-desc> to the group-by option parameter, you can reverse
+the sort order - e.g. B<--group-by> I<month-desc> will give:
+
+ ----- 2012-02:
+ de.comp.datenbanken.ms-access 126
+ de.comp.datenbanken.mysql 21
+ ----- 2012-01:
+ de.comp.datenbanken.ms-access 84
+ de.comp.datenbanken.mysql 88
+
+Average and sums reports (see above) will always be grouped by months;
+this option will therefore be ignored.
+
+=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
+
+Within each group (a single month or single newsgroup, see above), the
+report will be sorted by newsgroup names in ascending alphabetical order
+by default. You can change the sort order to descending or sort by number
+of postings instead.
+
+=item B<-f>, B<--format> I<pretty|list|dump>
+
+Select the output format, I<pretty> being the default:
+
+ ----- 2012-01:
+ de.comp.datenbanken.ms-access 84
+ de.comp.datenbanken.mysql 88
+ ----- 2012-02:
+ de.comp.datenbanken.ms-access 126
+ de.comp.datenbanken.mysql 21
+
+I<list> format looks like this:
+
+ 2012-01 de.comp.datenbanken.ms-access 84
+ 2012-01 de.comp.datenbanken.mysql 88
+ 2012-02 de.comp.datenbanken.ms-access 126
+ 2012-02 de.comp.datenbanken.mysql 21
+
+And I<dump> format looks like this:
+
+ # 2012-01:
+ de.comp.datenbanken.ms-access 84
+ de.comp.datenbanken.mysql 88
+ # 2012-02:
+ de.comp.datenbanken.ms-access 126
+ de.comp.datenbanken.mysql 21
+
+You can remove the comments by using B<--nocomments>, see below.
+
+=item B<-c>, B<--captions|--nocaptions>
+
+Add captions to output, like this:
+
+ ----- Report for 2012-01 to 2012-02 (number of postings for each month)
+ ----- Newsgroups: de.comp.datenbanken.*
+ ----- Threshold: 10 => x <= 20 (on average)
+ ----- Grouped by Newsgroups (ascending), sorted by number of postings descending
+
+False by default.
+
+=item B<--comments|--nocomments>
+
+Add comments (group headers) to I<dump> and I<pretty> output. True by default.
+
+Use I<--nocomments> to suppress anything except newsgroup names/months and
+numbers of postings. This is enforced when using B<--filetemplate>, see below.
+
+=item B<--filetemplate> I<filename template>
+
+Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
+create one file for each month (or each newsgroup, accordant to the
+setting of B<--group-by>, see above), with filenames composed by adding
+year and month (or newsgroup names) to the I<filename template>, for
+example with B<--filetemplate> I<stats>:
+
+ stats-2012-01
+ stats-2012-02
+ ... and so on
+
+B<--nocomments> is enforced, see above.
+
+=item B<--groupsdb> I<database table>
+
+Override I<DBTableGrps> from F<newsstats.conf>.
+
+=back
+
+=head1 INSTALLATION
+
+See L<doc/INSTALL>.
+
+=head1 EXAMPLES
+
+Show number of postings per group for lasth month in I<pretty> format:
+
+ groupstats
+
+Show that report for January of 2010 and de.alt.* plus de.test,
+including display of hierarchy levels:
+
+ groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums
+
+Only show newsgroups with 30 postings or less last month, ordered
+by number of postings, descending, in I<pretty> format:
+
+ groupstats --upper 30 --order-by postings-desc
+
+Show the total of all postings for the year of 2010 for all groups that
+had 30 postings or less in every single month in that year, ordered by
+number of postings in descending order:
+
+ groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
+
+The same for the average number of postings in the year of 2010:
+
+ groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc
+
+List number of postings per group for eacht month of 2010 and redirect
+output to one file for each month, namend stats-2010-01 and so on, in
+machine-readable form (without formatting):
+
+ groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
+
+
+=head1 FILES
+
+=over 4
+
+=item F<bin/groupstats.pl>
+
+The script itself.
+
+=item F<lib/NewsStats.pm>
+
+Library functions for the NewsStats package.
+
+=item F<etc/newsstats.conf>
+
+Runtime configuration file.
+
+=back
+
+=head1 BUGS
+
+Please report any bugs or feature requests to the author or use the
+bug tracker at L<http://bugs.th-h.de/>!
+
+=head1 SEE ALSO
+
+=over 2
+
+=item -
+
+L<doc/README>
+
+=item -
+
+l>doc/INSTALL>
+
+=item -
+
+gatherstats -h
+
+=back
+
+This script is part of the B<NewsStats> package.
+
+=head1 AUTHOR
+
+Thomas Hochstein <thh@inter.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
--- /dev/null
+### database configuration
+#
+# driver, host, credentials and database
+#
+DBDriver = mysql
+DBHost = localhost
+DBUser =
+DBPw =
+DBDatabase = newsstats
+#
+# tables
+#
+DBTableRaw = raw_de
+DBTableGrps = groups_de
+#DBTableClnts =
+#DBTableHosts =
+
+### hierarchy configuration
+TLH = de
+++ /dev/null
-#! /usr/bin/perl
-#
-# feedlog.pl
-#
-# This script will log headers and other data to a database
-# for further analysis by parsing a feed from INN.
-#
-# It is part of the NewsStats package.
-#
-# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
-#
-# It can be redistributed and/or modified under the same terms under
-# which Perl itself is published.
-
-BEGIN {
- our $VERSION = "0.01";
- use File::Basename;
- push(@INC, dirname($0));
-}
-use strict;
-use warnings;
-
-use NewsStats;
-
-use Sys::Syslog qw(:standard :macros);
-
-use Date::Format;
-use DBI;
-use Getopt::Long qw(GetOptions);
-Getopt::Long::config ('bundling');
-
-################################# Subroutines ##################################
-
-sub PrepareDB {
-### initialise database connection, prepare statement
-### and catch errors
-### IN : \%Conf : reference to configuration hash
-### OUT: $DBHandle: database handle
-### $DBQuery : prepared statement
- our ($DBHandle, $DBQuery, $OptQuiet);
- my ($ConfigR) = @_;
- my %Conf = %$ConfigR;
- # drop current database connection - hard, if necessary
- if ($DBHandle) {
- $DBHandle->disconnect;
- undef $DBHandle;
- };
- # connect to database; try again every 5 seconds
- while (!$DBHandle) {
- $DBHandle = InitDB($ConfigR,0);
- if (!$DBHandle) {
- syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
- sleep(5);
- } else {;
- syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet;
- }
- };
- $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,
- timestamp,token,size,peer,path,
- newsgroups,headers)
- VALUES (?,?,?,?,?,?,?,?,?,?)",
- $Conf{'DBDatabase'},
- $Conf{'DBTableRaw'}));
- return ($DBHandle,$DBQuery);
-}
-
-
-################################# Main program #################################
-
-### read commandline options
-my ($OptDebug,$OptQuiet);
-GetOptions ('d|debug!' => \$OptDebug,
- 'q|test!' => \$OptQuiet,
- 'h|help' => \&ShowPOD,
- 'V|version' => \&ShowVersion) or exit 1;
-
-### read configuration
-my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
-
-### init syslog
-openlog($0, 'nofatal,pid', LOG_NEWS);
-syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
-
-### init database
-my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
-
-### main loop
-while (<>) {
- chomp;
- # catch empty lines trailing or leading
- if ($_ eq '') {
- next;
- }
- # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
- my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
- # remaining lines contain headers
- my $Headers = "";
- while (<>) {
- chomp;
- # empty line terminates this article
- if ($_ eq '') {
- last;
- }
- # collect headers
- $Headers .= $_."\n" ;
- }
-
- # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
- my $Day = time2str("%Y-%m-%d", $Timestamp);
- my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
-
- # write to database
- if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
- $Path, $Newsgroups, $Headers)) {
- syslog(LOG_ERR, 'Database error %s while processing %s: %s',
- $DBI::err, $Mid, $DBI::errstr);
- # if "MySQL server has gone away", try to recover
- if ($DBI::err == 2006) {
- # try to reconnect to database
- ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
- # try to repeat the write attempt as before
- if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
- $Path, $Newsgroups, $Headers)) {
- syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
- };
- # otherwise log missing posting
- } else {
- syslog(LOG_ERR, '%s was dropped and lost.',$Mid);
- };
- };
- $DBQuery->finish;
-
- warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
- "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
- $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path,
- $Newsgroups, $Headers) if $OptDebug;
-}
-
-### close handles
-$DBHandle->disconnect;
-syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
-closelog();
-
-__END__
-
-################################ Documentation #################################
-
-=head1 NAME
-
-feedlog - log data from an INN feed to a database
-
-=head1 SYNOPSIS
-
-B<feedlog> [B<-Vhdq>]
-
-=head1 REQUIREMENTS
-
-See L<doc/README>.
-
-=head1 DESCRIPTION
-
-This script will log overview data and complete headers to a database
-table for further examination by parsing a feed from INN. It will
-parse that information and write it to a mysql database table in real
-time.
-
-All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
-fails to initiate a database connection at startup, it will log to
-I<syslog> with I<CRIT> priority and go in an endless loop, as
-terminating would only result in a rapid respawn.
-
-=head2 Configuration
-
-B<feedlog> will read its configuration from F<newsstats.conf> which
-should be present in the same directory via Config::Auto.
-
-See L<doc/INSTALL> for an overview of possible configuration options.
-
-=head1 OPTIONS
-
-=over 3
-
-=item B<-V>, B<--version>
-
-Print out version and copyright information and exit.
-
-=item B<-h>, B<--help>
-
-Print this man page and exit.
-
-=item B<-d>, B<--debug>
-
-Output debugging information to STDERR while parsing STDIN. You'll
-find that information most probably in your B<INN> F<errlog> file.
-
-=item B<-q>, B<--quiet>
-
-Suppress logging to syslog.
-
-=back
-
-=head1 INSTALLATION
-
-See L<doc/INSTALL>.
-
-=head1 EXAMPLES
-
-Set up a feed like that in your B<INN> F<newsfeeds> file:
-
- ## gather statistics for NewsStats
- newsstats!
- :!*,de.*
- :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
-
-See L<doc/INSTALL> for further information.
-
-=head1 FILES
-
-=over 4
-
-=item F<feedlog.pl>
-
-The script itself.
-
-=item F<NewsStats.pm>
-
-Library functions for the NewsStats package.
-
-=item F<newsstats.conf>
-
-Runtime configuration file.
-
-=back
-
-=head1 BUGS
-
-Please report any bugs or feature requests to the author or use the
-bug tracker at L<http://bugs.th-h.de/>!
-
-=head1 SEE ALSO
-
-=over 2
-
-=item -
-
-L<doc/README>
-
-=item -
-
-L<doc/INSTALL>
-
-=back
-
-This script is part of the B<NewsStats> package.
-
-=head1 AUTHOR
-
-Thomas Hochstein <thh@inter.net>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
-
-This program is free software; you may redistribute it and/or modify it
-under the same terms as Perl itself.
-
-=cut
+++ /dev/null
-#! /usr/bin/perl
-#
-# gatherstats.pl
-#
-# This script will gather statistical information from a database
-# containing headers and other information from a INN feed.
-#
-# It is part of the NewsStats package.
-#
-# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
-#
-# It can be redistributed and/or modified under the same terms under
-# which Perl itself is published.
-
-BEGIN {
- our $VERSION = "0.01";
- use File::Basename;
- push(@INC, dirname($0));
-}
-use strict;
-use warnings;
-
-use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
-
-use DBI;
-use Getopt::Long qw(GetOptions);
-Getopt::Long::config ('bundling');
-
-################################# Definitions ##################################
-
-# define types of information that can be gathered
-# all / groups (/ clients / hosts)
-my %LegalStats;
-@LegalStats{('all','groups')} = ();
-
-################################# Main program #################################
-
-### read commandline options
-my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
- $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest);
-GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
- 'clientsdb=s' => \$OptClientsDB,
- 'd|debug!' => \$OptDebug,
- 'groupsdb=s' => \$OptGroupsDB,
- 'hierarchy=s' => \$OptTLH,
- 'hostsdb=s' => \$OptHostsDB,
- 'm|month=s' => \$OptMonth,
- 'rawdb=s' => \$OptRawDB,
- 's|stats=s' => \$OptStatsType,
- 't|test!' => \$OptTest,
- 'h|help' => \&ShowPOD,
- 'V|version' => \&ShowVersion) or exit 1;
-
-### read configuration
-my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
-
-### override configuration via commandline options
-my %ConfOverride;
-$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
-$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
-$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
-$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
-$ConfOverride{'TLH'} = $OptTLH if $OptTLH;
-&OverrideConfig(\%Conf,\%ConfOverride);
-
-### get type of information to gather, defaulting to 'all'
-$OptStatsType = 'all' if !$OptStatsType;
-&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType))
- if !exists($LegalStats{$OptStatsType});
-
-### get time period from --month
-# get verbal description of time period, drop SQL code
-my ($Period) = &GetTimePeriod($OptMonth);
-&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ".
- "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time');
-
-### reformat $Conf{'TLH'}
-my $TLH;
-if ($Conf{'TLH'}) {
- # $Conf{'TLH'} is parsed as an array by Config::Auto;
- # make a flat list again, separated by :
- if (ref($Conf{'TLH'}) eq 'ARRAY') {
- $TLH = join(':',@{$Conf{'TLH'}});
- } else {
- $TLH = $Conf{'TLH'};
- }
- # strip whitespace
- $TLH =~ s/\s//g;
- # add trailing dots if none are present yet
- # (using negative look-behind assertions)
- $TLH =~ s/(?<!\.):/.:/g;
- $TLH =~ s/(?<!\.)$/./;
- # check for illegal characters
- &Bleat(2,'Config error - illegal characters in TLH definition!')
- if ($TLH !~ /^[a-zA-Z0-9:+.-]+$/);
- # escape dots
- $TLH =~ s/\./\\./g;
- if ($TLH =~ /:/) {
- # reformat $TLH from a:b to (a)|(b),
- # e.g. replace ':' by ')|('
- $TLH =~ s/:/)|(/g;
- $TLH = '(' . $TLH . ')';
- };
-};
-
-### init database
-my $DBHandle = InitDB(\%Conf,1);
-
-### get data for each month
-&Bleat(1,'Test mode. Database is not updated.') if $OptTest;
-foreach my $Month (&ListMonth($Period)) {
-
- print "---------- $Month ----------\n" if $OptDebug;
-
- if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
- # read list of newsgroups from --checkgroups
- # into a hash
- my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
- if $OptCheckgroupsFile;
-
- ### ----------------------------------------------
- ### get groups data (number of postings per group)
- # get groups data from raw table for given month
- my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
- "WHERE day LIKE ? AND NOT disregard",
- $Conf{'DBDatabase'},
- $Conf{'DBTableRaw'}));
- $DBQuery->execute($Month.'-%')
- or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
- "$DBI::errstr\n",$Month,
- $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
-
- # count postings per group
- my %Postings;
- while (($_) = $DBQuery->fetchrow_array) {
- # get list of newsgroups and hierarchies from Newsgroups:
- my %Newsgroups = ListNewsgroups($_,$TLH,
- $OptCheckgroupsFile ? \%ValidGroups : '');
- # count each newsgroup and hierarchy once
- foreach (sort keys %Newsgroups) {
- $Postings{$_}++;
- };
- };
-
- # add valid but empty groups if --checkgroups is set
- if (%ValidGroups) {
- foreach (sort keys %ValidGroups) {
- if (!defined($Postings{$_})) {
- # add current newsgroup as empty group
- $Postings{$_} = 0;
- warn (sprintf("ADDED: %s as empty group\n",$_));
- # add empty hierarchies for current newsgroup as needed
- foreach (ParseHierarchies($_)) {
- my $Hierarchy = $_ . '.ALL';
- if (!defined($Postings{$Hierarchy})) {
- $Postings{$Hierarchy} = 0;
- warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
- };
- };
- }
- };
- };
-
- # delete old data for that month
- if (!$OptTest) {
- $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
- $Conf{'DBDatabase'},$Conf{'DBTableGrps'}),
- undef,$Month)
- or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ".
- "$DBI::errstr\n",$Month,
- $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
- };
-
- print "----- GroupStats -----\n" if $OptDebug;
- foreach my $Newsgroup (sort keys %Postings) {
- print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug;
- if (!$OptTest) {
- # write to database
- $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ".
- "(month,newsgroup,postings) ".
- "VALUES (?, ?, ?)",
- $Conf{'DBDatabase'},
- $Conf{'DBTableGrps'}));
- $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
- or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ".
- "$DBI::errstr\n",$Month,$Newsgroup,
- $Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
- $DBQuery->finish;
- };
- };
- } else {
- # other types of information go here - later on
- };
-};
-
-### close handles
-$DBHandle->disconnect;
-
-__END__
-
-################################ Documentation #################################
-
-=head1 NAME
-
-gatherstats - process statistical data from a raw source
-
-=head1 SYNOPSIS
-
-B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
-
-=head1 REQUIREMENTS
-
-See L<doc/README>.
-
-=head1 DESCRIPTION
-
-This script will extract and process statistical information from a
-database table which is fed from F<feedlog.pl> for a given time period
-and write its results to (an)other database table(s). Entries marked
-with I<'disregard'> in the database will be ignored; currently, you
-have to set this flag yourself, using your database management tools.
-You can exclude erroneous entries that way (e.g. automatic reposts
-(think of cancels flood and resurrectors); spam; ...).
-
-The time period to act on defaults to last month; you can assign
-another time period or a single month via the B<--month> option (see
-below).
-
-By default B<gatherstats> will process all types of information; you
-can change that using the B<--stats> option and assigning the type of
-information to process. Currently that doesn't matter yet as only
-processing of the number of postings per group per month is
-implemented anyway.
-
-Possible information types include:
-
-=over 3
-
-=item B<groups> (postings per group per month)
-
-B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
-counted for each single group they appear in. Groups not in I<TLH>
-will be ignored.
-
-B<gatherstats> will also add up the number of postings for each
-hierarchy level, but only count each posting once. A posting to
-de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
-respectively. A crossposting to de.alt.test and de.alt.admin, on the
-other hand, will be counted for de.alt.test and de.alt.admin each, but
-only once for de.alt.ALL and de.ALL.
-
-Data is written to I<DBTableGrps> (see L<doc/INSTALL>); you can
-override that default through the B<--groupsdb> option.
-
-=back
-
-=head2 Configuration
-
-B<gatherstats> will read its configuration from F<newsstats.conf>
-which should be present in the same directory via Config::Auto.
-
-See L<doc/INSTALL> for an overview of possible configuration options.
-
-You can override configuration options via the B<--hierarchy>,
-B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
-respectively.
-
-=head1 OPTIONS
-
-=over 3
-
-=item B<-V>, B<--version>
-
-Print out version and copyright information and exit.
-
-=item B<-h>, B<--help>
-
-Print this man page and exit.
-
-=item B<-d>, B<--debug>
-
-Output debugging information to STDOUT while processing (number of
-postings per group).
-
-=item B<-t>, B<--test>
-
-Do not write results to database. You should use B<--debug> in
-conjunction with B<--test> ... everything else seems a bit pointless.
-
-=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]>
-
-Set processing period to a single month in YYYY-MM format or to a time
-period between two month in YYYY-MM:YYYY-MM format (two month, separated
-by a colon).
-
-=item B<-s>, B<--stats> I<type>
-
-Set processing type to one of I<all> and I<groups>. Defaults to all
-(and is currently rather pointless as only I<groups> has been
-implemented).
-
-=item B<-c>, B<--checkgroups> I<filename template>
-
-Check each group against a list of valid newsgroups read from a file,
-one group on each line and ignoring everything after the first
-whitespace (so you can use a file in checkgroups format or (part of)
-your INN active file).
-
-The filename is taken from I<filename template>, amended by each
-B<--month> B<gatherstats> is processing in the form of I<template-YYYY-MM>,
-so that
-
- gatherstats -m 2010-01:2010-12 -c checkgroups
-
-will check against F<checkgroups-2010-01> for January 2010, against
-F<checkgroups-2010-02> for February 2010 and so on.
-
-Newsgroups not found in the checkgroups file will be dropped (and
-logged to STDERR), and newsgroups found there but having no postings
-will be added with a count of 0 (and logged to STDERR).
-
-=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
-
-Override I<TLH> from F<newsstats.conf>.
-
-=item B<--rawdb> I<table> (raw data table)
-
-Override I<DBTableRaw> from F<newsstats.conf>.
-
-=item B<--groupsdb> I<table> (postings per group table)
-
-Override I<DBTableGrps> from F<newsstats.conf>.
-
-=item B<--clientsdb> I<table> (client data table)
-
-Override I<DBTableClnts> from F<newsstats.conf>.
-
-=item B<--hostsdb> I<table> (host data table)
-
-Override I<DBTableHosts> from F<newsstats.conf>.
-
-=back
-
-=head1 INSTALLATION
-
-See L<doc/INSTALL>.
-
-=head1 EXAMPLES
-
-Process all types of information for lasth month:
-
- gatherstats
-
-Do a dry run, showing results of processing:
-
- gatherstats --debug --test
-
-Process all types of information for January of 2010:
-
- gatherstats --month 2010-01
-
-Process only number of postings for the year of 2010,
-checking against checkgroups-*:
-
- gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
-
-=head1 FILES
-
-=over 4
-
-=item F<gatherstats.pl>
-
-The script itself.
-
-=item F<NewsStats.pm>
-
-Library functions for the NewsStats package.
-
-=item F<newsstats.conf>
-
-Runtime configuration file.
-
-=back
-
-=head1 BUGS
-
-Please report any bugs or feature requests to the author or use the
-bug tracker at L<http://bugs.th-h.de/>!
-
-=head1 SEE ALSO
-
-=over 2
-
-=item -
-
-L<doc/README>
-
-=item -
-
-L<doc/INSTALL>
-
-=back
-
-This script is part of the B<NewsStats> package.
-
-=head1 AUTHOR
-
-Thomas Hochstein <thh@inter.net>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
-
-This program is free software; you may redistribute it and/or modify it
-under the same terms as Perl itself.
-
-=cut
+++ /dev/null
-#! /usr/bin/perl
-#
-# groupstats.pl
-#
-# This script will get statistical data on newgroup usage
-# from a database.
-#
-# It is part of the NewsStats package.
-#
-# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
-#
-# It can be redistributed and/or modified under the same terms under
-# which Perl itself is published.
-
-BEGIN {
- our $VERSION = "0.01";
- use File::Basename;
- push(@INC, dirname($0));
-}
-use strict;
-use warnings;
-
-use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);
-
-use DBI;
-use Getopt::Long qw(GetOptions);
-Getopt::Long::config ('bundling');
-
-################################# Main program #################################
-
-### read commandline options
-my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments,
- $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth,
- $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound);
-GetOptions ('b|boundary=s' => \$OptBoundType,
- 'c|captions!' => \$OptCaptions,
- 'checkgroups=s' => \$OptCheckgroupsFile,
- 'comments!' => \$OptComments,
- 'filetemplate=s' => \$OptFileTemplate,
- 'f|format=s' => \$OptFormat,
- 'g|group-by=s' => \$OptGroupBy,
- 'groupsdb=s' => \$OptGroupsDB,
- 'l|lower=i' => \$LowBound,
- 'm|month=s' => \$OptMonth,
- 'n|newsgroups=s' => \$OptNewsgroups,
- 'o|order-by=s' => \$OptOrderBy,
- 'r|report=s' => \$OptReportType,
- 's|sums!' => \$OptSums,
- 'u|upper=i' => \$UppBound,
- 'h|help' => \&ShowPOD,
- 'V|version' => \&ShowVersion) or exit 1;
-# parse parameters
-# $OptComments defaults to TRUE
-$OptComments = 1 if (!defined($OptComments));
-# force --nocomments when --filetemplate is used
-$OptComments = 0 if ($OptFileTemplate);
-# parse $OptBoundType
-if ($OptBoundType) {
- if ($OptBoundType =~ /level/i) {
- $OptBoundType = 'level';
- } elsif ($OptBoundType =~ /av(era)?ge?/i) {
- $OptBoundType = 'average';
- } elsif ($OptBoundType =~ /sums?/i) {
- $OptBoundType = 'sum';
- } else {
- $OptBoundType = 'default';
- }
-}
-# parse $OptReportType
-if ($OptReportType) {
- if ($OptReportType =~ /av(era)?ge?/i) {
- $OptReportType = 'average';
- } elsif ($OptReportType =~ /sums?/i) {
- $OptReportType = 'sum';
- } else {
- $OptReportType = 'default';
- }
-}
-# read list of newsgroups from --checkgroups
-# into a hash reference
-my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile;
-
-### read configuration
-my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
-
-### override configuration via commandline options
-my %ConfOverride;
-$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
-&OverrideConfig(\%Conf,\%ConfOverride);
-
-### init database
-my $DBHandle = InitDB(\%Conf,1);
-
-### get time period and newsgroups, prepare SQL 'WHERE' clause
-# get time period
-# and set caption for output and expression for SQL 'WHERE' clause
-my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
-# bail out if --month is invalid
-&Bleat(2,"--month option has an invalid format - ".
- "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod;
-# get list of newsgroups and set expression for SQL 'WHERE' clause
-# with placeholders as well as a list of newsgroup to bind to them
-my ($SQLWhereNewsgroups,@SQLBindNewsgroups);
-if ($OptNewsgroups) {
- ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups);
- # bail out if --newsgroups is invalid
- &Bleat(2,"--newsgroups option has an invalid format!")
- if !$SQLWhereNewsgroups;
-}
-
-### build SQL WHERE clause (and HAVING clause, if needed)
-my ($SQLWhereClause,$SQLHavingClause);
-# $OptBoundType 'level'
-if ($OptBoundType and $OptBoundType ne 'default') {
- $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
- $SQLWhereNewsgroups,&SQLHierarchies($OptSums));
- $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType,
- $LowBound,$UppBound));
-# $OptBoundType 'threshold' / 'default' or none
-} else {
- $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,
- $SQLWhereNewsgroups,&SQLHierarchies($OptSums),
- &SQLSetBounds('default',$LowBound,$UppBound));
-}
-
-### get sort order and build SQL 'ORDER BY' clause
-# default to 'newsgroup' for $OptBoundType 'level' or 'average'
-$OptGroupBy = 'newsgroup' if (!$OptGroupBy and
- $OptBoundType and $OptBoundType ne 'default');
-# force to 'month' for $OptReportType 'average' or 'sum'
-$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default');
-# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
-my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy);
-# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy)
-# set it to 'month' or 'key' for OutputData()
-$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key';
-
-### get report type and build SQL 'SELECT' query
-my $SQLSelect;
-my $SQLGroupClause = '';
-my $Precision = 0; # number of digits right of decimal point for output
-if ($OptReportType and $OptReportType ne 'default') {
- $SQLGroupClause = 'GROUP BY newsgroup';
- # change $SQLOrderClause: replace everything before 'postings'
- $SQLOrderClause =~ s/BY.+postings/BY postings/;
- if ($OptReportType eq 'average') {
- $SQLSelect = "'All months',newsgroup,AVG(postings)";
- $Precision = 2;
- # change $SQLOrderClause: replace 'postings' with 'AVG(postings)'
- $SQLOrderClause =~ s/postings/AVG(postings)/;
- } elsif ($OptReportType eq 'sum') {
- $SQLSelect = "'All months',newsgroup,SUM(postings)";
- # change $SQLOrderClause: replace 'postings' with 'SUM(postings)'
- $SQLOrderClause =~ s/postings/SUM(postings)/;
- }
- } else {
- $SQLSelect = 'month,newsgroup,postings';
-};
-
-### get length of longest newsgroup name delivered by query
-### for formatting purposes
-my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
-my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'},
- $Field,'postings',$SQLWhereClause,
- $SQLHavingClause,
- @SQLBindNewsgroups);
-
-### build and execute SQL query
-my ($DBQuery);
-# special query preparation for $OptBoundType 'level', 'average' or 'sums'
-if ($OptBoundType and $OptBoundType ne 'default') {
- # prepare and execute first query:
- # get list of newsgroups meeting level conditions
- $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '.
- 'GROUP BY newsgroup %s',
- $Conf{'DBDatabase'},$Conf{'DBTableGrps'},
- $SQLWhereClause,$SQLHavingClause));
- $DBQuery->execute(@SQLBindNewsgroups)
- or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
- $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
- $DBI::errstr));
- # add newsgroups to a comma-seperated list ready for IN(...) query
- my $GroupList;
- while (my ($Newsgroup) = $DBQuery->fetchrow_array) {
- $GroupList .= ',' if $GroupList;
- $GroupList .= "'$Newsgroup'";
- };
- # enhance $WhereClause
- if ($GroupList) {
- $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,
- sprintf('newsgroup IN (%s)',$GroupList));
- } else {
- # condition cannot be satisfied;
- # force query to fail by adding '0=1'
- $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1');
- }
-}
-
-# prepare query
-$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s',
- $SQLSelect,
- $Conf{'DBDatabase'},$Conf{'DBTableGrps'},
- $SQLWhereClause,$SQLGroupClause,
- $SQLOrderClause));
-
-# execute query
-$DBQuery->execute(@SQLBindNewsgroups)
- or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n",
- $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},
- $DBI::errstr));
-
-### output results
-# set default to 'pretty'
-$OptFormat = 'pretty' if !$OptFormat;
-# print captions if --caption is set
-if ($OptCaptions && $OptComments) {
- # print time period with report type
- my $CaptionReportType= '(number of postings for each month)';
- if ($OptReportType and $OptReportType ne 'default') {
- $CaptionReportType= '(average number of postings for each month)'
- if $OptReportType eq 'average';
- $CaptionReportType= '(number of all postings for that time period)'
- if $OptReportType eq 'sum';
- }
- printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType);
- # print newsgroup list if --newsgroups is set
- printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups)))
- if $OptNewsgroups;
- # print boundaries, if set
- my $CaptionBoundary= '(counting only month fulfilling this condition)';
- if ($OptBoundType and $OptBoundType ne 'default') {
- $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level';
- $CaptionBoundary= '(on average)' if $OptBoundType eq 'average';
- $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum';
- }
- printf("# ----- Threshold: %s %s x %s %s %s\n",
- $LowBound ? $LowBound : '',$LowBound ? '=>' : '',
- $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary)
- if ($LowBound or $UppBound);
- # print primary and secondary sort order
- printf("# ----- Grouped by %s (%s), sorted %s%s\n",
- ($GroupBy eq 'month') ? 'Months' : 'Newsgroups',
- ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending',
- ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '',
- ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending');
-}
-
-# output data
-&OutputData($OptFormat,$OptComments,$GroupBy,$Precision,
- $OptCheckgroupsFile ? $ValidGroups : '',
- $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength);
-
-### close handles
-$DBHandle->disconnect;
-
-__END__
-
-################################ Documentation #################################
-
-=head1 NAME
-
-groupstats - create reports on newsgroup usage
-
-=head1 SYNOPSIS
-
-B<groupstats> [B<-Vhcs> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<newsgroup(s)>] [B<--checkgroups> I<checkgroups file>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-b> I<boundary type>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--groupsdb> I<database table>]
-
-=head1 REQUIREMENTS
-
-See L<doc/README>.
-
-=head1 DESCRIPTION
-
-This script create reports on newsgroup usage (number of postings per
-group per month) taken from result tables created by
-B<gatherstats.pl>.
-
-=head2 Features and options
-
-=head3 Time period and newsgroups
-
-The time period to act on defaults to last month; you can assign another
-time period or a single month (or drop all time constraints) via the
-B<--month> option (see below).
-
-B<groupstats> will process all newsgroups by default; you can limit
-processing to only some newsgroups by supplying a list of those groups via
-B<--newsgroups> option (see below). You can include hierarchy levels in
-the output by adding the B<--sums> switch (see below). Optionally
-newsgroups not present in a checkgroups file can be excluded from output,
-sse B<--checkgroups> below.
-
-=head3 Report type
-
-You can choose between different B<--report> types: postings per month,
-average postings per month or all postings summed up; for details, see
-below.
-
-=head3 Upper and lower boundaries
-
-Furthermore you can set an upper and/or lower boundary to exclude some
-results from output via the B<--lower> and B<--upper> options,
-respectively. By default, all newsgroups with more and/or less postings
-per month will be excluded from the result set (i.e. not shown and not
-considered for average and sum reports). You can change the meaning of
-those boundaries with the B<--boundary> option. For details, please see
-below.
-
-=head3 Sorting and formatting the output
-
-By default, all results are grouped by month; you can group results by
-newsgroup instead via the B<--groupy-by> option. Within those groups, the
-list of newsgroups (or months) is sorted alphabetically (or
-chronologically, respectively) ascending. You can change that order (and
-sort by number of postings) with the B<--order-by> option. For details and
-exceptions, please see below.
-
-The results will be formatted as a kind of table; you can change the
-output format to a simple list or just a list of newsgroups and number of
-postings with the B<--format> option. Captions will be added by means of
-the B<--caption> option; all comments (and captions) can be supressed by
-using B<--nocomments>.
-
-Last but not least you can redirect all output to a number of files, e.g.
-one for each month, by submitting the B<--filetemplate> option, see below.
-Captions and comments are automatically disabled in this case.
-
-=head2 Configuration
-
-B<groupstats> will read its configuration from F<newsstats.conf>
-which should be present in the same directory via Config::Auto.
-
-See doc/INSTALL for an overview of possible configuration options.
-
-You can override some configuration options via the B<--groupsdb> option.
-
-=head1 OPTIONS
-
-=over 3
-
-=item B<-V>, B<--version>
-
-Print out version and copyright information and exit.
-
-=item B<-h>, B<--help>
-
-Print this man page and exit.
-
-=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>
-
-Set processing period to a single month in YYYY-MM format or to a time
-period between two month in YYYY-MM:YYYY-MM format (two month, separated
-by a colon). By using the keyword I<all> instead, you can set no
-processing period to process the whole database.
-
-=item B<-n>, B<--newsgroups> I<newsgroup(s)>
-
-Limit processing to a certain set of newsgroups. I<newsgroup(s)> can
-be a single newsgroup name (de.alt.test), a newsgroup hierarchy
-(de.alt.*) or a list of either of these, separated by colons, for
-example
-
- de.test:de.alt.test:de.newusers.*
-
-=item B<-s>, B<--sums|--nosums> (sum per hierarchy level)
-
-Include "virtual" groups for every hierarchy level in output, for
-example:
-
- de.alt.ALL 10
- de.alt.test 5
- de.alt.admin 7
-
-See the B<gatherstats> man page for details.
-
-=item B<--checkgroups> I<filename>
-
-Restrict output to those newgroups present in a file in checkgroups format
-(one newgroup name per line; everything after the first whitespace on each
-line is ignored). All other newsgroups will be removed from output.
-
-Contrary to B<gatherstats>, I<filename> is not a template, but refers to
-a single file in checkgroups format.
-
-=item B<-r>, B<--report> I<default|average|sums>
-
-Choose the report type: I<default>, I<average> or I<sums>
-
-By default, B<groupstats> will report the number of postings for each
-newsgroup in each month. But it can also report the average number of
-postings per group for all months or the total sum of postings per group
-for all months.
-
-For report types I<average> and I<sums>, the B<group-by> option has no
-meaning and will be silently ignored (see below).
-
-=item B<-l>, B<--lower> I<lower boundary>
-
-Set the lower boundary. See B<--boundary> below.
-
-=item B<-l>, B<--upper> I<upper boundary>
-
-Set the upper boundary. See B<--boundary> below.
-
-=item B<-b>, B<--boundary> I<boundary type>
-
-Set the boundary type to one of I<default>, I<level>, I<average> or
-I<sums>.
-
-By default, all newsgroups with more postings per month than the upper
-boundary and/or less postings per month than the lower boundary will be
-excluded from further processing. For the default report that means each
-month only newsgroups with a number of postings between the boundaries
-will be displayed. For the other report types, newsgroups with a number of
-postings exceeding the boundaries in all (!) months will not be
-considered.
-
-For example, lets take a list of newsgroups like this:
-
- ----- 2012-01:
- de.comp.datenbanken.misc 6
- de.comp.datenbanken.ms-access 84
- de.comp.datenbanken.mysql 88
- ----- 2012-02:
- de.comp.datenbanken.misc 8
- de.comp.datenbanken.ms-access 126
- de.comp.datenbanken.mysql 21
- ----- 2012-03:
- de.comp.datenbanken.misc 24
- de.comp.datenbanken.ms-access 83
- de.comp.datenbanken.mysql 36
-
-With C<groupstats --month 2012-01:2012-03 --lower 25 --report sums>,
-you'll get the following result:
-
- ----- All months:
- de.comp.datenbanken.ms-access 293
- de.comp.datenbanken.mysql 124
-
-de.comp.datenbanken.misc has not been considered even though it has 38
-postings in total, because it has less than 25 postings in every single
-month. If you want to list all newsgroups with more than 25 postings
-I<in total>, you'll have to set the boundary type to I<sum>, see below.
-
-A boundary type of I<level> will show only those newsgroups - at all -
-that satisfy the boundaries in each and every single month. With the above
-list of newsgroups and
-C<groupstats --month 2012-01:2012-03 --lower 25 --boundary level --report sums>,
-you'll get this result:
-
- ----- All months:
- de.comp.datenbanken.ms-access 293
-
-de.comp.datenbanken.mysql has not been considered because it had less than
-25 postings in 2012-02 (only).
-
-You can use that to get a list of newsgroups that have more (or less) then
-x postings in every month during the whole reporting period.
-
-A boundary type of I<average> will show only those newsgroups - at all -that
-satisfy the boundaries on average. With the above list of newsgroups and
-C<groupstats --month 2012-01:2012-03 --lower 25 --boundary avg --report sums>,
-you'll get this result:
-
- ----- All months:
- de.comp.datenbanken.ms-access 293
- de.comp.datenbanken.mysql 145
-
-The average number of postings in the three groups is:
-
- de.comp.datenbanken.misc 12.67
- de.comp.datenbanken.ms-access 97.67
- de.comp.datenbanken.mysql 48.33
-
-Last but not least, a boundary type of I<sums> will show only those
-newsgroups - at all - that satisfy the boundaries with the total sum of
-all postings during the reporting period. With the above list of
-newsgroups and
-C<groupstats --month 2012-01:2012-03 --lower 25 --boundary sum --report sums>,
-you'll finally get this result:
-
- ----- All months:
- de.comp.datenbanken.misc 38
- de.comp.datenbanken.ms-access 293
- de.comp.datenbanken.mysql 145
-
-
-=item B<-g>, B<--group-by> I<month[-desc]|newsgroups[-desc]>
-
-By default, all results are grouped by month, sorted chronologically in
-ascending order, like this:
-
- ----- 2012-01:
- de.comp.datenbanken.ms-access 84
- de.comp.datenbanken.mysql 88
- ----- 2012-02:
- de.comp.datenbanken.ms-access 126
- de.comp.datenbanken.mysql 21
-
-The results can be grouped by newsgroups instead via
-B<--group-by> I<newsgroup>:
-
- ----- de.comp.datenbanken.ms-access:
- 2012-01 84
- 2012-02 126
- ----- de.comp.datenbanken.mysql:
- 2012-01 88
- 2012-02 21
-
-By appending I<-desc> to the group-by option parameter, you can reverse
-the sort order - e.g. B<--group-by> I<month-desc> will give:
-
- ----- 2012-02:
- de.comp.datenbanken.ms-access 126
- de.comp.datenbanken.mysql 21
- ----- 2012-01:
- de.comp.datenbanken.ms-access 84
- de.comp.datenbanken.mysql 88
-
-Average and sums reports (see above) will always be grouped by months;
-this option will therefore be ignored.
-
-=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>
-
-Within each group (a single month or single newsgroup, see above), the
-report will be sorted by newsgroup names in ascending alphabetical order
-by default. You can change the sort order to descending or sort by number
-of postings instead.
-
-=item B<-f>, B<--format> I<pretty|list|dump>
-
-Select the output format, I<pretty> being the default:
-
- ----- 2012-01:
- de.comp.datenbanken.ms-access 84
- de.comp.datenbanken.mysql 88
- ----- 2012-02:
- de.comp.datenbanken.ms-access 126
- de.comp.datenbanken.mysql 21
-
-I<list> format looks like this:
-
- 2012-01 de.comp.datenbanken.ms-access 84
- 2012-01 de.comp.datenbanken.mysql 88
- 2012-02 de.comp.datenbanken.ms-access 126
- 2012-02 de.comp.datenbanken.mysql 21
-
-And I<dump> format looks like this:
-
- # 2012-01:
- de.comp.datenbanken.ms-access 84
- de.comp.datenbanken.mysql 88
- # 2012-02:
- de.comp.datenbanken.ms-access 126
- de.comp.datenbanken.mysql 21
-
-You can remove the comments by using B<--nocomments>, see below.
-
-=item B<-c>, B<--captions|--nocaptions>
-
-Add captions to output, like this:
-
- ----- Report for 2012-01 to 2012-02 (number of postings for each month)
- ----- Newsgroups: de.comp.datenbanken.*
- ----- Threshold: 10 => x <= 20 (on average)
- ----- Grouped by Newsgroups (ascending), sorted by number of postings descending
-
-False by default.
-
-=item B<--comments|--nocomments>
-
-Add comments (group headers) to I<dump> and I<pretty> output. True by default.
-
-Use I<--nocomments> to suppress anything except newsgroup names/months and
-numbers of postings. This is enforced when using B<--filetemplate>, see below.
-
-=item B<--filetemplate> I<filename template>
-
-Save output to file(s) instead of dumping it to STDOUT. B<groupstats> will
-create one file for each month (or each newsgroup, accordant to the
-setting of B<--group-by>, see above), with filenames composed by adding
-year and month (or newsgroup names) to the I<filename template>, for
-example with B<--filetemplate> I<stats>:
-
- stats-2012-01
- stats-2012-02
- ... and so on
-
-B<--nocomments> is enforced, see above.
-
-=item B<--groupsdb> I<database table>
-
-Override I<DBTableGrps> from F<newsstats.conf>.
-
-=back
-
-=head1 INSTALLATION
-
-See L<doc/INSTALL>.
-
-=head1 EXAMPLES
-
-Show number of postings per group for lasth month in I<pretty> format:
-
- groupstats
-
-Show that report for January of 2010 and de.alt.* plus de.test,
-including display of hierarchy levels:
-
- groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums
-
-Only show newsgroups with 30 postings or less last month, ordered
-by number of postings, descending, in I<pretty> format:
-
- groupstats --upper 30 --order-by postings-desc
-
-Show the total of all postings for the year of 2010 for all groups that
-had 30 postings or less in every single month in that year, ordered by
-number of postings in descending order:
-
- groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc
-
-The same for the average number of postings in the year of 2010:
-
- groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc
-
-List number of postings per group for eacht month of 2010 and redirect
-output to one file for each month, namend stats-2010-01 and so on, in
-machine-readable form (without formatting):
-
- groupstats -m 2010-01:2010-12 -f dump --filetemplate stats
-
-
-=head1 FILES
-
-=over 4
-
-=item F<groupstats.pl>
-
-The script itself.
-
-=item F<NewsStats.pm>
-
-Library functions for the NewsStats package.
-
-=item F<newsstats.conf>
-
-Runtime configuration file.
-
-=back
-
-=head1 BUGS
-
-Please report any bugs or feature requests to the author or use the
-bug tracker at L<http://bugs.th-h.de/>!
-
-=head1 SEE ALSO
-
-=over 2
-
-=item -
-
-L<doc/README>
-
-=item -
-
-l>doc/INSTALL>
-
-=item -
-
-gatherstats -h
-
-=back
-
-This script is part of the B<NewsStats> package.
-
-=head1 AUTHOR
-
-Thomas Hochstein <thh@inter.net>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
-
-This program is free software; you may redistribute it and/or modify it
-under the same terms as Perl itself.
-
-=cut
BEGIN {
our $VERSION = "0.01";
use File::Basename;
- # we're in .../install, so our module is in ..
- push(@INC, dirname($0).'/..');
+ # we're in .../install, so our module is in ../lib
+ push(@INC, dirname($0).'/../lib');
}
use strict;
use warnings;
### read configuration
print("Reading configuration.\n");
-my %Conf = %{ReadConfig($Path.'/newsstats.conf')};
+my %Conf = %{ReadConfig('')};
##### --------------------------------------------------------------------------
##### Database table definitions
=over 4
-=item F<install.pl>
+=item F<install/install.pl>
The script itself.
-=item F<NewsStats.pm>
+=item F<lib/NewsStats.pm>
Library functions for the NewsStats package.
-=item F<newsstats.conf>
+=item F<etc/newsstats.conf>
Runtime configuration file.
--- /dev/null
+# NewsStats.pm
+#
+# Library functions for the NewsStats package.
+#
+# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+#
+# This module can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+package NewsStats;
+
+use strict;
+use warnings;
+our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(
+ $MyVersion
+ $PackageVersion
+ $FullPath
+ $HomePath
+ ShowVersion
+ ShowPOD
+ ReadConfig
+ OverrideConfig
+ InitDB
+ Bleat
+);
+@EXPORT_OK = qw(
+ GetTimePeriod
+ LastMonth
+ SplitPeriod
+ ListMonth
+ ListNewsgroups
+ ParseHierarchies
+ ReadGroupList
+ OutputData
+ FormatOutput
+ SQLHierarchies
+ SQLSortOrder
+ SQLGroupList
+ SQLSetBounds
+ SQLBuildClause
+ GetMaxLength
+);
+%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
+ ListMonth)],
+ Output => [qw(OutputData FormatOutput)],
+ SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
+ SQLSetBounds SQLBuildClause GetMaxLength)]);
+$VERSION = '0.01';
+our $PackageVersion = '0.01';
+
+use Data::Dumper;
+use File::Basename;
+use Cwd qw(realpath);
+
+use Config::Auto;
+use DBI;
+
+#####-------------------------------- Vars --------------------------------#####
+
+# save $0 in $FullPath
+our $FullPath = $0;
+# strip filename and /bin or /install directory to create the $HomePath
+our $HomePath = dirname(realpath($0));
+$HomePath =~ s/\/(bin|install)//;
+# trim $0
+$0 =~ s%.*/%%;
+# set version string
+our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
+
+#####------------------------------- Basics -------------------------------#####
+
+################################################################################
+
+################################################################################
+sub ShowVersion {
+################################################################################
+### display version and exit
+ print "NewsStats v$PackageVersion\n$MyVersion\n";
+ print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
+ print "This program is free software; you may redistribute it ".
+ "and/or modify it under the same terms as Perl itself.\n";
+ exit(100);
+};
+################################################################################
+
+################################################################################
+sub ShowPOD {
+################################################################################
+### feed myself to perldoc and exit
+ exec('perldoc', $FullPath);
+ exit(100);
+};
+################################################################################
+
+################################################################################
+sub ReadConfig {
+################################################################################
+### read config via Config::Auto
+### IN : $ConfFile: config filename
+### OUT: reference to a hash containing the configuration
+ my ($ConfFile) = @_;
+ # set default
+ $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
+ # mandatory configuration options
+ my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
+ 'DBTableRaw','DBTableGrps');
+ # read config via Config::Auto
+ my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
+ my %Conf = %{$ConfR};
+ # check for mandatory options
+ foreach (@Mandatory) {
+ &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
+ if (!defined($Conf{$_}));
+ }
+ # $Conf{'TLH'} is checked in gatherstats.pl
+ return $ConfR;
+};
+################################################################################
+
+################################################################################
+sub OverrideConfig {
+################################################################################
+### override configuration values
+### IN : $ConfigR : reference to configuration hash
+### $OverrideR: reference to a hash containing overrides
+ my ($ConfigR,$OverrideR) = @_;
+ my %Override = %$OverrideR;
+ # Config hash empty?
+ &Bleat(1,"Empty configuration hash passed to OverrideConfig()")
+ if ( keys %$ConfigR < 1);
+ # return if no overrides
+ return if (keys %Override < 1 or keys %$ConfigR < 1);
+ foreach my $Key (keys %Override) {
+ $$ConfigR{$Key} = $Override{$Key};
+ };
+};
+################################################################################
+
+################################################################################
+sub InitDB {
+################################################################################
+### initialise database connection
+### IN : $ConfigR: reference to configuration hash
+### $Die : if TRUE, die if connection fails
+### OUT: DBHandle
+ my ($ConfigR,$Die) = @_;
+ my %Conf = %$ConfigR;
+ my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
+ $Conf{'DBDriver'},$Conf{'DBDatabase'},
+ $Conf{'DBHost'}), $Conf{'DBUser'},
+ $Conf{'DBPw'}, { PrintError => 0 });
+ if (!$DBHandle) {
+ &Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
+ &Bleat(1,$DBI::errstr);
+ };
+ return $DBHandle;
+};
+################################################################################
+
+################################################################################
+sub Bleat {
+################################################################################
+### print warning or error messages and terminate in case of error
+### IN : $Level : 1 = warning, 2 = error
+### $Message: warning or error message
+ my ($Level,$Message) = @_;
+ if ($Level == 1) {
+ warn "$0 W: $Message\n"
+ } elsif ($Level == 2) {
+ die "$0 E: $Message\n"
+ } else {
+ print "$0: $Message\n"
+ }
+};
+################################################################################
+
+#####------------------------------ GetStats ------------------------------#####
+
+################################################################################
+sub ListNewsgroups {
+################################################################################
+### explode a (scalar) list of newsgroup names to a list of newsgroup and
+### hierarchy names where every newsgroup and hierarchy appears only once:
+### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
+### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
+### $TLH : top level hierarchy (all other newsgroups are ignored)
+### $ValidGroupsR: reference to a hash containing all valid newsgroups
+### as keys
+### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
+ my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
+ my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
+ my %Newsgroups;
+ chomp($Newsgroups);
+ # remove whitespace from contents of Newsgroups:
+ $Newsgroups =~ s/\s//;
+ # call &HierarchyCount for each newsgroup in $Newsgroups:
+ for (split /,/, $Newsgroups) {
+ # don't count newsgroup/hierarchy in wrong TLH
+ next if($TLH and !/^$TLH/);
+ # don't count invalid newsgroups
+ if(%ValidGroups and !defined($ValidGroups{$_})) {
+ warn (sprintf("DROPPED: %s\n",$_));
+ next;
+ }
+ # add original newsgroup to %Newsgroups
+ $Newsgroups{$_} = 1;
+ # add all hierarchy elements to %Newsgroups, amended by '.ALL',
+ # i.e. de.alt.ALL and de.ALL
+ foreach (ParseHierarchies($_)) {
+ $Newsgroups{$_.'.ALL'} = 1;
+ }
+ };
+ return %Newsgroups;
+};
+
+################################################################################
+sub ParseHierarchies {
+################################################################################
+### return a list of all hierarchy levels a newsgroup belongs to
+### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
+### IN : $Newsgroup : a newsgroup name
+### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
+ my ($Newsgroup) = @_;
+ my @Hierarchies;
+ # strip trailing dots
+ $Newsgroup =~ s/(.+)\.+$/$1/;
+ # butcher newsgroup name by "." and add each hierarchy to @Hierarchies
+ # i.e. de.alt.test: "de.alt" and "de"
+ while ($Newsgroup =~ /\./) {
+ $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
+ push @Hierarchies, $Newsgroup;
+ };
+ return @Hierarchies;
+};
+
+################################################################################
+sub ReadGroupList {
+################################################################################
+### read a list of valid newsgroups from file (each group on one line,
+### ignoring everything after the first whitespace and so accepting files
+### in checkgroups format as well as (parts of) an INN active file)
+### IN : $Filename : file to read
+### OUT: \%ValidGroups: hash containing all valid newsgroups
+ my ($Filename) = @_;
+ my %ValidGroups;
+ open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
+ while (<$LIST>) {
+ s/^\s*(\S+).*$/$1/;
+ chomp;
+ next if /^$/;
+ $ValidGroups{$_} = '1';
+ };
+ close $LIST;
+ return \%ValidGroups;
+};
+
+################################################################################
+
+#####----------------------------- TimePeriods ----------------------------#####
+
+################################################################################
+sub GetTimePeriod {
+################################################################################
+### get a time period to act on from --month option;
+### if empty, default to last month
+### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
+### OUT: $Verbal,$SQL: verbal description and WHERE-clause
+### of the chosen time period
+ my ($Month) = @_;
+ # define result variables
+ my ($Verbal, $SQL);
+ # define a regular expression for a month
+ my $REMonth = '\d{4}-\d{2}';
+
+ # default to last month if option is not set
+ if(!$Month) {
+ $Month = &LastMonth;
+ }
+
+ # check for valid input
+ if ($Month =~ /^$REMonth$/) {
+ # single month (YYYY-MM)
+ ($Month) = &CheckMonth($Month);
+ $Verbal = $Month;
+ $SQL = sprintf("month = '%s'",$Month);
+ } elsif ($Month =~ /^$REMonth:$REMonth$/) {
+ # time period (YYYY-MM:YYYY-MM)
+ $Verbal = sprintf('%s to %s',&SplitPeriod($Month));
+ $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
+ } elsif ($Month =~ /^all$/i) {
+ # special case: ALL
+ $Verbal = 'all time';
+ $SQL = '';
+ } else {
+ # invalid input
+ return (undef,undef);
+ }
+
+ return ($Verbal,$SQL);
+};
+
+################################################################################
+sub LastMonth {
+################################################################################
+### get last month from todays date in YYYY-MM format
+### OUT: last month as YYYY-MM
+ # get today's date
+ my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
+ # $Month is already defined from 0 to 11, so no need to decrease it by 1
+ $Year += 1900;
+ if ($Month < 1) {
+ $Month = 12;
+ $Year--;
+ };
+ # return last month
+ return sprintf('%4d-%02d',$Year,$Month);
+};
+
+################################################################################
+sub CheckMonth {
+################################################################################
+### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
+### otherwise, fix it
+### IN : @Month: array of month
+### OUT: @Month: a valid month
+ my (@Month) = @_;
+ foreach my $Month (@Month) {
+ my ($OldMonth) = $Month;
+ my ($CalMonth) = substr ($Month, -2);
+ if ($CalMonth < 1 or $CalMonth > 12) {
+ $CalMonth = '12' if $CalMonth > 12;
+ $CalMonth = '01' if $CalMonth < 1;
+ substr($Month, -2) = $CalMonth;
+ &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
+ "and '12'), set to '%s'.",$OldMonth,$Month));
+ }
+ }
+ return @Month;
+};
+
+################################################################################
+sub SplitPeriod {
+################################################################################
+### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
+### IN : $Period: time period
+### OUT: $StartMonth, $EndMonth
+ my ($Period) = @_;
+ my ($StartMonth, $EndMonth) = split /:/, $Period;
+ ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
+ # switch parameters as necessary
+ if ($EndMonth gt $StartMonth) {
+ return ($StartMonth, $EndMonth);
+ } else {
+ return ($EndMonth, $StartMonth);
+ };
+};
+
+################################################################################
+sub ListMonth {
+################################################################################
+### return a list of months (YYYY-MM) between start and end month
+### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
+### OUT: @Months: array containing all months from $MonthExpression enumerated
+ my ($MonthExpression )= @_;
+ # return if single month
+ return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
+ # parse $MonthExpression
+ my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
+ # set $Year, $Month from $StartMonth
+ my ($Year, $Month) = split /-/, $StartMonth;
+ # define @Months
+ my (@Months);
+ until ("$Year-$Month" gt $EndMonth) {
+ push @Months, "$Year-$Month";
+ $Month = "$Month"; # force string context
+ $Month++;
+ if ($Month > 12) {
+ $Month = '01';
+ $Year++;
+ };
+ };
+ return @Months;
+};
+
+#####---------------------------- OutputFormats ---------------------------#####
+
+################################################################################
+sub OutputData {
+################################################################################
+### read database query results from DBHandle and print results with formatting
+### IN : $Format : format specifier
+### $Comments : print or suppress all comments for machine-readable output
+### $GroupBy : primary sorting order (month or key)
+### $Precision: number of digits right of decimal point (0 or 2)
+### $ValidKeys: reference to a hash containing all valid keys
+### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
+### $DBQuery : database query handle with executed query,
+### containing $Month, $Key, $Value
+### $PadField : padding length for key field (optional) for 'pretty'
+### $PadValue : padding length for value field (optional) for 'pretty'
+ my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
+ $DBQuery, $PadField, $PadValue) = @_;
+ my %ValidKeys = %{$ValidKeys} if $ValidKeys;
+ my ($FileName, $Handle, $OUT);
+ our $LastIteration;
+
+ # define output types
+ my %LegalOutput;
+ @LegalOutput{('dump','list','pretty')} = ();
+ # bail out if format is unknown
+ &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
+
+ while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
+ # don't display invalid keys
+ if(%ValidKeys and !defined($ValidKeys{$Key})) {
+ # FIXME
+ # &Bleat(1,sprintf("DROPPED: %s",$Key));
+ next;
+ };
+ # care for correct sorting order and abstract from month and keys:
+ # $Caption will be $Month or $Key, according to sorting order,
+ # and $Key will be $Key or $Month, respectively
+ my $Caption;
+ if ($GroupBy eq 'key') {
+ $Caption = $Key;
+ $Key = $Month;
+ } else {
+ $Caption = $Month;
+ }
+ # set output file handle
+ if (!$FileTempl) {
+ $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
+ } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
+ close $OUT if ($LastIteration);
+ # safeguards for filename creation:
+ # replace potential problem characters with '_'
+ $FileName = sprintf('%s-%s',$FileTempl,$Caption);
+ $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
+ open ($OUT,">$FileName")
+ or &Bleat(2,sprintf("Cannot open output file '%s': $!",
+ $FileName));
+ $Handle = $OUT;
+ };
+ print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
+ $Precision, $PadField, $PadValue);
+ $LastIteration = $Caption;
+ };
+ close $OUT if ($FileTempl);
+};
+
+################################################################################
+sub FormatOutput {
+################################################################################
+### format information for output according to format specifier
+### IN : $Format : format specifier
+### $Comments : print or suppress all comments for machine-readable output
+### $Caption : month (as YYYY-MM) or $Key, according to sorting order
+### $Key : newsgroup, client, ... or $Month, as above
+### $Value : number of postings with that attribute
+### $Precision: number of digits right of decimal point (0 or 2)
+### $PadField : padding length for key field (optional) for 'pretty'
+### $PadValue : padding length for value field (optional) for 'pretty'
+### OUT: $Output: formatted output
+ my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
+ $PadValue) = @_;
+ my ($Output);
+ # keep last caption in mind
+ our ($LastIteration);
+ # create one line of output
+ if ($Format eq 'dump') {
+ # output as dump (key value)
+ $Output = sprintf ("# %s:\n",$Caption)
+ if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
+ $Output .= sprintf ("%s %u\n",$Key,$Value);
+ } elsif ($Format eq 'list') {
+ # output as list (caption key value)
+ $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
+ } elsif ($Format eq 'pretty') {
+ # output as a table
+ $Output = sprintf ("# ----- %s:\n",$Caption)
+ if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
+ # increase $PadValue for numbers with decimal point
+ $PadValue += $Precision+1 if $Precision;
+ # add padding if $PadField is set; $PadValue HAS to be set then
+ $Output .= sprintf ($PadField ?
+ sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
+ "%s%s %.*f\n",$Key,$Comments ? ':' : '',
+ $Precision,$Value);
+ };
+ return $Output;
+};
+
+#####------------------------- QueryModifications -------------------------#####
+
+################################################################################
+sub SQLHierarchies {
+################################################################################
+### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
+### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
+### true, accordingly)
+### IN : $ShowHierarchies: boolean value
+### OUT: SQL code
+ my ($ShowHierarchies) = @_;
+ return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
+};
+
+################################################################################
+sub GetMaxLength {
+################################################################################
+### get length of longest fields in future query result
+### IN : $DBHandle : database handle
+### $Table : table to query
+### $Field : field (key!, i.e. month, newsgroup, ...) to check
+### $Value : field (value!, i.e. postings) to check
+### $WhereClause : WHERE clause
+### $HavingClause: HAVING clause
+### @BindVars : bind variables for WHERE clause
+### OUT: $FieldLength : length of longest instance of $Field
+### $ValueLength : length of longest instance of $Value
+ my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
+ "MAX(%s) ".
+ "FROM %s %s %s",$Field,,$Value,
+ $Table,$WhereClause,$HavingClause ?
+ 'GROUP BY newsgroup' . $HavingClause .
+ ' ORDER BY LENGTH(newsgroup) '.
+ 'DESC LIMIT 1': ''));
+ $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
+ "for '%s' from table '%s': ".
+ "$DBI::errstr",$Field,$Table));
+ my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
+ my $ValueLength = length($ValueMax) if ($ValueMax);
+ return ($FieldLength,$ValueLength);
+};
+
+################################################################################
+sub SQLSortOrder {
+################################################################################
+### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
+### $OptOrderBy (secondary sorting), both ascending or descending;
+### descending sorting order is done by adding '-desc'
+### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
+### $OrderBy: secondary sort by month/newsgroups (default)
+### or number of 'postings'
+### OUT: a SQL ORDER BY clause
+ my ($GroupBy,$OrderBy) = @_;
+ my ($GroupSort,$OrderSort) = ('','');
+ # $GroupBy (primary sorting)
+ if (!$GroupBy) {
+ $GroupBy = 'month';
+ } else {
+ ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
+ if ($GroupBy =~ /group/i) {
+ $GroupBy = 'newsgroup';
+ } else {
+ $GroupBy = 'month';
+ }
+ }
+ my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
+ # $OrderBy (secondary sorting)
+ if (!$OrderBy) {
+ $OrderBy = $Secondary;
+ } else {
+ ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
+ if ($OrderBy =~ /posting/i) {
+ $OrderBy = "postings $OrderSort, $Secondary";
+ } else {
+ $OrderBy = "$Secondary $OrderSort";
+ }
+ }
+ return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
+};
+
+################################################################################
+sub SQLParseOrder {
+################################################################################
+### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
+### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
+### IN : $OrderOption: order option (see above)
+### OUT: parameter to sort by,
+### sort order ('DESC' or nothing, meaning 'ASC')
+ my ($OrderOption) = @_;
+ my $SortOrder = '';
+ if ($OrderOption =~ s/-?desc$//i) {
+ $SortOrder = 'DESC';
+ } else {
+ $OrderOption =~ s/-?asc$//i
+ }
+ return ($OrderOption,$SortOrder);
+};
+
+################################################################################
+sub SQLGroupList {
+################################################################################
+### explode list of newsgroups separated by : (with wildcards)
+### to a SQL 'WHERE' expression
+### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
+### OUT: SQL code to become part of a 'WHERE' clause,
+### list of newsgroups for SQL bindings
+ my ($Newsgroups) = @_;
+ # substitute '*' wildcard with SQL wildcard character '%'
+ $Newsgroups =~ s/\*/%/g;
+ return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
+ # just one newsgroup?
+ return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
+ my ($SQL,@WildcardGroups,@NoWildcardGroups);
+ # list of newsgroups separated by ':'
+ my @GroupList = split /:/, $Newsgroups;
+ foreach (@GroupList) {
+ if ($_ !~ /%/) {
+ # add to list of newsgroup names WITHOUT wildcard
+ push (@NoWildcardGroups,$_);
+ } else {
+ # add to list of newsgroup names WITH wildcard
+ push (@WildcardGroups,$_);
+ # add wildcard to SQL clause
+ # 'OR' if SQL clause is not empty
+ $SQL .= ' OR ' if $SQL;
+ $SQL .= 'newsgroup LIKE ?'
+ }
+ };
+ if (scalar(@NoWildcardGroups)) {
+ # add 'OR' if SQL clause is not empty
+ $SQL .= ' OR ' if $SQL;
+ if (scalar(@NoWildcardGroups) < 2) {
+ # special case: just one newsgroup without wildcard
+ $SQL .= 'newsgroup = ?';
+ } else {
+ # create list of newsgroups to include: 'newsgroup IN (...)'
+ $SQL .= 'newsgroup IN (';
+ my $SQLin;
+ foreach (@NoWildcardGroups) {
+ $SQLin .= ',' if $SQLin;
+ $SQLin .= '?';
+ }
+ # add list to SQL clause
+ $SQL .= $SQLin .= ')';
+ }
+ }
+ # add brackets '()' to SQL clause as needed (more than one wildcard group)
+ if (scalar(@WildcardGroups)) {
+ $SQL = '(' . $SQL .')';
+ }
+ # rebuild @GroupList in (now) correct order
+ @GroupList = (@WildcardGroups,@NoWildcardGroups);
+ return ($SQL,@GroupList);
+};
+
+################################################################################
+sub SQLGroupWildcard {
+################################################################################
+### build a valid SQL 'WHERE' expression with or without wildcards
+### IN : $Newsgroup: newsgroup expression, probably with wildcard
+### (group.name or group.name.%)
+### OUT: SQL code to become part of a 'WHERE' clause
+ my ($Newsgroup) = @_;
+ if ($Newsgroup !~ /%/) {
+ return 'newsgroup = ?';
+ } else {
+ return 'newsgroup LIKE ?';
+ }
+};
+
+################################################################################
+sub SQLSetBounds {
+################################################################################
+### set upper and/or lower boundary (number of postings)
+### IN : $Type: 'level', 'average', 'sum' or 'default'
+### $LowBound,$UppBound: lower/upper boundary, respectively
+### OUT: SQL code to become part of a WHERE or HAVING clause
+ my ($Type,$LowBound,$UppBound) = @_;
+ ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
+ if($LowBound and $UppBound and $LowBound > $UppBound) {
+ &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
+ "$UppBound, exchanging boundaries.");
+ ($LowBound,$UppBound) = ($UppBound,$LowBound);
+ }
+ # default to 'default'
+ my $WhereHavingFunction = 'postings';
+ # set $LowBound to SQL statement:
+ # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
+ if ($Type eq 'level') {
+ $WhereHavingFunction = 'MIN(postings)'
+ } elsif ($Type eq 'average') {
+ $WhereHavingFunction = 'AVG(postings)'
+ } elsif ($Type eq 'sum') {
+ $WhereHavingFunction = 'SUM(postings)'
+ }
+ $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
+ # set $LowBound to SQL statement:
+ # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
+ if ($Type eq 'level') {
+ $WhereHavingFunction = 'MAX(postings)'
+ } elsif ($Type eq 'average') {
+ $WhereHavingFunction = 'AVG(postings)'
+ } elsif ($Type eq 'sum') {
+ $WhereHavingFunction = 'SUM(postings)'
+ }
+ $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
+ return ($LowBound,$UppBound);
+};
+
+################################################################################
+sub SQLCheckNumber {
+################################################################################
+### check if input is a valid positive integer; otherwise, make it one
+### IN : @Numbers: array of parameters
+### OUT: @Numbers: a valid positive integer
+ my (@Numbers) = @_;
+ foreach my $Number (@Numbers) {
+ if ($Number and $Number < 0) {
+ &Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
+ $Number = -$Number;
+ }
+ $Number = '' if ($Number and $Number !~ /^\d+$/);
+ }
+ return @Numbers;
+};
+
+################################################################################
+sub SQLBuildClause {
+################################################################################
+### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
+### from multiple expressions which *may* be empty
+### IN : $Type: 'where', 'having', 'group' or 'order'
+### @Expressions: array of expressions
+### OUT: $SQLClause: a SQL clause
+ my ($Type,@Expressions) = @_;
+ my ($SQLClause,$Separator,$Statement);
+ # set separator ('AND' or ',')
+ if ($Type eq 'where' or $Type eq 'having') {
+ $Separator = 'AND';
+ } else {
+ $Separator = ',';
+ }
+ # set statement
+ if ($Type eq 'where') {
+ $Statement = 'WHERE';
+ } elsif ($Type eq 'order') {
+ $Statement = 'ORDER BY';
+ } elsif ($Type eq 'having') {
+ $Statement = 'HAVING';
+ } else {
+ $Statement = 'GROUP BY';
+ }
+ # build query from expressions with separators
+ foreach my $Expression (@Expressions) {
+ if ($Expression) {
+ $SQLClause .= " $Separator " if ($SQLClause);
+ $SQLClause .= $Expression;
+ }
+ }
+ # add statement in front if not already present
+ $SQLClause = " $Statement " . $SQLClause
+ if ($SQLClause and $SQLClause !~ /$Statement/);
+ return $SQLClause;
+};
+
+#####--------------------------- Verifications ----------------------------#####
+
+################################################################################
+sub CheckValidNewsgroups {
+################################################################################
+### syntax check of newgroup list
+### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
+### OUT: boolean
+ my ($Newsgroups) = @_;
+ my $InvalidCharRegExp = ',; ';
+ return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
+};
+
+
+#####------------------------------- done ---------------------------------#####
+1;
+
+
+++ /dev/null
-### database configuration
-#
-# driver, host, credentials and database
-#
-DBDriver = mysql
-DBHost = localhost
-DBUser =
-DBPw =
-DBDatabase = newsstats
-#
-# tables
-#
-DBTableRaw = raw_de
-DBTableGrps = groups_de
-#DBTableClnts =
-#DBTableHosts =
-
-### hierarchy configuration
-TLH = de