From: Thomas Hochstein Date: Tue, 3 Sep 2013 07:21:55 +0000 (+0200) Subject: Redo directory structure. X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=commitdiff_plain;h=2ad99c20bcc297362aeccabb1c51e20c4bd4b533 Redo directory structure. * Move all scripts to /bin * Move configuration to /etc * Move NewsStats.pm to /lib * Add new path to NewsStats.pm to all scripts * Set $HomePath to top level directory * Move setting of config file name to ReadConf() Signed-off-by: Thomas Hochstein --- diff --git a/.gitignore b/.gitignore index 40a80e5..a71cb91 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ tmp/ tmp/* -newsstats.conf +etc/newsstats.conf diff --git a/NewsStats.pm b/NewsStats.pm deleted file mode 100644 index a237774..0000000 --- a/NewsStats.pm +++ /dev/null @@ -1,774 +0,0 @@ -# NewsStats.pm -# -# Library functions for the NewsStats package. -# -# Copyright (c) 2010-2013 Thomas Hochstein -# -# This module can be redistributed and/or modified under the same terms under -# which Perl itself is published. - -package NewsStats; - -use strict; -use warnings; -our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); - -require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw( - $MyVersion - $PackageVersion - $FullPath - $HomePath - ShowVersion - ShowPOD - ReadConfig - OverrideConfig - InitDB - Bleat -); -@EXPORT_OK = qw( - GetTimePeriod - LastMonth - SplitPeriod - ListMonth - ListNewsgroups - ParseHierarchies - ReadGroupList - OutputData - FormatOutput - SQLHierarchies - SQLSortOrder - SQLGroupList - SQLSetBounds - SQLBuildClause - GetMaxLength -); -%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod - ListMonth)], - Output => [qw(OutputData FormatOutput)], - SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList - SQLSetBounds SQLBuildClause GetMaxLength)]); -$VERSION = '0.01'; -our $PackageVersion = '0.01'; - -use Data::Dumper; -use File::Basename; - -use Config::Auto; -use DBI; - -#####-------------------------------- Vars --------------------------------##### - -# trim the path -our $FullPath = $0; -our $HomePath = dirname($0); -$0 =~ s%.*/%%; -# set version string -our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; - -#####------------------------------- Basics -------------------------------##### - -################################################################################ - -################################################################################ -sub ShowVersion { -################################################################################ -### display version and exit - print "NewsStats v$PackageVersion\n$MyVersion\n"; - print "Copyright (c) 2010-2012 Thomas Hochstein \n"; - print "This program is free software; you may redistribute it ". - "and/or modify it under the same terms as Perl itself.\n"; - exit(100); -}; -################################################################################ - -################################################################################ -sub ShowPOD { -################################################################################ -### feed myself to perldoc and exit - exec('perldoc', $FullPath); - exit(100); -}; -################################################################################ - -################################################################################ -sub ReadConfig { -################################################################################ -### read config via Config::Auto -### IN : $ConfFile: config filename -### OUT: reference to a hash containing the configuration - my ($ConfFile) = @_; - # mandatory configuration options - my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', - 'DBTableRaw','DBTableGrps'); - # read config via Config::Auto - my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); - my %Conf = %{$ConfR}; - # check for mandatory options - foreach (@Mandatory) { - &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) - if (!defined($Conf{$_})); - } - # $Conf{'TLH'} is checked in gatherstats.pl - return $ConfR; -}; -################################################################################ - -################################################################################ -sub OverrideConfig { -################################################################################ -### override configuration values -### IN : $ConfigR : reference to configuration hash -### $OverrideR: reference to a hash containing overrides - my ($ConfigR,$OverrideR) = @_; - my %Override = %$OverrideR; - # Config hash empty? - &Bleat(1,"Empty configuration hash passed to OverrideConfig()") - if ( keys %$ConfigR < 1); - # return if no overrides - return if (keys %Override < 1 or keys %$ConfigR < 1); - foreach my $Key (keys %Override) { - $$ConfigR{$Key} = $Override{$Key}; - }; -}; -################################################################################ - -################################################################################ -sub InitDB { -################################################################################ -### initialise database connection -### IN : $ConfigR: reference to configuration hash -### $Die : if TRUE, die if connection fails -### OUT: DBHandle - my ($ConfigR,$Die) = @_; - my %Conf = %$ConfigR; - my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', - $Conf{'DBDriver'},$Conf{'DBDatabase'}, - $Conf{'DBHost'}), $Conf{'DBUser'}, - $Conf{'DBPw'}, { PrintError => 0 }); - if (!$DBHandle) { - &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); - &Bleat(1,$DBI::errstr); - }; - return $DBHandle; -}; -################################################################################ - -################################################################################ -sub Bleat { -################################################################################ -### print warning or error messages and terminate in case of error -### IN : $Level : 1 = warning, 2 = error -### $Message: warning or error message - my ($Level,$Message) = @_; - if ($Level == 1) { - warn "$0 W: $Message\n" - } elsif ($Level == 2) { - die "$0 E: $Message\n" - } else { - print "$0: $Message\n" - } -}; -################################################################################ - -#####------------------------------ GetStats ------------------------------##### - -################################################################################ -sub ListNewsgroups { -################################################################################ -### explode a (scalar) list of newsgroup names to a list of newsgroup and -### hierarchy names where every newsgroup and hierarchy appears only once: -### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin -### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) -### $TLH : top level hierarchy (all other newsgroups are ignored) -### $ValidGroupsR: reference to a hash containing all valid newsgroups -### as keys -### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys - my ($Newsgroups,$TLH,$ValidGroupsR) = @_; - my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; - my %Newsgroups; - chomp($Newsgroups); - # remove whitespace from contents of Newsgroups: - $Newsgroups =~ s/\s//; - # call &HierarchyCount for each newsgroup in $Newsgroups: - for (split /,/, $Newsgroups) { - # don't count newsgroup/hierarchy in wrong TLH - next if($TLH and !/^$TLH/); - # don't count invalid newsgroups - if(%ValidGroups and !defined($ValidGroups{$_})) { - warn (sprintf("DROPPED: %s\n",$_)); - next; - } - # add original newsgroup to %Newsgroups - $Newsgroups{$_} = 1; - # add all hierarchy elements to %Newsgroups, amended by '.ALL', - # i.e. de.alt.ALL and de.ALL - foreach (ParseHierarchies($_)) { - $Newsgroups{$_.'.ALL'} = 1; - } - }; - return %Newsgroups; -}; - -################################################################################ -sub ParseHierarchies { -################################################################################ -### return a list of all hierarchy levels a newsgroup belongs to -### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) -### IN : $Newsgroup : a newsgroup name -### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to - my ($Newsgroup) = @_; - my @Hierarchies; - # strip trailing dots - $Newsgroup =~ s/(.+)\.+$/$1/; - # butcher newsgroup name by "." and add each hierarchy to @Hierarchies - # i.e. de.alt.test: "de.alt" and "de" - while ($Newsgroup =~ /\./) { - $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; - push @Hierarchies, $Newsgroup; - }; - return @Hierarchies; -}; - -################################################################################ -sub ReadGroupList { -################################################################################ -### read a list of valid newsgroups from file (each group on one line, -### ignoring everything after the first whitespace and so accepting files -### in checkgroups format as well as (parts of) an INN active file) -### IN : $Filename : file to read -### OUT: \%ValidGroups: hash containing all valid newsgroups - my ($Filename) = @_; - my %ValidGroups; - open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); - while (<$LIST>) { - s/^\s*(\S+).*$/$1/; - chomp; - next if /^$/; - $ValidGroups{$_} = '1'; - }; - close $LIST; - return \%ValidGroups; -}; - -################################################################################ - -#####----------------------------- TimePeriods ----------------------------##### - -################################################################################ -sub GetTimePeriod { -################################################################################ -### get a time period to act on from --month option; -### if empty, default to last month -### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' -### OUT: $Verbal,$SQL: verbal description and WHERE-clause -### of the chosen time period - my ($Month) = @_; - # define result variables - my ($Verbal, $SQL); - # define a regular expression for a month - my $REMonth = '\d{4}-\d{2}'; - - # default to last month if option is not set - if(!$Month) { - $Month = &LastMonth; - } - - # check for valid input - if ($Month =~ /^$REMonth$/) { - # single month (YYYY-MM) - ($Month) = &CheckMonth($Month); - $Verbal = $Month; - $SQL = sprintf("month = '%s'",$Month); - } elsif ($Month =~ /^$REMonth:$REMonth$/) { - # time period (YYYY-MM:YYYY-MM) - $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); - $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); - } elsif ($Month =~ /^all$/i) { - # special case: ALL - $Verbal = 'all time'; - $SQL = ''; - } else { - # invalid input - return (undef,undef); - } - - return ($Verbal,$SQL); -}; - -################################################################################ -sub LastMonth { -################################################################################ -### get last month from todays date in YYYY-MM format -### OUT: last month as YYYY-MM - # get today's date - my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); - # $Month is already defined from 0 to 11, so no need to decrease it by 1 - $Year += 1900; - if ($Month < 1) { - $Month = 12; - $Year--; - }; - # return last month - return sprintf('%4d-%02d',$Year,$Month); -}; - -################################################################################ -sub CheckMonth { -################################################################################ -### check if input (in YYYY-MM form) is valid with MM between 01 and 12; -### otherwise, fix it -### IN : @Month: array of month -### OUT: @Month: a valid month - my (@Month) = @_; - foreach my $Month (@Month) { - my ($OldMonth) = $Month; - my ($CalMonth) = substr ($Month, -2); - if ($CalMonth < 1 or $CalMonth > 12) { - $CalMonth = '12' if $CalMonth > 12; - $CalMonth = '01' if $CalMonth < 1; - substr($Month, -2) = $CalMonth; - &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". - "and '12'), set to '%s'.",$OldMonth,$Month)); - } - } - return @Month; -}; - -################################################################################ -sub SplitPeriod { -################################################################################ -### split a time period denoted by YYYY-MM:YYYY-MM into start and end month -### IN : $Period: time period -### OUT: $StartMonth, $EndMonth - my ($Period) = @_; - my ($StartMonth, $EndMonth) = split /:/, $Period; - ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); - # switch parameters as necessary - if ($EndMonth gt $StartMonth) { - return ($StartMonth, $EndMonth); - } else { - return ($EndMonth, $StartMonth); - }; -}; - -################################################################################ -sub ListMonth { -################################################################################ -### return a list of months (YYYY-MM) between start and end month -### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') -### OUT: @Months: array containing all months from $MonthExpression enumerated - my ($MonthExpression )= @_; - # return if single month - return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); - # parse $MonthExpression - my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); - # set $Year, $Month from $StartMonth - my ($Year, $Month) = split /-/, $StartMonth; - # define @Months - my (@Months); - until ("$Year-$Month" gt $EndMonth) { - push @Months, "$Year-$Month"; - $Month = "$Month"; # force string context - $Month++; - if ($Month > 12) { - $Month = '01'; - $Year++; - }; - }; - return @Months; -}; - -#####---------------------------- OutputFormats ---------------------------##### - -################################################################################ -sub OutputData { -################################################################################ -### read database query results from DBHandle and print results with formatting -### IN : $Format : format specifier -### $Comments : print or suppress all comments for machine-readable output -### $GroupBy : primary sorting order (month or key) -### $Precision: number of digits right of decimal point (0 or 2) -### $ValidKeys: reference to a hash containing all valid keys -### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM -### $DBQuery : database query handle with executed query, -### containing $Month, $Key, $Value -### $PadField : padding length for key field (optional) for 'pretty' -### $PadValue : padding length for value field (optional) for 'pretty' - my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, - $DBQuery, $PadField, $PadValue) = @_; - my %ValidKeys = %{$ValidKeys} if $ValidKeys; - my ($FileName, $Handle, $OUT); - our $LastIteration; - - # define output types - my %LegalOutput; - @LegalOutput{('dump','list','pretty')} = (); - # bail out if format is unknown - &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); - - while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { - # don't display invalid keys - if(%ValidKeys and !defined($ValidKeys{$Key})) { - # FIXME - # &Bleat(1,sprintf("DROPPED: %s",$Key)); - next; - }; - # care for correct sorting order and abstract from month and keys: - # $Caption will be $Month or $Key, according to sorting order, - # and $Key will be $Key or $Month, respectively - my $Caption; - if ($GroupBy eq 'key') { - $Caption = $Key; - $Key = $Month; - } else { - $Caption = $Month; - } - # set output file handle - if (!$FileTempl) { - $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT - } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { - close $OUT if ($LastIteration); - # safeguards for filename creation: - # replace potential problem characters with '_' - $FileName = sprintf('%s-%s',$FileTempl,$Caption); - $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; - open ($OUT,">$FileName") - or &Bleat(2,sprintf("Cannot open output file '%s': $!", - $FileName)); - $Handle = $OUT; - }; - print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, - $Precision, $PadField, $PadValue); - $LastIteration = $Caption; - }; - close $OUT if ($FileTempl); -}; - -################################################################################ -sub FormatOutput { -################################################################################ -### format information for output according to format specifier -### IN : $Format : format specifier -### $Comments : print or suppress all comments for machine-readable output -### $Caption : month (as YYYY-MM) or $Key, according to sorting order -### $Key : newsgroup, client, ... or $Month, as above -### $Value : number of postings with that attribute -### $Precision: number of digits right of decimal point (0 or 2) -### $PadField : padding length for key field (optional) for 'pretty' -### $PadValue : padding length for value field (optional) for 'pretty' -### OUT: $Output: formatted output - my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, - $PadValue) = @_; - my ($Output); - # keep last caption in mind - our ($LastIteration); - # create one line of output - if ($Format eq 'dump') { - # output as dump (key value) - $Output = sprintf ("# %s:\n",$Caption) - if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); - $Output .= sprintf ("%s %u\n",$Key,$Value); - } elsif ($Format eq 'list') { - # output as list (caption key value) - $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); - } elsif ($Format eq 'pretty') { - # output as a table - $Output = sprintf ("# ----- %s:\n",$Caption) - if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); - # increase $PadValue for numbers with decimal point - $PadValue += $Precision+1 if $Precision; - # add padding if $PadField is set; $PadValue HAS to be set then - $Output .= sprintf ($PadField ? - sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : - "%s%s %.*f\n",$Key,$Comments ? ':' : '', - $Precision,$Value); - }; - return $Output; -}; - -#####------------------------- QueryModifications -------------------------##### - -################################################################################ -sub SQLHierarchies { -################################################################################ -### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by -### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is -### true, accordingly) -### IN : $ShowHierarchies: boolean value -### OUT: SQL code - my ($ShowHierarchies) = @_; - return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; -}; - -################################################################################ -sub GetMaxLength { -################################################################################ -### get length of longest fields in future query result -### IN : $DBHandle : database handle -### $Table : table to query -### $Field : field (key!, i.e. month, newsgroup, ...) to check -### $Value : field (value!, i.e. postings) to check -### $WhereClause : WHERE clause -### $HavingClause: HAVING clause -### @BindVars : bind variables for WHERE clause -### OUT: $FieldLength : length of longest instance of $Field -### $ValueLength : length of longest instance of $Value - my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". - "MAX(%s) ". - "FROM %s %s %s",$Field,,$Value, - $Table,$WhereClause,$HavingClause ? - 'GROUP BY newsgroup' . $HavingClause . - ' ORDER BY LENGTH(newsgroup) '. - 'DESC LIMIT 1': '')); - $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". - "for '%s' from table '%s': ". - "$DBI::errstr",$Field,$Table)); - my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; - my $ValueLength = length($ValueMax) if ($ValueMax); - return ($FieldLength,$ValueLength); -}; - -################################################################################ -sub SQLSortOrder { -################################################################################ -### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and -### $OptOrderBy (secondary sorting), both ascending or descending; -### descending sorting order is done by adding '-desc' -### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' -### $OrderBy: secondary sort by month/newsgroups (default) -### or number of 'postings' -### OUT: a SQL ORDER BY clause - my ($GroupBy,$OrderBy) = @_; - my ($GroupSort,$OrderSort) = ('',''); - # $GroupBy (primary sorting) - if (!$GroupBy) { - $GroupBy = 'month'; - } else { - ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); - if ($GroupBy =~ /group/i) { - $GroupBy = 'newsgroup'; - } else { - $GroupBy = 'month'; - } - } - my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; - # $OrderBy (secondary sorting) - if (!$OrderBy) { - $OrderBy = $Secondary; - } else { - ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); - if ($OrderBy =~ /posting/i) { - $OrderBy = "postings $OrderSort, $Secondary"; - } else { - $OrderBy = "$Secondary $OrderSort"; - } - } - return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); -}; - -################################################################################ -sub SQLParseOrder { -################################################################################ -### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. -### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' -### IN : $OrderOption: order option (see above) -### OUT: parameter to sort by, -### sort order ('DESC' or nothing, meaning 'ASC') - my ($OrderOption) = @_; - my $SortOrder = ''; - if ($OrderOption =~ s/-?desc$//i) { - $SortOrder = 'DESC'; - } else { - $OrderOption =~ s/-?asc$//i - } - return ($OrderOption,$SortOrder); -}; - -################################################################################ -sub SQLGroupList { -################################################################################ -### explode list of newsgroups separated by : (with wildcards) -### to a SQL 'WHERE' expression -### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) -### OUT: SQL code to become part of a 'WHERE' clause, -### list of newsgroups for SQL bindings - my ($Newsgroups) = @_; - # substitute '*' wildcard with SQL wildcard character '%' - $Newsgroups =~ s/\*/%/g; - return (undef,undef) if !CheckValidNewsgroups($Newsgroups); - # just one newsgroup? - return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; - my ($SQL,@WildcardGroups,@NoWildcardGroups); - # list of newsgroups separated by ':' - my @GroupList = split /:/, $Newsgroups; - foreach (@GroupList) { - if ($_ !~ /%/) { - # add to list of newsgroup names WITHOUT wildcard - push (@NoWildcardGroups,$_); - } else { - # add to list of newsgroup names WITH wildcard - push (@WildcardGroups,$_); - # add wildcard to SQL clause - # 'OR' if SQL clause is not empty - $SQL .= ' OR ' if $SQL; - $SQL .= 'newsgroup LIKE ?' - } - }; - if (scalar(@NoWildcardGroups)) { - # add 'OR' if SQL clause is not empty - $SQL .= ' OR ' if $SQL; - if (scalar(@NoWildcardGroups) < 2) { - # special case: just one newsgroup without wildcard - $SQL .= 'newsgroup = ?'; - } else { - # create list of newsgroups to include: 'newsgroup IN (...)' - $SQL .= 'newsgroup IN ('; - my $SQLin; - foreach (@NoWildcardGroups) { - $SQLin .= ',' if $SQLin; - $SQLin .= '?'; - } - # add list to SQL clause - $SQL .= $SQLin .= ')'; - } - } - # add brackets '()' to SQL clause as needed (more than one wildcard group) - if (scalar(@WildcardGroups)) { - $SQL = '(' . $SQL .')'; - } - # rebuild @GroupList in (now) correct order - @GroupList = (@WildcardGroups,@NoWildcardGroups); - return ($SQL,@GroupList); -}; - -################################################################################ -sub SQLGroupWildcard { -################################################################################ -### build a valid SQL 'WHERE' expression with or without wildcards -### IN : $Newsgroup: newsgroup expression, probably with wildcard -### (group.name or group.name.%) -### OUT: SQL code to become part of a 'WHERE' clause - my ($Newsgroup) = @_; - if ($Newsgroup !~ /%/) { - return 'newsgroup = ?'; - } else { - return 'newsgroup LIKE ?'; - } -}; - -################################################################################ -sub SQLSetBounds { -################################################################################ -### set upper and/or lower boundary (number of postings) -### IN : $Type: 'level', 'average', 'sum' or 'default' -### $LowBound,$UppBound: lower/upper boundary, respectively -### OUT: SQL code to become part of a WHERE or HAVING clause - my ($Type,$LowBound,$UppBound) = @_; - ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); - if($LowBound and $UppBound and $LowBound > $UppBound) { - &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". - "$UppBound, exchanging boundaries."); - ($LowBound,$UppBound) = ($UppBound,$LowBound); - } - # default to 'default' - my $WhereHavingFunction = 'postings'; - # set $LowBound to SQL statement: - # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' - if ($Type eq 'level') { - $WhereHavingFunction = 'MIN(postings)' - } elsif ($Type eq 'average') { - $WhereHavingFunction = 'AVG(postings)' - } elsif ($Type eq 'sum') { - $WhereHavingFunction = 'SUM(postings)' - } - $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); - # set $LowBound to SQL statement: - # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' - if ($Type eq 'level') { - $WhereHavingFunction = 'MAX(postings)' - } elsif ($Type eq 'average') { - $WhereHavingFunction = 'AVG(postings)' - } elsif ($Type eq 'sum') { - $WhereHavingFunction = 'SUM(postings)' - } - $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); - return ($LowBound,$UppBound); -}; - -################################################################################ -sub SQLCheckNumber { -################################################################################ -### check if input is a valid positive integer; otherwise, make it one -### IN : @Numbers: array of parameters -### OUT: @Numbers: a valid positive integer - my (@Numbers) = @_; - foreach my $Number (@Numbers) { - if ($Number and $Number < 0) { - &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); - $Number = -$Number; - } - $Number = '' if ($Number and $Number !~ /^\d+$/); - } - return @Numbers; -}; - -################################################################################ -sub SQLBuildClause { -################################################################################ -### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause -### from multiple expressions which *may* be empty -### IN : $Type: 'where', 'having', 'group' or 'order' -### @Expressions: array of expressions -### OUT: $SQLClause: a SQL clause - my ($Type,@Expressions) = @_; - my ($SQLClause,$Separator,$Statement); - # set separator ('AND' or ',') - if ($Type eq 'where' or $Type eq 'having') { - $Separator = 'AND'; - } else { - $Separator = ','; - } - # set statement - if ($Type eq 'where') { - $Statement = 'WHERE'; - } elsif ($Type eq 'order') { - $Statement = 'ORDER BY'; - } elsif ($Type eq 'having') { - $Statement = 'HAVING'; - } else { - $Statement = 'GROUP BY'; - } - # build query from expressions with separators - foreach my $Expression (@Expressions) { - if ($Expression) { - $SQLClause .= " $Separator " if ($SQLClause); - $SQLClause .= $Expression; - } - } - # add statement in front if not already present - $SQLClause = " $Statement " . $SQLClause - if ($SQLClause and $SQLClause !~ /$Statement/); - return $SQLClause; -}; - -#####--------------------------- Verifications ----------------------------##### - -################################################################################ -sub CheckValidNewsgroups { -################################################################################ -### syntax check of newgroup list -### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) -### OUT: boolean - my ($Newsgroups) = @_; - my $InvalidCharRegExp = ',; '; - return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; -}; - - -#####------------------------------- done ---------------------------------##### -1; - - diff --git a/bin/feedlog.pl b/bin/feedlog.pl new file mode 100755 index 0000000..ef9429c --- /dev/null +++ b/bin/feedlog.pl @@ -0,0 +1,268 @@ +#! /usr/bin/perl +# +# feedlog.pl +# +# This script will log headers and other data to a database +# for further analysis by parsing a feed from INN. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010-2013 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../bin, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); +} +use strict; +use warnings; + +use NewsStats; + +use Sys::Syslog qw(:standard :macros); + +use Date::Format; +use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); + +################################# Subroutines ################################## + +sub PrepareDB { +### initialise database connection, prepare statement +### and catch errors +### IN : \%Conf : reference to configuration hash +### OUT: $DBHandle: database handle +### $DBQuery : prepared statement + our ($DBHandle, $DBQuery, $OptQuiet); + my ($ConfigR) = @_; + my %Conf = %$ConfigR; + # drop current database connection - hard, if necessary + if ($DBHandle) { + $DBHandle->disconnect; + undef $DBHandle; + }; + # connect to database; try again every 5 seconds + while (!$DBHandle) { + $DBHandle = InitDB($ConfigR,0); + if (!$DBHandle) { + syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); + sleep(5); + } else {; + syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet; + } + }; + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, + timestamp,token,size,peer,path, + newsgroups,headers) + VALUES (?,?,?,?,?,?,?,?,?,?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + return ($DBHandle,$DBQuery); +} + + +################################# Main program ################################# + +### read commandline options +my ($OptDebug,$OptQuiet); +GetOptions ('d|debug!' => \$OptDebug, + 'q|test!' => \$OptQuiet, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; + +### read configuration +my %Conf = %{ReadConfig('')}; + +### init syslog +openlog($0, 'nofatal,pid', LOG_NEWS); +syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet; + +### init database +my ($DBHandle,$DBQuery) = PrepareDB(\%Conf); + +### main loop +while (<>) { + chomp; + # catch empty lines trailing or leading + if ($_ eq '') { + next; + } + # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups + my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; + # remaining lines contain headers + my $Headers = ""; + while (<>) { + chomp; + # empty line terminates this article + if ($_ eq '') { + last; + } + # collect headers + $Headers .= $_."\n" ; + } + + # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp + my $Day = time2str("%Y-%m-%d", $Timestamp); + my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); + + # write to database + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, 'Database error %s while processing %s: %s', + $DBI::err, $Mid, $DBI::errstr); + # if "MySQL server has gone away", try to recover + if ($DBI::err == 2006) { + # try to reconnect to database + ($DBHandle,$DBQuery) = PrepareDB(\%Conf); + # try to repeat the write attempt as before + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; + # otherwise log missing posting + } else { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; + }; + $DBQuery->finish; + + warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n". + "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n", + $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, + $Newsgroups, $Headers) if $OptDebug; +} + +### close handles +$DBHandle->disconnect; +syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet; +closelog(); + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +feedlog - log data from an INN feed to a database + +=head1 SYNOPSIS + +B [B<-Vhdq>] + +=head1 REQUIREMENTS + +See L. + +=head1 DESCRIPTION + +This script will log overview data and complete headers to a database +table for further examination by parsing a feed from INN. It will +parse that information and write it to a mysql database table in real +time. + +All reporting is done to I via I facility. If B +fails to initiate a database connection at startup, it will log to +I with I priority and go in an endless loop, as +terminating would only result in a rapid respawn. + +=head2 Configuration + +B will read its configuration from F which +should be present in the same directory via Config::Auto. + +See L for an overview of possible configuration options. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. + +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<-d>, B<--debug> + +Output debugging information to STDERR while parsing STDIN. You'll +find that information most probably in your B F file. + +=item B<-q>, B<--quiet> + +Suppress logging to syslog. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +Set up a feed like that in your B F file: + + ## gather statistics for NewsStats + newsstats! + :!*,de.* + :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl + +See L for further information. + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +L + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010-2012 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/bin/gatherstats.pl b/bin/gatherstats.pl new file mode 100755 index 0000000..f545456 --- /dev/null +++ b/bin/gatherstats.pl @@ -0,0 +1,418 @@ +#! /usr/bin/perl +# +# gatherstats.pl +# +# This script will gather statistical information from a database +# containing headers and other information from a INN feed. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010-2013 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../bin, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); +} +use strict; +use warnings; + +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); + +use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); + +################################# Definitions ################################## + +# define types of information that can be gathered +# all / groups (/ clients / hosts) +my %LegalStats; +@LegalStats{('all','groups')} = (); + +################################# Main program ################################# + +### read commandline options +my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, + $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest); +GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, + 'clientsdb=s' => \$OptClientsDB, + 'd|debug!' => \$OptDebug, + 'groupsdb=s' => \$OptGroupsDB, + 'hierarchy=s' => \$OptTLH, + 'hostsdb=s' => \$OptHostsDB, + 'm|month=s' => \$OptMonth, + 'rawdb=s' => \$OptRawDB, + 's|stats=s' => \$OptStatsType, + 't|test!' => \$OptTest, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; + +### read configuration +my %Conf = %{ReadConfig('')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; +$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; +$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; +$ConfOverride{'TLH'} = $OptTLH if $OptTLH; +&OverrideConfig(\%Conf,\%ConfOverride); + +### get type of information to gather, defaulting to 'all' +$OptStatsType = 'all' if !$OptStatsType; +&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType)) + if !exists($LegalStats{$OptStatsType}); + +### get time period from --month +# get verbal description of time period, drop SQL code +my ($Period) = &GetTimePeriod($OptMonth); +&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". + "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); + +### reformat $Conf{'TLH'} +my $TLH; +if ($Conf{'TLH'}) { + # $Conf{'TLH'} is parsed as an array by Config::Auto; + # make a flat list again, separated by : + if (ref($Conf{'TLH'}) eq 'ARRAY') { + $TLH = join(':',@{$Conf{'TLH'}}); + } else { + $TLH = $Conf{'TLH'}; + } + # strip whitespace + $TLH =~ s/\s//g; + # add trailing dots if none are present yet + # (using negative look-behind assertions) + $TLH =~ s/(?prepare(sprintf("SELECT newsgroups FROM %s.%s ". + "WHERE day LIKE ? AND NOT disregard", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + $DBQuery->execute($Month.'-%') + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + + # count postings per group + my %Postings; + while (($_) = $DBQuery->fetchrow_array) { + # get list of newsgroups and hierarchies from Newsgroups: + my %Newsgroups = ListNewsgroups($_,$TLH, + $OptCheckgroupsFile ? \%ValidGroups : ''); + # count each newsgroup and hierarchy once + foreach (sort keys %Newsgroups) { + $Postings{$_}++; + }; + }; + + # add valid but empty groups if --checkgroups is set + if (%ValidGroups) { + foreach (sort keys %ValidGroups) { + if (!defined($Postings{$_})) { + # add current newsgroup as empty group + $Postings{$_} = 0; + warn (sprintf("ADDED: %s as empty group\n",$_)); + # add empty hierarchies for current newsgroup as needed + foreach (ParseHierarchies($_)) { + my $Hierarchy = $_ . '.ALL'; + if (!defined($Postings{$Hierarchy})) { + $Postings{$Hierarchy} = 0; + warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); + }; + }; + } + }; + }; + + # delete old data for that month + if (!$OptTest) { + $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}), + undef,$Month) + or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); + }; + + print "----- GroupStats -----\n" if $OptDebug; + foreach my $Newsgroup (sort keys %Postings) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug; + if (!$OptTest) { + # write to database + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ". + "(month,newsgroup,postings) ". + "VALUES (?, ?, ?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableGrps'})); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) + or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ". + "$DBI::errstr\n",$Month,$Newsgroup, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); + $DBQuery->finish; + }; + }; + } else { + # other types of information go here - later on + }; +}; + +### close handles +$DBHandle->disconnect; + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +gatherstats - process statistical data from a raw source + +=head1 SYNOPSIS + +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I] [B<-c> I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] + +=head1 REQUIREMENTS + +See L. + +=head1 DESCRIPTION + +This script will extract and process statistical information from a +database table which is fed from F for a given time period +and write its results to (an)other database table(s). Entries marked +with I<'disregard'> in the database will be ignored; currently, you +have to set this flag yourself, using your database management tools. +You can exclude erroneous entries that way (e.g. automatic reposts +(think of cancels flood and resurrectors); spam; ...). + +The time period to act on defaults to last month; you can assign +another time period or a single month via the B<--month> option (see +below). + +By default B will process all types of information; you +can change that using the B<--stats> option and assigning the type of +information to process. Currently that doesn't matter yet as only +processing of the number of postings per group per month is +implemented anyway. + +Possible information types include: + +=over 3 + +=item B (postings per group per month) + +B will examine Newsgroups: headers. Crosspostings will be +counted for each single group they appear in. Groups not in I +will be ignored. + +B will also add up the number of postings for each +hierarchy level, but only count each posting once. A posting to +de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, +respectively. A crossposting to de.alt.test and de.alt.admin, on the +other hand, will be counted for de.alt.test and de.alt.admin each, but +only once for de.alt.ALL and de.ALL. + +Data is written to I (see L); you can +override that default through the B<--groupsdb> option. + +=back + +=head2 Configuration + +B will read its configuration from F +which should be present in the same directory via Config::Auto. + +See L for an overview of possible configuration options. + +You can override configuration options via the B<--hierarchy>, +B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, +respectively. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. + +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<-d>, B<--debug> + +Output debugging information to STDOUT while processing (number of +postings per group). + +=item B<-t>, B<--test> + +Do not write results to database. You should use B<--debug> in +conjunction with B<--test> ... everything else seems a bit pointless. + +=item B<-m>, B<--month> I + +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). + +=item B<-s>, B<--stats> I + +Set processing type to one of I and I. Defaults to all +(and is currently rather pointless as only I has been +implemented). + +=item B<-c>, B<--checkgroups> I + +Check each group against a list of valid newsgroups read from a file, +one group on each line and ignoring everything after the first +whitespace (so you can use a file in checkgroups format or (part of) +your INN active file). + +The filename is taken from I, amended by each +B<--month> B is processing in the form of I, +so that + + gatherstats -m 2010-01:2010-12 -c checkgroups + +will check against F for January 2010, against +F for February 2010 and so on. + +Newsgroups not found in the checkgroups file will be dropped (and +logged to STDERR), and newsgroups found there but having no postings +will be added with a count of 0 (and logged to STDERR). + +=item B<--hierarchy> I (newsgroup hierarchy) + +Override I from F. + +=item B<--rawdb> I (raw data table) + +Override I from F. + +=item B<--groupsdb> I
(postings per group table) + +Override I from F. + +=item B<--clientsdb> I
(client data table) + +Override I from F. + +=item B<--hostsdb> I
(host data table) + +Override I from F. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +Process all types of information for lasth month: + + gatherstats + +Do a dry run, showing results of processing: + + gatherstats --debug --test + +Process all types of information for January of 2010: + + gatherstats --month 2010-01 + +Process only number of postings for the year of 2010, +checking against checkgroups-*: + + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +L + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010-2012 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/bin/groupstats.pl b/bin/groupstats.pl new file mode 100755 index 0000000..f1e566b --- /dev/null +++ b/bin/groupstats.pl @@ -0,0 +1,689 @@ +#! /usr/bin/perl +# +# groupstats.pl +# +# This script will get statistical data on newgroup usage +# from a database. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010-2013 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../bin, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); +} +use strict; +use warnings; + +use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); + +use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); + +################################# Main program ################################# + +### read commandline options +my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, + $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, + $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound); +GetOptions ('b|boundary=s' => \$OptBoundType, + 'c|captions!' => \$OptCaptions, + 'checkgroups=s' => \$OptCheckgroupsFile, + 'comments!' => \$OptComments, + 'filetemplate=s' => \$OptFileTemplate, + 'f|format=s' => \$OptFormat, + 'g|group-by=s' => \$OptGroupBy, + 'groupsdb=s' => \$OptGroupsDB, + 'l|lower=i' => \$LowBound, + 'm|month=s' => \$OptMonth, + 'n|newsgroups=s' => \$OptNewsgroups, + 'o|order-by=s' => \$OptOrderBy, + 'r|report=s' => \$OptReportType, + 's|sums!' => \$OptSums, + 'u|upper=i' => \$UppBound, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; +# parse parameters +# $OptComments defaults to TRUE +$OptComments = 1 if (!defined($OptComments)); +# force --nocomments when --filetemplate is used +$OptComments = 0 if ($OptFileTemplate); +# parse $OptBoundType +if ($OptBoundType) { + if ($OptBoundType =~ /level/i) { + $OptBoundType = 'level'; + } elsif ($OptBoundType =~ /av(era)?ge?/i) { + $OptBoundType = 'average'; + } elsif ($OptBoundType =~ /sums?/i) { + $OptBoundType = 'sum'; + } else { + $OptBoundType = 'default'; + } +} +# parse $OptReportType +if ($OptReportType) { + if ($OptReportType =~ /av(era)?ge?/i) { + $OptReportType = 'average'; + } elsif ($OptReportType =~ /sums?/i) { + $OptReportType = 'sum'; + } else { + $OptReportType = 'default'; + } +} +# read list of newsgroups from --checkgroups +# into a hash reference +my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile; + +### read configuration +my %Conf = %{ReadConfig('')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; +&OverrideConfig(\%Conf,\%ConfOverride); + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### get time period and newsgroups, prepare SQL 'WHERE' clause +# get time period +# and set caption for output and expression for SQL 'WHERE' clause +my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); +# bail out if --month is invalid +&Bleat(2,"--month option has an invalid format - ". + "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; +# get list of newsgroups and set expression for SQL 'WHERE' clause +# with placeholders as well as a list of newsgroup to bind to them +my ($SQLWhereNewsgroups,@SQLBindNewsgroups); +if ($OptNewsgroups) { + ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups); + # bail out if --newsgroups is invalid + &Bleat(2,"--newsgroups option has an invalid format!") + if !$SQLWhereNewsgroups; +} + +### build SQL WHERE clause (and HAVING clause, if needed) +my ($SQLWhereClause,$SQLHavingClause); +# $OptBoundType 'level' +if ($OptBoundType and $OptBoundType ne 'default') { + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); + $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, + $LowBound,$UppBound)); +# $OptBoundType 'threshold' / 'default' or none +} else { + $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, + $SQLWhereNewsgroups,&SQLHierarchies($OptSums), + &SQLSetBounds('default',$LowBound,$UppBound)); +} + +### get sort order and build SQL 'ORDER BY' clause +# default to 'newsgroup' for $OptBoundType 'level' or 'average' +$OptGroupBy = 'newsgroup' if (!$OptGroupBy and + $OptBoundType and $OptBoundType ne 'default'); +# force to 'month' for $OptReportType 'average' or 'sum' +$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); +# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause +my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); +# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) +# set it to 'month' or 'key' for OutputData() +$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; + +### get report type and build SQL 'SELECT' query +my $SQLSelect; +my $SQLGroupClause = ''; +my $Precision = 0; # number of digits right of decimal point for output +if ($OptReportType and $OptReportType ne 'default') { + $SQLGroupClause = 'GROUP BY newsgroup'; + # change $SQLOrderClause: replace everything before 'postings' + $SQLOrderClause =~ s/BY.+postings/BY postings/; + if ($OptReportType eq 'average') { + $SQLSelect = "'All months',newsgroup,AVG(postings)"; + $Precision = 2; + # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' + $SQLOrderClause =~ s/postings/AVG(postings)/; + } elsif ($OptReportType eq 'sum') { + $SQLSelect = "'All months',newsgroup,SUM(postings)"; + # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' + $SQLOrderClause =~ s/postings/SUM(postings)/; + } + } else { + $SQLSelect = 'month,newsgroup,postings'; +}; + +### get length of longest newsgroup name delivered by query +### for formatting purposes +my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; +my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, + $Field,'postings',$SQLWhereClause, + $SQLHavingClause, + @SQLBindNewsgroups); + +### build and execute SQL query +my ($DBQuery); +# special query preparation for $OptBoundType 'level', 'average' or 'sums' +if ($OptBoundType and $OptBoundType ne 'default') { + # prepare and execute first query: + # get list of newsgroups meeting level conditions + $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. + 'GROUP BY newsgroup %s', + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLHavingClause)); + $DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); + # add newsgroups to a comma-seperated list ready for IN(...) query + my $GroupList; + while (my ($Newsgroup) = $DBQuery->fetchrow_array) { + $GroupList .= ',' if $GroupList; + $GroupList .= "'$Newsgroup'"; + }; + # enhance $WhereClause + if ($GroupList) { + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, + sprintf('newsgroup IN (%s)',$GroupList)); + } else { + # condition cannot be satisfied; + # force query to fail by adding '0=1' + $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); + } +} + +# prepare query +$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', + $SQLSelect, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $SQLWhereClause,$SQLGroupClause, + $SQLOrderClause)); + +# execute query +$DBQuery->execute(@SQLBindNewsgroups) + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", + $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, + $DBI::errstr)); + +### output results +# set default to 'pretty' +$OptFormat = 'pretty' if !$OptFormat; +# print captions if --caption is set +if ($OptCaptions && $OptComments) { + # print time period with report type + my $CaptionReportType= '(number of postings for each month)'; + if ($OptReportType and $OptReportType ne 'default') { + $CaptionReportType= '(average number of postings for each month)' + if $OptReportType eq 'average'; + $CaptionReportType= '(number of all postings for that time period)' + if $OptReportType eq 'sum'; + } + printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); + # print newsgroup list if --newsgroups is set + printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) + if $OptNewsgroups; + # print boundaries, if set + my $CaptionBoundary= '(counting only month fulfilling this condition)'; + if ($OptBoundType and $OptBoundType ne 'default') { + $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; + $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; + $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; + } + printf("# ----- Threshold: %s %s x %s %s %s\n", + $LowBound ? $LowBound : '',$LowBound ? '=>' : '', + $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) + if ($LowBound or $UppBound); + # print primary and secondary sort order + printf("# ----- Grouped by %s (%s), sorted %s%s\n", + ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', + ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', + ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', + ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); +} + +# output data +&OutputData($OptFormat,$OptComments,$GroupBy,$Precision, + $OptCheckgroupsFile ? $ValidGroups : '', + $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength); + +### close handles +$DBHandle->disconnect; + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +groupstats - create reports on newsgroup usage + +=head1 SYNOPSIS + +B [B<-Vhcs> B<--comments>] [B<-m> I[:I] | I] [B<-n> I] [B<--checkgroups> I] [B<-r> I] [B<-l> I] [B<-u> I] [B<-b> I] [B<-g> I] [B<-o> I] [B<-f> I] [B<--filetemplate> I] [B<--groupsdb> I] + +=head1 REQUIREMENTS + +See L. + +=head1 DESCRIPTION + +This script create reports on newsgroup usage (number of postings per +group per month) taken from result tables created by +B. + +=head2 Features and options + +=head3 Time period and newsgroups + +The time period to act on defaults to last month; you can assign another +time period or a single month (or drop all time constraints) via the +B<--month> option (see below). + +B will process all newsgroups by default; you can limit +processing to only some newsgroups by supplying a list of those groups via +B<--newsgroups> option (see below). You can include hierarchy levels in +the output by adding the B<--sums> switch (see below). Optionally +newsgroups not present in a checkgroups file can be excluded from output, +sse B<--checkgroups> below. + +=head3 Report type + +You can choose between different B<--report> types: postings per month, +average postings per month or all postings summed up; for details, see +below. + +=head3 Upper and lower boundaries + +Furthermore you can set an upper and/or lower boundary to exclude some +results from output via the B<--lower> and B<--upper> options, +respectively. By default, all newsgroups with more and/or less postings +per month will be excluded from the result set (i.e. not shown and not +considered for average and sum reports). You can change the meaning of +those boundaries with the B<--boundary> option. For details, please see +below. + +=head3 Sorting and formatting the output + +By default, all results are grouped by month; you can group results by +newsgroup instead via the B<--groupy-by> option. Within those groups, the +list of newsgroups (or months) is sorted alphabetically (or +chronologically, respectively) ascending. You can change that order (and +sort by number of postings) with the B<--order-by> option. For details and +exceptions, please see below. + +The results will be formatted as a kind of table; you can change the +output format to a simple list or just a list of newsgroups and number of +postings with the B<--format> option. Captions will be added by means of +the B<--caption> option; all comments (and captions) can be supressed by +using B<--nocomments>. + +Last but not least you can redirect all output to a number of files, e.g. +one for each month, by submitting the B<--filetemplate> option, see below. +Captions and comments are automatically disabled in this case. + +=head2 Configuration + +B will read its configuration from F +which should be present in the same directory via Config::Auto. + +See doc/INSTALL for an overview of possible configuration options. + +You can override some configuration options via the B<--groupsdb> option. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. + +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<-m>, B<--month> I + +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). By using the keyword I instead, you can set no +processing period to process the whole database. + +=item B<-n>, B<--newsgroups> I + +Limit processing to a certain set of newsgroups. I can +be a single newsgroup name (de.alt.test), a newsgroup hierarchy +(de.alt.*) or a list of either of these, separated by colons, for +example + + de.test:de.alt.test:de.newusers.* + +=item B<-s>, B<--sums|--nosums> (sum per hierarchy level) + +Include "virtual" groups for every hierarchy level in output, for +example: + + de.alt.ALL 10 + de.alt.test 5 + de.alt.admin 7 + +See the B man page for details. + +=item B<--checkgroups> I + +Restrict output to those newgroups present in a file in checkgroups format +(one newgroup name per line; everything after the first whitespace on each +line is ignored). All other newsgroups will be removed from output. + +Contrary to B, I is not a template, but refers to +a single file in checkgroups format. + +=item B<-r>, B<--report> I + +Choose the report type: I, I or I + +By default, B will report the number of postings for each +newsgroup in each month. But it can also report the average number of +postings per group for all months or the total sum of postings per group +for all months. + +For report types I and I, the B option has no +meaning and will be silently ignored (see below). + +=item B<-l>, B<--lower> I + +Set the lower boundary. See B<--boundary> below. + +=item B<-l>, B<--upper> I + +Set the upper boundary. See B<--boundary> below. + +=item B<-b>, B<--boundary> I + +Set the boundary type to one of I, I, I or +I. + +By default, all newsgroups with more postings per month than the upper +boundary and/or less postings per month than the lower boundary will be +excluded from further processing. For the default report that means each +month only newsgroups with a number of postings between the boundaries +will be displayed. For the other report types, newsgroups with a number of +postings exceeding the boundaries in all (!) months will not be +considered. + +For example, lets take a list of newsgroups like this: + + ----- 2012-01: + de.comp.datenbanken.misc 6 + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.misc 8 + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-03: + de.comp.datenbanken.misc 24 + de.comp.datenbanken.ms-access 83 + de.comp.datenbanken.mysql 36 + +With C, +you'll get the following result: + + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 124 + +de.comp.datenbanken.misc has not been considered even though it has 38 +postings in total, because it has less than 25 postings in every single +month. If you want to list all newsgroups with more than 25 postings +I, you'll have to set the boundary type to I, see below. + +A boundary type of I will show only those newsgroups - at all - +that satisfy the boundaries in each and every single month. With the above +list of newsgroups and +C, +you'll get this result: + + ----- All months: + de.comp.datenbanken.ms-access 293 + +de.comp.datenbanken.mysql has not been considered because it had less than +25 postings in 2012-02 (only). + +You can use that to get a list of newsgroups that have more (or less) then +x postings in every month during the whole reporting period. + +A boundary type of I will show only those newsgroups - at all -that +satisfy the boundaries on average. With the above list of newsgroups and +C, +you'll get this result: + + ----- All months: + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 + +The average number of postings in the three groups is: + + de.comp.datenbanken.misc 12.67 + de.comp.datenbanken.ms-access 97.67 + de.comp.datenbanken.mysql 48.33 + +Last but not least, a boundary type of I will show only those +newsgroups - at all - that satisfy the boundaries with the total sum of +all postings during the reporting period. With the above list of +newsgroups and +C, +you'll finally get this result: + + ----- All months: + de.comp.datenbanken.misc 38 + de.comp.datenbanken.ms-access 293 + de.comp.datenbanken.mysql 145 + + +=item B<-g>, B<--group-by> I + +By default, all results are grouped by month, sorted chronologically in +ascending order, like this: + + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +The results can be grouped by newsgroups instead via +B<--group-by> I: + + ----- de.comp.datenbanken.ms-access: + 2012-01 84 + 2012-02 126 + ----- de.comp.datenbanken.mysql: + 2012-01 88 + 2012-02 21 + +By appending I<-desc> to the group-by option parameter, you can reverse +the sort order - e.g. B<--group-by> I will give: + + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + +Average and sums reports (see above) will always be grouped by months; +this option will therefore be ignored. + +=item B<-o>, B<--order-by> I + +Within each group (a single month or single newsgroup, see above), the +report will be sorted by newsgroup names in ascending alphabetical order +by default. You can change the sort order to descending or sort by number +of postings instead. + +=item B<-f>, B<--format> I + +Select the output format, I being the default: + + ----- 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + ----- 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +I format looks like this: + + 2012-01 de.comp.datenbanken.ms-access 84 + 2012-01 de.comp.datenbanken.mysql 88 + 2012-02 de.comp.datenbanken.ms-access 126 + 2012-02 de.comp.datenbanken.mysql 21 + +And I format looks like this: + + # 2012-01: + de.comp.datenbanken.ms-access 84 + de.comp.datenbanken.mysql 88 + # 2012-02: + de.comp.datenbanken.ms-access 126 + de.comp.datenbanken.mysql 21 + +You can remove the comments by using B<--nocomments>, see below. + +=item B<-c>, B<--captions|--nocaptions> + +Add captions to output, like this: + + ----- Report for 2012-01 to 2012-02 (number of postings for each month) + ----- Newsgroups: de.comp.datenbanken.* + ----- Threshold: 10 => x <= 20 (on average) + ----- Grouped by Newsgroups (ascending), sorted by number of postings descending + +False by default. + +=item B<--comments|--nocomments> + +Add comments (group headers) to I and I output. True by default. + +Use I<--nocomments> to suppress anything except newsgroup names/months and +numbers of postings. This is enforced when using B<--filetemplate>, see below. + +=item B<--filetemplate> I + +Save output to file(s) instead of dumping it to STDOUT. B will +create one file for each month (or each newsgroup, accordant to the +setting of B<--group-by>, see above), with filenames composed by adding +year and month (or newsgroup names) to the I, for +example with B<--filetemplate> I: + + stats-2012-01 + stats-2012-02 + ... and so on + +B<--nocomments> is enforced, see above. + +=item B<--groupsdb> I + +Override I from F. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +Show number of postings per group for lasth month in I format: + + groupstats + +Show that report for January of 2010 and de.alt.* plus de.test, +including display of hierarchy levels: + + groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums + +Only show newsgroups with 30 postings or less last month, ordered +by number of postings, descending, in I format: + + groupstats --upper 30 --order-by postings-desc + +Show the total of all postings for the year of 2010 for all groups that +had 30 postings or less in every single month in that year, ordered by +number of postings in descending order: + + groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc + +The same for the average number of postings in the year of 2010: + + groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc + +List number of postings per group for eacht month of 2010 and redirect +output to one file for each month, namend stats-2010-01 and so on, in +machine-readable form (without formatting): + + groupstats -m 2010-01:2010-12 -f dump --filetemplate stats + + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +l>doc/INSTALL> + +=item - + +gatherstats -h + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010-2012 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/etc/newsstats.conf.sample b/etc/newsstats.conf.sample new file mode 100644 index 0000000..3133ed2 --- /dev/null +++ b/etc/newsstats.conf.sample @@ -0,0 +1,19 @@ +### database configuration +# +# driver, host, credentials and database +# +DBDriver = mysql +DBHost = localhost +DBUser = +DBPw = +DBDatabase = newsstats +# +# tables +# +DBTableRaw = raw_de +DBTableGrps = groups_de +#DBTableClnts = +#DBTableHosts = + +### hierarchy configuration +TLH = de diff --git a/feedlog.pl b/feedlog.pl deleted file mode 100755 index a5ecfb0..0000000 --- a/feedlog.pl +++ /dev/null @@ -1,267 +0,0 @@ -#! /usr/bin/perl -# -# feedlog.pl -# -# This script will log headers and other data to a database -# for further analysis by parsing a feed from INN. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010-2013 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. - -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; -use warnings; - -use NewsStats; - -use Sys::Syslog qw(:standard :macros); - -use Date::Format; -use DBI; -use Getopt::Long qw(GetOptions); -Getopt::Long::config ('bundling'); - -################################# Subroutines ################################## - -sub PrepareDB { -### initialise database connection, prepare statement -### and catch errors -### IN : \%Conf : reference to configuration hash -### OUT: $DBHandle: database handle -### $DBQuery : prepared statement - our ($DBHandle, $DBQuery, $OptQuiet); - my ($ConfigR) = @_; - my %Conf = %$ConfigR; - # drop current database connection - hard, if necessary - if ($DBHandle) { - $DBHandle->disconnect; - undef $DBHandle; - }; - # connect to database; try again every 5 seconds - while (!$DBHandle) { - $DBHandle = InitDB($ConfigR,0); - if (!$DBHandle) { - syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); - sleep(5); - } else {; - syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet; - } - }; - $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, - timestamp,token,size,peer,path, - newsgroups,headers) - VALUES (?,?,?,?,?,?,?,?,?,?)", - $Conf{'DBDatabase'}, - $Conf{'DBTableRaw'})); - return ($DBHandle,$DBQuery); -} - - -################################# Main program ################################# - -### read commandline options -my ($OptDebug,$OptQuiet); -GetOptions ('d|debug!' => \$OptDebug, - 'q|test!' => \$OptQuiet, - 'h|help' => \&ShowPOD, - 'V|version' => \&ShowVersion) or exit 1; - -### read configuration -my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; - -### init syslog -openlog($0, 'nofatal,pid', LOG_NEWS); -syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet; - -### init database -my ($DBHandle,$DBQuery) = PrepareDB(\%Conf); - -### main loop -while (<>) { - chomp; - # catch empty lines trailing or leading - if ($_ eq '') { - next; - } - # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups - my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; - # remaining lines contain headers - my $Headers = ""; - while (<>) { - chomp; - # empty line terminates this article - if ($_ eq '') { - last; - } - # collect headers - $Headers .= $_."\n" ; - } - - # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp - my $Day = time2str("%Y-%m-%d", $Timestamp); - my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); - - # write to database - if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, - $Path, $Newsgroups, $Headers)) { - syslog(LOG_ERR, 'Database error %s while processing %s: %s', - $DBI::err, $Mid, $DBI::errstr); - # if "MySQL server has gone away", try to recover - if ($DBI::err == 2006) { - # try to reconnect to database - ($DBHandle,$DBQuery) = PrepareDB(\%Conf); - # try to repeat the write attempt as before - if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, - $Path, $Newsgroups, $Headers)) { - syslog(LOG_ERR, '%s was dropped and lost.',$Mid); - }; - # otherwise log missing posting - } else { - syslog(LOG_ERR, '%s was dropped and lost.',$Mid); - }; - }; - $DBQuery->finish; - - warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n". - "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n", - $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, - $Newsgroups, $Headers) if $OptDebug; -} - -### close handles -$DBHandle->disconnect; -syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet; -closelog(); - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -feedlog - log data from an INN feed to a database - -=head1 SYNOPSIS - -B [B<-Vhdq>] - -=head1 REQUIREMENTS - -See L. - -=head1 DESCRIPTION - -This script will log overview data and complete headers to a database -table for further examination by parsing a feed from INN. It will -parse that information and write it to a mysql database table in real -time. - -All reporting is done to I via I facility. If B -fails to initiate a database connection at startup, it will log to -I with I priority and go in an endless loop, as -terminating would only result in a rapid respawn. - -=head2 Configuration - -B will read its configuration from F which -should be present in the same directory via Config::Auto. - -See L for an overview of possible configuration options. - -=head1 OPTIONS - -=over 3 - -=item B<-V>, B<--version> - -Print out version and copyright information and exit. - -=item B<-h>, B<--help> - -Print this man page and exit. - -=item B<-d>, B<--debug> - -Output debugging information to STDERR while parsing STDIN. You'll -find that information most probably in your B F file. - -=item B<-q>, B<--quiet> - -Suppress logging to syslog. - -=back - -=head1 INSTALLATION - -See L. - -=head1 EXAMPLES - -Set up a feed like that in your B F file: - - ## gather statistics for NewsStats - newsstats! - :!*,de.* - :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl - -See L for further information. - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -L - -=item - - -L - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010-2012 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut diff --git a/gatherstats.pl b/gatherstats.pl deleted file mode 100755 index f1fe7de..0000000 --- a/gatherstats.pl +++ /dev/null @@ -1,417 +0,0 @@ -#! /usr/bin/perl -# -# gatherstats.pl -# -# This script will gather statistical information from a database -# containing headers and other information from a INN feed. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010-2013 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. - -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; -use warnings; - -use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); - -use DBI; -use Getopt::Long qw(GetOptions); -Getopt::Long::config ('bundling'); - -################################# Definitions ################################## - -# define types of information that can be gathered -# all / groups (/ clients / hosts) -my %LegalStats; -@LegalStats{('all','groups')} = (); - -################################# Main program ################################# - -### read commandline options -my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, - $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest); -GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, - 'clientsdb=s' => \$OptClientsDB, - 'd|debug!' => \$OptDebug, - 'groupsdb=s' => \$OptGroupsDB, - 'hierarchy=s' => \$OptTLH, - 'hostsdb=s' => \$OptHostsDB, - 'm|month=s' => \$OptMonth, - 'rawdb=s' => \$OptRawDB, - 's|stats=s' => \$OptStatsType, - 't|test!' => \$OptTest, - 'h|help' => \&ShowPOD, - 'V|version' => \&ShowVersion) or exit 1; - -### read configuration -my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; - -### override configuration via commandline options -my %ConfOverride; -$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; -$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; -$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; -$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; -$ConfOverride{'TLH'} = $OptTLH if $OptTLH; -&OverrideConfig(\%Conf,\%ConfOverride); - -### get type of information to gather, defaulting to 'all' -$OptStatsType = 'all' if !$OptStatsType; -&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType)) - if !exists($LegalStats{$OptStatsType}); - -### get time period from --month -# get verbal description of time period, drop SQL code -my ($Period) = &GetTimePeriod($OptMonth); -&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". - "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); - -### reformat $Conf{'TLH'} -my $TLH; -if ($Conf{'TLH'}) { - # $Conf{'TLH'} is parsed as an array by Config::Auto; - # make a flat list again, separated by : - if (ref($Conf{'TLH'}) eq 'ARRAY') { - $TLH = join(':',@{$Conf{'TLH'}}); - } else { - $TLH = $Conf{'TLH'}; - } - # strip whitespace - $TLH =~ s/\s//g; - # add trailing dots if none are present yet - # (using negative look-behind assertions) - $TLH =~ s/(?prepare(sprintf("SELECT newsgroups FROM %s.%s ". - "WHERE day LIKE ? AND NOT disregard", - $Conf{'DBDatabase'}, - $Conf{'DBTableRaw'})); - $DBQuery->execute($Month.'-%') - or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". - "$DBI::errstr\n",$Month, - $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - - # count postings per group - my %Postings; - while (($_) = $DBQuery->fetchrow_array) { - # get list of newsgroups and hierarchies from Newsgroups: - my %Newsgroups = ListNewsgroups($_,$TLH, - $OptCheckgroupsFile ? \%ValidGroups : ''); - # count each newsgroup and hierarchy once - foreach (sort keys %Newsgroups) { - $Postings{$_}++; - }; - }; - - # add valid but empty groups if --checkgroups is set - if (%ValidGroups) { - foreach (sort keys %ValidGroups) { - if (!defined($Postings{$_})) { - # add current newsgroup as empty group - $Postings{$_} = 0; - warn (sprintf("ADDED: %s as empty group\n",$_)); - # add empty hierarchies for current newsgroup as needed - foreach (ParseHierarchies($_)) { - my $Hierarchy = $_ . '.ALL'; - if (!defined($Postings{$Hierarchy})) { - $Postings{$Hierarchy} = 0; - warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); - }; - }; - } - }; - }; - - # delete old data for that month - if (!$OptTest) { - $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", - $Conf{'DBDatabase'},$Conf{'DBTableGrps'}), - undef,$Month) - or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ". - "$DBI::errstr\n",$Month, - $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - }; - - print "----- GroupStats -----\n" if $OptDebug; - foreach my $Newsgroup (sort keys %Postings) { - print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug; - if (!$OptTest) { - # write to database - $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ". - "(month,newsgroup,postings) ". - "VALUES (?, ?, ?)", - $Conf{'DBDatabase'}, - $Conf{'DBTableGrps'})); - $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) - or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ". - "$DBI::errstr\n",$Month,$Newsgroup, - $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->finish; - }; - }; - } else { - # other types of information go here - later on - }; -}; - -### close handles -$DBHandle->disconnect; - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -gatherstats - process statistical data from a raw source - -=head1 SYNOPSIS - -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I] [B<-c> I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] - -=head1 REQUIREMENTS - -See L. - -=head1 DESCRIPTION - -This script will extract and process statistical information from a -database table which is fed from F for a given time period -and write its results to (an)other database table(s). Entries marked -with I<'disregard'> in the database will be ignored; currently, you -have to set this flag yourself, using your database management tools. -You can exclude erroneous entries that way (e.g. automatic reposts -(think of cancels flood and resurrectors); spam; ...). - -The time period to act on defaults to last month; you can assign -another time period or a single month via the B<--month> option (see -below). - -By default B will process all types of information; you -can change that using the B<--stats> option and assigning the type of -information to process. Currently that doesn't matter yet as only -processing of the number of postings per group per month is -implemented anyway. - -Possible information types include: - -=over 3 - -=item B (postings per group per month) - -B will examine Newsgroups: headers. Crosspostings will be -counted for each single group they appear in. Groups not in I -will be ignored. - -B will also add up the number of postings for each -hierarchy level, but only count each posting once. A posting to -de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, -respectively. A crossposting to de.alt.test and de.alt.admin, on the -other hand, will be counted for de.alt.test and de.alt.admin each, but -only once for de.alt.ALL and de.ALL. - -Data is written to I (see L); you can -override that default through the B<--groupsdb> option. - -=back - -=head2 Configuration - -B will read its configuration from F -which should be present in the same directory via Config::Auto. - -See L for an overview of possible configuration options. - -You can override configuration options via the B<--hierarchy>, -B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, -respectively. - -=head1 OPTIONS - -=over 3 - -=item B<-V>, B<--version> - -Print out version and copyright information and exit. - -=item B<-h>, B<--help> - -Print this man page and exit. - -=item B<-d>, B<--debug> - -Output debugging information to STDOUT while processing (number of -postings per group). - -=item B<-t>, B<--test> - -Do not write results to database. You should use B<--debug> in -conjunction with B<--test> ... everything else seems a bit pointless. - -=item B<-m>, B<--month> I - -Set processing period to a single month in YYYY-MM format or to a time -period between two month in YYYY-MM:YYYY-MM format (two month, separated -by a colon). - -=item B<-s>, B<--stats> I - -Set processing type to one of I and I. Defaults to all -(and is currently rather pointless as only I has been -implemented). - -=item B<-c>, B<--checkgroups> I - -Check each group against a list of valid newsgroups read from a file, -one group on each line and ignoring everything after the first -whitespace (so you can use a file in checkgroups format or (part of) -your INN active file). - -The filename is taken from I, amended by each -B<--month> B is processing in the form of I, -so that - - gatherstats -m 2010-01:2010-12 -c checkgroups - -will check against F for January 2010, against -F for February 2010 and so on. - -Newsgroups not found in the checkgroups file will be dropped (and -logged to STDERR), and newsgroups found there but having no postings -will be added with a count of 0 (and logged to STDERR). - -=item B<--hierarchy> I (newsgroup hierarchy) - -Override I from F. - -=item B<--rawdb> I
(raw data table) - -Override I from F. - -=item B<--groupsdb> I
(postings per group table) - -Override I from F. - -=item B<--clientsdb> I
(client data table) - -Override I from F. - -=item B<--hostsdb> I
(host data table) - -Override I from F. - -=back - -=head1 INSTALLATION - -See L. - -=head1 EXAMPLES - -Process all types of information for lasth month: - - gatherstats - -Do a dry run, showing results of processing: - - gatherstats --debug --test - -Process all types of information for January of 2010: - - gatherstats --month 2010-01 - -Process only number of postings for the year of 2010, -checking against checkgroups-*: - - gatherstats -m 2010-01:2010-12 -s groups -c checkgroups - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -L - -=item - - -L - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010-2012 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut diff --git a/groupstats.pl b/groupstats.pl deleted file mode 100755 index 499e333..0000000 --- a/groupstats.pl +++ /dev/null @@ -1,688 +0,0 @@ -#! /usr/bin/perl -# -# groupstats.pl -# -# This script will get statistical data on newgroup usage -# from a database. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010-2013 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. - -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; -use warnings; - -use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList); - -use DBI; -use Getopt::Long qw(GetOptions); -Getopt::Long::config ('bundling'); - -################################# Main program ################################# - -### read commandline options -my ($OptBoundType,$OptCaptions,$OptCheckgroupsFile,$OptComments, - $OptFileTemplate,$OptFormat,$OptGroupBy,$OptGroupsDB,$LowBound,$OptMonth, - $OptNewsgroups,$OptOrderBy,$OptReportType,$OptSums,$UppBound); -GetOptions ('b|boundary=s' => \$OptBoundType, - 'c|captions!' => \$OptCaptions, - 'checkgroups=s' => \$OptCheckgroupsFile, - 'comments!' => \$OptComments, - 'filetemplate=s' => \$OptFileTemplate, - 'f|format=s' => \$OptFormat, - 'g|group-by=s' => \$OptGroupBy, - 'groupsdb=s' => \$OptGroupsDB, - 'l|lower=i' => \$LowBound, - 'm|month=s' => \$OptMonth, - 'n|newsgroups=s' => \$OptNewsgroups, - 'o|order-by=s' => \$OptOrderBy, - 'r|report=s' => \$OptReportType, - 's|sums!' => \$OptSums, - 'u|upper=i' => \$UppBound, - 'h|help' => \&ShowPOD, - 'V|version' => \&ShowVersion) or exit 1; -# parse parameters -# $OptComments defaults to TRUE -$OptComments = 1 if (!defined($OptComments)); -# force --nocomments when --filetemplate is used -$OptComments = 0 if ($OptFileTemplate); -# parse $OptBoundType -if ($OptBoundType) { - if ($OptBoundType =~ /level/i) { - $OptBoundType = 'level'; - } elsif ($OptBoundType =~ /av(era)?ge?/i) { - $OptBoundType = 'average'; - } elsif ($OptBoundType =~ /sums?/i) { - $OptBoundType = 'sum'; - } else { - $OptBoundType = 'default'; - } -} -# parse $OptReportType -if ($OptReportType) { - if ($OptReportType =~ /av(era)?ge?/i) { - $OptReportType = 'average'; - } elsif ($OptReportType =~ /sums?/i) { - $OptReportType = 'sum'; - } else { - $OptReportType = 'default'; - } -} -# read list of newsgroups from --checkgroups -# into a hash reference -my $ValidGroups = &ReadGroupList($OptCheckgroupsFile) if $OptCheckgroupsFile; - -### read configuration -my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; - -### override configuration via commandline options -my %ConfOverride; -$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; -&OverrideConfig(\%Conf,\%ConfOverride); - -### init database -my $DBHandle = InitDB(\%Conf,1); - -### get time period and newsgroups, prepare SQL 'WHERE' clause -# get time period -# and set caption for output and expression for SQL 'WHERE' clause -my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth); -# bail out if --month is invalid -&Bleat(2,"--month option has an invalid format - ". - "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!") if !$CaptionPeriod; -# get list of newsgroups and set expression for SQL 'WHERE' clause -# with placeholders as well as a list of newsgroup to bind to them -my ($SQLWhereNewsgroups,@SQLBindNewsgroups); -if ($OptNewsgroups) { - ($SQLWhereNewsgroups,@SQLBindNewsgroups) = &SQLGroupList($OptNewsgroups); - # bail out if --newsgroups is invalid - &Bleat(2,"--newsgroups option has an invalid format!") - if !$SQLWhereNewsgroups; -} - -### build SQL WHERE clause (and HAVING clause, if needed) -my ($SQLWhereClause,$SQLHavingClause); -# $OptBoundType 'level' -if ($OptBoundType and $OptBoundType ne 'default') { - $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, - $SQLWhereNewsgroups,&SQLHierarchies($OptSums)); - $SQLHavingClause = SQLBuildClause('having',&SQLSetBounds($OptBoundType, - $LowBound,$UppBound)); -# $OptBoundType 'threshold' / 'default' or none -} else { - $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod, - $SQLWhereNewsgroups,&SQLHierarchies($OptSums), - &SQLSetBounds('default',$LowBound,$UppBound)); -} - -### get sort order and build SQL 'ORDER BY' clause -# default to 'newsgroup' for $OptBoundType 'level' or 'average' -$OptGroupBy = 'newsgroup' if (!$OptGroupBy and - $OptBoundType and $OptBoundType ne 'default'); -# force to 'month' for $OptReportType 'average' or 'sum' -$OptGroupBy = 'month' if ($OptReportType and $OptReportType ne 'default'); -# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause -my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy); -# $GroupBy will contain 'month' or 'newsgroup' (parsed result of $OptGroupBy) -# set it to 'month' or 'key' for OutputData() -$GroupBy = ($GroupBy eq 'month') ? 'month' : 'key'; - -### get report type and build SQL 'SELECT' query -my $SQLSelect; -my $SQLGroupClause = ''; -my $Precision = 0; # number of digits right of decimal point for output -if ($OptReportType and $OptReportType ne 'default') { - $SQLGroupClause = 'GROUP BY newsgroup'; - # change $SQLOrderClause: replace everything before 'postings' - $SQLOrderClause =~ s/BY.+postings/BY postings/; - if ($OptReportType eq 'average') { - $SQLSelect = "'All months',newsgroup,AVG(postings)"; - $Precision = 2; - # change $SQLOrderClause: replace 'postings' with 'AVG(postings)' - $SQLOrderClause =~ s/postings/AVG(postings)/; - } elsif ($OptReportType eq 'sum') { - $SQLSelect = "'All months',newsgroup,SUM(postings)"; - # change $SQLOrderClause: replace 'postings' with 'SUM(postings)' - $SQLOrderClause =~ s/postings/SUM(postings)/; - } - } else { - $SQLSelect = 'month,newsgroup,postings'; -}; - -### get length of longest newsgroup name delivered by query -### for formatting purposes -my $Field = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; -my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTableGrps'}, - $Field,'postings',$SQLWhereClause, - $SQLHavingClause, - @SQLBindNewsgroups); - -### build and execute SQL query -my ($DBQuery); -# special query preparation for $OptBoundType 'level', 'average' or 'sums' -if ($OptBoundType and $OptBoundType ne 'default') { - # prepare and execute first query: - # get list of newsgroups meeting level conditions - $DBQuery = $DBHandle->prepare(sprintf('SELECT newsgroup FROM %s.%s %s '. - 'GROUP BY newsgroup %s', - $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, - $SQLWhereClause,$SQLHavingClause)); - $DBQuery->execute(@SQLBindNewsgroups) - or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", - $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, - $DBI::errstr)); - # add newsgroups to a comma-seperated list ready for IN(...) query - my $GroupList; - while (my ($Newsgroup) = $DBQuery->fetchrow_array) { - $GroupList .= ',' if $GroupList; - $GroupList .= "'$Newsgroup'"; - }; - # enhance $WhereClause - if ($GroupList) { - $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause, - sprintf('newsgroup IN (%s)',$GroupList)); - } else { - # condition cannot be satisfied; - # force query to fail by adding '0=1' - $SQLWhereClause = SQLBuildClause('where',$SQLWhereClause,'0=1'); - } -} - -# prepare query -$DBQuery = $DBHandle->prepare(sprintf('SELECT %s FROM %s.%s %s %s %s', - $SQLSelect, - $Conf{'DBDatabase'},$Conf{'DBTableGrps'}, - $SQLWhereClause,$SQLGroupClause, - $SQLOrderClause)); - -# execute query -$DBQuery->execute(@SQLBindNewsgroups) - or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: %s\n", - $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}, - $DBI::errstr)); - -### output results -# set default to 'pretty' -$OptFormat = 'pretty' if !$OptFormat; -# print captions if --caption is set -if ($OptCaptions && $OptComments) { - # print time period with report type - my $CaptionReportType= '(number of postings for each month)'; - if ($OptReportType and $OptReportType ne 'default') { - $CaptionReportType= '(average number of postings for each month)' - if $OptReportType eq 'average'; - $CaptionReportType= '(number of all postings for that time period)' - if $OptReportType eq 'sum'; - } - printf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType); - # print newsgroup list if --newsgroups is set - printf("# ----- Newsgroups: %s\n",join(',',split(/:/,$OptNewsgroups))) - if $OptNewsgroups; - # print boundaries, if set - my $CaptionBoundary= '(counting only month fulfilling this condition)'; - if ($OptBoundType and $OptBoundType ne 'default') { - $CaptionBoundary= '(every single month)' if $OptBoundType eq 'level'; - $CaptionBoundary= '(on average)' if $OptBoundType eq 'average'; - $CaptionBoundary= '(all month summed up)' if $OptBoundType eq 'sum'; - } - printf("# ----- Threshold: %s %s x %s %s %s\n", - $LowBound ? $LowBound : '',$LowBound ? '=>' : '', - $UppBound ? '<=' : '',$UppBound ? $UppBound : '',$CaptionBoundary) - if ($LowBound or $UppBound); - # print primary and secondary sort order - printf("# ----- Grouped by %s (%s), sorted %s%s\n", - ($GroupBy eq 'month') ? 'Months' : 'Newsgroups', - ($OptGroupBy and $OptGroupBy =~ /-?desc$/i) ? 'descending' : 'ascending', - ($OptOrderBy and $OptOrderBy =~ /posting/i) ? 'by number of postings ' : '', - ($OptOrderBy and $OptOrderBy =~ /-?desc$/i) ? 'descending' : 'ascending'); -} - -# output data -&OutputData($OptFormat,$OptComments,$GroupBy,$Precision, - $OptCheckgroupsFile ? $ValidGroups : '', - $OptFileTemplate,$DBQuery,$MaxLength,$MaxValLength); - -### close handles -$DBHandle->disconnect; - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -groupstats - create reports on newsgroup usage - -=head1 SYNOPSIS - -B [B<-Vhcs> B<--comments>] [B<-m> I[:I] | I] [B<-n> I] [B<--checkgroups> I] [B<-r> I] [B<-l> I] [B<-u> I] [B<-b> I] [B<-g> I] [B<-o> I] [B<-f> I] [B<--filetemplate> I] [B<--groupsdb> I] - -=head1 REQUIREMENTS - -See L. - -=head1 DESCRIPTION - -This script create reports on newsgroup usage (number of postings per -group per month) taken from result tables created by -B. - -=head2 Features and options - -=head3 Time period and newsgroups - -The time period to act on defaults to last month; you can assign another -time period or a single month (or drop all time constraints) via the -B<--month> option (see below). - -B will process all newsgroups by default; you can limit -processing to only some newsgroups by supplying a list of those groups via -B<--newsgroups> option (see below). You can include hierarchy levels in -the output by adding the B<--sums> switch (see below). Optionally -newsgroups not present in a checkgroups file can be excluded from output, -sse B<--checkgroups> below. - -=head3 Report type - -You can choose between different B<--report> types: postings per month, -average postings per month or all postings summed up; for details, see -below. - -=head3 Upper and lower boundaries - -Furthermore you can set an upper and/or lower boundary to exclude some -results from output via the B<--lower> and B<--upper> options, -respectively. By default, all newsgroups with more and/or less postings -per month will be excluded from the result set (i.e. not shown and not -considered for average and sum reports). You can change the meaning of -those boundaries with the B<--boundary> option. For details, please see -below. - -=head3 Sorting and formatting the output - -By default, all results are grouped by month; you can group results by -newsgroup instead via the B<--groupy-by> option. Within those groups, the -list of newsgroups (or months) is sorted alphabetically (or -chronologically, respectively) ascending. You can change that order (and -sort by number of postings) with the B<--order-by> option. For details and -exceptions, please see below. - -The results will be formatted as a kind of table; you can change the -output format to a simple list or just a list of newsgroups and number of -postings with the B<--format> option. Captions will be added by means of -the B<--caption> option; all comments (and captions) can be supressed by -using B<--nocomments>. - -Last but not least you can redirect all output to a number of files, e.g. -one for each month, by submitting the B<--filetemplate> option, see below. -Captions and comments are automatically disabled in this case. - -=head2 Configuration - -B will read its configuration from F -which should be present in the same directory via Config::Auto. - -See doc/INSTALL for an overview of possible configuration options. - -You can override some configuration options via the B<--groupsdb> option. - -=head1 OPTIONS - -=over 3 - -=item B<-V>, B<--version> - -Print out version and copyright information and exit. - -=item B<-h>, B<--help> - -Print this man page and exit. - -=item B<-m>, B<--month> I - -Set processing period to a single month in YYYY-MM format or to a time -period between two month in YYYY-MM:YYYY-MM format (two month, separated -by a colon). By using the keyword I instead, you can set no -processing period to process the whole database. - -=item B<-n>, B<--newsgroups> I - -Limit processing to a certain set of newsgroups. I can -be a single newsgroup name (de.alt.test), a newsgroup hierarchy -(de.alt.*) or a list of either of these, separated by colons, for -example - - de.test:de.alt.test:de.newusers.* - -=item B<-s>, B<--sums|--nosums> (sum per hierarchy level) - -Include "virtual" groups for every hierarchy level in output, for -example: - - de.alt.ALL 10 - de.alt.test 5 - de.alt.admin 7 - -See the B man page for details. - -=item B<--checkgroups> I - -Restrict output to those newgroups present in a file in checkgroups format -(one newgroup name per line; everything after the first whitespace on each -line is ignored). All other newsgroups will be removed from output. - -Contrary to B, I is not a template, but refers to -a single file in checkgroups format. - -=item B<-r>, B<--report> I - -Choose the report type: I, I or I - -By default, B will report the number of postings for each -newsgroup in each month. But it can also report the average number of -postings per group for all months or the total sum of postings per group -for all months. - -For report types I and I, the B option has no -meaning and will be silently ignored (see below). - -=item B<-l>, B<--lower> I - -Set the lower boundary. See B<--boundary> below. - -=item B<-l>, B<--upper> I - -Set the upper boundary. See B<--boundary> below. - -=item B<-b>, B<--boundary> I - -Set the boundary type to one of I, I, I or -I. - -By default, all newsgroups with more postings per month than the upper -boundary and/or less postings per month than the lower boundary will be -excluded from further processing. For the default report that means each -month only newsgroups with a number of postings between the boundaries -will be displayed. For the other report types, newsgroups with a number of -postings exceeding the boundaries in all (!) months will not be -considered. - -For example, lets take a list of newsgroups like this: - - ----- 2012-01: - de.comp.datenbanken.misc 6 - de.comp.datenbanken.ms-access 84 - de.comp.datenbanken.mysql 88 - ----- 2012-02: - de.comp.datenbanken.misc 8 - de.comp.datenbanken.ms-access 126 - de.comp.datenbanken.mysql 21 - ----- 2012-03: - de.comp.datenbanken.misc 24 - de.comp.datenbanken.ms-access 83 - de.comp.datenbanken.mysql 36 - -With C, -you'll get the following result: - - ----- All months: - de.comp.datenbanken.ms-access 293 - de.comp.datenbanken.mysql 124 - -de.comp.datenbanken.misc has not been considered even though it has 38 -postings in total, because it has less than 25 postings in every single -month. If you want to list all newsgroups with more than 25 postings -I, you'll have to set the boundary type to I, see below. - -A boundary type of I will show only those newsgroups - at all - -that satisfy the boundaries in each and every single month. With the above -list of newsgroups and -C, -you'll get this result: - - ----- All months: - de.comp.datenbanken.ms-access 293 - -de.comp.datenbanken.mysql has not been considered because it had less than -25 postings in 2012-02 (only). - -You can use that to get a list of newsgroups that have more (or less) then -x postings in every month during the whole reporting period. - -A boundary type of I will show only those newsgroups - at all -that -satisfy the boundaries on average. With the above list of newsgroups and -C, -you'll get this result: - - ----- All months: - de.comp.datenbanken.ms-access 293 - de.comp.datenbanken.mysql 145 - -The average number of postings in the three groups is: - - de.comp.datenbanken.misc 12.67 - de.comp.datenbanken.ms-access 97.67 - de.comp.datenbanken.mysql 48.33 - -Last but not least, a boundary type of I will show only those -newsgroups - at all - that satisfy the boundaries with the total sum of -all postings during the reporting period. With the above list of -newsgroups and -C, -you'll finally get this result: - - ----- All months: - de.comp.datenbanken.misc 38 - de.comp.datenbanken.ms-access 293 - de.comp.datenbanken.mysql 145 - - -=item B<-g>, B<--group-by> I - -By default, all results are grouped by month, sorted chronologically in -ascending order, like this: - - ----- 2012-01: - de.comp.datenbanken.ms-access 84 - de.comp.datenbanken.mysql 88 - ----- 2012-02: - de.comp.datenbanken.ms-access 126 - de.comp.datenbanken.mysql 21 - -The results can be grouped by newsgroups instead via -B<--group-by> I: - - ----- de.comp.datenbanken.ms-access: - 2012-01 84 - 2012-02 126 - ----- de.comp.datenbanken.mysql: - 2012-01 88 - 2012-02 21 - -By appending I<-desc> to the group-by option parameter, you can reverse -the sort order - e.g. B<--group-by> I will give: - - ----- 2012-02: - de.comp.datenbanken.ms-access 126 - de.comp.datenbanken.mysql 21 - ----- 2012-01: - de.comp.datenbanken.ms-access 84 - de.comp.datenbanken.mysql 88 - -Average and sums reports (see above) will always be grouped by months; -this option will therefore be ignored. - -=item B<-o>, B<--order-by> I - -Within each group (a single month or single newsgroup, see above), the -report will be sorted by newsgroup names in ascending alphabetical order -by default. You can change the sort order to descending or sort by number -of postings instead. - -=item B<-f>, B<--format> I - -Select the output format, I being the default: - - ----- 2012-01: - de.comp.datenbanken.ms-access 84 - de.comp.datenbanken.mysql 88 - ----- 2012-02: - de.comp.datenbanken.ms-access 126 - de.comp.datenbanken.mysql 21 - -I format looks like this: - - 2012-01 de.comp.datenbanken.ms-access 84 - 2012-01 de.comp.datenbanken.mysql 88 - 2012-02 de.comp.datenbanken.ms-access 126 - 2012-02 de.comp.datenbanken.mysql 21 - -And I format looks like this: - - # 2012-01: - de.comp.datenbanken.ms-access 84 - de.comp.datenbanken.mysql 88 - # 2012-02: - de.comp.datenbanken.ms-access 126 - de.comp.datenbanken.mysql 21 - -You can remove the comments by using B<--nocomments>, see below. - -=item B<-c>, B<--captions|--nocaptions> - -Add captions to output, like this: - - ----- Report for 2012-01 to 2012-02 (number of postings for each month) - ----- Newsgroups: de.comp.datenbanken.* - ----- Threshold: 10 => x <= 20 (on average) - ----- Grouped by Newsgroups (ascending), sorted by number of postings descending - -False by default. - -=item B<--comments|--nocomments> - -Add comments (group headers) to I and I output. True by default. - -Use I<--nocomments> to suppress anything except newsgroup names/months and -numbers of postings. This is enforced when using B<--filetemplate>, see below. - -=item B<--filetemplate> I - -Save output to file(s) instead of dumping it to STDOUT. B will -create one file for each month (or each newsgroup, accordant to the -setting of B<--group-by>, see above), with filenames composed by adding -year and month (or newsgroup names) to the I, for -example with B<--filetemplate> I: - - stats-2012-01 - stats-2012-02 - ... and so on - -B<--nocomments> is enforced, see above. - -=item B<--groupsdb> I - -Override I from F. - -=back - -=head1 INSTALLATION - -See L. - -=head1 EXAMPLES - -Show number of postings per group for lasth month in I format: - - groupstats - -Show that report for January of 2010 and de.alt.* plus de.test, -including display of hierarchy levels: - - groupstats --month 2010-01 --newsgroups de.alt.*:de.test --sums - -Only show newsgroups with 30 postings or less last month, ordered -by number of postings, descending, in I format: - - groupstats --upper 30 --order-by postings-desc - -Show the total of all postings for the year of 2010 for all groups that -had 30 postings or less in every single month in that year, ordered by -number of postings in descending order: - - groupstats -m 2010-01:2010-12 -u 30 -b level -r sums -o postings-desc - -The same for the average number of postings in the year of 2010: - - groupstats -m 2010-01:2010-12 -u 30 -b level -r avg -o postings-desc - -List number of postings per group for eacht month of 2010 and redirect -output to one file for each month, namend stats-2010-01 and so on, in -machine-readable form (without formatting): - - groupstats -m 2010-01:2010-12 -f dump --filetemplate stats - - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -L - -=item - - -l>doc/INSTALL> - -=item - - -gatherstats -h - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010-2012 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut diff --git a/install/install.pl b/install/install.pl index ff392df..a5860d4 100755 --- a/install/install.pl +++ b/install/install.pl @@ -14,8 +14,8 @@ BEGIN { our $VERSION = "0.01"; use File::Basename; - # we're in .../install, so our module is in .. - push(@INC, dirname($0).'/..'); + # we're in .../install, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); } use strict; use warnings; @@ -42,7 +42,7 @@ my $Path = cwd(); ### read configuration print("Reading configuration.\n"); -my %Conf = %{ReadConfig($Path.'/newsstats.conf')}; +my %Conf = %{ReadConfig('')}; ##### -------------------------------------------------------------------------- ##### Database table definitions @@ -294,15 +294,15 @@ Don't do a fresh install, but update from I. =over 4 -=item F +=item F The script itself. -=item F +=item F Library functions for the NewsStats package. -=item F +=item F Runtime configuration file. diff --git a/lib/NewsStats.pm b/lib/NewsStats.pm new file mode 100644 index 0000000..8a62179 --- /dev/null +++ b/lib/NewsStats.pm @@ -0,0 +1,780 @@ +# NewsStats.pm +# +# Library functions for the NewsStats package. +# +# Copyright (c) 2010-2013 Thomas Hochstein +# +# This module can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +package NewsStats; + +use strict; +use warnings; +our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); + +require Exporter; +@ISA = qw(Exporter); +@EXPORT = qw( + $MyVersion + $PackageVersion + $FullPath + $HomePath + ShowVersion + ShowPOD + ReadConfig + OverrideConfig + InitDB + Bleat +); +@EXPORT_OK = qw( + GetTimePeriod + LastMonth + SplitPeriod + ListMonth + ListNewsgroups + ParseHierarchies + ReadGroupList + OutputData + FormatOutput + SQLHierarchies + SQLSortOrder + SQLGroupList + SQLSetBounds + SQLBuildClause + GetMaxLength +); +%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod + ListMonth)], + Output => [qw(OutputData FormatOutput)], + SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList + SQLSetBounds SQLBuildClause GetMaxLength)]); +$VERSION = '0.01'; +our $PackageVersion = '0.01'; + +use Data::Dumper; +use File::Basename; +use Cwd qw(realpath); + +use Config::Auto; +use DBI; + +#####-------------------------------- Vars --------------------------------##### + +# save $0 in $FullPath +our $FullPath = $0; +# strip filename and /bin or /install directory to create the $HomePath +our $HomePath = dirname(realpath($0)); +$HomePath =~ s/\/(bin|install)//; +# trim $0 +$0 =~ s%.*/%%; +# set version string +our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; + +#####------------------------------- Basics -------------------------------##### + +################################################################################ + +################################################################################ +sub ShowVersion { +################################################################################ +### display version and exit + print "NewsStats v$PackageVersion\n$MyVersion\n"; + print "Copyright (c) 2010-2012 Thomas Hochstein \n"; + print "This program is free software; you may redistribute it ". + "and/or modify it under the same terms as Perl itself.\n"; + exit(100); +}; +################################################################################ + +################################################################################ +sub ShowPOD { +################################################################################ +### feed myself to perldoc and exit + exec('perldoc', $FullPath); + exit(100); +}; +################################################################################ + +################################################################################ +sub ReadConfig { +################################################################################ +### read config via Config::Auto +### IN : $ConfFile: config filename +### OUT: reference to a hash containing the configuration + my ($ConfFile) = @_; + # set default + $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile; + # mandatory configuration options + my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', + 'DBTableRaw','DBTableGrps'); + # read config via Config::Auto + my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); + my %Conf = %{$ConfR}; + # check for mandatory options + foreach (@Mandatory) { + &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) + if (!defined($Conf{$_})); + } + # $Conf{'TLH'} is checked in gatherstats.pl + return $ConfR; +}; +################################################################################ + +################################################################################ +sub OverrideConfig { +################################################################################ +### override configuration values +### IN : $ConfigR : reference to configuration hash +### $OverrideR: reference to a hash containing overrides + my ($ConfigR,$OverrideR) = @_; + my %Override = %$OverrideR; + # Config hash empty? + &Bleat(1,"Empty configuration hash passed to OverrideConfig()") + if ( keys %$ConfigR < 1); + # return if no overrides + return if (keys %Override < 1 or keys %$ConfigR < 1); + foreach my $Key (keys %Override) { + $$ConfigR{$Key} = $Override{$Key}; + }; +}; +################################################################################ + +################################################################################ +sub InitDB { +################################################################################ +### initialise database connection +### IN : $ConfigR: reference to configuration hash +### $Die : if TRUE, die if connection fails +### OUT: DBHandle + my ($ConfigR,$Die) = @_; + my %Conf = %$ConfigR; + my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', + $Conf{'DBDriver'},$Conf{'DBDatabase'}, + $Conf{'DBHost'}), $Conf{'DBUser'}, + $Conf{'DBPw'}, { PrintError => 0 }); + if (!$DBHandle) { + &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); + &Bleat(1,$DBI::errstr); + }; + return $DBHandle; +}; +################################################################################ + +################################################################################ +sub Bleat { +################################################################################ +### print warning or error messages and terminate in case of error +### IN : $Level : 1 = warning, 2 = error +### $Message: warning or error message + my ($Level,$Message) = @_; + if ($Level == 1) { + warn "$0 W: $Message\n" + } elsif ($Level == 2) { + die "$0 E: $Message\n" + } else { + print "$0: $Message\n" + } +}; +################################################################################ + +#####------------------------------ GetStats ------------------------------##### + +################################################################################ +sub ListNewsgroups { +################################################################################ +### explode a (scalar) list of newsgroup names to a list of newsgroup and +### hierarchy names where every newsgroup and hierarchy appears only once: +### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin +### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) +### $TLH : top level hierarchy (all other newsgroups are ignored) +### $ValidGroupsR: reference to a hash containing all valid newsgroups +### as keys +### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups,$TLH,$ValidGroupsR) = @_; + my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; + my %Newsgroups; + chomp($Newsgroups); + # remove whitespace from contents of Newsgroups: + $Newsgroups =~ s/\s//; + # call &HierarchyCount for each newsgroup in $Newsgroups: + for (split /,/, $Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if($TLH and !/^$TLH/); + # don't count invalid newsgroups + if(%ValidGroups and !defined($ValidGroups{$_})) { + warn (sprintf("DROPPED: %s\n",$_)); + next; + } + # add original newsgroup to %Newsgroups + $Newsgroups{$_} = 1; + # add all hierarchy elements to %Newsgroups, amended by '.ALL', + # i.e. de.alt.ALL and de.ALL + foreach (ParseHierarchies($_)) { + $Newsgroups{$_.'.ALL'} = 1; + } + }; + return %Newsgroups; +}; + +################################################################################ +sub ParseHierarchies { +################################################################################ +### return a list of all hierarchy levels a newsgroup belongs to +### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) +### IN : $Newsgroup : a newsgroup name +### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to + my ($Newsgroup) = @_; + my @Hierarchies; + # strip trailing dots + $Newsgroup =~ s/(.+)\.+$/$1/; + # butcher newsgroup name by "." and add each hierarchy to @Hierarchies + # i.e. de.alt.test: "de.alt" and "de" + while ($Newsgroup =~ /\./) { + $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; + push @Hierarchies, $Newsgroup; + }; + return @Hierarchies; +}; + +################################################################################ +sub ReadGroupList { +################################################################################ +### read a list of valid newsgroups from file (each group on one line, +### ignoring everything after the first whitespace and so accepting files +### in checkgroups format as well as (parts of) an INN active file) +### IN : $Filename : file to read +### OUT: \%ValidGroups: hash containing all valid newsgroups + my ($Filename) = @_; + my %ValidGroups; + open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); + while (<$LIST>) { + s/^\s*(\S+).*$/$1/; + chomp; + next if /^$/; + $ValidGroups{$_} = '1'; + }; + close $LIST; + return \%ValidGroups; +}; + +################################################################################ + +#####----------------------------- TimePeriods ----------------------------##### + +################################################################################ +sub GetTimePeriod { +################################################################################ +### get a time period to act on from --month option; +### if empty, default to last month +### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' +### OUT: $Verbal,$SQL: verbal description and WHERE-clause +### of the chosen time period + my ($Month) = @_; + # define result variables + my ($Verbal, $SQL); + # define a regular expression for a month + my $REMonth = '\d{4}-\d{2}'; + + # default to last month if option is not set + if(!$Month) { + $Month = &LastMonth; + } + + # check for valid input + if ($Month =~ /^$REMonth$/) { + # single month (YYYY-MM) + ($Month) = &CheckMonth($Month); + $Verbal = $Month; + $SQL = sprintf("month = '%s'",$Month); + } elsif ($Month =~ /^$REMonth:$REMonth$/) { + # time period (YYYY-MM:YYYY-MM) + $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); + $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); + } elsif ($Month =~ /^all$/i) { + # special case: ALL + $Verbal = 'all time'; + $SQL = ''; + } else { + # invalid input + return (undef,undef); + } + + return ($Verbal,$SQL); +}; + +################################################################################ +sub LastMonth { +################################################################################ +### get last month from todays date in YYYY-MM format +### OUT: last month as YYYY-MM + # get today's date + my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); + # $Month is already defined from 0 to 11, so no need to decrease it by 1 + $Year += 1900; + if ($Month < 1) { + $Month = 12; + $Year--; + }; + # return last month + return sprintf('%4d-%02d',$Year,$Month); +}; + +################################################################################ +sub CheckMonth { +################################################################################ +### check if input (in YYYY-MM form) is valid with MM between 01 and 12; +### otherwise, fix it +### IN : @Month: array of month +### OUT: @Month: a valid month + my (@Month) = @_; + foreach my $Month (@Month) { + my ($OldMonth) = $Month; + my ($CalMonth) = substr ($Month, -2); + if ($CalMonth < 1 or $CalMonth > 12) { + $CalMonth = '12' if $CalMonth > 12; + $CalMonth = '01' if $CalMonth < 1; + substr($Month, -2) = $CalMonth; + &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". + "and '12'), set to '%s'.",$OldMonth,$Month)); + } + } + return @Month; +}; + +################################################################################ +sub SplitPeriod { +################################################################################ +### split a time period denoted by YYYY-MM:YYYY-MM into start and end month +### IN : $Period: time period +### OUT: $StartMonth, $EndMonth + my ($Period) = @_; + my ($StartMonth, $EndMonth) = split /:/, $Period; + ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); + # switch parameters as necessary + if ($EndMonth gt $StartMonth) { + return ($StartMonth, $EndMonth); + } else { + return ($EndMonth, $StartMonth); + }; +}; + +################################################################################ +sub ListMonth { +################################################################################ +### return a list of months (YYYY-MM) between start and end month +### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') +### OUT: @Months: array containing all months from $MonthExpression enumerated + my ($MonthExpression )= @_; + # return if single month + return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); + # parse $MonthExpression + my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); + # set $Year, $Month from $StartMonth + my ($Year, $Month) = split /-/, $StartMonth; + # define @Months + my (@Months); + until ("$Year-$Month" gt $EndMonth) { + push @Months, "$Year-$Month"; + $Month = "$Month"; # force string context + $Month++; + if ($Month > 12) { + $Month = '01'; + $Year++; + }; + }; + return @Months; +}; + +#####---------------------------- OutputFormats ---------------------------##### + +################################################################################ +sub OutputData { +################################################################################ +### read database query results from DBHandle and print results with formatting +### IN : $Format : format specifier +### $Comments : print or suppress all comments for machine-readable output +### $GroupBy : primary sorting order (month or key) +### $Precision: number of digits right of decimal point (0 or 2) +### $ValidKeys: reference to a hash containing all valid keys +### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM +### $DBQuery : database query handle with executed query, +### containing $Month, $Key, $Value +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' + my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, + $DBQuery, $PadField, $PadValue) = @_; + my %ValidKeys = %{$ValidKeys} if $ValidKeys; + my ($FileName, $Handle, $OUT); + our $LastIteration; + + # define output types + my %LegalOutput; + @LegalOutput{('dump','list','pretty')} = (); + # bail out if format is unknown + &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); + + while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { + # don't display invalid keys + if(%ValidKeys and !defined($ValidKeys{$Key})) { + # FIXME + # &Bleat(1,sprintf("DROPPED: %s",$Key)); + next; + }; + # care for correct sorting order and abstract from month and keys: + # $Caption will be $Month or $Key, according to sorting order, + # and $Key will be $Key or $Month, respectively + my $Caption; + if ($GroupBy eq 'key') { + $Caption = $Key; + $Key = $Month; + } else { + $Caption = $Month; + } + # set output file handle + if (!$FileTempl) { + $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT + } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { + close $OUT if ($LastIteration); + # safeguards for filename creation: + # replace potential problem characters with '_' + $FileName = sprintf('%s-%s',$FileTempl,$Caption); + $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; + open ($OUT,">$FileName") + or &Bleat(2,sprintf("Cannot open output file '%s': $!", + $FileName)); + $Handle = $OUT; + }; + print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, + $Precision, $PadField, $PadValue); + $LastIteration = $Caption; + }; + close $OUT if ($FileTempl); +}; + +################################################################################ +sub FormatOutput { +################################################################################ +### format information for output according to format specifier +### IN : $Format : format specifier +### $Comments : print or suppress all comments for machine-readable output +### $Caption : month (as YYYY-MM) or $Key, according to sorting order +### $Key : newsgroup, client, ... or $Month, as above +### $Value : number of postings with that attribute +### $Precision: number of digits right of decimal point (0 or 2) +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' +### OUT: $Output: formatted output + my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, + $PadValue) = @_; + my ($Output); + # keep last caption in mind + our ($LastIteration); + # create one line of output + if ($Format eq 'dump') { + # output as dump (key value) + $Output = sprintf ("# %s:\n",$Caption) + if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); + $Output .= sprintf ("%s %u\n",$Key,$Value); + } elsif ($Format eq 'list') { + # output as list (caption key value) + $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); + } elsif ($Format eq 'pretty') { + # output as a table + $Output = sprintf ("# ----- %s:\n",$Caption) + if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); + # increase $PadValue for numbers with decimal point + $PadValue += $Precision+1 if $Precision; + # add padding if $PadField is set; $PadValue HAS to be set then + $Output .= sprintf ($PadField ? + sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : + "%s%s %.*f\n",$Key,$Comments ? ':' : '', + $Precision,$Value); + }; + return $Output; +}; + +#####------------------------- QueryModifications -------------------------##### + +################################################################################ +sub SQLHierarchies { +################################################################################ +### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by +### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is +### true, accordingly) +### IN : $ShowHierarchies: boolean value +### OUT: SQL code + my ($ShowHierarchies) = @_; + return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; +}; + +################################################################################ +sub GetMaxLength { +################################################################################ +### get length of longest fields in future query result +### IN : $DBHandle : database handle +### $Table : table to query +### $Field : field (key!, i.e. month, newsgroup, ...) to check +### $Value : field (value!, i.e. postings) to check +### $WhereClause : WHERE clause +### $HavingClause: HAVING clause +### @BindVars : bind variables for WHERE clause +### OUT: $FieldLength : length of longest instance of $Field +### $ValueLength : length of longest instance of $Value + my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". + "MAX(%s) ". + "FROM %s %s %s",$Field,,$Value, + $Table,$WhereClause,$HavingClause ? + 'GROUP BY newsgroup' . $HavingClause . + ' ORDER BY LENGTH(newsgroup) '. + 'DESC LIMIT 1': '')); + $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". + "for '%s' from table '%s': ". + "$DBI::errstr",$Field,$Table)); + my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; + my $ValueLength = length($ValueMax) if ($ValueMax); + return ($FieldLength,$ValueLength); +}; + +################################################################################ +sub SQLSortOrder { +################################################################################ +### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and +### $OptOrderBy (secondary sorting), both ascending or descending; +### descending sorting order is done by adding '-desc' +### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' +### $OrderBy: secondary sort by month/newsgroups (default) +### or number of 'postings' +### OUT: a SQL ORDER BY clause + my ($GroupBy,$OrderBy) = @_; + my ($GroupSort,$OrderSort) = ('',''); + # $GroupBy (primary sorting) + if (!$GroupBy) { + $GroupBy = 'month'; + } else { + ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); + if ($GroupBy =~ /group/i) { + $GroupBy = 'newsgroup'; + } else { + $GroupBy = 'month'; + } + } + my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; + # $OrderBy (secondary sorting) + if (!$OrderBy) { + $OrderBy = $Secondary; + } else { + ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); + if ($OrderBy =~ /posting/i) { + $OrderBy = "postings $OrderSort, $Secondary"; + } else { + $OrderBy = "$Secondary $OrderSort"; + } + } + return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); +}; + +################################################################################ +sub SQLParseOrder { +################################################################################ +### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. +### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' +### IN : $OrderOption: order option (see above) +### OUT: parameter to sort by, +### sort order ('DESC' or nothing, meaning 'ASC') + my ($OrderOption) = @_; + my $SortOrder = ''; + if ($OrderOption =~ s/-?desc$//i) { + $SortOrder = 'DESC'; + } else { + $OrderOption =~ s/-?asc$//i + } + return ($OrderOption,$SortOrder); +}; + +################################################################################ +sub SQLGroupList { +################################################################################ +### explode list of newsgroups separated by : (with wildcards) +### to a SQL 'WHERE' expression +### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) +### OUT: SQL code to become part of a 'WHERE' clause, +### list of newsgroups for SQL bindings + my ($Newsgroups) = @_; + # substitute '*' wildcard with SQL wildcard character '%' + $Newsgroups =~ s/\*/%/g; + return (undef,undef) if !CheckValidNewsgroups($Newsgroups); + # just one newsgroup? + return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; + my ($SQL,@WildcardGroups,@NoWildcardGroups); + # list of newsgroups separated by ':' + my @GroupList = split /:/, $Newsgroups; + foreach (@GroupList) { + if ($_ !~ /%/) { + # add to list of newsgroup names WITHOUT wildcard + push (@NoWildcardGroups,$_); + } else { + # add to list of newsgroup names WITH wildcard + push (@WildcardGroups,$_); + # add wildcard to SQL clause + # 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + $SQL .= 'newsgroup LIKE ?' + } + }; + if (scalar(@NoWildcardGroups)) { + # add 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + if (scalar(@NoWildcardGroups) < 2) { + # special case: just one newsgroup without wildcard + $SQL .= 'newsgroup = ?'; + } else { + # create list of newsgroups to include: 'newsgroup IN (...)' + $SQL .= 'newsgroup IN ('; + my $SQLin; + foreach (@NoWildcardGroups) { + $SQLin .= ',' if $SQLin; + $SQLin .= '?'; + } + # add list to SQL clause + $SQL .= $SQLin .= ')'; + } + } + # add brackets '()' to SQL clause as needed (more than one wildcard group) + if (scalar(@WildcardGroups)) { + $SQL = '(' . $SQL .')'; + } + # rebuild @GroupList in (now) correct order + @GroupList = (@WildcardGroups,@NoWildcardGroups); + return ($SQL,@GroupList); +}; + +################################################################################ +sub SQLGroupWildcard { +################################################################################ +### build a valid SQL 'WHERE' expression with or without wildcards +### IN : $Newsgroup: newsgroup expression, probably with wildcard +### (group.name or group.name.%) +### OUT: SQL code to become part of a 'WHERE' clause + my ($Newsgroup) = @_; + if ($Newsgroup !~ /%/) { + return 'newsgroup = ?'; + } else { + return 'newsgroup LIKE ?'; + } +}; + +################################################################################ +sub SQLSetBounds { +################################################################################ +### set upper and/or lower boundary (number of postings) +### IN : $Type: 'level', 'average', 'sum' or 'default' +### $LowBound,$UppBound: lower/upper boundary, respectively +### OUT: SQL code to become part of a WHERE or HAVING clause + my ($Type,$LowBound,$UppBound) = @_; + ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); + if($LowBound and $UppBound and $LowBound > $UppBound) { + &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". + "$UppBound, exchanging boundaries."); + ($LowBound,$UppBound) = ($UppBound,$LowBound); + } + # default to 'default' + my $WhereHavingFunction = 'postings'; + # set $LowBound to SQL statement: + # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' + if ($Type eq 'level') { + $WhereHavingFunction = 'MIN(postings)' + } elsif ($Type eq 'average') { + $WhereHavingFunction = 'AVG(postings)' + } elsif ($Type eq 'sum') { + $WhereHavingFunction = 'SUM(postings)' + } + $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); + # set $LowBound to SQL statement: + # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' + if ($Type eq 'level') { + $WhereHavingFunction = 'MAX(postings)' + } elsif ($Type eq 'average') { + $WhereHavingFunction = 'AVG(postings)' + } elsif ($Type eq 'sum') { + $WhereHavingFunction = 'SUM(postings)' + } + $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); + return ($LowBound,$UppBound); +}; + +################################################################################ +sub SQLCheckNumber { +################################################################################ +### check if input is a valid positive integer; otherwise, make it one +### IN : @Numbers: array of parameters +### OUT: @Numbers: a valid positive integer + my (@Numbers) = @_; + foreach my $Number (@Numbers) { + if ($Number and $Number < 0) { + &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); + $Number = -$Number; + } + $Number = '' if ($Number and $Number !~ /^\d+$/); + } + return @Numbers; +}; + +################################################################################ +sub SQLBuildClause { +################################################################################ +### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause +### from multiple expressions which *may* be empty +### IN : $Type: 'where', 'having', 'group' or 'order' +### @Expressions: array of expressions +### OUT: $SQLClause: a SQL clause + my ($Type,@Expressions) = @_; + my ($SQLClause,$Separator,$Statement); + # set separator ('AND' or ',') + if ($Type eq 'where' or $Type eq 'having') { + $Separator = 'AND'; + } else { + $Separator = ','; + } + # set statement + if ($Type eq 'where') { + $Statement = 'WHERE'; + } elsif ($Type eq 'order') { + $Statement = 'ORDER BY'; + } elsif ($Type eq 'having') { + $Statement = 'HAVING'; + } else { + $Statement = 'GROUP BY'; + } + # build query from expressions with separators + foreach my $Expression (@Expressions) { + if ($Expression) { + $SQLClause .= " $Separator " if ($SQLClause); + $SQLClause .= $Expression; + } + } + # add statement in front if not already present + $SQLClause = " $Statement " . $SQLClause + if ($SQLClause and $SQLClause !~ /$Statement/); + return $SQLClause; +}; + +#####--------------------------- Verifications ----------------------------##### + +################################################################################ +sub CheckValidNewsgroups { +################################################################################ +### syntax check of newgroup list +### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) +### OUT: boolean + my ($Newsgroups) = @_; + my $InvalidCharRegExp = ',; '; + return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; +}; + + +#####------------------------------- done ---------------------------------##### +1; + + diff --git a/newsstats.conf.sample b/newsstats.conf.sample deleted file mode 100644 index 3133ed2..0000000 --- a/newsstats.conf.sample +++ /dev/null @@ -1,19 +0,0 @@ -### database configuration -# -# driver, host, credentials and database -# -DBDriver = mysql -DBHost = localhost -DBUser = -DBPw = -DBDatabase = newsstats -# -# tables -# -DBTableRaw = raw_de -DBTableGrps = groups_de -#DBTableClnts = -#DBTableHosts = - -### hierarchy configuration -TLH = de