From 880c3eb2270c235048aedfa6becfc0bc0e7b8ed8 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Mon, 7 May 2012 20:29:25 +0200 Subject: [PATCH] Adapt gatherstats.pl to new coding style. * Switch to Getopt::Long, change coding style; limit line length. * Replace 'die' and 'warn' by calls to &Bleat(). * Completely changed options due to new GetOpt::Long processing. - merged -m/-p into --month * Adapt to changes in NewsStats.pm * Redo documentation. * Update TODO. Signed-off-by: Thomas Hochstein --- NewsStats.pm | 14 ++-- doc/TODO | 2 +- gatherstats.pl | 212 +++++++++++++++++++++++++++---------------------- 3 files changed, 127 insertions(+), 101 deletions(-) diff --git a/NewsStats.pm b/NewsStats.pm index a04ac0f..bdfdcf5 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -343,13 +343,13 @@ sub SplitPeriod { sub ListMonth { ################################################################################ ### return a list of months (YYYY-MM) between start and end month -### IN : $StartMonth, $EndMonth -### OUT: @Months: array containing all months from $StartMonth to $EndMonth - my ($StartMonth, $EndMonth) = @_; - return (undef,undef) - if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); - # return if $StartMonth = $EndMonth - return ($StartMonth) if ($StartMonth eq $EndMonth); +### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') +### OUT: @Months: array containing all months from $MonthExpression enumerated + my ($MonthExpression )= @_; + # return if single month + return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); + # parse $MonthExpression + my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); # set $Year, $Month from $StartMonth my ($Year, $Month) = split /-/, $StartMonth; # define @Months diff --git a/doc/TODO b/doc/TODO index ce6f795..a30cdcd 100644 --- a/doc/TODO +++ b/doc/TODO @@ -42,6 +42,7 @@ Bug numbers refer to the Mantis issue tracker at . names - would be nice. + install/install.pl + - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Add / enhance / test error handling - General tests and optimisations @@ -54,7 +55,6 @@ Bug numbers refer to the Mantis issue tracker at . - General tests and optimisations + gatherstats.pl - - Complete rewrite (like groupstats.pl, include changes in NewsStats.pm) - Use hierarchy information (see GroupInfo above) - Add gathering of other stats (clients, hosts, ...) - better modularisation (code reuse for other reports!) diff --git a/gatherstats.pl b/gatherstats.pl index e9ae0f8..b570cd8 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -7,7 +7,7 @@ # # It is part of the NewsStats package. # -# Copyright (c) 2010 Thomas Hochstein +# Copyright (c) 2010-2012 Thomas Hochstein # # It can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -22,37 +22,56 @@ use strict; use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); ################################# Definitions ################################## # define types of information that can be gathered # all / groups (/ clients / hosts) -my %LegalTypes; -@LegalTypes{('all','groups')} = (); +my %LegalStats; +@LegalStats{('all','groups')} = (); ################################# Main program ################################# ### read commandline options -my %Options = &ReadOptions('dom:p:t:l:n:r:g:c:s:'); +my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, + $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest); +GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, + 'clientsdb=s' => \$OptClientsDB, + 'd|debug!' => \$OptDebug, + 'groupsdb=s' => \$OptGroupsDB, + 'hierarchy=s' => \$OptTLH, + 'hostsdb=s' => \$OptHostsDB, + 'm|month=s' => \$OptMonth, + 'rawdb=s' => \$OptRawDB, + 's|stats=s' => \$OptStatsType, + 't|test!' => \$OptTest, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; ### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; +my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')}; ### override configuration via commandline options my %ConfOverride; -$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; -$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; -$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; -$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; +$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; +$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; +$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; +$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; +$ConfOverride{'TLH'} = $OptTLH if $OptTLH; &OverrideConfig(\%Conf,\%ConfOverride); ### get type of information to gather, defaulting to 'all' -$Options{'t'} = 'all' if !$Options{'t'}; -die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); +$OptStatsType = 'all' if !$OptStatsType; +&Bleat(2, sprintf("Unknown type '%s'!", $OptStatsType)) + if !exists($LegalStats{$OptStatsType}); -### get time period (-m or -p) -my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); +### get time period from --month +# get verbal description of time period, drop SQL code +my ($Period) = &GetTimePeriod($OptMonth); +&Bleat(2,"--month option has an invalid format - please use 'YYYY-MM' or ". + "'YYYY-MM:YYYY-MM'!") if (!$Period or $Period eq 'all time'); ### reformat $Conf{'TLH'} my $TLH; @@ -67,45 +86,55 @@ if ($Conf{'TLH'}) { # strip whitespace $TLH =~ s/\s//g; # check for illegal characters - die "$MySelf: E: Config error - illegal characters in TLH definition\n" if ($TLH !~ /^[a-zA-Z0-9:]+$/); + &Bleat(2,'Config error - illegal characters in TLH definition!') + if ($TLH !~ /^[a-zA-Z0-9:]+$/); if ($TLH =~ /:/) { - # reformat $TLH form a:b to (a)|(b) + # reformat $TLH from a:b to (a)|(b), + # e.g. replace '.' by '|' $TLH =~ s/:/)|(/g; $TLH = '(' . $TLH . ')'; }; }; -### read newsgroups list from -l -my %ValidGroups = %{&ReadGroupList($Options{'l'})} if $Options{'l'}; +# read list of newsgroups from --checkgroups +# into a hash +my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile; ### init database my $DBHandle = InitDB(\%Conf,1); ### get data for each month -warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; -foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { +&Bleat(1,'Test mode. Database is not updated.') if $OptTest; +foreach my $Month (&ListMonth($Period)) { - print "---------- $Month ----------\n" if $Options{'d'}; + print "---------- $Month ----------\n" if $OptDebug; - if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { + if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { ### ---------------------------------------------- ### get groups data (number of postings per group) # get groups data from raw table for given month - my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); + my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ". + "WHERE day LIKE ? AND NOT disregard", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + $DBQuery->execute($Month.'-%') + or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); # count postings per group my %Postings; while (($_) = $DBQuery->fetchrow_array) { # get list oft newsgroups and hierarchies from Newsgroups: - my %Newsgroups = ListNewsgroups($_,$TLH,$Options{'l'} ? \%ValidGroups : ''); + my %Newsgroups = ListNewsgroups($_,$TLH, + $OptCheckgroupsFile ? \%ValidGroups : ''); # count each newsgroup and hierarchy once foreach (sort keys %Newsgroups) { $Postings{$_}++; }; }; - # add valid but empty groups if -l is set + # add valid but empty groups if --checkgroups is set if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { @@ -116,19 +145,29 @@ foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { }; # delete old data for that month - if (!$Options{'o'}) { - $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),undef,$Month) - or warn sprintf("$MySelf: E: Can't delete old groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + if (!$OptTest) { + $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", + $Conf{'DBDatabase'},$Conf{'DBTableGrps'}), + undef,$Month) + or &Bleat(2,sprintf("Can't delete old groups data for %s from %s.%s: ". + "$DBI::errstr\n",$Month, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); }; - print "----- GroupStats -----\n" if $Options{'d'}; + print "----- GroupStats -----\n" if $OptDebug; foreach my $Newsgroup (sort keys %Postings) { - print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; - if (!$Options{'o'}) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $OptDebug; + if (!$OptTest) { # write to database - $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - # $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s ". + "(month,newsgroup,postings) ". + "VALUES (?, ?, ?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableGrps'})); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) + or &Bleat(2,sprintf("Can't write groups data for %s/%s to %s.%s: ". + "$DBI::errstr\n",$Month,$Newsgroup, + $Conf{'DBDatabase'},$Conf{'DBTableGrps'})); $DBQuery->finish; }; }; @@ -150,43 +189,31 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdo>] [B<-m> I] [B<-p> I] [B<-t> I] [B<-l> I] [B<-n> I] [B<-r> I] [B<-g> I] [B<-c> I] [B<-s> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back +See L. =head1 DESCRIPTION This script will extract and process statistical information from a database table which is fed from F for a given time period and write its results to (an)other database table(s). Entries marked -with I<'disregard'> in the database will be ignored; currently, you have -to set this flag yourself, using your database management tools. You -can exclude erroneous entries that way (e.g. automatic reposts (think -of cancels flood and resurrectors); spam; ...). +with I<'disregard'> in the database will be ignored; currently, you +have to set this flag yourself, using your database management tools. +You can exclude erroneous entries that way (e.g. automatic reposts +(think of cancels flood and resurrectors); spam; ...). The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. +another time period or a single month via the B<--month> option (see +below). By default B will process all types of information; you -can change that using the B<-t> switch and assigning the type of -information to process. Currently only processing of the number of -postings per group per month is implemented anyway, so that doesn't -matter yet. +can change that using the B<--stats> option and assigning the type of +information to process. Currently that doesn't matter yet as only +processing of the number of postings per group per month is +implemented anyway. Possible information types include: @@ -205,59 +232,58 @@ respectively. A crossposting to de.alt.test and de.alt.admin, on the other hand, will be counted for de.alt.test and de.alt.admin each, but only once for de.alt.ALL and de.ALL. -Data is written to I (see doc/INSTALL). +Data is written to I (see L); you can +override that default through the B<--groupsdb> option. =back =head2 Configuration -F will read its configuration from F +B will read its configuration from F which should be present in the same directory via Config::Auto. -See doc/INSTALL for an overview of possible configuration options. +See L for an overview of possible configuration options. -You can override configuration options via the B<-n>, B<-r>, B<-g>, -B<-c> and B<-s> switches, respectively. +You can override configuration options via the B<--hierarchy>, +B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, +respectively. =head1 OPTIONS =over 3 -=item B<-V> (version) +=item B<-V>, B<--version> -Print out version and copyright information on B and exit. +Print out version and copyright information and exit. -=item B<-h> (help) +=item B<-h>, B<--help> Print this man page and exit. -=item B<-d> (debug) +=item B<-d>, B<--debug> Output debugging information to STDOUT while processing (number of postings per group). -=item B<-o> (output only) - -Do not write results to database. You should use B<-d> in conjunction -with B<-o> ... everything else seems a bit pointless. +=item B<-t>, B<--test> -=item B<-m> I (month) +Do not write results to database. You should use B<--debug> in +conjunction with B<--test> ... everything else seems a bit pointless. -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -is set. +=item B<-m>, B<--month> I -=item B<-p> I (period) +Set processing period to a single month in YYYY-MM format or to a time +period between two month in YYYY-MM:YYYY-MM format (two month, separated +by a colon). -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. -=item B<-t> I (type) +=item B<-s>, B<--stats> I Set processing type to one of I and I. Defaults to all (and is currently rather pointless as only I has been implemented). -=item B<-l> I (check against list) +=item B<-c>, B<--checkgroups> I Check each group against a list of valid newsgroups read from I, one group on each line and ignoring everything after the @@ -268,23 +294,23 @@ Newsgroups not found in I will be dropped (and logged to STDERR), and newsgroups found in I but having no postings will be added with a count of 0 (and logged to STDERR). -=item B<-n> I (newsgroup hierarchy) +=item B<--hierarchy> I (newsgroup hierarchy) Override I from F. -=item B<-r> I (raw data table) +=item B<--rawdb> I
(raw data table) Override I from F. -=item B<-g> I
(postings per group table) +=item B<--groupsdb> I
(postings per group table) Override I from F. -=item B<-c> I
(client data table) +=item B<--clientsdb> I
(client data table) Override I from F. -=item B<-s> I
(server/host data table) +=item B<--hostsdb> I
(host data table) Override I from F. @@ -292,7 +318,7 @@ Override I from F. =head1 INSTALLATION -See doc/INSTALL. +See L. =head1 EXAMPLES @@ -302,16 +328,16 @@ Process all types of information for lasth month: Do a dry run, showing results of processing: - gatherstats -do + gatherstats --debug --test Process all types of information for January of 2010: - gatherstats -m 2010-01 + gatherstats --month 2010-01 Process only number of postings for the year of 2010, checking against checkgroups-2010.txt: - gatherstats -p 2010-01:2010-12 -t groups -l checkgroups-2010.txt + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt =head1 FILES @@ -327,7 +353,7 @@ Library functions for the NewsStats package. =item F -Runtime configuration file for B. +Runtime configuration file. =back @@ -342,11 +368,11 @@ bug tracker at L! =item - -doc/README +L =item - -doc/INSTALL +L =back @@ -358,7 +384,7 @@ Thomas Hochstein =head1 COPYRIGHT AND LICENSE -Copyright (c) 2010 Thomas Hochstein +Copyright (c) 2010-2012 Thomas Hochstein This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. -- 2.20.1