# gatherstats.pl
#
# This script will gather statistical information from a database
- # containing headers and other information from a INN feed.
+ # containing headers and other information from an INN feed.
#
# It is part of the NewsStats package.
#
# which Perl itself is published.
BEGIN {
- our $VERSION = "0.01";
+ our $VERSION = "0.02";
use File::Basename;
# we're in .../bin, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
### read commandline options
my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH,
- $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,$OptConfFile);
+ $OptHostsDB,$OptMonth,$OptParseDB,$OptStatsType,$OptTest,$OptConfFile);
GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile,
'clientsdb=s' => \$OptClientsDB,
'd|debug!' => \$OptDebug,
'hierarchy=s' => \$OptTLH,
'hostsdb=s' => \$OptHostsDB,
'm|month=s' => \$OptMonth,
- 'rawdb=s' => \$OptRawDB,
+ 'parsedb=s' => \$OptParseDB,
's|stats=s' => \$OptStatsType,
't|test!' => \$OptTest,
'conffile=s' => \$OptConfFile,
### override configuration via commandline options
my %ConfOverride;
- $ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB;
+ $ConfOverride{'DBTableParse'} = $OptParseDB if $OptParseDB;
$ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB;
$ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB;
$ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB;
### ----------------------------------------------
### get groups data (number of postings per group)
- # get groups data from raw table for given month
+ # get groups data from parsed table for given month
my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ".
"WHERE day LIKE ? AND NOT disregard",
$Conf{'DBDatabase'},
- $Conf{'DBTableRaw'}));
+ $Conf{'DBTableParse'}));
$DBQuery->execute($Month.'-%')
or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ".
"$DBI::errstr\n",$Month,
- $Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
+ $Conf{'DBDatabase'},$Conf{'DBTableParse'}));
# count postings per group
my %Postings;
=head1 NAME
- gatherstats - process statistical data from a raw source
+ gatherstats - process statistical data from a parsed source
=head1 SYNOPSIS
- B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>]
-B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--parsedb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [--conffile I<filename>]
++B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats>] [B<-c> I<filename template>] [B<--hierarchy> I<TLH>] [B<--parsedb> I<database table>] [B<--groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
=head1 DESCRIPTION
This script will extract and process statistical information from a
- database table which is fed from F<feedlog.pl> for a given time period
+ database table which is filled from F<parsedb.pl> for a given time period
and write its results to (an)other database table(s). Entries marked
with I<'disregard'> in the database will be ignored; currently, you
have to set this flag yourself, using your database management tools.
=head2 Configuration
B<gatherstats> will read its configuration from F<newsstats.conf>
-which should be present in the same directory via Config::Auto.
+which should be present in F<etc/>, via Config::Auto, or from the
+configuration file given with the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options.
You can override configuration options via the B<--hierarchy>,
- B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
+ B<--parsedb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options,
respectively.
=head1 OPTIONS
Override I<TLH> from F<newsstats.conf>.
- =item B<--rawdb> I<table> (raw data table)
+ =item B<--parsedb> I<table> (parsed data table)
- Override I<DBTableRaw> from F<newsstats.conf>.
+ Override I<DBTableParse> from F<newsstats.conf>.
=item B<--groupsdb> I<table> (postings per group table)
=head1 COPYRIGHT AND LICENSE
-Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
+Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
1) Install the scripts
+ * Get INN, mysql, Perl, and the necessary modules installed (see README).
+
* Download the current version of NewsStats from
<http://th-h.de/download/scripts.php>.
* Copy the sample configuration file newsstats.conf.sample to
newsstats.conf and modify it for your purposes:
- # cp newsstats.conf.sample newsstats.conf
- # vim newsstats.conf
+ # cp etc/newsstats.conf.sample etc/newsstats.conf
+ # vim etc/newsstats.conf
a) Mandatory configuration options
* DBDriver = mysql
Database driver used; currently only mysql is supported.
-
+
* DBHost = localhost
The host your mysql server is running on.
* DBUser =
The username to connect to the database server.
- * DBPw =
+ * DBPw =
Matching password for your username.
* DBDatabase = newsstats
* TLH = de
Limit examination to that top-level hierarchy.
-
+
3) Database (mysql) setup
* Setup your database server with a username, password and
database matching the NewsStats configuration (see 2 a).
* Start the installation script:
-
+
# install/install.pl
- It will setup the necessary database tables and display some
+ It will set up the necessary database tables and display some
information on the next steps.
4) Feed (INN) setup
# which Perl itself is published.
BEGIN {
- our $VERSION = "0.01";
+ our $VERSION = "0.02";
use File::Basename;
# we're in .../install, so our module is in ../lib
push(@INC, dirname($0).'/../lib');
use NewsStats qw(:DEFAULT);
-use Cwd;
-
use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');
'h|help' => \&ShowPOD,
'V|version' => \&ShowVersion) or exit 1;
-### change working directory to .. (as we're in .../install)
-chdir dirname($FullPath).'/..';
-my $Path = cwd();
-
### read configuration
print("Reading configuration.\n");
my %Conf = %{ReadConfig($OptConfFile)};
CREATE DATABASE IF NOT EXISTS `$Conf{'DBDatabase'}` DEFAULT CHARSET=utf8;
SQLDB
- my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);
+ my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableParse' => <<PARSE, 'DBTableGrps' => <<GRPS);
--
-- Table structure for table DBTableRaw
--
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';
RAW
--
+ -- Table structure for table DBTableParse
+ --
+
+ CREATE TABLE IF NOT EXISTS `$Conf{'DBTableParse'}` (
+ `id` bigint(20) unsigned NOT NULL auto_increment,
+ `day` date NOT NULL,
+ `mid` varchar(250) character set ascii NOT NULL,
+ `refs` varchar(1000) character set ascii,
+ `date` varchar(100) NOT NULL,
+ `path` varchar(1000) NOT NULL,
+ `newsgroups` varchar(1000) NOT NULL,
+ `fupto` varchar(200),
+ `from_` varchar(500),
+ `from_parsed` varchar(200),
+ `from_name` varchar(200),
+ `from_address` varchar(200),
+ `sender` varchar(500),
+ `sender_parsed` varchar(200),
+ `sender_name` varchar(200),
+ `sender_address` varchar(200),
+ `replyto` varchar(500),
+ `replyto_parsed` varchar(200),
+ `replyto_name` varchar(200),
+ `replyto_address` varchar(200),
+ `subject` varchar(1000) NOT NULL,
+ `subject_parsed` varchar(1000),
+ `organization` varchar(1000),
+ `linecount` int(4) unsigned,
+ `approved` varchar(250),
+ `supersedes` varchar(250),
+ `expires` varchar(100),
+ `useragent` varchar(500),
+ `xnewsreader` varchar(500),
+ `xmailer` varchar(500),
+ `xnoarchive` varchar(100),
+ `contenttype` varchar(500),
+ `contentencoding` varchar(500),
+ `cancellock` varchar(500),
+ `injectioninfo` varchar(500),
+ `xtrace` varchar(500),
+ `postinghost` varchar(1000),
+ `headers` longtext,
+ `disregard` tinyint(1) default '0',
+ PRIMARY KEY (`id`),
+ KEY `day` (`day`),
+ KEY `mid` (`mid`),
+ KEY `newsgroups` (`newsgroups`)
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Parsed data';
+ PARSE
+ --
-- Table structure for table DBTableGrps
--
## gather statistics for NewsStats
newsstats!\\
:!*,de.*\\
- :Tc,WmtfbsPNH,Ac:$Path/feedlog.pl
+ :Tc,WmtfbsPNH,Ac:$HomePath/bin/feedlog.pl
Please
=head1 SYNOPSIS
-B<install> [B<-Vh> [--update I<version>] [--conffile I<filename>]
+B<install> [B<-Vh>] [B<--update> I<version>] [B<--conffile> I<filename>]
=head1 REQUIREMENTS
=head2 Configuration
-B<install> will read its configuration from F<newsstats.conf> via
-Config::Auto.
+B<install> will read its configuration via Config::Auto, either from
+F<newsstats.conf>, which should be present in F<etc/>, or from the
+configuration file given with the B<--conffile> option.
See L<doc/INSTALL> for an overview of possible configuration options.
=head1 COPYRIGHT AND LICENSE
-Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net>
+Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.
Output => [qw(OutputData FormatOutput)],
SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
SQLSetBounds SQLBuildClause GetMaxLength)]);
-$VERSION = '0.01';
-our $PackageVersion = '0.01';
+$VERSION = '0.02';
+our $PackageVersion = '0.02';
use Data::Dumper;
use File::Basename;
################################################################################
### display version and exit
print "NewsStats v$PackageVersion\n$MyVersion\n";
- print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
+ print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@inter.net>\n";
print "This program is free software; you may redistribute it ".
"and/or modify it under the same terms as Perl itself.\n";
exit(100);
$ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
# mandatory configuration options
my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
- 'DBTableRaw','DBTableGrps');
+ 'DBTableRaw','DBTableParse','DBTableGrps');
# read config via Config::Auto
my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
my %Conf = %{$ConfR};
################################################################################
sub GetTimePeriod {
################################################################################
- ### get a time period to act on from --month option;
- ### if empty, default to last month
- ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all'
+ ### get a time period to act on from --month / --day option;
+ ### if empty, default to last month / day
+ ### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)'
+ ### or 'all'
+ ### $Type : may be 'month' or 'day'
### OUT: $Verbal,$SQL: verbal description and WHERE-clause
### of the chosen time period
- my ($Month) = @_;
+ my ($Period,$Type) = @_;
# define result variables
my ($Verbal, $SQL);
- # define a regular expression for a month
- my $REMonth = '\d{4}-\d{2}';
+ # check $Type
+ $Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day'));
+ # define a regular expression for a month or day
+ my $REPeriod = '\d{4}-\d{2}';
+ $REPeriod .= '-\d{2}' if ($Type eq 'day');
- # default to last month if option is not set
- if(!$Month) {
- $Month = &LastMonth;
+ # default to last month / day if option is not set
+ if(!$Period) {
+ $Period = &LastMonthDay($Type);
}
# check for valid input
- if ($Month =~ /^$REMonth$/) {
- # single month (YYYY-MM)
- ($Month) = &CheckMonth($Month);
- $Verbal = $Month;
- $SQL = sprintf("month = '%s'",$Month);
- } elsif ($Month =~ /^$REMonth:$REMonth$/) {
- # time period (YYYY-MM:YYYY-MM)
- $Verbal = sprintf('%s to %s',&SplitPeriod($Month));
- $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month));
- } elsif ($Month =~ /^all$/i) {
+ if ($Period =~ /^$REPeriod$/) {
+ # single month/day [YYYY-MM(-DD)]
+ ($Period) = &CheckPeriod($Type,$Period);
+ $Verbal = $Period;
+ $SQL = sprintf("%s = '%s'",$Type,$Period);
+ } elsif ($Period =~ /^$REPeriod:$REPeriod$/) {
+ # time period [YYYY-MM(-DD):YYYY-MM(-DD)]
+ $Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type));
+ $SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type,
+ &SplitPeriod($Period,$Type));
+ } elsif ($Period =~ /^all$/i) {
# special case: ALL
$Verbal = 'all time';
$SQL = '';
};
################################################################################
- sub LastMonth {
- ################################################################################
- ### get last month from todays date in YYYY-MM format
- ### OUT: last month as YYYY-MM
- # get today's date
- my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
- # $Month is already defined from 0 to 11, so no need to decrease it by 1
+ sub LastMonthDay {
+ ################################################################################
+ ### get last month (YYYY-MM) or yesterday (YYYY-MM-DD) relative to today's date
+ ### IN : $Type : may be 'month' or 'day'
+ ### OUT: last month/day as YYYY-MM(-DD)
+ my ($Type) = @_;
+ my ($Day,$Month,$Year);
+ if ($Type eq 'day') {
+ # get yesterday's date (now minus 86400 s; NOTE(review): may be off around DST changes - confirm)
+ (undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
+ # $Month is defined from 0 to 11, so add 1
+ $Month++;
+ } else {
+ # get today's date (month and year)
+ (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
+ # $Month runs from 0 to 11, so it already denotes last month; 0 means December of the previous year
+ if ($Month < 1) {
+ $Month = 12;
+ $Year--;
+ };
+ }
$Year += 1900;
- if ($Month < 1) {
- $Month = 12;
- $Year--;
- };
- # return last month
- return sprintf('%4d-%02d',$Year,$Month);
+ # return last month / day
+ if ($Type eq 'day') {
+ return sprintf('%4d-%02d-%02d',$Year,$Month,$Day);
+ } else {
+ return sprintf('%4d-%02d',$Year,$Month);
+ }
};
################################################################################
- sub CheckMonth {
+ sub CheckPeriod {
################################################################################
- ### check if input (in YYYY-MM form) is valid with MM between 01 and 12;
+ ### check if input (in YYYY-MM(-DD) form) has MM in 01..12 and DD in 01..31;
### otherwise, fix it
- ### IN : @Month: array of month
- ### OUT: @Month: a valid month
- my (@Month) = @_;
- foreach my $Month (@Month) {
- my ($OldMonth) = $Month;
- my ($CalMonth) = substr ($Month, -2);
- if ($CalMonth < 1 or $CalMonth > 12) {
+ ### IN : $Type : may be 'month' or 'day'
+ ### @Period: array of month/day
+ ### OUT: @Period: a valid month/day
+ my ($Type,@Period) = @_;
+ foreach my $Period (@Period) {
+ my ($OldPeriod) = $Period;
+ my ($CalMonth,$CalDay);
+ $Period .= '-01' if ($Type eq 'month');
+ $CalDay = substr ($Period, -2);
+ $CalMonth = substr ($Period, 5, 2);
+ if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
$CalMonth = '12' if $CalMonth > 12;
$CalMonth = '01' if $CalMonth < 1;
- substr($Month, -2) = $CalMonth;
- &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ".
- "and '12'), set to '%s'.",$OldMonth,$Month));
+ substr($Period, 5, 2) = $CalMonth;
+ $CalDay = '01' if $CalDay < 1;
+ $CalDay = '31' if $CalDay > 31;
+ # FIXME: months with fewer than 31 days are not handled (e.g. a day of 31 in February passes)
+ substr($Period, -2) = $CalDay;
+ &Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
+ $OldPeriod,$Period));
}
+ $Period = substr($Period,0,7) if ($Type eq 'month');
}
- return @Month;
+ return @Period;
};
################################################################################
sub SplitPeriod {
################################################################################
- ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month
+ ### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end
### IN : $Period: time period
- ### OUT: $StartMonth, $EndMonth
- my ($Period) = @_;
- my ($StartMonth, $EndMonth) = split /:/, $Period;
- ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth);
+ ### $Type : may be 'month' or 'day'
+ ### OUT: $StartTime, $EndTime (checked via CheckPeriod, in ascending order)
+ my ($Period,$Type) = @_;
+ my ($StartTime, $EndTime) = split /:/, $Period;
+ ($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
# switch parameters as necessary
- if ($EndMonth gt $StartMonth) {
- return ($StartMonth, $EndMonth);
+ if ($EndTime gt $StartTime) {
+ return ($StartTime, $EndTime);
} else {
- return ($EndMonth, $StartMonth);
+ return ($EndTime, $StartTime);
};
};