From: Thomas Hochstein Date: Mon, 1 Jan 2018 15:56:56 +0000 (+0100) Subject: Merge branch 'thh-parsedb' into pu X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=commitdiff_plain;h=84e9923abeadcfb488985b9e60eb78d4eaa950fd;hp=fd0717a15cb1b36e25a019060d3f40aac9ca9517 Merge branch 'thh-parsedb' into pu * thh-parsedb: Add some input validation. Add documentation to parsedb.pl. Handle more than one entitiy in From: etc. Let gatherstats read its data from DBTableParse. Add decoding and parsing of From: etc. Create a database table with parsed raw data. Make GetTimePeriod() and others accept days. # Conflicts: # bin/gatherstats.pl --- diff --git a/bin/gatherstats.pl b/bin/gatherstats.pl old mode 100755 new mode 100644 index b4f07d9..b09b73f --- a/bin/gatherstats.pl +++ b/bin/gatherstats.pl @@ -3,7 +3,7 @@ # gatherstats.pl # # This script will gather statistical information from a database -# containing headers and other information from a INN feed. +# containing headers and other information from an INN feed. # # It is part of the NewsStats package. # @@ -38,7 +38,7 @@ my %LegalStats; ### read commandline options my ($OptCheckgroupsFile,$OptClientsDB,$OptDebug,$OptGroupsDB,$OptTLH, - $OptHostsDB,$OptMonth,$OptRawDB,$OptStatsType,$OptTest,$OptConfFile); + $OptHostsDB,$OptMonth,$OptParseDB,$OptStatsType,$OptTest,$OptConfFile); GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, 'clientsdb=s' => \$OptClientsDB, 'd|debug!' => \$OptDebug, @@ -46,7 +46,7 @@ GetOptions ('c|checkgroups=s' => \$OptCheckgroupsFile, 'hierarchy=s' => \$OptTLH, 'hostsdb=s' => \$OptHostsDB, 'm|month=s' => \$OptMonth, - 'rawdb=s' => \$OptRawDB, + 'parsedb=s' => \$OptParseDB, 's|stats=s' => \$OptStatsType, 't|test!' => \$OptTest, 'conffile=s' => \$OptConfFile, @@ -58,7 +58,7 @@ my %Conf = %{ReadConfig($OptConfFile)}; ### override configuration via commandline options my %ConfOverride; -$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; +$ConfOverride{'DBTableParse'} = $OptParseDB if $OptParseDB; $ConfOverride{'DBTableGrps'} = $OptGroupsDB if $OptGroupsDB; $ConfOverride{'DBTableClnts'} = $OptClientsDB if $OptClientsDB; $ConfOverride{'DBTableHosts'} = $OptHostsDB if $OptHostsDB; @@ -124,15 +124,15 @@ foreach my $Month (&ListMonth($Period)) { ### ---------------------------------------------- ### get groups data (number of postings per group) - # get groups data from raw table for given month + # get groups data from parsed table for given month my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s ". "WHERE day LIKE ? AND NOT disregard", $Conf{'DBDatabase'}, - $Conf{'DBTableRaw'})); + $Conf{'DBTableParse'})); $DBQuery->execute($Month.'-%') or &Bleat(2,sprintf("Can't get groups data for %s from %s.%s: ". "$DBI::errstr\n",$Month, - $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + $Conf{'DBDatabase'},$Conf{'DBTableParse'})); # count postings per group my %Postings; @@ -206,11 +206,11 @@ __END__ =head1 NAME -gatherstats - process statistical data from a raw source +gatherstats - process statistical data from a parsed source =head1 SYNOPSIS -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I] [B<-c> I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] [B<--conffile> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I] [B<-c> I]] [B<--hierarchy> I] [B<--parsedb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] [B<--conffile> I] =head1 REQUIREMENTS @@ -219,7 +219,7 @@ See L. =head1 DESCRIPTION This script will extract and process statistical information from a -database table which is fed from F for a given time period +database table which is filled from F for a given time period and write its results to (an)other database table(s). Entries marked with I<'disregard'> in the database will be ignored; currently, you have to set this flag yourself, using your database management tools. @@ -267,7 +267,7 @@ submitted by the B<--conffile> option. See L for an overview of possible configuration options. You can override configuration options via the B<--hierarchy>, -B<--rawdb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, +B<--parsedb>, B<--groupsdb>, B<--clientsdb> and B<--hostsdb> options, respectively. =head1 OPTIONS @@ -328,9 +328,9 @@ will be added with a count of 0 (and logged to STDERR). Override I from F. -=item B<--rawdb> I (raw data table) +=item B<--parsedb> I
(parsed data table) -Override I from F. +Override I from F. =item B<--groupsdb> I
(postings per group table) diff --git a/bin/parsedb.pl b/bin/parsedb.pl new file mode 100755 index 0000000..b4c2056 --- /dev/null +++ b/bin/parsedb.pl @@ -0,0 +1,425 @@ +#! /usr/bin/perl +# +# parsedb.pl +# +# This script will parse a database with raw header information +# from a INN feed to a structured database. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2013 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../bin, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); +} +use strict; +use warnings; + +use NewsStats qw(:DEFAULT :TimePeriods :SQLHelper); + +use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); + +use Encode qw/decode/; +use Mail::Address; + +################################# Definitions ################################## + +# define header names with separate database fields +my %DBFields = ('date' => 'date', + 'references' => 'refs', + 'followup-to' => 'fupto', + 'from' => 'from_', + 'sender' => 'sender', + 'reply-to' => 'replyto', + 'subject' => 'subject', + 'organization' => 'organization', + 'lines' => 'linecount', + 'approved' => 'approved', + 'supersedes' => 'supersedes', + 'expires' => 'expires', + 'user-agent' => 'useragent', + 'x-newsreader' => 'xnewsreader', + 'x-mailer' => 'xmailer', + 'x-no-archive' => 'xnoarchive', + 'content-type' => 'contenttype', + 'content-transfer-encoding' => 'contentencoding', + 'cancel-lock' => 'cancellock', + 'injection-info' => 'injectioninfo', + 'x-trace' => 'xtrace', + 'nntp-posting-host' => 'postinghost'); + +# define field list for database +my @DBFields = qw/day mid refs date path newsgroups fupto from_ from_parsed + from_name from_address sender sender_parsed sender_name + sender_address replyto replyto_parsed replyto_name + replyto_address subject subject_parsed organization linecount + approved supersedes expires useragent xnewsreader xmailer + xnoarchive contenttype contentencoding cancellock injectioninfo + xtrace postinghost headers disregard/; + +################################# Main program ################################# + +### read commandline options +my ($OptDay,$OptDebug,$OptParseDB,$OptRawDB,$OptTest,$OptConfFile); +GetOptions ('d|day=s' => \$OptDay, + 'debug!' => \$OptDebug, + 'parsedb=s' => \$OptParseDB, + 'rawdb=s' => \$OptRawDB, + 't|test!' => \$OptTest, + 'conffile=s' => \$OptConfFile, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; + +### read configuration +my %Conf = %{ReadConfig($OptConfFile)}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableRaw'} = $OptRawDB if $OptRawDB; +$ConfOverride{'DBTableParse'} = $OptParseDB if $OptParseDB; +&OverrideConfig(\%Conf,\%ConfOverride); + +### get time period +### and set $Period for output and expression for SQL 'WHERE' clause +my ($Period,$SQLWherePeriod) = &GetTimePeriod($OptDay,'day'); +# bail out if --month is invalid or "all" +&Bleat(2,"--day option has an invalid format - please use 'YYYY-MM-DD' or ". + "'YYYY-MM-DD:YYYY-MM-DD'!") if (!$Period or $Period eq 'all time'); + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### get & write data +&Bleat(1,'Test mode. Database is not updated.') if $OptTest; + +# create $SQLWhereClause +my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,'NOT disregard'); + +# delete old data for current period +if (!$OptTest) { + print "----------- Deleting old data ... -----------\n" if $OptDebug; + my $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s %s", + $Conf{'DBDatabase'},$Conf{'DBTableParse'}, + $SQLWhereClause)) + or &Bleat(2,sprintf("Can't delete old parsed data for %s from %s.%s: ". + "$DBI::errstr\n",$Period, + $Conf{'DBDatabase'},$Conf{'DBTableParse'})); +}; + +# read from DBTableRaw +print "-------------- Reading data ... -------------\n" if $OptDebug; +my $DBQuery = $DBHandle->prepare(sprintf("SELECT id, day, mid, peer, path, ". + "newsgroups, headers, disregard ". + "FROM %s.%s %s", $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'}, $SQLWhereClause)); +$DBQuery->execute() + or &Bleat(2,sprintf("Can't get data for %s from %s.%s: ". + "$DBI::errstr\n",$Period, + $Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + +# set output and database connection to UTF-8 +# as we're going to write decoded header contents containing UTF-8 chars +binmode(STDOUT, ":utf8"); +$DBHandle->do("SET NAMES 'utf8'"); + +# create a list of supported encondings +my %LegalEncodings; +foreach (Encode->encodings()) { + $LegalEncodings{$_} = 1; +} +# parse data in a loop and write it out +print "-------------- Parsing data ... -------------\n" if $OptDebug; +while (my $HeadersR = $DBQuery->fetchrow_hashref) { + my %Headers = %{$HeadersR}; + + # parse $Headers{'headers'} ('headers' from DBTableRaw) + # remove empty lines (that should not even exist in a header!) + $Headers{'headers'} =~ s/\n\s*\n/\n/g; + # merge continuation lines + # from Perl Cookbook, 1st German ed. 1999, pg. 91 + $Headers{'headers'} =~ s/\n\s+/ /g; + # split headers in single lines + my $OtherHeaders; + for (split(/\n/,$Headers{'headers'})) { + # split header lines in header name and header content + my ($key,$value); + if ($_ =~ /:/) { + ($key,$value) = split(/:/,$_,2); + $key =~ s/\s*//; + $value =~ s/^\s*(.+)\s*$/$1/; + } else { + &Bleat(1,sprintf("Illegal header line in %s.%s id %s: %s", + $Conf{'DBDatabase'}, $Conf{'DBTableRaw'}, + $Headers{'id'},$_)); + next; + } + # check for empty (mandatory) fields from DBTableRaw + # and set them from $Headers{'headers', if necessary + if (lc($key) =~ /^(message-id|path|newsgroups)$/) { + my $HeaderName = lc($key); + $HeaderName = 'mid' if ($HeaderName eq 'message-id'); + if (!defined($Headers{$HeaderName}) or $Headers{$HeaderName} eq '') { + $Headers{$HeaderName} = $value; + &Bleat(1,sprintf("Taking missing %s from 'headers' in %s.%s id %s.", + $HeaderName, $Conf{'DBDatabase'}, $Conf{'DBTableRaw'}, + $Headers{'id'})); + } + } + # save each header, separate database fields in %Headers, + # the rest in $OtherHeaders (but not Message-ID, Path, Peer + # and Newsgroups as those do already exist) + if (defined($DBFields{lc($key)})) { + $Headers{$DBFields{lc($key)}} = $value; + } else { + $OtherHeaders .= sprintf("%s: %s\n",$key,$value) + if lc($key) !~ /^(message-id|path|peer|newsgroups)$/; + } + } + # replace old (now parsed) $Headers{'headers'} with remanining $OtherHeaders + chomp($OtherHeaders); + $Headers{'headers'} = $OtherHeaders; + + foreach ('from_','sender', 'replyto', 'subject') { + if ($Headers{$_}) { + my $HeaderName = $_; + $HeaderName =~ s/_$//; + # decode From: / Sender: / Reply-To: / Subject: + if ($Headers{$_} =~ /\?(B|Q)\?/) { + # check for legal encoding and decode + (my $Encoding) = $Headers{$_} =~ /\?([^?]+)\?(B|Q)\?/; + $Headers{$HeaderName.'_parsed'} = decode('MIME-Header',$Headers{$_}) + if (exists($LegalEncodings{$Encoding})); + } + # extract name(s) and mail(s) from From: / Sender: / Reply-To: + # in parsed form, if available + if ($_ ne 'subject') { + my @Address; + # start parser on header or parsed header + # @Address will have an array of Mail::Address objects, one for + # each name/mail (you can have more than one person in From:!) + if (defined($Headers{$HeaderName.'_parsed'})) { + @Address = Mail::Address->parse($Headers{$HeaderName.'_parsed'}); + } else { + @Address = Mail::Address->parse($Headers{$_}); + } + # split each Mail::Address object to @Names and @Adresses + my (@Names,@Adresses); + foreach (@Address) { + # take address part in @Addresses + push (@Adresses, $_->address()); + # take name part form "phrase", if there is one: + # From: My Name (Comment) + # otherwise, take it from "comment": + # From: addr@ess (Comment) + # and push it in @Names + my ($Name); + $Name = $_->comment() unless $Name = $_->phrase; + $Name =~ s/^\((.+)\)$/$1/; + push (@Names, $Name); + } + # put all @Adresses and all @Names in %Headers as comma separated lists + $Headers{$HeaderName.'_address'} = join(', ',@Adresses); + $Headers{$HeaderName.'_name'} = join(', ',@Names); + } + } + } + + # order output for database entry: fill @SQLBindVars + print "-------------- Next entry:\n" if $OptDebug; + my @SQLBindVars; + foreach (@DBFields) { + if (defined($Headers{$_}) and $Headers{$_} ne '') { + push (@SQLBindVars,$Headers{$_}); + printf ("FOUND: %s -> %s\n",$_,$Headers{$_}) if $OptDebug; + } else { + push (@SQLBindVars,undef); + } + } + + # write data to DBTableParse + if (!$OptTest) { + print "-------------- Writing data ... -------------\n" if $OptDebug; + my $DBWrite = + $DBHandle->prepare(sprintf("INSERT INTO %s.%s (%s) VALUES (%s)", + $Conf{'DBDatabase'}, + $Conf{'DBTableParse'}, + # get field names from @DBFields + join(', ',@DBFields), + # create a list of '?' for each DBField + join(', ', + split(/ /,'? ' x scalar(@DBFields))) + )); + $DBWrite->execute(@SQLBindVars) + or &Bleat(2,sprintf("Can't write parsed data for %s to %s.%s: ". + "$DBI::errstr\n",$Period, + $Conf{'DBDatabase'},$Conf{'DBTableParse'})); + $DBWrite->finish; + } +}; +$DBQuery->finish; + +### close handles +$DBHandle->disconnect; + +print "------------------- DONE! -------------------\n" if $OptDebug; +__END__ + +################################ Documentation ################################# + +=head1 NAME + +parsedb - parse raw data and save it to a database + +=head1 SYNOPSIS + +B [B<-Vht>] [B<--day> I | I] [B<--rawdb> I] [B<--parsedb> I] [B<--conffile> I] [B<--debug>] + +=head1 REQUIREMENTS + +See L. + +=head1 DESCRIPTION + +This script will parse raw, unstructured headers from a database table which is +fed from F for a given time period and write its results to +nother database table with separate fields (columns) for most (or even all) +relevant headers. + +I, I, I and I will be parsed from MIME +encoded words to UTF-8 as needed while the unparsed copy is kept. From that +parsed copy, I, I and I will also be split into +separate name(s) and address(es) fields while the un-splitted copy is kept, +too. + +B should be run nightly from cron for yesterdays data so all +other scripts get current information. The time period to act on defaults to +yesterday, accordingly; you can assign another time period or a single day via +the B<--day> option (see below). + +=head2 Configuration + +B will read its configuration from F +should be present in etc/ via Config::Auto or from a configuration file +submitted by the B<--conffile> option. + +See L for an overview of possible configuration options. + +You can override configuration options via the B<--rawdb> and +B<--parsedb> options, respectively. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. + +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<--debug> + +Output (rather much) debugging information to STDOUT while processing. + +=item B<-t>, B<--test> + +Do not write results to database. You should use B<--debug> in +conjunction with B<--test> ... everything else seems a bit pointless. + +=item B<-d>, B<--day> I + +Set processing period to a single day in YYYY-MM-DD format or to a time +period between two days in YYYY-MM-DD:YYYY-MM-DD format (two days, separated +by a colon). + +Defaults to yesterday. + +=item B<--rawdb> I
(raw data table) + +Override I from F. + +=item B<--parsedb> I
(parsed data table) + +Override I from F. + +=item B<--conffile> I + +Load configuration from I instead of F. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +An example crontab entry: + + 0 1 * * * /path/to/bin/parsedb.pl + +Do a dry run for yesterday's data, showing results of processing: + + parsedb --debug --test | less + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +L + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2013 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/doc/INSTALL b/doc/INSTALL index 307ec0e..626fb6d 100644 --- a/doc/INSTALL +++ b/doc/INSTALL @@ -13,6 +13,8 @@ INSTALLATION INSTRUCTIONS 1) Install the scripts + * Get INN, mysql, Perl, and the necessary modules installed (see README). + * Download the current version of NewsStats from . diff --git a/doc/README b/doc/README index e809cea..0ccfad5 100644 --- a/doc/README +++ b/doc/README @@ -47,6 +47,7 @@ Prerequisites - Config::Auto - Date::Format - DBI + - Mail::Address * mysql 5.0.x diff --git a/etc/newsstats.conf.sample b/etc/newsstats.conf.sample index 3133ed2..19a9d67 100644 --- a/etc/newsstats.conf.sample +++ b/etc/newsstats.conf.sample @@ -4,13 +4,14 @@ # DBDriver = mysql DBHost = localhost -DBUser = -DBPw = +DBUser = +DBPw = DBDatabase = newsstats # # tables # DBTableRaw = raw_de +DBTableParse = parsed_de DBTableGrps = groups_de #DBTableClnts = #DBTableHosts = diff --git a/install/install.pl b/install/install.pl index e2acf66..5116881 100755 --- a/install/install.pl +++ b/install/install.pl @@ -47,7 +47,7 @@ my $DBCreate = < < < < < < 'equal'); my %Conf = %{$ConfR}; @@ -265,33 +265,39 @@ sub ReadGroupList { ################################################################################ sub GetTimePeriod { ################################################################################ -### get a time period to act on from --month option; -### if empty, default to last month -### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' +### get a time period to act on from --month / --day option; +### if empty, default to last month / day +### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)' +### or 'all' +### $Type : may be 'month' or 'day' ### OUT: $Verbal,$SQL: verbal description and WHERE-clause ### of the chosen time period - my ($Month) = @_; + my ($Period,$Type) = @_; # define result variables my ($Verbal, $SQL); - # define a regular expression for a month - my $REMonth = '\d{4}-\d{2}'; - - # default to last month if option is not set - if(!$Month) { - $Month = &LastMonth; + # check $Type + $Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day')); + # define a regular expressions for a month or day + my $REPeriod = '\d{4}-\d{2}'; + $REPeriod .= '-\d{2}' if ($Type eq 'day'); + + # default to last month / day if option is not set + if(!$Period) { + $Period = &LastMonthDay($Type); } # check for valid input - if ($Month =~ /^$REMonth$/) { - # single month (YYYY-MM) - ($Month) = &CheckMonth($Month); - $Verbal = $Month; - $SQL = sprintf("month = '%s'",$Month); - } elsif ($Month =~ /^$REMonth:$REMonth$/) { - # time period (YYYY-MM:YYYY-MM) - $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); - $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); - } elsif ($Month =~ /^all$/i) { + if ($Period =~ /^$REPeriod$/) { + # single month/day [YYYY-MM(-DD)] + ($Period) = &CheckPeriod($Type,$Period); + $Verbal = $Period; + $SQL = sprintf("%s = '%s'",$Type,$Period); + } elsif ($Period =~ /^$REPeriod:$REPeriod$/) { + # time period [YYYY-MM(-DD):YYYY-MM(-DD)] + $Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type)); + $SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type, + &SplitPeriod($Period,$Type)); + } elsif ($Period =~ /^all$/i) { # special case: ALL $Verbal = 'all time'; $SQL = ''; @@ -304,58 +310,82 @@ sub GetTimePeriod { }; ################################################################################ -sub LastMonth { +sub LastMonthDay { ################################################################################ -### get last month from todays date in YYYY-MM format -### OUT: last month as YYYY-MM - # get today's date - my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); - # $Month is already defined from 0 to 11, so no need to decrease it by 1 +### get last month/day from todays date in YYYY-MM format +### IN : $Type : may be 'month' or 'day' +### OUT: last month/day as YYYY-MM(-DD) + my ($Type) = @_; + my ($Day,$Month,$Year); + if ($Type eq 'day') { + # get yesterdays's date + (undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400); + # $Month is defined from 0 to 11, so add 1 + $Month++; + } else { + # get today's date (month and year) + (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); + # $Month is already defined from 0 to 11, so no need to decrease it by 1 + if ($Month < 1) { + $Month = 12; + $Year--; + }; + } $Year += 1900; - if ($Month < 1) { - $Month = 12; - $Year--; - }; - # return last month - return sprintf('%4d-%02d',$Year,$Month); + # return last month / day + if ($Type eq 'day') { + return sprintf('%4d-%02d-%02d',$Year,$Month,$Day); + } else { + return sprintf('%4d-%02d',$Year,$Month); + } }; ################################################################################ -sub CheckMonth { +sub CheckPeriod { ################################################################################ -### check if input (in YYYY-MM form) is valid with MM between 01 and 12; +### check if input (in YYYY-MM(-DD) form) is a valid month / day; ### otherwise, fix it -### IN : @Month: array of month -### OUT: @Month: a valid month - my (@Month) = @_; - foreach my $Month (@Month) { - my ($OldMonth) = $Month; - my ($CalMonth) = substr ($Month, -2); - if ($CalMonth < 1 or $CalMonth > 12) { +### IN : $Type : may be 'month' or 'day' +### @Period: array of month/day +### OUT: @Period: a valid month/day + my ($Type,@Period) = @_; + foreach my $Period (@Period) { + my ($OldPeriod) = $Period; + my ($CalMonth,$CalDay); + $Period .= '-01' if ($Type eq 'month'); + $CalDay = substr ($Period, -2); + $CalMonth = substr ($Period, 5, 2); + if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) { $CalMonth = '12' if $CalMonth > 12; $CalMonth = '01' if $CalMonth < 1; - substr($Month, -2) = $CalMonth; - &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". - "and '12'), set to '%s'.",$OldMonth,$Month)); + substr($Period, 5, 2) = $CalMonth; + $CalDay = '01' if $CalDay < 1; + $CalDay = '31' if $CalDay > 31; + # FIXME! - month with less than 31 days ... + substr($Period, -2) = $CalDay; + &Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.", + $OldPeriod,$Period)); } + $Period = substr($Period,0,7) if ($Type eq 'month'); } - return @Month; + return @Period; }; ################################################################################ sub SplitPeriod { ################################################################################ -### split a time period denoted by YYYY-MM:YYYY-MM into start and end month +### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end ### IN : $Period: time period -### OUT: $StartMonth, $EndMonth - my ($Period) = @_; - my ($StartMonth, $EndMonth) = split /:/, $Period; - ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); +### $Type : may be 'month' or 'day' +### OUT: $StartTime, $EndTime + my ($Period,$Type) = @_; + my ($StartTime, $EndTime) = split /:/, $Period; + ($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime); # switch parameters as necessary - if ($EndMonth gt $StartMonth) { - return ($StartMonth, $EndMonth); + if ($EndTime gt $StartTime) { + return ($StartTime, $EndTime); } else { - return ($EndMonth, $StartMonth); + return ($EndTime, $StartTime); }; };