X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=NewsStats.pm;h=a23777482bc23e0f7d4ae8a0460c0a38d7e7a3e7;hp=bdfdcf5ab6762d5df383ae9647bef19d117b55d3;hb=07c0b2589af779c33d5d35b6a7fa0e7883201674;hpb=a915469e0ccd46be3a8b293998ac7e110c896096;ds=sidebyside diff --git a/NewsStats.pm b/NewsStats.pm index bdfdcf5..a237774 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -2,7 +2,7 @@ # # Library functions for the NewsStats package. # -# Copyright (c) 2010-2012 Thomas Hochstein +# Copyright (c) 2010-2013 Thomas Hochstein # # This module can be redistributed and/or modified under the same terms under # which Perl itself is published. @@ -33,6 +33,7 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ParseHierarchies ReadGroupList OutputData FormatOutput @@ -98,7 +99,19 @@ sub ReadConfig { ### IN : $ConfFile: config filename ### OUT: reference to a hash containing the configuration my ($ConfFile) = @_; - return Config::Auto::parse($ConfFile, format => 'equal'); + # mandatory configuration options + my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', + 'DBTableRaw','DBTableGrps'); + # read config via Config::Auto + my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); + my %Conf = %{$ConfR}; + # check for mandatory options + foreach (@Mandatory) { + &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) + if (!defined($Conf{$_})); + } + # $Conf{'TLH'} is checked in gatherstats.pl + return $ConfR; }; ################################################################################ @@ -184,7 +197,7 @@ sub ListNewsgroups { next if($TLH and !/^$TLH/); # don't count invalid newsgroups if(%ValidGroups and !defined($ValidGroups{$_})) { - &Bleat(1,sprintf("DROPPED: %s",$_)); + warn (sprintf("DROPPED: %s\n",$_)); next; } # add original newsgroup to %Newsgroups @@ -230,8 +243,9 @@ sub ReadGroupList { my %ValidGroups; open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); while (<$LIST>) { - s/^(\S+).*$/$1/; + s/^\s*(\S+).*$/$1/; chomp; + next if /^$/; $ValidGroups{$_} = '1'; }; close $LIST; @@ -380,16 +394,17 @@ sub OutputData { ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM ### $DBQuery : database query handle with executed query, ### containing $Month, $Key, $Value -### $PadGroup : padding length for key field (optional) for 'pretty' +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, - $DBQuery, $PadGroup) = @_; + $DBQuery, $PadField, $PadValue) = @_; my %ValidKeys = %{$ValidKeys} if $ValidKeys; my ($FileName, $Handle, $OUT); our $LastIteration; # define output types my %LegalOutput; - @LegalOutput{('dump',,'list','pretty')} = (); + @LegalOutput{('dump','list','pretty')} = (); # bail out if format is unknown &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); @@ -425,7 +440,7 @@ sub OutputData { $Handle = $OUT; }; print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, - $Precision, $PadGroup); + $Precision, $PadField, $PadValue); $LastIteration = $Caption; }; close $OUT if ($FileTempl); @@ -441,9 +456,11 @@ sub FormatOutput { ### $Key : newsgroup, client, ... or $Month, as above ### $Value : number of postings with that attribute ### $Precision: number of digits right of decimal point (0 or 2) -### $PadGroup : padding length for key field (optional) for 'pretty' +### $PadField : padding length for key field (optional) for 'pretty' +### $PadValue : padding length for value field (optional) for 'pretty' ### OUT: $Output: formatted output - my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; + my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, + $PadValue) = @_; my ($Output); # keep last caption in mind our ($LastIteration); @@ -460,8 +477,13 @@ sub FormatOutput { # output as a table $Output = sprintf ("# ----- %s:\n",$Caption) if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); - $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : - "%s %.*f\n",$Key,$Precision,$Value); + # increase $PadValue for numbers with decimal point + $PadValue += $Precision+1 if $Precision; + # add padding if $PadField is set; $PadValue HAS to be set then + $Output .= sprintf ($PadField ? + sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : + "%s%s %.*f\n",$Key,$Comments ? ':' : '', + $Precision,$Value); }; return $Output; }; @@ -483,26 +505,30 @@ sub SQLHierarchies { ################################################################################ sub GetMaxLength { ################################################################################ -### get length of longest field in future query result -### IN : $DBHandle : database handel +### get length of longest fields in future query result +### IN : $DBHandle : database handle ### $Table : table to query -### $Field : field to check +### $Field : field (key!, i.e. month, newsgroup, ...) to check +### $Value : field (value!, i.e. postings) to check ### $WhereClause : WHERE clause ### $HavingClause: HAVING clause ### @BindVars : bind variables for WHERE clause -### OUT: $Length: length of longest instnace of $Field - my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; - my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". - "FROM %s %s %s",$Field,$Table, - $WhereClause,$HavingClause ? +### OUT: $FieldLength : length of longest instance of $Field +### $ValueLength : length of longest instance of $Value + my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". + "MAX(%s) ". + "FROM %s %s %s",$Field,,$Value, + $Table,$WhereClause,$HavingClause ? 'GROUP BY newsgroup' . $HavingClause . ' ORDER BY LENGTH(newsgroup) '. 'DESC LIMIT 1': '')); $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". "for '%s' from table '%s': ". "$DBI::errstr",$Field,$Table)); - my ($Length) = $DBQuery->fetchrow_array; - return $Length; + my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; + my $ValueLength = length($ValueMax) if ($ValueMax); + return ($FieldLength,$ValueLength); }; ################################################################################ @@ -572,16 +598,49 @@ sub SQLGroupList { my ($Newsgroups) = @_; # substitute '*' wildcard with SQL wildcard character '%' $Newsgroups =~ s/\*/%/g; + return (undef,undef) if !CheckValidNewsgroups($Newsgroups); # just one newsgroup? return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; + my ($SQL,@WildcardGroups,@NoWildcardGroups); # list of newsgroups separated by ':' - my $SQL = '('; my @GroupList = split /:/, $Newsgroups; foreach (@GroupList) { - $SQL .= ' OR ' if $SQL gt '('; - $SQL .= SQLGroupWildcard($_); + if ($_ !~ /%/) { + # add to list of newsgroup names WITHOUT wildcard + push (@NoWildcardGroups,$_); + } else { + # add to list of newsgroup names WITH wildcard + push (@WildcardGroups,$_); + # add wildcard to SQL clause + # 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + $SQL .= 'newsgroup LIKE ?' + } }; - $SQL .= ')'; + if (scalar(@NoWildcardGroups)) { + # add 'OR' if SQL clause is not empty + $SQL .= ' OR ' if $SQL; + if (scalar(@NoWildcardGroups) < 2) { + # special case: just one newsgroup without wildcard + $SQL .= 'newsgroup = ?'; + } else { + # create list of newsgroups to include: 'newsgroup IN (...)' + $SQL .= 'newsgroup IN ('; + my $SQLin; + foreach (@NoWildcardGroups) { + $SQLin .= ',' if $SQLin; + $SQLin .= '?'; + } + # add list to SQL clause + $SQL .= $SQLin .= ')'; + } + } + # add brackets '()' to SQL clause as needed (more than one wildcard group) + if (scalar(@WildcardGroups)) { + $SQL = '(' . $SQL .')'; + } + # rebuild @GroupList in (now) correct order + @GroupList = (@WildcardGroups,@NoWildcardGroups); return ($SQL,@GroupList); }; @@ -593,7 +652,6 @@ sub SQLGroupWildcard { ### (group.name or group.name.%) ### OUT: SQL code to become part of a 'WHERE' clause my ($Newsgroup) = @_; - # FIXME: check for validity if ($Newsgroup !~ /%/) { return 'newsgroup = ?'; } else { @@ -696,6 +754,19 @@ sub SQLBuildClause { return $SQLClause; }; +#####--------------------------- Verifications ----------------------------##### + +################################################################################ +sub CheckValidNewsgroups { +################################################################################ +### syntax check of newgroup list +### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) +### OUT: boolean + my ($Newsgroups) = @_; + my $InvalidCharRegExp = ',; '; + return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; +}; + #####------------------------------- done ---------------------------------##### 1;