| 1 | # NewsStats.pm |
| 2 | # |
| 3 | # Library functions for the NewsStats package. |
| 4 | # |
| 5 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> |
| 6 | # |
| 7 | # This module can be redistributed and/or modified under the same terms under |
| 8 | # which Perl itself is published. |
| 9 | |
| 10 | package NewsStats; |
| 11 | |
| 12 | use strict; |
| 13 | use warnings; |
| 14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
| 15 | |
| 16 | require Exporter; |
| 17 | @ISA = qw(Exporter); |
| 18 | @EXPORT = qw( |
| 19 | $MySelf |
| 20 | $MyVersion |
| 21 | ReadOptions |
| 22 | ReadConfig |
| 23 | OverrideConfig |
| 24 | InitDB |
| 25 | ); |
| 26 | @EXPORT_OK = qw( |
| 27 | GetTimePeriod |
| 28 | LastMonth |
| 29 | CheckMonth |
| 30 | SplitPeriod |
| 31 | ListMonth |
| 32 | ListNewsgroups |
| 33 | OutputData |
| 34 | FormatOutput |
| 35 | SQLHierarchies |
| 36 | SQLGroupList |
| 37 | GetMaxLenght |
| 38 | ); |
| 39 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], |
| 40 | Output => [qw(OutputData FormatOutput)], |
| 41 | SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); |
| 42 | $VERSION = '0.1'; |
| 43 | |
| 44 | use Data::Dumper; |
| 45 | use File::Basename; |
| 46 | use Getopt::Std; |
| 47 | |
| 48 | use Config::Auto; |
| 49 | use DBI; |
| 50 | |
| 51 | #####-------------------------------- Vars --------------------------------##### |
| 52 | |
| 53 | our $MySelf = fileparse($0, '.pl'); |
| 54 | our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; |
| 55 | |
| 56 | #####------------------------------- Basics -------------------------------##### |
| 57 | |
| 58 | ################################################################################ |
| 59 | sub ReadOptions { |
| 60 | ################################################################################ |
| 61 | ### read commandline options and act on standard options -h and -V |
| 62 | ### IN : $Params: list of legal commandline paramaters (without -h and -V) |
| 63 | ### OUT: a hash containing the commandline options |
| 64 | $Getopt::Std::STANDARD_HELP_VERSION = 1; |
| 65 | |
| 66 | my ($Params) = @_; |
| 67 | my %Options; |
| 68 | |
| 69 | getopts('Vh'.$Params, \%Options); |
| 70 | |
| 71 | # -V: display version |
| 72 | &ShowVersion if ($Options{'V'}); |
| 73 | |
| 74 | # -h: feed myself to perldoc |
| 75 | &ShowPOD if ($Options{'h'}); |
| 76 | |
| 77 | return %Options; |
| 78 | }; |
| 79 | ################################################################################ |
| 80 | |
| 81 | ################################################################################ |
| 82 | sub ShowVersion { |
| 83 | ################################################################################ |
| 84 | ### display version and exit |
| 85 | print "$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n"; |
| 86 | print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; |
| 87 | exit(100); |
| 88 | }; |
| 89 | ################################################################################ |
| 90 | |
| 91 | ################################################################################ |
| 92 | sub ShowPOD { |
| 93 | ################################################################################ |
| 94 | ### feed myself to perldoc and exit |
| 95 | exec('perldoc', $0); |
| 96 | exit(100); |
| 97 | }; |
| 98 | ################################################################################ |
| 99 | |
| 100 | ################################################################################ |
| 101 | sub ReadConfig { |
| 102 | ################################################################################ |
| 103 | ### read config via Config::Auto |
| 104 | ### IN : $ConfFile: config filename |
| 105 | ### OUT: reference to a hash containing the configuration |
| 106 | my ($ConfFile) = @_; |
| 107 | return Config::Auto::parse($ConfFile, format => 'equal'); |
| 108 | }; |
| 109 | ################################################################################ |
| 110 | |
| 111 | ################################################################################ |
| 112 | sub OverrideConfig { |
| 113 | ################################################################################ |
| 114 | ### override configuration values |
| 115 | ### IN : $ConfigR : reference to configuration hash |
| 116 | ### $OverrideR: reference to a hash containing overrides |
| 117 | my ($ConfigR,$OverrideR) = @_; |
| 118 | my %Override = %$OverrideR; |
| 119 | # Config hash empty? |
| 120 | warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); |
| 121 | # return if no overrides |
| 122 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
| 123 | foreach my $Key (keys %Override) { |
| 124 | $$ConfigR{$Key} = $Override{$Key}; |
| 125 | }; |
| 126 | }; |
| 127 | ################################################################################ |
| 128 | |
| 129 | ################################################################################ |
| 130 | sub InitDB { |
| 131 | ################################################################################ |
| 132 | ### initialise database connection |
| 133 | ### IN : $ConfigR: reference to configuration hash |
| 134 | ### $Die : if TRUE, die if connection fails |
| 135 | ### OUT: DBHandle |
| 136 | my ($ConfigR,$Die) = @_; |
| 137 | my %Conf = %$ConfigR; |
| 138 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); |
| 139 | if (!$DBHandle) { |
| 140 | die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); |
| 141 | warn("$MySelf: W: $DBI::errstr\n"); |
| 142 | }; |
| 143 | return $DBHandle; |
| 144 | }; |
| 145 | ################################################################################ |
| 146 | |
| 147 | #####------------------------------ GetStats ------------------------------##### |
| 148 | |
| 149 | ################################################################################ |
| 150 | sub ListNewsgroups { |
| 151 | ################################################################################ |
| 152 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
| 153 | ### hierarchy names where every newsgroup and hierarchy appears only once: |
| 154 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin |
| 155 | ### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header) |
| 156 | ### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys |
| 157 | my ($Newsgroups) = @_; |
| 158 | my %Newsgroups; |
| 159 | chomp($Newsgroups); |
| 160 | # remove whitespace from contents of Newsgroups: |
| 161 | $Newsgroups =~ s/\s//; |
| 162 | # call &HierarchyCount for each newsgroup in $Newsgroups: |
| 163 | for (split /,/, $Newsgroups) { |
| 164 | # add original newsgroup to %Newsgroups |
| 165 | $Newsgroups{$_} = 1; |
| 166 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', |
| 167 | # i.e. de.alt.ALL and de.ALL |
| 168 | foreach (ParseHierarchies($_)) { |
| 169 | $Newsgroups{$_.'.ALL'} = 1; |
| 170 | } |
| 171 | }; |
| 172 | return %Newsgroups; |
| 173 | }; |
| 174 | |
| 175 | ################################################################################ |
| 176 | sub ParseHierarchies { |
| 177 | ################################################################################ |
| 178 | ### return a list of all hierarchy levels a newsgroup belongs to |
| 179 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) |
| 180 | ### IN : $Newsgroup : a newsgroup name |
| 181 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to |
| 182 | my ($Newsgroup) = @_; |
| 183 | my @Hierarchies; |
| 184 | # strip trailing dots |
| 185 | $Newsgroup =~ s/(.+)\.+$/$1/; |
| 186 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies |
| 187 | # i.e. de.alt.test: "de.alt" and "de" |
| 188 | while ($Newsgroup =~ /\./) { |
| 189 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; |
| 190 | push @Hierarchies, $Newsgroup; |
| 191 | }; |
| 192 | return @Hierarchies; |
| 193 | }; |
| 194 | |
| 195 | ################################################################################ |
| 196 | |
| 197 | #####----------------------------- TimePeriods ----------------------------##### |
| 198 | |
| 199 | ################################################################################ |
| 200 | sub GetTimePeriod { |
| 201 | ################################################################################ |
| 202 | ### get a time period to act on, in order of preference: by default the |
| 203 | ### last month; or a month submitted by -m YYYY-MM; or a time period submitted |
| 204 | ### by -p YYYY-MM:YYYY-MM |
| 205 | ### IN : $Month,$Period: contents of -m and -p |
| 206 | ### OUT: $StartMonth, $EndMonth (identical if period is just one month) |
| 207 | my ($Month,$Period) = @_; |
| 208 | # exit if -m is set and not like YYYY-MM |
| 209 | die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); |
| 210 | # warn if -m and -p is set |
| 211 | warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period); |
| 212 | # default: set -m to last month |
| 213 | $Month = &LastMonth if (!defined($Month) and !defined($Period)); |
| 214 | # set $StartMonth, $EndMonth |
| 215 | my ($StartMonth, $EndMonth); |
| 216 | if ($Period) { |
| 217 | # -p: get date range |
| 218 | ($StartMonth, $EndMonth) = &SplitPeriod($Period); |
| 219 | die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); |
| 220 | } else { |
| 221 | # set $StartMonth = $EndMonth = $Month if -p is not set |
| 222 | $StartMonth = $EndMonth = $Month; |
| 223 | }; |
| 224 | return ($StartMonth, $EndMonth); |
| 225 | }; |
| 226 | |
| 227 | ################################################################################ |
| 228 | sub LastMonth { |
| 229 | ################################################################################ |
| 230 | ### get last month from todays date in YYYY-MM format |
| 231 | ### OUT: last month as YYYY-MM |
| 232 | # get today's date |
| 233 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); |
| 234 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 |
| 235 | $Year += 1900; |
| 236 | if ($Month < 1) { |
| 237 | $Month = 12; |
| 238 | $Year--; |
| 239 | }; |
| 240 | # return last month |
| 241 | return sprintf('%4d-%02d',$Year,$Month); |
| 242 | }; |
| 243 | |
| 244 | ################################################################################ |
| 245 | sub CheckMonth { |
| 246 | ################################################################################ |
| 247 | ### check if input is a valid month in YYYY-MM form |
| 248 | ### IN : $Month: month |
| 249 | ### OUT: TRUE / FALSE |
| 250 | my ($Month) = @_; |
| 251 | return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); |
| 252 | return 1; |
| 253 | }; |
| 254 | |
| 255 | ################################################################################ |
| 256 | sub SplitPeriod { |
| 257 | ################################################################################ |
| 258 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
| 259 | ### IN : $Period: time period |
| 260 |