| 1 | # NewsStats.pm |
| 2 | # |
| 3 | # Library functions for the NewsStats package. |
| 4 | # |
| 5 | # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
| 6 | # |
| 7 | # This module can be redistributed and/or modified under the same terms under |
| 8 | # which Perl itself is published. |
| 9 | |
| 10 | package NewsStats; |
| 11 | |
| 12 | use strict; |
| 13 | use warnings; |
| 14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
| 15 | |
| 16 | require Exporter; |
| 17 | @ISA = qw(Exporter); |
| 18 | @EXPORT = qw( |
| 19 | $MyVersion |
| 20 | $PackageVersion |
| 21 | $FullPath |
| 22 | $HomePath |
| 23 | ShowVersion |
| 24 | ShowPOD |
| 25 | ReadConfig |
| 26 | OverrideConfig |
| 27 | InitDB |
| 28 | Bleat |
| 29 | ); |
| 30 | @EXPORT_OK = qw( |
| 31 | GetTimePeriod |
| 32 | LastMonth |
| 33 | SplitPeriod |
| 34 | ListMonth |
| 35 | ListNewsgroups |
| 36 | ParseHierarchies |
| 37 | ReadGroupList |
| 38 | OutputData |
| 39 | FormatOutput |
| 40 | SQLHierarchies |
| 41 | SQLSortOrder |
| 42 | SQLGroupList |
| 43 | SQLSetBounds |
| 44 | SQLBuildClause |
| 45 | GetMaxLength |
| 46 | ); |
| 47 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
| 48 | ListMonth)], |
| 49 | Output => [qw(OutputData FormatOutput)], |
| 50 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
| 51 | SQLSetBounds SQLBuildClause GetMaxLength)]); |
| 52 | $VERSION = '0.02'; |
| 53 | our $PackageVersion = '0.02'; |
| 54 | |
| 55 | use Data::Dumper; |
| 56 | use File::Basename; |
| 57 | use Cwd qw(realpath); |
| 58 | |
| 59 | use Config::Auto; |
| 60 | use DBI; |
| 61 | |
| 62 | #####-------------------------------- Vars --------------------------------##### |
| 63 | |
| 64 | # save $0 in $FullPath |
| 65 | our $FullPath = $0; |
| 66 | # strip filename and /bin or /install directory to create the $HomePath |
| 67 | our $HomePath = dirname(realpath($0)); |
| 68 | $HomePath =~ s/\/(bin|install)//; |
| 69 | # trim $0 |
| 70 | $0 =~ s%.*/%%; |
| 71 | # set version string |
| 72 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; |
| 73 | |
| 74 | #####------------------------------- Basics -------------------------------##### |
| 75 | |
| 76 | ################################################################################ |
| 77 | |
| 78 | ################################################################################ |
| 79 | sub ShowVersion { |
| 80 | ################################################################################ |
| 81 | ### display version and exit |
| 82 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
| 83 | print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@inter.net>\n"; |
| 84 | print "This program is free software; you may redistribute it ". |
| 85 | "and/or modify it under the same terms as Perl itself.\n"; |
| 86 | exit(100); |
| 87 | }; |
| 88 | ################################################################################ |
| 89 | |
| 90 | ################################################################################ |
| 91 | sub ShowPOD { |
| 92 | ################################################################################ |
| 93 | ### feed myself to perldoc and exit |
| 94 | exec('perldoc', $FullPath); |
| 95 | exit(100); |
| 96 | }; |
| 97 | ################################################################################ |
| 98 | |
| 99 | ################################################################################ |
| 100 | sub ReadConfig { |
| 101 | ################################################################################ |
| 102 | ### read config via Config::Auto |
| 103 | ### IN : $ConfFile: config filename |
| 104 | ### OUT: reference to a hash containing the configuration |
| 105 | my ($ConfFile) = @_; |
| 106 | # set default |
| 107 | $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile; |
| 108 | # mandatory configuration options |
| 109 | my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', |
| 110 | 'DBTableRaw','DBTableParse','DBTableGrps'); |
| 111 | # read config via Config::Auto |
| 112 | my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); |
| 113 | my %Conf = %{$ConfR}; |
| 114 | # check for mandatory options |
| 115 | foreach (@Mandatory) { |
| 116 | &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) |
| 117 | if (!defined($Conf{$_})); |
| 118 | } |
| 119 | # $Conf{'TLH'} is checked in gatherstats.pl |
| 120 | return $ConfR; |
| 121 | }; |
| 122 | ################################################################################ |
| 123 | |
| 124 | ################################################################################ |
| 125 | sub OverrideConfig { |
| 126 | ################################################################################ |
| 127 | ### override configuration values |
| 128 | ### IN : $ConfigR : reference to configuration hash |
| 129 | ### $OverrideR: reference to a hash containing overrides |
| 130 | my ($ConfigR,$OverrideR) = @_; |
| 131 | my %Override = %$OverrideR; |
| 132 | # Config hash empty? |
| 133 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
| 134 | if ( keys %$ConfigR < 1); |
| 135 | # return if no overrides |
| 136 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
| 137 | foreach my $Key (keys %Override) { |
| 138 | $$ConfigR{$Key} = $Override{$Key}; |
| 139 | }; |
| 140 | }; |
| 141 | ################################################################################ |
| 142 | |
| 143 | ################################################################################ |
| 144 | sub InitDB { |
| 145 | ################################################################################ |
| 146 | ### initialise database connection |
| 147 | ### IN : $ConfigR: reference to configuration hash |
| 148 | ### $Die : if TRUE, die if connection fails |
| 149 | ### OUT: DBHandle |
| 150 | my ($ConfigR,$Die) = @_; |
| 151 | my %Conf = %$ConfigR; |
| 152 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
| 153 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, |
| 154 | $Conf{'DBHost'}), $Conf{'DBUser'}, |
| 155 | $Conf{'DBPw'}, { PrintError => 0 }); |
| 156 | if (!$DBHandle) { |
| 157 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
| 158 | &Bleat(1,$DBI::errstr); |
| 159 | }; |
| 160 | return $DBHandle; |
| 161 | }; |
| 162 | ################################################################################ |
| 163 | |
| 164 | ################################################################################ |
| 165 | sub Bleat { |
| 166 | ################################################################################ |
| 167 | ### print warning or error messages and terminate in case of error |
| 168 | ### IN : $Level : 1 = warning, 2 = error |
| 169 | ### $Message: warning or error message |
| 170 | my ($Level,$Message) = @_; |
| 171 | if ($Level == 1) { |
| 172 | warn "$0 W: $Message\n" |
| 173 | } elsif ($Level == 2) { |
| 174 | die "$0 E: $Message\n" |
| 175 | } else { |
| 176 | print "$0: $Message\n" |
| 177 | } |
| 178 | }; |
| 179 | ################################################################################ |
| 180 | |
| 181 | #####------------------------------ GetStats ------------------------------##### |
| 182 | |
| 183 | ################################################################################ |
| 184 | sub ListNewsgroups { |
| 185 | ################################################################################ |
| 186 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
| 187 | ### hierarchy names where every newsgroup and hierarchy appears only once: |
| 188 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin |
| 189 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
| 190 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
| 191 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
| 192 | ### as keys |
| 193 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys |
| 194 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
| 195 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
| 196 | my %Newsgroups; |
| 197 | chomp($Newsgroups); |
| 198 | # remove whitespace from contents of Newsgroups: |
| 199 | $Newsgroups =~ s/\s//; |
| 200 | # call &HierarchyCount for each newsgroup in $Newsgroups: |
| 201 | for (split /,/, $Newsgroups) { |
| 202 | # don't count newsgroup/hierarchy in wrong TLH |
| 203 | next if($TLH and !/^$TLH/); |
| 204 | # don't count invalid newsgroups |
| 205 | if(%ValidGroups and !defined($ValidGroups{$_})) { |
| 206 | warn (sprintf("DROPPED: %s\n",$_)); |
| 207 | next; |
| 208 | } |
| 209 | # add original newsgroup to %Newsgroups |
| 210 | $Newsgroups{$_} = 1; |
| 211 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', |
| 212 | # i.e. de.alt.ALL and de.ALL |
| 213 | foreach (ParseHierarchies($_)) { |
| 214 | $Newsgroups{$_.'.ALL'} = 1; |
| 215 | } |
| 216 | }; |
| 217 | return %Newsgroups; |
| 218 | }; |
| 219 | |
| 220 | ################################################################################ |
| 221 | sub ParseHierarchies { |
| 222 | ################################################################################ |
| 223 | ### return a list of all hierarchy levels a newsgroup belongs to |
| 224 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) |
| 225 | ### IN : $Newsgroup : a newsgroup name |
| 226 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to |
| 227 | my ($Newsgroup) = @_; |
| 228 | my @Hierarchies; |
| 229 | # strip trailing dots |
| 230 | $Newsgroup =~ s/(.+)\.+$/$1/; |
| 231 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies |
| 232 | # i.e. de.alt.test: "de.alt" and "de" |
| 233 | while ($Newsgroup =~ /\./) { |
| 234 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; |
| 235 | push @Hierarchies, $Newsgroup; |
| 236 | }; |
| 237 | return @Hierarchies; |
| 238 | }; |
| 239 | |
| 240 | ################################################################################ |
| 241 | sub ReadGroupList { |
| 242 | ################################################################################ |
| 243 | ### read a list of valid newsgroups from file (each group on one line, |
| 244 | ### ignoring everything after the first whitespace and so accepting files |
| 245 | ### in checkgroups format as well as (parts of) an INN active file) |
| 246 | ### IN : $Filename : file to read |
| 247 | ### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups |
| 248 | my ($Filename) = @_; |
| 249 | my %ValidGroups; |
| 250 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
| 251 | while (<$LIST>) { |
| 252 | s/^\s*(\S+).*$/$1/; |
| 253 | chomp; |
| 254 | next if /^$/; |
| 255 | $ValidGroups{$_} = '1'; |
| 256 | }; |
| 257 | close $LIST; |
| 258 | return \%ValidGroups; |
| 259 | }; |
| 260 | |
| 261 | ################################################################################ |
| 262 | |
| 263 | #####----------------------------- TimePeriods ----------------------------##### |
| 264 | |
| 265 | ################################################################################ |
| 266 | sub GetTimePeriod { |
| 267 | ################################################################################ |
| 268 | ### get a time period to act on from --month / --day option; |
| 269 | ### if empty, default to last month / day |
| 270 | ### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)' |
| 271 | ### or 'all' |
| 272 | ### $Type : may be 'month' or 'day' |
| 273 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause |
| 274 | ### of the chosen time period |
| 275 | my ($Period,$Type) = @_; |
| 276 | # define result variables |
| 277 | my ($Verbal, $SQL); |
| 278 | # check $Type |
| 279 | $Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day')); |
| 280 | # define a regular expressions for a month or day |
| 281 | my $REPeriod = '\d{4}-\d{2}'; |
| 282 | $REPeriod .= '-\d{2}' if ($Type eq 'day'); |
| 283 | |
| 284 | # default to last month / day if option is not set |
| 285 | if(!$Period) { |
| 286 | $Period = &LastMonthDay($Type); |
| 287 | } |
| 288 | |
| 289 | # check for valid input |
| 290 | if ($Period =~ /^$REPeriod$/) { |
| 291 | # single month/day [YYYY-MM(-DD)] |
| 292 | ($Period) = &CheckPeriod($Type,$Period); |
| 293 | $Verbal = $Period; |
| 294 | $SQL = sprintf("%s = '%s'",$Type,$Period); |
| 295 | } elsif ($Period =~ /^$REPeriod:$REPeriod$/) { |
| 296 | # time period [YYYY-MM(-DD):YYYY-MM(-DD)] |
| 297 | $Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type)); |
| 298 | $SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type, |
| 299 | &SplitPeriod($Period,$Type)); |
| 300 | } elsif ($Period =~ /^all$/i) { |
| 301 | # special case: ALL |
| 302 | $Verbal = 'all time'; |
| 303 | $SQL = ''; |
| 304 | } else { |
| 305 | # invalid input |
| 306 | return (undef,undef); |
| 307 | } |
| 308 | |
| 309 | return ($Verbal,$SQL); |
| 310 | }; |
| 311 | |
| 312 | ################################################################################ |
| 313 | sub LastMonthDay { |
| 314 | ################################################################################ |
| 315 | ### get last month/day from todays date in YYYY-MM format |
| 316 | ### IN : $Type : may be 'month' or 'day' |
| 317 | ### OUT: last month/day as YYYY-MM(-DD) |
| 318 | my ($Type) = @_; |
| 319 | my ($Day,$Month,$Year); |
| 320 | if ($Type eq 'day') { |
| 321 | # get yesterdays's date |
| 322 | (undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400); |
| 323 | # $Month is defined from 0 to 11, so add 1 |
| 324 | $Month++; |
| 325 | } else { |
| 326 | # get today's date (month and year) |
| 327 | (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); |
| 328 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 |
| 329 | if ($Month < 1) { |
| 330 | $Month = 12; |
| 331 | $Year--; |
| 332 | }; |
| 333 | } |
| 334 | $Year += 1900; |
| 335 | # return last month / day |
| 336 | if ($Type eq 'day') { |
| 337 | return sprintf('%4d-%02d-%02d',$Year,$Month,$Day); |
| 338 | } else { |
| 339 | return sprintf('%4d-%02d',$Year,$Month); |
| 340 | } |
| 341 | }; |
| 342 | |
| 343 | ################################################################################ |
| 344 | sub CheckPeriod { |
| 345 | ################################################################################ |
| 346 | ### check if input (in YYYY-MM(-DD) form) is a valid month / day; |
| 347 | ### otherwise, fix it |
| 348 | ### IN : $Type : may be 'month' or 'day' |
| 349 | ### @Period: array of month/day |
| 350 | ### OUT: @Period: a valid month/day |
| 351 | my ($Type,@Period) = @_; |
| 352 | foreach my $Period (@Period) { |
| 353 | my ($OldPeriod) = $Period; |
| 354 | my ($CalMonth,$CalDay); |
| 355 | $Period .= '-01' if ($Type eq 'month'); |
| 356 | $CalDay = substr ($Period, -2); |
| 357 | $CalMonth = substr ($Period, 5, 2); |
| 358 | if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) { |
| 359 | $CalMonth = '12' if $CalMonth > 12; |
| 360 | $CalMonth = '01' if $CalMonth < 1; |
| 361 | substr($Period, 5, 2) = $CalMonth; |
| 362 | $CalDay = '01' if $CalDay < 1; |
| 363 | $CalDay = '31' if $CalDay > 31; |
| 364 | # FIXME! - month with less than 31 days ... |
| 365 | substr($Period, -2) = $CalDay; |
| 366 | &Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.", |
| 367 | $OldPeriod,$Period)); |
| 368 | } |
| 369 | $Period = substr($Period,0,7) if ($Type eq 'month'); |
| 370 | } |
| 371 | return @Period; |
| 372 | }; |
| 373 | |
| 374 | ################################################################################ |
| 375 | sub SplitPeriod { |
| 376 | ################################################################################ |
| 377 | ### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end |
| 378 | ### IN : $Period: time period |
| 379 | ### $Type : may be 'month' or 'day' |
| 380 | ### OUT: $StartTime, $EndTime |
| 381 | my ($Period,$Type) = @_; |
| 382 | my ($StartTime, $EndTime) = split /:/, $Period; |
| 383 | ($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime); |
| 384 | # switch parameters as necessary |
| 385 | if ($EndTime gt $StartTime) { |
| 386 | return ($StartTime, $EndTime); |
| 387 | } else { |
| 388 | return ($EndTime, $StartTime); |
| 389 | }; |
| 390 | }; |
| 391 | |
| 392 | ################################################################################ |
| 393 | sub ListMonth { |
| 394 | ################################################################################ |
| 395 | ### return a list of months (YYYY-MM) between start and end month |
| 396 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
| 397 | ### OUT: @Months: array containing all months from $MonthExpression enumerated |
| 398 | my ($MonthExpression )= @_; |
| 399 | # return if single month |
| 400 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); |
| 401 | # parse $MonthExpression |
| 402 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); |
| 403 | # set $Year, $Month from $StartMonth |
| 404 | my ($Year, $Month) = split /-/, $StartMonth; |
| 405 | # define @Months |
| 406 | my (@Months); |
| 407 | until ("$Year-$Month" gt $EndMonth) { |
| 408 | push @Months, "$Year-$Month"; |
| 409 | $Month = "$Month"; # force string context |
| 410 | $Month++; |
| 411 | if ($Month > 12) { |
| 412 | $Month = '01'; |
| 413 | $Year++; |
| 414 | }; |
| 415 | }; |
| 416 | return @Months; |
| 417 | }; |
| 418 | |
| 419 | #####---------------------------- OutputFormats ---------------------------##### |
| 420 | |
| 421 | ################################################################################ |
| 422 | sub OutputData { |
| 423 | ################################################################################ |
| 424 | ### read database query results from DBHandle and print results with formatting |
| 425 | ### IN : $Format : format specifier |
| 426 | ### $Comments : print or suppress all comments for machine-readable output |
| 427 | ### $GroupBy : primary sorting order (month or key) |
| 428 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 429 | ### $ValidKeys: reference to a hash containing all valid keys |
| 430 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM |
| 431 | ### $DBQuery : database query handle with executed query, |
| 432 | ### containing $Month, $Key, $Value |
| 433 | ### $PadField : padding length for key field (optional) for 'pretty' |
| 434 | ### $PadValue : padding length for value field (optional) for 'pretty' |
| 435 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, |
| 436 | $DBQuery, $PadField, $PadValue) = @_; |
| 437 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; |
| 438 | my ($FileName, $Handle, $OUT); |
| 439 | our $LastIteration; |
| 440 | |
| 441 | # define output types |
| 442 | my %LegalOutput; |
| 443 | @LegalOutput{('dump','list','pretty')} = (); |
| 444 | # bail out if format is unknown |
| 445 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); |
| 446 | |
| 447 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
| 448 | # don't display invalid keys |
| 449 | if(%ValidKeys and !defined($ValidKeys{$Key})) { |
| 450 | # FIXME |
| 451 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); |
| 452 | next; |
| 453 | }; |
| 454 | # care for correct sorting order and abstract from month and keys: |
| 455 | # $Caption will be $Month or $Key, according to sorting order, |
| 456 | # and $Key will be $Key or $Month, respectively |
| 457 | my $Caption; |
| 458 | if ($GroupBy eq 'key') { |
| 459 | $Caption = $Key; |
| 460 | $Key = $Month; |
| 461 | } else { |
| 462 | $Caption = $Month; |
| 463 | } |
| 464 | # set output file handle |
| 465 | if (!$FileTempl) { |
| 466 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
| 467 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
| 468 | close $OUT if ($LastIteration); |
| 469 | # safeguards for filename creation: |
| 470 | # replace potential problem characters with '_' |
| 471 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); |
| 472 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; |
| 473 | open ($OUT,">$FileName") |
| 474 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", |
| 475 | $FileName)); |
| 476 | $Handle = $OUT; |
| 477 | }; |
| 478 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
| 479 | $Precision, $PadField, $PadValue); |
| 480 | $LastIteration = $Caption; |
| 481 | }; |
| 482 | close $OUT if ($FileTempl); |
| 483 | }; |
| 484 | |
| 485 | ################################################################################ |
| 486 | sub FormatOutput { |
| 487 | ################################################################################ |
| 488 | ### format information for output according to format specifier |
| 489 | ### IN : $Format : format specifier |
| 490 | ### $Comments : print or suppress all comments for machine-readable output |
| 491 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order |
| 492 | ### $Key : newsgroup, client, ... or $Month, as above |
| 493 | ### $Value : number of postings with that attribute |
| 494 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 495 | ### $PadField : padding length for key field (optional) for 'pretty' |
| 496 | ### $PadValue : padding length for value field (optional) for 'pretty' |
| 497 | ### OUT: $Output: formatted output |
| 498 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, |
| 499 | $PadValue) = @_; |
| 500 | my ($Output); |
| 501 | # keep last caption in mind |
| 502 | our ($LastIteration); |
| 503 | # create one line of output |
| 504 | if ($Format eq 'dump') { |
| 505 | # output as dump (key value) |
| 506 | $Output = sprintf ("# %s:\n",$Caption) |
| 507 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 508 | $Output .= sprintf ("%s %u\n",$Key,$Value); |
| 509 | } elsif ($Format eq 'list') { |
| 510 | # output as list (caption key value) |
| 511 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); |
| 512 | } elsif ($Format eq 'pretty') { |
| 513 | # output as a table |
| 514 | $Output = sprintf ("# ----- %s:\n",$Caption) |
| 515 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 516 | # increase $PadValue for numbers with decimal point |
| 517 | $PadValue += $Precision+1 if $Precision; |
| 518 | # add padding if $PadField is set; $PadValue HAS to be set then |
| 519 | $Output .= sprintf ($PadField ? |
| 520 | sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : |
| 521 | "%s%s %.*f\n",$Key,$Comments ? ':' : '', |
| 522 | $Precision,$Value); |
| 523 | }; |
| 524 | return $Output; |
| 525 | }; |
| 526 | |
| 527 | #####------------------------- QueryModifications -------------------------##### |
| 528 | |
| 529 | ################################################################################ |
| 530 | sub SQLHierarchies { |
| 531 | ################################################################################ |
| 532 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
| 533 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is |
| 534 | ### true, accordingly) |
| 535 | ### IN : $ShowHierarchies: boolean value |
| 536 | ### OUT: SQL code |
| 537 | my ($ShowHierarchies) = @_; |
| 538 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
| 539 | }; |
| 540 | |
| 541 | ################################################################################ |
| 542 | sub GetMaxLength { |
| 543 | ################################################################################ |
| 544 | ### get length of longest fields in future query result |
| 545 | ### IN : $DBHandle : database handle |
| 546 | ### $Table : table to query |
| 547 | ### $Field : field (key!, i.e. month, newsgroup, ...) to check |
| 548 | ### $Value : field (value!, i.e. postings) to check |
| 549 | ### $WhereClause : WHERE clause |
| 550 | ### $HavingClause: HAVING clause |
| 551 | ### @BindVars : bind variables for WHERE clause |
| 552 | ### OUT: $FieldLength : length of longest instance of $Field |
| 553 | ### $ValueLength : length of longest instance of $Value |
| 554 | my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; |
| 555 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". |
| 556 | "MAX(%s) ". |
| 557 | "FROM %s %s %s",$Field,,$Value, |
| 558 | $Table,$WhereClause,$HavingClause ? |
| 559 | 'GROUP BY newsgroup' . $HavingClause . |
| 560 | ' ORDER BY LENGTH(newsgroup) '. |
| 561 | 'DESC LIMIT 1': '')); |
| 562 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". |
| 563 | "for '%s' from table '%s': ". |
| 564 | "$DBI::errstr",$Field,$Table)); |
| 565 | my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; |
| 566 | my $ValueLength = length($ValueMax) if ($ValueMax); |
| 567 | return ($FieldLength,$ValueLength); |
| 568 | }; |
| 569 | |
| 570 | ################################################################################ |
| 571 | sub SQLSortOrder { |
| 572 | ################################################################################ |
| 573 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and |
| 574 | ### $OptOrderBy (secondary sorting), both ascending or descending; |
| 575 | ### descending sorting order is done by adding '-desc' |
| 576 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' |
| 577 | ### $OrderBy: secondary sort by month/newsgroups (default) |
| 578 | ### or number of 'postings' |
| 579 | ### OUT: a SQL ORDER BY clause |
| 580 | my ($GroupBy,$OrderBy) = @_; |
| 581 | my ($GroupSort,$OrderSort) = ('',''); |
| 582 | # $GroupBy (primary sorting) |
| 583 | if (!$GroupBy) { |
| 584 | $GroupBy = 'month'; |
| 585 | } else { |
| 586 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); |
| 587 | if ($GroupBy =~ /group/i) { |
| 588 | $GroupBy = 'newsgroup'; |
| 589 | } else { |
| 590 | $GroupBy = 'month'; |
| 591 | } |
| 592 | } |
| 593 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; |
| 594 | # $OrderBy (secondary sorting) |
| 595 | if (!$OrderBy) { |
| 596 | $OrderBy = $Secondary; |
| 597 | } else { |
| 598 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); |
| 599 | if ($OrderBy =~ /posting/i) { |
| 600 | $OrderBy = "postings $OrderSort, $Secondary"; |
| 601 | } else { |
| 602 | $OrderBy = "$Secondary $OrderSort"; |
| 603 | } |
| 604 | } |
| 605 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); |
| 606 | }; |
| 607 | |
| 608 | ################################################################################ |
| 609 | sub SQLParseOrder { |
| 610 | ################################################################################ |
| 611 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. |
| 612 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' |
| 613 | ### IN : $OrderOption: order option (see above) |
| 614 | ### OUT: parameter to sort by, |
| 615 | ### sort order ('DESC' or nothing, meaning 'ASC') |
| 616 | my ($OrderOption) = @_; |
| 617 | my $SortOrder = ''; |
| 618 | if ($OrderOption =~ s/-?desc$//i) { |
| 619 | $SortOrder = 'DESC'; |
| 620 | } else { |
| 621 | $OrderOption =~ s/-?asc$//i |
| 622 | } |
| 623 | return ($OrderOption,$SortOrder); |
| 624 | }; |
| 625 | |
| 626 | ################################################################################ |
| 627 | sub SQLGroupList { |
| 628 | ################################################################################ |
| 629 | ### explode list of newsgroups separated by : (with wildcards) |
| 630 | ### to a SQL 'WHERE' expression |
| 631 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
| 632 | ### OUT: SQL code to become part of a 'WHERE' clause, |
| 633 | ### list of newsgroups for SQL bindings |
| 634 | my ($Newsgroups) = @_; |
| 635 | # substitute '*' wildcard with SQL wildcard character '%' |
| 636 | $Newsgroups =~ s/\*/%/g; |
| 637 | return (undef,undef) if !CheckValidNewsgroups($Newsgroups); |
| 638 | # just one newsgroup? |
| 639 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; |
| 640 | my ($SQL,@WildcardGroups,@NoWildcardGroups); |
| 641 | # list of newsgroups separated by ':' |
| 642 | my @GroupList = split /:/, $Newsgroups; |
| 643 | foreach (@GroupList) { |
| 644 | if ($_ !~ /%/) { |
| 645 | # add to list of newsgroup names WITHOUT wildcard |
| 646 | push (@NoWildcardGroups,$_); |
| 647 | } else { |
| 648 | # add to list of newsgroup names WITH wildcard |
| 649 | push (@WildcardGroups,$_); |
| 650 | # add wildcard to SQL clause |
| 651 | # 'OR' if SQL clause is not empty |
| 652 | $SQL .= ' OR ' if $SQL; |
| 653 | $SQL .= 'newsgroup LIKE ?' |
| 654 | } |
| 655 | }; |
| 656 | if (scalar(@NoWildcardGroups)) { |
| 657 | # add 'OR' if SQL clause is not empty |
| 658 | $SQL .= ' OR ' if $SQL; |
| 659 | if (scalar(@NoWildcardGroups) < 2) { |
| 660 | # special case: just one newsgroup without wildcard |
| 661 | $SQL .= 'newsgroup = ?'; |
| 662 | } else { |
| 663 | # create list of newsgroups to include: 'newsgroup IN (...)' |
| 664 | $SQL .= 'newsgroup IN ('; |
| 665 | my $SQLin; |
| 666 | foreach (@NoWildcardGroups) { |
| 667 | $SQLin .= ',' if $SQLin; |
| 668 | $SQLin .= '?'; |
| 669 | } |
| 670 | # add list to SQL clause |
| 671 | $SQL .= $SQLin .= ')'; |
| 672 | } |
| 673 | } |
| 674 | # add brackets '()' to SQL clause as needed (more than one wildcard group) |
| 675 | if (scalar(@WildcardGroups)) { |
| 676 | $SQL = '(' . $SQL .')'; |
| 677 | } |
| 678 | # rebuild @GroupList in (now) correct order |
| 679 | @GroupList = (@WildcardGroups,@NoWildcardGroups); |
| 680 | return ($SQL,@GroupList); |
| 681 | }; |
| 682 | |
| 683 | ################################################################################ |
| 684 | sub SQLGroupWildcard { |
| 685 | ################################################################################ |
| 686 | ### build a valid SQL 'WHERE' expression with or without wildcards |
| 687 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard |
| 688 | ### (group.name or group.name.%) |
| 689 | ### OUT: SQL code to become part of a 'WHERE' clause |
| 690 | my ($Newsgroup) = @_; |
| 691 | if ($Newsgroup !~ /%/) { |
| 692 | return 'newsgroup = ?'; |
| 693 | } else { |
| 694 | return 'newsgroup LIKE ?'; |
| 695 | } |
| 696 | }; |
| 697 | |
| 698 | ################################################################################ |
| 699 | sub SQLSetBounds { |
| 700 | ################################################################################ |
| 701 | ### set upper and/or lower boundary (number of postings) |
| 702 | ### IN : $Type: 'level', 'average', 'sum' or 'default' |
| 703 | ### $LowBound,$UppBound: lower/upper boundary, respectively |
| 704 | ### OUT: SQL code to become part of a WHERE or HAVING clause |
| 705 | my ($Type,$LowBound,$UppBound) = @_; |
| 706 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); |
| 707 | if($LowBound and $UppBound and $LowBound > $UppBound) { |
| 708 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". |
| 709 | "$UppBound, exchanging boundaries."); |
| 710 | ($LowBound,$UppBound) = ($UppBound,$LowBound); |
| 711 | } |
| 712 | # default to 'default' |
| 713 | my $WhereHavingFunction = 'postings'; |
| 714 | # set $LowBound to SQL statement: |
| 715 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' |
| 716 | if ($Type eq 'level') { |
| 717 | $WhereHavingFunction = 'MIN(postings)' |
| 718 | } elsif ($Type eq 'average') { |
| 719 | $WhereHavingFunction = 'AVG(postings)' |
| 720 | } elsif ($Type eq 'sum') { |
| 721 | $WhereHavingFunction = 'SUM(postings)' |
| 722 | } |
| 723 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); |
| 724 | # set $LowBound to SQL statement: |
| 725 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' |
| 726 | if ($Type eq 'level') { |
| 727 | $WhereHavingFunction = 'MAX(postings)' |
| 728 | } elsif ($Type eq 'average') { |
| 729 | $WhereHavingFunction = 'AVG(postings)' |
| 730 | } elsif ($Type eq 'sum') { |
| 731 | $WhereHavingFunction = 'SUM(postings)' |
| 732 | } |
| 733 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); |
| 734 | return ($LowBound,$UppBound); |
| 735 | }; |
| 736 | |
| 737 | ################################################################################ |
| 738 | sub SQLCheckNumber { |
| 739 | ################################################################################ |
| 740 | ### check if input is a valid positive integer; otherwise, make it one |
| 741 | ### IN : @Numbers: array of parameters |
| 742 | ### OUT: @Numbers: a valid positive integer |
| 743 | my (@Numbers) = @_; |
| 744 | foreach my $Number (@Numbers) { |
| 745 | if ($Number and $Number < 0) { |
| 746 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); |
| 747 | $Number = -$Number; |
| 748 | } |
| 749 | $Number = '' if ($Number and $Number !~ /^\d+$/); |
| 750 | } |
| 751 | return @Numbers; |
| 752 | }; |
| 753 | |
| 754 | ################################################################################ |
| 755 | sub SQLBuildClause { |
| 756 | ################################################################################ |
| 757 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause |
| 758 | ### from multiple expressions which *may* be empty |
| 759 | ### IN : $Type: 'where', 'having', 'group' or 'order' |
| 760 | ### @Expressions: array of expressions |
| 761 | ### OUT: $SQLClause: a SQL clause |
| 762 | my ($Type,@Expressions) = @_; |
| 763 | my ($SQLClause,$Separator,$Statement); |
| 764 | # set separator ('AND' or ',') |
| 765 | if ($Type eq 'where' or $Type eq 'having') { |
| 766 | $Separator = 'AND'; |
| 767 | } else { |
| 768 | $Separator = ','; |
| 769 | } |
| 770 | # set statement |
| 771 | if ($Type eq 'where') { |
| 772 | $Statement = 'WHERE'; |
| 773 | } elsif ($Type eq 'order') { |
| 774 | $Statement = 'ORDER BY'; |
| 775 | } elsif ($Type eq 'having') { |
| 776 | $Statement = 'HAVING'; |
| 777 | } else { |
| 778 | $Statement = 'GROUP BY'; |
| 779 | } |
| 780 | # build query from expressions with separators |
| 781 | foreach my $Expression (@Expressions) { |
| 782 | if ($Expression) { |
| 783 | $SQLClause .= " $Separator " if ($SQLClause); |
| 784 | $SQLClause .= $Expression; |
| 785 | } |
| 786 | } |
| 787 | # add statement in front if not already present |
| 788 | $SQLClause = " $Statement " . $SQLClause |
| 789 | if ($SQLClause and $SQLClause !~ /$Statement/); |
| 790 | return $SQLClause; |
| 791 | }; |
| 792 | |
| 793 | #####--------------------------- Verifications ----------------------------##### |
| 794 | |
| 795 | ################################################################################ |
| 796 | sub CheckValidNewsgroups { |
| 797 | ################################################################################ |
| 798 | ### syntax check of newgroup list |
| 799 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
| 800 | ### OUT: boolean |
| 801 | my ($Newsgroups) = @_; |
| 802 | my $InvalidCharRegExp = ',; '; |
| 803 | return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; |
| 804 | }; |
| 805 | |
| 806 | |
| 807 | #####------------------------------- done ---------------------------------##### |
| 808 | 1; |