| 1 | # NewsStats.pm |
| 2 | # |
| 3 | # Library functions for the NewsStats package. |
| 4 | # |
| 5 | # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
| 6 | # |
| 7 | # This module can be redistributed and/or modified under the same terms under |
| 8 | # which Perl itself is published. |
| 9 | |
| 10 | package NewsStats; |
| 11 | |
| 12 | use strict; |
| 13 | use warnings; |
| 14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
| 15 | |
| 16 | require Exporter; |
| 17 | @ISA = qw(Exporter); |
| 18 | @EXPORT = qw( |
| 19 | $MyVersion |
| 20 | $PackageVersion |
| 21 | $FullPath |
| 22 | $HomePath |
| 23 | ShowVersion |
| 24 | ShowPOD |
| 25 | ReadConfig |
| 26 | OverrideConfig |
| 27 | InitDB |
| 28 | Bleat |
| 29 | ); |
| 30 | @EXPORT_OK = qw( |
| 31 | GetTimePeriod |
| 32 | LastMonth |
| 33 | SplitPeriod |
| 34 | ListMonth |
| 35 | ListNewsgroups |
| 36 | ParseHierarchies |
| 37 | ReadGroupList |
| 38 | OutputData |
| 39 | FormatOutput |
| 40 | SQLHierarchies |
| 41 | SQLSortOrder |
| 42 | SQLGroupList |
| 43 | SQLSetBounds |
| 44 | SQLBuildClause |
| 45 | GetMaxLength |
| 46 | ); |
| 47 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
| 48 | ListMonth)], |
| 49 | Output => [qw(OutputData FormatOutput)], |
| 50 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
| 51 | SQLSetBounds SQLBuildClause GetMaxLength)]); |
| 52 | $VERSION = '0.02'; |
| 53 | our $PackageVersion = '0.02'; |
| 54 | |
| 55 | use Data::Dumper; |
| 56 | use File::Basename; |
| 57 | use Cwd qw(realpath); |
| 58 | |
| 59 | use Config::Auto; |
| 60 | use DBI; |
| 61 | |
| 62 | #####-------------------------------- Vars --------------------------------##### |
| 63 | |
| 64 | # save $0 in $FullPath |
| 65 | our $FullPath = $0; |
| 66 | # strip filename and /bin or /install directory to create the $HomePath |
| 67 | our $HomePath = dirname(realpath($0)); |
| 68 | $HomePath =~ s/\/(bin|install)//; |
| 69 | # trim $0 |
| 70 | $0 =~ s%.*/%%; |
| 71 | # set version string |
| 72 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; |
| 73 | |
| 74 | #####------------------------------- Basics -------------------------------##### |
| 75 | |
| 76 | ################################################################################ |
| 77 | |
| 78 | ################################################################################ |
| 79 | sub ShowVersion { |
| 80 | ################################################################################ |
| 81 | ### display version and exit |
| 82 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
| 83 | print "Copyright (c) 2010-2013 Thomas Hochstein <thh\@inter.net>\n"; |
| 84 | print "This program is free software; you may redistribute it ". |
| 85 | "and/or modify it under the same terms as Perl itself.\n"; |
| 86 | exit(100); |
| 87 | }; |
| 88 | ################################################################################ |
| 89 | |
| 90 | ################################################################################ |
| 91 | sub ShowPOD { |
| 92 | ################################################################################ |
| 93 | ### feed myself to perldoc and exit |
| 94 | exec('perldoc', $FullPath); |
| 95 | exit(100); |
| 96 | }; |
| 97 | ################################################################################ |
| 98 | |
| 99 | ################################################################################ |
| 100 | sub ReadConfig { |
| 101 | ################################################################################ |
| 102 | ### read config via Config::Auto |
| 103 | ### IN : $ConfFile: config filename |
| 104 | ### OUT: reference to a hash containing the configuration |
| 105 | my ($ConfFile) = @_; |
| 106 | # set default |
| 107 | $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile; |
| 108 | # mandatory configuration options |
| 109 | my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', |
| 110 | 'DBTableRaw','DBTableGrps'); |
| 111 | # read config via Config::Auto |
| 112 | my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); |
| 113 | my %Conf = %{$ConfR}; |
| 114 | # check for mandatory options |
| 115 | foreach (@Mandatory) { |
| 116 | &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) |
| 117 | if (!defined($Conf{$_})); |
| 118 | } |
| 119 | # $Conf{'TLH'} is checked in gatherstats.pl |
| 120 | return $ConfR; |
| 121 | }; |
| 122 | ################################################################################ |
| 123 | |
| 124 | ################################################################################ |
| 125 | sub OverrideConfig { |
| 126 | ################################################################################ |
| 127 | ### override configuration values |
| 128 | ### IN : $ConfigR : reference to configuration hash |
| 129 | ### $OverrideR: reference to a hash containing overrides |
| 130 | my ($ConfigR,$OverrideR) = @_; |
| 131 | my %Override = %$OverrideR; |
| 132 | # Config hash empty? |
| 133 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
| 134 | if ( keys %$ConfigR < 1); |
| 135 | # return if no overrides |
| 136 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
| 137 | foreach my $Key (keys %Override) { |
| 138 | $$ConfigR{$Key} = $Override{$Key}; |
| 139 | }; |
| 140 | }; |
| 141 | ################################################################################ |
| 142 | |
| 143 | ################################################################################ |
| 144 | sub InitDB { |
| 145 | ################################################################################ |
| 146 | ### initialise database connection |
| 147 | ### IN : $ConfigR: reference to configuration hash |
| 148 | ### $Die : if TRUE, die if connection fails |
| 149 | ### OUT: DBHandle |
| 150 | my ($ConfigR,$Die) = @_; |
| 151 | my %Conf = %$ConfigR; |
| 152 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
| 153 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, |
| 154 | $Conf{'DBHost'}), $Conf{'DBUser'}, |
| 155 | $Conf{'DBPw'}, { PrintError => 0 }); |
| 156 | if (!$DBHandle) { |
| 157 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
| 158 | &Bleat(1,$DBI::errstr); |
| 159 | }; |
| 160 | return $DBHandle; |
| 161 | }; |
| 162 | ################################################################################ |
| 163 | |
| 164 | ################################################################################ |
| 165 | sub Bleat { |
| 166 | ################################################################################ |
| 167 | ### print warning or error messages and terminate in case of error |
| 168 | ### IN : $Level : 1 = warning, 2 = error |
| 169 | ### $Message: warning or error message |
| 170 | my ($Level,$Message) = @_; |
| 171 | if ($Level == 1) { |
| 172 | warn "$0 W: $Message\n" |
| 173 | } elsif ($Level == 2) { |
| 174 | die "$0 E: $Message\n" |
| 175 | } else { |
| 176 | print "$0: $Message\n" |
| 177 | } |
| 178 | }; |
| 179 | ################################################################################ |
| 180 | |
| 181 | #####------------------------------ GetStats ------------------------------##### |
| 182 | |
| 183 | ################################################################################ |
| 184 | sub ListNewsgroups { |
| 185 | ################################################################################ |
| 186 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
| 187 | ### hierarchy names where every newsgroup and hierarchy appears only once: |
| 188 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin |
| 189 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
| 190 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
| 191 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
| 192 | ### as keys |
| 193 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys |
| 194 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
| 195 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
| 196 | my %Newsgroups; |
| 197 | chomp($Newsgroups); |
| 198 | # remove whitespace from contents of Newsgroups: |
| 199 | $Newsgroups =~ s/\s//; |
| 200 | # call &HierarchyCount for each newsgroup in $Newsgroups: |
| 201 | for (split /,/, $Newsgroups) { |
| 202 | # don't count newsgroup/hierarchy in wrong TLH |
| 203 | next if($TLH and !/^$TLH/); |
| 204 | # don't count invalid newsgroups |
| 205 | if(%ValidGroups and !defined($ValidGroups{$_})) { |
| 206 | warn (sprintf("DROPPED: %s\n",$_)); |
| 207 | next; |
| 208 | } |
| 209 | # add original newsgroup to %Newsgroups |
| 210 | $Newsgroups{$_} = 1; |
| 211 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', |
| 212 | # i.e. de.alt.ALL and de.ALL |
| 213 | foreach (ParseHierarchies($_)) { |
| 214 | $Newsgroups{$_.'.ALL'} = 1; |
| 215 | } |
| 216 | }; |
| 217 | return %Newsgroups; |
| 218 | }; |
| 219 | |
| 220 | ################################################################################ |
| 221 | sub ParseHierarchies { |
| 222 | ################################################################################ |
| 223 | ### return a list of all hierarchy levels a newsgroup belongs to |
| 224 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) |
| 225 | ### IN : $Newsgroup : a newsgroup name |
| 226 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to |
| 227 | my ($Newsgroup) = @_; |
| 228 | my @Hierarchies; |
| 229 | # strip trailing dots |
| 230 | $Newsgroup =~ s/(.+)\.+$/$1/; |
| 231 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies |
| 232 | # i.e. de.alt.test: "de.alt" and "de" |
| 233 | while ($Newsgroup =~ /\./) { |
| 234 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; |
| 235 | push @Hierarchies, $Newsgroup; |
| 236 | }; |
| 237 | return @Hierarchies; |
| 238 | }; |
| 239 | |
| 240 | ################################################################################ |
| 241 | sub ReadGroupList { |
| 242 | ################################################################################ |
| 243 | ### read a list of valid newsgroups from file (each group on one line, |
| 244 | ### ignoring everything after the first whitespace and so accepting files |
| 245 | ### in checkgroups format as well as (parts of) an INN active file) |
| 246 | ### IN : $Filename : file to read |
| 247 | ### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups |
| 248 | my ($Filename) = @_; |
| 249 | my %ValidGroups; |
| 250 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
| 251 | while (<$LIST>) { |
| 252 | s/^\s*(\S+).*$/$1/; |
| 253 | chomp; |
| 254 | next if /^$/; |
| 255 | $ValidGroups{$_} = '1'; |
| 256 | }; |
| 257 | close $LIST; |
| 258 | return \%ValidGroups; |
| 259 | }; |
| 260 | |
| 261 | ################################################################################ |
| 262 | |
| 263 | #####----------------------------- TimePeriods ----------------------------##### |
| 264 | |
| 265 | ################################################################################ |
| 266 | sub GetTimePeriod { |
| 267 | ################################################################################ |
| 268 | ### get a time period to act on from --month option; |
| 269 | ### if empty, default to last month |
| 270 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' |
| 271 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause |
| 272 | ### of the chosen time period |
| 273 | my ($Month) = @_; |
| 274 | # define result variables |
| 275 | my ($Verbal, $SQL); |
| 276 | # define a regular expression for a month |
| 277 | my $REMonth = '\d{4}-\d{2}'; |
| 278 | |
| 279 | # default to last month if option is not set |
| 280 | if(!$Month) { |
| 281 | $Month = &LastMonth; |
| 282 | } |
| 283 | |
| 284 | # check for valid input |
| 285 | if ($Month =~ /^$REMonth$/) { |
| 286 | # single month (YYYY-MM) |
| 287 | ($Month) = &CheckMonth($Month); |
| 288 | $Verbal = $Month; |
| 289 | $SQL = sprintf("month = '%s'",$Month); |
| 290 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { |
| 291 | # time period (YYYY-MM:YYYY-MM) |
| 292 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); |
| 293 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); |
| 294 | } elsif ($Month =~ /^all$/i) { |
| 295 | # special case: ALL |
| 296 | $Verbal = 'all time'; |
| 297 | $SQL = ''; |
| 298 | } else { |
| 299 | # invalid input |
| 300 | return (undef,undef); |
| 301 | } |
| 302 | |
| 303 | return ($Verbal,$SQL); |
| 304 | }; |
| 305 | |
| 306 | ################################################################################ |
| 307 | sub LastMonth { |
| 308 | ################################################################################ |
| 309 | ### get last month from todays date in YYYY-MM format |
| 310 | ### OUT: last month as YYYY-MM |
| 311 | # get today's date |
| 312 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); |
| 313 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 |
| 314 | $Year += 1900; |
| 315 | if ($Month < 1) { |
| 316 | $Month = 12; |
| 317 | $Year--; |
| 318 | }; |
| 319 | # return last month |
| 320 | return sprintf('%4d-%02d',$Year,$Month); |
| 321 | }; |
| 322 | |
| 323 | ################################################################################ |
| 324 | sub CheckMonth { |
| 325 | ################################################################################ |
| 326 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
| 327 | ### otherwise, fix it |
| 328 | ### IN : @Month: array of month |
| 329 | ### OUT: @Month: a valid month |
| 330 | my (@Month) = @_; |
| 331 | foreach my $Month (@Month) { |
| 332 | my ($OldMonth) = $Month; |
| 333 | my ($CalMonth) = substr ($Month, -2); |
| 334 | if ($CalMonth < 1 or $CalMonth > 12) { |
| 335 | $CalMonth = '12' if $CalMonth > 12; |
| 336 | $CalMonth = '01' if $CalMonth < 1; |
| 337 | substr($Month, -2) = $CalMonth; |
| 338 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". |
| 339 | "and '12'), set to '%s'.",$OldMonth,$Month)); |
| 340 | } |
| 341 | } |
| 342 | return @Month; |
| 343 | }; |
| 344 | |
| 345 | ################################################################################ |
| 346 | sub SplitPeriod { |
| 347 | ################################################################################ |
| 348 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
| 349 | ### IN : $Period: time period |
| 350 | ### OUT: $StartMonth, $EndMonth |
| 351 | my ($Period) = @_; |
| 352 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
| 353 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
| 354 | # switch parameters as necessary |
| 355 | if ($EndMonth gt $StartMonth) { |
| 356 | return ($StartMonth, $EndMonth); |
| 357 | } else { |
| 358 | return ($EndMonth, $StartMonth); |
| 359 | }; |
| 360 | }; |
| 361 | |
| 362 | ################################################################################ |
| 363 | sub ListMonth { |
| 364 | ################################################################################ |
| 365 | ### return a list of months (YYYY-MM) between start and end month |
| 366 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
| 367 | ### OUT: @Months: array containing all months from $MonthExpression enumerated |
| 368 | my ($MonthExpression )= @_; |
| 369 | # return if single month |
| 370 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); |
| 371 | # parse $MonthExpression |
| 372 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); |
| 373 | # set $Year, $Month from $StartMonth |
| 374 | my ($Year, $Month) = split /-/, $StartMonth; |
| 375 | # define @Months |
| 376 | my (@Months); |
| 377 | until ("$Year-$Month" gt $EndMonth) { |
| 378 | push @Months, "$Year-$Month"; |
| 379 | $Month = "$Month"; # force string context |
| 380 | $Month++; |
| 381 | if ($Month > 12) { |
| 382 | $Month = '01'; |
| 383 | $Year++; |
| 384 | }; |
| 385 | }; |
| 386 | return @Months; |
| 387 | }; |
| 388 | |
| 389 | #####---------------------------- OutputFormats ---------------------------##### |
| 390 | |
| 391 | ################################################################################ |
| 392 | sub OutputData { |
| 393 | ################################################################################ |
| 394 | ### read database query results from DBHandle and print results with formatting |
| 395 | ### IN : $Format : format specifier |
| 396 | ### $Comments : print or suppress all comments for machine-readable output |
| 397 | ### $GroupBy : primary sorting order (month or key) |
| 398 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 399 | ### $ValidKeys: reference to a hash containing all valid keys |
| 400 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM |
| 401 | ### $DBQuery : database query handle with executed query, |
| 402 | ### containing $Month, $Key, $Value |
| 403 | ### $PadField : padding length for key field (optional) for 'pretty' |
| 404 | ### $PadValue : padding length for value field (optional) for 'pretty' |
| 405 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, |
| 406 | $DBQuery, $PadField, $PadValue) = @_; |
| 407 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; |
| 408 | my ($FileName, $Handle, $OUT); |
| 409 | our $LastIteration; |
| 410 | |
| 411 | # define output types |
| 412 | my %LegalOutput; |
| 413 | @LegalOutput{('dump','list','pretty')} = (); |
| 414 | # bail out if format is unknown |
| 415 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); |
| 416 | |
| 417 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
| 418 | # don't display invalid keys |
| 419 | if(%ValidKeys and !defined($ValidKeys{$Key})) { |
| 420 | # FIXME |
| 421 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); |
| 422 | next; |
| 423 | }; |
| 424 | # care for correct sorting order and abstract from month and keys: |
| 425 | # $Caption will be $Month or $Key, according to sorting order, |
| 426 | # and $Key will be $Key or $Month, respectively |
| 427 | my $Caption; |
| 428 | if ($GroupBy eq 'key') { |
| 429 | $Caption = $Key; |
| 430 | $Key = $Month; |
| 431 | } else { |
| 432 | $Caption = $Month; |
| 433 | } |
| 434 | # set output file handle |
| 435 | if (!$FileTempl) { |
| 436 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
| 437 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
| 438 | close $OUT if ($LastIteration); |
| 439 | # safeguards for filename creation: |
| 440 | # replace potential problem characters with '_' |
| 441 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); |
| 442 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; |
| 443 | open ($OUT,">$FileName") |
| 444 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", |
| 445 | $FileName)); |
| 446 | $Handle = $OUT; |
| 447 | }; |
| 448 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
| 449 | $Precision, $PadField, $PadValue); |
| 450 | $LastIteration = $Caption; |
| 451 | }; |
| 452 | close $OUT if ($FileTempl); |
| 453 | }; |
| 454 | |
| 455 | ################################################################################ |
| 456 | sub FormatOutput { |
| 457 | ################################################################################ |
| 458 | ### format information for output according to format specifier |
| 459 | ### IN : $Format : format specifier |
| 460 | ### $Comments : print or suppress all comments for machine-readable output |
| 461 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order |
| 462 | ### $Key : newsgroup, client, ... or $Month, as above |
| 463 | ### $Value : number of postings with that attribute |
| 464 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 465 | ### $PadField : padding length for key field (optional) for 'pretty' |
| 466 | ### $PadValue : padding length for value field (optional) for 'pretty' |
| 467 | ### OUT: $Output: formatted output |
| 468 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, |
| 469 | $PadValue) = @_; |
| 470 | my ($Output); |
| 471 | # keep last caption in mind |
| 472 | our ($LastIteration); |
| 473 | # create one line of output |
| 474 | if ($Format eq 'dump') { |
| 475 | # output as dump (key value) |
| 476 | $Output = sprintf ("# %s:\n",$Caption) |
| 477 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 478 | $Output .= sprintf ("%s %u\n",$Key,$Value); |
| 479 | } elsif ($Format eq 'list') { |
| 480 | # output as list (caption key value) |
| 481 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); |
| 482 | } elsif ($Format eq 'pretty') { |
| 483 | # output as a table |
| 484 | $Output = sprintf ("# ----- %s:\n",$Caption) |
| 485 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 486 | # increase $PadValue for numbers with decimal point |
| 487 | $PadValue += $Precision+1 if $Precision; |
| 488 | # add padding if $PadField is set; $PadValue HAS to be set then |
| 489 | $Output .= sprintf ($PadField ? |
| 490 | sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : |
| 491 | "%s%s %.*f\n",$Key,$Comments ? ':' : '', |
| 492 | $Precision,$Value); |
| 493 | }; |
| 494 | return $Output; |
| 495 | }; |
| 496 | |
| 497 | #####------------------------- QueryModifications -------------------------##### |
| 498 | |
| 499 | ################################################################################ |
| 500 | sub SQLHierarchies { |
| 501 | ################################################################################ |
| 502 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
| 503 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is |
| 504 | ### true, accordingly) |
| 505 | ### IN : $ShowHierarchies: boolean value |
| 506 | ### OUT: SQL code |
| 507 | my ($ShowHierarchies) = @_; |
| 508 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
| 509 | }; |
| 510 | |
| 511 | ################################################################################ |
| 512 | sub GetMaxLength { |
| 513 | ################################################################################ |
| 514 | ### get length of longest fields in future query result |
| 515 | ### IN : $DBHandle : database handle |
| 516 | ### $Table : table to query |
| 517 | ### $Field : field (key!, i.e. month, newsgroup, ...) to check |
| 518 | ### $Value : field (value!, i.e. postings) to check |
| 519 | ### $WhereClause : WHERE clause |
| 520 | ### $HavingClause: HAVING clause |
| 521 | ### @BindVars : bind variables for WHERE clause |
| 522 | ### OUT: $FieldLength : length of longest instance of $Field |
| 523 | ### $ValueLength : length of longest instance of $Value |
| 524 | my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; |
| 525 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". |
| 526 | "MAX(%s) ". |
| 527 | "FROM %s %s %s",$Field,,$Value, |
| 528 | $Table,$WhereClause,$HavingClause ? |
| 529 | 'GROUP BY newsgroup' . $HavingClause . |
| 530 | ' ORDER BY LENGTH(newsgroup) '. |
| 531 | 'DESC LIMIT 1': '')); |
| 532 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". |
| 533 | "for '%s' from table '%s': ". |
| 534 | "$DBI::errstr",$Field,$Table)); |
| 535 | my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; |
| 536 | my $ValueLength = length($ValueMax) if ($ValueMax); |
| 537 | return ($FieldLength,$ValueLength); |
| 538 | }; |
| 539 | |
| 540 | ################################################################################ |
| 541 | sub SQLSortOrder { |
| 542 | ################################################################################ |
| 543 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and |
| 544 | ### $OptOrderBy (secondary sorting), both ascending or descending; |
| 545 | ### descending sorting order is done by adding '-desc' |
| 546 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' |
| 547 | ### $OrderBy: secondary sort by month/newsgroups (default) |
| 548 | ### or number of 'postings' |
| 549 | ### OUT: a SQL ORDER BY clause |
| 550 | my ($GroupBy,$OrderBy) = @_; |
| 551 | my ($GroupSort,$OrderSort) = ('',''); |
| 552 | # $GroupBy (primary sorting) |
| 553 | if (!$GroupBy) { |
| 554 | $GroupBy = 'month'; |
| 555 | } else { |
| 556 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); |
| 557 | if ($GroupBy =~ /group/i) { |
| 558 | $GroupBy = 'newsgroup'; |
| 559 | } else { |
| 560 | $GroupBy = 'month'; |
| 561 | } |
| 562 | } |
| 563 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; |
| 564 | # $OrderBy (secondary sorting) |
| 565 | if (!$OrderBy) { |
| 566 | $OrderBy = $Secondary; |
| 567 | } else { |
| 568 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); |
| 569 | if ($OrderBy =~ /posting/i) { |
| 570 | $OrderBy = "postings $OrderSort, $Secondary"; |
| 571 | } else { |
| 572 | $OrderBy = "$Secondary $OrderSort"; |
| 573 | } |
| 574 | } |
| 575 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); |
| 576 | }; |
| 577 | |
| 578 | ################################################################################ |
| 579 | sub SQLParseOrder { |
| 580 | ################################################################################ |
| 581 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. |
| 582 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' |
| 583 | ### IN : $OrderOption: order option (see above) |
| 584 | ### OUT: parameter to sort by, |
| 585 | ### sort order ('DESC' or nothing, meaning 'ASC') |
| 586 | my ($OrderOption) = @_; |
| 587 | my $SortOrder = ''; |
| 588 | if ($OrderOption =~ s/-?desc$//i) { |
| 589 | $SortOrder = 'DESC'; |
| 590 | } else { |
| 591 | $OrderOption =~ s/-?asc$//i |
| 592 | } |
| 593 | return ($OrderOption,$SortOrder); |
| 594 | }; |
| 595 | |
| 596 | ################################################################################ |
| 597 | sub SQLGroupList { |
| 598 | ################################################################################ |
| 599 | ### explode list of newsgroups separated by : (with wildcards) |
| 600 | ### to a SQL 'WHERE' expression |
| 601 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
| 602 | ### OUT: SQL code to become part of a 'WHERE' clause, |
| 603 | ### list of newsgroups for SQL bindings |
| 604 | my ($Newsgroups) = @_; |
| 605 | # substitute '*' wildcard with SQL wildcard character '%' |
| 606 | $Newsgroups =~ s/\*/%/g; |
| 607 | return (undef,undef) if !CheckValidNewsgroups($Newsgroups); |
| 608 | # just one newsgroup? |
| 609 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; |
| 610 | my ($SQL,@WildcardGroups,@NoWildcardGroups); |
| 611 | # list of newsgroups separated by ':' |
| 612 | my @GroupList = split /:/, $Newsgroups; |
| 613 | foreach (@GroupList) { |
| 614 | if ($_ !~ /%/) { |
| 615 | # add to list of newsgroup names WITHOUT wildcard |
| 616 | push (@NoWildcardGroups,$_); |
| 617 | } else { |
| 618 | # add to list of newsgroup names WITH wildcard |
| 619 | push (@WildcardGroups,$_); |
| 620 | # add wildcard to SQL clause |
| 621 | # 'OR' if SQL clause is not empty |
| 622 | $SQL .= ' OR ' if $SQL; |
| 623 | $SQL .= 'newsgroup LIKE ?' |
| 624 | } |
| 625 | }; |
| 626 | if (scalar(@NoWildcardGroups)) { |
| 627 | # add 'OR' if SQL clause is not empty |
| 628 | $SQL .= ' OR ' if $SQL; |
| 629 | if (scalar(@NoWildcardGroups) < 2) { |
| 630 | # special case: just one newsgroup without wildcard |
| 631 | $SQL .= 'newsgroup = ?'; |
| 632 | } else { |
| 633 | # create list of newsgroups to include: 'newsgroup IN (...)' |
| 634 | $SQL .= 'newsgroup IN ('; |
| 635 | my $SQLin; |
| 636 | foreach (@NoWildcardGroups) { |
| 637 | $SQLin .= ',' if $SQLin; |
| 638 | $SQLin .= '?'; |
| 639 | } |
| 640 | # add list to SQL clause |
| 641 | $SQL .= $SQLin .= ')'; |
| 642 | } |
| 643 | } |
| 644 | # add brackets '()' to SQL clause as needed (more than one wildcard group) |
| 645 | if (scalar(@WildcardGroups)) { |
| 646 | $SQL = '(' . $SQL .')'; |
| 647 | } |
| 648 | # rebuild @GroupList in (now) correct order |
| 649 | @GroupList = (@WildcardGroups,@NoWildcardGroups); |
| 650 | return ($SQL,@GroupList); |
| 651 | }; |
| 652 | |
| 653 | ################################################################################ |
| 654 | sub SQLGroupWildcard { |
| 655 | ################################################################################ |
| 656 | ### build a valid SQL 'WHERE' expression with or without wildcards |
| 657 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard |
| 658 | ### (group.name or group.name.%) |
| 659 | ### OUT: SQL code to become part of a 'WHERE' clause |
| 660 | my ($Newsgroup) = @_; |
| 661 | if ($Newsgroup !~ /%/) { |
| 662 | return 'newsgroup = ?'; |
| 663 | } else { |
| 664 | return 'newsgroup LIKE ?'; |
| 665 | } |
| 666 | }; |
| 667 | |
| 668 | ################################################################################ |
| 669 | sub SQLSetBounds { |
| 670 | ################################################################################ |
| 671 | ### set upper and/or lower boundary (number of postings) |
| 672 | ### IN : $Type: 'level', 'average', 'sum' or 'default' |
| 673 | ### $LowBound,$UppBound: lower/upper boundary, respectively |
| 674 | ### OUT: SQL code to become part of a WHERE or HAVING clause |
| 675 | my ($Type,$LowBound,$UppBound) = @_; |
| 676 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); |
| 677 | if($LowBound and defined($UppBound) and $LowBound > $UppBound) { |
| 678 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". |
| 679 | "$UppBound, exchanging boundaries."); |
| 680 | ($LowBound,$UppBound) = ($UppBound,$LowBound); |
| 681 | } |
| 682 | # default to 'default' |
| 683 | my $WhereHavingFunction = 'postings'; |
| 684 | # set $LowBound to SQL statement: |
| 685 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' |
| 686 | if ($Type eq 'level') { |
| 687 | $WhereHavingFunction = 'MIN(postings)' |
| 688 | } elsif ($Type eq 'average') { |
| 689 | $WhereHavingFunction = 'AVG(postings)' |
| 690 | } elsif ($Type eq 'sum') { |
| 691 | $WhereHavingFunction = 'SUM(postings)' |
| 692 | } |
| 693 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if defined($LowBound); |
| 694 | # set $LowBound to SQL statement: |
| 695 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' |
| 696 | if ($Type eq 'level') { |
| 697 | $WhereHavingFunction = 'MAX(postings)' |
| 698 | } elsif ($Type eq 'average') { |
| 699 | $WhereHavingFunction = 'AVG(postings)' |
| 700 | } elsif ($Type eq 'sum') { |
| 701 | $WhereHavingFunction = 'SUM(postings)' |
| 702 | } |
| 703 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if defined($UppBound); |
| 704 | return ($LowBound,$UppBound); |
| 705 | }; |
| 706 | |
| 707 | ################################################################################ |
| 708 | sub SQLCheckNumber { |
| 709 | ################################################################################ |
| 710 | ### check if input is a valid positive integer; otherwise, make it one |
| 711 | ### IN : @Numbers: array of parameters |
| 712 | ### OUT: @Numbers: a valid positive integer |
| 713 | my (@Numbers) = @_; |
| 714 | foreach my $Number (@Numbers) { |
| 715 | if ($Number and $Number < 0) { |
| 716 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); |
| 717 | $Number = -$Number; |
| 718 | } |
| 719 | $Number = '' if ($Number and $Number !~ /^\d+$/); |
| 720 | } |
| 721 | return @Numbers; |
| 722 | }; |
| 723 | |
| 724 | ################################################################################ |
| 725 | sub SQLBuildClause { |
| 726 | ################################################################################ |
| 727 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause |
| 728 | ### from multiple expressions which *may* be empty |
| 729 | ### IN : $Type: 'where', 'having', 'group' or 'order' |
| 730 | ### @Expressions: array of expressions |
| 731 | ### OUT: $SQLClause: a SQL clause |
| 732 | my ($Type,@Expressions) = @_; |
| 733 | my ($SQLClause,$Separator,$Statement); |
| 734 | # set separator ('AND' or ',') |
| 735 | if ($Type eq 'where' or $Type eq 'having') { |
| 736 | $Separator = 'AND'; |
| 737 | } else { |
| 738 | $Separator = ','; |
| 739 | } |
| 740 | # set statement |
| 741 | if ($Type eq 'where') { |
| 742 | $Statement = 'WHERE'; |
| 743 | } elsif ($Type eq 'order') { |
| 744 | $Statement = 'ORDER BY'; |
| 745 | } elsif ($Type eq 'having') { |
| 746 | $Statement = 'HAVING'; |
| 747 | } else { |
| 748 | $Statement = 'GROUP BY'; |
| 749 | } |
| 750 | # build query from expressions with separators |
| 751 | foreach my $Expression (@Expressions) { |
| 752 | if ($Expression) { |
| 753 | $SQLClause .= " $Separator " if ($SQLClause); |
| 754 | $SQLClause .= $Expression; |
| 755 | } |
| 756 | } |
| 757 | # add statement in front if not already present |
| 758 | $SQLClause = " $Statement " . $SQLClause |
| 759 | if ($SQLClause and $SQLClause !~ /$Statement/); |
| 760 | return $SQLClause; |
| 761 | }; |
| 762 | |
| 763 | #####--------------------------- Verifications ----------------------------##### |
| 764 | |
| 765 | ################################################################################ |
| 766 | sub CheckValidNewsgroups { |
| 767 | ################################################################################ |
| 768 | ### syntax check of newgroup list |
| 769 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
| 770 | ### OUT: boolean |
| 771 | my ($Newsgroups) = @_; |
| 772 | my $InvalidCharRegExp = ',; '; |
| 773 | return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; |
| 774 | }; |
| 775 | |
| 776 | |
| 777 | #####------------------------------- done ---------------------------------##### |
| 778 | 1; |