| 1 | # NewsStats.pm |
| 2 | # |
| 3 | # Library functions for the NewsStats package. |
| 4 | # |
| 5 | # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net> |
| 6 | # |
| 7 | # This module can be redistributed and/or modified under the same terms under |
| 8 | # which Perl itself is published. |
| 9 | |
| 10 | package NewsStats; |
| 11 | |
| 12 | use strict; |
| 13 | use warnings; |
| 14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
| 15 | |
| 16 | require Exporter; |
| 17 | @ISA = qw(Exporter); |
| 18 | @EXPORT = qw( |
| 19 | $MyVersion |
| 20 | $PackageVersion |
| 21 | $FullPath |
| 22 | $HomePath |
| 23 | ShowVersion |
| 24 | ShowPOD |
| 25 | ReadConfig |
| 26 | OverrideConfig |
| 27 | InitDB |
| 28 | Bleat |
| 29 | ); |
| 30 | @EXPORT_OK = qw( |
| 31 | GetTimePeriod |
| 32 | LastMonth |
| 33 | SplitPeriod |
| 34 | ListMonth |
| 35 | ListNewsgroups |
| 36 | ReadGroupList |
| 37 | OutputData |
| 38 | FormatOutput |
| 39 | SQLHierarchies |
| 40 | SQLSortOrder |
| 41 | SQLGroupList |
| 42 | SQLSetBounds |
| 43 | SQLBuildClause |
| 44 | GetMaxLength |
| 45 | ); |
| 46 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
| 47 | ListMonth)], |
| 48 | Output => [qw(OutputData FormatOutput)], |
| 49 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
| 50 | SQLSetBounds SQLBuildClause GetMaxLength)]); |
| 51 | $VERSION = '0.01'; |
| 52 | our $PackageVersion = '0.01'; |
| 53 | |
| 54 | use Data::Dumper; |
| 55 | use File::Basename; |
| 56 | |
| 57 | use Config::Auto; |
| 58 | use DBI; |
| 59 | |
| 60 | #####-------------------------------- Vars --------------------------------##### |
| 61 | |
| 62 | # trim the path |
| 63 | our $FullPath = $0; |
| 64 | our $HomePath = dirname($0); |
| 65 | $0 =~ s%.*/%%; |
| 66 | # set version string |
| 67 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; |
| 68 | |
| 69 | #####------------------------------- Basics -------------------------------##### |
| 70 | |
| 71 | ################################################################################ |
| 72 | |
| 73 | ################################################################################ |
| 74 | sub ShowVersion { |
| 75 | ################################################################################ |
| 76 | ### display version and exit |
| 77 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
| 78 | print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n"; |
| 79 | print "This program is free software; you may redistribute it ". |
| 80 | "and/or modify it under the same terms as Perl itself.\n"; |
| 81 | exit(100); |
| 82 | }; |
| 83 | ################################################################################ |
| 84 | |
| 85 | ################################################################################ |
| 86 | sub ShowPOD { |
| 87 | ################################################################################ |
| 88 | ### feed myself to perldoc and exit |
| 89 | exec('perldoc', $FullPath); |
| 90 | exit(100); |
| 91 | }; |
| 92 | ################################################################################ |
| 93 | |
| 94 | ################################################################################ |
| 95 | sub ReadConfig { |
| 96 | ################################################################################ |
| 97 | ### read config via Config::Auto |
| 98 | ### IN : $ConfFile: config filename |
| 99 | ### OUT: reference to a hash containing the configuration |
| 100 | my ($ConfFile) = @_; |
| 101 | return Config::Auto::parse($ConfFile, format => 'equal'); |
| 102 | }; |
| 103 | ################################################################################ |
| 104 | |
| 105 | ################################################################################ |
| 106 | sub OverrideConfig { |
| 107 | ################################################################################ |
| 108 | ### override configuration values |
| 109 | ### IN : $ConfigR : reference to configuration hash |
| 110 | ### $OverrideR: reference to a hash containing overrides |
| 111 | my ($ConfigR,$OverrideR) = @_; |
| 112 | my %Override = %$OverrideR; |
| 113 | # Config hash empty? |
| 114 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
| 115 | if ( keys %$ConfigR < 1); |
| 116 | # return if no overrides |
| 117 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
| 118 | foreach my $Key (keys %Override) { |
| 119 | $$ConfigR{$Key} = $Override{$Key}; |
| 120 | }; |
| 121 | }; |
| 122 | ################################################################################ |
| 123 | |
| 124 | ################################################################################ |
| 125 | sub InitDB { |
| 126 | ################################################################################ |
| 127 | ### initialise database connection |
| 128 | ### IN : $ConfigR: reference to configuration hash |
| 129 | ### $Die : if TRUE, die if connection fails |
| 130 | ### OUT: DBHandle |
| 131 | my ($ConfigR,$Die) = @_; |
| 132 | my %Conf = %$ConfigR; |
| 133 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
| 134 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, |
| 135 | $Conf{'DBHost'}), $Conf{'DBUser'}, |
| 136 | $Conf{'DBPw'}, { PrintError => 0 }); |
| 137 | if (!$DBHandle) { |
| 138 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
| 139 | &Bleat(1,$DBI::errstr); |
| 140 | }; |
| 141 | return $DBHandle; |
| 142 | }; |
| 143 | ################################################################################ |
| 144 | |
| 145 | ################################################################################ |
| 146 | sub Bleat { |
| 147 | ################################################################################ |
| 148 | ### print warning or error messages and terminate in case of error |
| 149 | ### IN : $Level : 1 = warning, 2 = error |
| 150 | ### $Message: warning or error message |
| 151 | my ($Level,$Message) = @_; |
| 152 | if ($Level == 1) { |
| 153 | warn "$0 W: $Message\n" |
| 154 | } elsif ($Level == 2) { |
| 155 | die "$0 E: $Message\n" |
| 156 | } else { |
| 157 | print "$0: $Message\n" |
| 158 | } |
| 159 | }; |
| 160 | ################################################################################ |
| 161 | |
| 162 | #####------------------------------ GetStats ------------------------------##### |
| 163 | |
| 164 | ################################################################################ |
| 165 | sub ListNewsgroups { |
| 166 | ################################################################################ |
| 167 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
| 168 | ### hierarchy names where every newsgroup and hierarchy appears only once: |
| 169 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin |
| 170 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
| 171 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
| 172 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
| 173 | ### as keys |
| 174 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys |
| 175 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
| 176 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
| 177 | my %Newsgroups; |
| 178 | chomp($Newsgroups); |
| 179 | # remove whitespace from contents of Newsgroups: |
| 180 | $Newsgroups =~ s/\s//; |
| 181 | # call &HierarchyCount for each newsgroup in $Newsgroups: |
| 182 | for (split /,/, $Newsgroups) { |
| 183 | # don't count newsgroup/hierarchy in wrong TLH |
| 184 | next if($TLH and !/^$TLH/); |
| 185 | # don't count invalid newsgroups |
| 186 | if(%ValidGroups and !defined($ValidGroups{$_})) { |
| 187 | warn (sprintf("DROPPED: %s\n",$_)); |
| 188 | next; |
| 189 | } |
| 190 | # add original newsgroup to %Newsgroups |
| 191 | $Newsgroups{$_} = 1; |
| 192 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', |
| 193 | # i.e. de.alt.ALL and de.ALL |
| 194 | foreach (ParseHierarchies($_)) { |
| 195 | $Newsgroups{$_.'.ALL'} = 1; |
| 196 | } |
| 197 | }; |
| 198 | return %Newsgroups; |
| 199 | }; |
| 200 | |
| 201 | ################################################################################ |
| 202 | sub ParseHierarchies { |
| 203 | ################################################################################ |
| 204 | ### return a list of all hierarchy levels a newsgroup belongs to |
| 205 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) |
| 206 | ### IN : $Newsgroup : a newsgroup name |
| 207 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to |
| 208 | my ($Newsgroup) = @_; |
| 209 | my @Hierarchies; |
| 210 | # strip trailing dots |
| 211 | $Newsgroup =~ s/(.+)\.+$/$1/; |
| 212 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies |
| 213 | # i.e. de.alt.test: "de.alt" and "de" |
| 214 | while ($Newsgroup =~ /\./) { |
| 215 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; |
| 216 | push @Hierarchies, $Newsgroup; |
| 217 | }; |
| 218 | return @Hierarchies; |
| 219 | }; |
| 220 | |
| 221 | ################################################################################ |
| 222 | sub ReadGroupList { |
| 223 | ################################################################################ |
| 224 | ### read a list of valid newsgroups from file (each group on one line, |
| 225 | ### ignoring everything after the first whitespace and so accepting files |
| 226 | ### in checkgroups format as well as (parts of) an INN active file) |
| 227 | ### IN : $Filename : file to read |
| 228 | ### OUT: \%ValidGroups: hash containing all valid newsgroups |
| 229 | my ($Filename) = @_; |
| 230 | my %ValidGroups; |
| 231 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
| 232 | while (<$LIST>) { |
| 233 | s/^\s*(\S+).*$/$1/; |
| 234 | chomp; |
| 235 | next if /^$/; |
| 236 | $ValidGroups{$_} = '1'; |
| 237 | }; |
| 238 | close $LIST; |
| 239 | return \%ValidGroups; |
| 240 | }; |
| 241 | |
| 242 | ################################################################################ |
| 243 | |
| 244 | #####----------------------------- TimePeriods ----------------------------##### |
| 245 | |
| 246 | ################################################################################ |
| 247 | sub GetTimePeriod { |
| 248 | ################################################################################ |
| 249 | ### get a time period to act on from --month option; |
| 250 | ### if empty, default to last month |
| 251 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' |
| 252 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause |
| 253 | ### of the chosen time period |
| 254 | my ($Month) = @_; |
| 255 | # define result variables |
| 256 | my ($Verbal, $SQL); |
| 257 | # define a regular expression for a month |
| 258 | my $REMonth = '\d{4}-\d{2}'; |
| 259 | |
| 260 | # default to last month if option is not set |
| 261 | if(!$Month) { |
| 262 | $Month = &LastMonth; |
| 263 | } |
| 264 | |
| 265 | # check for valid input |
| 266 | if ($Month =~ /^$REMonth$/) { |
| 267 | # single month (YYYY-MM) |
| 268 | ($Month) = &CheckMonth($Month); |
| 269 | $Verbal = $Month; |
| 270 | $SQL = sprintf("month = '%s'",$Month); |
| 271 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { |
| 272 | # time period (YYYY-MM:YYYY-MM) |
| 273 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); |
| 274 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); |
| 275 | } elsif ($Month =~ /^all$/i) { |
| 276 | # special case: ALL |
| 277 | $Verbal = 'all time'; |
| 278 | $SQL = ''; |
| 279 | } else { |
| 280 | # invalid input |
| 281 | return (undef,undef); |
| 282 | } |
| 283 | |
| 284 | return ($Verbal,$SQL); |
| 285 | }; |
| 286 | |
| 287 | ################################################################################ |
| 288 | sub LastMonth { |
| 289 | ################################################################################ |
| 290 | ### get last month from todays date in YYYY-MM format |
| 291 | ### OUT: last month as YYYY-MM |
| 292 | # get today's date |
| 293 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); |
| 294 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 |
| 295 | $Year += 1900; |
| 296 | if ($Month < 1) { |
| 297 | $Month = 12; |
| 298 | $Year--; |
| 299 | }; |
| 300 | # return last month |
| 301 | return sprintf('%4d-%02d',$Year,$Month); |
| 302 | }; |
| 303 | |
| 304 | ################################################################################ |
| 305 | sub CheckMonth { |
| 306 | ################################################################################ |
| 307 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
| 308 | ### otherwise, fix it |
| 309 | ### IN : @Month: array of month |
| 310 | ### OUT: @Month: a valid month |
| 311 | my (@Month) = @_; |
| 312 | foreach my $Month (@Month) { |
| 313 | my ($OldMonth) = $Month; |
| 314 | my ($CalMonth) = substr ($Month, -2); |
| 315 | if ($CalMonth < 1 or $CalMonth > 12) { |
| 316 | $CalMonth = '12' if $CalMonth > 12; |
| 317 | $CalMonth = '01' if $CalMonth < 1; |
| 318 | substr($Month, -2) = $CalMonth; |
| 319 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". |
| 320 | "and '12'), set to '%s'.",$OldMonth,$Month)); |
| 321 | } |
| 322 | } |
| 323 | return @Month; |
| 324 | }; |
| 325 | |
| 326 | ################################################################################ |
| 327 | sub SplitPeriod { |
| 328 | ################################################################################ |
| 329 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
| 330 | ### IN : $Period: time period |
| 331 | ### OUT: $StartMonth, $EndMonth |
| 332 | my ($Period) = @_; |
| 333 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
| 334 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
| 335 | # switch parameters as necessary |
| 336 | if ($EndMonth gt $StartMonth) { |
| 337 | return ($StartMonth, $EndMonth); |
| 338 | } else { |
| 339 | return ($EndMonth, $StartMonth); |
| 340 | }; |
| 341 | }; |
| 342 | |
| 343 | ################################################################################ |
| 344 | sub ListMonth { |
| 345 | ################################################################################ |
| 346 | ### return a list of months (YYYY-MM) between start and end month |
| 347 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
| 348 | ### OUT: @Months: array containing all months from $MonthExpression enumerated |
| 349 | my ($MonthExpression )= @_; |
| 350 | # return if single month |
| 351 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); |
| 352 | # parse $MonthExpression |
| 353 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); |
| 354 | # set $Year, $Month from $StartMonth |
| 355 | my ($Year, $Month) = split /-/, $StartMonth; |
| 356 | # define @Months |
| 357 | my (@Months); |
| 358 | until ("$Year-$Month" gt $EndMonth) { |
| 359 | push @Months, "$Year-$Month"; |
| 360 | $Month = "$Month"; # force string context |
| 361 | $Month++; |
| 362 | if ($Month > 12) { |
| 363 | $Month = '01'; |
| 364 | $Year++; |
| 365 | }; |
| 366 | }; |
| 367 | return @Months; |
| 368 | }; |
| 369 | |
| 370 | #####---------------------------- OutputFormats ---------------------------##### |
| 371 | |
| 372 | ################################################################################ |
| 373 | sub OutputData { |
| 374 | ################################################################################ |
| 375 | ### read database query results from DBHandle and print results with formatting |
| 376 | ### IN : $Format : format specifier |
| 377 | ### $Comments : print or suppress all comments for machine-readable output |
| 378 | ### $GroupBy : primary sorting order (month or key) |
| 379 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 380 | ### $ValidKeys: reference to a hash containing all valid keys |
| 381 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM |
| 382 | ### $DBQuery : database query handle with executed query, |
| 383 | ### containing $Month, $Key, $Value |
| 384 | ### $PadGroup : padding length for key field (optional) for 'pretty' |
| 385 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, |
| 386 | $DBQuery, $PadGroup) = @_; |
| 387 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; |
| 388 | my ($FileName, $Handle, $OUT); |
| 389 | our $LastIteration; |
| 390 | |
| 391 | # define output types |
| 392 | my %LegalOutput; |
| 393 | @LegalOutput{('dump',,'list','pretty')} = (); |
| 394 | # bail out if format is unknown |
| 395 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); |
| 396 | |
| 397 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
| 398 | # don't display invalid keys |
| 399 | if(%ValidKeys and !defined($ValidKeys{$Key})) { |
| 400 | # FIXME |
| 401 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); |
| 402 | next; |
| 403 | }; |
| 404 | # care for correct sorting order and abstract from month and keys: |
| 405 | # $Caption will be $Month or $Key, according to sorting order, |
| 406 | # and $Key will be $Key or $Month, respectively |
| 407 | my $Caption; |
| 408 | if ($GroupBy eq 'key') { |
| 409 | $Caption = $Key; |
| 410 | $Key = $Month; |
| 411 | } else { |
| 412 | $Caption = $Month; |
| 413 | } |
| 414 | # set output file handle |
| 415 | if (!$FileTempl) { |
| 416 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
| 417 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
| 418 | close $OUT if ($LastIteration); |
| 419 | # safeguards for filename creation: |
| 420 | # replace potential problem characters with '_' |
| 421 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); |
| 422 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; |
| 423 | open ($OUT,">$FileName") |
| 424 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", |
| 425 | $FileName)); |
| 426 | $Handle = $OUT; |
| 427 | }; |
| 428 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
| 429 | $Precision, $PadGroup); |
| 430 | $LastIteration = $Caption; |
| 431 | }; |
| 432 | close $OUT if ($FileTempl); |
| 433 | }; |
| 434 | |
| 435 | ################################################################################ |
| 436 | sub FormatOutput { |
| 437 | ################################################################################ |
| 438 | ### format information for output according to format specifier |
| 439 | ### IN : $Format : format specifier |
| 440 | ### $Comments : print or suppress all comments for machine-readable output |
| 441 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order |
| 442 | ### $Key : newsgroup, client, ... or $Month, as above |
| 443 | ### $Value : number of postings with that attribute |
| 444 | ### $Precision: number of digits right of decimal point (0 or 2) |
| 445 | ### $PadGroup : padding length for key field (optional) for 'pretty' |
| 446 | ### OUT: $Output: formatted output |
| 447 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; |
| 448 | my ($Output); |
| 449 | # keep last caption in mind |
| 450 | our ($LastIteration); |
| 451 | # create one line of output |
| 452 | if ($Format eq 'dump') { |
| 453 | # output as dump (key value) |
| 454 | $Output = sprintf ("# %s:\n",$Caption) |
| 455 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 456 | $Output .= sprintf ("%s %u\n",$Key,$Value); |
| 457 | } elsif ($Format eq 'list') { |
| 458 | # output as list (caption key value) |
| 459 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); |
| 460 | } elsif ($Format eq 'pretty') { |
| 461 | # output as a table |
| 462 | $Output = sprintf ("# ----- %s:\n",$Caption) |
| 463 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); |
| 464 | $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : |
| 465 | "%s %.*f\n",$Key,$Precision,$Value); |
| 466 | }; |
| 467 | return $Output; |
| 468 | }; |
| 469 | |
| 470 | #####------------------------- QueryModifications -------------------------##### |
| 471 | |
| 472 | ################################################################################ |
| 473 | sub SQLHierarchies { |
| 474 | ################################################################################ |
| 475 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
| 476 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is |
| 477 | ### true, accordingly) |
| 478 | ### IN : $ShowHierarchies: boolean value |
| 479 | ### OUT: SQL code |
| 480 | my ($ShowHierarchies) = @_; |
| 481 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
| 482 | }; |
| 483 | |
| 484 | ################################################################################ |
| 485 | sub GetMaxLength { |
| 486 | ################################################################################ |
| 487 | ### get length of longest field in future query result |
| 488 | ### IN : $DBHandle : database handel |
| 489 | ### $Table : table to query |
| 490 | ### $Field : field to check |
| 491 | ### $WhereClause : WHERE clause |
| 492 | ### $HavingClause: HAVING clause |
| 493 | ### @BindVars : bind variables for WHERE clause |
| 494 | ### OUT: $Length: length of longest instnace of $Field |
| 495 | my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; |
| 496 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". |
| 497 | "FROM %s %s %s",$Field,$Table, |
| 498 | $WhereClause,$HavingClause ? |
| 499 | 'GROUP BY newsgroup' . $HavingClause . |
| 500 | ' ORDER BY LENGTH(newsgroup) '. |
| 501 | 'DESC LIMIT 1': '')); |
| 502 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". |
| 503 | "for '%s' from table '%s': ". |
| 504 | "$DBI::errstr",$Field,$Table)); |
| 505 | my ($Length) = $DBQuery->fetchrow_array; |
| 506 | return $Length; |
| 507 | }; |
| 508 | |
| 509 | ################################################################################ |
| 510 | sub SQLSortOrder { |
| 511 | ################################################################################ |
| 512 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and |
| 513 | ### $OptOrderBy (secondary sorting), both ascending or descending; |
| 514 | ### descending sorting order is done by adding '-desc' |
| 515 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' |
| 516 | ### $OrderBy: secondary sort by month/newsgroups (default) |
| 517 | ### or number of 'postings' |
| 518 | ### OUT: a SQL ORDER BY clause |
| 519 | my ($GroupBy,$OrderBy) = @_; |
| 520 | my ($GroupSort,$OrderSort) = ('',''); |
| 521 | # $GroupBy (primary sorting) |
| 522 | if (!$GroupBy) { |
| 523 | $GroupBy = 'month'; |
| 524 | } else { |
| 525 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); |
| 526 | if ($GroupBy =~ /group/i) { |
| 527 | $GroupBy = 'newsgroup'; |
| 528 | } else { |
| 529 | $GroupBy = 'month'; |
| 530 | } |
| 531 | } |
| 532 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; |
| 533 | # $OrderBy (secondary sorting) |
| 534 | if (!$OrderBy) { |
| 535 | $OrderBy = $Secondary; |
| 536 | } else { |
| 537 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); |
| 538 | if ($OrderBy =~ /posting/i) { |
| 539 | $OrderBy = "postings $OrderSort, $Secondary"; |
| 540 | } else { |
| 541 | $OrderBy = "$Secondary $OrderSort"; |
| 542 | } |
| 543 | } |
| 544 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); |
| 545 | }; |
| 546 | |
| 547 | ################################################################################ |
| 548 | sub SQLParseOrder { |
| 549 | ################################################################################ |
| 550 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. |
| 551 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' |
| 552 | ### IN : $OrderOption: order option (see above) |
| 553 | ### OUT: parameter to sort by, |
| 554 | ### sort order ('DESC' or nothing, meaning 'ASC') |
| 555 | my ($OrderOption) = @_; |
| 556 | my $SortOrder = ''; |
| 557 | if ($OrderOption =~ s/-?desc$//i) { |
| 558 | $SortOrder = 'DESC'; |
| 559 | } else { |
| 560 | $OrderOption =~ s/-?asc$//i |
| 561 | } |
| 562 | return ($OrderOption,$SortOrder); |
| 563 | }; |
| 564 | |
| 565 | ################################################################################ |
| 566 | sub SQLGroupList { |
| 567 | ################################################################################ |
| 568 | ### explode list of newsgroups separated by : (with wildcards) |
| 569 | ### to a SQL 'WHERE' expression |
| 570 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
| 571 | ### OUT: SQL code to become part of a 'WHERE' clause, |
| 572 | ### list of newsgroups for SQL bindings |
| 573 | my ($Newsgroups) = @_; |
| 574 | # substitute '*' wildcard with SQL wildcard character '%' |
| 575 | $Newsgroups =~ s/\*/%/g; |
| 576 | # just one newsgroup? |
| 577 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; |
| 578 | # list of newsgroups separated by ':' |
| 579 | my $SQL = '('; |
| 580 | my @GroupList = split /:/, $Newsgroups; |
| 581 | foreach (@GroupList) { |
| 582 | $SQL .= ' OR ' if $SQL gt '('; |
| 583 | $SQL .= SQLGroupWildcard($_); |
| 584 | }; |
| 585 | $SQL .= ')'; |
| 586 | return ($SQL,@GroupList); |
| 587 | }; |
| 588 | |
| 589 | ################################################################################ |
| 590 | sub SQLGroupWildcard { |
| 591 | ################################################################################ |
| 592 | ### build a valid SQL 'WHERE' expression with or without wildcards |
| 593 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard |
| 594 | ### (group.name or group.name.%) |
| 595 | ### OUT: SQL code to become part of a 'WHERE' clause |
| 596 | my ($Newsgroup) = @_; |
| 597 | # FIXME: check for validity |
| 598 | if ($Newsgroup !~ /%/) { |
| 599 | return 'newsgroup = ?'; |
| 600 | } else { |
| 601 | return 'newsgroup LIKE ?'; |
| 602 | } |
| 603 | }; |
| 604 | |
| 605 | ################################################################################ |
| 606 | sub SQLSetBounds { |
| 607 | ################################################################################ |
| 608 | ### set upper and/or lower boundary (number of postings) |
| 609 | ### IN : $Type: 'level', 'average', 'sum' or 'default' |
| 610 | ### $LowBound,$UppBound: lower/upper boundary, respectively |
| 611 | ### OUT: SQL code to become part of a WHERE or HAVING clause |
| 612 | my ($Type,$LowBound,$UppBound) = @_; |
| 613 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); |
| 614 | if($LowBound and $UppBound and $LowBound > $UppBound) { |
| 615 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". |
| 616 | "$UppBound, exchanging boundaries."); |
| 617 | ($LowBound,$UppBound) = ($UppBound,$LowBound); |
| 618 | } |
| 619 | # default to 'default' |
| 620 | my $WhereHavingFunction = 'postings'; |
| 621 | # set $LowBound to SQL statement: |
| 622 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' |
| 623 | if ($Type eq 'level') { |
| 624 | $WhereHavingFunction = 'MIN(postings)' |
| 625 | } elsif ($Type eq 'average') { |
| 626 | $WhereHavingFunction = 'AVG(postings)' |
| 627 | } elsif ($Type eq 'sum') { |
| 628 | $WhereHavingFunction = 'SUM(postings)' |
| 629 | } |
| 630 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); |
| 631 | # set $LowBound to SQL statement: |
| 632 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' |
| 633 | if ($Type eq 'level') { |
| 634 | $WhereHavingFunction = 'MAX(postings)' |
| 635 | } elsif ($Type eq 'average') { |
| 636 | $WhereHavingFunction = 'AVG(postings)' |
| 637 | } elsif ($Type eq 'sum') { |
| 638 | $WhereHavingFunction = 'SUM(postings)' |
| 639 | } |
| 640 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); |
| 641 | return ($LowBound,$UppBound); |
| 642 | }; |
| 643 | |
| 644 | ################################################################################ |
| 645 | sub SQLCheckNumber { |
| 646 | ################################################################################ |
| 647 | ### check if input is a valid positive integer; otherwise, make it one |
| 648 | ### IN : @Numbers: array of parameters |
| 649 | ### OUT: @Numbers: a valid positive integer |
| 650 | my (@Numbers) = @_; |
| 651 | foreach my $Number (@Numbers) { |
| 652 | if ($Number and $Number < 0) { |
| 653 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); |
| 654 | $Number = -$Number; |
| 655 | } |
| 656 | $Number = '' if ($Number and $Number !~ /^\d+$/); |
| 657 | } |
| 658 | return @Numbers; |
| 659 | }; |
| 660 | |
| 661 | ################################################################################ |
| 662 | sub SQLBuildClause { |
| 663 | ################################################################################ |
| 664 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause |
| 665 | ### from multiple expressions which *may* be empty |
| 666 | ### IN : $Type: 'where', 'having', 'group' or 'order' |
| 667 | ### @Expressions: array of expressions |
| 668 | ### OUT: $SQLClause: a SQL clause |
| 669 | my ($Type,@Expressions) = @_; |
| 670 | my ($SQLClause,$Separator,$Statement); |
| 671 | # set separator ('AND' or ',') |
| 672 | if ($Type eq 'where' or $Type eq 'having') { |
| 673 | $Separator = 'AND'; |
| 674 | } else { |
| 675 | $Separator = ','; |
| 676 | } |
| 677 | # set statement |
| 678 | if ($Type eq 'where') { |
| 679 | $Statement = 'WHERE'; |
| 680 | } elsif ($Type eq 'order') { |
| 681 | $Statement = 'ORDER BY'; |
| 682 | } elsif ($Type eq 'having') { |
| 683 | $Statement = 'HAVING'; |
| 684 | } else { |
| 685 | $Statement = 'GROUP BY'; |
| 686 | } |
| 687 | # build query from expressions with separators |
| 688 | foreach my $Expression (@Expressions) { |
| 689 | if ($Expression) { |
| 690 | $SQLClause .= " $Separator " if ($SQLClause); |
| 691 | $SQLClause .= $Expression; |
| 692 | } |
| 693 | } |
| 694 | # add statement in front if not already present |
| 695 | $SQLClause = " $Statement " . $SQLClause |
| 696 | if ($SQLClause and $SQLClause !~ /$Statement/); |
| 697 | return $SQLClause; |
| 698 | }; |
| 699 | |
| 700 | |
| 701 | #####------------------------------- done ---------------------------------##### |
| 702 | 1; |
| 703 | |
| 704 | |