Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
edd250f2 | 5 | # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net> |
741336c2 TH |
6 | # |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
741336c2 | 19 | $MyVersion |
f6d15ca7 | 20 | $PackageVersion |
edd250f2 TH |
21 | $FullPath |
22 | $HomePath | |
23 | ShowVersion | |
24 | ShowPOD | |
741336c2 TH |
25 | ReadConfig |
26 | OverrideConfig | |
27 | InitDB | |
edd250f2 | 28 | Bleat |
741336c2 TH |
29 | ); |
30 | @EXPORT_OK = qw( | |
31 | GetTimePeriod | |
32 | LastMonth | |
741336c2 TH |
33 | SplitPeriod |
34 | ListMonth | |
35 | ListNewsgroups | |
ad609792 | 36 | ReadGroupList |
741336c2 TH |
37 | OutputData |
38 | FormatOutput | |
39 | SQLHierarchies | |
edd250f2 | 40 | SQLSortOrder |
741336c2 | 41 | SQLGroupList |
edd250f2 TH |
42 | SQLSetBounds |
43 | SQLBuildClause | |
d3b6810d | 44 | GetMaxLength |
741336c2 | 45 | ); |
edd250f2 TH |
46 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
47 | ListMonth)], | |
741336c2 | 48 | Output => [qw(OutputData FormatOutput)], |
edd250f2 TH |
49 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
50 | SQLSetBounds SQLBuildClause GetMaxLength)]); | |
4bbd46d3 TH |
51 | $VERSION = '0.01'; |
52 | our $PackageVersion = '0.01'; | |
741336c2 TH |
53 | |
54 | use Data::Dumper; | |
55 | use File::Basename; | |
741336c2 TH |
56 | |
57 | use Config::Auto; | |
58 | use DBI; | |
59 | ||
60 | #####-------------------------------- Vars --------------------------------##### | |
61 | ||
edd250f2 TH |
62 | # trim the path |
63 | our $FullPath = $0; | |
64 | our $HomePath = dirname($0); | |
65 | $0 =~ s%.*/%%; | |
66 | # set version string | |
67 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; | |
741336c2 TH |
68 | |
69 | #####------------------------------- Basics -------------------------------##### | |
70 | ||
741336c2 TH |
71 | ################################################################################ |
72 | ||
73 | ################################################################################ | |
74 | sub ShowVersion { | |
75 | ################################################################################ | |
76 | ### display version and exit | |
edd250f2 TH |
77 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
78 | print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n"; | |
79 | print "This program is free software; you may redistribute it ". | |
80 | "and/or modify it under the same terms as Perl itself.\n"; | |
741336c2 TH |
81 | exit(100); |
82 | }; | |
83 | ################################################################################ | |
84 | ||
85 | ################################################################################ | |
86 | sub ShowPOD { | |
87 | ################################################################################ | |
88 | ### feed myself to perldoc and exit | |
edd250f2 | 89 | exec('perldoc', $FullPath); |
741336c2 TH |
90 | exit(100); |
91 | }; | |
92 | ################################################################################ | |
93 | ||
94 | ################################################################################ | |
95 | sub ReadConfig { | |
96 | ################################################################################ | |
97 | ### read config via Config::Auto | |
98 | ### IN : $ConfFile: config filename | |
99 | ### OUT: reference to a hash containing the configuration | |
100 | my ($ConfFile) = @_; | |
101 | return Config::Auto::parse($ConfFile, format => 'equal'); | |
102 | }; | |
103 | ################################################################################ | |
104 | ||
105 | ################################################################################ | |
edd250f2 | 106 | sub OverrideConfig { |
741336c2 TH |
107 | ################################################################################ |
108 | ### override configuration values | |
109 | ### IN : $ConfigR : reference to configuration hash | |
110 | ### $OverrideR: reference to a hash containing overrides | |
111 | my ($ConfigR,$OverrideR) = @_; | |
112 | my %Override = %$OverrideR; | |
3430c898 | 113 | # Config hash empty? |
edd250f2 TH |
114 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
115 | if ( keys %$ConfigR < 1); | |
3430c898 | 116 | # return if no overrides |
741336c2 TH |
117 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
118 | foreach my $Key (keys %Override) { | |
119 | $$ConfigR{$Key} = $Override{$Key}; | |
120 | }; | |
121 | }; | |
122 | ################################################################################ | |
123 | ||
124 | ################################################################################ | |
125 | sub InitDB { | |
126 | ################################################################################ | |
127 | ### initialise database connection | |
128 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 129 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
130 | ### OUT: DBHandle |
131 | my ($ConfigR,$Die) = @_; | |
132 | my %Conf = %$ConfigR; | |
edd250f2 TH |
133 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
134 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, | |
135 | $Conf{'DBHost'}), $Conf{'DBUser'}, | |
136 | $Conf{'DBPw'}, { PrintError => 0 }); | |
741336c2 | 137 | if (!$DBHandle) { |
edd250f2 TH |
138 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
139 | &Bleat(1,$DBI::errstr); | |
741336c2 TH |
140 | }; |
141 | return $DBHandle; | |
142 | }; | |
143 | ################################################################################ | |
144 | ||
edd250f2 TH |
145 | ################################################################################ |
146 | sub Bleat { | |
147 | ################################################################################ | |
148 | ### print warning or error messages and terminate in case of error | |
149 | ### IN : $Level : 1 = warning, 2 = error | |
150 | ### $Message: warning or error message | |
151 | my ($Level,$Message) = @_; | |
152 | if ($Level == 1) { | |
153 | warn "$0 W: $Message\n" | |
154 | } elsif ($Level == 2) { | |
155 | die "$0 E: $Message\n" | |
156 | } else { | |
157 | print "$0: $Message\n" | |
158 | } | |
159 | }; | |
160 | ################################################################################ | |
161 | ||
741336c2 TH |
162 | #####------------------------------ GetStats ------------------------------##### |
163 | ||
164 | ################################################################################ | |
165 | sub ListNewsgroups { | |
166 | ################################################################################ | |
3430c898 TH |
167 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
168 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
169 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 | 170 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
89db2f90 | 171 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
ad609792 TH |
172 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
173 | ### as keys | |
174 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
89db2f90 | 175 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
ad609792 | 176 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
741336c2 TH |
177 | my %Newsgroups; |
178 | chomp($Newsgroups); | |
179 | # remove whitespace from contents of Newsgroups: | |
180 | $Newsgroups =~ s/\s//; | |
181 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
182 | for (split /,/, $Newsgroups) { | |
89db2f90 TH |
183 | # don't count newsgroup/hierarchy in wrong TLH |
184 | next if($TLH and !/^$TLH/); | |
ad609792 TH |
185 | # don't count invalid newsgroups |
186 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
edd250f2 | 187 | &Bleat(1,sprintf("DROPPED: %s",$_)); |
ad609792 TH |
188 | next; |
189 | } | |
741336c2 TH |
190 | # add original newsgroup to %Newsgroups |
191 | $Newsgroups{$_} = 1; | |
192 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
193 | # i.e. de.alt.ALL and de.ALL | |
194 | foreach (ParseHierarchies($_)) { | |
195 | $Newsgroups{$_.'.ALL'} = 1; | |
196 | } | |
197 | }; | |
198 | return %Newsgroups; | |
199 | }; | |
200 | ||
201 | ################################################################################ | |
202 | sub ParseHierarchies { | |
203 | ################################################################################ | |
3430c898 TH |
204 | ### return a list of all hierarchy levels a newsgroup belongs to |
205 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
206 | ### IN : $Newsgroup : a newsgroup name |
207 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
208 | my ($Newsgroup) = @_; | |
209 | my @Hierarchies; | |
210 | # strip trailing dots | |
211 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
212 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
213 | # i.e. de.alt.test: "de.alt" and "de" | |
214 | while ($Newsgroup =~ /\./) { | |
215 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
216 | push @Hierarchies, $Newsgroup; | |
217 | }; | |
218 | return @Hierarchies; | |
219 | }; | |
220 | ||
ad609792 TH |
221 | ################################################################################ |
222 | sub ReadGroupList { | |
223 | ################################################################################ | |
224 | ### read a list of valid newsgroups from file (each group on one line, | |
225 | ### ignoring everything after the first whitespace and so accepting files | |
226 | ### in checkgroups format as well as (parts of) an INN active file) | |
227 | ### IN : $Filename : file to read | |
228 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
229 | my ($Filename) = @_; | |
230 | my %ValidGroups; | |
edd250f2 | 231 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
ad609792 TH |
232 | while (<$LIST>) { |
233 | s/^(\S+).*$/$1/; | |
234 | chomp; | |
235 | $ValidGroups{$_} = '1'; | |
236 | }; | |
237 | close $LIST; | |
238 | return \%ValidGroups; | |
239 | }; | |
240 | ||
741336c2 TH |
241 | ################################################################################ |
242 | ||
243 | #####----------------------------- TimePeriods ----------------------------##### | |
244 | ||
245 | ################################################################################ | |
246 | sub GetTimePeriod { | |
247 | ################################################################################ | |
edd250f2 TH |
248 | ### get a time period to act on from --month option; |
249 | ### if empty, default to last month | |
250 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' | |
251 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause | |
252 | ### of the chosen time period | |
253 | my ($Month) = @_; | |
254 | # define result variables | |
255 | my ($Verbal, $SQL); | |
256 | # define a regular expression for a month | |
257 | my $REMonth = '\d{4}-\d{2}'; | |
258 | ||
259 | # default to last month if option is not set | |
260 | if(!$Month) { | |
261 | $Month = &LastMonth; | |
262 | } | |
263 | ||
264 | # check for valid input | |
265 | if ($Month =~ /^$REMonth$/) { | |
266 | # single month (YYYY-MM) | |
267 | ($Month) = &CheckMonth($Month); | |
268 | $Verbal = $Month; | |
269 | $SQL = sprintf("month = '%s'",$Month); | |
270 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { | |
271 | # time period (YYYY-MM:YYYY-MM) | |
272 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); | |
273 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); | |
274 | } elsif ($Month =~ /^all$/i) { | |
275 | # special case: ALL | |
276 | $Verbal = 'all time'; | |
277 | $SQL = ''; | |
741336c2 | 278 | } else { |
edd250f2 TH |
279 | # invalid input |
280 | return (undef,undef); | |
281 | } | |
282 | ||
283 | return ($Verbal,$SQL); | |
741336c2 TH |
284 | }; |
285 | ||
286 | ################################################################################ | |
287 | sub LastMonth { | |
288 | ################################################################################ | |
3430c898 | 289 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
290 | ### OUT: last month as YYYY-MM |
291 | # get today's date | |
292 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
293 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
294 | $Year += 1900; | |
295 | if ($Month < 1) { | |
296 | $Month = 12; | |
297 | $Year--; | |
298 | }; | |
299 | # return last month | |
300 | return sprintf('%4d-%02d',$Year,$Month); | |
301 | }; | |
302 | ||
303 | ################################################################################ | |
304 | sub CheckMonth { | |
305 | ################################################################################ | |
edd250f2 TH |
306 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
307 | ### otherwise, fix it | |
308 | ### IN : @Month: array of month | |
309 | ### OUT: @Month: a valid month | |
310 | my (@Month) = @_; | |
311 | foreach my $Month (@Month) { | |
312 | my ($OldMonth) = $Month; | |
313 | my ($CalMonth) = substr ($Month, -2); | |
314 | if ($CalMonth < 1 or $CalMonth > 12) { | |
315 | $CalMonth = '12' if $CalMonth > 12; | |
316 | $CalMonth = '01' if $CalMonth < 1; | |
317 | substr($Month, -2) = $CalMonth; | |
318 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". | |
319 | "and '12'), set to '%s'.",$OldMonth,$Month)); | |
320 | } | |
321 | } | |
322 | return @Month; | |
741336c2 TH |
323 | }; |
324 | ||
325 | ################################################################################ | |
326 | sub SplitPeriod { | |
327 | ################################################################################ | |
3430c898 | 328 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 | 329 | ### IN : $Period: time period |
edd250f2 | 330 | ### OUT: $StartMonth, $EndMonth |
741336c2 | 331 | my ($Period) = @_; |
741336c2 | 332 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
edd250f2 | 333 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
741336c2 TH |
334 | # switch parameters as necessary |
335 | if ($EndMonth gt $StartMonth) { | |
336 | return ($StartMonth, $EndMonth); | |
337 | } else { | |
338 | return ($EndMonth, $StartMonth); | |
339 | }; | |
340 | }; | |
341 | ||
342 | ################################################################################ | |
343 | sub ListMonth { | |
344 | ################################################################################ | |
3430c898 | 345 | ### return a list of months (YYYY-MM) between start and end month |
880c3eb2 TH |
346 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
347 | ### OUT: @Months: array containing all months from $MonthExpression enumerated | |
348 | my ($MonthExpression )= @_; | |
349 | # return if single month | |
350 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); | |
351 | # parse $MonthExpression | |
352 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); | |
741336c2 TH |
353 | # set $Year, $Month from $StartMonth |
354 | my ($Year, $Month) = split /-/, $StartMonth; | |
355 | # define @Months | |
356 | my (@Months); | |
357 | until ("$Year-$Month" gt $EndMonth) { | |
358 | push @Months, "$Year-$Month"; | |
359 | $Month = "$Month"; # force string context | |
360 | $Month++; | |
361 | if ($Month > 12) { | |
362 | $Month = '01'; | |
363 | $Year++; | |
364 | }; | |
365 | }; | |
366 | return @Months; | |
367 | }; | |
368 | ||
369 | #####---------------------------- OutputFormats ---------------------------##### | |
370 | ||
371 | ################################################################################ | |
372 | sub OutputData { | |
373 | ################################################################################ | |
3430c898 | 374 | ### read database query results from DBHandle and print results with formatting |
edd250f2 TH |
375 | ### IN : $Format : format specifier |
376 | ### $Comments : print or suppress all comments for machine-readable output | |
377 | ### $GroupBy : primary sorting order (month or key) | |
378 | ### $Precision: number of digits right of decimal point (0 or 2) | |
379 | ### $ValidKeys: reference to a hash containing all valid keys | |
380 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM | |
381 | ### $DBQuery : database query handle with executed query, | |
78389b28 | 382 | ### containing $Month, $Key, $Value |
edd250f2 TH |
383 | ### $PadGroup : padding length for key field (optional) for 'pretty' |
384 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, | |
385 | $DBQuery, $PadGroup) = @_; | |
386 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; | |
387 | my ($FileName, $Handle, $OUT); | |
78389b28 | 388 | our $LastIteration; |
edd250f2 TH |
389 | |
390 | # define output types | |
391 | my %LegalOutput; | |
392 | @LegalOutput{('dump',,'list','pretty')} = (); | |
393 | # bail out if format is unknown | |
394 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); | |
395 | ||
741336c2 | 396 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
edd250f2 TH |
397 | # don't display invalid keys |
398 | if(%ValidKeys and !defined($ValidKeys{$Key})) { | |
399 | # FIXME | |
400 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); | |
401 | next; | |
402 | }; | |
403 | # care for correct sorting order and abstract from month and keys: | |
404 | # $Caption will be $Month or $Key, according to sorting order, | |
405 | # and $Key will be $Key or $Month, respectively | |
406 | my $Caption; | |
407 | if ($GroupBy eq 'key') { | |
408 | $Caption = $Key; | |
409 | $Key = $Month; | |
410 | } else { | |
411 | $Caption = $Month; | |
412 | } | |
78389b28 | 413 | # set output file handle |
edd250f2 | 414 | if (!$FileTempl) { |
78389b28 | 415 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
edd250f2 | 416 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
78389b28 | 417 | close $OUT if ($LastIteration); |
edd250f2 TH |
418 | # safeguards for filename creation: |
419 | # replace potential problem characters with '_' | |
420 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); | |
421 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; | |
422 | open ($OUT,">$FileName") | |
423 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", | |
424 | $FileName)); | |
78389b28 TH |
425 | $Handle = $OUT; |
426 | }; | |
edd250f2 TH |
427 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
428 | $Precision, $PadGroup); | |
429 | $LastIteration = $Caption; | |
741336c2 | 430 | }; |
edd250f2 | 431 | close $OUT if ($FileTempl); |
741336c2 TH |
432 | }; |
433 | ||
434 | ################################################################################ | |
435 | sub FormatOutput { | |
436 | ################################################################################ | |
3430c898 | 437 | ### format information for output according to format specifier |
edd250f2 TH |
438 | ### IN : $Format : format specifier |
439 | ### $Comments : print or suppress all comments for machine-readable output | |
440 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order | |
441 | ### $Key : newsgroup, client, ... or $Month, as above | |
442 | ### $Value : number of postings with that attribute | |
443 | ### $Precision: number of digits right of decimal point (0 or 2) | |
444 | ### $PadGroup : padding length for key field (optional) for 'pretty' | |
741336c2 | 445 | ### OUT: $Output: formatted output |
edd250f2 | 446 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; |
741336c2 | 447 | my ($Output); |
edd250f2 | 448 | # keep last caption in mind |
741336c2 | 449 | our ($LastIteration); |
edd250f2 | 450 | # create one line of output |
741336c2 | 451 | if ($Format eq 'dump') { |
edd250f2 TH |
452 | # output as dump (key value) |
453 | $Output = sprintf ("# %s:\n",$Caption) | |
454 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
455 | $Output .= sprintf ("%s %u\n",$Key,$Value); | |
741336c2 | 456 | } elsif ($Format eq 'list') { |
edd250f2 TH |
457 | # output as list (caption key value) |
458 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); | |
741336c2 | 459 | } elsif ($Format eq 'pretty') { |
edd250f2 TH |
460 | # output as a table |
461 | $Output = sprintf ("# ----- %s:\n",$Caption) | |
462 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
463 | $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : | |
464 | "%s %.*f\n",$Key,$Precision,$Value); | |
741336c2 TH |
465 | }; |
466 | return $Output; | |
467 | }; | |
468 | ||
469 | #####------------------------- QueryModifications -------------------------##### | |
470 | ||
471 | ################################################################################ | |
472 | sub SQLHierarchies { | |
473 | ################################################################################ | |
3430c898 TH |
474 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
475 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is | |
476 | ### true, accordingly) | |
741336c2 TH |
477 | ### IN : $ShowHierarchies: boolean value |
478 | ### OUT: SQL code | |
479 | my ($ShowHierarchies) = @_; | |
edd250f2 | 480 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
741336c2 TH |
481 | }; |
482 | ||
483 | ################################################################################ | |
d3b6810d | 484 | sub GetMaxLength { |
741336c2 | 485 | ################################################################################ |
3430c898 | 486 | ### get length of longest field in future query result |
edd250f2 TH |
487 | ### IN : $DBHandle : database handel |
488 | ### $Table : table to query | |
489 | ### $Field : field to check | |
490 | ### $WhereClause : WHERE clause | |
491 | ### $HavingClause: HAVING clause | |
492 | ### @BindVars : bind variables for WHERE clause | |
741336c2 | 493 | ### OUT: $Length: length of longest instnace of $Field |
edd250f2 TH |
494 | my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; |
495 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". | |
496 | "FROM %s %s %s",$Field,$Table, | |
497 | $WhereClause,$HavingClause ? | |
498 | 'GROUP BY newsgroup' . $HavingClause . | |
499 | ' ORDER BY LENGTH(newsgroup) '. | |
500 | 'DESC LIMIT 1': '')); | |
501 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". | |
502 | "for '%s' from table '%s': ". | |
503 | "$DBI::errstr",$Field,$Table)); | |
741336c2 TH |
504 | my ($Length) = $DBQuery->fetchrow_array; |
505 | return $Length; | |
506 | }; | |
507 | ||
edd250f2 TH |
508 | ################################################################################ |
509 | sub SQLSortOrder { | |
510 | ################################################################################ | |
511 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and | |
512 | ### $OptOrderBy (secondary sorting), both ascending or descending; | |
513 | ### descending sorting order is done by adding '-desc' | |
514 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' | |
515 | ### $OrderBy: secondary sort by month/newsgroups (default) | |
516 | ### or number of 'postings' | |
517 | ### OUT: a SQL ORDER BY clause | |
518 | my ($GroupBy,$OrderBy) = @_; | |
519 | my ($GroupSort,$OrderSort) = ('',''); | |
520 | # $GroupBy (primary sorting) | |
521 | if (!$GroupBy) { | |
522 | $GroupBy = 'month'; | |
523 | } else { | |
524 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); | |
525 | if ($GroupBy =~ /group/i) { | |
526 | $GroupBy = 'newsgroup'; | |
527 | } else { | |
528 | $GroupBy = 'month'; | |
529 | } | |
530 | } | |
531 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; | |
532 | # $OrderBy (secondary sorting) | |
533 | if (!$OrderBy) { | |
534 | $OrderBy = $Secondary; | |
535 | } else { | |
536 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); | |
537 | if ($OrderBy =~ /posting/i) { | |
538 | $OrderBy = "postings $OrderSort, $Secondary"; | |
539 | } else { | |
540 | $OrderBy = "$Secondary $OrderSort"; | |
541 | } | |
542 | } | |
543 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); | |
544 | }; | |
545 | ||
546 | ################################################################################ | |
547 | sub SQLParseOrder { | |
548 | ################################################################################ | |
549 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. | |
550 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' | |
551 | ### IN : $OrderOption: order option (see above) | |
552 | ### OUT: parameter to sort by, | |
553 | ### sort order ('DESC' or nothing, meaning 'ASC') | |
554 | my ($OrderOption) = @_; | |
555 | my $SortOrder = ''; | |
556 | if ($OrderOption =~ s/-?desc$//i) { | |
557 | $SortOrder = 'DESC'; | |
558 | } else { | |
559 | $OrderOption =~ s/-?asc$//i | |
560 | } | |
561 | return ($OrderOption,$SortOrder); | |
562 | }; | |
563 | ||
741336c2 TH |
564 | ################################################################################ |
565 | sub SQLGroupList { | |
566 | ################################################################################ | |
edd250f2 TH |
567 | ### explode list of newsgroups separated by : (with wildcards) |
568 | ### to a SQL 'WHERE' expression | |
741336c2 | 569 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
edd250f2 TH |
570 | ### OUT: SQL code to become part of a 'WHERE' clause, |
571 | ### list of newsgroups for SQL bindings | |
741336c2 | 572 | my ($Newsgroups) = @_; |
edd250f2 | 573 | # substitute '*' wildcard with SQL wildcard character '%' |
741336c2 | 574 | $Newsgroups =~ s/\*/%/g; |
edd250f2 TH |
575 | # just one newsgroup? |
576 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; | |
577 | # list of newsgroups separated by ':' | |
741336c2 TH |
578 | my $SQL = '('; |
579 | my @GroupList = split /:/, $Newsgroups; | |
580 | foreach (@GroupList) { | |
581 | $SQL .= ' OR ' if $SQL gt '('; | |
edd250f2 | 582 | $SQL .= SQLGroupWildcard($_); |
741336c2 TH |
583 | }; |
584 | $SQL .= ')'; | |
585 | return ($SQL,@GroupList); | |
586 | }; | |
587 | ||
edd250f2 TH |
588 | ################################################################################ |
589 | sub SQLGroupWildcard { | |
590 | ################################################################################ | |
591 | ### build a valid SQL 'WHERE' expression with or without wildcards | |
592 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard | |
593 | ### (group.name or group.name.%) | |
594 | ### OUT: SQL code to become part of a 'WHERE' clause | |
595 | my ($Newsgroup) = @_; | |
596 | # FIXME: check for validity | |
597 | if ($Newsgroup !~ /%/) { | |
598 | return 'newsgroup = ?'; | |
599 | } else { | |
600 | return 'newsgroup LIKE ?'; | |
601 | } | |
602 | }; | |
603 | ||
604 | ################################################################################ | |
605 | sub SQLSetBounds { | |
606 | ################################################################################ | |
607 | ### set upper and/or lower boundary (number of postings) | |
608 | ### IN : $Type: 'level', 'average', 'sum' or 'default' | |
609 | ### $LowBound,$UppBound: lower/upper boundary, respectively | |
610 | ### OUT: SQL code to become part of a WHERE or HAVING clause | |
611 | my ($Type,$LowBound,$UppBound) = @_; | |
612 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); | |
613 | if($LowBound and $UppBound and $LowBound > $UppBound) { | |
614 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". | |
615 | "$UppBound, exchanging boundaries."); | |
616 | ($LowBound,$UppBound) = ($UppBound,$LowBound); | |
617 | } | |
618 | # default to 'default' | |
619 | my $WhereHavingFunction = 'postings'; | |
620 | # set $LowBound to SQL statement: | |
621 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' | |
622 | if ($Type eq 'level') { | |
623 | $WhereHavingFunction = 'MIN(postings)' | |
624 | } elsif ($Type eq 'average') { | |
625 | $WhereHavingFunction = 'AVG(postings)' | |
626 | } elsif ($Type eq 'sum') { | |
627 | $WhereHavingFunction = 'SUM(postings)' | |
628 | } | |
629 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); | |
630 | # set $LowBound to SQL statement: | |
631 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' | |
632 | if ($Type eq 'level') { | |
633 | $WhereHavingFunction = 'MAX(postings)' | |
634 | } elsif ($Type eq 'average') { | |
635 | $WhereHavingFunction = 'AVG(postings)' | |
636 | } elsif ($Type eq 'sum') { | |
637 | $WhereHavingFunction = 'SUM(postings)' | |
638 | } | |
639 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); | |
640 | return ($LowBound,$UppBound); | |
641 | }; | |
642 | ||
643 | ################################################################################ | |
644 | sub SQLCheckNumber { | |
645 | ################################################################################ | |
646 | ### check if input is a valid positive integer; otherwise, make it one | |
647 | ### IN : @Numbers: array of parameters | |
648 | ### OUT: @Numbers: a valid positive integer | |
649 | my (@Numbers) = @_; | |
650 | foreach my $Number (@Numbers) { | |
651 | if ($Number and $Number < 0) { | |
652 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); | |
653 | $Number = -$Number; | |
654 | } | |
655 | $Number = '' if ($Number and $Number !~ /^\d+$/); | |
656 | } | |
657 | return @Numbers; | |
658 | }; | |
659 | ||
660 | ################################################################################ | |
661 | sub SQLBuildClause { | |
662 | ################################################################################ | |
663 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause | |
664 | ### from multiple expressions which *may* be empty | |
665 | ### IN : $Type: 'where', 'having', 'group' or 'order' | |
666 | ### @Expressions: array of expressions | |
667 | ### OUT: $SQLClause: a SQL clause | |
668 | my ($Type,@Expressions) = @_; | |
669 | my ($SQLClause,$Separator,$Statement); | |
670 | # set separator ('AND' or ',') | |
671 | if ($Type eq 'where' or $Type eq 'having') { | |
672 | $Separator = 'AND'; | |
673 | } else { | |
674 | $Separator = ','; | |
675 | } | |
676 | # set statement | |
677 | if ($Type eq 'where') { | |
678 | $Statement = 'WHERE'; | |
679 | } elsif ($Type eq 'order') { | |
680 | $Statement = 'ORDER BY'; | |
681 | } elsif ($Type eq 'having') { | |
682 | $Statement = 'HAVING'; | |
683 | } else { | |
684 | $Statement = 'GROUP BY'; | |
685 | } | |
686 | # build query from expressions with separators | |
687 | foreach my $Expression (@Expressions) { | |
688 | if ($Expression) { | |
689 | $SQLClause .= " $Separator " if ($SQLClause); | |
690 | $SQLClause .= $Expression; | |
691 | } | |
692 | } | |
693 | # add statement in front if not already present | |
694 | $SQLClause = " $Statement " . $SQLClause | |
695 | if ($SQLClause and $SQLClause !~ /$Statement/); | |
696 | return $SQLClause; | |
697 | }; | |
698 | ||
699 | ||
741336c2 TH |
700 | #####------------------------------- done ---------------------------------##### |
701 | 1; | |
702 | ||
703 |