Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
edd250f2 | 5 | # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net> |
741336c2 TH |
6 | # |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
741336c2 | 19 | $MyVersion |
f6d15ca7 | 20 | $PackageVersion |
edd250f2 TH |
21 | $FullPath |
22 | $HomePath | |
23 | ShowVersion | |
24 | ShowPOD | |
741336c2 TH |
25 | ReadConfig |
26 | OverrideConfig | |
27 | InitDB | |
edd250f2 | 28 | Bleat |
741336c2 TH |
29 | ); |
30 | @EXPORT_OK = qw( | |
31 | GetTimePeriod | |
32 | LastMonth | |
741336c2 TH |
33 | SplitPeriod |
34 | ListMonth | |
35 | ListNewsgroups | |
1703b8e3 | 36 | ParseHierarchies |
ad609792 | 37 | ReadGroupList |
741336c2 TH |
38 | OutputData |
39 | FormatOutput | |
40 | SQLHierarchies | |
edd250f2 | 41 | SQLSortOrder |
741336c2 | 42 | SQLGroupList |
edd250f2 TH |
43 | SQLSetBounds |
44 | SQLBuildClause | |
d3b6810d | 45 | GetMaxLength |
741336c2 | 46 | ); |
edd250f2 TH |
47 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
48 | ListMonth)], | |
741336c2 | 49 | Output => [qw(OutputData FormatOutput)], |
edd250f2 TH |
50 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
51 | SQLSetBounds SQLBuildClause GetMaxLength)]); | |
4bbd46d3 TH |
52 | $VERSION = '0.01'; |
53 | our $PackageVersion = '0.01'; | |
741336c2 TH |
54 | |
55 | use Data::Dumper; | |
56 | use File::Basename; | |
741336c2 TH |
57 | |
58 | use Config::Auto; | |
59 | use DBI; | |
60 | ||
61 | #####-------------------------------- Vars --------------------------------##### | |
62 | ||
edd250f2 TH |
63 | # trim the path |
64 | our $FullPath = $0; | |
65 | our $HomePath = dirname($0); | |
66 | $0 =~ s%.*/%%; | |
67 | # set version string | |
68 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; | |
741336c2 TH |
69 | |
70 | #####------------------------------- Basics -------------------------------##### | |
71 | ||
741336c2 TH |
72 | ################################################################################ |
73 | ||
74 | ################################################################################ | |
75 | sub ShowVersion { | |
76 | ################################################################################ | |
77 | ### display version and exit | |
edd250f2 TH |
78 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
79 | print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n"; | |
80 | print "This program is free software; you may redistribute it ". | |
81 | "and/or modify it under the same terms as Perl itself.\n"; | |
741336c2 TH |
82 | exit(100); |
83 | }; | |
84 | ################################################################################ | |
85 | ||
86 | ################################################################################ | |
87 | sub ShowPOD { | |
88 | ################################################################################ | |
89 | ### feed myself to perldoc and exit | |
edd250f2 | 90 | exec('perldoc', $FullPath); |
741336c2 TH |
91 | exit(100); |
92 | }; | |
93 | ################################################################################ | |
94 | ||
95 | ################################################################################ | |
96 | sub ReadConfig { | |
97 | ################################################################################ | |
98 | ### read config via Config::Auto | |
99 | ### IN : $ConfFile: config filename | |
100 | ### OUT: reference to a hash containing the configuration | |
101 | my ($ConfFile) = @_; | |
102 | return Config::Auto::parse($ConfFile, format => 'equal'); | |
103 | }; | |
104 | ################################################################################ | |
105 | ||
106 | ################################################################################ | |
edd250f2 | 107 | sub OverrideConfig { |
741336c2 TH |
108 | ################################################################################ |
109 | ### override configuration values | |
110 | ### IN : $ConfigR : reference to configuration hash | |
111 | ### $OverrideR: reference to a hash containing overrides | |
112 | my ($ConfigR,$OverrideR) = @_; | |
113 | my %Override = %$OverrideR; | |
3430c898 | 114 | # Config hash empty? |
edd250f2 TH |
115 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
116 | if ( keys %$ConfigR < 1); | |
3430c898 | 117 | # return if no overrides |
741336c2 TH |
118 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
119 | foreach my $Key (keys %Override) { | |
120 | $$ConfigR{$Key} = $Override{$Key}; | |
121 | }; | |
122 | }; | |
123 | ################################################################################ | |
124 | ||
125 | ################################################################################ | |
126 | sub InitDB { | |
127 | ################################################################################ | |
128 | ### initialise database connection | |
129 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 130 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
131 | ### OUT: DBHandle |
132 | my ($ConfigR,$Die) = @_; | |
133 | my %Conf = %$ConfigR; | |
edd250f2 TH |
134 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
135 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, | |
136 | $Conf{'DBHost'}), $Conf{'DBUser'}, | |
137 | $Conf{'DBPw'}, { PrintError => 0 }); | |
741336c2 | 138 | if (!$DBHandle) { |
edd250f2 TH |
139 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
140 | &Bleat(1,$DBI::errstr); | |
741336c2 TH |
141 | }; |
142 | return $DBHandle; | |
143 | }; | |
144 | ################################################################################ | |
145 | ||
edd250f2 TH |
146 | ################################################################################ |
147 | sub Bleat { | |
148 | ################################################################################ | |
149 | ### print warning or error messages and terminate in case of error | |
150 | ### IN : $Level : 1 = warning, 2 = error | |
151 | ### $Message: warning or error message | |
152 | my ($Level,$Message) = @_; | |
153 | if ($Level == 1) { | |
154 | warn "$0 W: $Message\n" | |
155 | } elsif ($Level == 2) { | |
156 | die "$0 E: $Message\n" | |
157 | } else { | |
158 | print "$0: $Message\n" | |
159 | } | |
160 | }; | |
161 | ################################################################################ | |
162 | ||
741336c2 TH |
163 | #####------------------------------ GetStats ------------------------------##### |
164 | ||
165 | ################################################################################ | |
166 | sub ListNewsgroups { | |
167 | ################################################################################ | |
3430c898 TH |
168 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
169 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
170 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 | 171 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
89db2f90 | 172 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
ad609792 TH |
173 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
174 | ### as keys | |
175 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
89db2f90 | 176 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
ad609792 | 177 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
741336c2 TH |
178 | my %Newsgroups; |
179 | chomp($Newsgroups); | |
180 | # remove whitespace from contents of Newsgroups: | |
181 | $Newsgroups =~ s/\s//; | |
182 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
183 | for (split /,/, $Newsgroups) { | |
89db2f90 TH |
184 | # don't count newsgroup/hierarchy in wrong TLH |
185 | next if($TLH and !/^$TLH/); | |
ad609792 TH |
186 | # don't count invalid newsgroups |
187 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
0dc13b39 | 188 | warn (sprintf("DROPPED: %s\n",$_)); |
ad609792 TH |
189 | next; |
190 | } | |
741336c2 TH |
191 | # add original newsgroup to %Newsgroups |
192 | $Newsgroups{$_} = 1; | |
193 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
194 | # i.e. de.alt.ALL and de.ALL | |
195 | foreach (ParseHierarchies($_)) { | |
196 | $Newsgroups{$_.'.ALL'} = 1; | |
197 | } | |
198 | }; | |
199 | return %Newsgroups; | |
200 | }; | |
201 | ||
202 | ################################################################################ | |
203 | sub ParseHierarchies { | |
204 | ################################################################################ | |
3430c898 TH |
205 | ### return a list of all hierarchy levels a newsgroup belongs to |
206 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
207 | ### IN : $Newsgroup : a newsgroup name |
208 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
209 | my ($Newsgroup) = @_; | |
210 | my @Hierarchies; | |
211 | # strip trailing dots | |
212 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
213 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
214 | # i.e. de.alt.test: "de.alt" and "de" | |
215 | while ($Newsgroup =~ /\./) { | |
216 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
217 | push @Hierarchies, $Newsgroup; | |
218 | }; | |
219 | return @Hierarchies; | |
220 | }; | |
221 | ||
ad609792 TH |
222 | ################################################################################ |
223 | sub ReadGroupList { | |
224 | ################################################################################ | |
225 | ### read a list of valid newsgroups from file (each group on one line, | |
226 | ### ignoring everything after the first whitespace and so accepting files | |
227 | ### in checkgroups format as well as (parts of) an INN active file) | |
228 | ### IN : $Filename : file to read | |
229 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
230 | my ($Filename) = @_; | |
231 | my %ValidGroups; | |
edd250f2 | 232 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
ad609792 | 233 | while (<$LIST>) { |
7662b106 | 234 | s/^\s*(\S+).*$/$1/; |
ad609792 | 235 | chomp; |
7662b106 | 236 | next if /^$/; |
ad609792 TH |
237 | $ValidGroups{$_} = '1'; |
238 | }; | |
239 | close $LIST; | |
240 | return \%ValidGroups; | |
241 | }; | |
242 | ||
741336c2 TH |
243 | ################################################################################ |
244 | ||
245 | #####----------------------------- TimePeriods ----------------------------##### | |
246 | ||
247 | ################################################################################ | |
248 | sub GetTimePeriod { | |
249 | ################################################################################ | |
edd250f2 TH |
250 | ### get a time period to act on from --month option; |
251 | ### if empty, default to last month | |
252 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' | |
253 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause | |
254 | ### of the chosen time period | |
255 | my ($Month) = @_; | |
256 | # define result variables | |
257 | my ($Verbal, $SQL); | |
258 | # define a regular expression for a month | |
259 | my $REMonth = '\d{4}-\d{2}'; | |
260 | ||
261 | # default to last month if option is not set | |
262 | if(!$Month) { | |
263 | $Month = &LastMonth; | |
264 | } | |
265 | ||
266 | # check for valid input | |
267 | if ($Month =~ /^$REMonth$/) { | |
268 | # single month (YYYY-MM) | |
269 | ($Month) = &CheckMonth($Month); | |
270 | $Verbal = $Month; | |
271 | $SQL = sprintf("month = '%s'",$Month); | |
272 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { | |
273 | # time period (YYYY-MM:YYYY-MM) | |
274 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); | |
275 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); | |
276 | } elsif ($Month =~ /^all$/i) { | |
277 | # special case: ALL | |
278 | $Verbal = 'all time'; | |
279 | $SQL = ''; | |
741336c2 | 280 | } else { |
edd250f2 TH |
281 | # invalid input |
282 | return (undef,undef); | |
283 | } | |
284 | ||
285 | return ($Verbal,$SQL); | |
741336c2 TH |
286 | }; |
287 | ||
288 | ################################################################################ | |
289 | sub LastMonth { | |
290 | ################################################################################ | |
3430c898 | 291 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
292 | ### OUT: last month as YYYY-MM |
293 | # get today's date | |
294 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
295 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
296 | $Year += 1900; | |
297 | if ($Month < 1) { | |
298 | $Month = 12; | |
299 | $Year--; | |
300 | }; | |
301 | # return last month | |
302 | return sprintf('%4d-%02d',$Year,$Month); | |
303 | }; | |
304 | ||
305 | ################################################################################ | |
306 | sub CheckMonth { | |
307 | ################################################################################ | |
edd250f2 TH |
308 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
309 | ### otherwise, fix it | |
310 | ### IN : @Month: array of month | |
311 | ### OUT: @Month: a valid month | |
312 | my (@Month) = @_; | |
313 | foreach my $Month (@Month) { | |
314 | my ($OldMonth) = $Month; | |
315 | my ($CalMonth) = substr ($Month, -2); | |
316 | if ($CalMonth < 1 or $CalMonth > 12) { | |
317 | $CalMonth = '12' if $CalMonth > 12; | |
318 | $CalMonth = '01' if $CalMonth < 1; | |
319 | substr($Month, -2) = $CalMonth; | |
320 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". | |
321 | "and '12'), set to '%s'.",$OldMonth,$Month)); | |
322 | } | |
323 | } | |
324 | return @Month; | |
741336c2 TH |
325 | }; |
326 | ||
327 | ################################################################################ | |
328 | sub SplitPeriod { | |
329 | ################################################################################ | |
3430c898 | 330 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 | 331 | ### IN : $Period: time period |
edd250f2 | 332 | ### OUT: $StartMonth, $EndMonth |
741336c2 | 333 | my ($Period) = @_; |
741336c2 | 334 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
edd250f2 | 335 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
741336c2 TH |
336 | # switch parameters as necessary |
337 | if ($EndMonth gt $StartMonth) { | |
338 | return ($StartMonth, $EndMonth); | |
339 | } else { | |
340 | return ($EndMonth, $StartMonth); | |
341 | }; | |
342 | }; | |
343 | ||
344 | ################################################################################ | |
345 | sub ListMonth { | |
346 | ################################################################################ | |
3430c898 | 347 | ### return a list of months (YYYY-MM) between start and end month |
880c3eb2 TH |
348 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
349 | ### OUT: @Months: array containing all months from $MonthExpression enumerated | |
350 | my ($MonthExpression )= @_; | |
351 | # return if single month | |
352 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); | |
353 | # parse $MonthExpression | |
354 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); | |
741336c2 TH |
355 | # set $Year, $Month from $StartMonth |
356 | my ($Year, $Month) = split /-/, $StartMonth; | |
357 | # define @Months | |
358 | my (@Months); | |
359 | until ("$Year-$Month" gt $EndMonth) { | |
360 | push @Months, "$Year-$Month"; | |
361 | $Month = "$Month"; # force string context | |
362 | $Month++; | |
363 | if ($Month > 12) { | |
364 | $Month = '01'; | |
365 | $Year++; | |
366 | }; | |
367 | }; | |
368 | return @Months; | |
369 | }; | |
370 | ||
371 | #####---------------------------- OutputFormats ---------------------------##### | |
372 | ||
373 | ################################################################################ | |
374 | sub OutputData { | |
375 | ################################################################################ | |
3430c898 | 376 | ### read database query results from DBHandle and print results with formatting |
edd250f2 TH |
377 | ### IN : $Format : format specifier |
378 | ### $Comments : print or suppress all comments for machine-readable output | |
379 | ### $GroupBy : primary sorting order (month or key) | |
380 | ### $Precision: number of digits right of decimal point (0 or 2) | |
381 | ### $ValidKeys: reference to a hash containing all valid keys | |
382 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM | |
383 | ### $DBQuery : database query handle with executed query, | |
78389b28 | 384 | ### containing $Month, $Key, $Value |
edd250f2 TH |
385 | ### $PadGroup : padding length for key field (optional) for 'pretty' |
386 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, | |
387 | $DBQuery, $PadGroup) = @_; | |
388 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; | |
389 | my ($FileName, $Handle, $OUT); | |
78389b28 | 390 | our $LastIteration; |
edd250f2 TH |
391 | |
392 | # define output types | |
393 | my %LegalOutput; | |
394 | @LegalOutput{('dump',,'list','pretty')} = (); | |
395 | # bail out if format is unknown | |
396 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); | |
397 | ||
741336c2 | 398 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
edd250f2 TH |
399 | # don't display invalid keys |
400 | if(%ValidKeys and !defined($ValidKeys{$Key})) { | |
401 | # FIXME | |
402 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); | |
403 | next; | |
404 | }; | |
405 | # care for correct sorting order and abstract from month and keys: | |
406 | # $Caption will be $Month or $Key, according to sorting order, | |
407 | # and $Key will be $Key or $Month, respectively | |
408 | my $Caption; | |
409 | if ($GroupBy eq 'key') { | |
410 | $Caption = $Key; | |
411 | $Key = $Month; | |
412 | } else { | |
413 | $Caption = $Month; | |
414 | } | |
78389b28 | 415 | # set output file handle |
edd250f2 | 416 | if (!$FileTempl) { |
78389b28 | 417 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
edd250f2 | 418 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
78389b28 | 419 | close $OUT if ($LastIteration); |
edd250f2 TH |
420 | # safeguards for filename creation: |
421 | # replace potential problem characters with '_' | |
422 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); | |
423 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; | |
424 | open ($OUT,">$FileName") | |
425 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", | |
426 | $FileName)); | |
78389b28 TH |
427 | $Handle = $OUT; |
428 | }; | |
edd250f2 TH |
429 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
430 | $Precision, $PadGroup); | |
431 | $LastIteration = $Caption; | |
741336c2 | 432 | }; |
edd250f2 | 433 | close $OUT if ($FileTempl); |
741336c2 TH |
434 | }; |
435 | ||
436 | ################################################################################ | |
437 | sub FormatOutput { | |
438 | ################################################################################ | |
3430c898 | 439 | ### format information for output according to format specifier |
edd250f2 TH |
440 | ### IN : $Format : format specifier |
441 | ### $Comments : print or suppress all comments for machine-readable output | |
442 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order | |
443 | ### $Key : newsgroup, client, ... or $Month, as above | |
444 | ### $Value : number of postings with that attribute | |
445 | ### $Precision: number of digits right of decimal point (0 or 2) | |
446 | ### $PadGroup : padding length for key field (optional) for 'pretty' | |
741336c2 | 447 | ### OUT: $Output: formatted output |
edd250f2 | 448 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; |
741336c2 | 449 | my ($Output); |
edd250f2 | 450 | # keep last caption in mind |
741336c2 | 451 | our ($LastIteration); |
edd250f2 | 452 | # create one line of output |
741336c2 | 453 | if ($Format eq 'dump') { |
edd250f2 TH |
454 | # output as dump (key value) |
455 | $Output = sprintf ("# %s:\n",$Caption) | |
456 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
457 | $Output .= sprintf ("%s %u\n",$Key,$Value); | |
741336c2 | 458 | } elsif ($Format eq 'list') { |
edd250f2 TH |
459 | # output as list (caption key value) |
460 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); | |
741336c2 | 461 | } elsif ($Format eq 'pretty') { |
edd250f2 TH |
462 | # output as a table |
463 | $Output = sprintf ("# ----- %s:\n",$Caption) | |
464 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
465 | $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : | |
466 | "%s %.*f\n",$Key,$Precision,$Value); | |
741336c2 TH |
467 | }; |
468 | return $Output; | |
469 | }; | |
470 | ||
471 | #####------------------------- QueryModifications -------------------------##### | |
472 | ||
473 | ################################################################################ | |
474 | sub SQLHierarchies { | |
475 | ################################################################################ | |
3430c898 TH |
476 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
477 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is | |
478 | ### true, accordingly) | |
741336c2 TH |
479 | ### IN : $ShowHierarchies: boolean value |
480 | ### OUT: SQL code | |
481 | my ($ShowHierarchies) = @_; | |
edd250f2 | 482 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
741336c2 TH |
483 | }; |
484 | ||
485 | ################################################################################ | |
d3b6810d | 486 | sub GetMaxLength { |
741336c2 | 487 | ################################################################################ |
3430c898 | 488 | ### get length of longest field in future query result |
edd250f2 TH |
489 | ### IN : $DBHandle : database handel |
490 | ### $Table : table to query | |
491 | ### $Field : field to check | |
492 | ### $WhereClause : WHERE clause | |
493 | ### $HavingClause: HAVING clause | |
494 | ### @BindVars : bind variables for WHERE clause | |
741336c2 | 495 | ### OUT: $Length: length of longest instnace of $Field |
edd250f2 TH |
496 | my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; |
497 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". | |
498 | "FROM %s %s %s",$Field,$Table, | |
499 | $WhereClause,$HavingClause ? | |
500 | 'GROUP BY newsgroup' . $HavingClause . | |
501 | ' ORDER BY LENGTH(newsgroup) '. | |
502 | 'DESC LIMIT 1': '')); | |
503 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". | |
504 | "for '%s' from table '%s': ". | |
505 | "$DBI::errstr",$Field,$Table)); | |
741336c2 TH |
506 | my ($Length) = $DBQuery->fetchrow_array; |
507 | return $Length; | |
508 | }; | |
509 | ||
edd250f2 TH |
510 | ################################################################################ |
511 | sub SQLSortOrder { | |
512 | ################################################################################ | |
513 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and | |
514 | ### $OptOrderBy (secondary sorting), both ascending or descending; | |
515 | ### descending sorting order is done by adding '-desc' | |
516 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' | |
517 | ### $OrderBy: secondary sort by month/newsgroups (default) | |
518 | ### or number of 'postings' | |
519 | ### OUT: a SQL ORDER BY clause | |
520 | my ($GroupBy,$OrderBy) = @_; | |
521 | my ($GroupSort,$OrderSort) = ('',''); | |
522 | # $GroupBy (primary sorting) | |
523 | if (!$GroupBy) { | |
524 | $GroupBy = 'month'; | |
525 | } else { | |
526 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); | |
527 | if ($GroupBy =~ /group/i) { | |
528 | $GroupBy = 'newsgroup'; | |
529 | } else { | |
530 | $GroupBy = 'month'; | |
531 | } | |
532 | } | |
533 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; | |
534 | # $OrderBy (secondary sorting) | |
535 | if (!$OrderBy) { | |
536 | $OrderBy = $Secondary; | |
537 | } else { | |
538 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); | |
539 | if ($OrderBy =~ /posting/i) { | |
540 | $OrderBy = "postings $OrderSort, $Secondary"; | |
541 | } else { | |
542 | $OrderBy = "$Secondary $OrderSort"; | |
543 | } | |
544 | } | |
545 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); | |
546 | }; | |
547 | ||
548 | ################################################################################ | |
549 | sub SQLParseOrder { | |
550 | ################################################################################ | |
551 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. | |
552 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' | |
553 | ### IN : $OrderOption: order option (see above) | |
554 | ### OUT: parameter to sort by, | |
555 | ### sort order ('DESC' or nothing, meaning 'ASC') | |
556 | my ($OrderOption) = @_; | |
557 | my $SortOrder = ''; | |
558 | if ($OrderOption =~ s/-?desc$//i) { | |
559 | $SortOrder = 'DESC'; | |
560 | } else { | |
561 | $OrderOption =~ s/-?asc$//i | |
562 | } | |
563 | return ($OrderOption,$SortOrder); | |
564 | }; | |
565 | ||
741336c2 TH |
566 | ################################################################################ |
567 | sub SQLGroupList { | |
568 | ################################################################################ | |
edd250f2 TH |
569 | ### explode list of newsgroups separated by : (with wildcards) |
570 | ### to a SQL 'WHERE' expression | |
741336c2 | 571 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
edd250f2 TH |
572 | ### OUT: SQL code to become part of a 'WHERE' clause, |
573 | ### list of newsgroups for SQL bindings | |
741336c2 | 574 | my ($Newsgroups) = @_; |
edd250f2 | 575 | # substitute '*' wildcard with SQL wildcard character '%' |
741336c2 | 576 | $Newsgroups =~ s/\*/%/g; |
edd250f2 TH |
577 | # just one newsgroup? |
578 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; | |
10459ac8 | 579 | my ($SQL,@WildcardGroups,@NoWildcardGroups); |
edd250f2 | 580 | # list of newsgroups separated by ':' |
741336c2 TH |
581 | my @GroupList = split /:/, $Newsgroups; |
582 | foreach (@GroupList) { | |
10459ac8 TH |
583 | if ($_ !~ /%/) { |
584 | # add to list of newsgroup names WITHOUT wildcard | |
585 | push (@NoWildcardGroups,$_); | |
586 | } else { | |
587 | # add to list of newsgroup names WITH wildcard | |
588 | push (@WildcardGroups,$_); | |
589 | # add wildcard to SQL clause | |
590 | # 'OR' if SQL clause is not empty | |
591 | $SQL .= ' OR ' if $SQL; | |
592 | $SQL .= 'newsgroup LIKE ?' | |
593 | } | |
741336c2 | 594 | }; |
10459ac8 TH |
595 | if (scalar(@NoWildcardGroups)) { |
596 | # add 'OR' if SQL clause is not empty | |
597 | $SQL .= ' OR ' if $SQL; | |
598 | if (scalar(@NoWildcardGroups) < 2) { | |
599 | # special case: just one newsgroup without wildcard | |
600 | $SQL .= 'newsgroup = ?'; | |
601 | } else { | |
602 | # create list of newsgroups to include: 'newsgroup IN (...)' | |
603 | $SQL .= 'newsgroup IN ('; | |
604 | my $SQLin; | |
605 | foreach (@NoWildcardGroups) { | |
606 | $SQLin .= ',' if $SQLin; | |
607 | $SQLin .= '?'; | |
608 | } | |
609 | # add list to SQL clause | |
610 | $SQL .= $SQLin .= ')'; | |
611 | } | |
612 | } | |
613 | # add brackets '()' to SQL clause as needed (more than one wildcard group) | |
614 | if (scalar(@WildcardGroups)) { | |
615 | $SQL = '(' . $SQL .')'; | |
616 | } | |
617 | # rebuild @GroupList in (now) correct order | |
618 | @GroupList = (@WildcardGroups,@NoWildcardGroups); | |
741336c2 TH |
619 | return ($SQL,@GroupList); |
620 | }; | |
621 | ||
edd250f2 TH |
622 | ################################################################################ |
623 | sub SQLGroupWildcard { | |
624 | ################################################################################ | |
625 | ### build a valid SQL 'WHERE' expression with or without wildcards | |
626 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard | |
627 | ### (group.name or group.name.%) | |
628 | ### OUT: SQL code to become part of a 'WHERE' clause | |
629 | my ($Newsgroup) = @_; | |
630 | # FIXME: check for validity | |
631 | if ($Newsgroup !~ /%/) { | |
632 | return 'newsgroup = ?'; | |
633 | } else { | |
634 | return 'newsgroup LIKE ?'; | |
635 | } | |
636 | }; | |
637 | ||
638 | ################################################################################ | |
639 | sub SQLSetBounds { | |
640 | ################################################################################ | |
641 | ### set upper and/or lower boundary (number of postings) | |
642 | ### IN : $Type: 'level', 'average', 'sum' or 'default' | |
643 | ### $LowBound,$UppBound: lower/upper boundary, respectively | |
644 | ### OUT: SQL code to become part of a WHERE or HAVING clause | |
645 | my ($Type,$LowBound,$UppBound) = @_; | |
646 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); | |
647 | if($LowBound and $UppBound and $LowBound > $UppBound) { | |
648 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". | |
649 | "$UppBound, exchanging boundaries."); | |
650 | ($LowBound,$UppBound) = ($UppBound,$LowBound); | |
651 | } | |
652 | # default to 'default' | |
653 | my $WhereHavingFunction = 'postings'; | |
654 | # set $LowBound to SQL statement: | |
655 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' | |
656 | if ($Type eq 'level') { | |
657 | $WhereHavingFunction = 'MIN(postings)' | |
658 | } elsif ($Type eq 'average') { | |
659 | $WhereHavingFunction = 'AVG(postings)' | |
660 | } elsif ($Type eq 'sum') { | |
661 | $WhereHavingFunction = 'SUM(postings)' | |
662 | } | |
663 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); | |
664 | # set $LowBound to SQL statement: | |
665 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' | |
666 | if ($Type eq 'level') { | |
667 | $WhereHavingFunction = 'MAX(postings)' | |
668 | } elsif ($Type eq 'average') { | |
669 | $WhereHavingFunction = 'AVG(postings)' | |
670 | } elsif ($Type eq 'sum') { | |
671 | $WhereHavingFunction = 'SUM(postings)' | |
672 | } | |
673 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); | |
674 | return ($LowBound,$UppBound); | |
675 | }; | |
676 | ||
677 | ################################################################################ | |
678 | sub SQLCheckNumber { | |
679 | ################################################################################ | |
680 | ### check if input is a valid positive integer; otherwise, make it one | |
681 | ### IN : @Numbers: array of parameters | |
682 | ### OUT: @Numbers: a valid positive integer | |
683 | my (@Numbers) = @_; | |
684 | foreach my $Number (@Numbers) { | |
685 | if ($Number and $Number < 0) { | |
686 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); | |
687 | $Number = -$Number; | |
688 | } | |
689 | $Number = '' if ($Number and $Number !~ /^\d+$/); | |
690 | } | |
691 | return @Numbers; | |
692 | }; | |
693 | ||
694 | ################################################################################ | |
695 | sub SQLBuildClause { | |
696 | ################################################################################ | |
697 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause | |
698 | ### from multiple expressions which *may* be empty | |
699 | ### IN : $Type: 'where', 'having', 'group' or 'order' | |
700 | ### @Expressions: array of expressions | |
701 | ### OUT: $SQLClause: a SQL clause | |
702 | my ($Type,@Expressions) = @_; | |
703 | my ($SQLClause,$Separator,$Statement); | |
704 | # set separator ('AND' or ',') | |
705 | if ($Type eq 'where' or $Type eq 'having') { | |
706 | $Separator = 'AND'; | |
707 | } else { | |
708 | $Separator = ','; | |
709 | } | |
710 | # set statement | |
711 | if ($Type eq 'where') { | |
712 | $Statement = 'WHERE'; | |
713 | } elsif ($Type eq 'order') { | |
714 | $Statement = 'ORDER BY'; | |
715 | } elsif ($Type eq 'having') { | |
716 | $Statement = 'HAVING'; | |
717 | } else { | |
718 | $Statement = 'GROUP BY'; | |
719 | } | |
720 | # build query from expressions with separators | |
721 | foreach my $Expression (@Expressions) { | |
722 | if ($Expression) { | |
723 | $SQLClause .= " $Separator " if ($SQLClause); | |
724 | $SQLClause .= $Expression; | |
725 | } | |
726 | } | |
727 | # add statement in front if not already present | |
728 | $SQLClause = " $Statement " . $SQLClause | |
729 | if ($SQLClause and $SQLClause !~ /$Statement/); | |
730 | return $SQLClause; | |
731 | }; | |
732 | ||
733 | ||
741336c2 TH |
734 | #####------------------------------- done ---------------------------------##### |
735 | 1; | |
736 | ||
737 |