Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
07c0b258 | 5 | # Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net> |
741336c2 TH |
6 | # |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
741336c2 | 19 | $MyVersion |
f6d15ca7 | 20 | $PackageVersion |
edd250f2 TH |
21 | $FullPath |
22 | $HomePath | |
23 | ShowVersion | |
24 | ShowPOD | |
741336c2 TH |
25 | ReadConfig |
26 | OverrideConfig | |
27 | InitDB | |
edd250f2 | 28 | Bleat |
741336c2 TH |
29 | ); |
30 | @EXPORT_OK = qw( | |
31 | GetTimePeriod | |
32 | LastMonth | |
741336c2 TH |
33 | SplitPeriod |
34 | ListMonth | |
35 | ListNewsgroups | |
1703b8e3 | 36 | ParseHierarchies |
ad609792 | 37 | ReadGroupList |
741336c2 TH |
38 | OutputData |
39 | FormatOutput | |
40 | SQLHierarchies | |
edd250f2 | 41 | SQLSortOrder |
741336c2 | 42 | SQLGroupList |
edd250f2 TH |
43 | SQLSetBounds |
44 | SQLBuildClause | |
d3b6810d | 45 | GetMaxLength |
741336c2 | 46 | ); |
edd250f2 TH |
47 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
48 | ListMonth)], | |
741336c2 | 49 | Output => [qw(OutputData FormatOutput)], |
edd250f2 TH |
50 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
51 | SQLSetBounds SQLBuildClause GetMaxLength)]); | |
4bbd46d3 TH |
52 | $VERSION = '0.01'; |
53 | our $PackageVersion = '0.01'; | |
741336c2 TH |
54 | |
55 | use Data::Dumper; | |
56 | use File::Basename; | |
741336c2 TH |
57 | |
58 | use Config::Auto; | |
59 | use DBI; | |
60 | ||
61 | #####-------------------------------- Vars --------------------------------##### | |
62 | ||
edd250f2 TH |
63 | # trim the path |
64 | our $FullPath = $0; | |
65 | our $HomePath = dirname($0); | |
66 | $0 =~ s%.*/%%; | |
67 | # set version string | |
68 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; | |
741336c2 TH |
69 | |
70 | #####------------------------------- Basics -------------------------------##### | |
71 | ||
741336c2 TH |
72 | ################################################################################ |
73 | ||
74 | ################################################################################ | |
75 | sub ShowVersion { | |
76 | ################################################################################ | |
77 | ### display version and exit | |
edd250f2 TH |
78 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
79 | print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n"; | |
80 | print "This program is free software; you may redistribute it ". | |
81 | "and/or modify it under the same terms as Perl itself.\n"; | |
741336c2 TH |
82 | exit(100); |
83 | }; | |
84 | ################################################################################ | |
85 | ||
86 | ################################################################################ | |
87 | sub ShowPOD { | |
88 | ################################################################################ | |
89 | ### feed myself to perldoc and exit | |
edd250f2 | 90 | exec('perldoc', $FullPath); |
741336c2 TH |
91 | exit(100); |
92 | }; | |
93 | ################################################################################ | |
94 | ||
95 | ################################################################################ | |
96 | sub ReadConfig { | |
97 | ################################################################################ | |
98 | ### read config via Config::Auto | |
99 | ### IN : $ConfFile: config filename | |
100 | ### OUT: reference to a hash containing the configuration | |
101 | my ($ConfFile) = @_; | |
db7696e5 TH |
102 | # mandatory configuration options |
103 | my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', | |
104 | 'DBTableRaw','DBTableGrps'); | |
105 | # read config via Config::Auto | |
106 | my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); | |
107 | my %Conf = %{$ConfR}; | |
108 | # check for mandatory options | |
109 | foreach (@Mandatory) { | |
110 | &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) | |
111 | if (!defined($Conf{$_})); | |
112 | } | |
113 | # $Conf{'TLH'} is checked in gatherstats.pl | |
114 | return $ConfR; | |
741336c2 TH |
115 | }; |
116 | ################################################################################ | |
117 | ||
118 | ################################################################################ | |
edd250f2 | 119 | sub OverrideConfig { |
741336c2 TH |
120 | ################################################################################ |
121 | ### override configuration values | |
122 | ### IN : $ConfigR : reference to configuration hash | |
123 | ### $OverrideR: reference to a hash containing overrides | |
124 | my ($ConfigR,$OverrideR) = @_; | |
125 | my %Override = %$OverrideR; | |
3430c898 | 126 | # Config hash empty? |
edd250f2 TH |
127 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
128 | if ( keys %$ConfigR < 1); | |
3430c898 | 129 | # return if no overrides |
741336c2 TH |
130 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
131 | foreach my $Key (keys %Override) { | |
132 | $$ConfigR{$Key} = $Override{$Key}; | |
133 | }; | |
134 | }; | |
135 | ################################################################################ | |
136 | ||
137 | ################################################################################ | |
138 | sub InitDB { | |
139 | ################################################################################ | |
140 | ### initialise database connection | |
141 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 142 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
143 | ### OUT: DBHandle |
144 | my ($ConfigR,$Die) = @_; | |
145 | my %Conf = %$ConfigR; | |
edd250f2 TH |
146 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
147 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, | |
148 | $Conf{'DBHost'}), $Conf{'DBUser'}, | |
149 | $Conf{'DBPw'}, { PrintError => 0 }); | |
741336c2 | 150 | if (!$DBHandle) { |
edd250f2 TH |
151 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
152 | &Bleat(1,$DBI::errstr); | |
741336c2 TH |
153 | }; |
154 | return $DBHandle; | |
155 | }; | |
156 | ################################################################################ | |
157 | ||
edd250f2 TH |
158 | ################################################################################ |
159 | sub Bleat { | |
160 | ################################################################################ | |
161 | ### print warning or error messages and terminate in case of error | |
162 | ### IN : $Level : 1 = warning, 2 = error | |
163 | ### $Message: warning or error message | |
164 | my ($Level,$Message) = @_; | |
165 | if ($Level == 1) { | |
166 | warn "$0 W: $Message\n" | |
167 | } elsif ($Level == 2) { | |
168 | die "$0 E: $Message\n" | |
169 | } else { | |
170 | print "$0: $Message\n" | |
171 | } | |
172 | }; | |
173 | ################################################################################ | |
174 | ||
741336c2 TH |
175 | #####------------------------------ GetStats ------------------------------##### |
176 | ||
177 | ################################################################################ | |
178 | sub ListNewsgroups { | |
179 | ################################################################################ | |
3430c898 TH |
180 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
181 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
182 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 | 183 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
89db2f90 | 184 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
ad609792 TH |
185 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
186 | ### as keys | |
187 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
89db2f90 | 188 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
ad609792 | 189 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
741336c2 TH |
190 | my %Newsgroups; |
191 | chomp($Newsgroups); | |
192 | # remove whitespace from contents of Newsgroups: | |
193 | $Newsgroups =~ s/\s//; | |
194 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
195 | for (split /,/, $Newsgroups) { | |
89db2f90 TH |
196 | # don't count newsgroup/hierarchy in wrong TLH |
197 | next if($TLH and !/^$TLH/); | |
ad609792 TH |
198 | # don't count invalid newsgroups |
199 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
0dc13b39 | 200 | warn (sprintf("DROPPED: %s\n",$_)); |
ad609792 TH |
201 | next; |
202 | } | |
741336c2 TH |
203 | # add original newsgroup to %Newsgroups |
204 | $Newsgroups{$_} = 1; | |
205 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
206 | # i.e. de.alt.ALL and de.ALL | |
207 | foreach (ParseHierarchies($_)) { | |
208 | $Newsgroups{$_.'.ALL'} = 1; | |
209 | } | |
210 | }; | |
211 | return %Newsgroups; | |
212 | }; | |
213 | ||
214 | ################################################################################ | |
215 | sub ParseHierarchies { | |
216 | ################################################################################ | |
3430c898 TH |
217 | ### return a list of all hierarchy levels a newsgroup belongs to |
218 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
219 | ### IN : $Newsgroup : a newsgroup name |
220 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
221 | my ($Newsgroup) = @_; | |
222 | my @Hierarchies; | |
223 | # strip trailing dots | |
224 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
225 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
226 | # i.e. de.alt.test: "de.alt" and "de" | |
227 | while ($Newsgroup =~ /\./) { | |
228 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
229 | push @Hierarchies, $Newsgroup; | |
230 | }; | |
231 | return @Hierarchies; | |
232 | }; | |
233 | ||
ad609792 TH |
234 | ################################################################################ |
235 | sub ReadGroupList { | |
236 | ################################################################################ | |
237 | ### read a list of valid newsgroups from file (each group on one line, | |
238 | ### ignoring everything after the first whitespace and so accepting files | |
239 | ### in checkgroups format as well as (parts of) an INN active file) | |
240 | ### IN : $Filename : file to read | |
241 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
242 | my ($Filename) = @_; | |
243 | my %ValidGroups; | |
edd250f2 | 244 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
ad609792 | 245 | while (<$LIST>) { |
7662b106 | 246 | s/^\s*(\S+).*$/$1/; |
ad609792 | 247 | chomp; |
7662b106 | 248 | next if /^$/; |
ad609792 TH |
249 | $ValidGroups{$_} = '1'; |
250 | }; | |
251 | close $LIST; | |
252 | return \%ValidGroups; | |
253 | }; | |
254 | ||
741336c2 TH |
255 | ################################################################################ |
256 | ||
257 | #####----------------------------- TimePeriods ----------------------------##### | |
258 | ||
259 | ################################################################################ | |
260 | sub GetTimePeriod { | |
261 | ################################################################################ | |
edd250f2 TH |
262 | ### get a time period to act on from --month option; |
263 | ### if empty, default to last month | |
264 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' | |
265 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause | |
266 | ### of the chosen time period | |
267 | my ($Month) = @_; | |
268 | # define result variables | |
269 | my ($Verbal, $SQL); | |
270 | # define a regular expression for a month | |
271 | my $REMonth = '\d{4}-\d{2}'; | |
272 | ||
273 | # default to last month if option is not set | |
274 | if(!$Month) { | |
275 | $Month = &LastMonth; | |
276 | } | |
277 | ||
278 | # check for valid input | |
279 | if ($Month =~ /^$REMonth$/) { | |
280 | # single month (YYYY-MM) | |
281 | ($Month) = &CheckMonth($Month); | |
282 | $Verbal = $Month; | |
283 | $SQL = sprintf("month = '%s'",$Month); | |
284 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { | |
285 | # time period (YYYY-MM:YYYY-MM) | |
286 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); | |
287 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); | |
288 | } elsif ($Month =~ /^all$/i) { | |
289 | # special case: ALL | |
290 | $Verbal = 'all time'; | |
291 | $SQL = ''; | |
741336c2 | 292 | } else { |
edd250f2 TH |
293 | # invalid input |
294 | return (undef,undef); | |
295 | } | |
296 | ||
297 | return ($Verbal,$SQL); | |
741336c2 TH |
298 | }; |
299 | ||
300 | ################################################################################ | |
301 | sub LastMonth { | |
302 | ################################################################################ | |
3430c898 | 303 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
304 | ### OUT: last month as YYYY-MM |
305 | # get today's date | |
306 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
307 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
308 | $Year += 1900; | |
309 | if ($Month < 1) { | |
310 | $Month = 12; | |
311 | $Year--; | |
312 | }; | |
313 | # return last month | |
314 | return sprintf('%4d-%02d',$Year,$Month); | |
315 | }; | |
316 | ||
317 | ################################################################################ | |
318 | sub CheckMonth { | |
319 | ################################################################################ | |
edd250f2 TH |
320 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
321 | ### otherwise, fix it | |
322 | ### IN : @Month: array of month | |
323 | ### OUT: @Month: a valid month | |
324 | my (@Month) = @_; | |
325 | foreach my $Month (@Month) { | |
326 | my ($OldMonth) = $Month; | |
327 | my ($CalMonth) = substr ($Month, -2); | |
328 | if ($CalMonth < 1 or $CalMonth > 12) { | |
329 | $CalMonth = '12' if $CalMonth > 12; | |
330 | $CalMonth = '01' if $CalMonth < 1; | |
331 | substr($Month, -2) = $CalMonth; | |
332 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". | |
333 | "and '12'), set to '%s'.",$OldMonth,$Month)); | |
334 | } | |
335 | } | |
336 | return @Month; | |
741336c2 TH |
337 | }; |
338 | ||
339 | ################################################################################ | |
340 | sub SplitPeriod { | |
341 | ################################################################################ | |
3430c898 | 342 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 | 343 | ### IN : $Period: time period |
edd250f2 | 344 | ### OUT: $StartMonth, $EndMonth |
741336c2 | 345 | my ($Period) = @_; |
741336c2 | 346 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
edd250f2 | 347 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
741336c2 TH |
348 | # switch parameters as necessary |
349 | if ($EndMonth gt $StartMonth) { | |
350 | return ($StartMonth, $EndMonth); | |
351 | } else { | |
352 | return ($EndMonth, $StartMonth); | |
353 | }; | |
354 | }; | |
355 | ||
356 | ################################################################################ | |
357 | sub ListMonth { | |
358 | ################################################################################ | |
3430c898 | 359 | ### return a list of months (YYYY-MM) between start and end month |
880c3eb2 TH |
360 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
361 | ### OUT: @Months: array containing all months from $MonthExpression enumerated | |
362 | my ($MonthExpression )= @_; | |
363 | # return if single month | |
364 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); | |
365 | # parse $MonthExpression | |
366 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); | |
741336c2 TH |
367 | # set $Year, $Month from $StartMonth |
368 | my ($Year, $Month) = split /-/, $StartMonth; | |
369 | # define @Months | |
370 | my (@Months); | |
371 | until ("$Year-$Month" gt $EndMonth) { | |
372 | push @Months, "$Year-$Month"; | |
373 | $Month = "$Month"; # force string context | |
374 | $Month++; | |
375 | if ($Month > 12) { | |
376 | $Month = '01'; | |
377 | $Year++; | |
378 | }; | |
379 | }; | |
380 | return @Months; | |
381 | }; | |
382 | ||
383 | #####---------------------------- OutputFormats ---------------------------##### | |
384 | ||
385 | ################################################################################ | |
386 | sub OutputData { | |
387 | ################################################################################ | |
3430c898 | 388 | ### read database query results from DBHandle and print results with formatting |
edd250f2 TH |
389 | ### IN : $Format : format specifier |
390 | ### $Comments : print or suppress all comments for machine-readable output | |
391 | ### $GroupBy : primary sorting order (month or key) | |
392 | ### $Precision: number of digits right of decimal point (0 or 2) | |
393 | ### $ValidKeys: reference to a hash containing all valid keys | |
394 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM | |
395 | ### $DBQuery : database query handle with executed query, | |
78389b28 | 396 | ### containing $Month, $Key, $Value |
b342fcf0 TH |
397 | ### $PadField : padding length for key field (optional) for 'pretty' |
398 | ### $PadValue : padding length for value field (optional) for 'pretty' | |
edd250f2 | 399 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, |
b342fcf0 | 400 | $DBQuery, $PadField, $PadValue) = @_; |
edd250f2 TH |
401 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; |
402 | my ($FileName, $Handle, $OUT); | |
78389b28 | 403 | our $LastIteration; |
edd250f2 TH |
404 | |
405 | # define output types | |
406 | my %LegalOutput; | |
b342fcf0 | 407 | @LegalOutput{('dump','list','pretty')} = (); |
edd250f2 TH |
408 | # bail out if format is unknown |
409 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); | |
410 | ||
741336c2 | 411 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
edd250f2 TH |
412 | # don't display invalid keys |
413 | if(%ValidKeys and !defined($ValidKeys{$Key})) { | |
414 | # FIXME | |
415 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); | |
416 | next; | |
417 | }; | |
418 | # care for correct sorting order and abstract from month and keys: | |
419 | # $Caption will be $Month or $Key, according to sorting order, | |
420 | # and $Key will be $Key or $Month, respectively | |
421 | my $Caption; | |
422 | if ($GroupBy eq 'key') { | |
423 | $Caption = $Key; | |
424 | $Key = $Month; | |
425 | } else { | |
426 | $Caption = $Month; | |
427 | } | |
78389b28 | 428 | # set output file handle |
edd250f2 | 429 | if (!$FileTempl) { |
78389b28 | 430 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
edd250f2 | 431 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
78389b28 | 432 | close $OUT if ($LastIteration); |
edd250f2 TH |
433 | # safeguards for filename creation: |
434 | # replace potential problem characters with '_' | |
435 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); | |
436 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; | |
437 | open ($OUT,">$FileName") | |
438 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", | |
439 | $FileName)); | |
78389b28 TH |
440 | $Handle = $OUT; |
441 | }; | |
edd250f2 | 442 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
b342fcf0 | 443 | $Precision, $PadField, $PadValue); |
edd250f2 | 444 | $LastIteration = $Caption; |
741336c2 | 445 | }; |
edd250f2 | 446 | close $OUT if ($FileTempl); |
741336c2 TH |
447 | }; |
448 | ||
449 | ################################################################################ | |
450 | sub FormatOutput { | |
451 | ################################################################################ | |
3430c898 | 452 | ### format information for output according to format specifier |
edd250f2 TH |
453 | ### IN : $Format : format specifier |
454 | ### $Comments : print or suppress all comments for machine-readable output | |
455 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order | |
456 | ### $Key : newsgroup, client, ... or $Month, as above | |
457 | ### $Value : number of postings with that attribute | |
458 | ### $Precision: number of digits right of decimal point (0 or 2) | |
b342fcf0 TH |
459 | ### $PadField : padding length for key field (optional) for 'pretty' |
460 | ### $PadValue : padding length for value field (optional) for 'pretty' | |
741336c2 | 461 | ### OUT: $Output: formatted output |
b342fcf0 TH |
462 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField, |
463 | $PadValue) = @_; | |
741336c2 | 464 | my ($Output); |
edd250f2 | 465 | # keep last caption in mind |
741336c2 | 466 | our ($LastIteration); |
edd250f2 | 467 | # create one line of output |
741336c2 | 468 | if ($Format eq 'dump') { |
edd250f2 TH |
469 | # output as dump (key value) |
470 | $Output = sprintf ("# %s:\n",$Caption) | |
471 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
472 | $Output .= sprintf ("%s %u\n",$Key,$Value); | |
741336c2 | 473 | } elsif ($Format eq 'list') { |
edd250f2 TH |
474 | # output as list (caption key value) |
475 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); | |
741336c2 | 476 | } elsif ($Format eq 'pretty') { |
edd250f2 TH |
477 | # output as a table |
478 | $Output = sprintf ("# ----- %s:\n",$Caption) | |
479 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
b342fcf0 TH |
480 | # increase $PadValue for numbers with decimal point |
481 | $PadValue += $Precision+1 if $Precision; | |
482 | # add padding if $PadField is set; $PadValue HAS to be set then | |
483 | $Output .= sprintf ($PadField ? | |
484 | sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) : | |
485 | "%s%s %.*f\n",$Key,$Comments ? ':' : '', | |
486 | $Precision,$Value); | |
741336c2 TH |
487 | }; |
488 | return $Output; | |
489 | }; | |
490 | ||
491 | #####------------------------- QueryModifications -------------------------##### | |
492 | ||
493 | ################################################################################ | |
494 | sub SQLHierarchies { | |
495 | ################################################################################ | |
3430c898 TH |
496 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
497 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is | |
498 | ### true, accordingly) | |
741336c2 TH |
499 | ### IN : $ShowHierarchies: boolean value |
500 | ### OUT: SQL code | |
501 | my ($ShowHierarchies) = @_; | |
edd250f2 | 502 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
741336c2 TH |
503 | }; |
504 | ||
505 | ################################################################################ | |
d3b6810d | 506 | sub GetMaxLength { |
741336c2 | 507 | ################################################################################ |
b342fcf0 TH |
508 | ### get length of longest fields in future query result |
509 | ### IN : $DBHandle : database handle | |
edd250f2 | 510 | ### $Table : table to query |
b342fcf0 TH |
511 | ### $Field : field (key!, i.e. month, newsgroup, ...) to check |
512 | ### $Value : field (value!, i.e. postings) to check | |
edd250f2 TH |
513 | ### $WhereClause : WHERE clause |
514 | ### $HavingClause: HAVING clause | |
515 | ### @BindVars : bind variables for WHERE clause | |
b342fcf0 TH |
516 | ### OUT: $FieldLength : length of longest instance of $Field |
517 | ### $ValueLength : length of longest instance of $Value | |
518 | my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_; | |
519 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),". | |
520 | "MAX(%s) ". | |
521 | "FROM %s %s %s",$Field,,$Value, | |
522 | $Table,$WhereClause,$HavingClause ? | |
edd250f2 TH |
523 | 'GROUP BY newsgroup' . $HavingClause . |
524 | ' ORDER BY LENGTH(newsgroup) '. | |
525 | 'DESC LIMIT 1': '')); | |
526 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". | |
527 | "for '%s' from table '%s': ". | |
528 | "$DBI::errstr",$Field,$Table)); | |
b342fcf0 TH |
529 | my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array; |
530 | my $ValueLength = length($ValueMax) if ($ValueMax); | |
531 | return ($FieldLength,$ValueLength); | |
741336c2 TH |
532 | }; |
533 | ||
edd250f2 TH |
534 | ################################################################################ |
535 | sub SQLSortOrder { | |
536 | ################################################################################ | |
537 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and | |
538 | ### $OptOrderBy (secondary sorting), both ascending or descending; | |
539 | ### descending sorting order is done by adding '-desc' | |
540 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' | |
541 | ### $OrderBy: secondary sort by month/newsgroups (default) | |
542 | ### or number of 'postings' | |
543 | ### OUT: a SQL ORDER BY clause | |
544 | my ($GroupBy,$OrderBy) = @_; | |
545 | my ($GroupSort,$OrderSort) = ('',''); | |
546 | # $GroupBy (primary sorting) | |
547 | if (!$GroupBy) { | |
548 | $GroupBy = 'month'; | |
549 | } else { | |
550 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); | |
551 | if ($GroupBy =~ /group/i) { | |
552 | $GroupBy = 'newsgroup'; | |
553 | } else { | |
554 | $GroupBy = 'month'; | |
555 | } | |
556 | } | |
557 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; | |
558 | # $OrderBy (secondary sorting) | |
559 | if (!$OrderBy) { | |
560 | $OrderBy = $Secondary; | |
561 | } else { | |
562 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); | |
563 | if ($OrderBy =~ /posting/i) { | |
564 | $OrderBy = "postings $OrderSort, $Secondary"; | |
565 | } else { | |
566 | $OrderBy = "$Secondary $OrderSort"; | |
567 | } | |
568 | } | |
569 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); | |
570 | }; | |
571 | ||
572 | ################################################################################ | |
573 | sub SQLParseOrder { | |
574 | ################################################################################ | |
575 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. | |
576 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' | |
577 | ### IN : $OrderOption: order option (see above) | |
578 | ### OUT: parameter to sort by, | |
579 | ### sort order ('DESC' or nothing, meaning 'ASC') | |
580 | my ($OrderOption) = @_; | |
581 | my $SortOrder = ''; | |
582 | if ($OrderOption =~ s/-?desc$//i) { | |
583 | $SortOrder = 'DESC'; | |
584 | } else { | |
585 | $OrderOption =~ s/-?asc$//i | |
586 | } | |
587 | return ($OrderOption,$SortOrder); | |
588 | }; | |
589 | ||
741336c2 TH |
590 | ################################################################################ |
591 | sub SQLGroupList { | |
592 | ################################################################################ | |
edd250f2 TH |
593 | ### explode list of newsgroups separated by : (with wildcards) |
594 | ### to a SQL 'WHERE' expression | |
741336c2 | 595 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
edd250f2 TH |
596 | ### OUT: SQL code to become part of a 'WHERE' clause, |
597 | ### list of newsgroups for SQL bindings | |
741336c2 | 598 | my ($Newsgroups) = @_; |
edd250f2 | 599 | # substitute '*' wildcard with SQL wildcard character '%' |
741336c2 | 600 | $Newsgroups =~ s/\*/%/g; |
c30822b4 | 601 | return (undef,undef) if !CheckValidNewsgroups($Newsgroups); |
edd250f2 TH |
602 | # just one newsgroup? |
603 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; | |
10459ac8 | 604 | my ($SQL,@WildcardGroups,@NoWildcardGroups); |
edd250f2 | 605 | # list of newsgroups separated by ':' |
741336c2 TH |
606 | my @GroupList = split /:/, $Newsgroups; |
607 | foreach (@GroupList) { | |
10459ac8 TH |
608 | if ($_ !~ /%/) { |
609 | # add to list of newsgroup names WITHOUT wildcard | |
610 | push (@NoWildcardGroups,$_); | |
611 | } else { | |
612 | # add to list of newsgroup names WITH wildcard | |
613 | push (@WildcardGroups,$_); | |
614 | # add wildcard to SQL clause | |
615 | # 'OR' if SQL clause is not empty | |
616 | $SQL .= ' OR ' if $SQL; | |
617 | $SQL .= 'newsgroup LIKE ?' | |
618 | } | |
741336c2 | 619 | }; |
10459ac8 TH |
620 | if (scalar(@NoWildcardGroups)) { |
621 | # add 'OR' if SQL clause is not empty | |
622 | $SQL .= ' OR ' if $SQL; | |
623 | if (scalar(@NoWildcardGroups) < 2) { | |
624 | # special case: just one newsgroup without wildcard | |
625 | $SQL .= 'newsgroup = ?'; | |
626 | } else { | |
627 | # create list of newsgroups to include: 'newsgroup IN (...)' | |
628 | $SQL .= 'newsgroup IN ('; | |
629 | my $SQLin; | |
630 | foreach (@NoWildcardGroups) { | |
631 | $SQLin .= ',' if $SQLin; | |
632 | $SQLin .= '?'; | |
633 | } | |
634 | # add list to SQL clause | |
635 | $SQL .= $SQLin .= ')'; | |
636 | } | |
637 | } | |
638 | # add brackets '()' to SQL clause as needed (more than one wildcard group) | |
639 | if (scalar(@WildcardGroups)) { | |
640 | $SQL = '(' . $SQL .')'; | |
641 | } | |
642 | # rebuild @GroupList in (now) correct order | |
643 | @GroupList = (@WildcardGroups,@NoWildcardGroups); | |
741336c2 TH |
644 | return ($SQL,@GroupList); |
645 | }; | |
646 | ||
edd250f2 TH |
647 | ################################################################################ |
648 | sub SQLGroupWildcard { | |
649 | ################################################################################ | |
650 | ### build a valid SQL 'WHERE' expression with or without wildcards | |
651 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard | |
652 | ### (group.name or group.name.%) | |
653 | ### OUT: SQL code to become part of a 'WHERE' clause | |
654 | my ($Newsgroup) = @_; | |
edd250f2 TH |
655 | if ($Newsgroup !~ /%/) { |
656 | return 'newsgroup = ?'; | |
657 | } else { | |
658 | return 'newsgroup LIKE ?'; | |
659 | } | |
660 | }; | |
661 | ||
662 | ################################################################################ | |
663 | sub SQLSetBounds { | |
664 | ################################################################################ | |
665 | ### set upper and/or lower boundary (number of postings) | |
666 | ### IN : $Type: 'level', 'average', 'sum' or 'default' | |
667 | ### $LowBound,$UppBound: lower/upper boundary, respectively | |
668 | ### OUT: SQL code to become part of a WHERE or HAVING clause | |
669 | my ($Type,$LowBound,$UppBound) = @_; | |
670 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); | |
671 | if($LowBound and $UppBound and $LowBound > $UppBound) { | |
672 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". | |
673 | "$UppBound, exchanging boundaries."); | |
674 | ($LowBound,$UppBound) = ($UppBound,$LowBound); | |
675 | } | |
676 | # default to 'default' | |
677 | my $WhereHavingFunction = 'postings'; | |
678 | # set $LowBound to SQL statement: | |
679 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' | |
680 | if ($Type eq 'level') { | |
681 | $WhereHavingFunction = 'MIN(postings)' | |
682 | } elsif ($Type eq 'average') { | |
683 | $WhereHavingFunction = 'AVG(postings)' | |
684 | } elsif ($Type eq 'sum') { | |
685 | $WhereHavingFunction = 'SUM(postings)' | |
686 | } | |
687 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); | |
688 | # set $LowBound to SQL statement: | |
689 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' | |
690 | if ($Type eq 'level') { | |
691 | $WhereHavingFunction = 'MAX(postings)' | |
692 | } elsif ($Type eq 'average') { | |
693 | $WhereHavingFunction = 'AVG(postings)' | |
694 | } elsif ($Type eq 'sum') { | |
695 | $WhereHavingFunction = 'SUM(postings)' | |
696 | } | |
697 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); | |
698 | return ($LowBound,$UppBound); | |
699 | }; | |
700 | ||
701 | ################################################################################ | |
702 | sub SQLCheckNumber { | |
703 | ################################################################################ | |
704 | ### check if input is a valid positive integer; otherwise, make it one | |
705 | ### IN : @Numbers: array of parameters | |
706 | ### OUT: @Numbers: a valid positive integer | |
707 | my (@Numbers) = @_; | |
708 | foreach my $Number (@Numbers) { | |
709 | if ($Number and $Number < 0) { | |
710 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); | |
711 | $Number = -$Number; | |
712 | } | |
713 | $Number = '' if ($Number and $Number !~ /^\d+$/); | |
714 | } | |
715 | return @Numbers; | |
716 | }; | |
717 | ||
718 | ################################################################################ | |
719 | sub SQLBuildClause { | |
720 | ################################################################################ | |
721 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause | |
722 | ### from multiple expressions which *may* be empty | |
723 | ### IN : $Type: 'where', 'having', 'group' or 'order' | |
724 | ### @Expressions: array of expressions | |
725 | ### OUT: $SQLClause: a SQL clause | |
726 | my ($Type,@Expressions) = @_; | |
727 | my ($SQLClause,$Separator,$Statement); | |
728 | # set separator ('AND' or ',') | |
729 | if ($Type eq 'where' or $Type eq 'having') { | |
730 | $Separator = 'AND'; | |
731 | } else { | |
732 | $Separator = ','; | |
733 | } | |
734 | # set statement | |
735 | if ($Type eq 'where') { | |
736 | $Statement = 'WHERE'; | |
737 | } elsif ($Type eq 'order') { | |
738 | $Statement = 'ORDER BY'; | |
739 | } elsif ($Type eq 'having') { | |
740 | $Statement = 'HAVING'; | |
741 | } else { | |
742 | $Statement = 'GROUP BY'; | |
743 | } | |
744 | # build query from expressions with separators | |
745 | foreach my $Expression (@Expressions) { | |
746 | if ($Expression) { | |
747 | $SQLClause .= " $Separator " if ($SQLClause); | |
748 | $SQLClause .= $Expression; | |
749 | } | |
750 | } | |
751 | # add statement in front if not already present | |
752 | $SQLClause = " $Statement " . $SQLClause | |
753 | if ($SQLClause and $SQLClause !~ /$Statement/); | |
754 | return $SQLClause; | |
755 | }; | |
756 | ||
c30822b4 TH |
757 | #####--------------------------- Verifications ----------------------------##### |
758 | ||
759 | ################################################################################ | |
760 | sub CheckValidNewsgroups { | |
761 | ################################################################################ | |
762 | ### syntax check of newgroup list | |
763 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) | |
764 | ### OUT: boolean | |
765 | my ($Newsgroups) = @_; | |
766 | my $InvalidCharRegExp = ',; '; | |
767 | return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1; | |
768 | }; | |
769 | ||
edd250f2 | 770 | |
741336c2 TH |
771 | #####------------------------------- done ---------------------------------##### |
772 | 1; | |
773 | ||
774 |