Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
edd250f2 | 5 | # Copyright (c) 2010-2012 Thomas Hochstein <thh@inter.net> |
741336c2 TH |
6 | # |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
741336c2 | 19 | $MyVersion |
f6d15ca7 | 20 | $PackageVersion |
edd250f2 TH |
21 | $FullPath |
22 | $HomePath | |
23 | ShowVersion | |
24 | ShowPOD | |
741336c2 TH |
25 | ReadConfig |
26 | OverrideConfig | |
27 | InitDB | |
edd250f2 | 28 | Bleat |
741336c2 TH |
29 | ); |
30 | @EXPORT_OK = qw( | |
31 | GetTimePeriod | |
32 | LastMonth | |
741336c2 TH |
33 | SplitPeriod |
34 | ListMonth | |
35 | ListNewsgroups | |
1703b8e3 | 36 | ParseHierarchies |
ad609792 | 37 | ReadGroupList |
741336c2 TH |
38 | OutputData |
39 | FormatOutput | |
40 | SQLHierarchies | |
edd250f2 | 41 | SQLSortOrder |
741336c2 | 42 | SQLGroupList |
edd250f2 TH |
43 | SQLSetBounds |
44 | SQLBuildClause | |
d3b6810d | 45 | GetMaxLength |
741336c2 | 46 | ); |
edd250f2 TH |
47 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod |
48 | ListMonth)], | |
741336c2 | 49 | Output => [qw(OutputData FormatOutput)], |
edd250f2 TH |
50 | SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList |
51 | SQLSetBounds SQLBuildClause GetMaxLength)]); | |
4bbd46d3 TH |
52 | $VERSION = '0.01'; |
53 | our $PackageVersion = '0.01'; | |
741336c2 TH |
54 | |
55 | use Data::Dumper; | |
56 | use File::Basename; | |
741336c2 TH |
57 | |
58 | use Config::Auto; | |
59 | use DBI; | |
60 | ||
61 | #####-------------------------------- Vars --------------------------------##### | |
62 | ||
edd250f2 TH |
63 | # trim the path |
64 | our $FullPath = $0; | |
65 | our $HomePath = dirname($0); | |
66 | $0 =~ s%.*/%%; | |
67 | # set version string | |
68 | our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)"; | |
741336c2 TH |
69 | |
70 | #####------------------------------- Basics -------------------------------##### | |
71 | ||
741336c2 TH |
72 | ################################################################################ |
73 | ||
74 | ################################################################################ | |
75 | sub ShowVersion { | |
76 | ################################################################################ | |
77 | ### display version and exit | |
edd250f2 TH |
78 | print "NewsStats v$PackageVersion\n$MyVersion\n"; |
79 | print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n"; | |
80 | print "This program is free software; you may redistribute it ". | |
81 | "and/or modify it under the same terms as Perl itself.\n"; | |
741336c2 TH |
82 | exit(100); |
83 | }; | |
84 | ################################################################################ | |
85 | ||
86 | ################################################################################ | |
87 | sub ShowPOD { | |
88 | ################################################################################ | |
89 | ### feed myself to perldoc and exit | |
edd250f2 | 90 | exec('perldoc', $FullPath); |
741336c2 TH |
91 | exit(100); |
92 | }; | |
93 | ################################################################################ | |
94 | ||
95 | ################################################################################ | |
96 | sub ReadConfig { | |
97 | ################################################################################ | |
98 | ### read config via Config::Auto | |
99 | ### IN : $ConfFile: config filename | |
100 | ### OUT: reference to a hash containing the configuration | |
101 | my ($ConfFile) = @_; | |
db7696e5 TH |
102 | # mandatory configuration options |
103 | my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase', | |
104 | 'DBTableRaw','DBTableGrps'); | |
105 | # read config via Config::Auto | |
106 | my $ConfR = Config::Auto::parse($ConfFile, format => 'equal'); | |
107 | my %Conf = %{$ConfR}; | |
108 | # check for mandatory options | |
109 | foreach (@Mandatory) { | |
110 | &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_)) | |
111 | if (!defined($Conf{$_})); | |
112 | } | |
113 | # $Conf{'TLH'} is checked in gatherstats.pl | |
114 | return $ConfR; | |
741336c2 TH |
115 | }; |
116 | ################################################################################ | |
117 | ||
118 | ################################################################################ | |
edd250f2 | 119 | sub OverrideConfig { |
741336c2 TH |
120 | ################################################################################ |
121 | ### override configuration values | |
122 | ### IN : $ConfigR : reference to configuration hash | |
123 | ### $OverrideR: reference to a hash containing overrides | |
124 | my ($ConfigR,$OverrideR) = @_; | |
125 | my %Override = %$OverrideR; | |
3430c898 | 126 | # Config hash empty? |
edd250f2 TH |
127 | &Bleat(1,"Empty configuration hash passed to OverrideConfig()") |
128 | if ( keys %$ConfigR < 1); | |
3430c898 | 129 | # return if no overrides |
741336c2 TH |
130 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
131 | foreach my $Key (keys %Override) { | |
132 | $$ConfigR{$Key} = $Override{$Key}; | |
133 | }; | |
134 | }; | |
135 | ################################################################################ | |
136 | ||
137 | ################################################################################ | |
138 | sub InitDB { | |
139 | ################################################################################ | |
140 | ### initialise database connection | |
141 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 142 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
143 | ### OUT: DBHandle |
144 | my ($ConfigR,$Die) = @_; | |
145 | my %Conf = %$ConfigR; | |
edd250f2 TH |
146 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s', |
147 | $Conf{'DBDriver'},$Conf{'DBDatabase'}, | |
148 | $Conf{'DBHost'}), $Conf{'DBUser'}, | |
149 | $Conf{'DBPw'}, { PrintError => 0 }); | |
741336c2 | 150 | if (!$DBHandle) { |
edd250f2 TH |
151 | &Bleat(2,$DBI::errstr) if (defined($Die) and $Die); |
152 | &Bleat(1,$DBI::errstr); | |
741336c2 TH |
153 | }; |
154 | return $DBHandle; | |
155 | }; | |
156 | ################################################################################ | |
157 | ||
edd250f2 TH |
158 | ################################################################################ |
159 | sub Bleat { | |
160 | ################################################################################ | |
161 | ### print warning or error messages and terminate in case of error | |
162 | ### IN : $Level : 1 = warning, 2 = error | |
163 | ### $Message: warning or error message | |
164 | my ($Level,$Message) = @_; | |
165 | if ($Level == 1) { | |
166 | warn "$0 W: $Message\n" | |
167 | } elsif ($Level == 2) { | |
168 | die "$0 E: $Message\n" | |
169 | } else { | |
170 | print "$0: $Message\n" | |
171 | } | |
172 | }; | |
173 | ################################################################################ | |
174 | ||
741336c2 TH |
175 | #####------------------------------ GetStats ------------------------------##### |
176 | ||
177 | ################################################################################ | |
178 | sub ListNewsgroups { | |
179 | ################################################################################ | |
3430c898 TH |
180 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
181 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
182 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 | 183 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
89db2f90 | 184 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
ad609792 TH |
185 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
186 | ### as keys | |
187 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
89db2f90 | 188 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
ad609792 | 189 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
741336c2 TH |
190 | my %Newsgroups; |
191 | chomp($Newsgroups); | |
192 | # remove whitespace from contents of Newsgroups: | |
193 | $Newsgroups =~ s/\s//; | |
194 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
195 | for (split /,/, $Newsgroups) { | |
89db2f90 TH |
196 | # don't count newsgroup/hierarchy in wrong TLH |
197 | next if($TLH and !/^$TLH/); | |
ad609792 TH |
198 | # don't count invalid newsgroups |
199 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
0dc13b39 | 200 | warn (sprintf("DROPPED: %s\n",$_)); |
ad609792 TH |
201 | next; |
202 | } | |
741336c2 TH |
203 | # add original newsgroup to %Newsgroups |
204 | $Newsgroups{$_} = 1; | |
205 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
206 | # i.e. de.alt.ALL and de.ALL | |
207 | foreach (ParseHierarchies($_)) { | |
208 | $Newsgroups{$_.'.ALL'} = 1; | |
209 | } | |
210 | }; | |
211 | return %Newsgroups; | |
212 | }; | |
213 | ||
214 | ################################################################################ | |
215 | sub ParseHierarchies { | |
216 | ################################################################################ | |
3430c898 TH |
217 | ### return a list of all hierarchy levels a newsgroup belongs to |
218 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
219 | ### IN : $Newsgroup : a newsgroup name |
220 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
221 | my ($Newsgroup) = @_; | |
222 | my @Hierarchies; | |
223 | # strip trailing dots | |
224 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
225 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
226 | # i.e. de.alt.test: "de.alt" and "de" | |
227 | while ($Newsgroup =~ /\./) { | |
228 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
229 | push @Hierarchies, $Newsgroup; | |
230 | }; | |
231 | return @Hierarchies; | |
232 | }; | |
233 | ||
ad609792 TH |
234 | ################################################################################ |
235 | sub ReadGroupList { | |
236 | ################################################################################ | |
237 | ### read a list of valid newsgroups from file (each group on one line, | |
238 | ### ignoring everything after the first whitespace and so accepting files | |
239 | ### in checkgroups format as well as (parts of) an INN active file) | |
240 | ### IN : $Filename : file to read | |
241 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
242 | my ($Filename) = @_; | |
243 | my %ValidGroups; | |
edd250f2 | 244 | open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!"); |
ad609792 | 245 | while (<$LIST>) { |
7662b106 | 246 | s/^\s*(\S+).*$/$1/; |
ad609792 | 247 | chomp; |
7662b106 | 248 | next if /^$/; |
ad609792 TH |
249 | $ValidGroups{$_} = '1'; |
250 | }; | |
251 | close $LIST; | |
252 | return \%ValidGroups; | |
253 | }; | |
254 | ||
741336c2 TH |
255 | ################################################################################ |
256 | ||
257 | #####----------------------------- TimePeriods ----------------------------##### | |
258 | ||
259 | ################################################################################ | |
260 | sub GetTimePeriod { | |
261 | ################################################################################ | |
edd250f2 TH |
262 | ### get a time period to act on from --month option; |
263 | ### if empty, default to last month | |
264 | ### IN : $Month: may be empty, 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'all' | |
265 | ### OUT: $Verbal,$SQL: verbal description and WHERE-clause | |
266 | ### of the chosen time period | |
267 | my ($Month) = @_; | |
268 | # define result variables | |
269 | my ($Verbal, $SQL); | |
270 | # define a regular expression for a month | |
271 | my $REMonth = '\d{4}-\d{2}'; | |
272 | ||
273 | # default to last month if option is not set | |
274 | if(!$Month) { | |
275 | $Month = &LastMonth; | |
276 | } | |
277 | ||
278 | # check for valid input | |
279 | if ($Month =~ /^$REMonth$/) { | |
280 | # single month (YYYY-MM) | |
281 | ($Month) = &CheckMonth($Month); | |
282 | $Verbal = $Month; | |
283 | $SQL = sprintf("month = '%s'",$Month); | |
284 | } elsif ($Month =~ /^$REMonth:$REMonth$/) { | |
285 | # time period (YYYY-MM:YYYY-MM) | |
286 | $Verbal = sprintf('%s to %s',&SplitPeriod($Month)); | |
287 | $SQL = sprintf("month BETWEEN '%s' AND '%s'",&SplitPeriod($Month)); | |
288 | } elsif ($Month =~ /^all$/i) { | |
289 | # special case: ALL | |
290 | $Verbal = 'all time'; | |
291 | $SQL = ''; | |
741336c2 | 292 | } else { |
edd250f2 TH |
293 | # invalid input |
294 | return (undef,undef); | |
295 | } | |
296 | ||
297 | return ($Verbal,$SQL); | |
741336c2 TH |
298 | }; |
299 | ||
300 | ################################################################################ | |
301 | sub LastMonth { | |
302 | ################################################################################ | |
3430c898 | 303 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
304 | ### OUT: last month as YYYY-MM |
305 | # get today's date | |
306 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
307 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
308 | $Year += 1900; | |
309 | if ($Month < 1) { | |
310 | $Month = 12; | |
311 | $Year--; | |
312 | }; | |
313 | # return last month | |
314 | return sprintf('%4d-%02d',$Year,$Month); | |
315 | }; | |
316 | ||
317 | ################################################################################ | |
318 | sub CheckMonth { | |
319 | ################################################################################ | |
edd250f2 TH |
320 | ### check if input (in YYYY-MM form) is valid with MM between 01 and 12; |
321 | ### otherwise, fix it | |
322 | ### IN : @Month: array of month | |
323 | ### OUT: @Month: a valid month | |
324 | my (@Month) = @_; | |
325 | foreach my $Month (@Month) { | |
326 | my ($OldMonth) = $Month; | |
327 | my ($CalMonth) = substr ($Month, -2); | |
328 | if ($CalMonth < 1 or $CalMonth > 12) { | |
329 | $CalMonth = '12' if $CalMonth > 12; | |
330 | $CalMonth = '01' if $CalMonth < 1; | |
331 | substr($Month, -2) = $CalMonth; | |
332 | &Bleat(1,sprintf("'%s' is an invalid date (MM must be between '01' ". | |
333 | "and '12'), set to '%s'.",$OldMonth,$Month)); | |
334 | } | |
335 | } | |
336 | return @Month; | |
741336c2 TH |
337 | }; |
338 | ||
339 | ################################################################################ | |
340 | sub SplitPeriod { | |
341 | ################################################################################ | |
3430c898 | 342 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 | 343 | ### IN : $Period: time period |
edd250f2 | 344 | ### OUT: $StartMonth, $EndMonth |
741336c2 | 345 | my ($Period) = @_; |
741336c2 | 346 | my ($StartMonth, $EndMonth) = split /:/, $Period; |
edd250f2 | 347 | ($StartMonth,$EndMonth) = CheckMonth($StartMonth,$EndMonth); |
741336c2 TH |
348 | # switch parameters as necessary |
349 | if ($EndMonth gt $StartMonth) { | |
350 | return ($StartMonth, $EndMonth); | |
351 | } else { | |
352 | return ($EndMonth, $StartMonth); | |
353 | }; | |
354 | }; | |
355 | ||
356 | ################################################################################ | |
357 | sub ListMonth { | |
358 | ################################################################################ | |
3430c898 | 359 | ### return a list of months (YYYY-MM) between start and end month |
880c3eb2 TH |
360 | ### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM') |
361 | ### OUT: @Months: array containing all months from $MonthExpression enumerated | |
362 | my ($MonthExpression )= @_; | |
363 | # return if single month | |
364 | return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/); | |
365 | # parse $MonthExpression | |
366 | my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression); | |
741336c2 TH |
367 | # set $Year, $Month from $StartMonth |
368 | my ($Year, $Month) = split /-/, $StartMonth; | |
369 | # define @Months | |
370 | my (@Months); | |
371 | until ("$Year-$Month" gt $EndMonth) { | |
372 | push @Months, "$Year-$Month"; | |
373 | $Month = "$Month"; # force string context | |
374 | $Month++; | |
375 | if ($Month > 12) { | |
376 | $Month = '01'; | |
377 | $Year++; | |
378 | }; | |
379 | }; | |
380 | return @Months; | |
381 | }; | |
382 | ||
383 | #####---------------------------- OutputFormats ---------------------------##### | |
384 | ||
385 | ################################################################################ | |
386 | sub OutputData { | |
387 | ################################################################################ | |
3430c898 | 388 | ### read database query results from DBHandle and print results with formatting |
edd250f2 TH |
389 | ### IN : $Format : format specifier |
390 | ### $Comments : print or suppress all comments for machine-readable output | |
391 | ### $GroupBy : primary sorting order (month or key) | |
392 | ### $Precision: number of digits right of decimal point (0 or 2) | |
393 | ### $ValidKeys: reference to a hash containing all valid keys | |
394 | ### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM | |
395 | ### $DBQuery : database query handle with executed query, | |
78389b28 | 396 | ### containing $Month, $Key, $Value |
edd250f2 TH |
397 | ### $PadGroup : padding length for key field (optional) for 'pretty' |
398 | my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl, | |
399 | $DBQuery, $PadGroup) = @_; | |
400 | my %ValidKeys = %{$ValidKeys} if $ValidKeys; | |
401 | my ($FileName, $Handle, $OUT); | |
78389b28 | 402 | our $LastIteration; |
edd250f2 TH |
403 | |
404 | # define output types | |
405 | my %LegalOutput; | |
406 | @LegalOutput{('dump',,'list','pretty')} = (); | |
407 | # bail out if format is unknown | |
408 | &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format}); | |
409 | ||
741336c2 | 410 | while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { |
edd250f2 TH |
411 | # don't display invalid keys |
412 | if(%ValidKeys and !defined($ValidKeys{$Key})) { | |
413 | # FIXME | |
414 | # &Bleat(1,sprintf("DROPPED: %s",$Key)); | |
415 | next; | |
416 | }; | |
417 | # care for correct sorting order and abstract from month and keys: | |
418 | # $Caption will be $Month or $Key, according to sorting order, | |
419 | # and $Key will be $Key or $Month, respectively | |
420 | my $Caption; | |
421 | if ($GroupBy eq 'key') { | |
422 | $Caption = $Key; | |
423 | $Key = $Month; | |
424 | } else { | |
425 | $Caption = $Month; | |
426 | } | |
78389b28 | 427 | # set output file handle |
edd250f2 | 428 | if (!$FileTempl) { |
78389b28 | 429 | $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT |
edd250f2 | 430 | } elsif (!defined($LastIteration) or $LastIteration ne $Caption) { |
78389b28 | 431 | close $OUT if ($LastIteration); |
edd250f2 TH |
432 | # safeguards for filename creation: |
433 | # replace potential problem characters with '_' | |
434 | $FileName = sprintf('%s-%s',$FileTempl,$Caption); | |
435 | $FileName =~ s/[^a-zA-Z0-9_-]+/_/g; | |
436 | open ($OUT,">$FileName") | |
437 | or &Bleat(2,sprintf("Cannot open output file '%s': $!", | |
438 | $FileName)); | |
78389b28 TH |
439 | $Handle = $OUT; |
440 | }; | |
edd250f2 TH |
441 | print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value, |
442 | $Precision, $PadGroup); | |
443 | $LastIteration = $Caption; | |
741336c2 | 444 | }; |
edd250f2 | 445 | close $OUT if ($FileTempl); |
741336c2 TH |
446 | }; |
447 | ||
448 | ################################################################################ | |
449 | sub FormatOutput { | |
450 | ################################################################################ | |
3430c898 | 451 | ### format information for output according to format specifier |
edd250f2 TH |
452 | ### IN : $Format : format specifier |
453 | ### $Comments : print or suppress all comments for machine-readable output | |
454 | ### $Caption : month (as YYYY-MM) or $Key, according to sorting order | |
455 | ### $Key : newsgroup, client, ... or $Month, as above | |
456 | ### $Value : number of postings with that attribute | |
457 | ### $Precision: number of digits right of decimal point (0 or 2) | |
458 | ### $PadGroup : padding length for key field (optional) for 'pretty' | |
741336c2 | 459 | ### OUT: $Output: formatted output |
edd250f2 | 460 | my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadGroup) = @_; |
741336c2 | 461 | my ($Output); |
edd250f2 | 462 | # keep last caption in mind |
741336c2 | 463 | our ($LastIteration); |
edd250f2 | 464 | # create one line of output |
741336c2 | 465 | if ($Format eq 'dump') { |
edd250f2 TH |
466 | # output as dump (key value) |
467 | $Output = sprintf ("# %s:\n",$Caption) | |
468 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
469 | $Output .= sprintf ("%s %u\n",$Key,$Value); | |
741336c2 | 470 | } elsif ($Format eq 'list') { |
edd250f2 TH |
471 | # output as list (caption key value) |
472 | $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value); | |
741336c2 | 473 | } elsif ($Format eq 'pretty') { |
edd250f2 TH |
474 | # output as a table |
475 | $Output = sprintf ("# ----- %s:\n",$Caption) | |
476 | if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration)); | |
477 | $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%10.*f\n",$PadGroup) : | |
478 | "%s %.*f\n",$Key,$Precision,$Value); | |
741336c2 TH |
479 | }; |
480 | return $Output; | |
481 | }; | |
482 | ||
483 | #####------------------------- QueryModifications -------------------------##### | |
484 | ||
485 | ################################################################################ | |
486 | sub SQLHierarchies { | |
487 | ################################################################################ | |
3430c898 TH |
488 | ### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by |
489 | ### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is | |
490 | ### true, accordingly) | |
741336c2 TH |
491 | ### IN : $ShowHierarchies: boolean value |
492 | ### OUT: SQL code | |
493 | my ($ShowHierarchies) = @_; | |
edd250f2 | 494 | return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'"; |
741336c2 TH |
495 | }; |
496 | ||
497 | ################################################################################ | |
d3b6810d | 498 | sub GetMaxLength { |
741336c2 | 499 | ################################################################################ |
3430c898 | 500 | ### get length of longest field in future query result |
edd250f2 TH |
501 | ### IN : $DBHandle : database handel |
502 | ### $Table : table to query | |
503 | ### $Field : field to check | |
504 | ### $WhereClause : WHERE clause | |
505 | ### $HavingClause: HAVING clause | |
506 | ### @BindVars : bind variables for WHERE clause | |
741336c2 | 507 | ### OUT: $Length: length of longest instnace of $Field |
edd250f2 TH |
508 | my ($DBHandle,$Table,$Field,$WhereClause,$HavingClause,@BindVars) = @_; |
509 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) ". | |
510 | "FROM %s %s %s",$Field,$Table, | |
511 | $WhereClause,$HavingClause ? | |
512 | 'GROUP BY newsgroup' . $HavingClause . | |
513 | ' ORDER BY LENGTH(newsgroup) '. | |
514 | 'DESC LIMIT 1': '')); | |
515 | $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ". | |
516 | "for '%s' from table '%s': ". | |
517 | "$DBI::errstr",$Field,$Table)); | |
741336c2 TH |
518 | my ($Length) = $DBQuery->fetchrow_array; |
519 | return $Length; | |
520 | }; | |
521 | ||
edd250f2 TH |
522 | ################################################################################ |
523 | sub SQLSortOrder { | |
524 | ################################################################################ | |
525 | ### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and | |
526 | ### $OptOrderBy (secondary sorting), both ascending or descending; | |
527 | ### descending sorting order is done by adding '-desc' | |
528 | ### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups' | |
529 | ### $OrderBy: secondary sort by month/newsgroups (default) | |
530 | ### or number of 'postings' | |
531 | ### OUT: a SQL ORDER BY clause | |
532 | my ($GroupBy,$OrderBy) = @_; | |
533 | my ($GroupSort,$OrderSort) = ('',''); | |
534 | # $GroupBy (primary sorting) | |
535 | if (!$GroupBy) { | |
536 | $GroupBy = 'month'; | |
537 | } else { | |
538 | ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy); | |
539 | if ($GroupBy =~ /group/i) { | |
540 | $GroupBy = 'newsgroup'; | |
541 | } else { | |
542 | $GroupBy = 'month'; | |
543 | } | |
544 | } | |
545 | my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month'; | |
546 | # $OrderBy (secondary sorting) | |
547 | if (!$OrderBy) { | |
548 | $OrderBy = $Secondary; | |
549 | } else { | |
550 | ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy); | |
551 | if ($OrderBy =~ /posting/i) { | |
552 | $OrderBy = "postings $OrderSort, $Secondary"; | |
553 | } else { | |
554 | $OrderBy = "$Secondary $OrderSort"; | |
555 | } | |
556 | } | |
557 | return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy)); | |
558 | }; | |
559 | ||
560 | ################################################################################ | |
561 | sub SQLParseOrder { | |
562 | ################################################################################ | |
563 | ### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g. | |
564 | ### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc' | |
565 | ### IN : $OrderOption: order option (see above) | |
566 | ### OUT: parameter to sort by, | |
567 | ### sort order ('DESC' or nothing, meaning 'ASC') | |
568 | my ($OrderOption) = @_; | |
569 | my $SortOrder = ''; | |
570 | if ($OrderOption =~ s/-?desc$//i) { | |
571 | $SortOrder = 'DESC'; | |
572 | } else { | |
573 | $OrderOption =~ s/-?asc$//i | |
574 | } | |
575 | return ($OrderOption,$SortOrder); | |
576 | }; | |
577 | ||
741336c2 TH |
578 | ################################################################################ |
579 | sub SQLGroupList { | |
580 | ################################################################################ | |
edd250f2 TH |
581 | ### explode list of newsgroups separated by : (with wildcards) |
582 | ### to a SQL 'WHERE' expression | |
741336c2 | 583 | ### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) |
edd250f2 TH |
584 | ### OUT: SQL code to become part of a 'WHERE' clause, |
585 | ### list of newsgroups for SQL bindings | |
741336c2 | 586 | my ($Newsgroups) = @_; |
edd250f2 | 587 | # substitute '*' wildcard with SQL wildcard character '%' |
741336c2 | 588 | $Newsgroups =~ s/\*/%/g; |
edd250f2 TH |
589 | # just one newsgroup? |
590 | return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/; | |
591 | # list of newsgroups separated by ':' | |
741336c2 TH |
592 | my $SQL = '('; |
593 | my @GroupList = split /:/, $Newsgroups; | |
594 | foreach (@GroupList) { | |
595 | $SQL .= ' OR ' if $SQL gt '('; | |
edd250f2 | 596 | $SQL .= SQLGroupWildcard($_); |
741336c2 TH |
597 | }; |
598 | $SQL .= ')'; | |
599 | return ($SQL,@GroupList); | |
600 | }; | |
601 | ||
edd250f2 TH |
602 | ################################################################################ |
603 | sub SQLGroupWildcard { | |
604 | ################################################################################ | |
605 | ### build a valid SQL 'WHERE' expression with or without wildcards | |
606 | ### IN : $Newsgroup: newsgroup expression, probably with wildcard | |
607 | ### (group.name or group.name.%) | |
608 | ### OUT: SQL code to become part of a 'WHERE' clause | |
609 | my ($Newsgroup) = @_; | |
610 | # FIXME: check for validity | |
611 | if ($Newsgroup !~ /%/) { | |
612 | return 'newsgroup = ?'; | |
613 | } else { | |
614 | return 'newsgroup LIKE ?'; | |
615 | } | |
616 | }; | |
617 | ||
618 | ################################################################################ | |
619 | sub SQLSetBounds { | |
620 | ################################################################################ | |
621 | ### set upper and/or lower boundary (number of postings) | |
622 | ### IN : $Type: 'level', 'average', 'sum' or 'default' | |
623 | ### $LowBound,$UppBound: lower/upper boundary, respectively | |
624 | ### OUT: SQL code to become part of a WHERE or HAVING clause | |
625 | my ($Type,$LowBound,$UppBound) = @_; | |
626 | ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound); | |
627 | if($LowBound and $UppBound and $LowBound > $UppBound) { | |
628 | &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ". | |
629 | "$UppBound, exchanging boundaries."); | |
630 | ($LowBound,$UppBound) = ($UppBound,$LowBound); | |
631 | } | |
632 | # default to 'default' | |
633 | my $WhereHavingFunction = 'postings'; | |
634 | # set $LowBound to SQL statement: | |
635 | # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >=' | |
636 | if ($Type eq 'level') { | |
637 | $WhereHavingFunction = 'MIN(postings)' | |
638 | } elsif ($Type eq 'average') { | |
639 | $WhereHavingFunction = 'AVG(postings)' | |
640 | } elsif ($Type eq 'sum') { | |
641 | $WhereHavingFunction = 'SUM(postings)' | |
642 | } | |
643 | $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound); | |
644 | # set $LowBound to SQL statement: | |
645 | # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <=' | |
646 | if ($Type eq 'level') { | |
647 | $WhereHavingFunction = 'MAX(postings)' | |
648 | } elsif ($Type eq 'average') { | |
649 | $WhereHavingFunction = 'AVG(postings)' | |
650 | } elsif ($Type eq 'sum') { | |
651 | $WhereHavingFunction = 'SUM(postings)' | |
652 | } | |
653 | $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound); | |
654 | return ($LowBound,$UppBound); | |
655 | }; | |
656 | ||
657 | ################################################################################ | |
658 | sub SQLCheckNumber { | |
659 | ################################################################################ | |
660 | ### check if input is a valid positive integer; otherwise, make it one | |
661 | ### IN : @Numbers: array of parameters | |
662 | ### OUT: @Numbers: a valid positive integer | |
663 | my (@Numbers) = @_; | |
664 | foreach my $Number (@Numbers) { | |
665 | if ($Number and $Number < 0) { | |
666 | &Bleat(1,"Boundary $Number is < 0, set to ".-$Number); | |
667 | $Number = -$Number; | |
668 | } | |
669 | $Number = '' if ($Number and $Number !~ /^\d+$/); | |
670 | } | |
671 | return @Numbers; | |
672 | }; | |
673 | ||
674 | ################################################################################ | |
675 | sub SQLBuildClause { | |
676 | ################################################################################ | |
677 | ### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause | |
678 | ### from multiple expressions which *may* be empty | |
679 | ### IN : $Type: 'where', 'having', 'group' or 'order' | |
680 | ### @Expressions: array of expressions | |
681 | ### OUT: $SQLClause: a SQL clause | |
682 | my ($Type,@Expressions) = @_; | |
683 | my ($SQLClause,$Separator,$Statement); | |
684 | # set separator ('AND' or ',') | |
685 | if ($Type eq 'where' or $Type eq 'having') { | |
686 | $Separator = 'AND'; | |
687 | } else { | |
688 | $Separator = ','; | |
689 | } | |
690 | # set statement | |
691 | if ($Type eq 'where') { | |
692 | $Statement = 'WHERE'; | |
693 | } elsif ($Type eq 'order') { | |
694 | $Statement = 'ORDER BY'; | |
695 | } elsif ($Type eq 'having') { | |
696 | $Statement = 'HAVING'; | |
697 | } else { | |
698 | $Statement = 'GROUP BY'; | |
699 | } | |
700 | # build query from expressions with separators | |
701 | foreach my $Expression (@Expressions) { | |
702 | if ($Expression) { | |
703 | $SQLClause .= " $Separator " if ($SQLClause); | |
704 | $SQLClause .= $Expression; | |
705 | } | |
706 | } | |
707 | # add statement in front if not already present | |
708 | $SQLClause = " $Statement " . $SQLClause | |
709 | if ($SQLClause and $SQLClause !~ /$Statement/); | |
710 | return $SQLClause; | |
711 | }; | |
712 | ||
713 | ||
741336c2 TH |
714 | #####------------------------------- done ---------------------------------##### |
715 | 1; | |
716 | ||
717 |