Create a database table with parsed raw data.
[usenet/newsstats.git] / lib / NewsStats.pm
CommitLineData
741336c2
TH
1# NewsStats.pm
2#
3# Library functions for the NewsStats package.
4#
07c0b258 5# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
741336c2 6#
8dc6823e 7# This module can be redistributed and/or modified under the same terms under
741336c2
TH
8# which Perl itself is published.
9
10package NewsStats;
11
12use strict;
13use warnings;
14our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
15
16require Exporter;
17@ISA = qw(Exporter);
18@EXPORT = qw(
741336c2 19 $MyVersion
f6d15ca7 20 $PackageVersion
edd250f2
TH
21 $FullPath
22 $HomePath
23 ShowVersion
24 ShowPOD
741336c2
TH
25 ReadConfig
26 OverrideConfig
27 InitDB
edd250f2 28 Bleat
741336c2
TH
29);
30@EXPORT_OK = qw(
31 GetTimePeriod
32 LastMonth
741336c2
TH
33 SplitPeriod
34 ListMonth
35 ListNewsgroups
1703b8e3 36 ParseHierarchies
ad609792 37 ReadGroupList
741336c2
TH
38 OutputData
39 FormatOutput
40 SQLHierarchies
edd250f2 41 SQLSortOrder
741336c2 42 SQLGroupList
edd250f2
TH
43 SQLSetBounds
44 SQLBuildClause
d3b6810d 45 GetMaxLength
741336c2 46);
edd250f2
TH
47%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
48 ListMonth)],
741336c2 49 Output => [qw(OutputData FormatOutput)],
edd250f2
TH
50 SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
51 SQLSetBounds SQLBuildClause GetMaxLength)]);
4bbd46d3
TH
52$VERSION = '0.01';
53our $PackageVersion = '0.01';
741336c2
TH
54
55use Data::Dumper;
56use File::Basename;
2ad99c20 57use Cwd qw(realpath);
741336c2
TH
58
59use Config::Auto;
60use DBI;
61
62#####-------------------------------- Vars --------------------------------#####
63
2ad99c20 64# save $0 in $FullPath
edd250f2 65our $FullPath = $0;
2ad99c20
TH
66# strip filename and /bin or /install directory to create the $HomePath
67our $HomePath = dirname(realpath($0));
68$HomePath =~ s/\/(bin|install)//;
69# trim $0
edd250f2
TH
70$0 =~ s%.*/%%;
71# set version string
72our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
741336c2
TH
73
74#####------------------------------- Basics -------------------------------#####
75
741336c2
TH
76################################################################################
77
78################################################################################
79sub ShowVersion {
80################################################################################
81### display version and exit
edd250f2
TH
82 print "NewsStats v$PackageVersion\n$MyVersion\n";
83 print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
84 print "This program is free software; you may redistribute it ".
85 "and/or modify it under the same terms as Perl itself.\n";
741336c2
TH
86 exit(100);
87};
88################################################################################
89
90################################################################################
91sub ShowPOD {
92################################################################################
93### feed myself to perldoc and exit
edd250f2 94 exec('perldoc', $FullPath);
741336c2
TH
95 exit(100);
96};
97################################################################################
98
99################################################################################
100sub ReadConfig {
101################################################################################
102### read config via Config::Auto
103### IN : $ConfFile: config filename
104### OUT: reference to a hash containing the configuration
105 my ($ConfFile) = @_;
2ad99c20
TH
106 # set default
107 $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
db7696e5
TH
108 # mandatory configuration options
109 my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
6d72dad2 110 'DBTableRaw','DBTableParse','DBTableGrps');
db7696e5
TH
111 # read config via Config::Auto
112 my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
113 my %Conf = %{$ConfR};
114 # check for mandatory options
115 foreach (@Mandatory) {
116 &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
117 if (!defined($Conf{$_}));
118 }
119 # $Conf{'TLH'} is checked in gatherstats.pl
120 return $ConfR;
741336c2
TH
121};
122################################################################################
123
124################################################################################
edd250f2 125sub OverrideConfig {
741336c2
TH
126################################################################################
127### override configuration values
128### IN : $ConfigR : reference to configuration hash
129### $OverrideR: reference to a hash containing overrides
130 my ($ConfigR,$OverrideR) = @_;
131 my %Override = %$OverrideR;
3430c898 132 # Config hash empty?
edd250f2
TH
133 &Bleat(1,"Empty configuration hash passed to OverrideConfig()")
134 if ( keys %$ConfigR < 1);
3430c898 135 # return if no overrides
741336c2
TH
136 return if (keys %Override < 1 or keys %$ConfigR < 1);
137 foreach my $Key (keys %Override) {
138 $$ConfigR{$Key} = $Override{$Key};
139 };
140};
141################################################################################
142
143################################################################################
144sub InitDB {
145################################################################################
146### initialise database connection
147### IN : $ConfigR: reference to configuration hash
3430c898 148### $Die : if TRUE, die if connection fails
741336c2
TH
149### OUT: DBHandle
150 my ($ConfigR,$Die) = @_;
151 my %Conf = %$ConfigR;
edd250f2
TH
152 my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
153 $Conf{'DBDriver'},$Conf{'DBDatabase'},
154 $Conf{'DBHost'}), $Conf{'DBUser'},
155 $Conf{'DBPw'}, { PrintError => 0 });
741336c2 156 if (!$DBHandle) {
edd250f2
TH
157 &Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
158 &Bleat(1,$DBI::errstr);
741336c2
TH
159 };
160 return $DBHandle;
161};
162################################################################################
163
edd250f2
TH
164################################################################################
165sub Bleat {
166################################################################################
167### print warning or error messages and terminate in case of error
168### IN : $Level : 1 = warning, 2 = error
169### $Message: warning or error message
170 my ($Level,$Message) = @_;
171 if ($Level == 1) {
172 warn "$0 W: $Message\n"
173 } elsif ($Level == 2) {
174 die "$0 E: $Message\n"
175 } else {
176 print "$0: $Message\n"
177 }
178};
179################################################################################
180
741336c2
TH
181#####------------------------------ GetStats ------------------------------#####
182
183################################################################################
184sub ListNewsgroups {
185################################################################################
3430c898
TH
186### explode a (scalar) list of newsgroup names to a list of newsgroup and
187### hierarchy names where every newsgroup and hierarchy appears only once:
188### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
ad609792 189### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
89db2f90 190### $TLH : top level hierarchy (all other newsgroups are ignored)
ad609792
TH
191### $ValidGroupsR: reference to a hash containing all valid newsgroups
192### as keys
193### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
89db2f90 194 my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
ad609792 195 my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
741336c2
TH
196 my %Newsgroups;
197 chomp($Newsgroups);
198 # remove whitespace from contents of Newsgroups:
199 $Newsgroups =~ s/\s//;
200 # call &HierarchyCount for each newsgroup in $Newsgroups:
201 for (split /,/, $Newsgroups) {
89db2f90
TH
202 # don't count newsgroup/hierarchy in wrong TLH
203 next if($TLH and !/^$TLH/);
ad609792
TH
204 # don't count invalid newsgroups
205 if(%ValidGroups and !defined($ValidGroups{$_})) {
0dc13b39 206 warn (sprintf("DROPPED: %s\n",$_));
ad609792
TH
207 next;
208 }
741336c2
TH
209 # add original newsgroup to %Newsgroups
210 $Newsgroups{$_} = 1;
211 # add all hierarchy elements to %Newsgroups, amended by '.ALL',
212 # i.e. de.alt.ALL and de.ALL
213 foreach (ParseHierarchies($_)) {
214 $Newsgroups{$_.'.ALL'} = 1;
215 }
216 };
217 return %Newsgroups;
218};
219
220################################################################################
221sub ParseHierarchies {
222################################################################################
3430c898
TH
223### return a list of all hierarchy levels a newsgroup belongs to
224### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
741336c2
TH
225### IN : $Newsgroup : a newsgroup name
226### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
227 my ($Newsgroup) = @_;
228 my @Hierarchies;
229 # strip trailing dots
230 $Newsgroup =~ s/(.+)\.+$/$1/;
231 # butcher newsgroup name by "." and add each hierarchy to @Hierarchies
232 # i.e. de.alt.test: "de.alt" and "de"
233 while ($Newsgroup =~ /\./) {
234 $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
235 push @Hierarchies, $Newsgroup;
236 };
237 return @Hierarchies;
238};
239
ad609792
TH
240################################################################################
241sub ReadGroupList {
242################################################################################
243### read a list of valid newsgroups from file (each group on one line,
244### ignoring everything after the first whitespace and so accepting files
245### in checkgroups format as well as (parts of) an INN active file)
246### IN : $Filename : file to read
8dc6823e 247### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
ad609792
TH
248 my ($Filename) = @_;
249 my %ValidGroups;
edd250f2 250 open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
ad609792 251 while (<$LIST>) {
7662b106 252 s/^\s*(\S+).*$/$1/;
ad609792 253 chomp;
7662b106 254 next if /^$/;
ad609792
TH
255 $ValidGroups{$_} = '1';
256 };
257 close $LIST;
258 return \%ValidGroups;
259};
260
741336c2
TH
261################################################################################
262
263#####----------------------------- TimePeriods ----------------------------#####
264
265################################################################################
266sub GetTimePeriod {
267################################################################################
36340108
TH
268### get a time period to act on from --month / --day option;
269### if empty, default to last month / day
270### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)'
271### or 'all'
272### $Type : may be 'month' or 'day'
edd250f2
TH
273### OUT: $Verbal,$SQL: verbal description and WHERE-clause
274### of the chosen time period
36340108 275 my ($Period,$Type) = @_;
edd250f2
TH
276 # define result variables
277 my ($Verbal, $SQL);
36340108
TH
278 # check $Type
279 $Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day'));
280 # define a regular expressions for a month or day
281 my $REPeriod = '\d{4}-\d{2}';
282 $REPeriod .= '-\d{2}' if ($Type eq 'day');
8dc6823e 283
36340108
TH
284 # default to last month / day if option is not set
285 if(!$Period) {
286 $Period = &LastMonthDay($Type);
edd250f2 287 }
8dc6823e 288
edd250f2 289 # check for valid input
36340108
TH
290 if ($Period =~ /^$REPeriod$/) {
291 # single month/day [YYYY-MM(-DD)]
292 ($Period) = &CheckPeriod($Type,$Period);
293 $Verbal = $Period;
294 $SQL = sprintf("%s = '%s'",$Type,$Period);
295 } elsif ($Period =~ /^$REPeriod:$REPeriod$/) {
296 # time period [YYYY-MM(-DD):YYYY-MM(-DD)]
297 $Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type));
298 $SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type,
299 &SplitPeriod($Period,$Type));
300 } elsif ($Period =~ /^all$/i) {
edd250f2
TH
301 # special case: ALL
302 $Verbal = 'all time';
303 $SQL = '';
741336c2 304 } else {
edd250f2
TH
305 # invalid input
306 return (undef,undef);
307 }
8dc6823e 308
edd250f2 309 return ($Verbal,$SQL);
741336c2
TH
310};
311
312################################################################################
36340108
TH
313sub LastMonthDay {
314################################################################################
315### get last month/day from todays date in YYYY-MM format
316### IN : $Type : may be 'month' or 'day'
317### OUT: last month/day as YYYY-MM(-DD)
318 my ($Type) = @_;
319 my ($Day,$Month,$Year);
320 if ($Type eq 'day') {
321 # get yesterdays's date
322 (undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
323 # $Month is defined from 0 to 11, so add 1
324 $Month++;
325 } else {
326 # get today's date (month and year)
327 (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
328 # $Month is already defined from 0 to 11, so no need to decrease it by 1
329 if ($Month < 1) {
330 $Month = 12;
331 $Year--;
332 };
333 }
741336c2 334 $Year += 1900;
36340108
TH
335 # return last month / day
336 if ($Type eq 'day') {
337 return sprintf('%4d-%02d-%02d',$Year,$Month,$Day);
338 } else {
339 return sprintf('%4d-%02d',$Year,$Month);
340 }
741336c2
TH
341};
342
343################################################################################
36340108 344sub CheckPeriod {
741336c2 345################################################################################
36340108 346### check if input (in YYYY-MM(-DD) form) is a valid month / day;
edd250f2 347### otherwise, fix it
36340108
TH
348### IN : $Type : may be 'month' or 'day'
349### @Period: array of month/day
350### OUT: @Period: a valid month/day
351 my ($Type,@Period) = @_;
352 foreach my $Period (@Period) {
353 my ($OldPeriod) = $Period;
354 my ($CalMonth,$CalDay);
355 $Period .= '-01' if ($Type eq 'month');
356 $CalDay = substr ($Period, -2);
357 $CalMonth = substr ($Period, 5, 2);
358 if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
edd250f2
TH
359 $CalMonth = '12' if $CalMonth > 12;
360 $CalMonth = '01' if $CalMonth < 1;
36340108
TH
361 substr($Period, 5, 2) = $CalMonth;
362 $CalDay = '01' if $CalDay < 1;
363 $CalDay = '31' if $CalDay > 31;
364 # FIXME! - month with less than 31 days ...
365 substr($Period, -2) = $CalDay;
366 &Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
367 $OldPeriod,$Period));
edd250f2 368 }
36340108 369 $Period = substr($Period,0,7) if ($Type eq 'month');
edd250f2 370 }
36340108 371 return @Period;
741336c2
TH
372};
373
374################################################################################
375sub SplitPeriod {
376################################################################################
36340108 377### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end
741336c2 378### IN : $Period: time period
36340108
TH
379### $Type : may be 'month' or 'day'
380### OUT: $StartTime, $EndTime
381 my ($Period,$Type) = @_;
382 my ($StartTime, $EndTime) = split /:/, $Period;
383 ($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
741336c2 384 # switch parameters as necessary
36340108
TH
385 if ($EndTime gt $StartTime) {
386 return ($StartTime, $EndTime);
741336c2 387 } else {
36340108 388 return ($EndTime, $StartTime);
741336c2
TH
389 };
390};
391
392################################################################################
393sub ListMonth {
394################################################################################
3430c898 395### return a list of months (YYYY-MM) between start and end month
880c3eb2
TH
396### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
397### OUT: @Months: array containing all months from $MonthExpression enumerated
398 my ($MonthExpression )= @_;
399 # return if single month
400 return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
401 # parse $MonthExpression
402 my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
741336c2
TH
403 # set $Year, $Month from $StartMonth
404 my ($Year, $Month) = split /-/, $StartMonth;
405 # define @Months
406 my (@Months);
407 until ("$Year-$Month" gt $EndMonth) {
408 push @Months, "$Year-$Month";
409 $Month = "$Month"; # force string context
410 $Month++;
411 if ($Month > 12) {
412 $Month = '01';
413 $Year++;
414 };
415 };
416 return @Months;
417};
418
419#####---------------------------- OutputFormats ---------------------------#####
420
421################################################################################
422sub OutputData {
423################################################################################
3430c898 424### read database query results from DBHandle and print results with formatting
edd250f2
TH
425### IN : $Format : format specifier
426### $Comments : print or suppress all comments for machine-readable output
427### $GroupBy : primary sorting order (month or key)
428### $Precision: number of digits right of decimal point (0 or 2)
429### $ValidKeys: reference to a hash containing all valid keys
430### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
431### $DBQuery : database query handle with executed query,
78389b28 432### containing $Month, $Key, $Value
b342fcf0
TH
433### $PadField : padding length for key field (optional) for 'pretty'
434### $PadValue : padding length for value field (optional) for 'pretty'
edd250f2 435 my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
b342fcf0 436 $DBQuery, $PadField, $PadValue) = @_;
edd250f2
TH
437 my %ValidKeys = %{$ValidKeys} if $ValidKeys;
438 my ($FileName, $Handle, $OUT);
78389b28 439 our $LastIteration;
8dc6823e 440
edd250f2
TH
441 # define output types
442 my %LegalOutput;
b342fcf0 443 @LegalOutput{('dump','list','pretty')} = ();
edd250f2
TH
444 # bail out if format is unknown
445 &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
446
741336c2 447 while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
edd250f2
TH
448 # don't display invalid keys
449 if(%ValidKeys and !defined($ValidKeys{$Key})) {
450 # FIXME
451 # &Bleat(1,sprintf("DROPPED: %s",$Key));
452 next;
453 };
454 # care for correct sorting order and abstract from month and keys:
455 # $Caption will be $Month or $Key, according to sorting order,
456 # and $Key will be $Key or $Month, respectively
457 my $Caption;
458 if ($GroupBy eq 'key') {
459 $Caption = $Key;
460 $Key = $Month;
461 } else {
462 $Caption = $Month;
463 }
78389b28 464 # set output file handle
edd250f2 465 if (!$FileTempl) {
78389b28 466 $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
edd250f2 467 } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
78389b28 468 close $OUT if ($LastIteration);
edd250f2
TH
469 # safeguards for filename creation:
470 # replace potential problem characters with '_'
471 $FileName = sprintf('%s-%s',$FileTempl,$Caption);
8dc6823e 472 $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
edd250f2
TH
473 open ($OUT,">$FileName")
474 or &Bleat(2,sprintf("Cannot open output file '%s': $!",
475 $FileName));
78389b28
TH
476 $Handle = $OUT;
477 };
edd250f2 478 print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
b342fcf0 479 $Precision, $PadField, $PadValue);
edd250f2 480 $LastIteration = $Caption;
741336c2 481 };
edd250f2 482 close $OUT if ($FileTempl);
741336c2
TH
483};
484
485################################################################################
486sub FormatOutput {
487################################################################################
3430c898 488### format information for output according to format specifier
edd250f2
TH
489### IN : $Format : format specifier
490### $Comments : print or suppress all comments for machine-readable output
491### $Caption : month (as YYYY-MM) or $Key, according to sorting order
492### $Key : newsgroup, client, ... or $Month, as above
493### $Value : number of postings with that attribute
494### $Precision: number of digits right of decimal point (0 or 2)
b342fcf0
TH
495### $PadField : padding length for key field (optional) for 'pretty'
496### $PadValue : padding length for value field (optional) for 'pretty'
741336c2 497### OUT: $Output: formatted output
b342fcf0
TH
498 my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
499 $PadValue) = @_;
741336c2 500 my ($Output);
edd250f2 501 # keep last caption in mind
741336c2 502 our ($LastIteration);
edd250f2 503 # create one line of output
741336c2 504 if ($Format eq 'dump') {
edd250f2
TH
505 # output as dump (key value)
506 $Output = sprintf ("# %s:\n",$Caption)
507 if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
508 $Output .= sprintf ("%s %u\n",$Key,$Value);
741336c2 509 } elsif ($Format eq 'list') {
edd250f2
TH
510 # output as list (caption key value)
511 $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
741336c2 512 } elsif ($Format eq 'pretty') {
edd250f2
TH
513 # output as a table
514 $Output = sprintf ("# ----- %s:\n",$Caption)
515 if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
b342fcf0
TH
516 # increase $PadValue for numbers with decimal point
517 $PadValue += $Precision+1 if $Precision;
518 # add padding if $PadField is set; $PadValue HAS to be set then
519 $Output .= sprintf ($PadField ?
520 sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
521 "%s%s %.*f\n",$Key,$Comments ? ':' : '',
522 $Precision,$Value);
741336c2
TH
523 };
524 return $Output;
525};
526
527#####------------------------- QueryModifications -------------------------#####
528
529################################################################################
530sub SQLHierarchies {
531################################################################################
3430c898
TH
532### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
533### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
534### true, accordingly)
741336c2
TH
535### IN : $ShowHierarchies: boolean value
536### OUT: SQL code
537 my ($ShowHierarchies) = @_;
edd250f2 538 return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
741336c2
TH
539};
540
541################################################################################
d3b6810d 542sub GetMaxLength {
741336c2 543################################################################################
b342fcf0
TH
544### get length of longest fields in future query result
545### IN : $DBHandle : database handle
edd250f2 546### $Table : table to query
b342fcf0
TH
547### $Field : field (key!, i.e. month, newsgroup, ...) to check
548### $Value : field (value!, i.e. postings) to check
edd250f2
TH
549### $WhereClause : WHERE clause
550### $HavingClause: HAVING clause
551### @BindVars : bind variables for WHERE clause
b342fcf0
TH
552### OUT: $FieldLength : length of longest instance of $Field
553### $ValueLength : length of longest instance of $Value
554 my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
555 my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
556 "MAX(%s) ".
557 "FROM %s %s %s",$Field,,$Value,
558 $Table,$WhereClause,$HavingClause ?
edd250f2
TH
559 'GROUP BY newsgroup' . $HavingClause .
560 ' ORDER BY LENGTH(newsgroup) '.
561 'DESC LIMIT 1': ''));
562 $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
563 "for '%s' from table '%s': ".
564 "$DBI::errstr",$Field,$Table));
b342fcf0
TH
565 my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
566 my $ValueLength = length($ValueMax) if ($ValueMax);
567 return ($FieldLength,$ValueLength);
741336c2
TH
568};
569
edd250f2
TH
570################################################################################
571sub SQLSortOrder {
572################################################################################
573### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
574### $OptOrderBy (secondary sorting), both ascending or descending;
575### descending sorting order is done by adding '-desc'
576### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
577### $OrderBy: secondary sort by month/newsgroups (default)
578### or number of 'postings'
579### OUT: a SQL ORDER BY clause
580 my ($GroupBy,$OrderBy) = @_;
581 my ($GroupSort,$OrderSort) = ('','');
582 # $GroupBy (primary sorting)
583 if (!$GroupBy) {
584 $GroupBy = 'month';
585 } else {
586 ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
587 if ($GroupBy =~ /group/i) {
588 $GroupBy = 'newsgroup';
589 } else {
590 $GroupBy = 'month';
591 }
592 }
593 my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
594 # $OrderBy (secondary sorting)
595 if (!$OrderBy) {
596 $OrderBy = $Secondary;
597 } else {
598 ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
599 if ($OrderBy =~ /posting/i) {
600 $OrderBy = "postings $OrderSort, $Secondary";
601 } else {
602 $OrderBy = "$Secondary $OrderSort";
603 }
604 }
605 return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
606};
607
608################################################################################
609sub SQLParseOrder {
610################################################################################
611### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
612### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
613### IN : $OrderOption: order option (see above)
614### OUT: parameter to sort by,
615### sort order ('DESC' or nothing, meaning 'ASC')
616 my ($OrderOption) = @_;
617 my $SortOrder = '';
618 if ($OrderOption =~ s/-?desc$//i) {
619 $SortOrder = 'DESC';
620 } else {
621 $OrderOption =~ s/-?asc$//i
622 }
623 return ($OrderOption,$SortOrder);
624};
625
741336c2
TH
626################################################################################
627sub SQLGroupList {
628################################################################################
edd250f2
TH
629### explode list of newsgroups separated by : (with wildcards)
630### to a SQL 'WHERE' expression
741336c2 631### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
edd250f2
TH
632### OUT: SQL code to become part of a 'WHERE' clause,
633### list of newsgroups for SQL bindings
741336c2 634 my ($Newsgroups) = @_;
edd250f2 635 # substitute '*' wildcard with SQL wildcard character '%'
741336c2 636 $Newsgroups =~ s/\*/%/g;
c30822b4 637 return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
edd250f2
TH
638 # just one newsgroup?
639 return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
10459ac8 640 my ($SQL,@WildcardGroups,@NoWildcardGroups);
edd250f2 641 # list of newsgroups separated by ':'
741336c2
TH
642 my @GroupList = split /:/, $Newsgroups;
643 foreach (@GroupList) {
10459ac8
TH
644 if ($_ !~ /%/) {
645 # add to list of newsgroup names WITHOUT wildcard
646 push (@NoWildcardGroups,$_);
647 } else {
648 # add to list of newsgroup names WITH wildcard
649 push (@WildcardGroups,$_);
650 # add wildcard to SQL clause
651 # 'OR' if SQL clause is not empty
652 $SQL .= ' OR ' if $SQL;
653 $SQL .= 'newsgroup LIKE ?'
654 }
741336c2 655 };
10459ac8
TH
656 if (scalar(@NoWildcardGroups)) {
657 # add 'OR' if SQL clause is not empty
658 $SQL .= ' OR ' if $SQL;
659 if (scalar(@NoWildcardGroups) < 2) {
660 # special case: just one newsgroup without wildcard
661 $SQL .= 'newsgroup = ?';
662 } else {
663 # create list of newsgroups to include: 'newsgroup IN (...)'
664 $SQL .= 'newsgroup IN (';
665 my $SQLin;
666 foreach (@NoWildcardGroups) {
667 $SQLin .= ',' if $SQLin;
668 $SQLin .= '?';
669 }
670 # add list to SQL clause
671 $SQL .= $SQLin .= ')';
672 }
673 }
674 # add brackets '()' to SQL clause as needed (more than one wildcard group)
675 if (scalar(@WildcardGroups)) {
676 $SQL = '(' . $SQL .')';
677 }
678 # rebuild @GroupList in (now) correct order
679 @GroupList = (@WildcardGroups,@NoWildcardGroups);
741336c2
TH
680 return ($SQL,@GroupList);
681};
682
edd250f2
TH
683################################################################################
684sub SQLGroupWildcard {
685################################################################################
686### build a valid SQL 'WHERE' expression with or without wildcards
687### IN : $Newsgroup: newsgroup expression, probably with wildcard
688### (group.name or group.name.%)
689### OUT: SQL code to become part of a 'WHERE' clause
690 my ($Newsgroup) = @_;
edd250f2
TH
691 if ($Newsgroup !~ /%/) {
692 return 'newsgroup = ?';
693 } else {
694 return 'newsgroup LIKE ?';
695 }
696};
697
698################################################################################
699sub SQLSetBounds {
700################################################################################
701### set upper and/or lower boundary (number of postings)
702### IN : $Type: 'level', 'average', 'sum' or 'default'
703### $LowBound,$UppBound: lower/upper boundary, respectively
704### OUT: SQL code to become part of a WHERE or HAVING clause
705 my ($Type,$LowBound,$UppBound) = @_;
706 ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
707 if($LowBound and $UppBound and $LowBound > $UppBound) {
708 &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
709 "$UppBound, exchanging boundaries.");
710 ($LowBound,$UppBound) = ($UppBound,$LowBound);
711 }
712 # default to 'default'
713 my $WhereHavingFunction = 'postings';
714 # set $LowBound to SQL statement:
715 # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
716 if ($Type eq 'level') {
717 $WhereHavingFunction = 'MIN(postings)'
718 } elsif ($Type eq 'average') {
719 $WhereHavingFunction = 'AVG(postings)'
720 } elsif ($Type eq 'sum') {
721 $WhereHavingFunction = 'SUM(postings)'
722 }
723 $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
724 # set $LowBound to SQL statement:
725 # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
726 if ($Type eq 'level') {
727 $WhereHavingFunction = 'MAX(postings)'
728 } elsif ($Type eq 'average') {
729 $WhereHavingFunction = 'AVG(postings)'
730 } elsif ($Type eq 'sum') {
731 $WhereHavingFunction = 'SUM(postings)'
732 }
733 $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
734 return ($LowBound,$UppBound);
735};
736
737################################################################################
738sub SQLCheckNumber {
739################################################################################
740### check if input is a valid positive integer; otherwise, make it one
741### IN : @Numbers: array of parameters
742### OUT: @Numbers: a valid positive integer
743 my (@Numbers) = @_;
744 foreach my $Number (@Numbers) {
745 if ($Number and $Number < 0) {
746 &Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
747 $Number = -$Number;
748 }
749 $Number = '' if ($Number and $Number !~ /^\d+$/);
750 }
751 return @Numbers;
752};
753
754################################################################################
755sub SQLBuildClause {
756################################################################################
757### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
758### from multiple expressions which *may* be empty
759### IN : $Type: 'where', 'having', 'group' or 'order'
760### @Expressions: array of expressions
761### OUT: $SQLClause: a SQL clause
762 my ($Type,@Expressions) = @_;
763 my ($SQLClause,$Separator,$Statement);
764 # set separator ('AND' or ',')
765 if ($Type eq 'where' or $Type eq 'having') {
766 $Separator = 'AND';
767 } else {
768 $Separator = ',';
769 }
770 # set statement
771 if ($Type eq 'where') {
772 $Statement = 'WHERE';
773 } elsif ($Type eq 'order') {
774 $Statement = 'ORDER BY';
775 } elsif ($Type eq 'having') {
776 $Statement = 'HAVING';
777 } else {
778 $Statement = 'GROUP BY';
779 }
780 # build query from expressions with separators
781 foreach my $Expression (@Expressions) {
782 if ($Expression) {
783 $SQLClause .= " $Separator " if ($SQLClause);
784 $SQLClause .= $Expression;
785 }
786 }
787 # add statement in front if not already present
788 $SQLClause = " $Statement " . $SQLClause
789 if ($SQLClause and $SQLClause !~ /$Statement/);
790 return $SQLClause;
791};
792
c30822b4
TH
793#####--------------------------- Verifications ----------------------------#####
794
795################################################################################
796sub CheckValidNewsgroups {
797################################################################################
798### syntax check of newgroup list
799### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
800### OUT: boolean
801 my ($Newsgroups) = @_;
802 my $InvalidCharRegExp = ',; ';
803 return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
804};
805
edd250f2 806
741336c2
TH
807#####------------------------------- done ---------------------------------#####
8081;
This page took 0.061869 seconds and 4 git commands to generate.