Create a database table with parsed raw data.
[usenet/newsstats.git] / lib / NewsStats.pm
... / ...
CommitLineData
1# NewsStats.pm
2#
3# Library functions for the NewsStats package.
4#
5# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
6#
7# This module can be redistributed and/or modified under the same terms under
8# which Perl itself is published.
9
10package NewsStats;
11
12use strict;
13use warnings;
14our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
15
16require Exporter;
17@ISA = qw(Exporter);
18@EXPORT = qw(
19 $MyVersion
20 $PackageVersion
21 $FullPath
22 $HomePath
23 ShowVersion
24 ShowPOD
25 ReadConfig
26 OverrideConfig
27 InitDB
28 Bleat
29);
30@EXPORT_OK = qw(
31 GetTimePeriod
32 LastMonth
33 SplitPeriod
34 ListMonth
35 ListNewsgroups
36 ParseHierarchies
37 ReadGroupList
38 OutputData
39 FormatOutput
40 SQLHierarchies
41 SQLSortOrder
42 SQLGroupList
43 SQLSetBounds
44 SQLBuildClause
45 GetMaxLength
46);
47%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth SplitPeriod
48 ListMonth)],
49 Output => [qw(OutputData FormatOutput)],
50 SQLHelper => [qw(SQLHierarchies SQLSortOrder SQLGroupList
51 SQLSetBounds SQLBuildClause GetMaxLength)]);
52$VERSION = '0.01';
53our $PackageVersion = '0.01';
54
55use Data::Dumper;
56use File::Basename;
57use Cwd qw(realpath);
58
59use Config::Auto;
60use DBI;
61
62#####-------------------------------- Vars --------------------------------#####
63
64# save $0 in $FullPath
65our $FullPath = $0;
66# strip filename and /bin or /install directory to create the $HomePath
67our $HomePath = dirname(realpath($0));
68$HomePath =~ s/\/(bin|install)//;
69# trim $0
70$0 =~ s%.*/%%;
71# set version string
72our $MyVersion = "$0 $::VERSION (NewsStats.pm $VERSION)";
73
74#####------------------------------- Basics -------------------------------#####
75
76################################################################################
77
78################################################################################
79sub ShowVersion {
80################################################################################
81### display version and exit
82 print "NewsStats v$PackageVersion\n$MyVersion\n";
83 print "Copyright (c) 2010-2012 Thomas Hochstein <thh\@inter.net>\n";
84 print "This program is free software; you may redistribute it ".
85 "and/or modify it under the same terms as Perl itself.\n";
86 exit(100);
87};
88################################################################################
89
90################################################################################
91sub ShowPOD {
92################################################################################
93### feed myself to perldoc and exit
94 exec('perldoc', $FullPath);
95 exit(100);
96};
97################################################################################
98
99################################################################################
100sub ReadConfig {
101################################################################################
102### read config via Config::Auto
103### IN : $ConfFile: config filename
104### OUT: reference to a hash containing the configuration
105 my ($ConfFile) = @_;
106 # set default
107 $ConfFile = $HomePath . '/etc/newsstats.conf' if !$ConfFile;
108 # mandatory configuration options
109 my @Mandatory = ('DBDriver','DBHost','DBUser','DBPw','DBDatabase',
110 'DBTableRaw','DBTableParse','DBTableGrps');
111 # read config via Config::Auto
112 my $ConfR = Config::Auto::parse($ConfFile, format => 'equal');
113 my %Conf = %{$ConfR};
114 # check for mandatory options
115 foreach (@Mandatory) {
116 &Bleat(2,sprintf("Mandatory configuration option %s is not set!",$_))
117 if (!defined($Conf{$_}));
118 }
119 # $Conf{'TLH'} is checked in gatherstats.pl
120 return $ConfR;
121};
122################################################################################
123
124################################################################################
125sub OverrideConfig {
126################################################################################
127### override configuration values
128### IN : $ConfigR : reference to configuration hash
129### $OverrideR: reference to a hash containing overrides
130 my ($ConfigR,$OverrideR) = @_;
131 my %Override = %$OverrideR;
132 # Config hash empty?
133 &Bleat(1,"Empty configuration hash passed to OverrideConfig()")
134 if ( keys %$ConfigR < 1);
135 # return if no overrides
136 return if (keys %Override < 1 or keys %$ConfigR < 1);
137 foreach my $Key (keys %Override) {
138 $$ConfigR{$Key} = $Override{$Key};
139 };
140};
141################################################################################
142
143################################################################################
144sub InitDB {
145################################################################################
146### initialise database connection
147### IN : $ConfigR: reference to configuration hash
148### $Die : if TRUE, die if connection fails
149### OUT: DBHandle
150 my ($ConfigR,$Die) = @_;
151 my %Conf = %$ConfigR;
152 my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',
153 $Conf{'DBDriver'},$Conf{'DBDatabase'},
154 $Conf{'DBHost'}), $Conf{'DBUser'},
155 $Conf{'DBPw'}, { PrintError => 0 });
156 if (!$DBHandle) {
157 &Bleat(2,$DBI::errstr) if (defined($Die) and $Die);
158 &Bleat(1,$DBI::errstr);
159 };
160 return $DBHandle;
161};
162################################################################################
163
164################################################################################
165sub Bleat {
166################################################################################
167### print warning or error messages and terminate in case of error
168### IN : $Level : 1 = warning, 2 = error
169### $Message: warning or error message
170 my ($Level,$Message) = @_;
171 if ($Level == 1) {
172 warn "$0 W: $Message\n"
173 } elsif ($Level == 2) {
174 die "$0 E: $Message\n"
175 } else {
176 print "$0: $Message\n"
177 }
178};
179################################################################################
180
181#####------------------------------ GetStats ------------------------------#####
182
183################################################################################
184sub ListNewsgroups {
185################################################################################
186### explode a (scalar) list of newsgroup names to a list of newsgroup and
187### hierarchy names where every newsgroup and hierarchy appears only once:
188### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin
189### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header)
190### $TLH : top level hierarchy (all other newsgroups are ignored)
191### $ValidGroupsR: reference to a hash containing all valid newsgroups
192### as keys
193### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys
194 my ($Newsgroups,$TLH,$ValidGroupsR) = @_;
195 my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR;
196 my %Newsgroups;
197 chomp($Newsgroups);
198 # remove whitespace from contents of Newsgroups:
199 $Newsgroups =~ s/\s//;
200 # call &HierarchyCount for each newsgroup in $Newsgroups:
201 for (split /,/, $Newsgroups) {
202 # don't count newsgroup/hierarchy in wrong TLH
203 next if($TLH and !/^$TLH/);
204 # don't count invalid newsgroups
205 if(%ValidGroups and !defined($ValidGroups{$_})) {
206 warn (sprintf("DROPPED: %s\n",$_));
207 next;
208 }
209 # add original newsgroup to %Newsgroups
210 $Newsgroups{$_} = 1;
211 # add all hierarchy elements to %Newsgroups, amended by '.ALL',
212 # i.e. de.alt.ALL and de.ALL
213 foreach (ParseHierarchies($_)) {
214 $Newsgroups{$_.'.ALL'} = 1;
215 }
216 };
217 return %Newsgroups;
218};
219
220################################################################################
221sub ParseHierarchies {
222################################################################################
223### return a list of all hierarchy levels a newsgroup belongs to
224### (for de.alt.test.moderated that would be de/de.alt/de.alt.test)
225### IN : $Newsgroup : a newsgroup name
226### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
227 my ($Newsgroup) = @_;
228 my @Hierarchies;
229 # strip trailing dots
230 $Newsgroup =~ s/(.+)\.+$/$1/;
231 # butcher newsgroup name by "." and add each hierarchy to @Hierarchies
232 # i.e. de.alt.test: "de.alt" and "de"
233 while ($Newsgroup =~ /\./) {
234 $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
235 push @Hierarchies, $Newsgroup;
236 };
237 return @Hierarchies;
238};
239
240################################################################################
241sub ReadGroupList {
242################################################################################
243### read a list of valid newsgroups from file (each group on one line,
244### ignoring everything after the first whitespace and so accepting files
245### in checkgroups format as well as (parts of) an INN active file)
246### IN : $Filename : file to read
247### OUT: \%ValidGroups: reference to a hash containing all valid newsgroups
248 my ($Filename) = @_;
249 my %ValidGroups;
250 open (my $LIST,"<$Filename") or &Bleat(2,"Cannot read $Filename: $!");
251 while (<$LIST>) {
252 s/^\s*(\S+).*$/$1/;
253 chomp;
254 next if /^$/;
255 $ValidGroups{$_} = '1';
256 };
257 close $LIST;
258 return \%ValidGroups;
259};
260
261################################################################################
262
263#####----------------------------- TimePeriods ----------------------------#####
264
265################################################################################
266sub GetTimePeriod {
267################################################################################
268### get a time period to act on from --month / --day option;
269### if empty, default to last month / day
270### IN : $Period: may be empty, 'YYYY-MM(-DD)', 'YYYY-MM(-DD):YYYY-MM(-DD)'
271### or 'all'
272### $Type : may be 'month' or 'day'
273### OUT: $Verbal,$SQL: verbal description and WHERE-clause
274### of the chosen time period
275 my ($Period,$Type) = @_;
276 # define result variables
277 my ($Verbal, $SQL);
278 # check $Type
279 $Type = 'month' if (!$Type or ($Type ne 'month' and $Type ne 'day'));
280 # define a regular expressions for a month or day
281 my $REPeriod = '\d{4}-\d{2}';
282 $REPeriod .= '-\d{2}' if ($Type eq 'day');
283
284 # default to last month / day if option is not set
285 if(!$Period) {
286 $Period = &LastMonthDay($Type);
287 }
288
289 # check for valid input
290 if ($Period =~ /^$REPeriod$/) {
291 # single month/day [YYYY-MM(-DD)]
292 ($Period) = &CheckPeriod($Type,$Period);
293 $Verbal = $Period;
294 $SQL = sprintf("%s = '%s'",$Type,$Period);
295 } elsif ($Period =~ /^$REPeriod:$REPeriod$/) {
296 # time period [YYYY-MM(-DD):YYYY-MM(-DD)]
297 $Verbal = sprintf('%s to %s',&SplitPeriod($Period,$Type));
298 $SQL = sprintf("%s BETWEEN '%s' AND '%s'",$Type,
299 &SplitPeriod($Period,$Type));
300 } elsif ($Period =~ /^all$/i) {
301 # special case: ALL
302 $Verbal = 'all time';
303 $SQL = '';
304 } else {
305 # invalid input
306 return (undef,undef);
307 }
308
309 return ($Verbal,$SQL);
310};
311
312################################################################################
313sub LastMonthDay {
314################################################################################
315### get last month/day from todays date in YYYY-MM format
316### IN : $Type : may be 'month' or 'day'
317### OUT: last month/day as YYYY-MM(-DD)
318 my ($Type) = @_;
319 my ($Day,$Month,$Year);
320 if ($Type eq 'day') {
321 # get yesterdays's date
322 (undef,undef,undef,$Day,$Month,$Year,undef,undef,undef) = localtime(time-86400);
323 # $Month is defined from 0 to 11, so add 1
324 $Month++;
325 } else {
326 # get today's date (month and year)
327 (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
328 # $Month is already defined from 0 to 11, so no need to decrease it by 1
329 if ($Month < 1) {
330 $Month = 12;
331 $Year--;
332 };
333 }
334 $Year += 1900;
335 # return last month / day
336 if ($Type eq 'day') {
337 return sprintf('%4d-%02d-%02d',$Year,$Month,$Day);
338 } else {
339 return sprintf('%4d-%02d',$Year,$Month);
340 }
341};
342
343################################################################################
344sub CheckPeriod {
345################################################################################
346### check if input (in YYYY-MM(-DD) form) is a valid month / day;
347### otherwise, fix it
348### IN : $Type : may be 'month' or 'day'
349### @Period: array of month/day
350### OUT: @Period: a valid month/day
351 my ($Type,@Period) = @_;
352 foreach my $Period (@Period) {
353 my ($OldPeriod) = $Period;
354 my ($CalMonth,$CalDay);
355 $Period .= '-01' if ($Type eq 'month');
356 $CalDay = substr ($Period, -2);
357 $CalMonth = substr ($Period, 5, 2);
358 if ($CalMonth < 1 or $CalMonth > 12 or $CalDay < 1 or $CalDay > 31) {
359 $CalMonth = '12' if $CalMonth > 12;
360 $CalMonth = '01' if $CalMonth < 1;
361 substr($Period, 5, 2) = $CalMonth;
362 $CalDay = '01' if $CalDay < 1;
363 $CalDay = '31' if $CalDay > 31;
364 # FIXME! - month with less than 31 days ...
365 substr($Period, -2) = $CalDay;
366 &Bleat(1,sprintf("'%s' is an invalid date, set to '%s'.",
367 $OldPeriod,$Period));
368 }
369 $Period = substr($Period,0,7) if ($Type eq 'month');
370 }
371 return @Period;
372};
373
374################################################################################
375sub SplitPeriod {
376################################################################################
377### split a time period denoted by YYYY-MM(-DD):YYYY-MM(-DD) into start and end
378### IN : $Period: time period
379### $Type : may be 'month' or 'day'
380### OUT: $StartTime, $EndTime
381 my ($Period,$Type) = @_;
382 my ($StartTime, $EndTime) = split /:/, $Period;
383 ($StartTime,$EndTime) = CheckPeriod($Type,$StartTime,$EndTime);
384 # switch parameters as necessary
385 if ($EndTime gt $StartTime) {
386 return ($StartTime, $EndTime);
387 } else {
388 return ($EndTime, $StartTime);
389 };
390};
391
392################################################################################
393sub ListMonth {
394################################################################################
395### return a list of months (YYYY-MM) between start and end month
396### IN : $MonthExpression ('YYYY-MM' or 'YYYY-MM to YYYY-MM')
397### OUT: @Months: array containing all months from $MonthExpression enumerated
398 my ($MonthExpression )= @_;
399 # return if single month
400 return ($MonthExpression) if ($MonthExpression =~ /^\d{4}-\d{2}$/);
401 # parse $MonthExpression
402 my ($StartMonth, $EndMonth) = split(' to ',$MonthExpression);
403 # set $Year, $Month from $StartMonth
404 my ($Year, $Month) = split /-/, $StartMonth;
405 # define @Months
406 my (@Months);
407 until ("$Year-$Month" gt $EndMonth) {
408 push @Months, "$Year-$Month";
409 $Month = "$Month"; # force string context
410 $Month++;
411 if ($Month > 12) {
412 $Month = '01';
413 $Year++;
414 };
415 };
416 return @Months;
417};
418
419#####---------------------------- OutputFormats ---------------------------#####
420
421################################################################################
422sub OutputData {
423################################################################################
424### read database query results from DBHandle and print results with formatting
425### IN : $Format : format specifier
426### $Comments : print or suppress all comments for machine-readable output
427### $GroupBy : primary sorting order (month or key)
428### $Precision: number of digits right of decimal point (0 or 2)
429### $ValidKeys: reference to a hash containing all valid keys
430### $FileTempl: file name template (--filetemplate): filetempl-YYYY-MM
431### $DBQuery : database query handle with executed query,
432### containing $Month, $Key, $Value
433### $PadField : padding length for key field (optional) for 'pretty'
434### $PadValue : padding length for value field (optional) for 'pretty'
435 my ($Format, $Comments, $GroupBy, $Precision, $ValidKeys, $FileTempl,
436 $DBQuery, $PadField, $PadValue) = @_;
437 my %ValidKeys = %{$ValidKeys} if $ValidKeys;
438 my ($FileName, $Handle, $OUT);
439 our $LastIteration;
440
441 # define output types
442 my %LegalOutput;
443 @LegalOutput{('dump','list','pretty')} = ();
444 # bail out if format is unknown
445 &Bleat(2,"Unknown output type '$Format'!") if !exists($LegalOutput{$Format});
446
447 while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
448 # don't display invalid keys
449 if(%ValidKeys and !defined($ValidKeys{$Key})) {
450 # FIXME
451 # &Bleat(1,sprintf("DROPPED: %s",$Key));
452 next;
453 };
454 # care for correct sorting order and abstract from month and keys:
455 # $Caption will be $Month or $Key, according to sorting order,
456 # and $Key will be $Key or $Month, respectively
457 my $Caption;
458 if ($GroupBy eq 'key') {
459 $Caption = $Key;
460 $Key = $Month;
461 } else {
462 $Caption = $Month;
463 }
464 # set output file handle
465 if (!$FileTempl) {
466 $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
467 } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
468 close $OUT if ($LastIteration);
469 # safeguards for filename creation:
470 # replace potential problem characters with '_'
471 $FileName = sprintf('%s-%s',$FileTempl,$Caption);
472 $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
473 open ($OUT,">$FileName")
474 or &Bleat(2,sprintf("Cannot open output file '%s': $!",
475 $FileName));
476 $Handle = $OUT;
477 };
478 print $Handle &FormatOutput($Format, $Comments, $Caption, $Key, $Value,
479 $Precision, $PadField, $PadValue);
480 $LastIteration = $Caption;
481 };
482 close $OUT if ($FileTempl);
483};
484
485################################################################################
486sub FormatOutput {
487################################################################################
488### format information for output according to format specifier
489### IN : $Format : format specifier
490### $Comments : print or suppress all comments for machine-readable output
491### $Caption : month (as YYYY-MM) or $Key, according to sorting order
492### $Key : newsgroup, client, ... or $Month, as above
493### $Value : number of postings with that attribute
494### $Precision: number of digits right of decimal point (0 or 2)
495### $PadField : padding length for key field (optional) for 'pretty'
496### $PadValue : padding length for value field (optional) for 'pretty'
497### OUT: $Output: formatted output
498 my ($Format, $Comments, $Caption, $Key, $Value, $Precision, $PadField,
499 $PadValue) = @_;
500 my ($Output);
501 # keep last caption in mind
502 our ($LastIteration);
503 # create one line of output
504 if ($Format eq 'dump') {
505 # output as dump (key value)
506 $Output = sprintf ("# %s:\n",$Caption)
507 if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
508 $Output .= sprintf ("%s %u\n",$Key,$Value);
509 } elsif ($Format eq 'list') {
510 # output as list (caption key value)
511 $Output = sprintf ("%s %s %u\n",$Caption,$Key,$Value);
512 } elsif ($Format eq 'pretty') {
513 # output as a table
514 $Output = sprintf ("# ----- %s:\n",$Caption)
515 if ($Comments and (!defined($LastIteration) or $Caption ne $LastIteration));
516 # increase $PadValue for numbers with decimal point
517 $PadValue += $Precision+1 if $Precision;
518 # add padding if $PadField is set; $PadValue HAS to be set then
519 $Output .= sprintf ($PadField ?
520 sprintf("%%-%us%%s %%%u.*f\n",$PadField,$PadValue) :
521 "%s%s %.*f\n",$Key,$Comments ? ':' : '',
522 $Precision,$Value);
523 };
524 return $Output;
525};
526
527#####------------------------- QueryModifications -------------------------#####
528
529################################################################################
530sub SQLHierarchies {
531################################################################################
532### add exclusion of hierarchy levels (de.alt.ALL) from SQL query by
533### amending the WHERE clause if $ShowHierarchies is false (or don't, if it is
534### true, accordingly)
535### IN : $ShowHierarchies: boolean value
536### OUT: SQL code
537 my ($ShowHierarchies) = @_;
538 return $ShowHierarchies ? '' : "newsgroup NOT LIKE '%.ALL'";
539};
540
541################################################################################
542sub GetMaxLength {
543################################################################################
544### get length of longest fields in future query result
545### IN : $DBHandle : database handle
546### $Table : table to query
547### $Field : field (key!, i.e. month, newsgroup, ...) to check
548### $Value : field (value!, i.e. postings) to check
549### $WhereClause : WHERE clause
550### $HavingClause: HAVING clause
551### @BindVars : bind variables for WHERE clause
552### OUT: $FieldLength : length of longest instance of $Field
553### $ValueLength : length of longest instance of $Value
554 my ($DBHandle,$Table,$Field,$Value,$WhereClause,$HavingClause,@BindVars) = @_;
555 my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)),".
556 "MAX(%s) ".
557 "FROM %s %s %s",$Field,,$Value,
558 $Table,$WhereClause,$HavingClause ?
559 'GROUP BY newsgroup' . $HavingClause .
560 ' ORDER BY LENGTH(newsgroup) '.
561 'DESC LIMIT 1': ''));
562 $DBQuery->execute(@BindVars) or &Bleat(1,sprintf("Can't get field length ".
563 "for '%s' from table '%s': ".
564 "$DBI::errstr",$Field,$Table));
565 my ($FieldLength,$ValueMax) = $DBQuery->fetchrow_array;
566 my $ValueLength = length($ValueMax) if ($ValueMax);
567 return ($FieldLength,$ValueLength);
568};
569
570################################################################################
571sub SQLSortOrder {
572################################################################################
573### build a SQL 'ORDER BY' clause from $OptGroupBy (primary sorting) and
574### $OptOrderBy (secondary sorting), both ascending or descending;
575### descending sorting order is done by adding '-desc'
576### IN : $GroupBy: primary sort by 'month' (default) or 'newsgroups'
577### $OrderBy: secondary sort by month/newsgroups (default)
578### or number of 'postings'
579### OUT: a SQL ORDER BY clause
580 my ($GroupBy,$OrderBy) = @_;
581 my ($GroupSort,$OrderSort) = ('','');
582 # $GroupBy (primary sorting)
583 if (!$GroupBy) {
584 $GroupBy = 'month';
585 } else {
586 ($GroupBy, $GroupSort) = SQLParseOrder($GroupBy);
587 if ($GroupBy =~ /group/i) {
588 $GroupBy = 'newsgroup';
589 } else {
590 $GroupBy = 'month';
591 }
592 }
593 my $Secondary = ($GroupBy eq 'month') ? 'newsgroup' : 'month';
594 # $OrderBy (secondary sorting)
595 if (!$OrderBy) {
596 $OrderBy = $Secondary;
597 } else {
598 ($OrderBy, $OrderSort) = SQLParseOrder($OrderBy);
599 if ($OrderBy =~ /posting/i) {
600 $OrderBy = "postings $OrderSort, $Secondary";
601 } else {
602 $OrderBy = "$Secondary $OrderSort";
603 }
604 }
605 return ($GroupBy,&SQLBuildClause('order',"$GroupBy $GroupSort",$OrderBy));
606};
607
608################################################################################
609sub SQLParseOrder {
610################################################################################
611### parse $OptGroupBy or $OptOrderBy option of the form param[-desc], e.g.
612### 'month', 'month-desc', 'newsgroups-desc', but also just 'desc'
613### IN : $OrderOption: order option (see above)
614### OUT: parameter to sort by,
615### sort order ('DESC' or nothing, meaning 'ASC')
616 my ($OrderOption) = @_;
617 my $SortOrder = '';
618 if ($OrderOption =~ s/-?desc$//i) {
619 $SortOrder = 'DESC';
620 } else {
621 $OrderOption =~ s/-?asc$//i
622 }
623 return ($OrderOption,$SortOrder);
624};
625
626################################################################################
627sub SQLGroupList {
628################################################################################
629### explode list of newsgroups separated by : (with wildcards)
630### to a SQL 'WHERE' expression
631### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
632### OUT: SQL code to become part of a 'WHERE' clause,
633### list of newsgroups for SQL bindings
634 my ($Newsgroups) = @_;
635 # substitute '*' wildcard with SQL wildcard character '%'
636 $Newsgroups =~ s/\*/%/g;
637 return (undef,undef) if !CheckValidNewsgroups($Newsgroups);
638 # just one newsgroup?
639 return (SQLGroupWildcard($Newsgroups),$Newsgroups) if $Newsgroups !~ /:/;
640 my ($SQL,@WildcardGroups,@NoWildcardGroups);
641 # list of newsgroups separated by ':'
642 my @GroupList = split /:/, $Newsgroups;
643 foreach (@GroupList) {
644 if ($_ !~ /%/) {
645 # add to list of newsgroup names WITHOUT wildcard
646 push (@NoWildcardGroups,$_);
647 } else {
648 # add to list of newsgroup names WITH wildcard
649 push (@WildcardGroups,$_);
650 # add wildcard to SQL clause
651 # 'OR' if SQL clause is not empty
652 $SQL .= ' OR ' if $SQL;
653 $SQL .= 'newsgroup LIKE ?'
654 }
655 };
656 if (scalar(@NoWildcardGroups)) {
657 # add 'OR' if SQL clause is not empty
658 $SQL .= ' OR ' if $SQL;
659 if (scalar(@NoWildcardGroups) < 2) {
660 # special case: just one newsgroup without wildcard
661 $SQL .= 'newsgroup = ?';
662 } else {
663 # create list of newsgroups to include: 'newsgroup IN (...)'
664 $SQL .= 'newsgroup IN (';
665 my $SQLin;
666 foreach (@NoWildcardGroups) {
667 $SQLin .= ',' if $SQLin;
668 $SQLin .= '?';
669 }
670 # add list to SQL clause
671 $SQL .= $SQLin .= ')';
672 }
673 }
674 # add brackets '()' to SQL clause as needed (more than one wildcard group)
675 if (scalar(@WildcardGroups)) {
676 $SQL = '(' . $SQL .')';
677 }
678 # rebuild @GroupList in (now) correct order
679 @GroupList = (@WildcardGroups,@NoWildcardGroups);
680 return ($SQL,@GroupList);
681};
682
683################################################################################
684sub SQLGroupWildcard {
685################################################################################
686### build a valid SQL 'WHERE' expression with or without wildcards
687### IN : $Newsgroup: newsgroup expression, probably with wildcard
688### (group.name or group.name.%)
689### OUT: SQL code to become part of a 'WHERE' clause
690 my ($Newsgroup) = @_;
691 if ($Newsgroup !~ /%/) {
692 return 'newsgroup = ?';
693 } else {
694 return 'newsgroup LIKE ?';
695 }
696};
697
698################################################################################
699sub SQLSetBounds {
700################################################################################
701### set upper and/or lower boundary (number of postings)
702### IN : $Type: 'level', 'average', 'sum' or 'default'
703### $LowBound,$UppBound: lower/upper boundary, respectively
704### OUT: SQL code to become part of a WHERE or HAVING clause
705 my ($Type,$LowBound,$UppBound) = @_;
706 ($LowBound,$UppBound) = SQLCheckNumber($LowBound,$UppBound);
707 if($LowBound and $UppBound and $LowBound > $UppBound) {
708 &Bleat(1,"Lower boundary $LowBound is larger than Upper boundary ".
709 "$UppBound, exchanging boundaries.");
710 ($LowBound,$UppBound) = ($UppBound,$LowBound);
711 }
712 # default to 'default'
713 my $WhereHavingFunction = 'postings';
714 # set $LowBound to SQL statement:
715 # 'WHERE postings >=', 'HAVING MIN(postings) >=' or 'HAVING AVG(postings) >='
716 if ($Type eq 'level') {
717 $WhereHavingFunction = 'MIN(postings)'
718 } elsif ($Type eq 'average') {
719 $WhereHavingFunction = 'AVG(postings)'
720 } elsif ($Type eq 'sum') {
721 $WhereHavingFunction = 'SUM(postings)'
722 }
723 $LowBound = sprintf('%s >= '.$LowBound,$WhereHavingFunction) if ($LowBound);
724 # set $LowBound to SQL statement:
725 # 'WHERE postings <=', 'HAVING MAX(postings) <=' or 'HAVING AVG(postings) <='
726 if ($Type eq 'level') {
727 $WhereHavingFunction = 'MAX(postings)'
728 } elsif ($Type eq 'average') {
729 $WhereHavingFunction = 'AVG(postings)'
730 } elsif ($Type eq 'sum') {
731 $WhereHavingFunction = 'SUM(postings)'
732 }
733 $UppBound = sprintf('%s <= '.$UppBound,$WhereHavingFunction) if ($UppBound);
734 return ($LowBound,$UppBound);
735};
736
737################################################################################
738sub SQLCheckNumber {
739################################################################################
740### check if input is a valid positive integer; otherwise, make it one
741### IN : @Numbers: array of parameters
742### OUT: @Numbers: a valid positive integer
743 my (@Numbers) = @_;
744 foreach my $Number (@Numbers) {
745 if ($Number and $Number < 0) {
746 &Bleat(1,"Boundary $Number is < 0, set to ".-$Number);
747 $Number = -$Number;
748 }
749 $Number = '' if ($Number and $Number !~ /^\d+$/);
750 }
751 return @Numbers;
752};
753
754################################################################################
755sub SQLBuildClause {
756################################################################################
757### build a valid SQL WHERE, GROUP BY, ORDER BY or HAVING clause
758### from multiple expressions which *may* be empty
759### IN : $Type: 'where', 'having', 'group' or 'order'
760### @Expressions: array of expressions
761### OUT: $SQLClause: a SQL clause
762 my ($Type,@Expressions) = @_;
763 my ($SQLClause,$Separator,$Statement);
764 # set separator ('AND' or ',')
765 if ($Type eq 'where' or $Type eq 'having') {
766 $Separator = 'AND';
767 } else {
768 $Separator = ',';
769 }
770 # set statement
771 if ($Type eq 'where') {
772 $Statement = 'WHERE';
773 } elsif ($Type eq 'order') {
774 $Statement = 'ORDER BY';
775 } elsif ($Type eq 'having') {
776 $Statement = 'HAVING';
777 } else {
778 $Statement = 'GROUP BY';
779 }
780 # build query from expressions with separators
781 foreach my $Expression (@Expressions) {
782 if ($Expression) {
783 $SQLClause .= " $Separator " if ($SQLClause);
784 $SQLClause .= $Expression;
785 }
786 }
787 # add statement in front if not already present
788 $SQLClause = " $Statement " . $SQLClause
789 if ($SQLClause and $SQLClause !~ /$Statement/);
790 return $SQLClause;
791};
792
793#####--------------------------- Verifications ----------------------------#####
794
795################################################################################
796sub CheckValidNewsgroups {
797################################################################################
798### syntax check of newgroup list
799### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
800### OUT: boolean
801 my ($Newsgroups) = @_;
802 my $InvalidCharRegExp = ',; ';
803 return ($Newsgroups =~ /[$InvalidCharRegExp]/) ? 0 : 1;
804};
805
806
807#####------------------------------- done ---------------------------------#####
8081;
This page took 0.012833 seconds and 4 git commands to generate.