Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
5 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
6 | # | |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
19 | $MySelf | |
20 | $MyVersion | |
f6d15ca7 | 21 | $PackageVersion |
741336c2 TH |
22 | ReadOptions |
23 | ReadConfig | |
24 | OverrideConfig | |
25 | InitDB | |
26 | ); | |
27 | @EXPORT_OK = qw( | |
28 | GetTimePeriod | |
29 | LastMonth | |
30 | CheckMonth | |
31 | SplitPeriod | |
32 | ListMonth | |
33 | ListNewsgroups | |
ad609792 | 34 | ReadGroupList |
741336c2 TH |
35 | OutputData |
36 | FormatOutput | |
37 | SQLHierarchies | |
38 | SQLGroupList | |
d3b6810d | 39 | GetMaxLength |
741336c2 TH |
40 | ); |
41 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], | |
42 | Output => [qw(OutputData FormatOutput)], | |
d3b6810d | 43 | SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]); |
4bbd46d3 TH |
44 | $VERSION = '0.01'; |
45 | our $PackageVersion = '0.01'; | |
741336c2 TH |
46 | |
47 | use Data::Dumper; | |
48 | use File::Basename; | |
49 | use Getopt::Std; | |
50 | ||
51 | use Config::Auto; | |
52 | use DBI; | |
53 | ||
54 | #####-------------------------------- Vars --------------------------------##### | |
55 | ||
56 | our $MySelf = fileparse($0, '.pl'); | |
57 | our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; | |
58 | ||
59 | #####------------------------------- Basics -------------------------------##### | |
60 | ||
61 | ################################################################################ | |
62 | sub ReadOptions { | |
63 | ################################################################################ | |
3430c898 TH |
64 | ### read commandline options and act on standard options -h and -V |
65 | ### IN : $Params: list of legal commandline paramaters (without -h and -V) | |
741336c2 TH |
66 | ### OUT: a hash containing the commandline options |
67 | $Getopt::Std::STANDARD_HELP_VERSION = 1; | |
68 | ||
69 | my ($Params) = @_; | |
70 | my %Options; | |
71 | ||
72 | getopts('Vh'.$Params, \%Options); | |
73 | ||
74 | # -V: display version | |
75 | &ShowVersion if ($Options{'V'}); | |
76 | ||
77 | # -h: feed myself to perldoc | |
78 | &ShowPOD if ($Options{'h'}); | |
79 | ||
80 | return %Options; | |
81 | }; | |
82 | ################################################################################ | |
83 | ||
84 | ################################################################################ | |
85 | sub ShowVersion { | |
86 | ################################################################################ | |
87 | ### display version and exit | |
f6d15ca7 | 88 | print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n"; |
741336c2 TH |
89 | print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; |
90 | exit(100); | |
91 | }; | |
92 | ################################################################################ | |
93 | ||
94 | ################################################################################ | |
95 | sub ShowPOD { | |
96 | ################################################################################ | |
97 | ### feed myself to perldoc and exit | |
98 | exec('perldoc', $0); | |
99 | exit(100); | |
100 | }; | |
101 | ################################################################################ | |
102 | ||
103 | ################################################################################ | |
104 | sub ReadConfig { | |
105 | ################################################################################ | |
106 | ### read config via Config::Auto | |
107 | ### IN : $ConfFile: config filename | |
108 | ### OUT: reference to a hash containing the configuration | |
109 | my ($ConfFile) = @_; | |
110 | return Config::Auto::parse($ConfFile, format => 'equal'); | |
111 | }; | |
112 | ################################################################################ | |
113 | ||
114 | ################################################################################ | |
115 | sub OverrideConfig { | |
116 | ################################################################################ | |
117 | ### override configuration values | |
118 | ### IN : $ConfigR : reference to configuration hash | |
119 | ### $OverrideR: reference to a hash containing overrides | |
120 | my ($ConfigR,$OverrideR) = @_; | |
121 | my %Override = %$OverrideR; | |
3430c898 | 122 | # Config hash empty? |
741336c2 | 123 | warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); |
3430c898 | 124 | # return if no overrides |
741336c2 TH |
125 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
126 | foreach my $Key (keys %Override) { | |
127 | $$ConfigR{$Key} = $Override{$Key}; | |
128 | }; | |
129 | }; | |
130 | ################################################################################ | |
131 | ||
132 | ################################################################################ | |
133 | sub InitDB { | |
134 | ################################################################################ | |
135 | ### initialise database connection | |
136 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 137 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
138 | ### OUT: DBHandle |
139 | my ($ConfigR,$Die) = @_; | |
140 | my %Conf = %$ConfigR; | |
141 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); | |
142 | if (!$DBHandle) { | |
143 | die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); | |
144 | warn("$MySelf: W: $DBI::errstr\n"); | |
145 | }; | |
146 | return $DBHandle; | |
147 | }; | |
148 | ################################################################################ | |
149 | ||
150 | #####------------------------------ GetStats ------------------------------##### | |
151 | ||
152 | ################################################################################ | |
153 | sub ListNewsgroups { | |
154 | ################################################################################ | |
3430c898 TH |
155 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
156 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
157 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 | 158 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
89db2f90 | 159 | ### $TLH : top level hierarchy (all other newsgroups are ignored) |
ad609792 TH |
160 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups |
161 | ### as keys | |
162 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
89db2f90 | 163 | my ($Newsgroups,$TLH,$ValidGroupsR) = @_; |
ad609792 | 164 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; |
741336c2 TH |
165 | my %Newsgroups; |
166 | chomp($Newsgroups); | |
167 | # remove whitespace from contents of Newsgroups: | |
168 | $Newsgroups =~ s/\s//; | |
169 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
170 | for (split /,/, $Newsgroups) { | |
89db2f90 TH |
171 | # don't count newsgroup/hierarchy in wrong TLH |
172 | next if($TLH and !/^$TLH/); | |
ad609792 TH |
173 | # don't count invalid newsgroups |
174 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
175 | warn (sprintf("DROPPED: %s\n",$_)); | |
176 | next; | |
177 | } | |
741336c2 TH |
178 | # add original newsgroup to %Newsgroups |
179 | $Newsgroups{$_} = 1; | |
180 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
181 | # i.e. de.alt.ALL and de.ALL | |
182 | foreach (ParseHierarchies($_)) { | |
183 | $Newsgroups{$_.'.ALL'} = 1; | |
184 | } | |
185 | }; | |
186 | return %Newsgroups; | |
187 | }; | |
188 | ||
189 | ################################################################################ | |
190 | sub ParseHierarchies { | |
191 | ################################################################################ | |
3430c898 TH |
192 | ### return a list of all hierarchy levels a newsgroup belongs to |
193 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
194 | ### IN : $Newsgroup : a newsgroup name |
195 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
196 | my ($Newsgroup) = @_; | |
197 | my @Hierarchies; | |
198 | # strip trailing dots | |
199 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
200 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
201 | # i.e. de.alt.test: "de.alt" and "de" | |
202 | while ($Newsgroup =~ /\./) { | |
203 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
204 | push @Hierarchies, $Newsgroup; | |
205 | }; | |
206 | return @Hierarchies; | |
207 | }; | |
208 | ||
ad609792 TH |
209 | ################################################################################ |
210 | sub ReadGroupList { | |
211 | ################################################################################ | |
212 | ### read a list of valid newsgroups from file (each group on one line, | |
213 | ### ignoring everything after the first whitespace and so accepting files | |
214 | ### in checkgroups format as well as (parts of) an INN active file) | |
215 | ### IN : $Filename : file to read | |
216 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
217 | my ($Filename) = @_; | |
218 | my %ValidGroups; | |
219 | open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; | |
220 | while (<$LIST>) { | |
221 | s/^(\S+).*$/$1/; | |
222 | chomp; | |
223 | $ValidGroups{$_} = '1'; | |
224 | }; | |
225 | close $LIST; | |
226 | return \%ValidGroups; | |
227 | }; | |
228 | ||
741336c2 TH |
229 | ################################################################################ |
230 | ||
231 | #####----------------------------- TimePeriods ----------------------------##### | |
232 | ||
233 | ################################################################################ | |
234 | sub GetTimePeriod { | |
235 | ################################################################################ | |
3430c898 TH |
236 | ### get a time period to act on, in order of preference: by default the |
237 | ### last month; or a month submitted by -m YYYY-MM; or a time period submitted | |
238 | ### by -p YYYY-MM:YYYY-MM | |
741336c2 | 239 | ### IN : $Month,$Period: contents of -m and -p |
3430c898 | 240 | ### OUT: $StartMonth, $EndMonth (identical if period is just one month) |
741336c2 TH |
241 | my ($Month,$Period) = @_; |
242 | # exit if -m is set and not like YYYY-MM | |
243 | die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); | |
b221278d TH |
244 | # warn if -m and -p is set |
245 | warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period); | |
741336c2 TH |
246 | # default: set -m to last month |
247 | $Month = &LastMonth if (!defined($Month) and !defined($Period)); | |
248 | # set $StartMonth, $EndMonth | |
249 | my ($StartMonth, $EndMonth); | |
250 | if ($Period) { | |
251 | # -p: get date range | |
252 | ($StartMonth, $EndMonth) = &SplitPeriod($Period); | |
253 | die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); | |
254 | } else { | |
255 | # set $StartMonth = $EndMonth = $Month if -p is not set | |
256 | $StartMonth = $EndMonth = $Month; | |
257 | }; | |
258 | return ($StartMonth, $EndMonth); | |
259 | }; | |
260 | ||
261 | ################################################################################ | |
262 | sub LastMonth { | |
263 | ################################################################################ | |
3430c898 | 264 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
265 | ### OUT: last month as YYYY-MM |
266 | # get today's date | |
267 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
268 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
269 | $Year += 1900; | |
270 | if ($Month < 1) { | |
271 | $Month = 12; | |
272 | $Year--; | |
273 | }; | |
274 | # return last month | |
275 | return sprintf('%4d-%02d',$Year,$Month); | |
276 | }; | |
277 | ||
278 | ################################################################################ | |
279 | sub CheckMonth { | |
280 | ################################################################################ | |
3430c898 | 281 | ### check if input is a valid month in YYYY-MM form |
741336c2 TH |
282 | ### IN : $Month: month |
283 | ### OUT: TRUE / FALSE | |
284 | my ($Month) = @_; | |
285 | return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); | |
286 | return 1; | |
287 | }; | |
288 | ||
289 | ################################################################################ | |
290 | sub SplitPeriod { | |
291 | ################################################################################ | |
3430c898 | 292 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 TH |
293 | ### IN : $Period: time period |
294 |