Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | # NewsStats.pm |
2 | # | |
3 | # Library functions for the NewsStats package. | |
4 | # | |
5 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
6 | # | |
7 | # This module can be redistributed and/or modified under the same terms under | |
8 | # which Perl itself is published. | |
9 | ||
10 | package NewsStats; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
15 | ||
16 | require Exporter; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw( | |
19 | $MySelf | |
20 | $MyVersion | |
f6d15ca7 | 21 | $PackageVersion |
741336c2 TH |
22 | ReadOptions |
23 | ReadConfig | |
24 | OverrideConfig | |
25 | InitDB | |
26 | ); | |
27 | @EXPORT_OK = qw( | |
28 | GetTimePeriod | |
29 | LastMonth | |
30 | CheckMonth | |
31 | SplitPeriod | |
32 | ListMonth | |
33 | ListNewsgroups | |
ad609792 | 34 | ReadGroupList |
741336c2 TH |
35 | OutputData |
36 | FormatOutput | |
37 | SQLHierarchies | |
38 | SQLGroupList | |
39 | GetMaxLenght | |
40 | ); | |
41 | %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], | |
42 | Output => [qw(OutputData FormatOutput)], | |
43 | SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); | |
4bbd46d3 TH |
44 | $VERSION = '0.01'; |
45 | our $PackageVersion = '0.01'; | |
741336c2 TH |
46 | |
47 | use Data::Dumper; | |
48 | use File::Basename; | |
49 | use Getopt::Std; | |
50 | ||
51 | use Config::Auto; | |
52 | use DBI; | |
53 | ||
54 | #####-------------------------------- Vars --------------------------------##### | |
55 | ||
56 | our $MySelf = fileparse($0, '.pl'); | |
57 | our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; | |
58 | ||
59 | #####------------------------------- Basics -------------------------------##### | |
60 | ||
61 | ################################################################################ | |
62 | sub ReadOptions { | |
63 | ################################################################################ | |
3430c898 TH |
64 | ### read commandline options and act on standard options -h and -V |
65 | ### IN : $Params: list of legal commandline paramaters (without -h and -V) | |
741336c2 TH |
66 | ### OUT: a hash containing the commandline options |
67 | $Getopt::Std::STANDARD_HELP_VERSION = 1; | |
68 | ||
69 | my ($Params) = @_; | |
70 | my %Options; | |
71 | ||
72 | getopts('Vh'.$Params, \%Options); | |
73 | ||
74 | # -V: display version | |
75 | &ShowVersion if ($Options{'V'}); | |
76 | ||
77 | # -h: feed myself to perldoc | |
78 | &ShowPOD if ($Options{'h'}); | |
79 | ||
80 | return %Options; | |
81 | }; | |
82 | ################################################################################ | |
83 | ||
84 | ################################################################################ | |
85 | sub ShowVersion { | |
86 | ################################################################################ | |
87 | ### display version and exit | |
f6d15ca7 | 88 | print "NewsStats v$PackageVersion\n$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n"; |
741336c2 TH |
89 | print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; |
90 | exit(100); | |
91 | }; | |
92 | ################################################################################ | |
93 | ||
94 | ################################################################################ | |
95 | sub ShowPOD { | |
96 | ################################################################################ | |
97 | ### feed myself to perldoc and exit | |
98 | exec('perldoc', $0); | |
99 | exit(100); | |
100 | }; | |
101 | ################################################################################ | |
102 | ||
103 | ################################################################################ | |
104 | sub ReadConfig { | |
105 | ################################################################################ | |
106 | ### read config via Config::Auto | |
107 | ### IN : $ConfFile: config filename | |
108 | ### OUT: reference to a hash containing the configuration | |
109 | my ($ConfFile) = @_; | |
110 | return Config::Auto::parse($ConfFile, format => 'equal'); | |
111 | }; | |
112 | ################################################################################ | |
113 | ||
114 | ################################################################################ | |
115 | sub OverrideConfig { | |
116 | ################################################################################ | |
117 | ### override configuration values | |
118 | ### IN : $ConfigR : reference to configuration hash | |
119 | ### $OverrideR: reference to a hash containing overrides | |
120 | my ($ConfigR,$OverrideR) = @_; | |
121 | my %Override = %$OverrideR; | |
3430c898 | 122 | # Config hash empty? |
741336c2 | 123 | warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); |
3430c898 | 124 | # return if no overrides |
741336c2 TH |
125 | return if (keys %Override < 1 or keys %$ConfigR < 1); |
126 | foreach my $Key (keys %Override) { | |
127 | $$ConfigR{$Key} = $Override{$Key}; | |
128 | }; | |
129 | }; | |
130 | ################################################################################ | |
131 | ||
132 | ################################################################################ | |
133 | sub InitDB { | |
134 | ################################################################################ | |
135 | ### initialise database connection | |
136 | ### IN : $ConfigR: reference to configuration hash | |
3430c898 | 137 | ### $Die : if TRUE, die if connection fails |
741336c2 TH |
138 | ### OUT: DBHandle |
139 | my ($ConfigR,$Die) = @_; | |
140 | my %Conf = %$ConfigR; | |
141 | my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); | |
142 | if (!$DBHandle) { | |
143 | die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); | |
144 | warn("$MySelf: W: $DBI::errstr\n"); | |
145 | }; | |
146 | return $DBHandle; | |
147 | }; | |
148 | ################################################################################ | |
149 | ||
150 | #####------------------------------ GetStats ------------------------------##### | |
151 | ||
152 | ################################################################################ | |
153 | sub ListNewsgroups { | |
154 | ################################################################################ | |
3430c898 TH |
155 | ### explode a (scalar) list of newsgroup names to a list of newsgroup and |
156 | ### hierarchy names where every newsgroup and hierarchy appears only once: | |
157 | ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin | |
ad609792 TH |
158 | ### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) |
159 | ### $ValidGroupsR: reference to a hash containing all valid newsgroups | |
160 | ### as keys | |
161 | ### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys | |
162 | my ($Newsgroups,$ValidGroupsR) = @_; | |
163 | my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; | |
741336c2 TH |
164 | my %Newsgroups; |
165 | chomp($Newsgroups); | |
166 | # remove whitespace from contents of Newsgroups: | |
167 | $Newsgroups =~ s/\s//; | |
168 | # call &HierarchyCount for each newsgroup in $Newsgroups: | |
169 | for (split /,/, $Newsgroups) { | |
ad609792 TH |
170 | # don't count invalid newsgroups |
171 | if(%ValidGroups and !defined($ValidGroups{$_})) { | |
172 | warn (sprintf("DROPPED: %s\n",$_)); | |
173 | next; | |
174 | } | |
741336c2 TH |
175 | # add original newsgroup to %Newsgroups |
176 | $Newsgroups{$_} = 1; | |
177 | # add all hierarchy elements to %Newsgroups, amended by '.ALL', | |
178 | # i.e. de.alt.ALL and de.ALL | |
179 | foreach (ParseHierarchies($_)) { | |
180 | $Newsgroups{$_.'.ALL'} = 1; | |
181 | } | |
182 | }; | |
183 | return %Newsgroups; | |
184 | }; | |
185 | ||
186 | ################################################################################ | |
187 | sub ParseHierarchies { | |
188 | ################################################################################ | |
3430c898 TH |
189 | ### return a list of all hierarchy levels a newsgroup belongs to |
190 | ### (for de.alt.test.moderated that would be de/de.alt/de.alt.test) | |
741336c2 TH |
191 | ### IN : $Newsgroup : a newsgroup name |
192 | ### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to | |
193 | my ($Newsgroup) = @_; | |
194 | my @Hierarchies; | |
195 | # strip trailing dots | |
196 | $Newsgroup =~ s/(.+)\.+$/$1/; | |
197 | # butcher newsgroup name by "." and add each hierarchy to @Hierarchies | |
198 | # i.e. de.alt.test: "de.alt" and "de" | |
199 | while ($Newsgroup =~ /\./) { | |
200 | $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; | |
201 | push @Hierarchies, $Newsgroup; | |
202 | }; | |
203 | return @Hierarchies; | |
204 | }; | |
205 | ||
ad609792 TH |
206 | ################################################################################ |
207 | sub ReadGroupList { | |
208 | ################################################################################ | |
209 | ### read a list of valid newsgroups from file (each group on one line, | |
210 | ### ignoring everything after the first whitespace and so accepting files | |
211 | ### in checkgroups format as well as (parts of) an INN active file) | |
212 | ### IN : $Filename : file to read | |
213 | ### OUT: \%ValidGroups: hash containing all valid newsgroups | |
214 | my ($Filename) = @_; | |
215 | my %ValidGroups; | |
216 | open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; | |
217 | while (<$LIST>) { | |
218 | s/^(\S+).*$/$1/; | |
219 | chomp; | |
220 | $ValidGroups{$_} = '1'; | |
221 | }; | |
222 | close $LIST; | |
223 | return \%ValidGroups; | |
224 | }; | |
225 | ||
741336c2 TH |
226 | ################################################################################ |
227 | ||
228 | #####----------------------------- TimePeriods ----------------------------##### | |
229 | ||
230 | ################################################################################ | |
231 | sub GetTimePeriod { | |
232 | ################################################################################ | |
3430c898 TH |
233 | ### get a time period to act on, in order of preference: by default the |
234 | ### last month; or a month submitted by -m YYYY-MM; or a time period submitted | |
235 | ### by -p YYYY-MM:YYYY-MM | |
741336c2 | 236 | ### IN : $Month,$Period: contents of -m and -p |
3430c898 | 237 | ### OUT: $StartMonth, $EndMonth (identical if period is just one month) |
741336c2 TH |
238 | my ($Month,$Period) = @_; |
239 | # exit if -m is set and not like YYYY-MM | |
240 | die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); | |
b221278d TH |
241 | # warn if -m and -p is set |
242 | warn "$MySelf: W: Time period assigned by '-p' takes precendece over month assigned by '-m'.\n" if ($Month && $Period); | |
741336c2 TH |
243 | # default: set -m to last month |
244 | $Month = &LastMonth if (!defined($Month) and !defined($Period)); | |
245 | # set $StartMonth, $EndMonth | |
246 | my ($StartMonth, $EndMonth); | |
247 | if ($Period) { | |
248 | # -p: get date range | |
249 | ($StartMonth, $EndMonth) = &SplitPeriod($Period); | |
250 | die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); | |
251 | } else { | |
252 | # set $StartMonth = $EndMonth = $Month if -p is not set | |
253 | $StartMonth = $EndMonth = $Month; | |
254 | }; | |
255 | return ($StartMonth, $EndMonth); | |
256 | }; | |
257 | ||
258 | ################################################################################ | |
259 | sub LastMonth { | |
260 | ################################################################################ | |
3430c898 | 261 | ### get last month from todays date in YYYY-MM format |
741336c2 TH |
262 | ### OUT: last month as YYYY-MM |
263 | # get today's date | |
264 | my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); | |
265 | # $Month is already defined from 0 to 11, so no need to decrease it by 1 | |
266 | $Year += 1900; | |
267 | if ($Month < 1) { | |
268 | $Month = 12; | |
269 | $Year--; | |
270 | }; | |
271 | # return last month | |
272 | return sprintf('%4d-%02d',$Year,$Month); | |
273 | }; | |
274 | ||
275 | ################################################################################ | |
276 | sub CheckMonth { | |
277 | ################################################################################ | |
3430c898 | 278 | ### check if input is a valid month in YYYY-MM form |
741336c2 TH |
279 | ### IN : $Month: month |
280 | ### OUT: TRUE / FALSE | |
281 | my ($Month) = @_; | |
282 | return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); | |
283 | return 1; | |
284 | }; | |
285 | ||
286 | ################################################################################ | |
287 | sub SplitPeriod { | |
288 | ################################################################################ | |
3430c898 | 289 | ### split a time period denoted by YYYY-MM:YYYY-MM into start and end month |
741336c2 TH |
290 | ### IN : $Period: time period |
291 |