Commit | Line | Data |
---|---|---|
2832c235 TH |
1 | #! /usr/bin/perl -W |
2 | # | |
3 | # gatherstats.pl | |
4 | # | |
5 | # This script will gather statistical information from a database | |
6 | # containing headers and other information from an INN feed. | |
7 | # | |
8 | # It is part of the NewsStats package. | |
9 | # | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
11 | # | |
12 | # It can be redistributed and/or modified under the same terms under | |
13 | # which Perl itself is published. | |
14 | ||
15 | BEGIN { | |
16 | our $VERSION = "0.01"; | |
17 | use File::Basename; | |
18 | push(@INC, dirname($0)); | |
19 | } | |
20 | use strict; | |
21 | ||
22 | use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups); | |
23 | ||
24 | use DBI; | |
25 | ||
26 | ################################# Definitions ################################## | |
27 | ||
28 | # define types of information that can be gathered (legal values for -t) | |
29 | # currently 'all' and 'groups'; 'clients' and 'hosts' are not accepted yet. Only the keys matter (checked via exists), values stay undef. | |
30 | my %LegalTypes; | |
31 | @LegalTypes{('all','groups')} = (); | |
32 | ||
33 | ################################# Main program ################################# | |
34 | ||
35 | ### read commandline options (presumably a getopts-style spec: -d and -o are flags, switches followed by ':' take an argument - confirm against NewsStats::ReadOptions) | |
36 | my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:'); | |
37 | ||
38 | ### read configuration (ReadConfig returns a hash reference, copied into %Conf here) | |
39 | my %Conf = %{ReadConfig('newsstats.conf')}; | |
40 | ||
41 | ### override configuration via commandline options (only switches given with a true value are collected) | |
42 | my %ConfOverride; | |
43 | $ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; | |
44 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; | |
45 | $ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; | |
46 | $ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; | |
47 | $ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; | |
48 | &OverrideConfig(\%Conf,\%ConfOverride); | |
49 | ||
50 | ### get type of information to gather, defaulting to 'all'; abort on any type not listed in %LegalTypes | |
51 | $Options{'t'} = 'all' if !$Options{'t'}; | |
52 | die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); | |
53 | ||
54 | ### get first and last month of the time period (-m or -p) | |
55 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); | |
56 | ||
57 | ### init database (the returned handle is used for all queries below) | |
58 | my $DBHandle = InitDB(\%Conf,1); | |
59 | ||
60 | ### get data for each month of the requested period, one month at a time | |
61 | warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; | |
62 | foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { | |
63 | ||
64 | print "---------- $Month ----------\n" if $Options{'d'}; | |
65 | ||
66 | if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { | |
67 | ### ---------------------------------------------- | |
68 | ### get groups data (number of postings per group) | |
69 | # fetch the newsgroups column of all rows in the raw table whose day matches '<month>-%' and which are not flagged as disregard | |
70 | my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); | |
71 | $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); | |
72 | ||
73 | # count postings per group (key: newsgroup or hierarchy name, value: number of postings in this month) | |
74 | my %Postings; | |
75 | ||
76 | while (($_) = $DBQuery->fetchrow_array) { | |
77 | # get list of newsgroups and hierarchies from Newsgroups: (presumably a hash keyed by group/hierarchy name - confirm against NewsStats::ListNewsgroups) | |
78 | my %Newsgroups = ListNewsgroups($_); | |
79 | # count each newsgroup and hierarchy once per posting (hash keys are unique) | |
80 | foreach (sort keys %Newsgroups) { | |
81 | # don't count newsgroup/hierarchy in wrong TLH. NOTE(review): TLH is interpolated as a regex anchored only at the start, so 'de' would also match e.g. 'dex.test' - confirm this is intended | |
82 | next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/); | |
83 | $Postings{$_}++; | |
84 | }; | |
85 | }; | |
86 | ||
87 | print "----- GroupStats -----\n" if $Options{'d'}; | |
88 | foreach my $Newsgroup (sort keys %Postings) { | |
89 | print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; | |
90 | if (!$Options{'o'}) { | |
91 | # write to database; REPLACE INTO is used so an existing row gets overwritten (assumes a unique key on month and newsgroup - TODO confirm against the table definition) | |
92 | $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); | |
93 | $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); | |
94 | $DBQuery->finish; | |
95 | }; | |
96 | }; | |
97 | } else { | |
98 | # other types of information go here - later on (currently unreachable, as only 'all' and 'groups' pass the type check) | |
99 | }; | |
100 | }; | |
101 | ||
102 | ### close the database connection | |
103 | $DBHandle->disconnect; | |
104 | ||
105 | __END__ | |
106 | ||
107 | ################################ Documentation ################################# | |
108 | ||
109 | =head1 NAME | |
110 | ||
111 | gatherstats - process statistical data from a raw source | |
112 | ||
113 | =head1 SYNOPSIS | |
114 | ||
115 | B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>] | |
116 | ||
117 | =head1 REQUIREMENTS | |
118 | ||
119 | See doc/README: Perl 5.8.x itself and the following modules from CPAN: | |
120 | ||
121 | =over 2 | |
122 | ||
123 | =item - | |
124 | ||
125 | Config::Auto | |
126 | ||
127 | =item - | |
128 | ||
129 | DBI | |
130 | ||
131 | =back | |
132 | ||
133 | =head1 DESCRIPTION | |
134 | ||
135 | This script will extract and process statistical information from a | |
136 | database table which is fed from F<feedlog.pl> for a given time period | |
137 | and write its results to (an)other database table(s). | |
138 | ||
139 | The time period to act on defaults to last month; you can assign | |
140 | another month via the B<-m> switch or a time period via the B<-p> | |
141 | switch; the latter takes precedence. | |
142 | ||
143 | By default B<gatherstats> will process all types of information; you | |
144 | can change that using the B<-t> switch and assigning the type of | |
145 | information to process. Currently only processing of the number of | |
146 | postings per group per month is implemented anyway, so that doesn't | |
147 | matter yet. | |
148 | ||
149 | Possible information types include: | |
150 | ||
151 | =over 3 | |
152 | ||
153 | =item B<groups> (postings per group per month) | |
154 | ||
155 | B<gatherstats> will examine Newsgroups: headers. Crosspostings will be | |
156 | counted for each single group they appear in. Groups not in I<TLH> | |
157 | will be ignored. | |
158 | ||
159 | B<gatherstats> will also add up the number of postings for each | |
160 | hierarchy level, but only count each posting once. A posting to | |
161 | de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, | |
162 | respectively. A crossposting to de.alt.test and de.alt.admin, on the | |
163 | other hand, will be counted for de.alt.test and de.alt.admin each, but | |
164 | only once for de.alt.ALL and de.ALL. | |
165 | ||
166 | Data is written to I<DBTableGrps> (see doc/INSTALL). | |
167 | ||
168 | =back | |
169 | ||
170 | =head2 Configuration | |
171 | ||
172 | F<gatherstats.pl> will read its configuration via Config::Auto from | |
173 | F<newsstats.conf>, which should be present in the same directory. | |
174 | ||
175 | See doc/INSTALL for an overview of possible configuration options. | |
176 | ||
177 | You can override configuration options via the B<-n>, B<-r>, B<-g>, | |
178 | B<-c> and B<-s> switches, respectively. | |
179 | ||
180 | =head1 OPTIONS | |
181 | ||
182 | =over 3 | |
183 | ||
184 | =item B<-V> (version) | |
185 | ||
186 | Print out version and copyright information on B<gatherstats> and exit. | |
187 | ||
188 | =item B<-h> (help) | |
189 | ||
190 | Print this man page and exit. | |
191 | ||
192 | =item B<-d> (debug) | |
193 | ||
194 | Output debugging information to STDOUT while processing (number of | |
195 | postings per group). | |
196 | ||
197 | =item B<-o> (output only) | |
198 | ||
199 | Do not write results to database. You should use B<-d> in conjunction | |
200 | with B<-o> ... everything else seems a bit pointless. | |
201 | ||
202 | =item B<-m> I<YYYY-MM> (month) | |
203 | ||
204 | Set processing period to a month in YYYY-MM format. Ignored if B<-p> | |
205 | is set. | |
206 | ||
207 | =item B<-p> I<YYYY-MM:YYYY-MM> (period) | |
208 | ||
209 | Set processing period to a time period between two months, each in | |
210 | YYYY-MM format, separated by a colon. Overrides B<-m>. | |
211 | ||
212 | =item B<-t> I<type> (type) | |
213 | ||
214 | Set processing type to one of I<all> and I<groups>. Defaults to all | |
215 | (and is currently rather pointless as only I<groups> has been | |
216 | implemented). | |
217 | ||
218 | =item B<-n> I<TLH> (newsgroup hierarchy) | |
219 | ||
220 | Override I<TLH> from F<newsstats.conf>. | |
221 | ||
222 | =item B<-r> I<table> (raw data table) | |
223 | ||
224 | Override I<DBTableRaw> from F<newsstats.conf>. | |
225 | ||
226 | =item B<-g> I<table> (postings per group table) | |
227 | ||
228 | Override I<DBTableGrps> from F<newsstats.conf>. | |
229 | ||
230 | =item B<-c> I<table> (client data table) | |
231 | ||
232 | Override I<DBTableClnts> from F<newsstats.conf>. | |
233 | ||
234 | =item B<-s> I<table> (server/host data table) | |
235 | ||
236 | Override I<DBTableHosts> from F<newsstats.conf>. | |
237 | ||
238 | =back | |
239 | ||
240 | =head1 INSTALLATION | |
241 | ||
242 | See doc/INSTALL. | |
243 | ||
244 | =head1 EXAMPLES | |
245 | ||
246 | Process all types of information for last month: | |
247 | ||
248 | gatherstats | |
249 | ||
250 | Do a dry run, showing results of processing: | |
251 | ||
252 | gatherstats -do | |
253 | ||
254 | Process all types of information for January of 2010: | |
255 | ||
256 | gatherstats -m 2010-01 | |
257 | ||
258 | Process only number of postings for the year of 2010: | |
259 | ||
260 | gatherstats -p 2010-01:2010-12 -t groups | |
261 | ||
262 | =head1 FILES | |
263 | ||
264 | =over 4 | |
265 | ||
266 | =item F<gatherstats.pl> | |
267 | ||
268 | The script itself. | |
269 | ||
270 | =item F<NewsStats.pm> | |
271 | ||
272 | Library functions for the NewsStats package. | |
273 | ||
274 | =item F<newsstats.conf> | |
275 | ||
276 | Runtime configuration file for B<gatherstats>. | |
277 | ||
278 | =back | |
279 | ||
280 | =head1 BUGS | |
281 | ||
282 | Please report any bugs or feature requests to the author or use the | |
283 | bug tracker at L<http://bugs.th-h.de/>! | |
284 | ||
285 | =head1 SEE ALSO | |
286 | ||
287 | =over 2 | |
288 | ||
289 | =item - | |
290 | ||
291 | doc/README | |
292 | ||
293 | =item - | |
294 | ||
295 | doc/INSTALL | |
296 | ||
297 | =back | |
298 | ||
299 | This script is part of the B<NewsStats> package. | |
300 | ||
301 | =head1 AUTHOR | |
302 | ||
303 | Thomas Hochstein <thh@inter.net> | |
304 | ||
305 | =head1 COPYRIGHT AND LICENSE | |
306 | ||
307 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net> | |
308 | ||
309 | This program is free software; you may redistribute it and/or modify it | |
310 | under the same terms as Perl itself. | |
311 | ||
312 | =cut |