groupstats.pl: Improve comments.
[usenet/newsstats.git] / gatherstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# gatherstats.pl
4#
5# This script will gather statistical information from a database
6# containing headers and other information from an INN feed.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
# Compile-time setup: publish the script version and make modules that live
# next to this script (such as NewsStats.pm) loadable by appending the
# script's own directory to the module search path.
BEGIN {
    use File::Basename;
    our $VERSION = "0.01";
    my $ScriptDir = dirname($0);
    push @INC, $ScriptDir;
}
20use strict;
21
22use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);
23
24use DBI;
25
26################################# Definitions ##################################
27
# Types of information that can be gathered:
# all / groups (/ clients / hosts)
# Only the keys are meaningful; every value is left undefined.
my %LegalTypes = map { $_ => undef } qw(all groups);
32
33################################# Main program #################################
34
### read commandline options
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');

### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};

### override configuration via commandline options
# maps each overriding switch to the configuration key it replaces
my %ConfKeyForSwitch = (
    'r' => 'DBTableRaw',
    'g' => 'DBTableGrps',
    'c' => 'DBTableClnts',
    's' => 'DBTableHosts',
    'n' => 'TLH',
);
my %ConfOverride;
foreach my $Switch (keys %ConfKeyForSwitch) {
    # switches that were not given (or are false) override nothing
    next if !$Options{$Switch};
    $ConfOverride{$ConfKeyForSwitch{$Switch}} = $Options{$Switch};
}
&OverrideConfig(\%Conf,\%ConfOverride);

### get type of information to gather, defaulting to 'all'
$Options{'t'} ||= 'all';
exists($LegalTypes{$Options{'t'}})
    or die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n";

### get time period (-m or -p)
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});

### init database
my $DBHandle = InitDB(\%Conf,1);
59
### get data for each month
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};

# Only the 'groups' type is implemented so far; 'all' includes it.
my $GatherGroups = ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups');

# Neither the SQL statements nor the TLH filter depend on the month or the
# newsgroup, so they are prepared/compiled once up front instead of once per
# month or per newsgroup.
# All values go into sprintf() as arguments, never into the format string,
# so a '%' inside $DBI::errstr cannot be misread as a conversion.
my ($GroupsReadQuery, $GroupsWriteQuery, $TLHFilter);
if ($GatherGroups) {
    # reads the Newsgroups: header of every posting of a month from the raw table
    $GroupsReadQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}))
        or die sprintf("%s: E: Can't prepare query for groups data from %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

    # writes one (month, newsgroup, postings) row; not needed in output only mode
    if (!$Options{'o'}) {
        $GroupsWriteQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}))
            or die sprintf("%s: E: Can't prepare statement for groups data to %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
    }

    # NOTE(review): TLH is applied as a regular expression anchored at the
    # start only, so 'de' also matches e.g. 'dev.*' and a '.' matches any
    # character. Behaviour is kept as is; confirm whether a hierarchy
    # boundary (and quoting of TLH) is intended.
    $TLHFilter = qr/^$Conf{'TLH'}/ if defined($Conf{'TLH'});
}

foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {

    print "---------- $Month ----------\n" if $Options{'d'};

    if ($GatherGroups) {
        ### ----------------------------------------------
        ### get groups data (number of postings per group)
        # get groups data from raw table for given month
        $GroupsReadQuery->execute($Month.'-%')
            or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

        # count postings per group
        my %Postings;

        # a lexical row variable is used so that the global $_ is not clobbered
        while (my ($NewsgroupsHeader) = $GroupsReadQuery->fetchrow_array) {
            # get list of newsgroups and hierarchies from Newsgroups:
            my %Newsgroups = ListNewsgroups($NewsgroupsHeader);
            # count each newsgroup and hierarchy once
            foreach my $Newsgroup (keys %Newsgroups) {
                # don't count newsgroup/hierarchy in wrong TLH
                next if (defined($TLHFilter) and $Newsgroup !~ $TLHFilter);
                $Postings{$Newsgroup}++;
            }
        }

        print "----- GroupStats -----\n" if $Options{'d'};
        foreach my $Newsgroup (sort keys %Postings) {
            print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
            next if $Options{'o'};
            # write to database
            $GroupsWriteQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
                or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",$MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
        }
    } else {
        # other types of information go here - later on
    }
}

### close handles
$DBHandle->disconnect;
104
105__END__
106
107################################ Documentation #################################
108
109=head1 NAME
110
111gatherstats - process statistical data from a raw source
112
113=head1 SYNOPSIS
114
115B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]
116
117=head1 REQUIREMENTS
118
119See doc/README: Perl 5.8.x itself and the following modules from CPAN:
120
121=over 2
122
123=item -
124
125Config::Auto
126
127=item -
128
129DBI
130
131=back
132
133=head1 DESCRIPTION
134
135This script will extract and process statistical information from a
136database table which is fed from F<feedlog.pl> for a given time period
313610f6
TH
137and write its results to (an)other database table(s). Entries marked
138with I<'disregard'> in the database will be ignored; currently, you have
139to set this flag yourself, using your database management tools. You
140can exclude erroneous entries that way (e.g. automatic reposts (think
141of cancels flood and resurrectors); spam; ...).
2832c235
TH
142
143The time period to act on defaults to last month; you can assign
144another month via the B<-m> switch or a time period via the B<-p>
145switch; the latter takes precedence.
146
147By default B<gatherstats> will process all types of information; you
148can change that using the B<-t> switch and assigning the type of
149information to process. Currently only processing of the number of
150postings per group per month is implemented anyway, so that doesn't
151matter yet.
152
153Possible information types include:
154
155=over 3
156
157=item B<groups> (postings per group per month)
158
159B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
160counted for each single group they appear in. Groups not in I<TLH>
161will be ignored.
162
163B<gatherstats> will also add up the number of postings for each
164hierarchy level, but only count each posting once. A posting to
165de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
166respectively. A crossposting to de.alt.test and de.alt.admin, on the
167other hand, will be counted for de.alt.test and de.alt.admin each, but
168only once for de.alt.ALL and de.ALL.
169
170Data is written to I<DBTableGrps> (see doc/INSTALL).
171
172=back
173
174=head2 Configuration
175
176F<gatherstats.pl> will read its configuration via Config::Auto from
177F<newsstats.conf>, which should be present in the same directory.
178
179See doc/INSTALL for an overview of possible configuration options.
180
181You can override configuration options via the B<-n>, B<-r>, B<-g>,
182B<-c> and B<-s> switches, respectively.
183
184=head1 OPTIONS
185
186=over 3
187
188=item B<-V> (version)
189
190Print out version and copyright information on B<gatherstats> and exit.
191
192=item B<-h> (help)
193
194Print this man page and exit.
195
196=item B<-d> (debug)
197
198Output debugging information to STDOUT while processing (number of
199postings per group).
200
201=item B<-o> (output only)
202
203Do not write results to database. You should use B<-d> in conjunction
204with B<-o> ... everything else seems a bit pointless.
205
206=item B<-m> I<YYYY-MM> (month)
207
208Set processing period to a month in YYYY-MM format. Ignored if B<-p>
209is set.
210
211=item B<-p> I<YYYY-MM:YYYY-MM> (period)
212
213Set processing period to a time period between two months, each in
214YYYY-MM format, separated by a colon. Overrides B<-m>.
215
216=item B<-t> I<type> (type)
217
218Set processing type to one of I<all> and I<groups>. Defaults to all
219(and is currently rather pointless as only I<groups> has been
220implemented).
221
222=item B<-n> I<TLH> (newsgroup hierarchy)
223
224Override I<TLH> from F<newsstats.conf>.
225
226=item B<-r> I<table> (raw data table)
227
228Override I<DBTableRaw> from F<newsstats.conf>.
229
230=item B<-g> I<table> (postings per group table)
231
232Override I<DBTableGrps> from F<newsstats.conf>.
233
234=item B<-c> I<table> (client data table)
235
236Override I<DBTableClnts> from F<newsstats.conf>.
237
238=item B<-s> I<table> (server/host data table)
239
240Override I<DBTableHosts> from F<newsstats.conf>.
241
242=back
243
244=head1 INSTALLATION
245
246See doc/INSTALL.
247
248=head1 EXAMPLES
249
250Process all types of information for last month:
251
252 gatherstats
253
254Do a dry run, showing results of processing:
255
256 gatherstats -do
257
258Process all types of information for January of 2010:
259
260 gatherstats -m 2010-01
261
262Process only number of postings for the year of 2010:
263
264 gatherstats -p 2010-01:2010-12 -t groups
265
266=head1 FILES
267
268=over 4
269
270=item F<gatherstats.pl>
271
272The script itself.
273
274=item F<NewsStats.pm>
275
276Library functions for the NewsStats package.
277
278=item F<newsstats.conf>
279
280Runtime configuration file for B<gatherstats>.
281
282=back
283
284=head1 BUGS
285
286Please report any bugs or feature requests to the author or use the
287bug tracker at L<http://bugs.th-h.de/>!
288
289=head1 SEE ALSO
290
291=over 2
292
293=item -
294
295doc/README
296
297=item -
298
299doc/INSTALL
300
301=back
302
303This script is part of the B<NewsStats> package.
304
305=head1 AUTHOR
306
307Thomas Hochstein <thh@inter.net>
308
309=head1 COPYRIGHT AND LICENSE
310
311Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
312
313This program is free software; you may redistribute it and/or modify it
314under the same terms as Perl itself.
315
316=cut
This page took 0.024154 seconds and 4 git commands to generate.