Fix some typos.
[usenet/newsstats.git] / gatherstats.pl
CommitLineData
2832c235
TH
1#! /usr/bin/perl -W
2#
3# gatherstats.pl
4#
5# This script will gather statistical information from a database
6# containing headers and other information from an INN feed.
7#
8# It is part of the NewsStats package.
9#
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
11#
12# It can be redistributed and/or modified under the same terms under
13# which Perl itself is published.
14
15BEGIN {
16 our $VERSION = "0.01";
17 use File::Basename;
18 push(@INC, dirname($0));
19}
20use strict;
21
ad609792 22use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList);
2832c235
TH
23
24use DBI;
25
26################################# Definitions ##################################
27
28# define types of information that can be gathered
29# all / groups (/ clients / hosts)
30my %LegalTypes;
31@LegalTypes{('all','groups')} = ();
32
33################################# Main program #################################
34
35### read commandline options
ad609792 36my %Options = &ReadOptions('dom:p:t:l:n:r:g:c:s:');
2832c235
TH
37
38### read configuration
39my %Conf = %{ReadConfig('newsstats.conf')};
40
41### override configuration via commandline options
42my %ConfOverride;
43$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'};
44$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
45$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};
46$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
47$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};
48&OverrideConfig(\%Conf,\%ConfOverride);
49
50### get type of information to gather, defaulting to 'all'
51$Options{'t'} = 'all' if !$Options{'t'};
52die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});
53
54### get time period (-m or -p)
55my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
56
ad609792
TH
57### read newsgroups list from -l (hash stays empty if -l is not set; declared unconditionally because "my ... if COND" has undefined behaviour)
58my %ValidGroups = $Options{'l'} ? %{&ReadGroupList($Options{'l'})} : ();
59
2832c235
TH
60### init database
61my $DBHandle = InitDB(\%Conf,1);
62
63### get data for each month
64warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
65foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {
66
67 print "---------- $Month ----------\n" if $Options{'d'};
68
69 if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
70 ### ----------------------------------------------
71 ### get groups data (number of postings per group)
72 # get groups data from raw table for given month
73 my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));
74 $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'});
75
76 # count postings per group
77 my %Postings;
2832c235
TH
78 while (($_) = $DBQuery->fetchrow_array) {
79 # get list of newsgroups and hierarchies from Newsgroups:
89db2f90 80 my %Newsgroups = ListNewsgroups($_,$Conf{'TLH'},$Options{'l'} ? \%ValidGroups : '');
2832c235
TH
81 # count each newsgroup and hierarchy once
82 foreach (sort keys %Newsgroups) {
2832c235
TH
83 $Postings{$_}++;
84 };
85 };
86
ad609792
TH
87 # add valid but empty groups if -l is set
88 if (%ValidGroups) {
89 foreach (sort keys %ValidGroups) {
90 if (!defined($Postings{$_})) {
91 $Postings{$_} = 0 ;
92 warn (sprintf("ADDED: %s as empty group\n",$_));
93 }
94 };
95 };
96
71f0178b
TH
97 # delete old data for that month
98 if (!$Options{'o'}) {
99 $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}),undef,$Month)
100 or warn sprintf("$MySelf: E: Can't delete old groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
101 };
102
2832c235
TH
103 print "----- GroupStats -----\n" if $Options{'d'};
104 foreach my $Newsgroup (sort keys %Postings) {
105 print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
106 if (!$Options{'o'}) {
107 # write to database
71f0178b
TH
108 $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
109 # $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));
2832c235
TH
110 $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});
111 $DBQuery->finish;
112 };
113 };
114 } else {
115 # other types of information go here - later on
116 };
117};
118
119### close handles
120$DBHandle->disconnect;
121
122__END__
123
124################################ Documentation #################################
125
126=head1 NAME
127
128gatherstats - process statistical data from a raw source
129
130=head1 SYNOPSIS
131
ad609792 132B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-l> I<filename>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]
2832c235
TH
133
134=head1 REQUIREMENTS
135
136See doc/README: Perl 5.8.x itself and the following modules from CPAN:
137
138=over 2
139
140=item -
141
142Config::Auto
143
144=item -
145
146DBI
147
148=back
149
150=head1 DESCRIPTION
151
152This script will extract and process statistical information from a
153database table which is fed from F<feedlog.pl> for a given time period
313610f6
TH
154and write its results to (an)other database table(s). Entries marked
155with I<'disregard'> in the database will be ignored; currently, you have
156to set this flag yourself, using your database management tools. You
157can exclude erroneous entries that way (e.g. automatic reposts (think
158of cancel floods and resurrectors); spam; ...).
2832c235
TH
159
160The time period to act on defaults to last month; you can assign
161another month via the B<-m> switch or a time period via the B<-p>
162switch; the latter takes precedence.
163
164By default B<gatherstats> will process all types of information; you
165can change that using the B<-t> switch and assigning the type of
166information to process. Currently only processing of the number of
167postings per group per month is implemented anyway, so that doesn't
168matter yet.
169
170Possible information types include:
171
172=over 3
173
174=item B<groups> (postings per group per month)
175
176B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
177counted for each single group they appear in. Groups not in I<TLH>
178will be ignored.
179
180B<gatherstats> will also add up the number of postings for each
181hierarchy level, but only count each posting once. A posting to
182de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
183respectively. A crossposting to de.alt.test and de.alt.admin, on the
184other hand, will be counted for de.alt.test and de.alt.admin each, but
185only once for de.alt.ALL and de.ALL.
186
187Data is written to I<DBTableGrps> (see doc/INSTALL).
188
189=back
190
191=head2 Configuration
192
193F<gatherstats.pl> will read its configuration via Config::Auto from
194F<newsstats.conf>, which should be present in the same directory.
195
196See doc/INSTALL for an overview of possible configuration options.
197
198You can override configuration options via the B<-n>, B<-r>, B<-g>,
199B<-c> and B<-s> switches, respectively.
200
201=head1 OPTIONS
202
203=over 3
204
205=item B<-V> (version)
206
207Print out version and copyright information on B<gatherstats> and exit.
208
209=item B<-h> (help)
210
211Print this man page and exit.
212
213=item B<-d> (debug)
214
215Output debugging information to STDOUT while processing (number of
216postings per group).
217
218=item B<-o> (output only)
219
220Do not write results to database. You should use B<-d> in conjunction
221with B<-o> ... everything else seems a bit pointless.
222
223=item B<-m> I<YYYY-MM> (month)
224
225Set processing period to a month in YYYY-MM format. Ignored if B<-p>
226is set.
227
228=item B<-p> I<YYYY-MM:YYYY-MM> (period)
229
230Set processing period to a time period between two months, each in
231YYYY-MM format, separated by a colon. Overrides B<-m>.
232
233=item B<-t> I<type> (type)
234
235Set processing type to one of I<all> and I<groups>. Defaults to all
236(and is currently rather pointless as only I<groups> has been
237implemented).
238
ad609792
TH
239=item B<-l> I<filename> (check against list)
240
241Check each group against a list of valid newsgroups read from
242I<filename>, one group on each line and ignoring everything after the
243first whitespace (so you can use a file in checkgroups format or (part
244of) your INN active file).
245
246Newsgroups not found in I<filename> will be dropped (and logged to
247STDERR), and newsgroups found in I<filename> but having no postings
248will be added with a count of 0 (and logged to STDERR).
249
2832c235
TH
250=item B<-n> I<TLH> (newsgroup hierarchy)
251
252Override I<TLH> from F<newsstats.conf>.
253
254=item B<-r> I<table> (raw data table)
255
256Override I<DBTableRaw> from F<newsstats.conf>.
257
258=item B<-g> I<table> (postings per group table)
259
260Override I<DBTableGrps> from F<newsstats.conf>.
261
262=item B<-c> I<table> (client data table)
263
264Override I<DBTableClnts> from F<newsstats.conf>.
265
266=item B<-s> I<table> (server/host data table)
267
268Override I<DBTableHosts> from F<newsstats.conf>.
269
270=back
271
272=head1 INSTALLATION
273
274See doc/INSTALL.
275
276=head1 EXAMPLES
277
278Process all types of information for last month:
279
280 gatherstats
281
282Do a dry run, showing results of processing:
283
284 gatherstats -do
285
286Process all types of information for January of 2010:
287
288 gatherstats -m 2010-01
289
ad609792
TH
290Process only number of postings for the year of 2010,
291checking against checkgroups-2010.txt:
2832c235 292
ad609792 293 gatherstats -p 2010-01:2010-12 -t groups -l checkgroups-2010.txt
2832c235
TH
294
295=head1 FILES
296
297=over 4
298
299=item F<gatherstats.pl>
300
301The script itself.
302
303=item F<NewsStats.pm>
304
305Library functions for the NewsStats package.
306
307=item F<newsstats.conf>
308
309Runtime configuration file for the B<NewsStats> package.
310
311=back
312
313=head1 BUGS
314
315Please report any bugs or feature requests to the author or use the
316bug tracker at L<http://bugs.th-h.de/>!
317
318=head1 SEE ALSO
319
320=over 2
321
322=item -
323
324doc/README
325
326=item -
327
328doc/INSTALL
329
330=back
331
332This script is part of the B<NewsStats> package.
333
334=head1 AUTHOR
335
336Thomas Hochstein <thh@inter.net>
337
338=head1 COPYRIGHT AND LICENSE
339
340Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
341
342This program is free software; you may redistribute it and/or modify it
343under the same terms as Perl itself.
344
345=cut
This page took 0.026763 seconds and 4 git commands to generate.