Add comments and POD.
[usenet/newsstats.git] / gatherstats.pl
... / ...
CommitLineData
1#! /usr/bin/perl -W\r
2#\r
3# gatherstats.pl\r
4#\r
5# This script will gather statistical information from a database\r
6# containing headers and other information from an INN feed.\r
7# \r
8# It is part of the NewsStats package.\r
9#\r
10# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
11#\r
12# It can be redistributed and/or modified under the same terms under \r
13# which Perl itself is published.\r
14\r
# Compile-time bootstrap: record the script version and append the directory
# this script was started from to the module search path, so that modules
# shipped alongside it can be found by the "use" statements that follow.
BEGIN {
    use File::Basename;

    our $VERSION = '0.01';

    my $ScriptDir = dirname($0);
    push @INC, $ScriptDir;
}
20use strict;\r
21\r
22use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);\r
23\r
24use DBI;\r
25\r
26################################# Definitions ##################################\r
27\r
# Types of information that may be requested via -t.
# Only the presence of a key is ever checked (with exists), so every value
# is left undefined. Accepted right now: all / groups
# (clients / hosts are mentioned as further types but are not accepted yet).
my %LegalTypes = map { $_ => undef } qw(all groups);
32\r
33################################# Main program #################################\r
34\r
### parse command line switches
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');

### load the configuration file
my %Conf = %{ReadConfig('newsstats.conf')};

### let command line switches take priority over the configuration file
# maps each overriding switch to the configuration key it replaces
my %SwitchToConfKey = (
    'r' => 'DBTableRaw',
    'g' => 'DBTableGrps',
    'c' => 'DBTableClnts',
    's' => 'DBTableHosts',
    'n' => 'TLH',
);
my %ConfOverride;
foreach my $Switch (keys %SwitchToConfKey) {
    # a switch that is unset (or false) leaves the configured value alone
    next unless $Options{$Switch};
    $ConfOverride{$SwitchToConfKey{$Switch}} = $Options{$Switch};
}
&OverrideConfig(\%Conf,\%ConfOverride);

### decide which type of information to gather ('all' unless -t is given)
$Options{'t'} ||= 'all';
unless (exists($LegalTypes{$Options{'t'}})) {
    die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n";
}

### determine the first and last month to process from -m / -p
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});

### set up the database connection
my $DBHandle = InitDB(\%Conf,1);
59\r
### get data for each month
# For every month of the requested period: read the Newsgroups: values of
# all postings from the raw table, count postings per newsgroup/hierarchy
# entry as returned by ListNewsgroups(), print the counts if -d is set and
# write them to the groups table unless -o (output only) is set.
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {

    print "---------- $Month ----------\n" if $Options{'d'};

    if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
        ### ----------------------------------------------
        ### get groups data (number of postings per group)
        # get groups data from raw table for given month
        # NOTE: $MySelf and $DBI::errstr are passed to sprintf as arguments
        # instead of being interpolated into the format string, so that a
        # '%' inside a driver error message cannot be misread as a
        # conversion and garble the diagnostic.
        my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}))
            or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);
        $DBQuery->execute($Month.'-%')
            or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

        # count postings per group
        my %Postings;

        # a named lexical is used for the row so the global $_ is not clobbered
        while (my ($NewsgroupsHeader) = $DBQuery->fetchrow_array) {
            # get list of newsgroups and hierarchies from Newsgroups:
            my %Newsgroups = ListNewsgroups($NewsgroupsHeader);
            # count each newsgroup and hierarchy once
            # (order is irrelevant for counting, so the keys are not sorted)
            foreach my $Group (keys %Newsgroups) {
                # don't count newsgroup/hierarchy in wrong TLH
                # NOTE(review): TLH is interpolated as an unescaped regexp
                # that is only anchored at the start, so a TLH of 'de' also
                # accepts e.g. 'dev.null' - confirm whether that is intended
                # before tightening the match.
                next if(defined($Conf{'TLH'}) and $Group !~ /^$Conf{'TLH'}/);
                $Postings{$Group}++;
            }
        }

        print "----- GroupStats -----\n" if $Options{'d'};
        # statement handle for writing results; prepared lazily so that
        # output only mode never touches the groups table, and only once per
        # month instead of once per newsgroup
        my $DBWrite;
        foreach my $Newsgroup (sort keys %Postings) {
            print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
            next if $Options{'o'};
            # write to database
            $DBWrite ||= $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}))
                or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",$MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
            $DBWrite->execute($Month, $Newsgroup, $Postings{$Newsgroup})
                or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",$MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
            $DBWrite->finish;
        }
    } else {
        # other types of information go here - later on
    }
}

### close handles
$DBHandle->disconnect;
104\r
105__END__\r
106\r
107################################ Documentation #################################\r
108\r
109=head1 NAME\r
110\r
111gatherstats - process statistical data from a raw source\r
112\r
113=head1 SYNOPSIS\r
114\r
115B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]\r
116\r
117=head1 REQUIREMENTS\r
118\r
119See doc/README: Perl 5.8.x itself and the following modules from CPAN:\r
120\r
121=over 2\r
122\r
123=item -\r
124\r
125Config::Auto\r
126\r
127=item -\r
128\r
129DBI\r
130\r
131=back\r
132\r
133=head1 DESCRIPTION\r
134\r
135This script will extract and process statistical information from a\r
136database table which is fed from F<feedlog.pl> for a given time period\r
137and write its results to (an)other database table(s).\r
138\r
139The time period to act on defaults to last month; you can assign\r
140another month via the B<-m> switch or a time period via the B<-p>\r
141switch; the latter takes precedence.\r
142\r
143By default B<gatherstats> will process all types of information; you\r
144can change that using the B<-t> switch and assigning the type of\r
145information to process. Currently only processing of the number of\r
146postings per group per month is implemented anyway, so that doesn't\r
147matter yet.\r
148\r
149Possible information types include:\r
150\r
151=over 3\r
152\r
153=item B<groups> (postings per group per month)\r
154\r
155B<gatherstats> will examine Newsgroups: headers. Crosspostings will be\r
156counted for each single group they appear in. Groups not in I<TLH>\r
157will be ignored.\r
158\r
159B<gatherstats> will also add up the number of postings for each\r
160hierarchy level, but only count each posting once. A posting to\r
161de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,\r
162respectively. A crossposting to de.alt.test and de.alt.admin, on the\r
163other hand, will be counted for de.alt.test and de.alt.admin each, but\r
164only once for de.alt.ALL and de.ALL.\r
165\r
166Data is written to I<DBTableGrps> (see doc/INSTALL).\r
167\r
168=back\r
169\r
170=head2 Configuration\r
171\r
172F<gatherstats.pl> will read its configuration via Config::Auto from\r
173F<newsstats.conf>, which should be present in the same directory.\r
174\r
175See doc/INSTALL for an overview of possible configuration options.\r
176\r
177You can override configuration options via the B<-n>, B<-r>, B<-g>,\r
178B<-c> and B<-s> switches, respectively.\r
179\r
180=head1 OPTIONS\r
181\r
182=over 3\r
183\r
184=item B<-V> (version)\r
185\r
186Print out version and copyright information on B<gatherstats> and exit.\r
187\r
188=item B<-h> (help)\r
189\r
190Print this man page and exit.\r
191\r
192=item B<-d> (debug)\r
193\r
194Output debugging information to STDOUT while processing (number of\r
195postings per group).\r
196\r
197=item B<-o> (output only)\r
198\r
199Do not write results to database. You should use B<-d> in conjunction\r
200with B<-o> ... everything else seems a bit pointless.\r
201\r
202=item B<-m> I<YYYY-MM> (month)\r
203\r
204Set processing period to a month in YYYY-MM format. Ignored if B<-p>\r
205is set.\r
206\r
207=item B<-p> I<YYYY-MM:YYYY-MM> (period)\r
208\r
209Set processing period to a time period between two months, each in\r
210YYYY-MM format, separated by a colon. Overrides B<-m>.\r
211\r
212=item B<-t> I<type> (type)\r
213\r
214Set processing type to one of I<all> and I<groups>. Defaults to all\r
215(and is currently rather pointless as only I<groups> has been\r
216implemented).\r
217\r
218=item B<-n> I<TLH> (newsgroup hierarchy)\r
219\r
220Override I<TLH> from F<newsstats.conf>.\r
221\r
222=item B<-r> I<table> (raw data table)\r
223\r
224Override I<DBTableRaw> from F<newsstats.conf>.\r
225\r
226=item B<-g> I<table> (postings per group table)\r
227\r
228Override I<DBTableGrps> from F<newsstats.conf>.\r
229\r
230=item B<-c> I<table> (client data table)\r
231\r
232Override I<DBTableClnts> from F<newsstats.conf>.\r
233\r
234=item B<-s> I<table> (server/host data table)\r
235\r
236Override I<DBTableHosts> from F<newsstats.conf>.\r
237\r
238=back\r
239\r
240=head1 INSTALLATION\r
241\r
242See doc/INSTALL.\r
243\r
244=head1 EXAMPLES\r
245\r
246Process all types of information for last month:\r
247\r
248 gatherstats\r
249\r
250Do a dry run, showing results of processing:\r
251\r
252 gatherstats -do\r
253\r
254Process all types of information for January of 2010:\r
255\r
256 gatherstats -m 2010-01\r
257\r
258Process only number of postings for the year of 2010:\r
259\r
260 gatherstats -p 2010-01:2010-12 -t groups\r
261\r
262=head1 FILES\r
263\r
264=over 4\r
265\r
266=item F<gatherstats.pl>\r
267\r
268The script itself.\r
269\r
270=item F<NewsStats.pm>\r
271\r
272Library functions for the NewsStats package.\r
273\r
274=item F<newsstats.conf>\r
275\r
276Runtime configuration file for B<gatherstats>.\r
277\r
278=back\r
279\r
280=head1 BUGS\r
281\r
282Please report any bugs or feature requests to the author or use the\r
283bug tracker at L<http://bugs.th-h.de/>!\r
284\r
285=head1 SEE ALSO\r
286\r
287=over 2\r
288\r
289=item -\r
290\r
291doc/README\r
292\r
293=item -\r
294\r
295doc/INSTALL\r
296\r
297=back\r
298\r
299This script is part of the B<NewsStats> package.\r
300\r
301=head1 AUTHOR\r
302\r
303Thomas Hochstein <thh@inter.net>\r
304\r
305=head1 COPYRIGHT AND LICENSE\r
306\r
307Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
308\r
309This program is free software; you may redistribute it and/or modify it\r
310under the same terms as Perl itself.\r
311\r
312=cut\r
This page took 0.011628 seconds and 4 git commands to generate.