Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | #! /usr/bin/perl -W\r |
2 | #\r | |
3 | # gatherstats.pl\r | |
4 | #\r | |
5 | # This script will gather statistical information from a database\r | |
6 | # containing headers and other information from an INN feed.\r | |
7 | # \r | |
8 | # It is part of the NewsStats package.\r | |
9 | #\r | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r | |
11 | #\r | |
12 | # It can be redistributed and/or modified under the same terms under \r | |
13 | # which Perl itself is published.\r | |
14 | \r | |
# Compile-time setup: publish the script version and append the directory
# this script was started from to the module search path, so that modules
# located next to the script can be loaded by later "use" statements.
BEGIN {
    use File::Basename;
    our $VERSION = "0.01";
    my $ScriptDir = dirname($0);
    push @INC, $ScriptDir;
}
20 | use strict;\r | |
21 | \r | |
22 | use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);\r | |
23 | \r | |
24 | use DBI;\r | |
25 | \r | |
26 | ################################# Definitions ##################################\r | |
27 | \r | |
# Types of information that can be gathered; the keys are the values
# accepted by the -t switch: all / groups (/ clients / hosts).
# Only the existence of a key matters, the values stay undefined.
my %LegalTypes = map { $_ => undef } qw(all groups);

################################# Main program #################################

### parse the commandline
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');

### load the configuration file
my %Conf = %{ ReadConfig('newsstats.conf') };

### let commandline switches take priority over the configuration file
# maps each overriding switch to the configuration key it replaces
my %ConfKeyForSwitch = (
    'r' => 'DBTableRaw',
    'g' => 'DBTableGrps',
    'c' => 'DBTableClnts',
    's' => 'DBTableHosts',
    'n' => 'TLH',
);
my %ConfOverride;
foreach my $Switch (keys %ConfKeyForSwitch) {
    # switches that were not given (or are false) leave the config untouched
    next unless $Options{$Switch};
    $ConfOverride{ $ConfKeyForSwitch{$Switch} } = $Options{$Switch};
}
&OverrideConfig(\%Conf,\%ConfOverride);

### type of information to gather; 'all' unless -t was given
$Options{'t'} ||= 'all';
exists($LegalTypes{$Options{'t'}})
    or die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n";

### time period to process (-m or -p)
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});

### open the database connection
my $DBHandle = InitDB(\%Conf,1);
59 | \r | |
### get data for each month
# For every month in the requested period, count the postings per newsgroup
# and hierarchy found in the raw table and store the totals in the groups
# table (unless -o "output only" was given).
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};

# Build the hierarchy filter once. TLH is matched literally (no regex
# metacharacters) and only as a complete leading name component, so that
# "de" selects "de.alt.test" and "de.ALL" but not "demon.announce".
# A trailing dot in the configured value is tolerated; an empty value
# means "no filter".
my $TLHFilter;
if (defined($Conf{'TLH'}) and length($Conf{'TLH'})) {
    (my $TLH = $Conf{'TLH'}) =~ s/\.+\z//;
    $TLHFilter = qr/^\Q$TLH\E(?:\.|\z)/ if length($TLH);
}

# Statement handles are prepared on first use and then reused for all
# months and newsgroups instead of being re-prepared inside the loops.
my ($SelectQuery, $ReplaceQuery);

foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {

    print "---------- $Month ----------\n" if $Options{'d'};

    if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {
        ### ----------------------------------------------
        ### get groups data (number of postings per group)
        # get groups data from raw table for given month
        $SelectQuery ||= $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}))
            or die sprintf("%s: E: Can't prepare query for groups data from %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);
        # the error text is passed as an argument, never as part of the
        # format string, so a '%' inside it cannot garble the message
        $SelectQuery->execute($Month.'-%')
            or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

        # count postings per group
        my %Postings;

        while (my ($NewsgroupsHeader) = $SelectQuery->fetchrow_array) {
            # get list of newsgroups and hierarchies from Newsgroups:
            my %Newsgroups = ListNewsgroups($NewsgroupsHeader);
            # count each newsgroup and hierarchy once
            foreach my $Group (keys %Newsgroups) {
                # don't count newsgroup/hierarchy in wrong TLH
                next if defined($TLHFilter) and $Group !~ $TLHFilter;
                $Postings{$Group}++;
            }
        }
        # fetchrow_array returns an empty list both at end of data and on
        # error; do not store partial counts if the fetch was aborted
        die sprintf("%s: E: Can't fetch groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$SelectQuery->errstr)
            if $SelectQuery->err;

        print "----- GroupStats -----\n" if $Options{'d'};
        foreach my $Newsgroup (sort keys %Postings) {
            print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
            # output only mode: nothing is written to the database
            next if $Options{'o'};

            # write to database
            $ReplaceQuery ||= $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}))
                or die sprintf("%s: E: Can't prepare query to write groups data to %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
            $ReplaceQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
                or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",$MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
        }
    } else {
        # other types of information go here - later on
    }
}

### close handles
$DBHandle->disconnect;
104 | \r | |
3430c898 TH |
105 | __END__\r |
106 | \r | |
107 | ################################ Documentation #################################\r | |
108 | \r | |
109 | =head1 NAME\r | |
110 | \r | |
111 | gatherstats - process statistical data from a raw source\r | |
112 | \r | |
113 | =head1 SYNOPSIS\r | |
114 | \r | |
115 | B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]\r | |
116 | \r | |
117 | =head1 REQUIREMENTS\r | |
118 | \r | |
119 | See doc/README: Perl 5.8.x itself and the following modules from CPAN:\r | |
120 | \r | |
121 | =over 2\r | |
122 | \r | |
123 | =item -\r | |
124 | \r | |
125 | Config::Auto\r | |
126 | \r | |
127 | =item -\r | |
128 | \r | |
129 | DBI\r | |
130 | \r | |
131 | =back\r | |
132 | \r | |
133 | =head1 DESCRIPTION\r | |
134 | \r | |
135 | This script will extract and process statistical information from a\r | |
136 | database table which is fed from F<feedlog.pl> for a given time period\r | |
137 | and write its results to (an)other database table(s).\r | |
138 | \r | |
139 | The time period to act on defaults to last month; you can assign\r | |
140 | another month via the B<-m> switch or a time period via the B<-p>\r | |
141 | switch; the latter takes precedence.\r | |
142 | \r | |
143 | By default B<gatherstats> will process all types of information; you\r | |
144 | can change that using the B<-t> switch and assigning the type of\r | |
145 | information to process. Currently only processing of the number of\r | |
146 | postings per group per month is implemented anyway, so that doesn't\r | |
147 | matter yet.\r | |
148 | \r | |
149 | Possible information types include:\r | |
150 | \r | |
151 | =over 3\r | |
152 | \r | |
153 | =item B<groups> (postings per group per month)\r | |
154 | \r | |
155 | B<gatherstats> will examine Newsgroups: headers. Crosspostings will be\r | |
156 | counted for each single group they appear in. Groups not in I<TLH>\r | |
157 | will be ignored.\r | |
158 | \r | |
159 | B<gatherstats> will also add up the number of postings for each\r | |
160 | hierarchy level, but only count each posting once. A posting to\r | |
161 | de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,\r | |
162 | respectively. A crossposting to de.alt.test and de.alt.admin, on the\r | |
163 | other hand, will be counted for de.alt.test and de.alt.admin each, but\r | |
164 | only once for de.alt.ALL and de.ALL.\r | |
165 | \r | |
166 | Data is written to I<DBTableGrps> (see doc/INSTALL).\r | |
167 | \r | |
168 | =back\r | |
169 | \r | |
170 | =head2 Configuration\r | |
171 | \r | |
172 | F<gatherstats.pl> will read its configuration via Config::Auto from\r | |
173 | F<newsstats.conf>, which should be present in the same directory.\r | |
174 | \r | |
175 | See doc/INSTALL for an overview of possible configuration options.\r | |
176 | \r | |
177 | You can override configuration options via the B<-n>, B<-r>, B<-g>,\r | |
178 | B<-c> and B<-s> switches, respectively.\r | |
179 | \r | |
180 | =head1 OPTIONS\r | |
181 | \r | |
182 | =over 3\r | |
183 | \r | |
184 | =item B<-V> (version)\r | |
185 | \r | |
186 | Print out version and copyright information on B<gatherstats> and exit.\r | |
187 | \r | |
188 | =item B<-h> (help)\r | |
189 | \r | |
190 | Print this man page and exit.\r | |
191 | \r | |
192 | =item B<-d> (debug)\r | |
193 | \r | |
194 | Output debugging information to STDOUT while processing (number of\r | |
195 | postings per group).\r | |
196 | \r | |
197 | =item B<-o> (output only)\r | |
198 | \r | |
199 | Do not write results to database. You should use B<-d> in conjunction\r | |
200 | with B<-o> ... everything else seems a bit pointless.\r | |
201 | \r | |
202 | =item B<-m> I<YYYY-MM> (month)\r | |
203 | \r | |
204 | Set processing period to a month in YYYY-MM format. Ignored if B<-p>\r | |
205 | is set.\r | |
206 | \r | |
207 | =item B<-p> I<YYYY-MM:YYYY-MM> (period)\r | |
208 | \r | |
209 | Set processing period to a time period between two months, each in\r | |
210 | YYYY-MM format, separated by a colon. Overrides B<-m>.\r | |
211 | \r | |
212 | =item B<-t> I<type> (type)\r | |
213 | \r | |
214 | Set processing type to one of I<all> and I<groups>. Defaults to all\r | |
215 | (and is currently rather pointless as only I<groups> has been\r | |
216 | implemented).\r | |
217 | \r | |
218 | =item B<-n> I<TLH> (newsgroup hierarchy)\r | |
219 | \r | |
220 | Override I<TLH> from F<newsstats.conf>.\r | |
221 | \r | |
222 | =item B<-r> I<table> (raw data table)\r | |
223 | \r | |
224 | Override I<DBTableRaw> from F<newsstats.conf>.\r | |
225 | \r | |
226 | =item B<-g> I<table> (postings per group table)\r | |
227 | \r | |
228 | Override I<DBTableGrps> from F<newsstats.conf>.\r | |
229 | \r | |
230 | =item B<-c> I<table> (client data table)\r | |
231 | \r | |
232 | Override I<DBTableClnts> from F<newsstats.conf>.\r | |
233 | \r | |
234 | =item B<-s> I<table> (server/host data table)\r | |
235 | \r | |
236 | Override I<DBTableHosts> from F<newsstats.conf>.\r | |
237 | \r | |
238 | =back\r | |
239 | \r | |
240 | =head1 INSTALLATION\r | |
241 | \r | |
242 | See doc/INSTALL.\r | |
243 | \r | |
244 | =head1 EXAMPLES\r | |
245 | \r | |
246 | Process all types of information for last month:\r | |
247 | \r | |
248 | gatherstats\r | |
249 | \r | |
250 | Do a dry run, showing results of processing:\r | |
251 | \r | |
252 | gatherstats -do\r | |
253 | \r | |
254 | Process all types of information for January of 2010:\r | |
255 | \r | |
256 | gatherstats -m 2010-01\r | |
257 | \r | |
258 | Process only number of postings for the year of 2010:\r | |
259 | \r | |
260 | gatherstats -p 2010-01:2010-12 -t groups\r | |
261 | \r | |
262 | =head1 FILES\r | |
263 | \r | |
264 | =over 4\r | |
265 | \r | |
266 | =item F<gatherstats.pl>\r | |
267 | \r | |
268 | The script itself.\r | |
269 | \r | |
270 | =item F<NewsStats.pm>\r | |
271 | \r | |
272 | Library functions for the NewsStats package.\r | |
273 | \r | |
274 | =item F<newsstats.conf>\r | |
275 | \r | |
276 | Runtime configuration file for B<gatherstats>.\r | |
277 | \r | |
278 | =back\r | |
279 | \r | |
280 | =head1 BUGS\r | |
281 | \r | |
282 | Please report any bugs or feature requests to the author or use the\r | |
283 | bug tracker at L<http://bugs.th-h.de/>!\r | |
284 | \r | |
285 | =head1 SEE ALSO\r | |
286 | \r | |
287 | =over 2\r | |
288 | \r | |
289 | =item -\r | |
290 | \r | |
291 | doc/README\r | |
292 | \r | |
293 | =item -\r | |
294 | \r | |
295 | doc/INSTALL\r | |
296 | \r | |
297 | =back\r | |
298 | \r | |
299 | This script is part of the B<NewsStats> package.\r | |
300 | \r | |
301 | =head1 AUTHOR\r | |
302 | \r | |
303 | Thomas Hochstein <thh@inter.net>\r | |
304 | \r | |
305 | =head1 COPYRIGHT AND LICENSE\r | |
306 | \r | |
307 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r | |
308 | \r | |
309 | This program is free software; you may redistribute it and/or modify it\r | |
310 | under the same terms as Perl itself.\r | |
311 | \r | |
312 | =cut\r |