| 1 | #! /usr/bin/perl -W\r |
| 2 | #\r |
| 3 | # gatherstats.pl\r |
| 4 | #\r |
| 5 | # This script will gather statistical information from a database\r |
| 6 | # containing headers and other information from an INN feed.\r |
| 7 | # \r |
| 8 | # It is part of the NewsStats package.\r |
| 9 | #\r |
| 10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r |
| 11 | #\r |
| 12 | # It can be redistributed and/or modified under the same terms under \r |
| 13 | # which Perl itself is published.\r |
| 14 | \r |
# Compile-time setup: publish the script version and append the directory
# this script was started from to the module search path. Presumably this is
# what lets the NewsStats module loaded further down live next to the script.
BEGIN {
    use File::Basename;
    our $VERSION = "0.01";
    # appended (not prepended): already configured library paths win
    my $ScriptDir = dirname($0);
    push @INC, $ScriptDir;
}
| 20 | use strict;\r |
| 21 | \r |
| 22 | use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);\r |
| 23 | \r |
| 24 | use DBI;\r |
| 25 | \r |
| 26 | ################################# Definitions ##################################\r |
| 27 | \r |
# Types of information that can be gathered:
# all / groups (/ clients / hosts)
# Only the keys matter (the type is validated with exists()), so every
# value is left undefined.
my %LegalTypes = map { $_ => undef } qw(all groups);
| 32 | \r |
| 33 | ################################# Main program #################################\r |
| 34 | \r |
### read commandline options
my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');

### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};

### override configuration via commandline options
# each pair is (commandline switch, configuration key it overrides);
# a switch only overrides when it was given with a true value
my %ConfOverride;
foreach my $Pair (['r','DBTableRaw'],
                  ['g','DBTableGrps'],
                  ['c','DBTableClnts'],
                  ['s','DBTableHosts'],
                  ['n','TLH']) {
  my ($Switch,$ConfKey) = @{$Pair};
  next unless $Options{$Switch};
  $ConfOverride{$ConfKey} = $Options{$Switch};
}
&OverrideConfig(\%Conf,\%ConfOverride);

### get type of information to gather, defaulting to 'all'
$Options{'t'} ||= 'all';
unless (exists($LegalTypes{$Options{'t'}})) {
  die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n";
}

### get time period (-m or -p)
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});

### init database
my $DBHandle = InitDB(\%Conf,1);
| 59 | \r |
### get data for each month
# For every month of the requested period: read the Newsgroups: value of
# each posting that is not marked "disregard" from the raw table, count the
# postings per newsgroup/hierarchy name returned by ListNewsgroups(), print
# the counts (-d) and store them in the groups table (unless -o is set).
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};

# groups data is what both currently legal types ('all', 'groups') ask for
my $GatherGroups = ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups');

# Compile the hierarchy filter once. TLH is taken literally (quotemeta) and
# anchored at a hierarchy boundary: a TLH of 'de' accepts 'de.test' and
# 'de.ALL', but no longer 'dev.null'. A trailing dot in the configured
# value is tolerated; an empty TLH means "do not filter".
my $TLHFilter;
if (defined($Conf{'TLH'})) {
  (my $TLH = $Conf{'TLH'}) =~ s/\.+$//;
  $TLHFilter = qr/^\Q$TLH\E(?:\.|\z)/ if length($TLH);
}

# Prepare both statements once instead of once per month / per newsgroup.
# Error texts are passed to sprintf as arguments, never as part of the
# format string, so a '%' in a database error message cannot garble them.
my ($SelectQuery,$ReplaceQuery);
if ($GatherGroups) {
  $SelectQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}))
    or die sprintf("%s: E: Can't prepare query for groups data from %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);
  # the groups table is not touched at all in output only mode
  if (!$Options{'o'}) {
    $ReplaceQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}))
      or die sprintf("%s: E: Can't prepare statement for writing groups data to %s.%s: %s\n",$MySelf,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
  };
};

foreach my $Month (ListMonth($StartMonth,$EndMonth)) {

  print "---------- $Month ----------\n" if $Options{'d'};

  if ($GatherGroups) {
    ### ----------------------------------------------
    ### get groups data (number of postings per group)
    # get groups data from raw table for given month
    $SelectQuery->execute($Month.'-%')
      or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

    # count postings per group
    my %Postings;

    while (my ($NewsgroupsHeader) = $SelectQuery->fetchrow_array) {
      # get list of newsgroups and hierarchies from Newsgroups:
      my %Newsgroups = ListNewsgroups($NewsgroupsHeader);
      # count each newsgroup and hierarchy once
      foreach my $Name (keys %Newsgroups) {
        # don't count newsgroup/hierarchy in wrong TLH
        next if (defined($TLHFilter) and $Name !~ $TLHFilter);
        $Postings{$Name}++;
      };
    };
    # fetchrow_array returns an empty list on error as well as at the end
    # of the result set; don't store counts from a truncated result
    die sprintf("%s: E: Can't fetch groups data for %s from %s.%s: %s\n",$MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$SelectQuery->errstr)
      if $SelectQuery->err;

    print "----- GroupStats -----\n" if $Options{'d'};
    foreach my $Newsgroup (sort keys %Postings) {
      print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
      next if $Options{'o'};
      # write to database
      $ReplaceQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup})
        or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",$MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
    };
  } else {
    # other types of information go here - later on
  };
};

### close handles
$DBHandle->disconnect;
| 104 | \r |
| 105 | __END__\r |
| 106 | \r |
| 107 | ################################ Documentation #################################\r |
| 108 | \r |
| 109 | =head1 NAME\r |
| 110 | \r |
| 111 | gatherstats - process statistical data from a raw source\r |
| 112 | \r |
| 113 | =head1 SYNOPSIS\r |
| 114 | \r |
| 115 | B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]\r |
| 116 | \r |
| 117 | =head1 REQUIREMENTS\r |
| 118 | \r |
| 119 | See doc/README: Perl 5.8.x itself and the following modules from CPAN:\r |
| 120 | \r |
| 121 | =over 2\r |
| 122 | \r |
| 123 | =item -\r |
| 124 | \r |
| 125 | Config::Auto\r |
| 126 | \r |
| 127 | =item -\r |
| 128 | \r |
| 129 | DBI\r |
| 130 | \r |
| 131 | =back\r |
| 132 | \r |
| 133 | =head1 DESCRIPTION\r |
| 134 | \r |
| 135 | This script will extract and process statistical information from a\r |
| 136 | database table which is fed from F<feedlog.pl> for a given time period\r |
| 137 | and write its results to (an)other database table(s).\r |
| 138 | \r |
| 139 | The time period to act on defaults to last month; you can assign\r |
| 140 | another month via the B<-m> switch or a time period via the B<-p>\r |
| 141 | switch; the latter takes precedence.\r |
| 142 | \r |
| 143 | By default B<gatherstats> will process all types of information; you\r |
| 144 | can change that using the B<-t> switch and assigning the type of\r |
| 145 | information to process. Currently only processing of the number of\r |
| 146 | postings per group per month is implemented anyway, so that doesn't\r |
| 147 | matter yet.\r |
| 148 | \r |
| 149 | Possible information types include:\r |
| 150 | \r |
| 151 | =over 3\r |
| 152 | \r |
| 153 | =item B<groups> (postings per group per month)\r |
| 154 | \r |
| 155 | B<gatherstats> will examine Newsgroups: headers. Crosspostings will be\r |
| 156 | counted for each single group they appear in. Groups not in I<TLH>\r |
| 157 | will be ignored.\r |
| 158 | \r |
| 159 | B<gatherstats> will also add up the number of postings for each\r |
| 160 | hierarchy level, but only count each posting once. A posting to\r |
| 161 | de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,\r |
| 162 | respectively. A crossposting to de.alt.test and de.alt.admin, on the\r |
| 163 | other hand, will be counted for de.alt.test and de.alt.admin each, but\r |
| 164 | only once for de.alt.ALL and de.ALL.\r |
| 165 | \r |
| 166 | Data is written to I<DBTableGrps> (see doc/INSTALL).\r |
| 167 | \r |
| 168 | =back\r |
| 169 | \r |
| 170 | =head2 Configuration\r |
| 171 | \r |
| 172 | F<gatherstats.pl> will read its configuration via Config::Auto from\r |
| 173 | F<newsstats.conf>, which should be present in the same directory.\r |
| 174 | \r |
| 175 | See doc/INSTALL for an overview of possible configuration options.\r |
| 176 | \r |
| 177 | You can override configuration options via the B<-n>, B<-r>, B<-g>,\r |
| 178 | B<-c> and B<-s> switches, respectively.\r |
| 179 | \r |
| 180 | =head1 OPTIONS\r |
| 181 | \r |
| 182 | =over 3\r |
| 183 | \r |
| 184 | =item B<-V> (version)\r |
| 185 | \r |
| 186 | Print out version and copyright information on B<gatherstats> and exit.\r |
| 187 | \r |
| 188 | =item B<-h> (help)\r |
| 189 | \r |
| 190 | Print this man page and exit.\r |
| 191 | \r |
| 192 | =item B<-d> (debug)\r |
| 193 | \r |
| 194 | Output debugging information to STDOUT while processing (number of\r |
| 195 | postings per group).\r |
| 196 | \r |
| 197 | =item B<-o> (output only)\r |
| 198 | \r |
| 199 | Do not write results to database. You should use B<-d> in conjunction\r |
| 200 | with B<-o> ... everything else seems a bit pointless.\r |
| 201 | \r |
| 202 | =item B<-m> I<YYYY-MM> (month)\r |
| 203 | \r |
| 204 | Set processing period to a month in YYYY-MM format. Ignored if B<-p>\r |
| 205 | is set.\r |
| 206 | \r |
| 207 | =item B<-p> I<YYYY-MM:YYYY-MM> (period)\r |
| 208 | \r |
| 209 | Set processing period to a time period between two months, each in\r |
| 210 | YYYY-MM format, separated by a colon. Overrides B<-m>.\r |
| 211 | \r |
| 212 | =item B<-t> I<type> (type)\r |
| 213 | \r |
| 214 | Set processing type to one of I<all> and I<groups>. Defaults to all\r |
| 215 | (and is currently rather pointless as only I<groups> has been\r |
| 216 | implemented).\r |
| 217 | \r |
| 218 | =item B<-n> I<TLH> (newsgroup hierarchy)\r |
| 219 | \r |
| 220 | Override I<TLH> from F<newsstats.conf>.\r |
| 221 | \r |
| 222 | =item B<-r> I<table> (raw data table)\r |
| 223 | \r |
| 224 | Override I<DBTableRaw> from F<newsstats.conf>.\r |
| 225 | \r |
| 226 | =item B<-g> I<table> (postings per group table)\r |
| 227 | \r |
| 228 | Override I<DBTableGrps> from F<newsstats.conf>.\r |
| 229 | \r |
| 230 | =item B<-c> I<table> (client data table)\r |
| 231 | \r |
| 232 | Override I<DBTableClnts> from F<newsstats.conf>.\r |
| 233 | \r |
| 234 | =item B<-s> I<table> (server/host data table)\r |
| 235 | \r |
| 236 | Override I<DBTableHosts> from F<newsstats.conf>.\r |
| 237 | \r |
| 238 | =back\r |
| 239 | \r |
| 240 | =head1 INSTALLATION\r |
| 241 | \r |
| 242 | See doc/INSTALL.\r |
| 243 | \r |
| 244 | =head1 EXAMPLES\r |
| 245 | \r |
| 246 | Process all types of information for last month:\r |
| 247 | \r |
| 248 | gatherstats\r |
| 249 | \r |
| 250 | Do a dry run, showing results of processing:\r |
| 251 | \r |
| 252 | gatherstats -do\r |
| 253 | \r |
| 254 | Process all types of information for January of 2010:\r |
| 255 | \r |
| 256 | gatherstats -m 2010-01\r |
| 257 | \r |
| 258 | Process only number of postings for the year of 2010:\r |
| 259 | \r |
| 260 | gatherstats -p 2010-01:2010-12 -t groups\r |
| 261 | \r |
| 262 | =head1 FILES\r |
| 263 | \r |
| 264 | =over 4\r |
| 265 | \r |
| 266 | =item F<gatherstats.pl>\r |
| 267 | \r |
| 268 | The script itself.\r |
| 269 | \r |
| 270 | =item F<NewsStats.pm>\r |
| 271 | \r |
| 272 | Library functions for the NewsStats package.\r |
| 273 | \r |
| 274 | =item F<newsstats.conf>\r |
| 275 | \r |
| 276 | Runtime configuration file for the B<NewsStats> package.\r |
| 277 | \r |
| 278 | =back\r |
| 279 | \r |
| 280 | =head1 BUGS\r |
| 281 | \r |
| 282 | Please report any bugs or feature requests to the author or use the\r |
| 283 | bug tracker at L<http://bugs.th-h.de/>!\r |
| 284 | \r |
| 285 | =head1 SEE ALSO\r |
| 286 | \r |
| 287 | =over 2\r |
| 288 | \r |
| 289 | =item -\r |
| 290 | \r |
| 291 | doc/README\r |
| 292 | \r |
| 293 | =item -\r |
| 294 | \r |
| 295 | doc/INSTALL\r |
| 296 | \r |
| 297 | =back\r |
| 298 | \r |
| 299 | This script is part of the B<NewsStats> package.\r |
| 300 | \r |
| 301 | =head1 AUTHOR\r |
| 302 | \r |
| 303 | Thomas Hochstein <thh@inter.net>\r |
| 304 | \r |
| 305 | =head1 COPYRIGHT AND LICENSE\r |
| 306 | \r |
| 307 | Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r |
| 308 | \r |
| 309 | This program is free software; you may redistribute it and/or modify it\r |
| 310 | under the same terms as Perl itself.\r |
| 311 | \r |
| 312 | =cut\r |