#! /usr/bin/perl -W
#
# gatherstats.pl
#
# This script will gather statistical information from a database
# containing headers and other information from an INN feed.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2010 Thomas Hochstein
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.

BEGIN {
    our $VERSION = "0.01";
    use File::Basename;
    # make NewsStats.pm loadable from the directory the script lives in
    push(@INC, dirname($0));
}
use strict;
use warnings;

use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);

use DBI;

################################# Definitions ##################################

# define types of information that can be gathered
# all / groups (/ clients / hosts)
my %LegalTypes;
@LegalTypes{('all','groups')} = ();

################################# Main program #################################

### read commandline options
my %Options = ReadOptions('dom:p:t:n:r:g:c:s:');

### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};

### override configuration via commandline options
my %ConfOverride;
$ConfOverride{'DBTableRaw'}   = $Options{'r'} if $Options{'r'};
$ConfOverride{'DBTableGrps'}  = $Options{'g'} if $Options{'g'};
$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};
$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};
$ConfOverride{'TLH'}          = $Options{'n'} if $Options{'n'};
OverrideConfig(\%Conf,\%ConfOverride);

### get type of information to gather, defaulting to 'all'
$Options{'t'} = 'all' if !$Options{'t'};
die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n"
    if !exists($LegalTypes{$Options{'t'}});
my $GatherGroups = ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups');

### build filter for the hierarchy to count
# TLH is matched literally and only on a hierarchy boundary, so that 'de'
# selects 'de.ALL' and 'de.alt.test', but not 'dev.null'.  An unset or empty
# TLH disables filtering.
my $TLHFilter;
if (defined($Conf{'TLH'}) and length($Conf{'TLH'})) {
    (my $TLH = $Conf{'TLH'}) =~ s/\.+\z//;    # accept 'de.' as well as 'de'
    $TLHFilter = qr/\A\Q$TLH\E(?:\.|\z)/ if length($TLH);
}

### get time period (-m or -p)
my ($StartMonth,$EndMonth) = GetTimePeriod($Options{'m'},$Options{'p'});

### init database
my $DBHandle = InitDB(\%Conf,1);

### prepare statements once; they are re-executed for every month/newsgroup
# Table names cannot be bound as placeholders, so they are formatted into the
# statement text; all values go through placeholders.
my ($ReadQuery,$WriteQuery);
if ($GatherGroups) {
    $ReadQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",
                                            $Conf{'DBDatabase'},$Conf{'DBTableRaw'}))
        or die sprintf("%s: E: Can't prepare query on %s.%s: %s\n",
                       $MySelf,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);
    if (!$Options{'o'}) {
        $WriteQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",
                                                 $Conf{'DBDatabase'},$Conf{'DBTableGrps'}))
            or die sprintf("%s: E: Can't prepare update of %s.%s: %s\n",
                           $MySelf,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
    }
}

### get data for each month
warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};
foreach my $Month (ListMonth($StartMonth,$EndMonth)) {

    print "---------- $Month ----------\n" if $Options{'d'};

    if ($GatherGroups) {
        ### ----------------------------------------------
        ### get groups data (number of postings per group)
        # get groups data from raw table for given month
        # (error text is passed as an argument, not as part of the format
        # string, so a '%' in it cannot garble the message)
        $ReadQuery->execute($Month.'-%')
            or die sprintf("%s: E: Can't get groups data for %s from %s.%s: %s\n",
                           $MySelf,$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'},$DBI::errstr);

        # count postings per group
        my %Postings;

        while (my ($Header) = $ReadQuery->fetchrow_array) {
            # get list of newsgroups and hierarchies from Newsgroups:
            my %Newsgroups = ListNewsgroups($Header);
            # count each newsgroup and hierarchy once
            foreach my $Newsgroup (keys %Newsgroups) {
                # don't count newsgroup/hierarchy in wrong TLH
                next if defined($TLHFilter) and $Newsgroup !~ $TLHFilter;
                $Postings{$Newsgroup}++;
            }
        }

        print "----- GroupStats -----\n" if $Options{'d'};
        foreach my $Newsgroup (sort keys %Postings) {
            print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};
            # output only mode: nothing is written
            next if $Options{'o'};
            # write to database
            $WriteQuery->execute($Month,$Newsgroup,$Postings{$Newsgroup})
                or die sprintf("%s: E: Can't write groups data for %s/%s to %s.%s: %s\n",
                               $MySelf,$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
        }
    } else {
        # other types of information go here - later on
    }
}

### close handles
$DBHandle->disconnect;

__END__

################################ Documentation #################################

=head1 NAME

gatherstats - process statistical data from a raw source

=head1 SYNOPSIS

B<gatherstats> [B<-Vhdo>] [B<-m> I<YYYY-MM>] [B<-p> I<YYYY-MM:YYYY-MM>] [B<-t> I<type>] [B<-n> I<TLH>] [B<-r> I<database table>] [B<-g> I<database table>] [B<-c> I<database table>] [B<-s> I<database table>]

=head1 REQUIREMENTS

See doc/README: Perl 5.8.x itself and the following modules from CPAN:

=over 2

=item -

Config::Auto

=item -

DBI

=back

=head1 DESCRIPTION

This script will extract and process statistical information from a
database table which is fed with raw data from an INN feed for a given
time period and write its results to (an)other database table(s).

The time period to act on defaults to last month; you can assign
another month via the B<-m> switch or a time period via the B<-p>
switch; the latter takes precedence.

By default B<gatherstats> will process all types of information; you
can change that using the B<-t> switch and assigning the type of
information to process. Currently only processing of the number of
postings per group per month is implemented anyway, so that doesn't
matter yet.

Possible information types include:

=over 3

=item B<groups> (postings per group per month)

B<gatherstats> will examine Newsgroups: headers. Crosspostings will be
counted for each single group they appear in. Groups not in I<TLH>
will be ignored. I<TLH> is matched literally and on hierarchy
boundaries only, so I<de> will not select I<dev.null>.

B<gatherstats> will also add up the number of postings for each
hierarchy level, but only count each posting once. A posting to
de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL,
respectively. A crossposting to de.alt.test and de.alt.admin, on the
other hand, will be counted for de.alt.test and de.alt.admin each, but
only once for de.alt.ALL and de.ALL.

Data is written to I<DBTableGrps> (see doc/INSTALL).

=back

=head2 Configuration

F<gatherstats.pl> will read its configuration from F<newsstats.conf>
which should be present in the same directory via Config::Auto.

See doc/INSTALL for an overview of possible configuration options.

You can override configuration options via the B<-n>, B<-r>, B<-g>,
B<-c> and B<-s> switches, respectively.

=head1 OPTIONS

=over 3

=item B<-V> (version)

Print out version and copyright information on B<gatherstats> and exit.

=item B<-h> (help)

Print this man page and exit.

=item B<-d> (debug)

Output debugging information to STDOUT while processing (number of
postings per group).

=item B<-o> (output only)

Do not write results to database. You should use B<-d> in conjunction
with B<-o> ... everything else seems a bit pointless.

=item B<-m> I<YYYY-MM> (month)

Set processing period to a month in YYYY-MM format. Ignored if B<-p>
is set.

=item B<-p> I<YYYY-MM:YYYY-MM> (period)

Set processing period to a time period between two months, each in
YYYY-MM format, separated by a colon. Overrides B<-m>.

=item B<-t> I<type> (type)

Set processing type to one of I<all> and I<groups>. Defaults to all
(and is currently rather pointless as only I<groups> has been
implemented).

=item B<-n> I<TLH> (newsgroup hierarchy)

Override I<TLH> from F<newsstats.conf>.

=item B<-r> I<table> (raw data table)

Override I<DBTableRaw> from F<newsstats.conf>.

=item B<-g> I<table> (postings per group table)

Override I<DBTableGrps> from F<newsstats.conf>.

=item B<-c> I<table> (client data table)

Override I<DBTableClnts> from F<newsstats.conf>.

=item B<-s> I<table> (server/host data table)

Override I<DBTableHosts> from F<newsstats.conf>.

=back

=head1 INSTALLATION

See doc/INSTALL.

=head1 EXAMPLES

Process all types of information for last month:

    gatherstats

Do a dry run, showing results of processing:

    gatherstats -do

Process all types of information for January of 2010:

    gatherstats -m 2010-01

Process only number of postings for the year of 2010:

    gatherstats -p 2010-01:2010-12 -t groups

=head1 FILES

=over 4

=item F<gatherstats.pl>

The script itself.

=item F<NewsStats.pm>

Library functions for the NewsStats package.

=item F<newsstats.conf>

Runtime configuration file for B<gatherstats>.

=back

=head1 BUGS

Please report any bugs or feature requests to the author or use the
project's bug tracker!

=head1 SEE ALSO

=over 2

=item -

doc/README

=item -

doc/INSTALL

=back

This script is part of the B<NewsStats> package.

=head1 AUTHOR

Thomas Hochstein

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2010 Thomas Hochstein

This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

=cut