From: Thomas Hochstein Date: Sat, 18 Sep 2010 16:45:20 +0000 (+0200) Subject: Introduce end-of-line normalization and add .gitattributes. X-Git-Tag: 0.01~28 X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=commitdiff_plain;h=2832c235b2497a02713b12197ed97fbde3a91e15;hp=3430c898683771c0ecba9b53a55bb817da0e8fe8 Introduce end-of-line normalization and add .gitattributes. Signed-off-by: Thomas Hochstein --- diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..176a458 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto diff --git a/feedlog.pl b/feedlog.pl index f33111d..7986249 100755 --- a/feedlog.pl +++ b/feedlog.pl @@ -1,227 +1,227 @@ -#! /usr/bin/perl -W -# -# feedlog.pl -# -# This script will log headers and other data to a database -# for further analysis by parsing a feed from INN. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. 
- -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; - -use NewsStats; - -use Sys::Syslog qw(:standard :macros); - -use Date::Format; -use DBI; - -################################# Main program ################################# - -### read commandline options -my %Options = &ReadOptions('qd'); - -### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; - -### init syslog -openlog($MySelf, 'nofatal,pid', LOG_NEWS); -syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; - -### init database -my $DBHandle = InitDB(\%Conf,0); -if (!$DBHandle) { - syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); - while (1) {}; # go into endless loop to suppress further errors and respawning -}; -my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - -### main loop -while (<>) { - chomp; - # catch empty lines trailing or leading - if ($_ eq '') { - next; - } - # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups - my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; - # remaining lines contain headers - my $Headers = ""; - while (<>) { - chomp; - # empty line terminates this article - if ($_ eq '') { - last; - } - # collect headers - $Headers .= $_."\n" ; - } - - # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp - my $Day = time2str("%Y-%m-%d", $Timestamp); - my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); - - # write to database - if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { - syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); - }; - $DBQuery->finish; - - warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, 
$Path, $Newsgroups, $Headers) if !$Options{'d'}; -} - -### close handles -$DBHandle->disconnect; -syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; -closelog(); - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -feedlog - log data from an INN feed to a database - -=head1 SYNOPSIS - -B [B<-Vhdq>] - -=head1 REQUIREMENTS - -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -Date::Format - -=item - - -DBI - -=back - -=head1 DESCRIPTION - -This script will log overview data and complete headers to a database -table for further examination by parsing a feed from INN. It will -parse that information and write it to a mysql database table in real -time. - -All reporting is done to I via I facility. If B -fails to initiate a database connection at startup, it will log to -I with I priority and go in an endless loop, as -terminating would only result in a rapid respawn. - -=head2 Configuration - -F will read its configuration from F which -should be present in the same directory via Config::Auto. - -See doc/INSTALL for an overview of possible configuration options. - -=head1 OPTIONS - -=over 3 - -=item B<-V> (version) - -Print out version and copyright information on B and exit. - -=item B<-h> (help) - -Print this man page and exit. - -=item B<-d> (debug) - -Output debugging information to STDERR while parsing STDIN. You'll -find that information most probably in your B F file. - -=item B<-q> (quiet) - -Suppress logging to syslog. - -=back - -=head1 INSTALLATION - -See doc/INSTALL. - -=head1 EXAMPLES - -Set up a feed like that in your B F file: - - ## gather statistics for NewsStats - newsstats! - :!*,de.* - :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl - -See doc/INSTALL for further information. - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. 
- -=item F - -Runtime configuration file for B. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -doc/README - -=item - - -doc/INSTALL - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut +#! /usr/bin/perl -W +# +# feedlog.pl +# +# This script will log headers and other data to a database +# for further analysis by parsing a feed from INN. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats; + +use Sys::Syslog qw(:standard :macros); + +use Date::Format; +use DBI; + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('qd'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### init syslog +openlog($MySelf, 'nofatal,pid', LOG_NEWS); +syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; + +### init database +my $DBHandle = InitDB(\%Conf,0); +if (!$DBHandle) { + syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); + while (1) {}; # go into endless loop to suppress further errors and respawning +}; +my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + +### main loop +while (<>) { + chomp; + # catch empty lines trailing or leading + if ($_ eq '') { + next; + } + # first line contains: mid, timestamp, token, 
size, peer, Path, Newsgroups + my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; + # remaining lines contain headers + my $Headers = ""; + while (<>) { + chomp; + # empty line terminates this article + if ($_ eq '') { + last; + } + # collect headers + $Headers .= $_."\n" ; + } + + # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp + my $Day = time2str("%Y-%m-%d", $Timestamp); + my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); + + # write to database + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); + }; + $DBQuery->finish; + + warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'}; +} + +### close handles +$DBHandle->disconnect; +syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; +closelog(); + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +feedlog - log data from an INN feed to a database + +=head1 SYNOPSIS + +B [B<-Vhdq>] + +=head1 REQUIREMENTS + +See doc/README: Perl 5.8.x itself and the following modules from CPAN: + +=over 2 + +=item - + +Config::Auto + +=item - + +Date::Format + +=item - + +DBI + +=back + +=head1 DESCRIPTION + +This script will log overview data and complete headers to a database +table for further examination by parsing a feed from INN. It will +parse that information and write it to a mysql database table in real +time. + +All reporting is done to I via I facility. If B +fails to initiate a database connection at startup, it will log to +I with I priority and go in an endless loop, as +terminating would only result in a rapid respawn. 
+ +=head2 Configuration + +F will read its configuration from F which +should be present in the same directory via Config::Auto. + +See doc/INSTALL for an overview of possible configuration options. + +=head1 OPTIONS + +=over 3 + +=item B<-V> (version) + +Print out version and copyright information on B and exit. + +=item B<-h> (help) + +Print this man page and exit. + +=item B<-d> (debug) + +Output debugging information to STDERR while parsing STDIN. You'll +find that information most probably in your B F file. + +=item B<-q> (quiet) + +Suppress logging to syslog. + +=back + +=head1 INSTALLATION + +See doc/INSTALL. + +=head1 EXAMPLES + +Set up a feed like that in your B F file: + + ## gather statistics for NewsStats + newsstats! + :!*,de.* + :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl + +See doc/INSTALL for further information. + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file for B. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +doc/README + +=item - + +doc/INSTALL + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/gatherstats.pl b/gatherstats.pl index 4b4b8dd..bcb8ba0 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -1,312 +1,312 @@ -#! /usr/bin/perl -W -# -# gatherstats.pl -# -# This script will gather statistical information from a database -# containing headers and other information from a INN feed. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. 
- -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; - -use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups); - -use DBI; - -################################# Definitions ################################## - -# define types of information that can be gathered -# all / groups (/ clients / hosts) -my %LegalTypes; -@LegalTypes{('all','groups')} = (); - -################################# Main program ################################# - -### read commandline options -my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:'); - -### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; - -### override configuration via commandline options -my %ConfOverride; -$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; -$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; -$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; -$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; -&OverrideConfig(\%Conf,\%ConfOverride); - -### get type of information to gather, defaulting to 'all' -$Options{'t'} = 'all' if !$Options{'t'}; -die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); - -### get time period (-m or -p) -my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); - -### init database -my $DBHandle = InitDB(\%Conf,1); - -### get data for each month -warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; -foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { - - print "---------- $Month ----------\n" if $Options{'d'}; - - if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { - ### ---------------------------------------------- - ### get groups data (number of postings per group) - # get groups data from raw table for given month - my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? 
AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); - $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); - - # count postings per group - my %Postings; - - while (($_) = $DBQuery->fetchrow_array) { - # get list oft newsgroups and hierarchies from Newsgroups: - my %Newsgroups = ListNewsgroups($_); - # count each newsgroup and hierarchy once - foreach (sort keys %Newsgroups) { - # don't count newsgroup/hierarchy in wrong TLH - next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/); - $Postings{$_}++; - }; - }; - - print "----- GroupStats -----\n" if $Options{'d'}; - foreach my $Newsgroup (sort keys %Postings) { - print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; - if (!$Options{'o'}) { - # write to database - $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); - $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); - $DBQuery->finish; - }; - }; - } else { - # other types of information go here - later on - }; -}; - -### close handles -$DBHandle->disconnect; - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -gatherstats - process statistical data from a raw source - -=head1 SYNOPSIS - -B [B<-Vhdo>] [B<-m> I] [B<-p> I] [B<-t> I] [B<-n> I] [B<-r> I] [B<-g> I] [B<-c> I] [B<-s> I] - -=head1 REQUIREMENTS - -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back - -=head1 DESCRIPTION - -This script will extract and process statistical information from a -database table which is fed from F for a given time period -and write its results to (an)other 
database table(s). - -The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. - -By default B will process all types of information; you -can change that using the B<-t> switch and assigning the type of -information to process. Currently only processing of the number of -postings per group per month is implemented anyway, so that doesn't -matter yet. - -Possible information types include: - -=over 3 - -=item B (postings per group per month) - -B will examine Newsgroups: headers. Crosspostings will be -counted for each single group they appear in. Groups not in I -will be ignored. - -B will also add up the number of postings for each -hierarchy level, but only count each posting once. A posting to -de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, -respectively. A crossposting to de.alt.test and de.alt.admin, on the -other hand, will be counted for de.alt.test and de.alt.admin each, but -only once for de.alt.ALL and de.ALL. - -Data is written to I (see doc/INSTALL). - -=back - -=head2 Configuration - -F will read its configuration from F -which should be present in the same directory via Config::Auto. - -See doc/INSTALL for an overview of possible configuration options. - -You can override configuration options via the B<-n>, B<-r>, B<-g>, -B<-c> and B<-s> switches, respectively. - -=head1 OPTIONS - -=over 3 - -=item B<-V> (version) - -Print out version and copyright information on B and exit. - -=item B<-h> (help) - -Print this man page and exit. - -=item B<-d> (debug) - -Output debugging information to STDOUT while processing (number of -postings per group). - -=item B<-o> (output only) - -Do not write results to database. You should use B<-d> in conjunction -with B<-o> ... everything else seems a bit pointless. - -=item B<-m> I (month) - -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -is set. 
- -=item B<-p> I (period) - -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. - -=item B<-t> I (type) - -Set processing type to one of I and I. Defaults to all -(and is currently rather pointless as only I has been -implemented). - -=item B<-n> I (newsgroup hierarchy) - -Override I from F. - -=item B<-r> I (raw data table) - -Override I from F. - -=item B<-g> I
(postings per group table) - -Override I from F. - -=item B<-c> I
(client data table) - -Override I from F. - -=item B<-s> I
(server/host data table) - -Override I from F. - -=back - -=head1 INSTALLATION - -See doc/INSTALL. - -=head1 EXAMPLES - -Process all types of information for lasth month: - - gatherstats - -Do a dry run, showing results of processing: - - gatherstats -do - -Process all types of information for January of 2010: - - gatherstats -m 2010-01 - -Process only number of postings for the year of 2010: - - gatherstats -p 2010-01:2010-12 -t groups - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file for B. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -doc/README - -=item - - -doc/INSTALL - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut +#! /usr/bin/perl -W +# +# gatherstats.pl +# +# This script will gather statistical information from a database +# containing headers and other information from a INN feed. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. 
+ +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups); + +use DBI; + +################################# Definitions ################################## + +# define types of information that can be gathered +# all / groups (/ clients / hosts) +my %LegalTypes; +@LegalTypes{('all','groups')} = (); + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; +$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; +$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; +$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; +&OverrideConfig(\%Conf,\%ConfOverride); + +### get type of information to gather, defaulting to 'all' +$Options{'t'} = 'all' if !$Options{'t'}; +die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); + +### get time period (-m or -p) +my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### get data for each month +warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; +foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { + + print "---------- $Month ----------\n" if $Options{'d'}; + + if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { + ### ---------------------------------------------- + ### get groups data (number of postings per group) + # get groups data from raw table for given month + my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? 
AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); + + # count postings per group + my %Postings; + + while (($_) = $DBQuery->fetchrow_array) { + # get list oft newsgroups and hierarchies from Newsgroups: + my %Newsgroups = ListNewsgroups($_); + # count each newsgroup and hierarchy once + foreach (sort keys %Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/); + $Postings{$_}++; + }; + }; + + print "----- GroupStats -----\n" if $Options{'d'}; + foreach my $Newsgroup (sort keys %Postings) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; + if (!$Options{'o'}) { + # write to database + $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + $DBQuery->finish; + }; + }; + } else { + # other types of information go here - later on + }; +}; + +### close handles +$DBHandle->disconnect; + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +gatherstats - process statistical data from a raw source + +=head1 SYNOPSIS + +B [B<-Vhdo>] [B<-m> I] [B<-p> I] [B<-t> I] [B<-n> I] [B<-r> I] [B<-g> I] [B<-c> I] [B<-s> I] + +=head1 REQUIREMENTS + +See doc/README: Perl 5.8.x itself and the following modules from CPAN: + +=over 2 + +=item - + +Config::Auto + +=item - + +DBI + +=back + +=head1 DESCRIPTION + +This script will extract and process statistical information from a +database table which is fed from F for a given time period +and write its results to (an)other 
database table(s). + +The time period to act on defaults to last month; you can assign +another month via the B<-m> switch or a time period via the B<-p> +switch; the latter takes precedence. + +By default B will process all types of information; you +can change that using the B<-t> switch and assigning the type of +information to process. Currently only processing of the number of +postings per group per month is implemented anyway, so that doesn't +matter yet. + +Possible information types include: + +=over 3 + +=item B (postings per group per month) + +B will examine Newsgroups: headers. Crosspostings will be +counted for each single group they appear in. Groups not in I +will be ignored. + +B will also add up the number of postings for each +hierarchy level, but only count each posting once. A posting to +de.alt.test will be counted for de.alt.test, de.alt.ALL and de.ALL, +respectively. A crossposting to de.alt.test and de.alt.admin, on the +other hand, will be counted for de.alt.test and de.alt.admin each, but +only once for de.alt.ALL and de.ALL. + +Data is written to I (see doc/INSTALL). + +=back + +=head2 Configuration + +F will read its configuration from F +which should be present in the same directory via Config::Auto. + +See doc/INSTALL for an overview of possible configuration options. + +You can override configuration options via the B<-n>, B<-r>, B<-g>, +B<-c> and B<-s> switches, respectively. + +=head1 OPTIONS + +=over 3 + +=item B<-V> (version) + +Print out version and copyright information on B and exit. + +=item B<-h> (help) + +Print this man page and exit. + +=item B<-d> (debug) + +Output debugging information to STDOUT while processing (number of +postings per group). + +=item B<-o> (output only) + +Do not write results to database. You should use B<-d> in conjunction +with B<-o> ... everything else seems a bit pointless. + +=item B<-m> I (month) + +Set processing period to a month in YYYY-MM format. Ignored if B<-p> +is set. 
+ +=item B<-p> I (period) + +Set processing period to a time period between two months, each in +YYYY-MM format, separated by a colon. Overrides B<-m>. + +=item B<-t> I (type) + +Set processing type to one of I and I. Defaults to all +(and is currently rather pointless as only I has been +implemented). + +=item B<-n> I (newsgroup hierarchy) + +Override I from F. + +=item B<-r> I
(raw data table) + +Override I from F. + +=item B<-g> I
(postings per group table) + +Override I from F. + +=item B<-c> I
(client data table) + +Override I from F. + +=item B<-s> I
(server/host data table) + +Override I from F. + +=back + +=head1 INSTALLATION + +See doc/INSTALL. + +=head1 EXAMPLES + +Process all types of information for last month: + + gatherstats + +Do a dry run, showing results of processing: + + gatherstats -do + +Process all types of information for January of 2010: + + gatherstats -m 2010-01 + +Process only number of postings for the year of 2010: + + gatherstats -p 2010-01:2010-12 -t groups + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file for B. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +doc/README + +=item - + +doc/INSTALL + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/groupstats.pl b/groupstats.pl index fcacc30..1c143d2 100755 --- a/groupstats.pl +++ b/groupstats.pl @@ -1,475 +1,475 @@ -#! /usr/bin/perl -W -# -# groupstats.pl -# -# This script will get statistical data on newgroup usage -# form a database. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. 
- -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - push(@INC, dirname($0)); -} -use strict; - -use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); - -use DBI; - -################################# Main program ################################# - -### read commandline options -my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); - -### read configuration -my %Conf = %{ReadConfig('newsstats.conf')}; - -### override configuration via commandline options -my %ConfOverride; -$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; -&OverrideConfig(\%Conf,\%ConfOverride); - -### check for incompatible command line options -# you can't mix '-t', '-b' and '-l' -# -b/-l take preference over -t, and -b takes preference over -l -if ($Options{'b'} or $Options{'l'}) { - if ($Options{'t'}) { - # drop -t - warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); - undef($Options{'t'}); - }; - if ($Options{'b'} and $Options{'l'}) { - # drop -l - warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). 
Level '-l $Options{'l'}' was ignored.\n"); - undef($Options{'l'}); - }; - # -q/-d don't work with -b or -l - warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; - warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; -}; - -### check output type -# default output type to 'dump' -$Options{'o'} = 'dump' if !$Options{'o'}; -# fail if more than one newsgroup is combined with 'dumpgroup' type -die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); -# accept 'dumpgroup' only with -n -if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { - $Options{'o'} = 'dump'; - warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); -}; -# set output type to 'pretty' for -l -if ($Options{'l'}) { - $Options{'o'} = 'pretty'; - warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); -}; - -### get time period -my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); -# reset to one month for 'dump' output type -if ($Options{'o'} eq 'dump' and $Options{'p'}) { - $StartMonth = $EndMonth; - warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. 
Month was set to $StartMonth.\n"); -}; - -### init database -my $DBHandle = InitDB(\%Conf,1); - -### create report -# get list of newsgroups (-n) -my ($QueryPart,@GroupList); -my $Newsgroups = $Options{'n'}; -if ($Newsgroups) { - # explode list of newsgroups for WHERE clause - ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups); -} else { - # set to dummy value (always true) - $QueryPart = 1; -}; - -# manage thresholds -if (defined($Options{'t'})) { - if ($Options{'i'}) { - # -i: list groups below threshold - $QueryPart .= ' AND postings < ?'; - } else { - # default: list groups above threshold - $QueryPart .= ' AND postings > ?'; - }; - # push threshold to GroupList to match number of binding vars for DBQuery->execute - push @GroupList,$Options{'t'}; -} - -# construct WHERE clause -# $QueryPart is "list of newsgroup" (or 1), -# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) -# according to setting of -s -my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'})); - -# get lenght of longest newsgroup delivered by query for formatting purposes -# FIXME -my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList); - -my ($OrderClause,$DBQuery); -# -b (best of / top list) defined? -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" - # according to -q and -d - $OrderClause = 'newsgroup'; - $OrderClause = 'postings' if $Options{'q'}; - $OrderClause .= ' DESC' if $Options{'d'}; - # prepare query: get number of postings per group from groups table for given months and newsgroups - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) 
- # set sorting order (-i) - if ($Options{'i'}) { - $OrderClause = 'postings'; - } else { - $OrderClause = 'postings DESC'; - }; - # push LIMIT to GroupList to match number of binding vars for DBQuery->execute - push @GroupList,$Options{'b'}; - # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT - $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); -} else { - # -l must be set now, as all other cases have been taken care of - # set sorting order (-i) - if ($Options{'i'}) { - $OrderClause = '<'; - } else { - $OrderClause = '>'; - }; - # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute - # FIXME -- together with the query (see below) - push @GroupList,$Options{'l'}; - push @GroupList,$StartMonth,$EndMonth; - # prepare query: get number of postings per group from groups table for given months and - # FIXME -- this query is ... in dire need of impromevent - $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) 
AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause)); -}; - -# execute query -$DBQuery->execute($StartMonth,$EndMonth,@GroupList) - or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); - -# output results -# print caption (-c) with time period if -m or -p is set -# FIXME - month or period should handled differently -printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); -# print caption (-c) with newsgroup list if -n is set -printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; -# print caption (-c) with threshold if -t is set, taking -i in account -printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; -if (!defined($Options{'b'}) and !defined($Options{'l'})) { - # default: neither -b nor -l - &OutputData($Options{'o'},$DBQuery,$MaxLength); -} elsif ($Options{'b'}) { - # -b is set (then -l can't be!) - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just assign "top x" or "bottom x" instead of a month for the caption - # FIXME - print &FormatOutput($Options{'o'}, ($Options{'i'} ? 
'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); - }; -} else { - # -l must be set now, as all other cases have been taken care of - # we have to read in the query results ourselves, as they do not have standard layout - while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { - # we just switch $Newsgroups and $Month for output generation - # FIXME - print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); - }; -}; - -### close handles -$DBHandle->disconnect; - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -groupstats - create reports on newsgroup usage - -=head1 SYNOPSIS - -B [B<-Vhiscqd>] [B<-m> I] [B<-p> I] [B<-n> I] [B<-t> I] [B<-l> I] [B<-b> I] [B<-o> I] [B<-g> I] - -=head1 REQUIREMENTS - -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back - -=head1 DESCRIPTION - -This script create reports on newsgroup usage (number of postings per -group per month) taken from result tables created by -F. - -The time period to act on defaults to last month; you can assign -another month via the B<-m> switch or a time period via the B<-p> -switch; the latter takes preference. - -B will process all newsgroups by default; you can limit -that to only some newsgroups by supplying a list of those groups via -B<-n> (see below). You can include hierarchy levels in the output by -adding the B<-s> switch (see below). - -Furthermore you can set a threshold via B<-t> so that only newsgroups -with more postings per month will be included in the report. You can -invert that by the B<-i> switch so only newsgroups with less than -I postings per month will be included. 
- -You can sort the output by number of postings per month instead of the -default (alphabetical list of newsgroups) by using B<-q>; you can -reverse the sorting order (from highest to lowest or in reversed -alphabetical order) by using B<-d>. - -Furthermore, you can create a list of newsgroups that had consistently -more (or less) than x postings per month during the whole report -period by using B<-l> (together with B as needed). - -Last but not least you can create a "best of" list of the top x -newsgroups via B<-b> (or a "worst of" list by adding B). - -By default, B will dump a very simple alphabetical list of -newsgroups, one per line, followed by the number of postings in that -month. This output format of course cannot sensibly be combined with -time periods, so you can set the output format by using B<-o> (see -below). Captions can be added by setting the B<-c> switch. - -=head2 Configuration - -F will read its configuration from F -which should be present in the same directory via Config::Auto. - -See doc/INSTALL for an overview of possible configuration options. - -You can override configuration options via the B<-g> switch. - -=head1 OPTIONS - -=over 3 - -=item B<-V> (version) - -Print out version and copyright information on B and exit. - -=item B<-h> (help) - -Print this man page and exit. - -=item B<-m> I (month) - -Set processing period to a month in YYYY-MM format. Ignored if B<-p> -is set. - -=item B<-p> I (period) - -Set processing period to a time period between two month, each in -YYYY-MM format, separated by a colon. Overrides B<-m>. - -=item B<-n> I (newsgroups) - -Limit processing to a certain set of newsgroups. I can -be a single newsgroup name (de.alt.test), a newsgroup hierarchy -(de.alt.*) or a list of either of these, separated by colons, for -example - - de.test:de.alt.test:de.newusers.* - -=item B<-t> I (threshold) - -Only include newsgroups with more than I postings per -month. 
Can be inverted by the B<-i> switch so that only newsgroups -with less than I postings will be included. - -This setting will be ignored if B<-l> or B<-b> is set. - -=item B<-l> I (level) - -Only include newsgroups with more than I postings per -month, every month during the whole reporting period. Can be inverted -by the B<-i> switch so that only newsgroups with less than I -postings every single month will be included. Output will be ordered -by newsgroup name, followed by month. - -This setting will be ignored if B<-b> is set. Overrides B<-t> and -can't be used together with B<-q> or B<-d>. - -=item B<-b> I (best of) - -Create a list of the I newsgroups with the most postings over the -whole reporting period. Can be inverted by the B<-i> switch so that a -list of the I newsgroups with the least postings over the whole -period is generated. Output will be ordered by sum of postings. - -Overrides B<-t> and B<-l> and can't be used together with B<-q> or -B<-d>. Output format is set to I (see below). - -=item B<-i> (invert) - -Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower -threshold or level or generate a "bottom list" instead of a top list. - -=item B<-s> (sum per hierarchy level) - -Include "virtual" groups for every hierarchy level in output, for -example: - - de.alt.ALL 10 - de.alt.test 5 - de.alt.admin 7 - -See the B man page for details. - -=item B<-o> I (output format) - -Set output format. Default is I, consisting of an alphabetical -list of newsgroups, each on a new line, followed by the number of -postings in that month. This default format can't be used with time -periods of more than one month. - -I format is like I, but will print the month in front of -the newsgroup name. - -I format can only be use with a group list (see B<-n>) of -exactly one newsgroup and is like I, but will output months, -followed by the number of postings. 
- -If you don't need easily parsable output, you'll mostly use I<pretty> -format, which will print a header for each new month and try to align -newsgroup names and posting counts. Usage of B<-b> will force this -format. - -=item B<-c> (captions) - -Add captions to output (reporting period, newsgroups list, threshold). - -=item B<-q> (quantity of postings) - -Sort by number of postings instead of by newsgroup names. - -Cannot be used with B<-l> or B<-b>. - -=item B<-d> (descending) - -Change sort order to descending. - -Cannot be used with B<-l> or B<-b>. - -=item B<-g> I<table>
(postings per group table) - -Override I from F. - -=back - -=head1 INSTALLATION - -See doc/INSTALL. - -=head1 EXAMPLES - -Show number of postings per group for lasth month in I format: - - groupstats - -Show that report for January of 2010 and de.alt.* plus de.test, -including display of hierarchy levels: - - groupstats -m 2010-01 -n de.alt.*:de.test -s - -Show that report for the year of 2010 in I format: - - groupstats -p 2010-01:2010-12 -o pretty - -Only show newsgroups with less than 30 postings last month, ordered -by number of postings, descending, in I format: - - groupstats -iqdt 30 -o pretty - -Show top 10 for the first half-year of of 2010 in I format: - - groupstats -p 2010-01:2010-06 -b 10 -o pretty - -Report all groups that had less than 30 postings every singele month -in the year of 2010 (I format is forced) - - groupstats -p 2010-01:2010-12 -il 30 - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file for B. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -doc/README - -=item - - -doc/INSTALL - -=item - - -gatherstats -h - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut +#! /usr/bin/perl -W +# +# groupstats.pl +# +# This script will get statistical data on newgroup usage +# form a database. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. 
+ +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); + +use DBI; + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +&OverrideConfig(\%Conf,\%ConfOverride); + +### check for incompatible command line options +# you can't mix '-t', '-b' and '-l' +# -b/-l take preference over -t, and -b takes preference over -l +if ($Options{'b'} or $Options{'l'}) { + if ($Options{'t'}) { + # drop -t + warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n"); + undef($Options{'t'}); + }; + if ($Options{'b'} and $Options{'l'}) { + # drop -l + warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). 
Level '-l $Options{'l'}' was ignored.\n"); + undef($Options{'l'}); + }; + # -q/-d don't work with -b or -l + warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'}; + warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'}; +}; + +### check output type +# default output type to 'dump' +$Options{'o'} = 'dump' if !$Options{'o'}; +# fail if more than one newsgroup is combined with 'dumpgroup' type +die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); +# accept 'dumpgroup' only with -n +if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { + $Options{'o'} = 'dump'; + warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); +}; +# set output type to 'pretty' for -l +if ($Options{'l'}) { + $Options{'o'} = 'pretty'; + warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n"); +}; + +### get time period +my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); +# reset to one month for 'dump' output type +if ($Options{'o'} eq 'dump' and $Options{'p'}) { + $StartMonth = $EndMonth; + warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. 
Month was set to $StartMonth.\n"); +}; + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### create report +# get list of newsgroups (-n) +my ($QueryPart,@GroupList); +my $Newsgroups = $Options{'n'}; +if ($Newsgroups) { + # explode list of newsgroups for WHERE clause + ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups); +} else { + # set to dummy value (always true) + $QueryPart = 1; +}; + +# manage thresholds +if (defined($Options{'t'})) { + if ($Options{'i'}) { + # -i: list groups below threshold + $QueryPart .= ' AND postings < ?'; + } else { + # default: list groups above threshold + $QueryPart .= ' AND postings > ?'; + }; + # push threshold to GroupList to match number of binding vars for DBQuery->execute + push @GroupList,$Options{'t'}; +} + +# construct WHERE clause +# $QueryPart is "list of newsgroup" (or 1), +# &SQLHierarchies() takes care of the exclusion of hierarchy levels (.ALL) +# according to setting of -s +my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'})); + +# get lenght of longest newsgroup delivered by query for formatting purposes +# FIXME +my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList); + +my ($OrderClause,$DBQuery); +# -b (best of / top list) defined? +if (!defined($Options{'b'}) and !defined($Options{'l'})) { + # default: neither -b nor -l + # set ordering (ORDER BY) to "newsgroups" or "postings", "ASC" or "DESC" + # according to -q and -d + $OrderClause = 'newsgroup'; + $OrderClause = 'postings' if $Options{'q'}; + $OrderClause .= ' DESC' if $Options{'d'}; + # prepare query: get number of postings per group from groups table for given months and newsgroups + $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); +} elsif ($Options{'b'}) { + # -b is set (then -l can't be!) 
+ # set sorting order (-i) + if ($Options{'i'}) { + $OrderClause = 'postings'; + } else { + $OrderClause = 'postings DESC'; + }; + # push LIMIT to GroupList to match number of binding vars for DBQuery->execute + push @GroupList,$Options{'b'}; + # prepare query: get sum of postings per group from groups table for given months and newsgroups with LIMIT + $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); +} else { + # -l must be set now, as all other cases have been taken care of + # set sorting order (-i) + if ($Options{'i'}) { + $OrderClause = '<'; + } else { + $OrderClause = '>'; + }; + # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars for DBQuery->execute + # FIXME -- together with the query (see below) + push @GroupList,$Options{'l'}; + push @GroupList,$StartMonth,$EndMonth; + # prepare query: get number of postings per group from groups table for given months and + # FIXME -- this query is ... in dire need of impromevent + $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) 
AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause)); +}; + +# execute query +$DBQuery->execute($StartMonth,$EndMonth,@GroupList) + or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); + +# output results +# print caption (-c) with time period if -m or -p is set +# FIXME - month or period should handled differently +printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); +# print caption (-c) with newsgroup list if -n is set +printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; +# print caption (-c) with threshold if -t is set, taking -i in account +printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; +if (!defined($Options{'b'}) and !defined($Options{'l'})) { + # default: neither -b nor -l + &OutputData($Options{'o'},$DBQuery,$MaxLength); +} elsif ($Options{'b'}) { + # -b is set (then -l can't be!) + # we have to read in the query results ourselves, as they do not have standard layout + while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { + # we just assign "top x" or "bottom x" instead of a month for the caption + # FIXME + print &FormatOutput($Options{'o'}, ($Options{'i'} ? 
'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); + }; +} else { + # -l must be set now, as all other cases have been taken care of + # we have to read in the query results ourselves, as they do not have standard layout + while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { + # we just switch $Newsgroups and $Month for output generation + # FIXME + print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); + }; +}; + +### close handles +$DBHandle->disconnect; + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +groupstats - create reports on newsgroup usage + +=head1 SYNOPSIS + +B [B<-Vhiscqd>] [B<-m> I] [B<-p> I] [B<-n> I] [B<-t> I] [B<-l> I] [B<-b> I] [B<-o> I] [B<-g> I] + +=head1 REQUIREMENTS + +See doc/README: Perl 5.8.x itself and the following modules from CPAN: + +=over 2 + +=item - + +Config::Auto + +=item - + +DBI + +=back + +=head1 DESCRIPTION + +This script create reports on newsgroup usage (number of postings per +group per month) taken from result tables created by +F. + +The time period to act on defaults to last month; you can assign +another month via the B<-m> switch or a time period via the B<-p> +switch; the latter takes preference. + +B will process all newsgroups by default; you can limit +that to only some newsgroups by supplying a list of those groups via +B<-n> (see below). You can include hierarchy levels in the output by +adding the B<-s> switch (see below). + +Furthermore you can set a threshold via B<-t> so that only newsgroups +with more postings per month will be included in the report. You can +invert that by the B<-i> switch so only newsgroups with less than +I postings per month will be included. 
+ +You can sort the output by number of postings per month instead of the +default (alphabetical list of newsgroups) by using B<-q>; you can +reverse the sorting order (from highest to lowest or in reversed +alphabetical order) by using B<-d>. + +Furthermore, you can create a list of newsgroups that had consistently +more (or less) than x postings per month during the whole report +period by using B<-l> (together with B as needed). + +Last but not least you can create a "best of" list of the top x +newsgroups via B<-b> (or a "worst of" list by adding B). + +By default, B will dump a very simple alphabetical list of +newsgroups, one per line, followed by the number of postings in that +month. This output format of course cannot sensibly be combined with +time periods, so you can set the output format by using B<-o> (see +below). Captions can be added by setting the B<-c> switch. + +=head2 Configuration + +F will read its configuration from F +which should be present in the same directory via Config::Auto. + +See doc/INSTALL for an overview of possible configuration options. + +You can override configuration options via the B<-g> switch. + +=head1 OPTIONS + +=over 3 + +=item B<-V> (version) + +Print out version and copyright information on B and exit. + +=item B<-h> (help) + +Print this man page and exit. + +=item B<-m> I (month) + +Set processing period to a month in YYYY-MM format. Ignored if B<-p> +is set. + +=item B<-p> I (period) + +Set processing period to a time period between two month, each in +YYYY-MM format, separated by a colon. Overrides B<-m>. + +=item B<-n> I (newsgroups) + +Limit processing to a certain set of newsgroups. I can +be a single newsgroup name (de.alt.test), a newsgroup hierarchy +(de.alt.*) or a list of either of these, separated by colons, for +example + + de.test:de.alt.test:de.newusers.* + +=item B<-t> I (threshold) + +Only include newsgroups with more than I postings per +month. 
Can be inverted by the B<-i> switch so that only newsgroups +with less than I postings will be included. + +This setting will be ignored if B<-l> or B<-b> is set. + +=item B<-l> I (level) + +Only include newsgroups with more than I postings per +month, every month during the whole reporting period. Can be inverted +by the B<-i> switch so that only newsgroups with less than I +postings every single month will be included. Output will be ordered +by newsgroup name, followed by month. + +This setting will be ignored if B<-b> is set. Overrides B<-t> and +can't be used together with B<-q> or B<-d>. + +=item B<-b> I (best of) + +Create a list of the I newsgroups with the most postings over the +whole reporting period. Can be inverted by the B<-i> switch so that a +list of the I newsgroups with the least postings over the whole +period is generated. Output will be ordered by sum of postings. + +Overrides B<-t> and B<-l> and can't be used together with B<-q> or +B<-d>. Output format is set to I (see below). + +=item B<-i> (invert) + +Used in conjunction with B<-t>, B<-l> or B<-b> to set a lower +threshold or level or generate a "bottom list" instead of a top list. + +=item B<-s> (sum per hierarchy level) + +Include "virtual" groups for every hierarchy level in output, for +example: + + de.alt.ALL 10 + de.alt.test 5 + de.alt.admin 7 + +See the B man page for details. + +=item B<-o> I (output format) + +Set output format. Default is I, consisting of an alphabetical +list of newsgroups, each on a new line, followed by the number of +postings in that month. This default format can't be used with time +periods of more than one month. + +I format is like I, but will print the month in front of +the newsgroup name. + +I format can only be use with a group list (see B<-n>) of +exactly one newsgroup and is like I, but will output months, +followed by the number of postings. 
+ +If you don't need easily parsable output, you'll mostly use I<pretty> +format, which will print a header for each new month and try to align +newsgroup names and posting counts. Usage of B<-b> will force this +format. + +=item B<-c> (captions) + +Add captions to output (reporting period, newsgroups list, threshold). + +=item B<-q> (quantity of postings) + +Sort by number of postings instead of by newsgroup names. + +Cannot be used with B<-l> or B<-b>. + +=item B<-d> (descending) + +Change sort order to descending. + +Cannot be used with B<-l> or B<-b>. + +=item B<-g> I<table>
(postings per group table) + +Override I from F. + +=back + +=head1 INSTALLATION + +See doc/INSTALL. + +=head1 EXAMPLES + +Show number of postings per group for lasth month in I format: + + groupstats + +Show that report for January of 2010 and de.alt.* plus de.test, +including display of hierarchy levels: + + groupstats -m 2010-01 -n de.alt.*:de.test -s + +Show that report for the year of 2010 in I format: + + groupstats -p 2010-01:2010-12 -o pretty + +Only show newsgroups with less than 30 postings last month, ordered +by number of postings, descending, in I format: + + groupstats -iqdt 30 -o pretty + +Show top 10 for the first half-year of of 2010 in I format: + + groupstats -p 2010-01:2010-06 -b 10 -o pretty + +Report all groups that had less than 30 postings every singele month +in the year of 2010 (I format is forced) + + groupstats -p 2010-01:2010-12 -il 30 + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file for B. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +doc/README + +=item - + +doc/INSTALL + +=item - + +gatherstats -h + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut diff --git a/install/install.pl b/install/install.pl index 1417e5f..d45d911 100755 --- a/install/install.pl +++ b/install/install.pl @@ -1,257 +1,257 @@ -#! /usr/bin/perl -W -# -# install.pl -# -# This script will create database tables as necessary. -# -# It is part of the NewsStats package. -# -# Copyright (c) 2010 Thomas Hochstein -# -# It can be redistributed and/or modified under the same terms under -# which Perl itself is published. 
- -BEGIN { - our $VERSION = "0.01"; - use File::Basename; - # we're in .../install, so our module is in .. - push(@INC, dirname($0).'/..'); -} -use strict; - -use NewsStats qw(:DEFAULT); - -use Cwd; - -use DBI; - -################################# Main program ################################# - -### read commandline options -my %Options = &ReadOptions(''); - -### change working directory to .. (as we're in .../install) -chdir dirname($0).'/..'; - -### read configuration -print("Reading configuration.\n"); -my %Conf = %{ReadConfig('newsstats.conf')}; - -##### -------------------------------------------------------------------------- -##### Database table definitions -##### -------------------------------------------------------------------------- - -my %DBCreate = ('DBTableRaw' => < <table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; - -# check for tables and create them, if they don't exist yet -foreach my $Table (keys %DBCreate) { - if (defined($TablesInDB{$Conf{$Table}})) { - printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table}); - next; - }; - my $DBQuery = $DBHandle->prepare($DBCreate{$Table}); - $DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr); - printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table}); -}; - -# close handle -$DBHandle->disconnect; -print "Database table generation done.\n"; - -### output information on other necessary steps -my $Path = cwd(); -print < -TODO - -__END__ - -################################ Documentation ################################# - -=head1 NAME - -install - installation script - -=head1 SYNOPSIS - -B [B<-Vh>] - -=head1 REQUIREMENTS - -See doc/README: Perl 5.8.x itself and the following modules from CPAN: - -=over 2 - -=item - - -Config::Auto - -=item - - -DBI - -=back - -=head1 DESCRIPTION - -This script will create database tables as necessary 
and configured. - -=head2 Configuration - -F will read its configuration from F via -Config::Auto. - -See doc/INSTALL for an overview of possible configuration options. - -=head1 OPTIONS - -=over 3 - -=item B<-V> (version) - -Print out version and copyright information on B and exit. - -=item B<-h> (help) - -Print this man page and exit. - -=back - -=head1 FILES - -=over 4 - -=item F - -The script itself. - -=item F - -Library functions for the NewsStats package. - -=item F - -Runtime configuration file for B. - -=back - -=head1 BUGS - -Please report any bugs or feature requests to the author or use the -bug tracker at L! - -=head1 SEE ALSO - -=over 2 - -=item - - -doc/README - -=item - - -doc/INSTALL - -=back - -This script is part of the B package. - -=head1 AUTHOR - -Thomas Hochstein - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2010 Thomas Hochstein - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut +#! /usr/bin/perl -W +# +# install.pl +# +# This script will create database tables as necessary. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../install, so our module is in .. + push(@INC, dirname($0).'/..'); +} +use strict; + +use NewsStats qw(:DEFAULT); + +use Cwd; + +use DBI; + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions(''); + +### change working directory to .. 
(as we're in .../install) +chdir dirname($0).'/..'; + +### read configuration +print("Reading configuration.\n"); +my %Conf = %{ReadConfig('newsstats.conf')}; + +##### -------------------------------------------------------------------------- +##### Database table definitions +##### -------------------------------------------------------------------------- + +my %DBCreate = ('DBTableRaw' => < <table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; + +# check for tables and create them, if they don't exist yet +foreach my $Table (keys %DBCreate) { + if (defined($TablesInDB{$Conf{$Table}})) { + printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table}); + next; + }; + my $DBQuery = $DBHandle->prepare($DBCreate{$Table}); + $DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr); + printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table}); +}; + +# close handle +$DBHandle->disconnect; +print "Database table generation done.\n"; + +### output information on other necessary steps +my $Path = cwd(); +print < +TODO + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +install - installation script + +=head1 SYNOPSIS + +B [B<-Vh>] + +=head1 REQUIREMENTS + +See doc/README: Perl 5.8.x itself and the following modules from CPAN: + +=over 2 + +=item - + +Config::Auto + +=item - + +DBI + +=back + +=head1 DESCRIPTION + +This script will create database tables as necessary and configured. + +=head2 Configuration + +F will read its configuration from F via +Config::Auto. + +See doc/INSTALL for an overview of possible configuration options. + +=head1 OPTIONS + +=over 3 + +=item B<-V> (version) + +Print out version and copyright information on B and exit. + +=item B<-h> (help) + +Print this man page and exit. 
+ +=back + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file for B. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +doc/README + +=item - + +doc/INSTALL + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut