X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=bin%2Ffeedlog.pl;fp=bin%2Ffeedlog.pl;h=bbfd8142fd60adb31bfa3e75831fb11d3a2f542c;hp=0000000000000000000000000000000000000000;hb=1af57a53905788c99a088bb6b17088293c0224d1;hpb=07c0b2589af779c33d5d35b6a7fa0e7883201674 diff --git a/bin/feedlog.pl b/bin/feedlog.pl new file mode 100755 index 0000000..bbfd814 --- /dev/null +++ b/bin/feedlog.pl @@ -0,0 +1,273 @@ +#! /usr/bin/perl +# +# feedlog.pl +# +# This script will log headers and other data to a database +# for further analysis by parsing a feed from INN. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010-2013 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../bin, so our module is in ../lib + push(@INC, dirname($0).'/../lib'); +} +use strict; +use warnings; + +use NewsStats; + +use Sys::Syslog qw(:standard :macros); + +use Date::Format; +use DBI; +use Getopt::Long qw(GetOptions); +Getopt::Long::config ('bundling'); + +################################# Subroutines ################################## + +sub PrepareDB { +### initialise database connection, prepare statement +### and catch errors +### IN : \%Conf : reference to configuration hash +### OUT: $DBHandle: database handle +### $DBQuery : prepared statement + our ($DBHandle, $DBQuery, $OptQuiet); + my ($ConfigR) = @_; + my %Conf = %$ConfigR; + # drop current database connection - hard, if necessary + if ($DBHandle) { + $DBHandle->disconnect; + undef $DBHandle; + }; + # connect to database; try again every 5 seconds + while (!$DBHandle) { + $DBHandle = InitDB($ConfigR,0); + if (!$DBHandle) { + syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); + sleep(5); + } else {; + syslog(LOG_NOTICE, "Database connection (re-)established successfully.") if !$OptQuiet; + } + }; + $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid, + timestamp,token,size,peer,path, + newsgroups,headers) + VALUES (?,?,?,?,?,?,?,?,?,?)", + $Conf{'DBDatabase'}, + $Conf{'DBTableRaw'})); + return ($DBHandle,$DBQuery); +} + + +################################# Main program ################################# + +### read commandline options +my ($OptDebug,$OptQuiet,$OptConfFile); +GetOptions ('d|debug!' => \$OptDebug, + 'q|test!' => \$OptQuiet, + 'conffile=s' => \$OptConfFile, + 'h|help' => \&ShowPOD, + 'V|version' => \&ShowVersion) or exit 1; + +### read configuration +my %Conf = %{ReadConfig($OptConfFile)}; + +### init syslog +openlog($0, 'nofatal,pid', LOG_NEWS); +syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet; + +### init database +my ($DBHandle,$DBQuery) = PrepareDB(\%Conf); + +### main loop +while (<>) { + chomp; + # catch empty lines trailing or leading + if ($_ eq '') { + next; + } + # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups + my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; + # remaining lines contain headers + my $Headers = ""; + while (<>) { + chomp; + # empty line terminates this article + if ($_ eq '') { + last; + } + # collect headers + $Headers .= $_."\n" ; + } + + # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp + my $Day = time2str("%Y-%m-%d", $Timestamp); + my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); + + # write to database + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, 'Database error %s while processing %s: %s', + $DBI::err, $Mid, $DBI::errstr); + # if "MySQL server has gone away", try to recover + if ($DBI::err == 2006) { + # try to reconnect to database + ($DBHandle,$DBQuery) = PrepareDB(\%Conf); + # try to repeat the write attempt as before + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, + $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; + # otherwise log missing posting + } else { + syslog(LOG_ERR, '%s was dropped and lost.',$Mid); + }; + }; + $DBQuery->finish; + + warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n". + "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n", + $Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, + $Newsgroups, $Headers) if $OptDebug; +} + +### close handles +$DBHandle->disconnect; +syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet; +closelog(); + +__END__ + +################################ Documentation ################################# + +=head1 NAME + +feedlog - log data from an INN feed to a database + +=head1 SYNOPSIS + +B [B<-Vhdq>] [--conffile I] + +=head1 REQUIREMENTS + +See L. + +=head1 DESCRIPTION + +This script will log overview data and complete headers to a database +table for further examination by parsing a feed from INN. It will +parse that information and write it to a mysql database table in real +time. + +All reporting is done to I via I facility. If B +fails to initiate a database connection at startup, it will log to +I with I priority and go in an endless loop, as +terminating would only result in a rapid respawn. + +=head2 Configuration + +B will read its configuration from F which +should be present in the same directory via Config::Auto. + +See L for an overview of possible configuration options. + +=head1 OPTIONS + +=over 3 + +=item B<-V>, B<--version> + +Print out version and copyright information and exit. + +=item B<-h>, B<--help> + +Print this man page and exit. + +=item B<-d>, B<--debug> + +Output debugging information to STDERR while parsing STDIN. You'll +find that information most probably in your B F file. + +=item B<-q>, B<--quiet> + +Suppress logging to syslog. + +=item B<--conffile> I + +Load configuration from I instead of F. + +=back + +=head1 INSTALLATION + +See L. + +=head1 EXAMPLES + +Set up a feed like that in your B F file: + + ## gather statistics for NewsStats + newsstats! + :!*,de.* + :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl + +See L for further information. + +=head1 FILES + +=over 4 + +=item F + +The script itself. + +=item F + +Library functions for the NewsStats package. + +=item F + +Runtime configuration file. + +=back + +=head1 BUGS + +Please report any bugs or feature requests to the author or use the +bug tracker at L! + +=head1 SEE ALSO + +=over 2 + +=item - + +L + +=item - + +L + +=back + +This script is part of the B package. + +=head1 AUTHOR + +Thomas Hochstein + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2010-2012 Thomas Hochstein + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut