-#! /usr/bin/perl -W\r
-#\r
-# feedlog.pl\r
-#\r
-# This script will log headers and other data to a database\r
-# for further analysis by parsing a feed from INN.\r
-# \r
-# It is part of the NewsStats package.\r
-#\r
-# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
-#\r
-# It can be redistributed and/or modified under the same terms under \r
-# which Perl itself is published.\r
-\r
-BEGIN {\r
- our $VERSION = "0.01";\r
- use File::Basename;\r
- push(@INC, dirname($0));\r
-}\r
-use strict;\r
-\r
-use NewsStats;\r
-\r
-use Sys::Syslog qw(:standard :macros);\r
-\r
-use Date::Format;\r
-use DBI;\r
-\r
-################################# Main program #################################\r
-\r
-### read commandline options\r
-my %Options = &ReadOptions('qd');\r
-\r
-### read configuration\r
-my %Conf = %{ReadConfig('newsstats.conf')};\r
-\r
-### init syslog\r
-openlog($MySelf, 'nofatal,pid', LOG_NEWS);\r
-syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'};\r
-\r
-### init database\r
-my $DBHandle = InitDB(\%Conf,0);\r
-if (!$DBHandle) {\r
- syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);\r
- while (1) {}; # go into endless loop to suppress further errors and respawning\r
-};\r
-my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));\r
-\r
-### main loop\r
-while (<>) {\r
- chomp;\r
- # catch empty lines trailing or leading\r
- if ($_ eq '') {\r
- next;\r
- }\r
- # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups\r
- my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;\r
- # remaining lines contain headers\r
- my $Headers = "";\r
- while (<>) {\r
- chomp;\r
- # empty line terminates this article\r
- if ($_ eq '') {\r
- last;\r
- }\r
- # collect headers\r
- $Headers .= $_."\n" ;\r
- }\r
-\r
- # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp\r
- my $Day = time2str("%Y-%m-%d", $Timestamp);\r
- my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);\r
-\r
- # write to database\r
- if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) {\r
- syslog(LOG_ERR, 'Database error: %s', $DBI::errstr);\r
- };\r
- $DBQuery->finish;\r
- \r
- warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'};\r
-}\r
-\r
-### close handles\r
-$DBHandle->disconnect;\r
-syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'};\r
-closelog();\r
-\r
+#! /usr/bin/perl
+#
+# feedlog.pl
+#
+# This script will log headers and other data to a database
+# for further analysis by parsing a feed from INN.
+#
+# It is part of the NewsStats package.
+#
+# Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+#
+# It can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+BEGIN { # executed at compile time, i.e. before 'use NewsStats' below is resolved
+ our $VERSION = "0.01"; # version of this script, kept in package variable $main::VERSION
+ use File::Basename; # provides dirname()
+ push(@INC, dirname($0)); # search the directory this script was started from for modules, too
+}
+use strict;
+use warnings;
+
+use NewsStats;
+
+use Sys::Syslog qw(:standard :macros);
+
+use Date::Format;
+use DBI;
+use Getopt::Long qw(GetOptions);
+Getopt::Long::config ('bundling');
+
+################################# Subroutines ##################################
+
+sub PrepareDB {
+### (re-)connect to the database and prepare the INSERT statement
+### used for logging; does not return before a connection was made
+### IN : $ConfigR : reference to configuration hash
+###                 (keys used here: DBDatabase, DBTableRaw)
+### OUT: $DBHandle: database handle
+###      $DBQuery : prepared statement
+### NOTE: $DBHandle, $DBQuery and $OptQuiet are package variables, so
+###       a handle opened by an earlier call is still known here;
+###       $OptQuiet has to be set as a package variable for quiet
+###       mode to be honoured in this sub
+  our ($DBHandle, $DBQuery, $OptQuiet);
+  my ($ConfigR) = @_;
+  my %Conf = %{$ConfigR};
+  # get rid of a connection left over from a previous call
+  if ($DBHandle) {
+    $DBHandle->disconnect;
+    undef $DBHandle;
+  }
+  # keep trying to connect, pausing 5 seconds after each failure
+  until ($DBHandle) {
+    $DBHandle = InitDB($ConfigR,0);
+    if ($DBHandle) {
+      syslog(LOG_NOTICE, "Database connection (re-)established successfully.") unless $OptQuiet;
+    } else {
+      syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);
+      sleep(5);
+    }
+  }
+  # fill database and table name into the statement template
+  my $Statement = sprintf("INSERT INTO %s.%s (day,date,mid,
+ timestamp,token,size,peer,path,
+ newsgroups,headers)
+ VALUES (?,?,?,?,?,?,?,?,?,?)",
+                          @Conf{'DBDatabase','DBTableRaw'});
+  $DBQuery = $DBHandle->prepare($Statement);
+  return ($DBHandle,$DBQuery);
+}
+
+
+################################# Main program #################################
+
+### read commandline options
+# The option variables are package variables on purpose: PrepareDB() reads
+# $OptQuiet through 'our', so a file-scoped 'my' variable would never reach
+# it and -q would not silence the messages logged from there.
+our ($OptDebug,$OptQuiet);
+GetOptions ('d|debug!'      => \$OptDebug,
+            # '--quiet' is the documented long name; '--test' is kept as
+            # an alias for backward compatibility
+            'q|quiet|test!' => \$OptQuiet,
+            'h|help'        => \&ShowPOD,
+            'V|version'     => \&ShowVersion) or exit 1;
+
+### read configuration
+my %Conf = %{ReadConfig($HomePath.'/newsstats.conf')};
+
+### init syslog
+openlog($0, 'nofatal,pid', LOG_NEWS);
+syslog(LOG_NOTICE, "$MyVersion starting up.") if !$OptQuiet;
+
+### init database
+# PrepareDB() does not return before a connection has been established
+my ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
+
+### main loop
+# every article arrives as one line of overview data, followed by its
+# header lines and terminated by an empty line
+while (<>) {
+  chomp;
+  # catch empty lines trailing or leading
+  if ($_ eq '') {
+    next;
+  }
+  # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups
+  my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;
+  # remaining lines contain headers
+  my $Headers = "";
+  while (<>) {
+    chomp;
+    # empty line terminates this article
+    if ($_ eq '') {
+      last;
+    }
+    # collect headers
+    $Headers .= $_."\n" ;
+  }
+
+  # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp
+  my $Day  = time2str("%Y-%m-%d", $Timestamp);
+  my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);
+
+  # values in the order of the placeholders of the prepared statement
+  my @Record = ($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer,
+                $Path, $Newsgroups, $Headers);
+
+  # write to database
+  if (!$DBQuery->execute(@Record)) {
+    syslog(LOG_ERR, 'Database error %s while processing %s: %s',
+           $DBI::err, $Mid, $DBI::errstr);
+    my $Stored = 0;
+    # if "MySQL server has gone away" (error 2006), try to recover:
+    # reconnect and repeat the write attempt once
+    if ($DBI::err == 2006) {
+      ($DBHandle,$DBQuery) = PrepareDB(\%Conf);
+      $Stored = $DBQuery->execute(@Record);
+    }
+    # any other error or a failed second attempt: log missing posting
+    syslog(LOG_ERR, '%s was dropped and lost.',$Mid) if !$Stored;
+  }
+  $DBQuery->finish;
+
+  # dump the record to STDERR in debug mode
+  warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\n".
+               "Size: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",
+               @Record) if $OptDebug;
+}
+
+### close handles
+$DBHandle->disconnect;
+syslog(LOG_NOTICE, "$0 closing down.") if !$OptQuiet;
+closelog();
+
+__END__
+
+################################ Documentation #################################
+
+=head1 NAME
+
+feedlog - log data from an INN feed to a database
+
+=head1 SYNOPSIS
+
+B<feedlog> [B<-Vhdq>]
+
+=head1 REQUIREMENTS
+
+See L<doc/README>.
+
+=head1 DESCRIPTION
+
+This script will log overview data and complete headers to a database
+table for further examination by parsing a feed from INN. It will
+parse that information and write it to a mysql database table in real
+time.
+
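+All reporting is done to I<syslog> via I<news> facility. If B<feedlog>
+fails to initiate a database connection, it will log to I<syslog> with
+I<CRIT> priority and try again every 5 seconds until the connection
+succeeds, as terminating would only result in a rapid respawn. If the
+database server has gone away while B<feedlog> is running (MySQL error
+2006), it will reconnect and repeat the failed write once; a posting
+that still cannot be written is logged as dropped.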
+
+=head2 Configuration
+
+B<feedlog> will read its configuration via Config::Auto from
+F<newsstats.conf>, which should be present in the same directory.
+
+See L<doc/INSTALL> for an overview of possible configuration options.
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-V>, B<--version>
+
+Print out version and copyright information and exit.
+
+=item B<-h>, B<--help>
+
+Print this man page and exit.
+
+=item B<-d>, B<--debug>
+
+Output debugging information to STDERR while parsing STDIN. You'll
+find that information most probably in your B<INN> F<errlog> file.
+
+=item B<-q>, B<--quiet>
+
+Suppress logging to syslog.
+
+=back
+
+=head1 INSTALLATION
+
+See L<doc/INSTALL>.
+
+=head1 EXAMPLES
+
+Set up a feed like this in your B<INN> F<newsfeeds> file:
+
+ ## gather statistics for NewsStats
+ newsstats!
+ :!*,de.*
+ :Tc,WmtfbsPNH,Ac:/path/to/feedlog.pl
+
+See L<doc/INSTALL> for further information.
+
+=head1 FILES
+
+=over 4
+
+=item F<feedlog.pl>
+
+The script itself.
+
+=item F<NewsStats.pm>
+
+Library functions for the NewsStats package.
+
+=item F<newsstats.conf>
+
+Runtime configuration file.
+
+=back
+
+=head1 BUGS
+
+Please report any bugs or feature requests to the author or use the
+bug tracker at L<http://bugs.th-h.de/>!
+
+=head1 SEE ALSO
+
+=over 2
+
+=item -
+
+L<doc/README>
+
+=item -
+
+L<doc/INSTALL>
+
+=back
+
+This script is part of the B<NewsStats> package.
+
+=head1 AUTHOR
+
+Thomas Hochstein <thh@inter.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2010-2013 Thomas Hochstein <thh@inter.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut