--- /dev/null
+# NewsStats.pm
+#
+# Library functions for the NewsStats package.
+#
+# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
+#
+# This module can be redistributed and/or modified under the same terms under
+# which Perl itself is published.
+
+package NewsStats;
+
+use strict;
+use warnings;
+our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(
+ $MySelf
+ $MyVersion
+ ReadOptions
+ ReadConfig
+ OverrideConfig
+ InitDB
+);
+@EXPORT_OK = qw(
+ GetTimePeriod
+ LastMonth
+ CheckMonth
+ SplitPeriod
+ ListMonth
+ ListNewsgroups
+ OutputData
+ FormatOutput
+ SQLHierarchies
+ SQLGroupList
+ GetMaxLenght
+);
+%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)],
+ Output => [qw(OutputData FormatOutput)],
+ SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]);
+$VERSION = '0.1';
+
+use Data::Dumper;
+use File::Basename;
+use Getopt::Std;
+
+use Config::Auto;
+use DBI;
+
+#####-------------------------------- Vars --------------------------------#####
+
+our $MySelf = fileparse($0, '.pl');
+our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)";
+
+#####------------------------------- Basics -------------------------------#####
+
+################################################################################
+sub ReadOptions {
+################################################################################
+### read commandline options and act on standard options
+### IN : $Params: containing list of commandline paramaters (without -h and -V)
+### OUT: a hash containing the commandline options
+ $Getopt::Std::STANDARD_HELP_VERSION = 1;
+
+ my ($Params) = @_;
+ my %Options;
+
+ getopts('Vh'.$Params, \%Options);
+
+ # -V: display version
+ &ShowVersion if ($Options{'V'});
+
+ # -h: feed myself to perldoc
+ &ShowPOD if ($Options{'h'});
+
+ return %Options;
+};
+################################################################################
+
+################################################################################
+sub ShowVersion {
+################################################################################
+### display version and exit
+ print "$MyVersion\nCopyright (c) 2010 Thomas Hochstein <thh\@inter.net>\n";
+ print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n";
+ exit(100);
+};
+################################################################################
+
+################################################################################
+sub ShowPOD {
+################################################################################
+### feed myself to perldoc and exit
+ exec('perldoc', $0);
+ exit(100);
+};
+################################################################################
+
+################################################################################
+sub ReadConfig {
+################################################################################
+### read config via Config::Auto
+### IN : $ConfFile: config filename
+### OUT: reference to a hash containing the configuration
+ my ($ConfFile) = @_;
+ return Config::Auto::parse($ConfFile, format => 'equal');
+};
+################################################################################
+
+################################################################################
+sub OverrideConfig {
+################################################################################
+### override configuration values
+### IN : $ConfigR : reference to configuration hash
+### $OverrideR: reference to a hash containing overrides
+ my ($ConfigR,$OverrideR) = @_;
+ my %Override = %$OverrideR;
+ warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1);
+ return if (keys %Override < 1 or keys %$ConfigR < 1);
+ foreach my $Key (keys %Override) {
+ $$ConfigR{$Key} = $Override{$Key};
+ };
+};
+################################################################################
+
+################################################################################
+sub InitDB {
+################################################################################
+### initialise database connection
+### IN : $ConfigR: reference to configuration hash
+### $Die : if TRUE, die if connection failed
+### OUT: DBHandle
+ my ($ConfigR,$Die) = @_;
+ my %Conf = %$ConfigR;
+ my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 });
+ if (!$DBHandle) {
+ die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die);
+ warn("$MySelf: W: $DBI::errstr\n");
+ };
+ return $DBHandle;
+};
+################################################################################
+
+#####------------------------------ GetStats ------------------------------#####
+
+################################################################################
+sub ListNewsgroups {
+################################################################################
+### count each newsgroup and each hierarchy level, but only once
+### IN : $Newsgroups: a list of newsgroups (content of Newsgroups:)
+### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys
+ my ($Newsgroups) = @_;
+ my %Newsgroups;
+ chomp($Newsgroups);
+ # remove whitespace from contents of Newsgroups:
+ $Newsgroups =~ s/\s//;
+ # call &HierarchyCount for each newsgroup in $Newsgroups:
+ for (split /,/, $Newsgroups) {
+ # add original newsgroup to %Newsgroups
+ $Newsgroups{$_} = 1;
+ # add all hierarchy elements to %Newsgroups, amended by '.ALL',
+ # i.e. de.alt.ALL and de.ALL
+ foreach (ParseHierarchies($_)) {
+ $Newsgroups{$_.'.ALL'} = 1;
+ }
+ };
+ return %Newsgroups;
+};
+
+################################################################################
+sub ParseHierarchies {
+################################################################################
+### get all hierarchies a newsgroup belongs to
+### IN : $Newsgroup : a newsgroup name
+### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to
+ my ($Newsgroup) = @_;
+ my @Hierarchies;
+ # strip trailing dots
+ $Newsgroup =~ s/(.+)\.+$/$1/;
+ # butcher newsgroup name by "." and add each hierarchy to @Hierarchies
+ # i.e. de.alt.test: "de.alt" and "de"
+ while ($Newsgroup =~ /\./) {
+ $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/;
+ push @Hierarchies, $Newsgroup;
+ };
+ return @Hierarchies;
+};
+
+################################################################################
+
+#####----------------------------- TimePeriods ----------------------------#####
+
+################################################################################
+sub GetTimePeriod {
+################################################################################
+### get time period using -m / -p
+### IN : $Month,$Period: contents of -m and -p
+### OUT: $StartMonth, $EndMonth
+ my ($Month,$Period) = @_;
+ # exit if -m is set and not like YYYY-MM
+ die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month);
+ # default: set -m to last month
+ $Month = &LastMonth if (!defined($Month) and !defined($Period));
+ # set $StartMonth, $EndMonth
+ my ($StartMonth, $EndMonth);
+ if ($Period) {
+ # -p: get date range
+ ($StartMonth, $EndMonth) = &SplitPeriod($Period);
+ die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth);
+ } else {
+ # set $StartMonth = $EndMonth = $Month if -p is not set
+ $StartMonth = $EndMonth = $Month;
+ };
+ return ($StartMonth, $EndMonth);
+};
+
+################################################################################
+sub LastMonth {
+################################################################################
+### get last month from today in YYYY-MM format
+### OUT: last month as YYYY-MM
+ # get today's date
+ my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time);
+ # $Month is already defined from 0 to 11, so no need to decrease it by 1
+ $Year += 1900;
+ if ($Month < 1) {
+ $Month = 12;
+ $Year--;
+ };
+ # return last month
+ return sprintf('%4d-%02d',$Year,$Month);
+};
+
+################################################################################
+sub CheckMonth {
+################################################################################
+### check for valid month
+### IN : $Month: month
+### OUT: TRUE / FALSE
+ my ($Month) = @_;
+ return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/);
+ return 1;
+};
+
+################################################################################
+sub SplitPeriod {
+################################################################################
+### split a time period YYYY-MM:YYYY-MM into start and end month
+### IN : $Period: time period
+### OUT: $StartMonth, Â$EndMonth
+ my ($Period) = @_;
+ return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/;
+ my ($StartMonth, $EndMonth) = split /:/, $Period;
+ # switch parameters as necessary
+ if ($EndMonth gt $StartMonth) {
+ return ($StartMonth, $EndMonth);
+ } else {
+ return ($EndMonth, $StartMonth);
+ };
+};
+
+################################################################################
+sub ListMonth {
+################################################################################
+### return a list of month (YYYY-MM) between start and end month
+### IN : $StartMonth, $EndMonth
+### OUT: @Months: array containing all months from $StartMonth to $EndMonth
+ my ($StartMonth, $EndMonth) = @_;
+ return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/);
+ # return if $StartMonth = $EndMonth
+ return ($StartMonth) if ($StartMonth eq $EndMonth);
+ # set $Year, $Month from $StartMonth
+ my ($Year, $Month) = split /-/, $StartMonth;
+ # define @Months
+ my (@Months);
+ until ("$Year-$Month" gt $EndMonth) {
+ push @Months, "$Year-$Month";
+ $Month = "$Month"; # force string context
+ $Month++;
+ if ($Month > 12) {
+ $Month = '01';
+ $Year++;
+ };
+ };
+ return @Months;
+};
+
+#####---------------------------- OutputFormats ---------------------------#####
+
+################################################################################
+sub OutputData {
+################################################################################
+### output information with formatting from DBHandle
+### IN : $Format : format specifier
+### $DBQuery: database query handle with executed query,
+### containing $Month, $Key, $Value
+### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
+### OUT: $Output: formatted output
+ my ($Format, $DBQuery,$PadGroup) = @_;
+ while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
+ print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup);
+ };
+};
+
+################################################################################
+sub FormatOutput {
+################################################################################
+### format information for output
+### IN : $Format : format specifier
+### $PadGroup: padding length for newsgroups field (optional) for 'pretty'
+### $Month : month (as YYYY-MM)
+### $Key : newsgroup, client, ...
+### $Value : number of postings with that attribute
+### OUT: $Output: formatted output
+ my ($Format, $Month, $Key, $Value, $PadGroup) = @_;
+
+ # define output types
+ my %LegalOutput;
+ @LegalOutput{('dump','dumpgroup','list','pretty')} = ();
+ # bail out if format is unknown
+ die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format});
+
+ my ($Output);
+ our ($LastIteration);
+ if ($Format eq 'dump') {
+ # output as dump (ng nnnnn)
+ $Output = sprintf ("%s %u\n",$Key,$Value);
+ } elsif ($Format eq 'dumpgroup') {
+ # output as dump (YYYY-NN: nnnnn)
+ $Output = sprintf ("%s: %5u\n",$Month,$Value);
+ } elsif ($Format eq 'list') {
+ # output as list (YYYY-NN: ng nnnnn)
+ $Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value);
+ } elsif ($Format eq 'pretty') {
+ # output as table
+ $Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration);
+ $LastIteration = $Month;
+ $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value);
+ };
+ return $Output;
+};
+
+#####------------------------- QueryModifications -------------------------#####
+
+################################################################################
+sub SQLHierarchies {
+################################################################################
+### amend WHERE clause to include hierarchies
+### IN : $ShowHierarchies: boolean value
+### OUT: SQL code
+ my ($ShowHierarchies) = @_;
+ return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'";
+};
+
+################################################################################
+sub GetMaxLenght {
+################################################################################
+### get length of longest field in query
+### IN : $DBHandle : database handel
+### $Table : table to query
+### $Field : field to check
+### $WhereClause: WHERE clause
+### @BindVars : bind variables for WHERE clause
+### OUT: $Length: length of longest instnace of $Field
+ my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_;
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause));
+ $DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table);
+ my ($Length) = $DBQuery->fetchrow_array;
+ return $Length;
+};
+
+################################################################################
+sub SQLGroupList {
+################################################################################
+### create part of WHERE clause for list of newsgroups separated by :
+### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*)
+### OUT: SQL code, list of newsgroups
+ my ($Newsgroups) = @_;
+ $Newsgroups =~ s/\*/%/g;
+ return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/;
+ my $SQL = '(';
+ my @GroupList = split /:/, $Newsgroups;
+ foreach (@GroupList) {
+ $SQL .= ' OR ' if $SQL gt '(';
+ $SQL .= 'newsgroup LIKE ?';
+ };
+ $SQL .= ')';
+ return ($SQL,@GroupList);
+};
+
+#####------------------------------- done ---------------------------------#####
+1;
+
+
--- /dev/null
+#! /usr/bin/perl -W\r
+#\r
+# feedlog.pl\r
+#\r
+# This script will log headers and other data to a database\r
+# for further analysis by parsing a feed from INN.\r
+# \r
+# It is part of the NewsStats package.\r
+#\r
+# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
+#\r
+# It can be redistributed and/or modified under the same terms under \r
+# which Perl itself is published.\r
+\r
+BEGIN {\r
+ our $VERSION = "0.01";\r
+ use File::Basename;\r
+ push(@INC, dirname($0));\r
+}\r
+use strict;\r
+\r
+use NewsStats;\r
+\r
+use Sys::Syslog qw(:standard :macros);\r
+\r
+use Date::Format;\r
+use DBI;\r
+\r
+################################# Main program #################################\r
+\r
+### read commandline options\r
+my %Options = &ReadOptions('qd');\r
+\r
+### read configuration\r
+my %Conf = %{ReadConfig('newsstats.conf')};\r
+\r
+### init syslog\r
+openlog($MySelf, 'nofatal,pid', LOG_NEWS);\r
+syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'};\r
+\r
+### init database\r
+my $DBHandle = InitDB(\%Conf,0);\r
+if (!$DBHandle) {\r
+ syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr);\r
+ while (1) {}; # go into endless loop to suppress further errors and respawning\r
+};\r
+my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));\r
+\r
+### main loop\r
+while (<>) {\r
+ chomp;\r
+ # catch empty lines trailing or leading\r
+ if ($_ eq '') {\r
+ next;\r
+ }\r
+ # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups\r
+ my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split;\r
+ # remaining lines contain headers\r
+ my $Headers = "";\r
+ while (<>) {\r
+ chomp;\r
+ # empty line terminates this article\r
+ if ($_ eq '') {\r
+ last;\r
+ }\r
+ # collect headers\r
+ $Headers .= $_."\n" ;\r
+ }\r
+\r
+ # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp\r
+ my $Day = time2str("%Y-%m-%d", $Timestamp);\r
+ my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp);\r
+\r
+ # write to database\r
+ if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) {\r
+ syslog(LOG_ERR, 'Database error: %s', $DBI::errstr);\r
+ };\r
+ $DBQuery->finish;\r
+ \r
+ warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'};\r
+}\r
+\r
+### close handles\r
+$DBHandle->disconnect;\r
+syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'};\r
+closelog();\r
+\r
--- /dev/null
+#! /usr/bin/perl -W\r
+#\r
+# gatherstats.pl\r
+#\r
+# This script will gather statistical information from a database\r
+# containing headers and other information from a INN feed.\r
+# \r
+# It is part of the NewsStats package.\r
+#\r
+# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
+#\r
+# It can be redistributed and/or modified under the same terms under \r
+# which Perl itself is published.\r
+\r
+BEGIN {\r
+ our $VERSION = "0.01";\r
+ use File::Basename;\r
+ push(@INC, dirname($0));\r
+}\r
+use strict;\r
+\r
+use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups);\r
+\r
+use DBI;\r
+\r
+################################# Definitions ##################################\r
+\r
+# define types of information that can be gathered\r
+# all / groups (/ clients / hosts)\r
+my %LegalTypes;\r
+@LegalTypes{('all','groups')} = ();\r
+\r
+################################# Main program #################################\r
+\r
+### read commandline options\r
+my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:');\r
+\r
+### read configuration\r
+my %Conf = %{ReadConfig('newsstats.conf')};\r
+\r
+### override configuration via commandline options\r
+my %ConfOverride;\r
+$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'};\r
+$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};\r
+$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'};\r
+$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'};\r
+$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'};\r
+&OverrideConfig(\%Conf,\%ConfOverride);\r
+\r
+### get type of information to gather, default to 'all'\r
+$Options{'t'} = 'all' if !$Options{'t'};\r
+die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}});\r
+\r
+### get time period\r
+my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});\r
+\r
+### init database\r
+my $DBHandle = InitDB(\%Conf,1);\r
+\r
+### get data for each month\r
+warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'};\r
+foreach my $Month (&ListMonth($StartMonth,$EndMonth)) {\r
+\r
+ print "---------- $Month ----------\n" if $Options{'d'};\r
+\r
+ if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') {\r
+ ### ----------------------------------------------\r
+ ### get groups data (number of postings per group)\r
+ # get groups data from raw table for given month\r
+ my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'}));\r
+ $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'});\r
+\r
+ # count postings per group\r
+ my %Postings;\r
+\r
+ while (($_) = $DBQuery->fetchrow_array) {\r
+ # get list oft newsgroups and hierarchies from Newsgroups:\r
+ my %Newsgroups = ListNewsgroups($_);\r
+ # count each newsgroup and hierarchy once\r
+ foreach (sort keys %Newsgroups) {\r
+ # don't count newsgroup/hierarchy in wrong TLH\r
+ next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/);\r
+ $Postings{$_}++;\r
+ };\r
+ };\r
+\r
+ print "----- GroupStats -----\n" if $Options{'d'};\r
+ foreach my $Newsgroup (sort keys %Postings) {\r
+ print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'};\r
+ if (!$Options{'o'}) {\r
+ # write to database\r
+ $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'}));\r
+ $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'});\r
+ $DBQuery->finish;\r
+ };\r
+ };\r
+ };\r
+};\r
+\r
+### close handles\r
+$DBHandle->disconnect;\r
+\r
--- /dev/null
+#! /usr/bin/perl -W\r
+#\r
+# groupstats.pl\r
+#\r
+# This script will get statistical data on newgroup usage\r
+# form a database.\r
+# \r
+# It is part of the NewsStats package.\r
+#\r
+# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
+#\r
+# It can be redistributed and/or modified under the same terms under \r
+# which Perl itself is published.\r
+\r
+BEGIN {\r
+ our $VERSION = "0.01";\r
+ use File::Basename;\r
+ push(@INC, dirname($0));\r
+}\r
+use strict;\r
+\r
+use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);\r
+\r
+use DBI;\r
+\r
+################################# Definitions ##################################\r
+\r
+# ...\r
+\r
+################################# Main program #################################\r
+\r
+### read commandline options\r
+my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');\r
+\r
+### read configuration\r
+my %Conf = %{ReadConfig('newsstats.conf')};\r
+\r
+### override configuration via commandline options\r
+my %ConfOverride;\r
+$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};\r
+&OverrideConfig(\%Conf,\%ConfOverride);\r
+\r
+### default output type to 'dump'\r
+$Options{'o'} = 'dump' if !$Options{'o'};\r
+# fail if more than one newsgroup is combined with 'dumpgroup' type\r
+die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);\r
+# accept 'dumpgroup' only with -n\r
+if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {\r
+ $Options{'o'} = 'dump';\r
+ warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");\r
+};\r
+# you can't mix '-t' and '-b'\r
+if ($Options{'b'}) {\r
+ if ($Options{'t'}) {\r
+ warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b). Threshold '-t $Options{'t'}' was ignored.\n");\r
+ undef($Options{'t'});\r
+ };\r
+ warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b).\n") if $Options{'q'};\r
+ warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b).\n") if $Options{'d'};\r
+};\r
+\r
+### get query type, default to 'postings'\r
+#die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));\r
+#die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});\r
+\r
+### get time period\r
+my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});\r
+# reset to one month for 'dump' type\r
+if ($Options{'o'} eq 'dump' and $Options{'p'}) {\r
+ $StartMonth = $EndMonth;\r
+ warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");\r
+};\r
+\r
+### init database\r
+my $DBHandle = InitDB(\%Conf,1);\r
+\r
+### get data\r
+# get list of newsgroups (-n)\r
+my ($QueryPart,@GroupList);\r
+my $Newsgroups = $Options{'n'};\r
+if ($Newsgroups) {\r
+ ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);\r
+} else {\r
+ $QueryPart = 1;\r
+};\r
+\r
+# manage thresholds\r
+if (defined($Options{'t'})) {\r
+ if ($Options{'i'}) {\r
+ $QueryPart .= ' AND postings < ?';\r
+ } else {\r
+ $QueryPart .= ' AND postings > ?';\r
+ };\r
+ push @GroupList,$Options{'t'};\r
+}\r
+\r
+# construct WHERE clause\r
+my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));\r
+\r
+# get lenght of longest newsgroup delivered by query for formatting purposes\r
+my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);\r
+\r
+my ($OrderClause,$DBQuery);\r
+# -b (best of) defined?\r
+if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r
+ $OrderClause = 'newsgroup';\r
+ $OrderClause = 'postings' if $Options{'q'};\r
+ $OrderClause .= ' DESC' if $Options{'d'};\r
+ # do query: get number of postings per group from groups table for given months and newsgroups\r
+ $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r
+} elsif ($Options{'b'}) {\r
+ # set sorting order (-i)\r
+ if ($Options{'i'}) {\r
+ $OrderClause = 'postings';\r
+ } else {\r
+ $OrderClause = 'postings DESC';\r
+ };\r
+ # push LIMIT to GroupList to match number of binding vars\r
+ push @GroupList,$Options{'b'};\r
+ # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT\r
+ $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r
+} else { # -l\r
+ # set sorting order (-i)\r
+ if ($Options{'i'}) {\r
+ $OrderClause = '<';\r
+ } else {\r
+ $OrderClause = '>';\r
+ };\r
+ # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars\r
+ push @GroupList,$Options{'l'};\r
+ push @GroupList,$StartMonth,$EndMonth;\r
+ # do query: get number of postings per group from groups table for given months and \r
+ $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));\r
+};\r
+\r
+# execute query\r
+$DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);\r
+\r
+# output result\r
+printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});\r
+printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};\r
+printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};\r
+if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r
+ &OutputData($Options{'o'},$DBQuery,$MaxLength);\r
+} elsif ($Options{'b'}) {\r
+ while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r
+ print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);\r
+ };\r
+} else { # -l\r
+ while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r
+ print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);\r
+ };\r
+};\r
+\r
+### close handles\r
+$DBHandle->disconnect;\r
+\r
--- /dev/null
+#! /usr/bin/perl -W\r
+#\r
+# install.pl\r
+#\r
+# This script will create database tables as necessary.\r
+# \r
+# It is part of the NewsStats package.\r
+#\r
+# Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r
+#\r
+# It can be redistributed and/or modified under the same terms under \r
+# which Perl itself is published.\r
+\r
+BEGIN {\r
+ our $VERSION = "0.01";\r
+ use File::Basename;\r
+ # we're in .../install, so our module is in ..\r
+ push(@INC, dirname($0).'/..');\r
+}\r
+use strict;\r
+\r
+use NewsStats qw(:DEFAULT);\r
+\r
+use Cwd;\r
+\r
+use DBI;\r
+\r
+################################# Main program #################################\r
+\r
+### read commandline options\r
+my %Options = &ReadOptions('');\r
+\r
+### change working directory to .. (as we're in .../install)\r
+chdir '..';\r
+\r
+### read configuration\r
+print("Reading configuration.\n");\r
+my %Conf = %{ReadConfig('newsstats.conf')};\r
+\r
+##### --------------------------------------------------------------------------\r
+##### Database table definitions\r
+##### --------------------------------------------------------------------------\r
+\r
+my %DBCreate = ('DBTableRaw' => <<RAW, 'DBTableGrps' => <<GRPS);\r
+-- \r
+-- Table structure for table DBTableRaw\r
+-- \r
+\r
+CREATE TABLE IF NOT EXISTS `$Conf{'DBTableRaw'}` (\r
+ `id` bigint(20) unsigned NOT NULL auto_increment,\r
+ `day` date NOT NULL,\r
+ `mid` varchar(250) character set ascii NOT NULL,\r
+ `date` datetime NOT NULL,\r
+ `timestamp` bigint(20) NOT NULL,\r
+ `token` varchar(80) character set ascii NOT NULL,\r
+ `size` bigint(20) NOT NULL,\r
+ `peer` varchar(250) NOT NULL,\r
+ `path` varchar(1000) NOT NULL,\r
+ `newsgroups` varchar(1000) NOT NULL,\r
+ `headers` longtext NOT NULL,\r
+ `disregard` tinyint(1) default '0',\r
+ PRIMARY KEY (`id`),\r
+ KEY `day` (`day`),\r
+ KEY `mid` (`mid`),\r
+ KEY `peer` (`peer`)\r
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Raw data';\r
+RAW\r
+-- \r
+-- Table structure for table DBTableGrps\r
+-- \r
+\r
+CREATE TABLE IF NOT EXISTS `$Conf{'DBTableGrps'}` (\r
+ `id` bigint(20) unsigned NOT NULL auto_increment,\r
+ `month` varchar(7) character set ascii NOT NULL,\r
+ `newsgroup` varchar(100) NOT NULL,\r
+ `postings` int(11) NOT NULL,\r
+ `revision` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,\r
+ PRIMARY KEY (`id`),\r
+ UNIQUE KEY `month_newsgroup` (`month`,`newsgroup`),\r
+ KEY `newsgroup` (`newsgroup`),\r
+ KEY `postings` (`postings`)\r
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Postings per newsgroup';\r
+GRPS\r
+\r
+##### --------------------------- End of definitions ---------------------------\r
+\r
+### create database tables\r
+print "-----\nStarting database table generation.\n";\r
+# DB init\r
+my $DBHandle = InitDB(\%Conf,1);\r
+\r
+# read tables\r
+my %TablesInDB = %{$DBHandle->table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')};\r
+\r
+# check for tables and create them, if they don't exist yet\r
+foreach my $Table (keys %DBCreate) {\r
+ if (defined($TablesInDB{$Conf{$Table}})) {\r
+ printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table});\r
+ next;\r
+ };\r
+ my $DBQuery = $DBHandle->prepare($DBCreate{$Table});\r
+ $DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr);\r
+ printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table});\r
+};\r
+\r
+# close handle\r
+$DBHandle->disconnect;\r
+print "Database table generation done.\n";\r
+\r
+### output information on other necessary steps\r
+my $Path = cwd();\r
+print <<TODO;\r
+-----\r
+Things left to do:\r
+\r
+1) Setup an INN feed to feedlog.pl\r
+\r
+ a) Edit your 'newsfeeds' file and insert something like\r
+\r
+ ## gather statistics for NewsStats\r
+ newsstats!\\r
+ :!*,de.*\\r
+ :Tc,WmtfbsPNH,Ac:$Path/feedlog.pl\r
+\r
+ Please\r
+\r
+ * check that you got the path to feedlog.pl right\r
+ * check that feedlog.pl can be executed by the news user\r
+ * adapt the pattern (here: 'de.*') to your needs\r
+\r
+ b) Check your 'newsfeeds' syntax:\r
+\r
+ # ctlinnd checkfile\r
+\r
+ and reload 'newsfeeds':\r
+\r
+ # ctlinnd reload newsfeeds 'Adding newsstats! feed'\r
+\r
+ c) Watch your 'news.notice' and 'errlog' files:\r
+\r
+ # tail -f /var/log/news/news.notice\r
+ ...\r
+ # tail -f /var/log/news/errlog\r
+\r
+2) Watch your $Conf{'DBTableRaw'} table fill.\r
+\r
+3) Read the documentation. ;)\r
+\r
+Enjoy!\r
+\r
+-thh <thh\@inter.net>\r
+TODO\r