From 741336c210429f37bebfd9882b9461e824320cd0 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Fri, 17 Sep 2010 13:50:58 +0200 Subject: [PATCH] Initial checkin of working branch. Checkin of - NewsStats.pm - install/install.pl - feedlog.pl - gatherstats.pl - groupstats.pl Signed-off-by: Thomas Hochstein --- NewsStats.pm | 395 +++++++++++++++++++++++++++++++++++++++++++++ feedlog.pl | 87 ++++++++++ gatherstats.pl | 102 ++++++++++++ groupstats.pl | 157 ++++++++++++++++++ install/install.pl | 152 +++++++++++++++++ 5 files changed, 893 insertions(+) create mode 100644 NewsStats.pm create mode 100755 feedlog.pl create mode 100755 gatherstats.pl create mode 100755 groupstats.pl create mode 100755 install/install.pl diff --git a/NewsStats.pm b/NewsStats.pm new file mode 100644 index 0000000..ed76737 --- /dev/null +++ b/NewsStats.pm @@ -0,0 +1,395 @@ +# NewsStats.pm +# +# Library functions for the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# This module can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +package NewsStats; + +use strict; +use warnings; +our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); + +require Exporter; +@ISA = qw(Exporter); +@EXPORT = qw( + $MySelf + $MyVersion + ReadOptions + ReadConfig + OverrideConfig + InitDB +); +@EXPORT_OK = qw( + GetTimePeriod + LastMonth + CheckMonth + SplitPeriod + ListMonth + ListNewsgroups + OutputData + FormatOutput + SQLHierarchies + SQLGroupList + GetMaxLenght +); +%EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], + Output => [qw(OutputData FormatOutput)], + SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); +$VERSION = '0.1'; + +use Data::Dumper; +use File::Basename; +use Getopt::Std; + +use Config::Auto; +use DBI; + +#####-------------------------------- Vars --------------------------------##### + +our $MySelf = fileparse($0, '.pl'); +our $MyVersion = "$MySelf $::VERSION (NewsStats.pm $VERSION)"; + +#####------------------------------- Basics -------------------------------##### + +################################################################################ +sub ReadOptions { +################################################################################ +### read commandline options and act on standard options +### IN : $Params: containing list of commandline paramaters (without -h and -V) +### OUT: a hash containing the commandline options + $Getopt::Std::STANDARD_HELP_VERSION = 1; + + my ($Params) = @_; + my %Options; + + getopts('Vh'.$Params, \%Options); + + # -V: display version + &ShowVersion if ($Options{'V'}); + + # -h: feed myself to perldoc + &ShowPOD if ($Options{'h'}); + + return %Options; +}; +################################################################################ + +################################################################################ +sub ShowVersion { +################################################################################ +### display version and exit + print "$MyVersion\nCopyright (c) 2010 Thomas Hochstein \n"; + print "This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.\n"; + exit(100); +}; +################################################################################ + +################################################################################ +sub ShowPOD { +################################################################################ +### feed myself to perldoc and exit + exec('perldoc', $0); + exit(100); +}; +################################################################################ + +################################################################################ +sub ReadConfig { +################################################################################ +### read config via Config::Auto +### IN : $ConfFile: config filename +### OUT: reference to a hash containing the configuration + my ($ConfFile) = @_; + return Config::Auto::parse($ConfFile, format => 'equal'); +}; +################################################################################ + +################################################################################ +sub OverrideConfig { +################################################################################ +### override configuration values +### IN : $ConfigR : reference to configuration hash +### $OverrideR: reference to a hash containing overrides + my ($ConfigR,$OverrideR) = @_; + my %Override = %$OverrideR; + warn "$MySelf W: Empty configuration hash passed to OverrideConfig().\n" if ( keys %$ConfigR < 1); + return if (keys %Override < 1 or keys %$ConfigR < 1); + foreach my $Key (keys %Override) { + $$ConfigR{$Key} = $Override{$Key}; + }; +}; +################################################################################ + +################################################################################ +sub InitDB { +################################################################################ +### initialise database connection +### IN : $ConfigR: reference to configuration hash +### $Die : if TRUE, die if connection failed +### OUT: DBHandle + my ($ConfigR,$Die) = @_; + my %Conf = %$ConfigR; + my $DBHandle = DBI->connect(sprintf('DBI:%s:database=%s;host=%s',$Conf{'DBDriver'},$Conf{'DBDatabase'},$Conf{'DBHost'}), $Conf{'DBUser'}, $Conf{'DBPw'}, { PrintError => 0 }); + if (!$DBHandle) { + die ("$MySelf: E: $DBI::errstr\n") if (defined($Die) and $Die); + warn("$MySelf: W: $DBI::errstr\n"); + }; + return $DBHandle; +}; +################################################################################ + +#####------------------------------ GetStats ------------------------------##### + +################################################################################ +sub ListNewsgroups { +################################################################################ +### count each newsgroup and each hierarchy level, but only once +### IN : $Newsgroups: a list of newsgroups (content of Newsgroups:) +### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups) = @_; + my %Newsgroups; + chomp($Newsgroups); + # remove whitespace from contents of Newsgroups: + $Newsgroups =~ s/\s//; + # call &HierarchyCount for each newsgroup in $Newsgroups: + for (split /,/, $Newsgroups) { + # add original newsgroup to %Newsgroups + $Newsgroups{$_} = 1; + # add all hierarchy elements to %Newsgroups, amended by '.ALL', + # i.e. de.alt.ALL and de.ALL + foreach (ParseHierarchies($_)) { + $Newsgroups{$_.'.ALL'} = 1; + } + }; + return %Newsgroups; +}; + +################################################################################ +sub ParseHierarchies { +################################################################################ +### get all hierarchies a newsgroup belongs to +### IN : $Newsgroup : a newsgroup name +### OUT: @Hierarchies: array containing all hierarchies the newsgroup belongs to + my ($Newsgroup) = @_; + my @Hierarchies; + # strip trailing dots + $Newsgroup =~ s/(.+)\.+$/$1/; + # butcher newsgroup name by "." and add each hierarchy to @Hierarchies + # i.e. de.alt.test: "de.alt" and "de" + while ($Newsgroup =~ /\./) { + $Newsgroup =~ s/^((?:\.?[^.]+)*)\.[^.]+$/$1/; + push @Hierarchies, $Newsgroup; + }; + return @Hierarchies; +}; + +################################################################################ + +#####----------------------------- TimePeriods ----------------------------##### + +################################################################################ +sub GetTimePeriod { +################################################################################ +### get time period using -m / -p +### IN : $Month,$Period: contents of -m and -p +### OUT: $StartMonth, $EndMonth + my ($Month,$Period) = @_; + # exit if -m is set and not like YYYY-MM + die "$MySelf: E: Wrong date format - use '$MySelf -m YYYY-MM'!\n" if not &CheckMonth($Month); + # default: set -m to last month + $Month = &LastMonth if (!defined($Month) and !defined($Period)); + # set $StartMonth, $EndMonth + my ($StartMonth, $EndMonth); + if ($Period) { + # -p: get date range + ($StartMonth, $EndMonth) = &SplitPeriod($Period); + die "$MySelf: E: Wrong format for time period - use '$MySelf -p YYYY-MM:YYYY-MM'!\n" if !defined($StartMonth); + } else { + # set $StartMonth = $EndMonth = $Month if -p is not set + $StartMonth = $EndMonth = $Month; + }; + return ($StartMonth, $EndMonth); +}; + +################################################################################ +sub LastMonth { +################################################################################ +### get last month from today in YYYY-MM format +### OUT: last month as YYYY-MM + # get today's date + my (undef,undef,undef,undef,$Month,$Year,undef,undef,undef) = localtime(time); + # $Month is already defined from 0 to 11, so no need to decrease it by 1 + $Year += 1900; + if ($Month < 1) { + $Month = 12; + $Year--; + }; + # return last month + return sprintf('%4d-%02d',$Year,$Month); +}; + +################################################################################ +sub CheckMonth { +################################################################################ +### check for valid month +### IN : $Month: month +### OUT: TRUE / FALSE + my ($Month) = @_; + return 0 if (defined($Month) and $Month !~ /^\d{4}-\d{2}$/); + return 1; +}; + +################################################################################ +sub SplitPeriod { +################################################################################ +### split a time period YYYY-MM:YYYY-MM into start and end month +### IN : $Period: time period +### OUT: $StartMonth, Â$EndMonth + my ($Period) = @_; + return (undef,undef) if $Period !~ /^\d{4}-\d{2}:\d{4}-\d{2}$/; + my ($StartMonth, $EndMonth) = split /:/, $Period; + # switch parameters as necessary + if ($EndMonth gt $StartMonth) { + return ($StartMonth, $EndMonth); + } else { + return ($EndMonth, $StartMonth); + }; +}; + +################################################################################ +sub ListMonth { +################################################################################ +### return a list of month (YYYY-MM) between start and end month +### IN : $StartMonth, $EndMonth +### OUT: @Months: array containing all months from $StartMonth to $EndMonth + my ($StartMonth, $EndMonth) = @_; + return (undef,undef) if ($StartMonth !~ /^\d{4}-\d{2}$/ or $EndMonth !~ /^\d{4}-\d{2}$/); + # return if $StartMonth = $EndMonth + return ($StartMonth) if ($StartMonth eq $EndMonth); + # set $Year, $Month from $StartMonth + my ($Year, $Month) = split /-/, $StartMonth; + # define @Months + my (@Months); + until ("$Year-$Month" gt $EndMonth) { + push @Months, "$Year-$Month"; + $Month = "$Month"; # force string context + $Month++; + if ($Month > 12) { + $Month = '01'; + $Year++; + }; + }; + return @Months; +}; + +#####---------------------------- OutputFormats ---------------------------##### + +################################################################################ +sub OutputData { +################################################################################ +### output information with formatting from DBHandle +### IN : $Format : format specifier +### $DBQuery: database query handle with executed query, +### containing $Month, $Key, $Value +### $PadGroup: padding length for newsgroups field (optional) for 'pretty' +### OUT: $Output: formatted output + my ($Format, $DBQuery,$PadGroup) = @_; + while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) { + print &FormatOutput($Format, $Month, $Key, $Value, $PadGroup); + }; +}; + +################################################################################ +sub FormatOutput { +################################################################################ +### format information for output +### IN : $Format : format specifier +### $PadGroup: padding length for newsgroups field (optional) for 'pretty' +### $Month : month (as YYYY-MM) +### $Key : newsgroup, client, ... +### $Value : number of postings with that attribute +### OUT: $Output: formatted output + my ($Format, $Month, $Key, $Value, $PadGroup) = @_; + + # define output types + my %LegalOutput; + @LegalOutput{('dump','dumpgroup','list','pretty')} = (); + # bail out if format is unknown + die "$MySelf: E: Unknown output type '$Format'!\n" if !exists($LegalOutput{$Format}); + + my ($Output); + our ($LastIteration); + if ($Format eq 'dump') { + # output as dump (ng nnnnn) + $Output = sprintf ("%s %u\n",$Key,$Value); + } elsif ($Format eq 'dumpgroup') { + # output as dump (YYYY-NN: nnnnn) + $Output = sprintf ("%s: %5u\n",$Month,$Value); + } elsif ($Format eq 'list') { + # output as list (YYYY-NN: ng nnnnn) + $Output = sprintf ("%s: %s %u\n",$Month,$Key,$Value); + } elsif ($Format eq 'pretty') { + # output as table + $Output = sprintf ("----- %s:\n",$Month) if (!defined($LastIteration) or $Month ne $LastIteration); + $LastIteration = $Month; + $Output .= sprintf ($PadGroup ? sprintf("%%-%us %%5u\n",$PadGroup) : "%s %u\n",$Key,$Value); + }; + return $Output; +}; + +#####------------------------- QueryModifications -------------------------##### + +################################################################################ +sub SQLHierarchies { +################################################################################ +### amend WHERE clause to include hierarchies +### IN : $ShowHierarchies: boolean value +### OUT: SQL code + my ($ShowHierarchies) = @_; + return $ShowHierarchies ? '' : "AND newsgroup NOT LIKE '%.ALL'"; +}; + +################################################################################ +sub GetMaxLenght { +################################################################################ +### get length of longest field in query +### IN : $DBHandle : database handel +### $Table : table to query +### $Field : field to check +### $WhereClause: WHERE clause +### @BindVars : bind variables for WHERE clause +### OUT: $Length: length of longest instnace of $Field + my ($DBHandle,$Table,$Field,$WhereClause,@BindVars) = @_; + my $DBQuery = $DBHandle->prepare(sprintf("SELECT MAX(LENGTH(%s)) FROM %s WHERE %s",$Field,$Table,$WhereClause)); + $DBQuery->execute(@BindVars) or warn sprintf("$MySelf: W: Can't get field length for %s from table %s: $DBI::errstr\n",$Field,$Table); + my ($Length) = $DBQuery->fetchrow_array; + return $Length; +}; + +################################################################################ +sub SQLGroupList { +################################################################################ +### create part of WHERE clause for list of newsgroups separated by : +### IN : $Newsgroups: list of newsgroups (group.one.*:group.two:group.three.*) +### OUT: SQL code, list of newsgroups + my ($Newsgroups) = @_; + $Newsgroups =~ s/\*/%/g; + return ('newsgroup LIKE ?', $Newsgroups) if $Newsgroups !~ /:/; + my $SQL = '('; + my @GroupList = split /:/, $Newsgroups; + foreach (@GroupList) { + $SQL .= ' OR ' if $SQL gt '('; + $SQL .= 'newsgroup LIKE ?'; + }; + $SQL .= ')'; + return ($SQL,@GroupList); +}; + +#####------------------------------- done ---------------------------------##### +1; + + diff --git a/feedlog.pl b/feedlog.pl new file mode 100755 index 0000000..a68b833 --- /dev/null +++ b/feedlog.pl @@ -0,0 +1,87 @@ +#! /usr/bin/perl -W +# +# feedlog.pl +# +# This script will log headers and other data to a database +# for further analysis by parsing a feed from INN. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats; + +use Sys::Syslog qw(:standard :macros); + +use Date::Format; +use DBI; + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('qd'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### init syslog +openlog($MySelf, 'nofatal,pid', LOG_NEWS); +syslog(LOG_NOTICE, "$MyVersion starting up.") if !$Options{'q'}; + +### init database +my $DBHandle = InitDB(\%Conf,0); +if (!$DBHandle) { + syslog(LOG_CRIT, 'Database connection failed: %s', $DBI::errstr); + while (1) {}; # go into endless loop to suppress further errors and respawning +}; +my $DBQuery = $DBHandle->prepare(sprintf("INSERT INTO %s.%s (day,date,mid,timestamp,token,size,peer,path,newsgroups,headers) VALUES (?,?,?,?,?,?,?,?,?,?)",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + +### main loop +while (<>) { + chomp; + # catch empty lines trailing or leading + if ($_ eq '') { + next; + } + # first line contains: mid, timestamp, token, size, peer, Path, Newsgroups + my ($Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups) = split; + # remaining lines contain headers + my $Headers = ""; + while (<>) { + chomp; + # empty line terminates this article + if ($_ eq '') { + last; + } + # collect headers + $Headers .= $_."\n" ; + } + + # parse timestamp to day (YYYY-MM-DD) and to MySQL timestamp + my $Day = time2str("%Y-%m-%d", $Timestamp); + my $Date = time2str("%Y-%m-%d %H:%M:%S", $Timestamp); + + # write to database + if (!$DBQuery->execute($Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers)) { + syslog(LOG_ERR, 'Database error: %s', $DBI::errstr); + }; + $DBQuery->finish; + + warn sprintf("-----\nDay: %s\nDate: %s\nMID: %s\nTS: %s\nToken: %s\nSize: %s\nPeer: %s\nPath: %s\nNewsgroups: %s\nHeaders: %s\n",$Day, $Date, $Mid, $Timestamp, $Token, $Size, $Peer, $Path, $Newsgroups, $Headers) if !$Options{'d'}; +} + +### close handles +$DBHandle->disconnect; +syslog(LOG_NOTICE, "$MySelf closing down.") if !$Options{'q'}; +closelog(); + diff --git a/gatherstats.pl b/gatherstats.pl new file mode 100755 index 0000000..09157d1 --- /dev/null +++ b/gatherstats.pl @@ -0,0 +1,102 @@ +#! /usr/bin/perl -W +# +# gatherstats.pl +# +# This script will gather statistical information from a database +# containing headers and other information from a INN feed. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups); + +use DBI; + +################################# Definitions ################################## + +# define types of information that can be gathered +# all / groups (/ clients / hosts) +my %LegalTypes; +@LegalTypes{('all','groups')} = (); + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('dom:p:t:n:r:g:c:s:'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableRaw'} = $Options{'r'} if $Options{'r'}; +$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +$ConfOverride{'DBTableClnts'} = $Options{'c'} if $Options{'c'}; +$ConfOverride{'DBTableHosts'} = $Options{'s'} if $Options{'s'}; +$ConfOverride{'TLH'} = $Options{'n'} if $Options{'n'}; +&OverrideConfig(\%Conf,\%ConfOverride); + +### get type of information to gather, default to 'all' +$Options{'t'} = 'all' if !$Options{'t'}; +die "$MySelf: E: Unknown type '-t $Options{'t'}'!\n" if !exists($LegalTypes{$Options{'t'}}); + +### get time period +my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### get data for each month +warn "$MySelf: W: Output only mode. Database is not updated.\n" if $Options{'o'}; +foreach my $Month (&ListMonth($StartMonth,$EndMonth)) { + + print "---------- $Month ----------\n" if $Options{'d'}; + + if ($Options{'t'} eq 'all' or $Options{'t'} eq 'groups') { + ### ---------------------------------------------- + ### get groups data (number of postings per group) + # get groups data from raw table for given month + my $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroups FROM %s.%s WHERE day LIKE ? AND NOT disregard",$Conf{'DBDatabase'},$Conf{'DBTableRaw'})); + $DBQuery->execute($Month.'-%') or die sprintf("$MySelf: E: Can't get groups data for %s from %s.%s: $DBI::errstr\n",$Month,$Conf{'DBDatabase'},$Conf{'DBTableRaw'}); + + # count postings per group + my %Postings; + + while (($_) = $DBQuery->fetchrow_array) { + # get list oft newsgroups and hierarchies from Newsgroups: + my %Newsgroups = ListNewsgroups($_); + # count each newsgroup and hierarchy once + foreach (sort keys %Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if(defined($Conf{'TLH'}) and !/^$Conf{'TLH'}/); + $Postings{$_}++; + }; + }; + + print "----- GroupStats -----\n" if $Options{'d'}; + foreach my $Newsgroup (sort keys %Postings) { + print "$Newsgroup => $Postings{$Newsgroup}\n" if $Options{'d'}; + if (!$Options{'o'}) { + # write to database + $DBQuery = $DBHandle->prepare(sprintf("REPLACE INTO %s.%s (month,newsgroup,postings) VALUES (?, ?, ?)",$Conf{'DBDatabase'},$Conf{'DBTableGrps'})); + $DBQuery->execute($Month, $Newsgroup, $Postings{$Newsgroup}) or die sprintf("$MySelf: E: Can't write groups data for %s/%s to %s.%s: $DBI::errstr\n",$Month,$Newsgroup,$Conf{'DBDatabase'},$Conf{'DBTableGrps'}); + $DBQuery->finish; + }; + }; + }; +}; + +### close handles +$DBHandle->disconnect; + diff --git a/groupstats.pl b/groupstats.pl new file mode 100755 index 0000000..bf5bb3d --- /dev/null +++ b/groupstats.pl @@ -0,0 +1,157 @@ +#! /usr/bin/perl -W +# +# groupstats.pl +# +# This script will get statistical data on newgroup usage +# form a database. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + push(@INC, dirname($0)); +} +use strict; + +use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper); + +use DBI; + +################################# Definitions ################################## + +# ... + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:'); + +### read configuration +my %Conf = %{ReadConfig('newsstats.conf')}; + +### override configuration via commandline options +my %ConfOverride; +$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'}; +&OverrideConfig(\%Conf,\%ConfOverride); + +### default output type to 'dump' +$Options{'o'} = 'dump' if !$Options{'o'}; +# fail if more than one newsgroup is combined with 'dumpgroup' type +die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/); +# accept 'dumpgroup' only with -n +if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) { + $Options{'o'} = 'dump'; + warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n"); +}; +# you can't mix '-t' and '-b' +if ($Options{'b'}) { + if ($Options{'t'}) { + warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b). Threshold '-t $Options{'t'}' was ignored.\n"); + undef($Options{'t'}); + }; + warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b).\n") if $Options{'q'}; + warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b).\n") if $Options{'d'}; +}; + +### get query type, default to 'postings' +#die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}})); +#die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'}); + +### get time period +my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'}); +# reset to one month for 'dump' type +if ($Options{'o'} eq 'dump' and $Options{'p'}) { + $StartMonth = $EndMonth; + warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n"); +}; + +### init database +my $DBHandle = InitDB(\%Conf,1); + +### get data +# get list of newsgroups (-n) +my ($QueryPart,@GroupList); +my $Newsgroups = $Options{'n'}; +if ($Newsgroups) { + ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups); +} else { + $QueryPart = 1; +}; + +# manage thresholds +if (defined($Options{'t'})) { + if ($Options{'i'}) { + $QueryPart .= ' AND postings < ?'; + } else { + $QueryPart .= ' AND postings > ?'; + }; + push @GroupList,$Options{'t'}; +} + +# construct WHERE clause +my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'})); + +# get lenght of longest newsgroup delivered by query for formatting purposes +my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList); + +my ($OrderClause,$DBQuery); +# -b (best of) defined? +if (!defined($Options{'b'}) and !defined($Options{'l'})) { + $OrderClause = 'newsgroup'; + $OrderClause = 'postings' if $Options{'q'}; + $OrderClause .= ' DESC' if $Options{'d'}; + # do query: get number of postings per group from groups table for given months and newsgroups + $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); +} elsif ($Options{'b'}) { + # set sorting order (-i) + if ($Options{'i'}) { + $OrderClause = 'postings'; + } else { + $OrderClause = 'postings DESC'; + }; + # push LIMIT to GroupList to match number of binding vars + push @GroupList,$Options{'b'}; + # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT + $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause)); +} else { # -l + # set sorting order (-i) + if ($Options{'i'}) { + $OrderClause = '<'; + } else { + $OrderClause = '>'; + }; + # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars + push @GroupList,$Options{'l'}; + push @GroupList,$StartMonth,$EndMonth; + # do query: get number of postings per group from groups table for given months and + $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause)); +}; + +# execute query +$DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr); + +# output result +printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'}); +printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'}; +printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'}; +if (!defined($Options{'b'}) and !defined($Options{'l'})) { + &OutputData($Options{'o'},$DBQuery,$MaxLength); +} elsif ($Options{'b'}) { + while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) { + print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength); + }; +} else { # -l + while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) { + print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7); + }; +}; + +### close handles +$DBHandle->disconnect; + diff --git a/install/install.pl b/install/install.pl new file mode 100755 index 0000000..9b6b332 --- /dev/null +++ b/install/install.pl @@ -0,0 +1,152 @@ +#! /usr/bin/perl -W +# +# install.pl +# +# This script will create database tables as necessary. +# +# It is part of the NewsStats package. +# +# Copyright (c) 2010 Thomas Hochstein +# +# It can be redistributed and/or modified under the same terms under +# which Perl itself is published. + +BEGIN { + our $VERSION = "0.01"; + use File::Basename; + # we're in .../install, so our module is in .. + push(@INC, dirname($0).'/..'); +} +use strict; + +use NewsStats qw(:DEFAULT); + +use Cwd; + +use DBI; + +################################# Main program ################################# + +### read commandline options +my %Options = &ReadOptions(''); + +### change working directory to .. (as we're in .../install) +chdir '..'; + +### read configuration +print("Reading configuration.\n"); +my %Conf = %{ReadConfig('newsstats.conf')}; + +##### -------------------------------------------------------------------------- +##### Database table definitions +##### -------------------------------------------------------------------------- + +my %DBCreate = ('DBTableRaw' => < <table_info('%', '%', '%', 'TABLE')->fetchall_hashref('TABLE_NAME')}; + +# check for tables and create them, if they don't exist yet +foreach my $Table (keys %DBCreate) { + if (defined($TablesInDB{$Conf{$Table}})) { + printf("Database table %s.%s already exists, skipping ....\n",$Conf{'DBDatabase'},$Conf{$Table}); + next; + }; + my $DBQuery = $DBHandle->prepare($DBCreate{$Table}); + $DBQuery->execute() or die sprintf("$MySelf: E: Can't create table %s in database %s: %s%\n",$Table,$Conf{'DBDatabase'},$DBI::errstr); + printf("Database table %s.%s created succesfully.\n",$Conf{'DBDatabase'},$Conf{$Table}); +}; + +# close handle +$DBHandle->disconnect; +print "Database table generation done.\n"; + +### output information on other necessary steps +my $Path = cwd(); +print < +TODO -- 2.20.1