-#! /usr/bin/perl -W
+#! /usr/bin/perl
#
# gatherstats.pl
#
push(@INC, dirname($0));
}
use strict;
+use warnings;
-use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList);
+use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList);
use DBI;
use Getopt::Long qw(GetOptions);
};
};
-# read list of newsgroups from --checkgroups
-# into a hash
-my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile;
-
### init database
my $DBHandle = InitDB(\%Conf,1);
print "---------- $Month ----------\n" if $OptDebug;
if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') {
+ # read list of newsgroups from --checkgroups
+ # into a hash
+ my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))}
+ if $OptCheckgroupsFile;
+
### ----------------------------------------------
### get groups data (number of postings per group)
# get groups data from raw table for given month
# count postings per group
my %Postings;
while (($_) = $DBQuery->fetchrow_array) {
- # get list oft newsgroups and hierarchies from Newsgroups:
+ # get list of newsgroups and hierarchies from Newsgroups:
my %Newsgroups = ListNewsgroups($_,$TLH,
$OptCheckgroupsFile ? \%ValidGroups : '');
# count each newsgroup and hierarchy once
if (%ValidGroups) {
foreach (sort keys %ValidGroups) {
if (!defined($Postings{$_})) {
- $Postings{$_} = 0 ;
+ # add current newsgroup as empty group
+ $Postings{$_} = 0;
warn (sprintf("ADDED: %s as empty group\n",$_));
+ # add empty hierarchies for current newsgroup as needed
+ foreach (ParseHierarchies($_)) {
+ my $Hierarchy = $_ . '.ALL';
+ if (!defined($Postings{$Hierarchy})) {
+ $Postings{$Hierarchy} = 0;
+ warn (sprintf("ADDED: %s as empty group\n",$Hierarchy));
+ };
+ };
}
};
};
-
+
# delete old data for that month
if (!$OptTest) {
$DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?",
=head1 SYNOPSIS
-B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats] [B<-c> I<checkgroups file>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<-groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
+B<gatherstats> [B<-Vhdt>] [B<-m> I<YYYY-MM> | I<YYYY-MM:YYYY-MM>] [B<-s> I<stats> [B<-c> I<filename template>]] [B<--hierarchy> I<TLH>] [B<--rawdb> I<database table>] [B<--groupsdb> I<database table>] [B<--clientsdb> I<database table>] [B<--hostsdb> I<database table>]
=head1 REQUIREMENTS
(and is currently rather pointless as only I<groups> has been
implemented).
-=item B<-c>, B<--checkgroups> I<filename>
+=item B<-c>, B<--checkgroups> I<filename template>
+
+Check each group against a list of valid newsgroups read from a file,
+one group on each line and ignoring everything after the first
+whitespace (so you can use a file in checkgroups format or (part of)
+your INN active file).
+
+The filename is taken from I<filename template>, amended by each
+B<--month> B<gatherstats> is processing, so that
+
+ gatherstats -m 2010-01:2010-12 -c checkgroups
-Check each group against a list of valid newsgroups read from
-I<filename>, one group on each line and ignoring everything after the
-first whitespace (so you can use a file in checkgroups format or (part
-of) your INN active file).
+will check against F<checkgroups-2010-01> for January 2010, against
+F<checkgroups-2010-02> for February 2010 and so on.
-Newsgroups not found in I<filename> will be dropped (and logged to
-STDERR), and newsgroups found in I<filename> but having no postings
+Newsgroups not found in the checkgroups file will be dropped (and
+logged to STDERR), and newsgroups found there but having no postings
will be added with a count of 0 (and logged to STDERR).
=item B<--hierarchy> I<TLH> (newsgroup hierarchy)
gatherstats --month 2010-01
Process only number of postings for the year of 2010,
-checking against checkgroups-2010.txt:
+checking against checkgroups-*:
- gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt
+ gatherstats -m 2010-01:2010-12 -s groups -c checkgroups
=head1 FILES