X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=gatherstats.pl;h=160c115df856bcfc893d520c21ea9f878e4a2ce2;hp=0a5a9a24137a85947bf652c3dbc7d00087c498c3;hb=7c83a673e654c823fe04d7731fd02f9bc5787703;hpb=314e31aadfdadd37adb5723e7fad672e961699d9;ds=sidebyside diff --git a/gatherstats.pl b/gatherstats.pl index 0a5a9a2..160c115 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -W +#! /usr/bin/perl # # gatherstats.pl # @@ -18,8 +18,9 @@ BEGIN { push(@INC, dirname($0)); } use strict; +use warnings; -use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); use DBI; use Getopt::Long qw(GetOptions); @@ -102,10 +103,6 @@ if ($Conf{'TLH'}) { }; }; -# read list of newsgroups from --checkgroups -# into a hash -my %ValidGroups = %{ReadGroupList($OptCheckgroupsFile)} if $OptCheckgroupsFile; - ### init database my $DBHandle = InitDB(\%Conf,1); @@ -116,6 +113,11 @@ foreach my $Month (&ListMonth($Period)) { print "---------- $Month ----------\n" if $OptDebug; if ($OptStatsType eq 'all' or $OptStatsType eq 'groups') { + # read list of newsgroups from --checkgroups + # into a hash + my %ValidGroups = %{ReadGroupList(sprintf('%s-%s',$OptCheckgroupsFile,$Month))} + if $OptCheckgroupsFile; + ### ---------------------------------------------- ### get groups data (number of postings per group) # get groups data from raw table for given month @@ -131,7 +133,7 @@ foreach my $Month (&ListMonth($Period)) { # count postings per group my %Postings; while (($_) = $DBQuery->fetchrow_array) { - # get list oft newsgroups and hierarchies from Newsgroups: + # get list of newsgroups and hierarchies from Newsgroups: my %Newsgroups = ListNewsgroups($_,$TLH, $OptCheckgroupsFile ? 
\%ValidGroups : ''); # count each newsgroup and hierarchy once @@ -144,12 +146,21 @@ foreach my $Month (&ListMonth($Period)) { if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { - $Postings{$_} = 0 ; + # add current newsgroup as empty group + $Postings{$_} = 0; warn (sprintf("ADDED: %s as empty group\n",$_)); + # add empty hierarchies for current newsgroup as needed + foreach (ParseHierarchies($_)) { + my $Hierarchy = $_ . '.ALL'; + if (!defined($Postings{$Hierarchy})) { + $Postings{$Hierarchy} = 0; + warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); + }; + }; } }; }; - + # delete old data for that month if (!$OptTest) { $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", @@ -195,7 +206,7 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS @@ -289,15 +300,23 @@ Set processing type to one of I<all> and I<groups>. Defaults to all (and is currently rather pointless as only I<groups> has been implemented). -=item B<-c>, B<--checkgroups> I<file> +=item B<-c>, B<--checkgroups> I<filename template> + +Check each group against a list of valid newsgroups read from a file, +one group on each line and ignoring everything after the first +whitespace (so you can use a file in checkgroups format or (part of) +your INN active file). + +The filename is taken from I<filename template>, amended by each B<-- +month> B<gatherstats> is processing, so that + + gatherstats -m 2010-01:2010-12 -c checkgroups -Check each group against a list of valid newsgroups read from -I<file>, one group on each line and ignoring everything after the -first whitespace (so you can use a file in checkgroups format or (part -of) your INN active file). +will check against F<checkgroups-2010-01> for January 2010, against +F<checkgroups-2010-02> for February 2010 and so on. 
-Newsgroups not found in I<file> will be dropped (and logged to -STDERR), and newsgroups found in I<file> but having no postings +Newsgroups not found in the checkgroups file will be dropped (and +logged to STDERR), and newsgroups found there but having no postings will be added with a count of 0 (and logged to STDERR). =item B<--hierarchy> I<TLH> (newsgroup hierarchy) @@ -341,9 +360,9 @@ Process all types of information for January of 2010: gatherstats --month 2010-01 Process only number of postings for the year of 2010, -checking against checkgroups-2010.txt: +checking against checkgroups-*: - gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups =head1 FILES