X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=gatherstats.pl;h=4c539f1fc0e2821e40eb3abb7d970948d38852a6;hp=d2d4faa3365c4c281a0319bc2e88c8a029081365;hb=1703b8e3b454713d4f873acdb5cb400db490a152;hpb=43a0fc776902f3a7b3ea019e94b67cb7e4500039 diff --git a/gatherstats.pl b/gatherstats.pl index d2d4faa..4c539f1 100755 --- a/gatherstats.pl +++ b/gatherstats.pl @@ -19,7 +19,7 @@ BEGIN { } use strict; -use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ReadGroupList); +use NewsStats qw(:DEFAULT :TimePeriods ListNewsgroups ParseHierarchies ReadGroupList); use DBI; use Getopt::Long qw(GetOptions); @@ -85,9 +85,15 @@ if ($Conf{'TLH'}) { } # strip whitespace $TLH =~ s/\s//g; + # add trailing dots if none are present yet + # (using negative look-behind assertions) + $TLH =~ s/(?fetchrow_array) { - # get list oft newsgroups and hierarchies from Newsgroups: + # get list of newsgroups and hierarchies from Newsgroups: my %Newsgroups = ListNewsgroups($_,$TLH, $OptCheckgroupsFile ? \%ValidGroups : ''); # count each newsgroup and hierarchy once @@ -138,12 +145,21 @@ foreach my $Month (&ListMonth($Period)) { if (%ValidGroups) { foreach (sort keys %ValidGroups) { if (!defined($Postings{$_})) { - $Postings{$_} = 0 ; + # add current newsgroup as empty group + $Postings{$_} = 0; warn (sprintf("ADDED: %s as empty group\n",$_)); + # add empty hierarchies for current newsgroup as needed + foreach (ParseHierarchies($_)) { + my $Hierarchy = $_ . 
'.ALL'; + if (!defined($Postings{$Hierarchy})) { + $Postings{$Hierarchy} = 0; + warn (sprintf("ADDED: %s as empty group\n",$Hierarchy)); + }; + }; } }; }; - + # delete old data for that month if (!$OptTest) { $DBQuery = $DBHandle->do(sprintf("DELETE FROM %s.%s WHERE month = ?", @@ -189,7 +205,7 @@ gatherstats - process statistical data from a raw source =head1 SYNOPSIS -B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] +B [B<-Vhdt>] [B<-m> I | I] [B<-s> I I]] [B<--hierarchy> I] [B<--rawdb> I] [B<-groupsdb> I] [B<--clientsdb> I] [B<--hostsdb> I] =head1 REQUIREMENTS @@ -283,15 +299,23 @@ Set processing type to one of I and I. Defaults to all (and is currently rather pointless as only I has been implemented). -=item B<-c>, B<--checkgroups> I +=item B<-c>, B<--checkgroups> I + +Check each group against a list of valid newsgroups read from a file, +one group on each line and ignoring everything after the first +whitespace (so you can use a file in checkgroups format or (part of) +your INN active file). + +The filename is taken from I, amended by each B<-- +month> B is processing, so that + + gatherstats -m 2010-01:2010-12 -c checkgroups -Check each group against a list of valid newsgroups read from -I, one group on each line and ignoring everything after the -first whitespace (so you can use a file in checkgroups format or (part -of) your INN active file). +will check against F for January 2010, against +F for February 2010 and so on. -Newsgroups not found in I will be dropped (and logged to -STDERR), and newsgroups found in I but having no postings +Newsgroups not found in the checkgroups file will be dropped (and +logged to STDERR), and newsgroups found there but having no postings will be added with a count of 0 (and logged to STDERR). 
=item B<--hierarchy> I<TLH> (newsgroup hierarchy) @@ -335,9 +359,9 @@ Process all types of information for January of 2010: gatherstats --month 2010-01 Process only number of postings for the year of 2010, -checking against checkgroups-2010.txt: +checking against checkgroups-*: - gatherstats -m 2010-01:2010-12 -s groups -c checkgroups-2010.txt + gatherstats -m 2010-01:2010-12 -s groups -c checkgroups =head1 FILES