X-Git-Url: https://code.th-h.de/?p=usenet%2Fnewsstats.git;a=blobdiff_plain;f=NewsStats.pm;h=cc3dd83d85629a70a41f9702b151646279483912;hp=35e0c90054fb136c153faafecd6c97a6c85ee96d;hb=d3b6810d3d09c7a1dffaef75731624fd9712d914;hpb=78389b28e945178cad3f43a63e5d1091334e8dab;ds=sidebyside diff --git a/NewsStats.pm b/NewsStats.pm index 35e0c90..cc3dd83 100644 --- a/NewsStats.pm +++ b/NewsStats.pm @@ -31,15 +31,16 @@ require Exporter; SplitPeriod ListMonth ListNewsgroups + ReadGroupList OutputData FormatOutput SQLHierarchies SQLGroupList - GetMaxLenght + GetMaxLength ); %EXPORT_TAGS = ( TimePeriods => [qw(GetTimePeriod LastMonth CheckMonth SplitPeriod ListMonth)], Output => [qw(OutputData FormatOutput)], - SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLenght)]); + SQLHelper => [qw(SQLHierarchies SQLGroupList GetMaxLength)]); $VERSION = '0.01'; our $PackageVersion = '0.01'; @@ -154,15 +155,26 @@ sub ListNewsgroups { ### explode a (scalar) list of newsgroup names to a list of newsgroup and ### hierarchy names where every newsgroup and hierarchy appears only once: ### de.alt.test,de.alt.admin -> de.ALL, de.alt.ALL, de.alt.test, de.alt.admin -### IN : $Newsgroups: a list of newsgroups (content of Newsgroups: header) -### OUT: %Newsgroups: hash containing all newsgroup and hierarchy names as keys - my ($Newsgroups) = @_; +### IN : $Newsgroups : a list of newsgroups (content of Newsgroups: header) +### $TLH : top level hierarchy (all other newsgroups are ignored) +### $ValidGroupsR: reference to a hash containing all valid newsgroups +### as keys +### OUT: %Newsgroups : hash containing all newsgroup and hierarchy names as keys + my ($Newsgroups,$TLH,$ValidGroupsR) = @_; + my %ValidGroups = %{$ValidGroupsR} if $ValidGroupsR; my %Newsgroups; chomp($Newsgroups); # remove whitespace from contents of Newsgroups: $Newsgroups =~ s/\s//; # call &HierarchyCount for each newsgroup in $Newsgroups: for (split /,/, $Newsgroups) { + # don't count newsgroup/hierarchy in wrong TLH + next if($TLH and !/^$TLH/); + # don't count invalid newsgroups + if(%ValidGroups and !defined($ValidGroups{$_})) { + warn (sprintf("DROPPED: %s\n",$_)); + next; + } # add original newsgroup to %Newsgroups $Newsgroups{$_} = 1; # add all hierarchy elements to %Newsgroups, amended by '.ALL', @@ -194,6 +206,26 @@ sub ParseHierarchies { return @Hierarchies; }; +################################################################################ +sub ReadGroupList { +################################################################################ +### read a list of valid newsgroups from file (each group on one line, +### ignoring everything after the first whitespace and so accepting files +### in checkgroups format as well as (parts of) an INN active file) +### IN : $Filename : file to read +### OUT: \%ValidGroups: hash containing all valid newsgroups + my ($Filename) = @_; + my %ValidGroups; + open (my $LIST,"<$Filename") or die "$MySelf: E: Cannot read $Filename: $!\n"; + while (<$LIST>) { + s/^(\S+).*$/$1/; + chomp; + $ValidGroups{$_} = '1'; + }; + close $LIST; + return \%ValidGroups; +}; + ################################################################################ #####----------------------------- TimePeriods ----------------------------##### @@ -380,7 +412,7 @@ sub SQLHierarchies { }; ################################################################################ -sub GetMaxLenght { +sub GetMaxLength { ################################################################################ ### get length of longest field in future query result ### IN : $DBHandle : database handel