5 # This script will get statistical data on newsgroup usage
\r
8 # It is part of the NewsStats package.
\r
10 # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
\r
12 # It can be redistributed and/or modified under the same terms under
\r
13 # which Perl itself is published.
\r
# Everything in this block has to happen at compile time: the
# 'use NewsStats' statement below is resolved while the script is still
# being compiled, so the script's own directory must be on @INC by then.
BEGIN {
  our $VERSION = "0.01";
  # dirname() is provided by File::Basename
  use File::Basename;
  # also search for modules in the directory this script lives in
  push(@INC, dirname($0));
}

use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
\r
26 ################################# Definitions ##################################
\r
30 ################################# Main program #################################
\r
### read commandline options
my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');

### read configuration
my %Conf = %{ReadConfig('newsstats.conf')};

### override configuration via commandline options
# -g replaces the configured name of the groups table ('DBTableGrps');
# the override hash is declared lexically so it does not become an
# implicit package global
my %ConfOverride;
$ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
&OverrideConfig(\%Conf,\%ConfOverride);
\r
### default output type to 'dump'
$Options{'o'} = 'dump' if !$Options{'o'};

# fail if more than one newsgroup is combined with 'dumpgroup' type
# (a ':' or a '*' in the -n argument is taken to mean more than one group)
die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);

# accept 'dumpgroup' only with -n; otherwise fall back to 'dump'
if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
  $Options{'o'} = 'dump';
  warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
}

# you can't mix '-t' and '-b'
if ($Options{'b'}) {
  # top list mode wins: drop the threshold so that it is not applied later
  if ($Options{'t'}) {
    warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b). Threshold '-t $Options{'t'}' was ignored.\n");
    undef($Options{'t'});
  }
  # top lists bring their own sort order, so -q and -d have no effect
  warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b).\n") if $Options{'q'};
  warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b).\n") if $Options{'d'};
}
\r
62 ### get query type, default to 'postings'
\r
63 #die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));
\r
64 #die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});
\r
### get time period
# -m and -p are handed to GetTimePeriod(); its two return values are used
# as first and last month of the report
my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
# reset to one month for 'dump' type
if ($Options{'o'} eq 'dump' and $Options{'p'}) {
  # only the last month of the requested period is reported
  $StartMonth = $EndMonth;
  warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
}
\r
### init database
my $DBHandle = InitDB(\%Conf,1);

### build the WHERE clause
# get list of newsgroups (-n)
# $QueryPart receives an SQL fragment, @GroupList the matching bind values
my ($QueryPart,@GroupList);
my $Newsgroups = $Options{'n'};
# NOTE(review): SQLGroupList() is also called when -n was not given
# ($Newsgroups is undefined then) - confirm that it copes with that
($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);

# manage thresholds (-t); -i inverts the comparison
if (defined($Options{'t'})) {
  if ($Options{'i'}) {
    $QueryPart .= ' AND postings < ?';
  } else {
    $QueryPart .= ' AND postings > ?';
  }
  # the threshold is the bind value for the placeholder just added
  push @GroupList,$Options{'t'};
}

# construct WHERE clause
# the two leading placeholders are bound to $StartMonth and $EndMonth
my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));

# get length of longest newsgroup delivered by query for formatting purposes
my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
\r
### build and run the query
# Three modes: plain listing (neither -b nor -l), top/bottom list (-b),
# and level mode (-l). @GroupList collects the bind values in the order
# in which the placeholders appear after the two leading month values.
my ($OrderClause,$DBQuery);
# neither -b (best of) nor -l (level) defined?
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # sort by newsgroup, or by postings with -q; -d reverses the order
  $OrderClause = 'newsgroup';
  $OrderClause = 'postings' if $Options{'q'};
  $OrderClause .= ' DESC' if $Options{'d'};
  # do query: get number of postings per group from groups table for given months and newsgroups
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} elsif ($Options{'b'}) {
  # set sorting order (-i): ascending for a bottom list, descending for a top list
  if ($Options{'i'}) {
    $OrderClause = 'postings';
  } else {
    $OrderClause = 'postings DESC';
  }
  # push LIMIT to GroupList to match number of binding vars
  push @GroupList,$Options{'b'};
  # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
  $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
} else {
  # set comparison operator for the level (-i)
  if ($Options{'i'}) {
    $OrderClause = '<';
  } else {
    $OrderClause = '>';
  }
  # $WhereClause is used twice in this statement (in the subselect and in
  # the outer query), so every one of its placeholders needs a second
  # bind value: remember the values belonging to $QueryPart first, ...
  my @WhereBinds = @GroupList;
  # ... then push the level, followed by $StartMonth,$EndMonth and the
  # remembered values - again - to GroupList to match number of binding vars
  push @GroupList,$Options{'l'};
  push @GroupList,$StartMonth,$EndMonth,@WhereBinds;
  # do query: get number of postings per group from groups table for given months and
  # newsgroups, restricted to groups whose maximum of postings is above (or below) the level
  $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
}

# execute query; the month values for the first WHERE clause lead the bind list
$DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
\r
### output results
# print captions if -c is set
printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
# the branches mirror the three query modes: plain listing, -b, -l
if (!defined($Options{'b'}) and !defined($Options{'l'})) {
  # plain listing: OutputData() is handed the statement handle
  &OutputData($Options{'o'},$DBQuery,$MaxLength);
} elsif ($Options{'b'}) {
  # top/bottom list: rows are (newsgroup, summed postings)
  while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
  }
} else {
  # level mode: rows are (month, newsgroup, postings); here the newsgroup
  # goes into the first data argument and the month into the second
  while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
    print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
  }
}

### close handles
$DBHandle->disconnect;
\r