5 # This script will get statistical data on newgroup usage
\r
8 # It is part of the NewsStats package.
\r
10 # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>
\r
12 # It can be redistributed and/or modified under the same terms under
\r
13 # which Perl itself is published.
\r
16 our $VERSION = "0.01";
\r
18 push(@INC, dirname($0));
\r
22 use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);
\r
26 ################################# Definitions ##################################
\r
30 ################################# Main program #################################
\r
32 ### read commandline options
\r
33 my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');
\r
35 ### read configuration
\r
36 my %Conf = %{ReadConfig('newsstats.conf')};
\r
38 ### override configuration via commandline options
\r
40 $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};
\r
41 &OverrideConfig(\%Conf,\%ConfOverride);
\r
43 ### check for incompatible command line options
\r
44 # you can't mix '-t', '-b' and '-l'
\r
45 # -b/-l take preference over -t, and -b takes preference over -l
\r
46 if ($Options{'b'} or $Options{'l'}) {
\r
47 if ($Options{'t'}) {
\r
49 warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");
\r
50 undef($Options{'t'});
\r
52 if ($Options{'b'} and $Options{'l'}) {
\r
54 warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");
\r
55 undef($Options{'l'});
\r
57 # -q/-d don't work with -b or -l
\r
58 warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};
\r
59 warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};
\r
62 ### check output type
\r
63 # default output type to 'dump'
\r
64 $Options{'o'} = 'dump' if !$Options{'o'};
\r
65 # fail if more than one newsgroup is combined with 'dumpgroup' type
\r
66 die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);
\r
67 # accept 'dumpgroup' only with -n
\r
68 if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {
\r
69 $Options{'o'} = 'dump';
\r
70 warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");
\r
72 # set output type to 'pretty' for -l
\r
73 if ($Options{'l'}) {
\r
74 $Options{'o'} = 'pretty';
\r
75 warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");
\r
78 ### get query type, default to 'postings'
\r
79 #die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));
\r
80 #die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});
\r
83 my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});
\r
84 # reset to one month for 'dump' type
\r
85 if ($Options{'o'} eq 'dump' and $Options{'p'}) {
\r
86 $StartMonth = $EndMonth;
\r
87 warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");
\r
91 my $DBHandle = InitDB(\%Conf,1);
\r
94 # get list of newsgroups (-n)
\r
95 my ($QueryPart,@GroupList);
\r
96 my $Newsgroups = $Options{'n'};
\r
98 ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);
\r
103 # manage thresholds
\r
104 if (defined($Options{'t'})) {
\r
105 if ($Options{'i'}) {
\r
106 $QueryPart .= ' AND postings < ?';
\r
108 $QueryPart .= ' AND postings > ?';
\r
110 push @GroupList,$Options{'t'};
\r
113 # construct WHERE clause
\r
114 my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));
\r
116 # get lenght of longest newsgroup delivered by query for formatting purposes
\r
117 my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);
\r
119 my ($OrderClause,$DBQuery);
\r
120 # -b (best of) defined?
\r
121 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
\r
122 $OrderClause = 'newsgroup';
\r
123 $OrderClause = 'postings' if $Options{'q'};
\r
124 $OrderClause .= ' DESC' if $Options{'d'};
\r
125 # do query: get number of postings per group from groups table for given months and newsgroups
\r
126 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
\r
127 } elsif ($Options{'b'}) {
\r
128 # set sorting order (-i)
\r
129 if ($Options{'i'}) {
\r
130 $OrderClause = 'postings';
\r
132 $OrderClause = 'postings DESC';
\r
134 # push LIMIT to GroupList to match number of binding vars
\r
135 push @GroupList,$Options{'b'};
\r
136 # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT
\r
137 $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));
\r
139 # set sorting order (-i)
\r
140 if ($Options{'i'}) {
\r
141 $OrderClause = '<';
\r
143 $OrderClause = '>';
\r
145 # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars
\r
146 push @GroupList,$Options{'l'};
\r
147 push @GroupList,$StartMonth,$EndMonth;
\r
148 # do query: get number of postings per group from groups table for given months and
\r
149 $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));
\r
153 $DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);
\r
156 printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});
\r
157 printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};
\r
158 printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};
\r
159 if (!defined($Options{'b'}) and !defined($Options{'l'})) {
\r
160 &OutputData($Options{'o'},$DBQuery,$MaxLength);
\r
161 } elsif ($Options{'b'}) {
\r
162 while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
\r
163 print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);
\r
166 while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {
\r
167 print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);
\r
172 $DBHandle->disconnect;
\r