Commit | Line | Data |
---|---|---|
741336c2 TH |
1 | #! /usr/bin/perl -W\r |
2 | #\r | |
3 | # groupstats.pl\r | |
4 | #\r | |
5 | # This script will get statistical data on newgroup usage\r | |
6 | # form a database.\r | |
7 | # \r | |
8 | # It is part of the NewsStats package.\r | |
9 | #\r | |
10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r | |
11 | #\r | |
12 | # It can be redistributed and/or modified under the same terms under \r | |
13 | # which Perl itself is published.\r | |
14 | \r | |
15 | BEGIN {\r | |
16 | our $VERSION = "0.01";\r | |
17 | use File::Basename;\r | |
18 | push(@INC, dirname($0));\r | |
19 | }\r | |
20 | use strict;\r | |
21 | \r | |
22 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);\r | |
23 | \r | |
24 | use DBI;\r | |
25 | \r | |
26 | ################################# Definitions ##################################\r | |
27 | \r | |
28 | # ...\r | |
29 | \r | |
30 | ################################# Main program #################################\r | |
31 | \r | |
32 | ### read commandline options\r | |
33 | my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');\r | |
34 | \r | |
35 | ### read configuration\r | |
36 | my %Conf = %{ReadConfig('newsstats.conf')};\r | |
37 | \r | |
38 | ### override configuration via commandline options\r | |
39 | my %ConfOverride;\r | |
40 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};\r | |
41 | &OverrideConfig(\%Conf,\%ConfOverride);\r | |
42 | \r | |
13c5a175 TH |
43 | ### check for incompatible command line options\r |
44 | # you can't mix '-t', '-b' and '-l'\r | |
45 | # -b/-l take preference over -t, and -b takes preference over -l\r | |
46 | if ($Options{'b'} or $Options{'l'}) {\r | |
47 | if ($Options{'t'}) {\r | |
48 | # drop -t\r | |
49 | warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");\r | |
50 | undef($Options{'t'});\r | |
51 | };\r | |
52 | if ($Options{'b'} and $Options{'l'}) {\r | |
53 | # drop -l\r | |
54 | warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");\r | |
55 | undef($Options{'l'});\r | |
56 | };\r | |
57 | # -q/-d don't work with -b or -l\r | |
58 | warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};\r | |
59 | warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};\r | |
60 | };\r | |
61 | \r | |
62 | ### check output type\r | |
63 | # default output type to 'dump'\r | |
741336c2 TH |
64 | $Options{'o'} = 'dump' if !$Options{'o'};\r |
65 | # fail if more than one newsgroup is combined with 'dumpgroup' type\r | |
66 | die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);\r | |
67 | # accept 'dumpgroup' only with -n\r | |
68 | if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {\r | |
69 | $Options{'o'} = 'dump';\r | |
70 | warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");\r | |
71 | };\r | |
13c5a175 TH |
72 | # set output type to 'pretty' for -l\r |
73 | if ($Options{'l'}) {\r | |
74 | $Options{'o'} = 'pretty';\r | |
75 | warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");\r | |
741336c2 TH |
76 | };\r |
77 | \r | |
78 | ### get query type, default to 'postings'\r | |
79 | #die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));\r | |
80 | #die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});\r | |
81 | \r | |
82 | ### get time period\r | |
83 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});\r | |
84 | # reset to one month for 'dump' type\r | |
85 | if ($Options{'o'} eq 'dump' and $Options{'p'}) {\r | |
86 | $StartMonth = $EndMonth;\r | |
87 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");\r | |
88 | };\r | |
89 | \r | |
90 | ### init database\r | |
91 | my $DBHandle = InitDB(\%Conf,1);\r | |
92 | \r | |
93 | ### get data\r | |
94 | # get list of newsgroups (-n)\r | |
95 | my ($QueryPart,@GroupList);\r | |
96 | my $Newsgroups = $Options{'n'};\r | |
97 | if ($Newsgroups) {\r | |
98 | ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);\r | |
99 | } else {\r | |
100 | $QueryPart = 1;\r | |
101 | };\r | |
102 | \r | |
103 | # manage thresholds\r | |
104 | if (defined($Options{'t'})) {\r | |
105 | if ($Options{'i'}) {\r | |
106 | $QueryPart .= ' AND postings < ?';\r | |
107 | } else {\r | |
108 | $QueryPart .= ' AND postings > ?';\r | |
109 | };\r | |
110 | push @GroupList,$Options{'t'};\r | |
111 | }\r | |
112 | \r | |
113 | # construct WHERE clause\r | |
114 | my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));\r | |
115 | \r | |
116 | # get lenght of longest newsgroup delivered by query for formatting purposes\r | |
117 | my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);\r | |
118 | \r | |
119 | my ($OrderClause,$DBQuery);\r | |
120 | # -b (best of) defined?\r | |
121 | if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r | |
122 | $OrderClause = 'newsgroup';\r | |
123 | $OrderClause = 'postings' if $Options{'q'};\r | |
124 | $OrderClause .= ' DESC' if $Options{'d'};\r | |
125 | # do query: get number of postings per group from groups table for given months and newsgroups\r | |
126 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r | |
127 | } elsif ($Options{'b'}) {\r | |
128 | # set sorting order (-i)\r | |
129 | if ($Options{'i'}) {\r | |
130 | $OrderClause = 'postings';\r | |
131 | } else {\r | |
132 | $OrderClause = 'postings DESC';\r | |
133 | };\r | |
134 | # push LIMIT to GroupList to match number of binding vars\r | |
135 | push @GroupList,$Options{'b'};\r | |
136 | # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT\r | |
137 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r | |
138 | } else { # -l\r | |
139 | # set sorting order (-i)\r | |
140 | if ($Options{'i'}) {\r | |
141 | $OrderClause = '<';\r | |
142 | } else {\r | |
143 | $OrderClause = '>';\r | |
144 | };\r | |
145 | # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars\r | |
146 | push @GroupList,$Options{'l'};\r | |
147 | push @GroupList,$StartMonth,$EndMonth;\r | |
148 | # do query: get number of postings per group from groups table for given months and \r | |
149 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));\r | |
150 | };\r | |
151 | \r | |
152 | # execute query\r | |
153 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);\r | |
154 | \r | |
155 | # output result\r | |
156 | printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});\r | |
157 | printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};\r | |
158 | printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};\r | |
159 | if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r | |
160 | &OutputData($Options{'o'},$DBQuery,$MaxLength);\r | |
161 | } elsif ($Options{'b'}) {\r | |
162 | while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r | |
163 | print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);\r | |
164 | };\r | |
165 | } else { # -l\r | |
166 | while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r | |
167 | print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);\r | |
168 | };\r | |
169 | };\r | |
170 | \r | |
171 | ### close handles\r | |
172 | $DBHandle->disconnect;\r | |
173 | \r |