| 1 | #! /usr/bin/perl -W\r |
| 2 | #\r |
| 3 | # groupstats.pl\r |
| 4 | #\r |
| 5 | # This script will get statistical data on newgroup usage\r |
| 6 | # form a database.\r |
| 7 | # \r |
| 8 | # It is part of the NewsStats package.\r |
| 9 | #\r |
| 10 | # Copyright (c) 2010 Thomas Hochstein <thh@inter.net>\r |
| 11 | #\r |
| 12 | # It can be redistributed and/or modified under the same terms under \r |
| 13 | # which Perl itself is published.\r |
| 14 | \r |
| 15 | BEGIN {\r |
| 16 | our $VERSION = "0.01";\r |
| 17 | use File::Basename;\r |
| 18 | push(@INC, dirname($0));\r |
| 19 | }\r |
| 20 | use strict;\r |
| 21 | \r |
| 22 | use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper);\r |
| 23 | \r |
| 24 | use DBI;\r |
| 25 | \r |
| 26 | ################################# Definitions ##################################\r |
| 27 | \r |
| 28 | # ...\r |
| 29 | \r |
| 30 | ################################# Main program #################################\r |
| 31 | \r |
| 32 | ### read commandline options\r |
| 33 | my %Options = &ReadOptions('m:p:n:o:t:l:b:iscqdg:');\r |
| 34 | \r |
| 35 | ### read configuration\r |
| 36 | my %Conf = %{ReadConfig('newsstats.conf')};\r |
| 37 | \r |
| 38 | ### override configuration via commandline options\r |
| 39 | my %ConfOverride;\r |
| 40 | $ConfOverride{'DBTableGrps'} = $Options{'g'} if $Options{'g'};\r |
| 41 | &OverrideConfig(\%Conf,\%ConfOverride);\r |
| 42 | \r |
| 43 | ### check for incompatible command line options\r |
| 44 | # you can't mix '-t', '-b' and '-l'\r |
| 45 | # -b/-l take preference over -t, and -b takes preference over -l\r |
| 46 | if ($Options{'b'} or $Options{'l'}) {\r |
| 47 | if ($Options{'t'}) {\r |
| 48 | # drop -t\r |
| 49 | warn ("$MySelf: W: You cannot combine thresholds (-t) and top lists (-b) or levels (-l). Threshold '-t $Options{'t'}' was ignored.\n");\r |
| 50 | undef($Options{'t'});\r |
| 51 | };\r |
| 52 | if ($Options{'b'} and $Options{'l'}) {\r |
| 53 | # drop -l\r |
| 54 | warn ("$MySelf: W: You cannot combine top lists (-b) and levels (-l). Level '-l $Options{'l'}' was ignored.\n");\r |
| 55 | undef($Options{'l'});\r |
| 56 | };\r |
| 57 | # -q/-d don't work with -b or -l\r |
| 58 | warn ("$MySelf: W: Sorting by number of postings (-q) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'q'};\r |
| 59 | warn ("$MySelf: W: Reverse sorting (-d) ignored due to top list mode (-b) / levels (-l).\n") if $Options{'d'};\r |
| 60 | };\r |
| 61 | \r |
| 62 | ### check output type\r |
| 63 | # default output type to 'dump'\r |
| 64 | $Options{'o'} = 'dump' if !$Options{'o'};\r |
| 65 | # fail if more than one newsgroup is combined with 'dumpgroup' type\r |
| 66 | die ("$MySelf: E: You cannot combine newsgroup lists (-n) with more than one group with '-o dumpgroup'!\n") if ($Options{'o'} eq 'dumpgroup' and defined($Options{'n'}) and $Options{'n'} =~ /:|\*/);\r |
| 67 | # accept 'dumpgroup' only with -n\r |
| 68 | if ($Options{'o'} eq 'dumpgroup' and !defined($Options{'n'})) {\r |
| 69 | $Options{'o'} = 'dump';\r |
| 70 | warn ("$MySelf: W: You must submit exactly one newsgroup ('-n news.group') for '-o dumpgroup'. Output type was set to 'dump'.\n");\r |
| 71 | };\r |
| 72 | # set output type to 'pretty' for -l\r |
| 73 | if ($Options{'l'}) {\r |
| 74 | $Options{'o'} = 'pretty';\r |
| 75 | warn ("$MySelf: W: Output type forced to '-o pretty' due to usage of '-l'.\n");\r |
| 76 | };\r |
| 77 | \r |
| 78 | ### get query type, default to 'postings'\r |
| 79 | #die "$MySelf: E: Unknown query type -q $Options{'q'}!\n" if ($Options{'q'} and !exists($LegalTypes{$Options{'q'}}));\r |
| 80 | #die "$MySelf: E: You must submit a threshold ('-t') for query type '-q $Options{'q'}'!\n" if ($Options{'q'} and !$Options{'t'});\r |
| 81 | \r |
| 82 | ### get time period\r |
| 83 | my ($StartMonth,$EndMonth) = &GetTimePeriod($Options{'m'},$Options{'p'});\r |
| 84 | # reset to one month for 'dump' type\r |
| 85 | if ($Options{'o'} eq 'dump' and $Options{'p'}) {\r |
| 86 | $StartMonth = $EndMonth;\r |
| 87 | warn ("$MySelf: W: You cannot combine time periods (-p) with '-o dump'. Month was set to $StartMonth.\n");\r |
| 88 | };\r |
| 89 | \r |
| 90 | ### init database\r |
| 91 | my $DBHandle = InitDB(\%Conf,1);\r |
| 92 | \r |
| 93 | ### get data\r |
| 94 | # get list of newsgroups (-n)\r |
| 95 | my ($QueryPart,@GroupList);\r |
| 96 | my $Newsgroups = $Options{'n'};\r |
| 97 | if ($Newsgroups) {\r |
| 98 | ($QueryPart,@GroupList) = &SQLGroupList($Newsgroups);\r |
| 99 | } else {\r |
| 100 | $QueryPart = 1;\r |
| 101 | };\r |
| 102 | \r |
| 103 | # manage thresholds\r |
| 104 | if (defined($Options{'t'})) {\r |
| 105 | if ($Options{'i'}) {\r |
| 106 | $QueryPart .= ' AND postings < ?';\r |
| 107 | } else {\r |
| 108 | $QueryPart .= ' AND postings > ?';\r |
| 109 | };\r |
| 110 | push @GroupList,$Options{'t'};\r |
| 111 | }\r |
| 112 | \r |
| 113 | # construct WHERE clause\r |
| 114 | my $WhereClause = sprintf('month BETWEEN ? AND ? AND %s %s',$QueryPart,&SQLHierarchies($Options{'s'}));\r |
| 115 | \r |
| 116 | # get lenght of longest newsgroup delivered by query for formatting purposes\r |
| 117 | my $MaxLength = &GetMaxLenght($DBHandle,$Conf{'DBTableGrps'},'newsgroup',$WhereClause,$StartMonth,$EndMonth,@GroupList);\r |
| 118 | \r |
| 119 | my ($OrderClause,$DBQuery);\r |
| 120 | # -b (best of) defined?\r |
| 121 | if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r |
| 122 | $OrderClause = 'newsgroup';\r |
| 123 | $OrderClause = 'postings' if $Options{'q'};\r |
| 124 | $OrderClause .= ' DESC' if $Options{'d'};\r |
| 125 | # do query: get number of postings per group from groups table for given months and newsgroups\r |
| 126 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE %s ORDER BY month,%s",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r |
| 127 | } elsif ($Options{'b'}) {\r |
| 128 | # set sorting order (-i)\r |
| 129 | if ($Options{'i'}) {\r |
| 130 | $OrderClause = 'postings';\r |
| 131 | } else {\r |
| 132 | $OrderClause = 'postings DESC';\r |
| 133 | };\r |
| 134 | # push LIMIT to GroupList to match number of binding vars\r |
| 135 | push @GroupList,$Options{'b'};\r |
| 136 | # do query: get sum of postings per group from groups table for given months and newsgroups with LIMIT\r |
| 137 | $DBQuery = $DBHandle->prepare(sprintf("SELECT newsgroup,SUM(postings) AS postings FROM %s.%s WHERE %s GROUP BY newsgroup ORDER BY %s,newsgroup LIMIT ?",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause));\r |
| 138 | } else { # -l\r |
| 139 | # set sorting order (-i)\r |
| 140 | if ($Options{'i'}) {\r |
| 141 | $OrderClause = '<';\r |
| 142 | } else {\r |
| 143 | $OrderClause = '>';\r |
| 144 | };\r |
| 145 | # push level and $StartMonth,$EndMonth - again - to GroupList to match number of binding vars\r |
| 146 | push @GroupList,$Options{'l'};\r |
| 147 | push @GroupList,$StartMonth,$EndMonth;\r |
| 148 | # do query: get number of postings per group from groups table for given months and \r |
| 149 | $DBQuery = $DBHandle->prepare(sprintf("SELECT month,newsgroup,postings FROM %s.%s WHERE newsgroup IN (SELECT newsgroup FROM %s.%s WHERE %s GROUP BY newsgroup HAVING MAX(postings) %s ?) AND %s ORDER BY newsgroup,month",$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$WhereClause,$OrderClause,$WhereClause));\r |
| 150 | };\r |
| 151 | \r |
| 152 | # execute query\r |
| 153 | $DBQuery->execute($StartMonth,$EndMonth,@GroupList) or die sprintf("$MySelf: E: Can't get groups data for %s to %s from %s.%s: %s\n",$StartMonth,$EndMonth,$Conf{'DBDatabase'},$Conf{'DBTableGrps'},$DBI::errstr);\r |
| 154 | \r |
| 155 | # output result\r |
| 156 | printf ("----- Report from %s to %s\n",$StartMonth,$EndMonth) if $Options{'c'} and ($Options{'m'} or $Options{'p'});\r |
| 157 | printf ("----- Newsgroups: %s\n",join(',',split(/:/,$Newsgroups))) if $Options{'c'} and $Options{'n'};\r |
| 158 | printf ("----- Threshold: %s %u\n",$Options{'i'} ? '<' : '>',$Options{'t'}) if $Options{'c'} and $Options{'t'};\r |
| 159 | if (!defined($Options{'b'}) and !defined($Options{'l'})) {\r |
| 160 | &OutputData($Options{'o'},$DBQuery,$MaxLength);\r |
| 161 | } elsif ($Options{'b'}) {\r |
| 162 | while (my ($Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r |
| 163 | print &FormatOutput($Options{'o'}, ($Options{'i'} ? 'Bottom ' : 'Top ').$Options{'b'}, $Newsgroup, $Postings, $MaxLength);\r |
| 164 | };\r |
| 165 | } else { # -l\r |
| 166 | while (my ($Month,$Newsgroup,$Postings) = $DBQuery->fetchrow_array) {\r |
| 167 | print &FormatOutput($Options{'o'}, $Newsgroup, $Month, $Postings, 7);\r |
| 168 | };\r |
| 169 | };\r |
| 170 | \r |
| 171 | ### close handles\r |
| 172 | $DBHandle->disconnect;\r |
| 173 | \r |