summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/contrib/status.pl
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib/status.pl')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/status.pl258
1 files changed, 258 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/status.pl b/debian/htdig/htdig-3.2.0b6/contrib/status.pl
new file mode 100755
index 00000000..25ddeda8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/status.pl
@@ -0,0 +1,258 @@
+#!/usr/local/bin/perl
+#
+# status.pl v1.0 960413 Iain Lea (iain@sbs.de)
+#
+# ChangeLog
+# 960413 IL
+#
+# Produces a HTML 'Search Engine Status' page with last 5 runs
+# and 'Top 10' servers by #URLS indexed.
+#
+# Usage: status.pl [options]
+# -h help
+# -F file HTML footer
+# -H file HTML header
+# -o file HTML generated file
+# -v verbose
+#
+# TODO
+
+require 'timelocal.pl';
+require 'getopts.pl';
+require '/www/search.sbs.de/bin/sbs.pl';
+
+$DataDir = '/www/search.sbs.de/data/robot';
+$RunTimeFile = "$DataDir/current-runtime";
+$RobotFile = "$DataDir/current-robot";
+$IndexFile = '/www/search.sbs.de/test/db/db.wordlist';
+
+$DefOutputFile = '/www/search.sbs.de/test/pub/status.html';
+$TmpFile = "/tmp/status.$$";
+$DefFooter = '';
+$DefHeader = '';
+$Verbose = 0;
+$Top10Servers = 10;
+
+&ParseCmdLine;
+
+print "Generating status.html...\n" if $Verbose;
+
+&ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile);
+&WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter);
+
+exit 1;
+
+#############################################################################
+# Subroutines
+#
+
+sub ParseCmdLine
+{
+ &Getopts ('F:hH:o:v');
+
+ if ($opt_h ne "") {
+ print <<EndOfHelp
+Produce an HTML 'Status' page of last 5 runs and Top 10 servers by #URLS.
+
+Usage: $0 [options]
+ -h help
+ -F file HTML footer
+ -H file HTML header
+ -o file HTML generated file
+ -v verbose
+
+EndOfHelp
+;
+ exit 0;
+ }
+ $DefFooter = $opt_F if ($opt_H ne "");
+ $DefHeader = $opt_H if ($opt_H ne "");
+ $DefOutputFile = $opt_o if ($opt_o ne "");
+ $Verbose = 1 if ($opt_v ne "");
+}
+
+sub ReadDataFiles
+{
+ my ($RunTimeFile, $RobotFile, $IndexFile) = @_;
+ my ($IndexSize, $NumWords, $NumURLS, $NumServers);
+ my ($BegTime, $EndTime, $RunDate, $RunTime, $Key);
+ my (%Months) = (
+ 'Jan', '0', 'Feb', '1', 'Mar', '2', 'Apr', '3', 'May', '4', 'Jun', '5',
+ 'Jul', '6', 'Aug', '7', 'Sep', '8', 'Oct', '9', 'Nov', '10', 'Dec', '11' );
+
+ # RunDate : RunTime
+
+ open (TIME, "$RunTimeFile") || die "Error: $RunTimeFile - $!\n";
+ while (<TIME>) {
+ chop;
+ if (! $EndTime && $BegTime) {
+ # Sat Apr 13 12:57:52 MET DST 1996
+ /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... ([0-9]{4}$)/;
+ $EndTime = timelocal ($5, $4, $3, $2, $Months{$1}, $6 - 1900);
+ $RunTime = $EndTime - $BegTime;
+ $RunTime = sprintf ("%02d%02d", $RunTime/3600, ($RunTime%3600)/60);
+ print "END=[$_] [$EndTime] [$RunTime]\n" if $Verbose;
+ }
+ if (! $BegTime) {
+ # Sat Apr 13 12:57:52 MET DST 1996
+ /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... ([0-9]{4}$)/;
+ $Mon = $Months{$1};
+ $Year = $6 - 1900;
+ $BegTime = timelocal ($5, $4, $3, $2, $Mon, $Year);
+ $RunDate = sprintf ("%02d%02d%02d", $Year, $Mon+1, $2);
+ print "BEG=[$_] [$BegTime] [$RunDate]\n" if $Verbose;
+ }
+ }
+ close (TIME);
+
+ # IndexSize : NumWords : NumURLS : NumServers
+
+ @StatData = stat ($IndexFile);
+ $IndexSize = $StatData[7];
+ print "SIZE=[$IndexSize]\n" if $Verbose;
+
+ # NumWords : NumURLS : NumServers
+
+ $NumWords = $NumURLS = $NumServers = 0;
+
+ open (ROBOT, "$RobotFile") || die "Error: $RobotFile - $!\n";
+ while (<ROBOT>) {
+ if (/^htdig:\s+(.*)\s+([0-9]*)\s+documents$/) {
+ $NumURLS += $2;
+ $NumServers++;
+ if ($2 > 0) {
+ $Key = sprintf ("%07d|%s", $2, $1);
+ $Top10ByName{$Key} = $2;
+ }
+ print "SERVER=[$1] DOCS=[$2]\n" if $Verbose;
+ } elsif (/^Read\s+([0-9]*)\s+words$/) {
+ $NumWords = $1;
+ print "WORDS=[$NumWords]\n" if $Verbose;
+ }
+ }
+ close (ROBOT);
+
+ # Write data to YYMMDD-info file
+
+ $InfoFile = "$DataDir/$RunDate-info";
+ $CurrFile = "$DataDir/current-info";
+
+ open (INFO, ">$InfoFile") || die "Error: $InfoFile - $!\n";
+ print "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n" if $Verbose;
+ print INFO "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n";
+ close (INFO);
+ unlink ($CurrFile);
+ symlink ($InfoFile, $CurrFile);
+}
+
+sub WriteStatus
+{
+ my ($DataDir, $OutFile, $Header, $Footer) = @_;
+
+ $RobotInfo = &ReadRobotInfo ("$DataDir/current-info");
+
+ open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n";
+
+ &PrintBoilerPlate ($Header, 1);
+
+ print HTML <<EOT
+<p>
+<strong>$RobotInfo</strong>
+<p>
+<table border=2 width=400>
+<caption>Table of last 5 robot runs.</caption>
+<th>Run Date<th>Run Time<th># Servers<th># URL's<th># Words<th>Index (MB)
+<tr>
+EOT
+;
+ # read YYMMDD-info files
+ opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n";
+ @InfoFiles = grep (/^[0-9]{6}-info$/, readdir (DIR));
+ closedir (DIR);
+ @InfoFiles = reverse (sort (@InfoFiles));
+
+ @InfoFiles = @InfoFiles[0,1,2,3,4];
+ foreach $File (@InfoFiles) {
+ $File = "$DataDir/$File";
+ open (INFO, "$File") || die "Error: $File - $!\n";
+ chop (($_ = <INFO>));
+ ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers) = split (':');
+ $IndexSize = sprintf ("%.1f", $IndexSize / (1024*1024));
+ $RunTime =~ /(..)(..)/;
+ $RunTime = "$1:$2";
+ print HTML <<EOT
+<td align="center">$RunDate</td>
+<td align="center">$RunTime</td>
+<td align="right">$NumServers</td>
+<td align="right">$NumURLS</td>
+<td align="right">$NumWords</td>
+<td align="right">$IndexSize</td>
+<tr>
+EOT
+;
+ close (INFO);
+ }
+
+ print HTML <<EOT
+</table>
+<p>
+<p>
+<table border=2 width=400>
+<caption>Table of Top 10 servers listed by number of indexed documents.</caption>
+<th>Top 10 Servers<th># URL's
+<tr>
+EOT
+;
+ $NumServers = 0;
+ foreach $Key (reverse (sort (keys (%Top10ByName)))) {
+ if ($NumServers < $Top10Servers) {
+ $NumServers++;
+ $NumURLS = $Top10ByName{$Key};
+ $Key =~ /^[0-9]*\|(.*)$/;
+ $Server = $1;
+ $Server =~ s/:80$//;
+ print HTML <<EOT
+<td width="80%" align="left"><a href="http://$Server/">$Server</a></td>
+<td width="20%" align="right">$NumURLS</td>
+<tr>
+EOT
+;
+ }
+ }
+
+ print HTML "</table>\n";
+
+ &PrintBoilerPlate ($Footer, 0);
+
+ close (HTML);
+}
+
+sub PrintBoilerPlate
+{
+ my ($File, $IsHeader) = @_;
+
+ if ($File ne "" && -e $File) {
+ open (FILE, $File) || die "Error: $File - $!\n";
+ while (<FILE>) {
+ print HTML;
+ }
+ close (FILE);
+ } else {
+ if ($IsHeader) {
+ print HTML <<EOT
+<html>
+<head>
+<title>Search Engine Status</title>
+</head>
+<body>
+<h2>Search Engine Status</h2>
+<hr>
+<p>
+EOT
+;
+ } else {
+ &PrintFooterHTML;
+ }
+ }
+}
+