summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/contrib
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib')
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/README34
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/acroconv.pl93
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/autorun/README16
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/autorun/autorun46
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl298
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl214
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS399
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/doc2html/README25
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg413
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl676
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty40
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl161
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl67
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl183
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl201
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README3
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi118
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi125
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/examples/badwords349
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh96
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/examples/updatedig53
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/handler.pl45
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec184
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README38
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c197
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc72
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile58
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/README133
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect49
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls37
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf26
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect99
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig93
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf32
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect39
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db39
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl238
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING340
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile11
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README16
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h257
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h257
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c910
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh23
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/README16
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml17
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html53
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE2
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html2
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html22
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html6
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html30
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html1
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html27
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/status.pl258
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl285
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl365
-rw-r--r--debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html16
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl54
59 files changed, 7957 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/README b/debian/htdig/htdig-3.2.0b6/contrib/README
new file mode 100644
index 00000000..d7c57ea3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/README
@@ -0,0 +1,34 @@
+ht://Dig contributed scripts
+
+This directory tree contains perl and shell programs that attempt to
+do things with the generated databases. Most of these were written
+for a very specific purpose for the specific version of ht://Dig that
+was current at that point. This means that some of these programs
+will be severely broken! Do not expect them to work; use them only as
+examples of the types of things you can do with the ht://Dig
+databases.
+
+More contributed work is available on the ht://Dig website:
+<http://www.htdig.org/contrib/>
+
+What's here:
+
+acroconv.pl An external converter script that uses acroread to parse PDFs
+autorun An example of automating the database building
+changehost A script to change hostnames of URLs in the databases
+conv_doc.pl A sample script to use the conversion features of external_parsers
+doclist List the information in the doc db (or after a certain date)
+ewswrap Two sample htsearch wrappers to emulate Excite for Web
+ Servers (EWS) and to simplify queries
+handler.pl A sample external_protocols script to handle HTTP/HTTPS using curl
+htparsedoc A sample shell script to parse Word documents
+multidig A set of scripts to simplify updating multiple databases
+parse_doc.pl A general external parser script that handles MS Word documents
+ (among others)
+run-robot.sh Another example of automating the database building
+scriptname An example of using htsearch within dynamic SSI pages
+status.pl Build a status page of last 5 runs and top 10
+ servers (by # URLs)
+urlindex Build an index of all the URLs in the database
+whatsnew Build a "what's new" page with custom header and footer
+wordfreq Build a list of words and frequency in the database
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl b/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl
new file mode 100755
index 00000000..ad7d4d79
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl
@@ -0,0 +1,93 @@
+#!/usr/local/bin/perl
+#
+# Sample external converter for htdig 3.1.4 or later, to convert PDFs
+# using Adobe Acrobat 3's acroread -toPostScript option on UNIX systems.
+# (Use it in place of conv_doc.pl if you have acroread but not pdftotext.)
+# Written by Gilles Detillieux.
+#
+# Usage: (in htdig.conf)
+#
+# external_parsers: application/pdf->text/html /usr/local/bin/acroconv.pl
+#
+# This is a pretty quick and dirty implementation, but it does seem to
+# give functionality equivalent to the now defunct htdig/PDF.cc parser.
+# I'm not a Perl expert by any stretch of the imagination, so the code
+# could probably use a lot of optimization to make it work better.
+#
+
+$watch = 0;
+$bigspace = 0;
+$putspace = 0;
+$putbody = 1;
+
+system("ln $ARGV[0] $ARGV[0].pdf; acroread -toPostScript $ARGV[0].pdf");
+open(INP, "< $ARGV[0].ps") || die "Can't open $ARGV[0].ps\n";
+
+print "<HTML>\n<head>\n";
+while (<INP>) {
+ if (/^%%Title: / && $putbody) {
+ s/^%%Title: \((.*)\).*\n/$1/;
+ s/\\222/'/g;
+ s/\\267/*/g;
+ s/\\336/fi/g;
+ s/\\([0-7]{3})/pack(C, oct($1))/eig;
+ s/\\([0-7]{2})/pack(C, oct($1))/eig;
+ s/\\([0-7])/pack(C, oct($1))/eig;
+ s/\\[nrtbf]/ /g;
+ s/\\(.)/$1/g;
+ s/&/\&amp\;/g;
+ s/</\&lt\;/g;
+ s/>/\&gt\;/g;
+ print "<title>$_</title>\n";
+ print "</head>\n<body>\n";
+ $putbody = 0;
+ } elsif (/^BT/) {
+ $watch = 1;
+ } elsif (/^ET/) {
+ $watch = 0;
+ if ($putspace) {
+ print "\n";
+ $putspace = 0;
+ }
+ } elsif ($watch) {
+ if (/T[Jj]$/) {
+ s/\)[^(]*\(//g;
+ s/^[^(]*\((.*)\).*\n/$1/;
+ s/\\222/'/g;
+ s/\\267/*/g;
+ s/\\336/fi/g;
+ s/\\([0-7]{3})/pack(C, oct($1))/eig;
+ s/\\([0-7]{2})/pack(C, oct($1))/eig;
+ s/\\([0-7])/pack(C, oct($1))/eig;
+ s/\\[nrtbf]/ /g;
+ s/\\(.)/$1/g;
+ if ($bigspace) {
+ s/(.)/$1 /g;
+ }
+ s/&/\&amp\;/g;
+ s/</\&lt\;/g;
+ s/>/\&gt\;/g;
+ if ($putbody) {
+ print "</head>\n<body>\n";
+ $putbody = 0;
+ }
+ print "$_";
+ $putspace = 1;
+ } elsif (/T[Ddm*]$/ && $putspace) {
+ print "\n";
+ $putspace = 0;
+ } elsif (/Tc$/) {
+ $bigspace = 0;
+ if (/^([3-9]|[1-9][0-9]+)\..*Tc$/) {
+ $bigspace = 1;
+ }
+ }
+ }
+}
+if ($putbody) {
+ print "</head>\n<body>\n";
+}
+print "</body>\n</HTML>\n";
+
+close(INP);
+system("rm -f $ARGV[0].pdf $ARGV[0].ps");
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/autorun/README b/debian/htdig/htdig-3.2.0b6/contrib/autorun/README
new file mode 100644
index 00000000..44686879
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/autorun/README
@@ -0,0 +1,16 @@
+README for autorun.
+
+The autorun program is an attempt at automating the steps
+needed to build a complete search database.
+
+If the search domain is not too big, this can be run on a
+daily (nightly) basis.
+
+
+Usage:
+ autorun
+
+Configuration:
+ Edit the autorun script and change things to your
+ liking...
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun b/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun
new file mode 100755
index 00000000..6014073a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+##
+## Configurable variables
+##
+
+
+##
+## Specify the location of the htdig and htmerge binaries
+##
+htbin=/opt/www/bin
+
+##
+## Specify the configuration file to use for digging and merging
+##
+conffile=/opt/www/htdig/sdsu.conf
+
+##
+## Specify the location where the temporary database is
+##
+source=/tmp
+
+##
+## Specify the location of the target search database
+##
+target=/gopher/www/htdig
+
+##
+## Specify the host of the target search database
+##
+search_host=athena
+
+##
+## Specify how to copy the new database to the location
+## where the search engine can get at it.
+##
+docopy() {
+ rcp $source/*.docdb $source/*.docs.index $source/*.words.gdbm ${search_host}:$target
+}
+
+
+$htbin/htdig -i -c $conffile
+$htbin/htmerge -c $conffile
+$htbin/htnotify -vv -c $conffile
+
+docopy
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl
new file mode 100755
index 00000000..3bd6c44d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl
@@ -0,0 +1,298 @@
+#!/usr/local/bin/perl
+
+##
+## changehost.pl (C) 1995 Andrew Scherpbier
+##
+## This program will change hostnames of URLs in the document database and index.
+##
+## usage:
+## changehost.pl database_base from to
+##
+## example:
+## changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
+##
+## Two new databases will be created with a base of '/tmp/new'.
+## These databases can then be used by htsearch.
+##
+
+use GDBM_File;
+
+$base = $ARGV[0];
+$from = $ARGV[1];
+$to = $ARGV[2];
+
+$dbfile = "$base.docdb";
+$newfile = "/tmp/new.docdb";
+
+##
+## Convert the document database first.
+##
+tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
+
+
+while (($key, $value) = each %docdb)
+{
+ if ($key =~ /http:\/\/$from/i)
+ {
+ %record = parse_ref_record($value);
+ $key =~ s/http:\/\/$from/http:\/\/$to/i;
+ print "$key\n";
+ $t = $record{"URL"};
+ $t =~ s/http:\/\/$from/http:\/\/$to/i;
+ $record{"URL"} = $t;
+
+ $value = create_ref_record(%record);
+ }
+
+ $newdb{$key} = $value;
+}
+
+untie %newdb;
+untie %docdb;
+
+##
+## Now create the document index
+##
+$newfile = "/tmp/new.docs.index";
+$dbfile = "$base.docs.index";
+
+tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
+
+while (($key, $value) = each %docdb)
+{
+ if ($value =~ /http:\/\/$from/i)
+ {
+ $value =~ s/http:\/\/$from/http:\/\/$to/i;
+ }
+ $newdb{$key} = $value;
+}
+
+untie %newdb;
+untie %docdb;
+
+######################################################################
+sub create_ref_record
+{
+ local(%rec) = @_;
+ local($s);
+
+ if (exists $rec{"ID"})
+ {
+ $s .= pack("Ci", 0, $rec{"ID"});
+ }
+ if (exists $rec{"TIME"})
+ {
+ $s .= pack("Ci", 1, $rec{"TIME"});
+ }
+ if (exists $rec{"ACCESSED"})
+ {
+ $s .= pack("Ci", 2, $rec{"ACCESSED"});
+ }
+ if (exists $rec{"STATE"})
+ {
+ $s .= pack("Ci", 3, $rec{"STATE"});
+ }
+ if (exists $rec{"SIZE"})
+ {
+ $s .= pack("Ci", 4, $rec{"SIZE"});
+ }
+ if (exists $rec{"LINKS"})
+ {
+ $s .= pack("Ci", 5, $rec{"LINKS"});
+ }
+ if (exists $rec{"IMAGESIZE"})
+ {
+ $s .= pack("Ci", 6, $rec{"IMAGESIZE"});
+ }
+ if (exists $rec{"HOPCOUNT"})
+ {
+ $s .= pack("Ci", 7, $rec{"HOPCOUNT"});
+ }
+ if (exists $rec{"URL"})
+ {
+ $s .= pack("Ci", 8, length($rec{"URL"}));
+ $s .= $rec{"URL"};
+ }
+ if (exists $rec{"HEAD"})
+ {
+ $s .= pack("Ci", 9, length($rec{"HEAD"}));
+ $s .= $rec{"HEAD"};
+ }
+ if (exists $rec{"TITLE"})
+ {
+ $s .= pack("Ci", 10, length($rec{"TITLE"}));
+ $s .= $rec{"TITLE"};
+ }
+ if (exists $rec{"DESCRIPTIONS"})
+ {
+ @v = split('', $rec{"DESCRIPTIONS"});
+ $s .= pack("Ci", 11, $#v - 1);
+ foreach (@v)
+ {
+ $s .= pack("i", length($_));
+ $s .= $_;
+ }
+ }
+ if (exists $rec{"ANCHORS"})
+ {
+ @v = split('', $rec{"ANCHORS"});
+ $s .= pack("Ci", 12, $#v - 1);
+ foreach (@v)
+ {
+ $s .= pack("i", length($_));
+ $s .= $_;
+ }
+ }
+ if (exists $rec{"EMAIL"})
+ {
+ $s .= pack("Ci", 13, length($rec{"EMAIL"}));
+ $s .= $rec{"EMAIL"};
+ }
+ if (exists $rec{"NOTIFICATION"})
+ {
+ $s .= pack("Ci", 14, length($rec{"NOTIFICATION"}));
+ $s .= $rec{"NOTIFICATION"};
+ }
+ if (exists $rec{"SUBJECT"})
+ {
+ $s .= pack("Ci", 15, length($rec{"SUBJECT"}));
+ $s .= $rec{"SUBJECT"};
+ }
+
+ return $s;
+}
+
+sub parse_ref_record
+{
+ local($value) = @_;
+ local(%rec, $length, $count, $result);
+
+ while (length($value) > 0)
+ {
+ $what = unpack("C", $value);
+ $value = substr($value, 1);
+ if ($what == 0)
+ {
+ # ID
+ $rec{"ID"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 1)
+ {
+ # TIME
+ $rec{"TIME"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 2)
+ {
+ # ACCESSED
+ $rec{"ACCESSED"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 3)
+ {
+ # STATE
+ $rec{"STATE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 4)
+ {
+ # SIZE
+ $rec{"SIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 5)
+ {
+ # LINKS
+ $rec{"LINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 6)
+ {
+ # IMAGESIZE
+ $rec{"IMAGESIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 7)
+ {
+ # HOPCOUNT
+ $rec{"HOPCOUNT"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 8)
+ {
+ # URL
+ $length = unpack("i", $value);
+ $rec{"URL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 9)
+ {
+ # HEAD
+ $length = unpack("i", $value);
+ $rec{"HEAD"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 10)
+ {
+ # TITLE
+ $length = unpack("i", $value);
+ $rec{"TITLE"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 11)
+ {
+ # DESCRIPTIONS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"DESCRIPTIONS"} = $result;
+ }
+ elsif ($what == 12)
+ {
+ # ANCHORS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"ANCHORS"} = $result;
+ }
+ elsif ($what == 13)
+ {
+ # EMAIL
+ $length = unpack("i", $value);
+ $rec{"EMAIL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 14)
+ {
+ # NOTIFICATION
+ $length = unpack("i", $value);
+ $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 15)
+ {
+ # SUBJECT
+ $length = unpack("i", $value);
+ $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ }
+ return %rec;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl b/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl
new file mode 100755
index 00000000..78d8a985
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl
@@ -0,0 +1,214 @@
+#!/usr/local/bin/perl
+
+#
+# Sample external converter for htdig 3.1.4 or later.
+# Usage: (in htdig.conf)
+#
+# external_parsers: application/msword->text/html /usr/local/bin/conv_doc.pl \
+# application/postscript->text/html /usr/local/bin/conv_doc.pl \
+# application/pdf->text/html /usr/local/bin/conv_doc.pl
+#
+# Written by Gilles Detillieux <grdetil@scrc.umanitoba.ca>.
+# Based in part on the parse_word_doc.pl script, written by
+# Jesse op den Brouw <MSQL_User@st.hhs.nl> but heavily revised.
+#
+# 1998/12/11
+# Added: catdoc test (is catdoc runnable?) <carl@dpiwe.tas.gov.au>
+# 1999/02/09
+# Added: uses ps2ascii to handle PS files <grdetil@scrc.umanitoba.ca>
+# 1999/02/15
+# Added: check for some file formats <Frank.Richter@hrz.tu-chemnitz.de>
+# 1999/02/25
+# Added: uses pdftotext to handle PDF files <grdetil@scrc.umanitoba.ca>
+# 1999/03/01
+# Added: extra checks for file "wrappers" <grdetil@scrc.umanitoba.ca>
+# & check for MS Word signature (no longer defaults to catdoc)
+# 1999/03/05
+# Changed: rejoin hyphenated words across lines <grdetil@scrc.umanitoba.ca>
+# (in PDFs)
+# 1999/08/12
+# Changed: adapted for xpdf 0.90 release <grdetil@scrc.umanitoba.ca>
+# Added: uses pdfinfo to handle PDF titles <grdetil@scrc.umanitoba.ca>
+# Changed: change dashes to hyphens <grdetil@scrc.umanitoba.ca>
+# 1999/09/09
+# Changed: fix to handle empty PDF title right <grdetil@scrc.umanitoba.ca>
+# 1999/12/01
+# Changed: rewritten as external converter <grdetil@scrc.umanitoba.ca>
+# stripped out all parser-related code
+# Added: test to silently ignore wrapped EPS files < " >
+# Added: test for null device on Win32 env. <PBISSET@emergency.qld.gov.au>
+# 2000/01/12
+# Changed: "break" to "last" (no break in Perl) <wjones@tc.fluke.com>
+# 2001/07/12
+# Changed: fix "last" handling in dehyphenation <grdetil@scrc.umanitoba.ca>
+# Added: handle %xx codes in title from URL <grdetil@scrc.umanitoba.ca>
+#########################################
+#
+# set this to your MS Word to text converter
+# get it from: http://www.fe.msk.ru/~vitus/catdoc/
+#
+$CATDOC = "/usr/local/bin/catdoc";
+#
+# set this to your WordPerfect to text converter, or /bin/true if none available
+# this nabs WP documents with .doc suffix, so catdoc doesn't see them
+#
+$CATWP = "/bin/true";
+#
+# set this to your RTF to text converter, or /bin/true if none available
+# this nabs RTF documents with .doc suffix, so catdoc doesn't see them
+#
+$CATRTF = "/bin/true";
+#
+# set this to your PostScript to text converter
+# get it from the ghostscript 3.33 (or later) package
+#
+$CATPS = "/usr/bin/ps2ascii";
+#
+# set this to your PDF to text converter, and pdfinfo tool
+# get it from the xpdf 0.90 package at http://www.foolabs.com/xpdf/
+#
+$CATPDF = "/usr/bin/pdftotext";
+$PDFINFO = "/usr/bin/pdfinfo";
+#$CATPDF = "/usr/local/bin/pdftotext";
+#$PDFINFO = "/usr/local/bin/pdfinfo";
+
+#########################################
+#
+# need some var's
+$dehyphenate = 0; # set if we must dehyphenate text output
+$ishtml = 0; # set if converter produces HTML
+$null = "";
+$magic = "";
+$type = "";
+$cvtr = "";
+$cvtcmd = "";
+$title = "";
+@parts = ();
+
+# make portable to win32 platform or unix
+$null = "/dev/null";
+if ($^O eq "MSWin32") {$null = "nul";}
+
+
+#########################################
+#
+# Read first bytes of file to check for file type (like file(1) does)
+open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n";
+read FILE,$magic,8;
+close FILE;
+
+if ($magic =~ /^\0\n/) { # possible MacBinary header
+ open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n";
+ read FILE,$magic,136; # let's hope converters can handle them!
+ close FILE;
+}
+
+if ($magic =~ /%!|^\033%-12345/) { # it's PostScript (or HP print job)
+ $cvtr = $CATPS; # gs 3.33 leaves _temp_.??? files in .
+# keep quiet even if PS gives errors...
+ $cvtcmd = "(cd /tmp; $cvtr; rm -f _temp_.???) < $ARGV[0] 2>$null";
+# allow PS interpreter to give error messages...
+# $cvtcmd = "(cd /tmp; $cvtr; rm -f _temp_.???) < $ARGV[0]";
+ $type = "PostScript";
+ $dehyphenate = 0; # ps2ascii already does this
+ if ($magic =~ /^\033%-12345/) { # HP print job
+ open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n";
+ read FILE,$magic,256;
+ close FILE;
+ exit unless $magic =~ /^\033%-12345X\@PJL.*\n*.*\n*.*ENTER\s*LANGUAGE\s*=\s*POSTSCRIPT.*\n*.*\n*.*\n%!/
+ }
+} elsif ($magic =~ /\305\320\323\306\036/) { # it's a wrapped EPS - ignore
+ exit
+} elsif ($magic =~ /%PDF-/) { # it's PDF (Acrobat)
+ $cvtr = $CATPDF;
+ $cvtcmd = "$cvtr -raw $ARGV[0] -";
+# to handle single-column, strangely laid out PDFs, use coalescing feature...
+# $cvtcmd = "$cvtr $ARGV[0] -";
+ $type = "PDF";
+ $dehyphenate = 1; # PDFs often have hyphenated lines
+ if (open(INFO, "$PDFINFO $ARGV[0] 2>$null |")) {
+ while (<INFO>) {
+ if (/^Title:/) {
+ s/^Title:\s+//;
+ s/\s+$//;
+ s/\s+/ /g;
+ s/&/\&amp\;/g;
+ s/</\&lt\;/g;
+ s/>/\&gt\;/g;
+ $title = $_;
+ last;
+ }
+ }
+ close INFO;
+ }
+# to use coalescing feature conditionally...
+# if ($title =~ /...Title of Corel DRAW output.../) {
+# $cvtcmd = "$cvtr $ARGV[0] -";
+# }
+} elsif ($magic =~ /WPC/) { # it's WordPerfect
+ $cvtr = $CATWP;
+ $cvtcmd = "$cvtr $ARGV[0]";
+ $type = "WordPerfect";
+ $dehyphenate = 0; # WP documents not likely hyphenated
+} elsif ($magic =~ /^{\\rtf/) { # it's Richtext
+ $cvtr = $CATRTF;
+ $cvtcmd = "$cvtr $ARGV[0]";
+ $type = "RTF";
+ $dehyphenate = 0; # RTF documents not likely hyphenated
+} elsif ($magic =~ /\320\317\021\340/) { # it's MS Word
+ $cvtr = $CATDOC;
+ $cvtcmd = "$cvtr -a -w $ARGV[0]";
+ $type = "Word";
+ $dehyphenate = 0; # Word documents not likely hyphenated
+} else {
+ die "Can't determine type of file $ARGV[0]; content-type: $ARGV[1]; URL: $ARGV[2]\n";
+}
+
+die "$cvtr is absent or unwilling to execute.\n" unless -x $cvtr;
+
+#############################################
+#
+# Start output.
+
+# if running as a converter for "user-defined" output type...
+#print "Content-Type: text/html\n\n";
+
+if ($ishtml) {
+ # converter will give its own HTML output
+ system("$cvtcmd") || die "$cvtr doesn't want to be run from shell.\n";
+ exit;
+}
+
+# Produce HTML output from converter's text output, so we can add title.
+print "<HTML>\n<head>\n";
+
+# print out the title, if it's set, and not just a file name, or make one up
+if ($title eq "" || $title =~ /^[A-G]:[^\s]+\.[Pp][Dd][Ff]$/) {
+ @parts = split(/\//, $ARGV[2]); # get the file basename
+ $parts[-1] =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie;
+ $title = "$type Document $parts[-1]"; # use it in title
+}
+print "<title>$title</title>\n";
+
+print "</head>\n<body>\n";
+
+# Open file via selected converter, output its text.
+open(CAT, "$cvtcmd |") || die "$cvtr doesn't want to be opened using pipe.\n";
+while (<CAT>) {
+ while (/[A-Za-z\300-\377]-\s*$/ && $dehyphenate) {
+ $_ .= <CAT>;
+ last if eof;
+ s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s
+ }
+ s/[\255]/-/g; # replace dashes with hyphens
+ s/\f/\n/g; # replace form feed
+ s/&/\&amp\;/g; # HTMLify text
+ s/</\&lt\;/g;
+ s/>/\&gt\;/g;
+ print;
+}
+
+print "</body>\n</HTML>\n";
+
+close CAT;
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS
new file mode 100644
index 00000000..35300c03
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS
@@ -0,0 +1,399 @@
+INTRODUCTION
+============
+
+This DETAILS file accompanies doc2html version 3.0.1.
+
+Read this file for instructions on the installation and use of the
+doc2html scripts.
+
+The set of files is:
+
+ DETAILS - this file
+ doc2html.pl - the main Perl script
+ doc2html.cfg - configuration file for use with wp2html
+ doc2html.sty - style file for use with wp2html
+ pdf2html.pl - Perl script for converting PDF files to HTML
+ swf2html.pl - Perl script for extracting links from Shockwave flash files.
+ README - brief description
+
+doc2html.pl is a Perl5 script for use as an external converter with
+htdig 3.1.4 or later. It takes as input the name of a file containing a
+document in a number of possible formats and its MIME type. It uses
+the appropriate conversion utility to convert it to HTML on standard
+output.
+
+doc2html.pl was designed to be easily adapted to use whatever conversion
+utilities are available, and although it has been written around the
+"wp2html" utility, it does not require wp2html to function.
+
+NOTE: version 3.0.1 has only been tested on Unix.
+
+pdf2html.pl is a Perl script which uses a pair of utilities (pdfinfo and
+pdf2text) to extract information and text from an Adobe PDF file and
+write HTML output. It can be called directly from htdig, but you are
+recommended to call it via doc2html.pl.
+
+swf2html.pl is a Perl script which calls a utility (swfparse) and
+outputs HTML containing links to the URLs found in a Shockwave flash
+file. It can be called directly from htdig, but you are recommended to
+call it via doc2html.pl.
+
+
+ABOUT DOC2HTML.PL
+=================
+
+doc2html.pl is essentially a wrapper script, and is itself only capable
+of reading plain text files. It requires the utility programs described
+below to work properly.
+
+doc2html.pl was written by David Adams <d.j.adams@soton.ac.uk>, it is
+based on conv_doc.pl written by Gilles Detillieux <grdetil@scrc.umanitoba.ca>.
+This in turn was based on the parse_word_doc.pl script, written by
+Jesse op den Brouw <MSQL_User@st.hhs.nl>.
+
+doc2html.pl makes up to three attempts to read a file. It first tries
+utilities which convert directly into HTML. If one is not found, or no
+output is produced, it then tries utilities which output plain text. If
+none is found, and the file is not of a type known to be unconvertible,
+then doc2html.pl attempts to read the file itself, stripping out any
+control characters.
+
+doc2html.pl is written to be flexible and easy to adapt to whatever
+conversion utilities are available. New conversion utilities may be
+added simply by making additions to routine 'store_methods', with no
+other changes being necessary. The existing lines in store_methods
+should provide sufficient examples on how to add more converters. Note
+that converters which produce HTML are entered differently to those that
+produce plain text.
+
+htdig provides three arguments which are read by doc2html.pl:
+
+1) the name of a temporary file containing a copy of the
+ document to be converted.
+
+2) the MIME type of the document.
+
+3) the URL of the document (which is used in generating the
+ title in the output).
+
+The test for document type uses both the MIME-type passed as second
+argument and the "Magic number" of the file.
+
+
+INSTALLATION
+============
+
+Installation requires that you acquire, compile and install the utilities
+you need to do the conversions. Those already setup in the Perl scripts are
+described below.
+
+If you don't have Perl module Sys::AlarmCall installed, then consider
+installing it, see section "TIMEOUT" below.
+
+You may need to change the first line of each script to the location of
+Perl on your system.
+
+Edit doc2html.pl to include the full pathname of each utility you have
+installed. For example:
+
+my $WP2HTML = '/opt/local/wp2html-3.2/bin/wp2html';
+
+If you don't have a particular utility then leave its location as a null
+string.
+
+Then place doc2html.pl and the other scripts where htdig can access them.
+
+If you are going to convert PDF files then you will need to edit pdf2html.pl
+and include its full path name in doc2html.pl.
+
+If you are going to extract links from Shockwave flash files then you will
+need to edit swf2html.pl and include its full path name in doc2html.pl.
+
+Edit the htdig.conf configuration file to use the script, as in this example:
+
+external_parsers: application/rtf->text/html /usr/local/scripts/doc2html.pl \
+ text/rtf->text/html /usr/local/scripts/doc2html.pl \
+ application/pdf->text/html /usr/local/scripts/doc2html.pl \
+ application/postscript->text/html /usr/local/scripts/doc2html.pl \
+ application/msword->text/html /usr/local/scripts/doc2html.pl \
+ application/Wordperfect5.1->text/html /usr/local/scripts/doc2html.pl \
+ application/msexcel->text/html /usr/local/scripts/doc2html.pl \
+ application/vnd.ms-excel->text/html /usr/local/scripts/doc2html.pl \
+ application/vnd.ms-powerpoint->text/html /usr/local/scripts/doc2html.pl \
+ application/x-shockwave-flash->text/html /usr/local/scripts/doc2html.pl \
+ application/x-shockwave-flash2-preview->text/html /usr/local/scripts/doc2html.pl
+
+If you are using wp2html then place the files doc2html.cfg and doc2html.sty in the
+wp2html library directory.
+
+
+UTILITY WP2HTML
+===============
+
+Obtain wp2html from http://www.res.bbsrc.ac.uk/wp2html/
+
+Note that wp2html is not free; its author charges a small fee for
+"registration". Various pre-compiled versions and the source code are
+available, together with extensive documentation. Upgrades are
+available at no further charge.
+
+wp2html converts WordPerfect documents (5.1 and later) to HTML.
+Versions 3.2 and later will also convert Word7 and Word97 documents to
+HTML. A feature of wp2html which doc2html.pl exploits is that the -q
+option will result in either good HTML or no output at all.
+
+wp2html is very flexible in the output it creates. The two files,
+doc2html.cfg and doc2html.sty, should be placed in the wp2html library
+directory along with the .cfg and .sty files supplied with wp2html.
+
+Edit the line in doc2html.pl:
+
+my $WP2HTML = '';
+
+to set $WP2HTML to the full pathname of wp2html.
+
+wp2html will look for the title in a document, and if it is found then
+output it in <TITLE>....</TITLE> markup. If a title is not found
+then it defaults to the file name in square brackets.
+
+If wp2html is unable to convert a document, or is not installed,
+then doc2html.pl can use the "catdoc" or "catwpd" utilities instead.
+
+
+UTILITY CATDOC
+==============
+
+Obtain catdoc from http://www.ice.ru/~vitus/catdoc/, it is available
+under the terms of the GNU General Public License.
+
+Edit the line in doc2html.pl:
+
+my $CATDOC = '';
+
+to set the variables to the full pathname of catdoc. You might want
+to use a different version of catdoc for Word2 documents or for MAC Word
+files.
+
+catdoc converts MS Word6, Word7, etc., documents to plain text. The
+latest beta version is also able to convert Word2 documents. catdoc
+also produces a certain amount of "garbage" as well as the text of the
+document. The -b option improves the likelihood that catdoc will
+extract all the text from the document, but at the expense of increasing
+the garbage as well. doc2html.pl removes some non-printing characters
+to minimise the garbage. If a later version of catdoc than 0.91.4 is
+obtained then the use of the -b option should be reviewed.
+
+
+UTILITY CATWPD
+==============
+
+Obtain catwpd from the contribs section of the Ht://Dig web site where
+you obtained doc2html. It extracts words from some versions of WordPerfect
+files. You won't need it if you buy the superior wp2html.
+
+If you do use it, then edit the line in doc2html.pl:
+
+my $CATWPD = '';
+
+to set the variables to the full pathname of catwpd.
+
+
+UTILITY PPTHTML
+===============
+
+Obtain ppthtml from http://www.xlhtml.org, where it is bundled in with
+xlhtml.
+
+In doc2html.pl, edit the line:
+
+my $PPT2HTML = '';
+
+to set $PPT2HTML to the full pathname of ppthtml.
+
+ppthtml converts Microsoft Powerpoint files into HTML. It uses the input
+filename as the title. doc2html.pl replaces this with the original
+filename from the URL in square brackets.
+
+
+UTILITY XLHTML
+==============
+
+Obtain xlhtml from http://www.xlhtml.org
+
+In doc2html.pl, edit the line:
+
+my $XLS2HTML = '';
+
+to set $XLS2HTML to the full pathname of xlhtml.
+
+xlhtml converts Microsoft Excel spreadsheets into HTML. It uses the input
+filename as the title. doc2html.pl replaces this with the original
+filename from the URL in square brackets.
+
+The present version of xlHtml (0.4) writes HTML output, but does not
+mark up hyperlinks in .xls files as links in its output.
+
+An alternative to xlHtml is xls2csv, see below.
+
+
+UTILITY RTF2HTML
+================
+
+Obtain rtf2html from http://www.ice.ru/~vitus/catdoc/
+
+In doc2html.pl, edit the line:
+
+my $RTF2HTML = '';
+
+to set $RTF2HTML to the full pathname of rtf2html.
+
+rtf2html converts Rich Text Format documents into HTML. It uses the input
+filename as the title. doc2html.pl replaces this with the original
+filename from the URL within square brackets.
+
+
+UTILITY PS2ASCII
+================
+
+Ps2ascii is a PostScript to text converter.
+
+In doc2html.pl, edit the line:
+
+my $CATPS = '';
+
+to the correct full pathname of ps2ascii.
+
+ps2ascii comes with ghostscript 3.33 (or later) package, which is
+pre-installed on many Unix systems. Commonly, it is a Bourne-shell
+script which invokes "gs", the Ghostscript binary. doc2html.pl has
+provision for adding the location of gs to the search path.
+
+
+UTILITY PDFTOTEXT
+=================
+
+pdftotext converts Adobe PDF files to text. pdfinfo is a tool which
+displays information about the document, and is used to obtain its
+title, etc. Get them from the xpdf package at
+http://www.foolabs.com/xpdf/
+
+In script pdf2html.pl, change the lines:
+
+my $PDFTOTEXT = "/... .../pdftotext";
+my $PDFINFO = "/... .../pdfinfo";
+
+to the correct full pathnames.
+
+Edit doc2html.pl to include the full pathname of the pdf2html.pl script.
+
+pdftotext may fail to convert PDF documents which have been truncated
+because htdig has max_doc_size set to smaller than the document's full
+size. Some PDF documents do not allow text to be extracted.
+
+
+UTILITY CATXLS
+==============
+
+The Excel to .csv converter, xls2csv, is included with recent versions of
+catdoc. This is an alternative to xlhtml (see above).
+
+Edit the line:
+
+my $CATXLS = '';
+
+to the full pathname of xls2csv.
+
+Xls2csv translates Excel spreadsheets into comma-separated data.
+
+
+UTILITY SWFPARSE
+================
+
+swfparse (aka swfdump) extracts information from Shockwave flash files,
+and can be obtained from the contribs section of the Ht://Dig web site,
+where you obtained doc2html.
+
+Perl script swf2html.pl calls swfparse and writes HTML output containing
+links to the URLs found in the Shockwave file. It does NOT extract text
+from the file.
+
+In script swf2html.pl, change the line:
+
+my $SWFPARSE = "/... .../swfdump";
+
+to the full pathname.
+
+Edit doc2html.pl to include the full pathname of the swf2html.pl script.
+
+
+LOGGING
+=======
+
+Output of logging information and error messages is controlled by the
+environmental variable DOC2HTML_LOG, which may be set in the rundig
+script. If it is not set then only error messages output by doc2html.pl
+and by the conversion utilities it calls are returned to htdig and will
+appear in its STDOUT. If DOC2HTML_LOG is set to a filename, then
+doc2html.pl appends logging information and any error messages to the
+file. If DOC2HTML_LOG is set but blank, or the file cannot be opened
+for writing, logging information and error messages are passed back to
+htdig and will appear in its STDOUT.
+
+In doc2html.pl, the variables $Emark and $EEmark, set in subroutine init,
+are used to highlight error messages.
+
+The number of lines of STDERR output from a utility which is logged or
+passed back to htdig is controlled by the variable $Maxerr set in
+routine "init" of doc2html.pl. This is provided in order to curb the
+large number of error messages which some utilities can produce from
+processing a single file.
+
+
+TIMEOUT
+=======
+
+If possible, install Perl module Sys::AlarmCall, obtainable from CPAN if
+you don't already have it. This module is used by doc2html.pl to
+terminate a utility if it takes too long to finish. The line in
+doc2html.pl:
+
+ $Time = 60; # allow 60 seconds for external utility to complete
+
+may be altered to suit.
+
+
+LIMITING INPUT AND OUTPUT
+=========================
+
+The environmental variable DOC2HTML_IP_LIMIT may be set in the rundig
+script to limit the size of the file which doc2html.pl will attempt to
+convert. The default value is 20000000. Doc2html.pl will return no
+output to htdig if the file size is equal to or greater than this size.
+
+You are recommended to set DOC2HTML_IP_LIMIT to the same as the
+"max_doc_size" parameter in the htdig configuration file. Then no
+attempt will be made to extract text from files which have been truncated
+by htdig. It is not possible to extract any text from .PDF files, for
+example, if they have been truncated.
+
+The environmental variable DOC2HTML_OP_LIMIT may be set in the rundig
+script to limit the output sent back to htdig by a single call to
+doc2html.pl. The default value is 10000000. Doc2html.pl will stop
+returning output to htdig once the DOC2HTML_OP_LIMIT has been reached.
+This is a precaution against the unlikely event of a conversion utility
+returning disproportionately large amounts of data.
+
+
+CONTACT
+=======
+
+Any queries regarding doc2html are best sent to the mailing list
+htdig-general@lists.sourceforge.net
+
+The author can be emailed at D.J.Adams@soton.ac.uk
+
+David Adams
+Information Systems Services
+University of Southampton
+
+27-November-2002
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README
new file mode 100644
index 00000000..427eb8ce
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README
@@ -0,0 +1,25 @@
+Readme for doc2html
+
+External converter scripts for ht://Dig (version 3.1.4 and later), that
+convert Microsoft Word, Excel and Powerpoint files, and PDF,
+PostScript, RTF, and WordPerfect files to text (in HTML form) so they
+can be indexed. Uses a variety of conversion programs:
+
+ wp2html - to convert Wordperfect and Word7 & 97 documents to HTML
+ catdoc - to extract text from Word documents
+ catwpd - to extract text from WordPerfect documents [alternative to wp2html]
+ rtf2html - to convert RTF documents to HTML
+ pdftotext - to extract text from Adobe PDFs
+ ps2ascii - to extract text from PostScript
+ pptHtml - to convert Powerpoint files to HTML
+ xlHtml - to convert Excel spreadsheets to HTML
+ xls2csv - to extract data from Excel spreadsheets [alternative to xlHtml]
+ swfparse - to extract links from Shockwave flash files.
+
+The main script, doc2html.pl, is easily edited to include the available
+utilities, and new utilities are easily incorporated.
+
+Written by David Adams (University of Southampton), and based on the
+conv_doc.pl script by Gilles Detillieux.
+
+For more information see the DETAILS file.
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg
new file mode 100644
index 00000000..0bff981a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg
@@ -0,0 +1,413 @@
+# Configuration file for use with doc2html.pl, which is used
+# to index Word, WordPerfect , etc. files using Ht://dig.
+#
+# Based on wp2html.cfg file supplied with wp2html version 3.0
+
+# The special token "typeout" simply outputs the given text
+# and can be used to inform users of versions, configuration changes etc.
+typeout=""
+
+#------------------- Single character translations ---------------
+# Protect HTML magic symbols.
+'<'="&lt;"
+'>'="&gt;"
+'&'="&amp;"
+'"'="&quot;"
+
+#------------------- WP code translations ---------------
+# File header. BEGIN is called before any text is output
+# BEGIN is passed three strings being the
+# Input Directory, Input file name and Input file type.
+# Do what you like with them!
+
+BEGIN="<HTML>
+<HEAD>
+<Title>%X<XDocSubject></Title>
+<META name=\"keywords\" content=\"%X<XDocKeywords>\">
+<META name=\"description\" content=\"%X<XDocAbstract>\">
+</HEAD>
+<BODY>
+<p>
+%xH
+"
+# Beginning of a subpart. This is called for each file.
+begin="<html>
+<head>
+<title>%X<XDocSubject - %O</title>
+</head>
+<body>\n"
+
+# File end. END is called at the end of the document
+# You may wish to insert signatures etc.
+END="<p>
+%xf
+%xF
+%X<XDocAuthor>
+</BODY>
+</HTML>\n"
+
+# End of a subpart. This is called for each sub part of a file except last.
+end="\n%xf\n</body>\n</html>\n"
+
+# End of the last subpart. This is only for the final sub-part
+# which may wish to have a different ending to the others (like
+# perhaps not refering to the NEXT chapter?)
+End="\n%xf\n</body>\n</html>\n"
+
+# Message output by wp2html into output file but not to be displayed
+# or for "hidden" WP text
+Message="<!-- "
+message="-->"
+
+# PageNo="\\folio{}" # insert page number
+# RomanPage="%\n%% Set roman page number %1\n" # set roman numerals
+# ArabicPage="%\n%% Set arabic page number %1\n" # set arabic numerals
+
+HSpace="&nbsp;" # Unbreakable (Hard) space
+
+# Tabs in Netscape (before HTML3) cannot be done properly
+# We fudge them here with non breaking space until Netscape 1.2?
+Tab="&nbsp;"
+
+# Hard page looks quite good as a line
+HPg="\n"
+# Soft page is usually ignored
+SPg=" "
+
+CondEOP=" "
+
+HRt="<br>\n" # Single Hard return just a break
+HRt2="<p>\n" # Two or more terminates paragraph
+SRt="\n" # Soft return is a newline
+DSRt="\n" # Deletable return at end of line
+DSPg="\n" # Deletable return at end of page
+
+softHyphen="" # Hyphens are ignored, since wrapping is up to the clients.
+softHyphenEOL="" # same for hyphens at end of a line
+hardHyphen="-" # Nonbreaking hyphen, must put in
+autoHyphen="" # auto Hyphens will be ignored too
+NoHyphWord="" # Inhibit hyphenation of this word
+
+# Margins are left as comments until HTML3 arrives
+Marg=" "
+TopMarg=" "
+PageLength=" "
+
+# Line spacing changes are ignored
+
+SS=""
+1.5S=""
+DS=""
+TS=""
+LS=""
+LPI=""
+
+# Font changes mapped to Netscape font size defn
+ExtraLarge="<H1>"
+extralarge="</h1>"
+VeryLarge="<h2>"
+verylarge="</h2>"
+Large="<h3>"
+large="</h3>"
+Small="<h5>"
+small="</h5>"
+Fine="</h6>"
+fine="<h6>"
+
+FontColour="\n<font color=\"#%s\">"
+fontColour="\n</font>"
+
+Font=" "
+font=" "
+
+Bold="<b>" # Boldface
+bold="</b>"
+Und="<u>"
+und="</u>"
+Red="" # Redlining
+red=""
+Strike="" # Strikeout XXX
+strike=""
+Italics="<i>"
+italics="</i>"
+Rev="" # Reverse video XXX
+rev=""
+Over="" # overprinting not supported XXX
+over=""
+# Netscape 2 and after can use
+Sup="<sup>"
+sup="</sup>"
+Sub="<sub>"
+sub="</sub>"
+
+# UpHalfLine, DownHalfLine, AdvanceToHalf -- undefined
+
+# Indent mapped to unordered lists, good for blocked indents
+#Indent="\n<ul>\n"
+#indent="\n</ul>\n"
+#DIndent="\n<ul>\n"
+#dindent="\n</ul>\n"
+
+# Indents as space, use if indents are like TABS for you
+Indent="&nbsp;"
+indent=""
+DIndent="&nbsp;"
+dindent=""
+
+# Margin release is passed one parameter, the number of characters.
+MarginRelease=" "
+
+Center="<Div align=\"center\">" # centering, Netscape way
+center="</Div>\n"
+
+Left="<Div align=\"left\">" # Netscape left justify
+left="</Div>"
+
+Right="<Div align=\"right\">" # Netscape right justify
+right="</Div>"
+
+Full="<Div align=\"full\">" # Netscape full justify
+full="</Div>"
+
+# Can use also
+# Left
+# left
+# Full
+# full
+
+
+# Math, math, MathCalc, MathCalcColumn, SubTtl, IsSubTtl, Ttl, IsTtl, GrandTtl
+# -- undefined
+Column="<MULTICOL COLS=%1>"
+column="</MULTICOL>"
+
+Header="\n"
+header="\n"
+Footer="\n"
+footer="\n"
+
+Footnote="&nbsp;&nbsp; <Font size=2>
+<a href=\"#Footnote%1\" name=\"Footref%1\">
+</Font>&nbsp;"
+footnote="</a>"
+FootnoteFormat="<a href=\"#Footref%1\" name=\"Footnote%1\">%1</i></a><i>"
+footnoteFormat="</i>"
+
+# Displays for various automatic numbers
+# uncomment these to "eat" the autonumbers inserted by WP6
+FootnoteDisplay="%e"
+footnoteDisplay="%f"
+
+#EndnoteDisplay="%e"
+#endnoteDisplay="%f"
+#ParanumDisplay="%e"
+#paranumDisplay="%f"
+#LinenumDisplay="%e"
+#linenumDisplay="%f"
+
+BeginTabs=""
+SetTab=""
+SetTabCenter=""
+SetTabRight=""
+SetTabDecimal=""
+EndTabs=""
+
+Hyph="" # Allow hyphenation
+hyph="" # Disable hyphenation
+Wid="" # Widow protection
+wid="" # Allow widows
+
+# HZone, DAlign -- undefined
+
+Supp=" "
+CtrPg=" "
+SetFont=" "
+SetBin=" "
+
+# True table definitions, these are Netscape style (HTML3)
+# Start of a table
+Table="\n<Table>"
+# end of a table
+table="\n</Table>\n"
+
+# New row
+Row="\n<Tr>"
+# End row
+row="\n</Tr>"
+
+# New cell. Is passed the col and row spans integers and align flags
+#Cell="\n<Td ColSpan=%1 RowSpan=%2 Align=%u Valign=%v>\n"
+Cell="\n<Td>\n"
+# End cell
+cell="\n</Td>"
+
+# Table header cells.
+HeadCell="\n<Th ColSpan=%1 RowSpan=%2 Align=%u Valign=%v>\n"
+HeadCell="\n<Th>\n"
+# End header cell
+headCell="\n</Th>"
+
+# Ordinary WP comment anywhere in the document, passed comment text
+Comment="\n<!-- WP Comment "
+comment=" -->\n"
+
+# default Style operation for styles user has not names below
+defaultStyleOn=" "
+defaultStyleOff=" "
+defaultStyleOpen=" "
+defaultStyleEnd=" "
+
+# Set defaults for TOC markers here
+ToC=" "
+toC=" "
+ToC1=" "
+toC1=" "
+ToC2=" "
+toC2=" "
+ToC3=" "
+toC3=" "
+ToC4=" "
+toC4=" "
+ToC5=" "
+toC5=" "
+
+# Detect start and end of index in document
+Index=" "
+index=" "
+
+# Set defaults for List markers here
+List=" "
+list=" "
+List1=" "
+list1=" "
+List2=" "
+list2=" "
+List3=" "
+list3=" "
+List4=" "
+list4=" "
+List5=" "
+list5=" "
+
+ToA=" "
+toA=" "
+ToAMark=" "
+
+XrefMark=" "
+xrefMark=" "
+XrefTarget=" "
+
+# Figure inside WP.
+# Right now we have a confusing 3 options, that is we have
+# A WPG image inside Wordperfect,
+# 1. With no GIF/JPEG conversion available
+# 2. Of unknown size but a GIF/JPEG conversion exists
+# 3. With known (set) size and with GIF/JPEG conversion
+# Depending on which of the above we find we call one of the next
+# three tags, Figure, Image, ImageSized
+# NO GIF/JPEG
+Figure=" "
+
+# GIF/JPEG available, but sizes and alignment unknown
+# Image="<a href=\"%s.%t\"><img src=\"%s.%u\"></a>
+# Click thumbnail picture to see full size version"
+# You can also do thumbnails too, like this
+#Image="<a href=\"%s.%t\" target=\"Graphics\"><img src=\"%s.%u\"></a>
+#Click thumbnail picture to see full size in separate window"
+Image="<img src=\"%s.%t\"></a>"
+
+
+# GIF/JPEG available, and sizes and alignment known
+# If this is NOT given, Image will be used instead
+#ImageSized="<img src=\"%s.%t\" height=%3 width=%4 align=%w>"
+
+# Boxes, Table, Text and User
+TableBox="\n"
+tableBox="\n"
+TextBox="\n"
+textBox="\n"
+UserBox="\n"
+userBox="\n"
+
+# Equations are rendered as rough text right now, wait for HTML3
+# and we can switch this on properly
+Equation=" "
+equation=" "
+
+# Captions for all boxes
+Caption="<br><i>["
+caption="]</i><br>\n"
+
+HLine="<hr width=%1 size=%2 align=%u>"
+
+ParaNum1="%s "
+ParaNum2="%s "
+ParaNum3="%s "
+ParaNum4="%s "
+ParaNum5="%s "
+ParaNum6="%s "
+ParaNum7="%s "
+ParaNum8="%s "
+ParaNumEnd=" "
+
+PN0=" "
+PN1=" "
+PN2=" "
+PN3=" "
+PN4=" "
+PN5=" "
+PN6=" "
+PN7=" "
+PN8=" "
+
+#*********************************************************************
+# End of required parameters -- start of optional entries
+
+# Here is the complete list of fields which can optionally be
+# extracted from the extended document summary area of the file
+# If they are not defined (to be anything) they will not be extracted
+#!XDocName="Filename"
+#!XDocType="Type"
+XDocAuthor=""
+XDocAbstract=""
+#!XDocAccount="Account"
+XDocKeywords=""
+XDocSubject="%X<Title>"
+#!XDocTypist="Typist"
+
+# These tags allow you to convert WP6 hypertext refs into HTML hypertext
+# But you should be careful how you use them as documents NOT designed
+# to be HTML (perhaps linking to other documents in unreachable
+# directories) would generate confusing links.
+#!HyperBegin="<a href=\"%s.htm#%t\">%e"
+#!hyperBegin="%f"
+#!HyperEnd="</a>%e"
+#!hyperEnd="%f"
+
+# Or you can use WP hypertext refs as direct hypertext
+# but make sure you insert the URL in the BOOKMARK field
+HyperBegin="<a href=\"%t\">%e"
+hyperBegin="%f"
+HyperEnd="</a>%e"
+hyperEnd="%f"
+
+# These tags allow you to convert WP6 bookmarks into appropriate
+# HTML anchors which are needed if you use the Hyper tags above.
+BookMark="<a name=\"%s\">"
+bookMark="</a>"
+
+# Hypertext references in Word Documents (real URL)
+Href="<a href=\"%s\">"
+href="</a>"
+
+# These tags allow you to find the file name of included files
+#SubDoc="<hr>Start Included file %s<hr>\n"
+#subDoc="<hr>End Included file %s<hr>\n"
+SubDoc="\n"
+subDoc="\n"
+
+# These tags are trigger by WP Small Caps attributes (On/Off)
+#!SmallCaps=""
+#!smallCaps=""
+
+# End of main configuration file
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl
new file mode 100755
index 00000000..c69f00cc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl
@@ -0,0 +1,676 @@
+#!/usr/bin/perl
+use strict;
+#
+# Version 3.0.1 19-September-2002
+#
+# External converter for htdig 3.1.4 or later (Perl5 or later)
+# Usage: (in htdig.conf)
+#
+#external_parsers: application/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# text/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/pdf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/postscript->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/msword->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/wordperfect5.1->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/wordperfect6.0->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/msexcel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/vnd.ms-excel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/vnd.ms-powerpoint->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/x-shockwave-flash->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \
+# application/x-shockwave-flash2-preview->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl
+#
+# Uses wp2html to convert Word and WordPerfect documents into HTML, and
+# falls back to using Catdoc for Word and Catwpd for WordPerfect if
+# Wp2html is unavailable or unable to convert.
+#
+# Uses range of other converters as available.
+#
+# If all else fails, attempts to read file without conversion.
+#
+########################################################################################
+# Written by David Adams <d.j.adams@soton.ac.uk>.
+# Based on conv_doc.pl written by Gilles Detillieux <grdetil@scrc.umanitoba.ca>,
+# which in turn was based on the parse_word_doc.pl script, written by
+# Jesse op den Brouw <MSQL_User@st.hhs.nl>.
+########################################################################################
+
+# Install Sys::AlarmCall if you can
+eval "use Sys::AlarmCall";
+
+######## Full paths of conversion utilities ##########
+######## YOU MUST SET THESE ##########
+######## (leave null those you don't have) ##########
+
+# Wp2html converts Word & Wordperfect to HTML
+# (get it from: http://www.res.bbsrc.ac.uk/wp2html/):
+my $WP2HTML = '';
+
+#Catwpd for WordPerfect to text conversion
+# (you don't need this if you have wp2html)
+# (get it from htdig site)
+my $CATWPD = '';
+
+# rtf2html converts Rich Text Format documents to HTML
+# (get it from http://www.ice.ru/~vitus/catdoc/):
+my $RTF2HTML = '';
+
+# Catdoc converts Word (MicroSoft) to plain text
+# (get it from: http://www.ice.ru/~vitus/catdoc/):
+
+#version of catdoc for Word6, Word7 & Word97 files:
+my $CATDOC = '';
+
+#version of catdoc for Word2 files:
+my $CATDOC2 = $CATDOC;
+
+#version of catdoc for Word 5.1 for MAC:
+my $CATDOCM = $CATDOC;
+
+# PostScript to text converter
+# (get it from the ghostscript 3.33 (or later) package):
+my $CATPS = '';
+
+# add to search path the directory which contains gs:
+#$ENV{PATH} .= ":/usr/freeware/bin";
+
+# PDF to HTML conversion script:
+my $PDF2HTML = ''; # full pathname of pdf2html/pl script
+
+# Excel (MicroSoft) to HTML converter
+# (get it from www.xlhtml.org)
+my $XLS2HTML = '';
+
+# Excel (MicroSoft) to .CSV converter
+# (you don't need this if you have xlhtml)
+# (if you do want it, you can get it with catdoc)
+my $CATXLS = '';
+
+# Powerpoint (MicroSoft) to HTML converter
+# (get it from www.xlhtml.org)
+my $PPT2HTML = '';
+
+# Shockwave Flash
+# (extracts links from file)
+my $SWF2HTML = ''; # full pathname of swf2html.pl script
+
+# OpenOffice.org files
+#my $OpenOffice2XML = '/usr/bin/unzip';
+my $OpenOffice2XML = '';
+# (remove multi-byte unicode from XML in OOo documents)
+#my $strip_unicode = '| /usr/bin/iconv -c -s -f UTF-8 -t ISO-8859-1';
+my $strip_unicode = '';
+
+
+########################################################################
+
+# Other Global Variables
+my ($Success, $LOG, $Verbose, $CORE_MESS, $TMP, $RM, $ED, $Magic, $Time,
+ $Count, $Prog, $Input, $MIME_type, $URL, $Name, $Efile, $Maxerr,
+ $Redir, $Emark, $EEmark, $Method, $OP_Limit, $IP_Limit);
+my (%HTML_Method, %TEXT_Method, %BAD_type);
+
+
+&init; # initialise
+my $size = -s $Input;
+&quit("Input file size of $size at or above $IP_Limit limit" ) if $size >= $IP_Limit;
+&store_methods; #
+&read_magic; # Magic reveals type
+&error_setup; # re-route standard error o/p from utilities
+
+# see if a document -> HTML converter will work:
+&run('&try_html');
+if ($Success) { &quit(0) }
+
+# try a document -> text converter:
+&run('&try_text');
+if ($Success) { &quit(0) }
+
+# see if a known problem
+my $fail = &cannot_do;
+if ($fail) { &quit($fail) }
+
+# last-ditch attempt, try copying document
+&try_plain;
+if ($Success) {&quit(0)}
+
+&quit("UNABLE to convert");
+
+#------------------------------------------------------------------------------
+
+sub init {
+
+ # Doc2html log file
+ $LOG = $ENV{'DOC2HTML_LOG'} || '';
+ #
+ if ($LOG) {
+ open(STDERR,">>$LOG"); # ignore possible failure to open
+ } # else O/P really does go to STDERR
+
+ # Set to 1 for O/P to STDERR or Log file
+ $Verbose = exists($ENV{'DOC2HTML_LOG'}) ? 1 : 0;
+
+ # Limiting size of file doc2html.pl will try to process (default 20Mbyte)
+ $IP_Limit = $ENV{'DOC2HTML_IP_LIMIT'} || 20000000;
+
+ # Limit for O/P returned to htdig (default 10Mbyte)
+ $OP_Limit = $ENV{'DOC2HTML_OP_LIMIT'} || 10000000;
+
+ # Mark error message produced within doc2html script
+ $Emark = "!\t";
+ # Mark error message produced by conversion utility
+ $EEmark = "!!\t";
+
+ # Message to STDERR if core dump detected
+ $CORE_MESS = "CORE DUMPED";
+
+ # Directory for temporary files
+ $TMP = "/tmp/htdig";
+ if (! -d $TMP) {
+ mkdir($TMP,0700) or die "Unable to create directory \"$TMP\": $!";
+ }
+ # Current directory during run of script:
+ chdir $TMP or warn "Cannot change directory to $TMP\n";
+
+ # File for error output from utility
+ $Efile = 'doc_err.' . $$;
+
+ # Max. number of lines of error output from utility copied
+ $Maxerr = 10;
+
+ # System command to delete a file
+ $RM = "/bin/rm -f";
+
+ # Line editor to do substitution
+ $ED = "/bin/sed -e";
+ if ($^O eq "MSWin32") {$ED = "$^X -pe"}
+
+ $Time = 60; # allow 60 seconds for external utility to complete
+
+ $Success = 0;
+ $Count = 0;
+ $Method = '';
+ $Prog = $0;
+ $Prog =~ s#^.*/##;
+ $Prog =~ s/\..*?$//;
+
+ $Input = $ARGV[0] or die "No filename given\n";
+ $MIME_type = $ARGV[1] or die "No MIME-type given";
+ $URL = $ARGV[2] || '?';
+ $Name = $URL;
+ $Name =~ s#^.*/##;
+ $Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie;
+
+ if ($Verbose and not $LOG) { print STDERR "\n$Prog: [$MIME_type] " }
+ if ($LOG) { print STDERR "$URL [$MIME_type] " }
+
+}
+
+#------------------------------------------------------------------------------
+
+sub store_methods {
+# The method of dealing with each file type is set up here.
+# Edit as necessary
+
+ my ($mime_type,$magic,$cmd,$cmdl,$type,$description);
+
+ my $name = quotemeta($Name);
+
+ ####Document -> HTML converters####
+
+ # WordPerfect documents
+ if ($WP2HTML) {
+ $mime_type = "application/wordperfect|application/msword";
+ $cmd = $WP2HTML;
+ $cmdl = "($cmd -q -DTitle=\"[$name]\" -c doc2html.cfg -s doc2html.sty -i $Input -O; $RM CmdLine.ovr)";
+ $magic = '\377WPC';
+ &store_html_method('WordPerfect (wp2html)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Word documents
+ if ($WP2HTML) {
+ $mime_type = "application/msword";
+ $cmd = $WP2HTML;
+ $cmdl = "($cmd -q -DTitle=\"[$name]\" -c doc2html.cfg -s doc2html.sty -i $Input -O; $RM CmdLine.ovr)";
+ $magic = '^\320\317\021\340';
+ &store_html_method('Word (wp2html)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # RTF documents
+ if ($RTF2HTML) {
+ $mime_type = "application/msword|application/rtf|text/rtf";
+ $cmd = $RTF2HTML;
+ # Rtf2html uses filename as title, change this:
+ $cmdl = "$cmd $Input | $ED \"s#^<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\"";
+ $magic = '^{\134rtf';
+ &store_html_method('RTF (rtf2html)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Microsoft Excel spreadsheet
+ if ($XLS2HTML) {
+ $mime_type = "application/msexcel|application/vnd.ms-excel";
+ $cmd = $XLS2HTML;
+ # xlHtml uses filename as title, change this:
+ $cmdl = "$cmd -fw $Input | $ED \"s#<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\"";
+ $magic = '^\320\317\021\340';
+ &store_html_method('Excel (xlHtml)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Microsoft Powerpoint Presentation
+ if ($PPT2HTML) {
+ $mime_type = "application/vnd.ms-powerpoint";
+ $cmd = $PPT2HTML;
+ # xlHtml uses filename as title, change this:
+ $cmdl = "$cmd $Input | $ED \"s#<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\"";
+ $magic = '^\320\317\021\340';
+ &store_html_method('Powerpoint (pptHtml)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Adobe PDF file using Perl script
+ if ($PDF2HTML) {
+ $mime_type = "application/pdf";
+ $cmd = $PDF2HTML;
+ # Replace default title (if used) with filename:
+ $cmdl = "$cmd $Input $mime_type $name";
+ $magic = '%PDF-|\0PDF CARO\001\000\377';
+ &store_html_method('PDF (pdf2html)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Shockwave Flash file using Perl script
+ if ($SWF2HTML) {
+ $mime_type = "application/x-shockwave-flash";
+ $cmd = $SWF2HTML;
+ $cmdl = "$cmd $Input";
+ $magic = '^FWS[\001-\010]'; # versions 1 to 5, perhaps some later versions
+ &store_html_method('Shockwave-Flash (swf2html)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # OpenOffice Documents
+ if ($OpenOffice2XML) {
+ $mime_type = "application/vnd.sun.xml.writer|application/vnd.sun.xml.impress|application/vnd.sun.xml.calc|application/vnd.sun.xml.draw|application/vnd.sun.xml.math";
+ $cmd = $OpenOffice2XML;
+ $cmdl = "$cmd -p -qq $Input content.xml | /bin/sed -r 's/<[^>]*>/ /gi' $strip_unicode";
+ $magic = 'PK';
+ &store_html_method('OpenOffice XML (oo2xml)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ ####Document -> Text converters####
+
+ # Word6, Word7 & Word97 documents
+ if ($CATDOC) {
+ $mime_type = "application/msword";
+ $cmd = $CATDOC;
+ # -b option increases chance of success:
+ $cmdl = "$cmd -a -b -w $Input";
+ $magic = '^\320\317\021\340';
+ &store_text_method('Word (catdoc)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Word2 documents
+ if ($CATDOC2) {
+ $mime_type = "application/msword";
+ $cmd = $CATDOC2;
+ $cmdl = "$cmd -a -b -w $Input";
+ $magic = '^\333\245-\000';
+ &store_text_method('Word2 (catdoc)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Word 5.1 for MAC documents
+ if ($CATDOCM) {
+ $mime_type = "application/msword";
+ $cmd = $CATDOCM;
+ $cmdl = "$cmd -a -b -w $Input";
+ $magic = '^\3767\000#\000\000\000\000';
+ &store_text_method('MACWord (catdoc)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # PostScript files
+ if ($CATPS) {
+ $mime_type = "application/postscript";
+ $cmd = $CATPS;
+ # allow PS interpreter to give error messages
+ $cmdl = "($cmd; $RM _temp_.???) < $Input";
+ $magic = '^.{0,20}?%!|^\033%-12345.*\n%!';
+ &store_text_method('PostScript (ps2ascii)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # Microsoft Excel file
+ if ($CATXLS) {
+ $mime_type = "application/vnd.ms-excel";
+ $cmd = $CATXLS;
+ $cmdl = "$cmd $Input";
+ $magic = '^\320\317\021\340';
+ &store_text_method('MS Excel (xls2csv)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+ # WordPerfect document
+ if ($CATWPD) {
+ $mime_type = "application/wordperfect|application/msword";
+ $cmd = $CATWPD;
+ $cmdl = "$cmd $Input";
+ $magic = '\377WPC';
+ &store_text_method('WordPerfect (catwpd)',$cmd,$cmdl,$mime_type,$magic);
+ }
+
+
+ ####Documents that cannot be converted####
+
+ # wrapped encapsulated Postscript
+ $type = "EPS";
+ $magic = '^\305\320\323\306 \0';
+ $description = 'wrapped Encapsulated Postscript';
+ &store_cannot_do($type,$magic,$description);
+
+ # Shockwave Flash version 6
+ $type = "SWF6";
+ $description = 'Shockwave-Flash Version 6';
+ $magic = '^CWS\006';
+ &store_cannot_do($type,$magic,$description);
+
+#### Binary (data or whatever)
+###$type = "BIN";
+###$magic = '[\000-\007\016-\037\177]'; # rather crude test!
+###$description = 'apparently binary';
+###&store_cannot_do($type,$magic,$description);
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub read_magic {
+
+ # Read first bytes of file to check for file type
+ open(FILE, "< $Input") || die "Can't open file $Input\n";
+ read FILE,$Magic,256;
+ close FILE;
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub error_setup {
+
+ if ($Efile) {
+ open SAVERR, ">&STDERR";
+ if (open STDERR, "> $Efile") {
+ print SAVERR " Overwriting $Efile\n" if (-s $Efile);
+ $Redir = 1;
+ } else { close SAVERR }
+ }
+
+}
+
+#------------------------------------------------------------------------------
+
+sub run {
+
+ my $routine = shift;
+ my $return;
+
+ if (defined &alarm_call) {
+ $return = alarm_call($Time, $routine);
+ } else {
+ eval $routine;
+ $return = $@ if $@;
+ }
+
+ if ($return) { &quit($return) }
+
+}
+
+#------------------------------------------------------------------------------
+
+sub try_html {
+
+ my($set,$cmnd,$type);
+
+ $Success = 0;
+ foreach $type (keys %HTML_Method) {
+ $set = $HTML_Method{$type};
+ if (($MIME_type =~ m/$set->{'mime'}/i) and
+ ($Magic =~ m/$set->{'magic'}/s)) { # found the method to use
+ $Method = $type;
+ my $cmnd = $set->{'cmnd'};
+ if (! -x $cmnd) {
+ warn "Unable to execute $cmnd for $type document\n";
+ return;
+ }
+ if (not open(CAT, "$set->{'command'} |")) {
+ warn "$cmnd doesn't want to be opened using pipe\n";
+ return;
+ }
+ while (<CAT>) {
+ # getting something, so it is working
+ $Success = 1;
+ if ($_ !~ m/^<!--/) { # skip comment lines inserted by converter
+ print;
+ $Count += length;
+ if ($Count > $OP_Limit) { last }
+ }
+ }
+ close CAT;
+ last;
+ }
+ }
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub try_text {
+
+ my($set,$cmnd,$type);
+
+ $Success = 0;
+ foreach $type (keys %TEXT_Method) {
+ $set = $TEXT_Method{$type};
+ if (($MIME_type =~ m/$set->{'mime'}/i) and
+ ($Magic =~ m/$set->{'magic'}/s)) { # found the method to use
+ $Method = $type;
+ my $cmnd = $set->{'cmnd'};
+ if (! -x $cmnd) { die "Unable to execute $cmnd for $type document\n" }
+
+ # Open file via selected converter, output head, then its text:
+ open(CAT, "$set->{'command'} |") or
+ die "$cmnd doesn't want to be opened using pipe\n";
+ &head;
+ print "<BODY>\n<PRE>\n";
+ $Success = 1;
+ while (<CAT>) {
+ s/\255/-/g; # replace dashes with hyphens
+ # replace bell, backspace, tab. etc. with single space:
+ s/[\000-\040]+/ /g;
+ if (length > 1) { # if not just a single character, eg space
+ print &HTML($_), "\n";
+ $Count += length;
+ if ($Count > $OP_Limit) { last }
+ }
+ }
+ close CAT;
+
+ print "</PRE>\n</BODY>\n</HTML>\n";
+ last;
+ }
+
+ }
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub cannot_do {
+
+ my ($type,$set);
+
+ # see if known, unconvertable type
+ $Method = '';
+ foreach $type (keys %BAD_type) {
+ $set = $BAD_type{$type};
+ if ($Magic =~ m/$set->{'magic'}/s) { # known problem
+ return "CANNOT DO $set->{'desc'} ";
+ }
+ }
+
+ return 0;
+}
+
+#------------------------------------------------------------------------------
+
+sub try_plain {
+
+ $Success = 0;
+ ####### if ($Magic !~ m/^[\000-\007\016-\037\177]) {
+ if (-T $Input) { # Looks like text, so go for it:
+ $Method = 'Plain Text';
+ open(FILE, "<$Input") || die "Error reading $Input\n";
+ $Success = 1;
+ $Method = 'Plain Text';
+ &head;
+ print "<BODY>\n<PRE>\n";
+
+ while (<FILE>) {
+ # replace bell, backspace, tab. etc. with single space:
+ s/[\000-\040\177]+/ /g;
+ if (length > 1) {
+ print &HTML($_), "\n";
+ $Count += length;
+ if ($Count > $OP_Limit) { last }
+ }
+ }
+ close FILE;
+ print "</PRE>\n</BODY>\n</HTML>\n";
+
+ } else { $Method = '' }
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub HTML {
+
+ my $text = shift;
+
+ $text =~ s/\f/\n/gs; # replace form feed
+ $text =~ s/\s+/ /g; # replace multiple spaces, etc. with a single space
+ $text =~ s/\s+$//gm; # remove trailing spaces
+ $text =~ s/&/&amp;/g;
+ $text =~ s/</&lt;/g;
+ $text =~ s/>/&gt;/g;
+
+ return $text;
+}
+
+#------------------------------------------------------------------------------
+
+sub store_html_method {
+
+ my $type = shift;
+ my $cmnd = shift;
+ my $cline = shift;
+ my $mime = shift;
+ my $magic = shift;
+
+ $HTML_Method{$type} = {
+ 'mime' => $mime,
+ 'magic' => $magic,
+ 'cmnd' => $cmnd,
+ 'command' => $cline,
+ };
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub store_text_method {
+
+ my $type = shift;
+ my $cmnd = shift;
+ my $cline = shift;
+ my $mime = shift;
+ my $magic = shift;
+
+ $TEXT_Method{$type} = {
+ 'mime' => $mime,
+ 'magic' => $magic,
+ 'cmnd' => $cmnd,
+ 'command' => $cline,
+ };
+
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub store_cannot_do {
+
+ my $type = shift;
+ my $magic = shift;
+ my $desc = shift;
+
+ $BAD_type{$type} = {
+ 'magic' => $magic,
+ 'desc' => $desc,
+ };
+
+ return;
+
+}
+
+#------------------------------------------------------------------------------
+
+sub head {
+
+ print "<HTML>\n<HEAD>\n";
+ print "<TITLE>[" . $Name . "]</TITLE>\n";
+ print "</HEAD>\n";
+
+}
+
+#------------------------------------------------------------------------------
+
+sub quit {
+
+ if ($Redir) { # end redirection of STDERR to temporary file
+ close STDERR;
+ open STDERR, ">&SAVERR";
+ }
+
+ if ($Verbose) {
+ print STDERR "$Method $Count" if ($Success);
+ print STDERR "\n";
+ }
+
+ if ($Count > $OP_Limit) {
+ print STDERR $Emark, "Output truncated after limit $OP_Limit reached\n";
+ }
+
+ my $return = shift;
+ if ($return) {
+ print STDERR $Emark, $return, "\n";
+ $return = 1;
+ }
+
+ chdir $TMP;
+ if ($Efile && -s $Efile) {
+ open EFILE, "<$Efile";
+ my $c = 0;
+ while (<EFILE>) {
+ $c++;
+ if ($c <= $Maxerr) {
+ print STDERR $EEmark, $_;
+ }
+ }
+ close EFILE;
+ print STDERR $Emark, " ... (total of $c lines of error messages)\n" if ($c > $Maxerr);
+ }
+ unlink $Efile if ($Efile && -e $Efile);
+
+ if (-e "core" && (-M "core" < 0)) {
+ print STDERR $Emark, "$CORE_MESS\n";
+ }
+ exit $return;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty
new file mode 100644
index 00000000..fccfb8ee
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty
@@ -0,0 +1,40 @@
+Any Font 8 On ="<H6>"
+Any Font 8 Off ="</H6>"
+Any Font 9 On =" "
+Any Font 9 Off =" "
+Any Font 10 On =" "
+Any Font 10 Off =" "
+Any Font 11 On =" "
+Any Font 11 Off =" "
+Any Font 12 On ="<H4>"
+Any Font 12 Off ="</H4>"
+Any Font 14 On ="<H3>"
+Any Font 14 Off ="</H3>"
+Any Font 18 On ="<H3>"
+Any Font 18 Off ="</H3>"
+Any Font 24 On ="<H2>"
+Any Font 24 Off ="</H2>"
+Any Font 28 On ="<H2>"
+Any Font 28 Off ="</H2>"
+Any Font 32 On ="<H1>"
+Any Font 32 Off ="</H1>"
+Any Font 36 On ="<H1>"
+Any Font 36 Off ="</H1>"
+
+# Now the really specific stuff for WWW Urls
+# This one decodes the special Url macro which puts the URL reference
+# inside a WP Comment (so it is hidden but editable) and makes the
+# link text blue and underline
+# If we find a comment inside an Url style pair defined by the user
+# we can be pretty sure it was deliberate ( done by MACRO), so we
+# have this special translation just for Comments inside Url Styles
+# Course, if not defined (UrlComment) it will default to standard
+#
+UrlOn="%e" # Eat style codes
+UrlOnEnd="%f" # Style end for UrlOn, restart output
+# the comment text, passed as parameter 2 text, is the URL
+UrlCommentOn="<a href=\""
+UrlCommentOff="\">" # URL link
+UrlOff="</a>%e" # Anchor Off and eat style codes
+UrlOffEnd="%f" # Style end for UrlOff, restart output
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl
new file mode 100755
index 00000000..fee93282
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl
@@ -0,0 +1,161 @@
+#!/usr/bin/perl -w
+use strict;
+#
+# Version 1.0.1 12-Feb-2002
+# Written by David Adams <d.j.adams@soton.ac.uk>
+#
+# Uses pdftotext & pdfinfo utilities from the xpdf package
+# to read an Adobe Acrobat file and produce HTML output.
+#
+# Can be called directly from htdig as an external converter,
+# or may be called by doc2html.pl converter script.
+#
+
+####--- Configuration ---####
+# Full paths of pdftotext and pdfinfo
+# (get them from the xpdf package at http://www.foolabs.com/xpdf/):
+
+#### YOU MUST SET THESE ####
+
+my $PDFTOTEXT = "/... .../pdftotext";
+my $PDFINFO = "/... .../pdfinfo";
+#
+# De-hyphenation option (only affects end-of-line hyphens):
+my $Dehyphenate = 1;
+#
+# Set title to be used when none is found:
+my $Default_title = "Adobe Acrobat Document";
+#
+# make portable to win32 platform or unix:
+my $null = "/dev/null";
+if ($^O eq "MSWin32") {$null = "nul";}
+####--- End of configuration ---###
+
+if (! -x $PDFTOTEXT) { die "Unable to execute pdftotext" }
+
+my $Input = $ARGV[0] || die "Usage: pdf2html.pl filename [mime-type] [URL]";
+my $MIME_type = $ARGV[1] || '';
+if ($MIME_type and ($MIME_type !~ m#^application/pdf#i)) {
+ die "MIME/type $MIME_type wrong";
+}
+
+my $Name = $ARGV[2] || '';
+$Name =~ s#^(.*/)##;
+# decode if 3rd argument was a URL
+$Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie if $1;
+
+&pdf_head;
+&pdf_body;
+exit;
+
+#------------------------------------------------------------------------------
+
+sub pdf_head {
+#
+# Contributed by Greg Holmes and Michael Fuller
+# (any errors by David Adams)
+#
+ my $title = '';
+ my $subject = '';
+ my $keywords = '';
+ if (open(INFO, "$PDFINFO '$Input' 2>$null |")) {
+ while (<INFO>) {
+ if (m/^title:/i) {
+ s/^title:\s+//i;
+ $title = &clean_pdf($_);
+ } elsif (m/^subject:/i) {
+ s/^subject:\s+//i;
+ $subject = &clean_pdf($_);
+ } elsif (m/^keywords:/i) {
+ s/^keywords:\s+//i;
+ $keywords = &clean_pdf($_);
+ }
+
+ }
+ close INFO;
+ } else { warn "cannot execute pdfinfo" }
+ if (not length $title) {
+ if ($Name) {
+ $title = '[' . $Name . ']';
+ } else {
+ $title = $Default_title;
+ }
+ }
+
+ print "<HTML>\n<HEAD>\n";
+ print "<TITLE>$title</TITLE>\n";
+ if (length $subject) {
+ print '<META NAME="DESCRIPTION" CONTENT="' . $subject. "\">\n";
+ }
+ if (length $keywords) {
+ print '<META NAME="KEYWORDS" CONTENT="' . $keywords . "\">\n";
+ }
+ print "</HEAD>\n";
+
+###print STDERR "\n$Name:\n";
+###print STDERR "\tTitle:\t$title\n";
+###print STDERR "\tDescription:\t$subject\n";
+###print STDERR "\tKeywords:\t$keywords\n";
+
+}
+
+#------------------------------------------------------------------------------
+
+sub pdf_body {
+
+ my $bline = '';
+ open(CAT, "$PDFTOTEXT -raw '$Input' - |") ||
+ die "$PDFTOTEXT doesn't want to be opened using pipe\n";
+ print "<BODY>\n";
+ while (<CAT>) {
+ while ( m/[A-Za-z\300-\377]-\s*$/ && $Dehyphenate) {
+ $_ .= <CAT>;
+ last if eof;
+ s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s;
+ }
+ s/\255/-/g; # replace dashes with hyphens
+ # replace bell, backspace, tab. etc. with single space:
+ s/[\000-\040]+/ /g;
+ $_ = &HTML($_);
+ if (length) {
+ print $bline, $_, "\n";
+ $bline = "<br>\n";
+ } else {
+ $bline = "<p>\n";
+ }
+ }
+ close CAT;
+
+ print "</BODY>\n</HTML>\n";
+ return;
+}
+
+#------------------------------------------------------------------------------
+
+sub HTML {
+
+ my $text = shift;
+
+ $text =~ s/\f/\n/gs; # replace form feed
+ $text =~ s/\s+/ /g; # replace multiple spaces, etc. with a single space
+ $text =~ s/\s+$//gm; # remove trailing space
+ $text =~ s/&/&amp;/g;
+ $text =~ s/</&lt;/g;
+ $text =~ s/>/&gt;/g;
+ chomp $text;
+
+ return $text;
+}
+
+#------------------------------------------------------------------------------
+
+sub clean_pdf {
+# removes odd pair of characters that may be in pdfinfo output
+# Any double quotes are replaced with single
+
+ my $text = shift;
+ chomp $text;
+ $text =~ s/\376\377//g;
+ $text =~ s/\"/\'/g;
+ return $text;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl
new file mode 100755
index 00000000..5f0cdb07
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl
@@ -0,0 +1,67 @@
+#!/usr/bin/perl -w
+use strict;
+#
+# Version 1.1 17-May-2002
+# Written by David Adams <d.j.adams@soton.ac.uk>
+#
+# Uses swfparse utility to extract URLs from Shockwave flash files
+#
+# Can be called directly from htdig as an external converter,
+# or may be called by doc2html.pl converter script.
+#
+
+####--- Configuration ---####
+# Full path of swfparse
+# (get it from http://www.htdig.org/files/contrib/contrib/parsers/)
+
+##### YOU MUST SET THIS ####
+
+my $SWFPARSE = "/.. .../swfdump";
+
+####--- End of configuration ---###
+
+if (! -x $SWFPARSE) { die "Unable to execute swfparse" }
+
+my $Input = $ARGV[0] || die "Usage: swf2html.pl filename [mime-type] [URL]";
+my $MIME_type = $ARGV[1] || '';
+if ($MIME_type and ($MIME_type !~ m#^application/x-shockwave-flash#i)) {
+ die "MIME/type $MIME_type wrong";
+}
+
+my $Name = $ARGV[2] || '';
+$Name =~ s#^(.*/)##;
+# decode if 3rd argument was a URL
+$Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie if $1;
+
+print <<"HEAD";
+<HTML>
+<HEAD>
+<TITLE>SWF $Name</TITLE>
+<META NAME="robots" CONTENT="follow, noindex">
+</HEAD>
+HEAD
+
+open(CAT, "$SWFPARSE -t '$Input'|") ||
+ die "$SWFPARSE doesn't want to be opened using pipe\n";
+
+print "<BODY>\n";
+my $c = 0;
+while (<CAT>) {
+### if ($_ !~ m/\s+getUrl\s+(.*?)\s+.*$/) { next }
+ if ($_ !~ m/\s+getUrl\s+(.*)$/) { next }
+ my $link = $1 . ' ';
+ if ($link =~ m/^FSCommand:/) { next }
+ if ($link =~ m/\s+target\s+/) {
+ $link =~ s/^(.*)\s+target\s+.*$/$1/;
+ } else {
+ $link =~ s/^(.*?)\s+.*$/$1/;
+ }
+ print '<A href="', $link, '"> </a>', "\n";
+ $c++;
+}
+close CAT;
+
+print "</BODY>\n</HTML>\n";
+print STDERR "No links extracted\n" if ($c == 0);
+
+exit;
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl b/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl
new file mode 100755
index 00000000..ef933de8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl
@@ -0,0 +1,183 @@
+#!/usr/local/bin/perl
+
+##
+## doclist.pl (C) 1995 Andrew Scherpbier
+##
+## This program will list the information in the documentdb generated by htdig.
+##
+
+use GDBM_File;
+
+$dbfile = $ARGV[0];
+
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!";
+
+
+while (($key, $value) = each %docdb)
+{
+ next if $key =~ /^nextDocID/;
+ %record = parse_ref_record($value);
+ print "Title: $record{'TITLE'}\n";
+ print "Descriptions: $record{'DESCRIPTIONS'}\n";
+ print "URL: $record{'URL'}\n";
+ print "\n";
+}
+
+sub parse_ref_record
+{
+ local($value) = @_;
+ local(%rec, $length, $count, $result);
+
+ while (length($value) > 0)
+ {
+ $what = unpack("C", $value);
+ $value = substr($value, 1);
+ if ($what == 0)
+ {
+ # ID
+ $rec{"ID"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 1)
+ {
+ # TIME
+ $rec{"TIME"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 2)
+ {
+ # ACCESSED
+ $rec{"ACCESSED"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 3)
+ {
+ # STATE
+ $rec{"STATE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 4)
+ {
+ # SIZE
+ $rec{"SIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 5)
+ {
+ # LINKS
+ $rec{"LINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 6)
+ {
+ # IMAGESIZE
+ $rec{"IMAGESIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 7)
+ {
+ # HOPCOUNT
+ $rec{"HOPCOUNT"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 8)
+ {
+ # URL
+ $length = unpack("i", $value);
+ $rec{"URL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 9)
+ {
+ # HEAD
+ $length = unpack("i", $value);
+ $rec{"HEAD"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 10)
+ {
+ # TITLE
+ $length = unpack("i", $value);
+ $rec{"TITLE"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 11)
+ {
+ # DESCRIPTIONS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"DESCRIPTIONS"} = $result;
+ }
+ elsif ($what == 12)
+ {
+ # ANCHORS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"ANCHORS"} = $result;
+ }
+ elsif ($what == 13)
+ {
+ # EMAIL
+ $length = unpack("i", $value);
+ $rec{"EMAIL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 14)
+ {
+ # NOTIFICATION
+ $length = unpack("i", $value);
+ $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 15)
+ {
+ # SUBJECT
+ $length = unpack("i", $value);
+ $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 16)
+ {
+ # STRING (ignore, but unpack)
+ $length = unpack("i", $value);
+ $rec{"STRING"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 17)
+ {
+ # METADSC
+ $length = unpack("i", $value);
+ $rec{"METADSC"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 18)
+ {
+ # BACKLINKS
+ $rec{"BACKLINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 19)
+ {
+ # SIGNATURE
+ $rec{"SIG"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ }
+ return %rec;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl b/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl
new file mode 100755
index 00000000..976cf333
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl
@@ -0,0 +1,201 @@
+#!/usr/local/bin/perl
+
+##
+## listafter.pl (C) 1996 Andrew Scherpbier
+##
+## This program will list all URLs which were modified after a specified date.
+## For each URL, the following fields are displayed:
+## Title
+## Descriptions
+## URL
+## Last modification date (in ctime format)
+##
+## The date is specified as mm/dd/yyyy
+##
+## Example usage:
+## listafter.pl 1/1/1996 /opt/www/htdig/sdsu.docdb
+##
+
+use GDBM_File;
+require('timelocal.pl');
+
+$t = $ARGV[0];
+$t =~ m,([0-9]+)/([0-9]+)/([0-9]+),;
+$when = timelocal(0, 0, 0, $2, $1 - 1, $3 - 1900);
+$dbfile = $ARGV[1];
+
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!";
+
+while (($key, $value) = each %docdb)
+{
+ next if $key =~ /^nextDocID/;
+ %record = parse_ref_record($value);
+ if ($record{'TIME'} >= $when)
+ {
+ print "Title: $record{'TITLE'}\n";
+ print "Descriptions: $record{'DESCRIPTIONS'}\n";
+ print "URL: $record{'URL'}\n";
+ $w = localtime($record{'TIME'} * 1);
+ print "Modified: $w\n";
+ print "\n";
+ }
+}
+
+sub parse_ref_record
+{
+ local($value) = @_;
+ local(%rec, $length, $count, $result);
+
+ while (length($value) > 0)
+ {
+ $what = unpack("C", $value);
+ $value = substr($value, 1);
+ if ($what == 0)
+ {
+ # ID
+ $rec{"ID"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 1)
+ {
+ # TIME
+ $rec{"TIME"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 2)
+ {
+ # ACCESSED
+ $rec{"ACCESSED"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 3)
+ {
+ # STATE
+ $rec{"STATE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 4)
+ {
+ # SIZE
+ $rec{"SIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 5)
+ {
+ # LINKS
+ $rec{"LINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 6)
+ {
+ # IMAGESIZE
+ $rec{"IMAGESIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 7)
+ {
+ # HOPCOUNT
+ $rec{"HOPCOUNT"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 8)
+ {
+ # URL
+ $length = unpack("i", $value);
+ $rec{"URL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 9)
+ {
+ # HEAD
+ $length = unpack("i", $value);
+ $rec{"HEAD"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 10)
+ {
+ # TITLE
+ $length = unpack("i", $value);
+ $rec{"TITLE"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 11)
+ {
+ # DESCRIPTIONS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"DESCRIPTIONS"} = $result;
+ }
+ elsif ($what == 12)
+ {
+ # ANCHORS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"ANCHORS"} = $result;
+ }
+ elsif ($what == 13)
+ {
+ # EMAIL
+ $length = unpack("i", $value);
+ $rec{"EMAIL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 14)
+ {
+ # NOTIFICATION
+ $length = unpack("i", $value);
+ $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 15)
+ {
+ # SUBJECT
+ $length = unpack("i", $value);
+ $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 16)
+ {
+ # STRING (ignore, but unpack)
+ $length = unpack("i", $value);
+ $rec{"STRING"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 17)
+ {
+ # METADSC
+ $length = unpack("i", $value);
+ $rec{"METADSC"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 18)
+ {
+ # BACKLINKS
+ $rec{"BACKLINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 19)
+ {
+ # SIGNATURE
+ $rec{"SIG"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ }
+ return %rec;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README
new file mode 100644
index 00000000..0889e245
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README
@@ -0,0 +1,3 @@
+ewswrap.cgi = Excite for Web Servers (EWS) to htsearch wrapper
+htwrap.cgi = htsearch wrapper to do some basic
+ sanity checking on the query
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi
new file mode 100755
index 00000000..f3f9419e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi
@@ -0,0 +1,118 @@
+#!/usr/bin/perl -w
+
+# ewswrap.cgi
+#
+# by John Grohol (grohol@cmhc.com)
+# Freeware
+# v1.00 - 5 Oct 1998
+#
+# Simple wrapper script for htsearch to parse old
+# Excite for Web Servers (EWS) forms as-is
+# This only makes sense if you want to upgrade
+# your search engine but can't upgrade every form
+# which points to it (e.g., external sites are
+# pointing to your EWS CGI).
+#
+# As an added bonus, given the differences from how
+# EWS handles queries to how htsearch handles them,
+# it does some basic sanity checking on the query
+# and tries to re-form it into a valid htsearch query.
+#
+# This script must be called using the POST method!
+#
+#_______________________________________________________
+# Set some defaults here
+# These can be overridden in the calling form
+
+$config = "htdig"; # htDig config file
+$exclude = ""; # exclude this url
+$restrict = ""; # restrict to this url
+$format = "builtin-long"; # results format
+$method = "and"; # default method
+$dir = "/usr/httpd/cgi-bin"; # Set cgi-bin dir
+
+#_______________________________________________________
+# Rest of program
+
+ $| = 1;
+
+# Get the form variables from POST form
+
+ read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
+ @pairs = split(/&/, $buffer);
+
+ foreach $pair (@pairs) {
+ ($name, $value) = split(/=/, $pair);
+ $value =~ tr/+/ /;
+ $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
+ $value =~ s/<!--(.|\n)*-->//g;
+ $value =~ s/<([^>]|\n)*>//g;
+ $tags{$name} = $value;
+ }
+
+$squery = $tags{'search'}; # Set search query
+$page = $tags{'page'};
+if (not($page)) { $page=1; }
+
+ $squery =~ s/\+//g;
+ $squery =~ s/\-//g;
+ $squery =~ s/the//g;
+ $squery =~ s/not//g;
+ $squery =~ s/what//g;
+
+# If someone puts "and" or "or" in the query,
+# then it should be a boolean query
+
+ if (($squery =~ " and ") || ($squery =~ " or ")) {
+ $method = "boolean"; }
+
+# Count the number of words in the query
+
+ @words = split(/ /,$squery);
+ foreach $word (@words) { $xwd++; }
+
+# If there are quotes in the query, we have to
+# turn them into parentheses and make it boolean
+
+if (($squery =~ "\"")) {
+ $oo = (index($squery,"\""))+1;
+ $od = (index($squery,"\"",$oo))-1;
+ $op = $od - $oo +1;
+ $yty = substr($squery,$oo,$op);
+ @wrds = split(/ /,$yty);
+ foreach $wrd (@wrds) { $xww++; }
+
+ if ($xww eq 2) { # Right now, can only handle 2-word phrases
+ $oi = (index($yty," "));
+ if ($oi > -1) {
+ $ytt = substr($yty,0,$oi);
+ $john = $od - $oi +1;
+ $yte = substr($yty,$oi+1,$john);
+ $james = substr($squery,$od+2);
+ $james =~ s/ and//g;
+ $james =~ s/ / and /g;
+ $squery = "($ytt and $yte) $james"; # We turn it into a
+ $method = "boolean"; # boolean query
+ }
+
+# More than 2 words in quotes (phrase), just
+# turn it into one big string of words and set method to "and"
+
+ } else {
+ $squery =~ s/\"//g;
+ $squery =~ s/ and//g;
+ $method = "and";
+ $yty = "";
+ }
+}
+
+# Set the environment variables
+
+$ENV{'REQUEST_METHOD'} = 'GET';
+$ENV{'QUERY_STRING'} = "config=$config&restrict=$restrict&exclude=$exclude&words=$squery&method=$method&format=$format&page=$page";
+
+# Run htsearch
+
+system("$dir/htsearch");
+
+1;
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi
new file mode 100755
index 00000000..1e7ea66b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi
@@ -0,0 +1,125 @@
+#!/usr/bin/perl -w
+
+# htwrap.cgi
+#
+# by John Grohol (grohol@cmhc.com)
+# Freeware
+# v1.00 - 5 Oct 1998
+#
+# Simple wrapper script for htsearch to
+# do some basic sanity checking on the query
+# and tries to re-form it into a valid htsearch query.
+#
+# This script must be called using the GET method!
+#
+#_______________________________________________________
+# Set some defaults here
+# These can be overridden in the calling form
+
+$config = "htdig"; # htDig config file
+$exclude = ""; # exclude this url
+$restrict = ""; # restrict to this url
+$format = "builtin-long"; # results format
+$method = "and"; # default method
+$dir = "/usr/httpd/cgi-bin"; # Set cgi-bin dir
+
+#_______________________________________________________
+# Rest of program
+
+ $| = 1;
+
+# Get the form variables
+
+&ParseTags($ENV{'PATH_INFO'});
+&ParseTags($ENV{'QUERY_STRING'});
+
+$squery = $tags{'words'};
+$restrict = $tags{'restrict'};
+$method = $tags{'method'};
+$format = $tags{'format'};
+$page = $tags{'page'};
+
+if (not($page)) { $page=1; }
+
+ $squery =~ s/\+//g;
+ $squery =~ s/\-//g;
+ $squery =~ s/the//g;
+ $squery =~ s/not//g;
+ $squery =~ s/what//g;
+
+# If someone puts "and" or "or" in the query,
+# then it should be a boolean query
+
+ if (($squery =~ " and ") || ($squery =~ " or ")) {
+ $method = "boolean"; }
+
+# How many words are there in the query?
+ @words = split(/ /,$squery);
+ foreach $word (@words) { $xwd++; }
+
+# If there are quotes in the query, we have to
+# turn them into parentheses and make it boolean
+
+if (($squery =~ "\"")) {
+ $oo = (index($squery,"\""))+1;
+ $od = (index($squery,"\"",$oo))-1;
+ $op = $od - $oo +1;
+ $yty = substr($squery,$oo,$op);
+ @wrds = split(/ /,$yty);
+ foreach $wrd (@wrds) { $xww++; }
+
+
+ if ($xww eq 2) { # Right now, can only handle 2-word phrases
+ $oi = (index($yty," "));
+ if ($oi > -1) {
+ $ytt = substr($yty,0,$oi);
+ $john = $od - $oi +1;
+ $yte = substr($yty,$oi+1,$john);
+ $james = substr($squery,$od+2);
+ $james =~ s/ and//g;
+ $james =~ s/ / and /g;
+ $squery = "($ytt and $yte) $james"; # We turn it into a
+ $method = "boolean"; # boolean query
+ }
+
+# More than 2 words in quotes (phrase), just
+# turn it into one big string of words and set method to "and"
+
+ } else {
+ $squery =~ s/\"//g; # Dump quotes
+ $squery =~ s/ and//g; # Dump and's
+ $squery =~ s/ or//g; # Dump or's
+ $method = "and";
+ $yty = "";
+ }
+}
+
+# Set the environment variables
+
+$ENV{'REQUEST_METHOD'} = 'GET';
+$ENV{'QUERY_STRING'} = "config=$config&restrict=$restrict&exclude=$exclude&words=$squery&method=$method&format=$format&page=$page"
+;
+
+# Run htsearch
+
+system("$dir/htsearch");
+
+exit;
+
+sub ParseTags {
+ local($_) = @_;
+ local(@terms, $tag, $val);
+ s|^/||;
+ @terms = split('&');
+ foreach $term (@terms) {
+ ($tag,$val) = split('=',$term,2);
+ $val =~ tr/+/ /;
+ $val =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
+ $val =~ s/<!--(.|\n)*-->//g;
+ $val =~ s/<([^>]|\n)*>//g;
+ # may override previous value
+ $tags{$tag} = $val;
+ }
+}
+
+1;
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords b/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords
new file mode 100644
index 00000000..9912e646
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords
@@ -0,0 +1,349 @@
+a
+above
+about
+according
+across
+actually
+adj
+after
+afterwards
+again
+against
+all
+almost
+alone
+along
+already
+also
+although
+always
+among
+amongst
+an
+and
+another
+any
+anyhow
+anyone
+anything
+anywhere
+are
+aren
+arent
+around
+as
+at
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+begin
+beginning
+behind
+being
+below
+beside
+besides
+between
+beyond
+billion
+both
+but
+by
+can
+cant
+cannot
+caption
+co
+could
+couldnt
+did
+didnt
+do
+does
+doesnt
+dont
+down
+during
+each
+eg
+eight
+eighty
+either
+else
+elsewhere
+end
+ending
+enough
+etc
+even
+ever
+every
+everyone
+everything
+everywhere
+except
+few
+fifty
+first
+five
+for
+former
+formerly
+forty
+found
+four
+from
+further
+had
+has
+hasnt
+have
+havent
+he
+hence
+her
+here
+hereafter
+hereby
+herein
+heres
+hereupon
+hers
+herself
+hes
+him
+himself
+his
+how
+however
+hundred
+ie
+if
+in
+inc
+indeed
+instead
+into
+is
+isnt
+it
+its
+itself
+last
+later
+latter
+latterly
+least
+less
+let
+like
+likely
+ltd
+made
+make
+makes
+many
+may
+maybe
+me
+meantime
+meanwhile
+might
+million
+miss
+more
+moreover
+most
+mostly
+mr
+mrs
+much
+must
+my
+myself
+namely
+neither
+never
+nevertheless
+next
+nine
+ninety
+no
+nobody
+none
+nonetheless
+noone
+nor
+not
+nothing
+now
+nowhere
+of
+off
+often
+on
+once
+one
+only
+onto
+or
+others
+otherwise
+our
+ours
+ourselves
+out
+over
+overall
+own
+page
+per
+perhaps
+rather
+re
+recent
+recently
+same
+seem
+seemed
+seeming
+seems
+seven
+seventy
+several
+she
+shes
+should
+shouldnt
+since
+six
+sixty
+so
+some
+somehow
+someone
+something
+sometime
+sometimes
+somewhere
+still
+stop
+such
+taking
+ten
+than
+that
+the
+their
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+thereupon
+these
+they
+thirty
+this
+those
+though
+thousand
+three
+through
+throughout
+thru
+thus
+tips
+to
+together
+too
+toward
+towards
+trillion
+twenty
+two
+under
+unless
+unlike
+unlikely
+until
+up
+update
+updated
+updates
+upon
+us
+used
+using
+ve
+very
+via
+want
+wanted
+wants
+was
+wasnt
+way
+ways
+we
+wed
+well
+were
+werent
+what
+whats
+whatever
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+whereupon
+wherever
+wheres
+whether
+which
+while
+whither
+who
+whoever
+whole
+whom
+whomever
+whose
+why
+will
+with
+within
+without
+wont
+work
+worked
+works
+working
+would
+wouldnt
+yes
+yet
+you
+youd
+youll
+your
+youre
+yours
+yourself
+yourselves
+youve
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh b/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh
new file mode 100644
index 00000000..7a78955d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh
@@ -0,0 +1,96 @@
+#! /bin/sh
+
+# rundig.sh
+# a script to drive ht://Dig updates
+# Copyright (c) 1998 Colin Viebrock <cmv@shmooze.net>
+# Copyright (c) 1998-1999 Geoff Hutchison <ghutchis@wso.williams.edu>
+# Updated for ht://Dig 3.2.0b3 Feb 2001, Copyright (c) 2001 Geoff Hutchison
+# Distributed under the GNU GPL version 2 or later
+
+if [ "$1" = "-v" ]; then
+ verbose="-v"
+fi
+
+# This is the directory where htdig lives
+BASEDIR=/export/htdig
+
+# This is the db dir
+DBDIR=$BASEDIR/db/
+
+# This is the name of a temporary report file
+REPORT=/tmp/htdig.report
+
+# This is who gets the report
+REPORT_DEST="webmaster@yourdomain.com"
+export REPORT_DEST
+
+# This is the subject line of the report
+SUBJECT="cron: htdig report for domain"
+
+# This is the name of the conf file to use
+CONF=htdig.conf
+
+# This is the directory htdig will use for temporary sort files
+TMPDIR=$DBDIR
+export TMPDIR
+
+# This is the PATH used by this script. Change it if you have problems
+# with not finding wc or grep.
+PATH=/usr/local/bin:/usr/bin:/bin
+
+##### Dig phase
+STARTTIME=`date`
+echo Start time: $STARTTIME
+echo rundig: Start time: $STARTTIME > $REPORT
+$BASEDIR/bin/htdig $verbose -s -a -c $BASEDIR/conf/$CONF >> $REPORT
+TIME=`date`
+echo Done Digging: $TIME
+echo rundig: Done Digging: $TIME >> $REPORT
+
+##### Purge Phase
+# (clean out broken links, etc.)
+$BASEDIR/bin/htpurge $verbose -a -c $BASEDIR/conf/$CONF >> $REPORT
+TIME=`date`
+echo Done Purging: $TIME
+echo rundig: Done Purging: $TIME >> $REPORT
+
+##### Cleanup Phase
+# To enable htnotify or the soundex search, uncomment the following lines
+# $BASEDIR/bin/htnotify $verbose >>$REPORT
+# $BASEDIR/bin/htfuzzy $verbose soundex
+# To get additional statistics, uncomment the following line
+# $BASEDIR/bin/htstat $verbose >>$REPORT
+
+# Move 'em into place. Since these are only used by htdig for update digs
+# and we always use -a, we just leave them as .work
+# mv $DBDIR/db.docs.index.work $DBDIR/db.docs.index
+# (this is just a mapping from a URL to a DocID)
+# We need the .work for next time as an update dig, plus the copy for searching
+cp $DBDIR/db.docdb.work $DBDIR/db.docdb
+cp $DBDIR/db.excerpts.work $DBDIR/db.excerpts
+cp $DBDIR/db.words.db.work $DBDIR/db.words.db
+test -f $DBDIR/db.words.db.work_weakcmpr &&
+ cp $DBDIR/db.words.db.work_weakcmpr $DBDIR/db.words.db_weakcmpr
+
+END=`date`
+echo End time: $END
+echo rundig: End time: $END >> $REPORT
+echo
+
+# Grab the important statistics from the report file
+# All lines begin with htdig: or htmerge:
+fgrep "htdig:" $REPORT
+echo
+fgrep "htmerge:" $REPORT
+echo
+fgrep "rundig:" $REPORT
+echo
+
+WC=`wc -l $REPORT`
+echo Total lines in $REPORT: $WC
+
+# Send out the report ...
+mail -s "$SUBJECT - $STARTTIME" $REPORT_DEST < $REPORT
+
+# ... and clean up
+rm $REPORT
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig b/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig
new file mode 100755
index 00000000..1bcc3e08
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig
@@ -0,0 +1,53 @@
+#! /bin/sh
+
+#
+# updatedig
+#
+# This is a script to update the search database for ht://Dig.
+# Copyright (c) 1998 David Robley webmaster@www.nisu.flinders.edu.au
+#
+if [ "$1" = "-v" ]; then
+ verbose=-v
+fi
+
+# -a: run using alternate work files so search can still be done during index run
+# -t: create an ASCII version of document database in doc_list as specified
+# in the config file
+# -s: print stats after completion
+/web/webdocs/htdig/bin/htdig -a -t $verbose -s
+/web/webdocs/htdig/bin/htmerge -a $verbose -s
+/web/webdocs/htdig/bin/htnotify $verbose
+
+# Because the -a switch creates alternate work files, but doesn't seem to move
+# them into the correct place, we will do it here.
+mv /web/webdocs/htdig/db/db.docdb /web/webdocs/htdig/db/db.docdb.old
+mv /web/webdocs/htdig/db/db.docdb.work /web/webdocs/htdig/db/db.docdb
+
+mv /web/webdocs/htdig/db/db.docs.index /web/webdocs/htdig/db/db.docs.index.old
+mv /web/webdocs/htdig/db/db.docs.index.work /web/webdocs/htdig/db/db.docs.index
+
+mv /web/webdocs/htdig/db/db.wordlist /web/webdocs/htdig/db/db.wordlist.old
+mv /web/webdocs/htdig/db/db.wordlist.work /web/webdocs/htdig/db/db.wordlist
+
+mv /web/webdocs/htdig/db/db.words.gdbm /web/webdocs/htdig/db/db.words.gdbm.old
+mv /web/webdocs/htdig/db/db.words.gdbm.work /web/webdocs/htdig/db/db.words.gdbm
+
+#
+# Only create the endings database if it doesn't already exist.
+# This database is static, so even if pages change, this database will not
+# need to be rebuilt.
+#
+if [ ! -f /web/webdocs/htdig/common/word2root.gdbm ]
+then
+ /web/webdocs/htdig/bin/htfuzzy $verbose endings
+fi
+
+# This next needs to be run if synonyms are added/modified/removed
+# Guess the best way would be to delete synonyms.gdbm before
+# running this script??
+
+if [ ! -f /web/webdocs/htdig/common/synonyms.gdbm ]
+then
+ /web/webdocs/htdig/bin/htfuzzy $verbose synonyms
+fi
+# end updatedig
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/handler.pl b/debian/htdig/htdig-3.2.0b6/contrib/handler.pl
new file mode 100755
index 00000000..53ec7f34
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/handler.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+#
+# handler.pl
+# Sample ExternalTransport handler for HTTP and HTTPS using curl
+# for the ht://Dig package 3.2.x and higher
+# by Geoffrey Hutchison <ghutchis@wso.williams.edu>
+# Copyright (c) 1999 under the terms of the GNU General Public License version 2 (GPL)
+#
+# handler.pl protocol url config_file
+#
+# Really a simplistic example--this should probably use Perl's LWP for HTTP/HTTPS/FTP
+# Right now it uses the program 'curl' to do HTTP or HTTPS transactions.
+#
+
+my $curl_path="/usr/local/bin/curl";
+my $protocol=$ARGV[0];
+my $url=$ARGV[1];
+my $config_file=$ARGV[2];
+
+open (DOC, "$curl_path -i $url |") || die "s:\t404\nr:\tCan't open curl!\n";
+while ( my $line = <DOC> ) {
+ if ( $line =~ /^HTTP.?\/\d.\d\s(\d\d\d)\s(.*)/io ) {
+ print "s:\t$1\n";
+ print "r:\t$2\n";
+ } elsif ( $line =~ /^last-modified: (.*)$/io ) {
+ print "m:\t$1\n";
+ } elsif ( $line =~ /^content-type: (.*)$/io ) {
+ print "t:\t$1\n";
+ } elsif ( $line =~ /^content-length: (.*)$/io ) {
+ print "l:\t$1\n";
+ } elsif ( $line =~ /^location: (.*)$/io ) {
+ print "u:\t$1\n";
+ }
+
+ last if ( $line =~ /^\s*$/ )
+}
+
+local($/) = undef;
+my $text = <DOC>;
+close(DOC);
+
+print "\n$text";
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec b/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec
new file mode 100644
index 00000000..1631164f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec
@@ -0,0 +1,184 @@
+# Last definitions below override, so change the order to redefine. You can't
+# comment them out because %defines are parsed inside comments.
+# For Red Hat [456].x...
+%define contentdir /home/httpd
+%define commondir /var/lib/htdig/common
+%define databasedir /var/lib/htdig/db
+%define searchdir %{contentdir}/html
+%define configdir /etc/htdig
+%define bindir /usr/sbin
+%define mandir /usr/man
+%define docdir /usr/doc
+# For Red Hat [789].x, FCx...
+%define contentdir /var/www
+%define commondir %{_prefix}/share/htdig
+%define databasedir /var/lib/htdig
+%define searchdir %{contentdir}/html/htdig
+%define configdir %{_sysconfdir}/htdig
+%define bindir %{_bindir}
+%define mandir %{_mandir}
+%define docdir %{_docdir}
+Summary: A web indexing and searching system for a small domain or intranet
+Name: htdig
+Version: 3.2.0b6
+Release: 8
+Copyright: GPL
+Group: Networking/Utilities
+BuildRoot: /var/tmp/htdig-root
+Source0: http://www.htdig.org/files/htdig-%{PACKAGE_VERSION}.tar.gz
+URL: http://www.htdig.org/
+Packager: Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+
+%description
+The ht://Dig system is a complete world wide web indexing and searching
+system for a small domain or intranet. This system is not meant to replace
+the need for powerful internet-wide search systems like Lycos, Infoseek,
+Webcrawler and AltaVista. Instead it is meant to cover the search needs for
+a single company, campus, or even a particular sub section of a web site.
+
+As opposed to some WAIS-based or web-server based search engines, ht://Dig
+can span several web servers at a site. The type of these different web
+servers doesn't matter as long as they understand the HTTP 1.0 protocol.
+%prep
+%setup -q -n htdig-%{PACKAGE_VERSION}
+#%patch0 -p0 -b .noparse
+
+%build
+CFLAGS="$RPM_OPT_FLAGS" ./configure --prefix=/usr --mandir=%{mandir} \
+ --bindir=%{bindir} --libexec=/usr/lib --libdir=/usr/lib \
+ --with-image-dir=%{contentdir}/html/htdig \
+ --with-cgi-bin-dir=%{contentdir}/cgi-bin \
+ --with-search-dir=%{searchdir} \
+ --with-config-dir=%{configdir} \
+ --with-common-dir=%{commondir} \
+ --with-database-dir=%{databasedir}
+#rm -f htlib/langinfo.h # conflicts with libc5 headers
+#echo '#include "/usr/include/langinfo.h"' > htlib/langinfo.h # to keep htlib/Makefile happy
+make
+
+%install
+
+rm -rf $RPM_BUILD_ROOT
+
+make DESTDIR=$RPM_BUILD_ROOT install-strip
+mkdir -p $RPM_BUILD_ROOT/etc/cron.daily
+ln -s ../..%{bindir}/rundig $RPM_BUILD_ROOT/etc/cron.daily/htdig-dbgen
+ln -s ../../../..%{docdir}/htdig-%{PACKAGE_VERSION} \
+ $RPM_BUILD_ROOT%{contentdir}/html/htdig/htdoc
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post
+# Only run this if installing for the first time
+if [ "$1" = 1 ]; then
+ SERVERNAME="`grep '^ServerName' /etc/httpd/conf/httpd.conf | awk 'NR == 1 {print $2}'`"
+ [ -z "$SERVERNAME" ] && SERVERNAME="`hostname -f`"
+ [ -z "$SERVERNAME" ] && SERVERNAME="localhost"
+ TMPFILE=$(mktemp /tmp/ht.XXXXXX) || exit 1
+ sed 's/^start_url:.*/#&\
+# (See end of file for this parameter.)/' %{configdir}/htdig.conf > $TMPFILE
+ cat $TMPFILE > %{configdir}/htdig.conf
+ rm $TMPFILE
+ cat >> %{configdir}/htdig.conf <<!
+
+# Automatically set up by htdig RPM, from your current Apache httpd.conf...
+# Verify and configure these, and set maintainer above, before running
+# %{bindir}/rundig.
+# See %{docdir}/htdig*/attrs.html for descriptions of attributes.
+
+# The URL(s) where htdig will start. See also limit_urls_to above.
+start_url: http://$SERVERNAME/
+
+# These attributes allow indexing server via local filesystem rather than HTTP.
+local_urls: http://$SERVERNAME/=%{contentdir}/html/
+local_user_urls: http://$SERVERNAME/=/home/,/public_html/
+!
+
+fi
+
+%files
+%defattr(-,root,root)
+%config %{configdir}/htdig.conf
+%config %{configdir}/mime.types
+%config %{configdir}/HtFileType-magic.mime
+%config %{configdir}/cookies.txt
+%config %{bindir}/rundig
+%config %{searchdir}/search.html
+%config %{commondir}/[a-rt-z]*.html
+%config %{commondir}/s[a-df-z]*.html
+%config %{commondir}/english*
+%config %{commondir}/synonyms
+%config %{commondir}/bad_words
+%config(missingok) /etc/cron.daily/htdig-dbgen
+%{bindir}/[Hh]t*
+/usr/lib/*
+/usr/include/*
+%dir %{databasedir}
+%{contentdir}/cgi-bin/htsearch
+%{contentdir}/cgi-bin/qtest
+%{contentdir}/html/htdig/*.gif
+%{contentdir}/html/htdig/*.png
+%{contentdir}/html/htdig/htdoc
+%{mandir}/man*
+
+%doc README htdoc/*
+
+%changelog
+* Thu Jun 10 2004 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - built with 3.2.0b6, adding man pages & include files
+ - updated pathnames for current systems (/usr/share/htdig for common dir)
+ - used variable for configdir, mandir & docdir
+ - used mktemp to create safe temp file in post script
+
+* Wed Jul 4 2001 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - used variables for many pathnames, to allow easy switchover to 7.x
+ (using Powertools-like pathnames for Red Hat 7)
+
+* Thu Jun 7 2001 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - updated to 3.2.0b4
+
+* Fri Dec 1 2000 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - updated to 3.2.0b3
+
+* Mon Feb 21 2000 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - fixed post script to add more descriptive entries in htdig.conf
+ - made cron script a config file
+ - updated to 3.2.0b2
+
+* Thu Feb 3 2000 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - added mime.types as a config file
+
+* Mon Jan 17 2000 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - updated to 3.2.0b1
+
+* Fri Aug 13 1999 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - changed configure & install options and got rid of conf.patch file
+ to work with latest 3.2 code
+
+* Mon Jun 7 1999 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - fixed post script to use only first ServerName directive in httpd.conf
+
+* Tue Mar 23 1999 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - updated to 3.2.0dev, for testing
+
+* Thu Feb 4 1999 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - put web stuff back in /home/httpd/html & /home/httpd/cgi-bin, so it can
+ go over a standard Apache installation on Red Hat
+ - cleaned up install to make use of new features
+
+* Thu Feb 4 1999 Ric Klaren <klaren@telin.nl>
+ - changed buildroot stuff
+ - minor spec file fixes
+ - install web stuff in /home/httpd/htdig
+ - made rundig config file
+
+* Tue Sep 22 1998 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - Added local_urls stuff to generated htdig.conf file
+
+* Fri Sep 18 1998 Gilles Detillieux <grdetil@scrc.umanitoba.ca>
+ - Built the rpm from latest htdig source (3.1.0b1), using earlier
+ versions of rpms by Mihai Ibanescu <misa@dntis.ro> and Elliot Lee
+ <sopwith@cuc.edu> as a model, incorporating ideas from both. I've
+ made the install locations as FSSTND compliant as I can think of.
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README
new file mode 100644
index 00000000..4ec0f6ab
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README
@@ -0,0 +1,38 @@
+
+> Subject: htdig: HTDIG: Searching Word files
+> To: htdig@sdsu.edu
+> From: Richard Jones <rjones@imcl.com>
+> Date: Tue, 15 Jul 1997 12:44:03 +0100
+>
+> I'm currently trying to hack together a script to search
+> Word files. I have a little program called `catdoc' (attached)
+> which takes Word files and turns them into passable text files.
+> What I did was write a shell script around this called
+> `htparsedoc' (also attached) and add it as an external
+> parser:
+>
+> --- /usr/local/lib/htdig/conf/htdig.conf ---
+>
+> # External parser for Word documents.
+> external_parsers: "applications/msword"
+> "/usr/local/lib/htdig/bin/htparsedoc"
+>
+> This script produces output like this:
+>
+> t Word document http://annexia.imcl.com/test/comm.doc
+> w INmEDIA 1 -
+> w Investment 2 -
+> w Ltd 3 -
+> w Applications 4 -
+> w Subproject 5 -
+> w Terms 6 -
+> w of 7 -
+> [...]
+> w Needed 994 -
+> w Tbd 995 -
+> w Resources 996 -
+> w Needed 997 -
+> w Tbd 998 -
+> w i 1000 -
+>
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c
new file mode 100644
index 00000000..93bf02f8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c
@@ -0,0 +1,197 @@
+
+From VDiGiampietro@sansalvo.marelli.it Fri Jul 3 09:52:34 1998
+Date: Fri, 3 Jul 1998 17:20:50 +0200 (MET DST)
+From: Valerio Di Giampietro <VDiGiampietro@sansalvo.marelli.it>
+To: htdig@sdsu.edu
+Subject: htdig: Searching Word files
+/* catdoc.c version 0.3 */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define TEXT_WIDTH 72
+/* #define LATIN1 */
+/* enable this define, if you don't want cyrillic code page translations */
+
+unsigned char specs[]={7, /* tab columns separator - handled specially*/
+ '\n',/* hook to handle end of line in tables */
+ 0x1E,/* unbreakable defis */
+ 0x1F,/* soft hyphen */
+ 0x85,/* dots */
+ 0x91,/* opening single quote */
+ 0x92,/* closing single quote */
+ 0x93,/* opening double quote */
+ 0x94,/* closing double quote */
+ 0x96,/* em-dash (or em-space)*/
+ 0x97,/* en-dash */
+ 0x99,/* Trade Mark sign */
+ 0xA0,/* unbreakable space */
+ 0xA9,/* Copyright sign */
+ 0xAE,/* Reserved sign */
+ 0xAB,/* opening << quote*/
+ 0xBB,/* closing >> quote*/
+ /* The rest is translated into itself unless TeX mode is selected */
+ '%','$','_','{','}','\\',
+ };
+
+char *ascii_specs[]={"\t","\n","-","","...","`","'","``","''","-","-","tm",
+ " ","(c)","(R)","\"","\"","%","$","_","{","}","\\"};
+char *TeX_specs[]={"\t&","\\\\\n","-","\\-","\\dots{}","`","'","``","''","---","--",
+"${}^{\\scriptscriptstyle\\mathrm{TM}}$",/* this is my idea about tm sign*/
+"~",
+"{\\copyright}",
+"(R)",/* to be replaced with correct command */
+"<",">","\\%","\\$","$\\{$","$\\}$","$\\backslash$",};
+#ifndef LATIN1
+#ifdef unix
+unsigned char table[256]={
+/* Windows cyrillic code page to KOI-8 */
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0D,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x2D,0x20,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
+0x80,0x81,0x82,0xAA,0x8F,0x90,0xA9,0x93,0x84,0x92,0x91,0x94,0x83,0x95,0x99,0x8B,
+0x98,0x60,0x27,0x22,0x22,0x9A,0x2D,0x2D,0x9E,0xA6,0x87,0xB0,0x8D,0x97,0x86,0xA2,
+0x20,0xA7,0xA5,0x88,0xA4,0x8E,0x96,0x85,0xB3,0xA1,0x9F,0x22,0xAB,0xAC,0xAD,0xAE,
+0xAF,0xB2,0xB1,'i',0xB5,0xB6,0xB7,0xB8,0xA3,0xB9,0xBA,0x22,0xBC,0xBD,0xBE,0x9B,
+0xE1,0xE2,0xF7,0xE7,0xE4,0xE5,0xF6,0xFA,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xF0,
+0xF2,0xF3,0xF4,0xF5,0xE6,0xE8,0xE3,0xFE,0xFB,0xFD,0xFF,0xF9,0xF8,0xFC,0xE0,0xF1,
+0xC1,0xC2,0xD7,0xC7,0xC4,0xC5,0xD6,0xDA,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0,
+0xD2,0xD3,0xD4,0xD5,0xC6,0xC8,0xC3,0xDE,0xDB,0xDD,0xDF,0xD9,0xD8,0xDC,0xC0,0xD1};
+#else
+unsigned char table[256]={
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
+0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef};
+#endif
+#define recode_char(x) table[x]
+#else
+#define recode_char(x) x
+#endif
+char *map_char(char **map,int c)
+
+{unsigned char *ptr;
+ static char buffer[2]="a";
+ if ((ptr=strchr(specs,c)))
+ return map[ptr-specs];
+ else
+ { buffer[0]=recode_char(c); return buffer; }
+}
+void format(char *buf,char **map)
+{ unsigned char outstring[128]="";
+ unsigned char *sp=buf,*dp;int table=0;
+ while (*sp)
+ { if (*sp==7&&table)
+ { printf("%s%s",outstring,map_char(map,'\n'));
+ outstring[0]=0;
+ table=0;sp++;
+ }
+ else
+ { if (strlen(strcat(outstring,map_char(map,*sp)))>TEXT_WIDTH)
+ { dp=strrchr(outstring,' ');
+ if (dp)
+ { *(dp++)=0;
+ printf("%s\n",outstring);
+ strcpy(outstring,dp);
+ }
+ else
+ { int i;
+ for(i=0;i<72;i++) putc(outstring[i],stdout);
+ putc('\n',stdout);
+ strcpy(outstring,outstring+72);
+ }
+ }
+ table=*(sp++)==7;
+ }
+ }
+if (outstring[0]==0) putc('\n',stdout);
+ else printf("%s\n\n",outstring);
+
+}
+void help(void)
+{ printf("catdoc - exctract text from MS-Word files and catenate it to stdout\n"
+ "Copyright (c) by Victor B. Wagner, 1996\n"
+ "Usage catdoc [-ast] files ...\n"
+ "\t-a - converts non-standard printable chars into readable form (default)\n"
+ "\t-t - converts them into TeX control sequences\n"
+ "\t-s - exits with code 1 if MSWordDoc signature not found before\n"
+ "\t\tfirst printable paragraph\n\n"
+ "All options affects only files, specified AFTER them\n");
+ exit(2);
+}
+
+char buf[8192];
+void do_file(FILE *f,char **map,int search_sign)
+{ int ok=!search_sign;
+ int bufptr,c;
+ while(!feof(f))
+ {bufptr=-1;
+ do {
+ c=getc(f);
+ /* Special printable symbols 7- table separator \r - paragraph end
+ 0x1E - short defis */
+ if ((c<=255&&c>=32)||c==7||c=='\t'||c=='\r'||c==0x1E)
+ buf[++bufptr]=c;
+ else
+ if (c==0x0b) buf[++bufptr]='\r';
+ else
+ { if (!c) {buf[++bufptr]=0;
+ if(!strcmp(buf,"MSWordDoc"))
+ { ok=1; }
+ }
+ if (c!=2) bufptr=-1;/* \002 is Word's footnote mark */
+ }
+ } while (c!='\r'&&c!=EOF);
+ if (bufptr>0&&buf[bufptr]=='\r')
+ { if (!ok) exit( 1);
+ buf[bufptr]=0; format(buf,map);
+ }
+ }
+}
+
+int main(int argc,char **argv)
+{ int search_sign =0; /* Must program exit with exit code 1 if MSWordDoc
+ signature is not found? */
+ char **sequences=ascii_specs;/* pointer to array of character sequences
+ to represent special characters of Word */
+ int i=1,stdin_processed=0;
+ if (argc<2) help();
+ for(;i<argc;i++)
+ { if (!strcmp(argv[i],"-s")) search_sign=1;
+ else
+ if (!strcmp(argv[i],"-t")) sequences=TeX_specs;
+ else
+ if (!strcmp(argv[i],"-a")) sequences=ascii_specs;
+ else
+ if (!strcmp(argv[i],"-"))
+ if (!stdin_processed) {do_file(stdin,sequences,search_sign);
+ stdin_processed=1;}
+ else { fprintf(stderr,"Cannot process standard input twice a row\n");
+ exit (2);}
+ else
+ if (argv[i][0]=='-') {fprintf(stderr,"Invalid option %s\n",argv[i]);
+ help();}
+ else
+ { FILE *f=fopen(argv[i],"r");
+ if(!f) {fprintf(stderr,"Cannot open file %s\n",argv[i]);exit(2);}
+ do_file(f,sequences,search_sign);
+ }
+ }
+ return 0;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc
new file mode 100755
index 00000000..9d47e85d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc
@@ -0,0 +1,72 @@
+#!/bin/sh -
+
+#--
+# External parser for HTDIG that parses Word files so they can
+# be indexed.
+#--
+# Written by Richard W.M. Jones <rjones@imcl.com>. Distributed freely
+# under the terms of the GNU General Public License (GPL).
+# Modified by Andrew M. Bishop <amb@gedanken.demon.co.uk>
+#--
+
+#----------------------------------------------------------------------
+# Configurable stuff here:
+
+# The program that converts Word files into text. I use ``catdoc''
+# by Victor Wagner <vitus@agropc.msk.su>. You may wish to just use
+# ``strings''.
+CATDOC=/usr/local/bin/catdoc
+#CATDOC=strings
+
+# End of configurable stuff.
+#----------------------------------------------------------------------
+
+# Arguments are:
+# $1 = input file
+# $2 = content type (ignored)
+# $3 = base URL
+# $4 = HTDIG config file (ignored)
+# HTDIG expects us to print out:
+# w WORD LOCATION HEADING Word at location 0-1000 under heading
+# u URL DESCRIPTION URL with description
+# t TITLE Title of document
+# h HEAD Heading
+# a ANCHOR Anchor (ie. like <a name="">)
+# i IMAGE_URL Image pointer
+
+#----------------------------------------------------------------------
+
+# Format input to word per line.
+
+wordPerLine () {
+ tr '[ \010]' '\012' | awk 'NF==1 {print;}'
+}
+
+# Convert non-alphanumeric characters into spaces.
+
+removeNonAlNum () {
+ tr -c '[a-zA-Z0-9\015]' ' '
+}
+
+#----------------------------------------------------------------------
+
+# Parse input file to linear list of words.
+$CATDOC $1 | removeNonAlNum | wordPerLine > /tmp/htparsedoc.$$
+
+# Compute length of list.
+filelen=`wc -l < /tmp/htparsedoc.$$`
+
+# We can't find the title from the document, so make one up.
+echo "t Binary Document $3"
+
+# We can't make an excerpt so we make one up.
+echo "h No excerpt available"
+
+# Pass words to htdig.
+if [ $filelen -gt 0 ]; then
+ awk "{printf (\"w\t%s\t%d\t-\t\n\", \$1, 1000*NR/$filelen);}" \
+ < /tmp/htparsedoc.$$
+fi
+
+# Remove temporary file.
+rm /tmp/htparsedoc.$$
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile b/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile
new file mode 100644
index 00000000..c2dc4857
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile
@@ -0,0 +1,58 @@
+#
+# Makefile for the multidig system
+#
+# Copyright (c) 1998-2000 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+
+
+#
+# You probably want to change some or all of these.
+# BASH = location of bash or other Bourne-like shell with 'source' builtin
+# BASEDIR = directory of ht://Dig installation
+# These should probably be OK.
+# BINDIR = directory of ht://Dig binaries. Also destination for these scripts.
+# CONFIG_DIR = directory of ht://Dig config files.
+# DB_BASE = base directory for ht://Dig / multidig databases
+BASH= /bin/bash
+BASEDIR= /opt/htdig
+BINDIR= $(BASEDIR)/bin
+CONFIG_DIR= $(BASEDIR)/conf
+DB_BASE= $(BASEDIR)/db
+
+
+#
+# You shouldn't need to change any of this...
+#
+SCRIPTS= add-collect add-urls multidig \
+ new-collect new-db gen-collect
+CONF= db.conf multidig.conf
+
+all:
+
+clean:
+ rm -f *~
+
+install:
+ @echo "Installing scripts..."
+ @for i in $(SCRIPTS); do \
+ sed -e s%@BASH@%$(BASH)% \
+ -e s%@CONFIG_DIR@%$(CONFIG_DIR)% $$i >$(BINDIR)/$$i; \
+ chmod a+x $(BINDIR)/$$i; \
+ echo $(BINDIR)/$$i; \
+ done && test -z "$$fail"
+ @echo
+ @echo "Installing config files..."
+ @echo
+ @for i in $(CONF); do \
+ sed -e s%@BASH@%$(BASH)% -e s%@BASEDIR@%$(BASEDIR)% \
+ -e s%@BINDIR@%$(BINDIR)% -e s%@CONFIG_DIR@%$(CONFIG_DIR)% \
+ -e s%@DB_BASE@%$(DB_BASE)% $$i >$(CONFIG_DIR)/$$i; \
+ echo $(CONFIG_DIR)/$$i; \
+ done && test -z "$$fail"
+ @echo
+ @echo "Done with installation."
+ @echo
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/README b/debian/htdig/htdig-3.2.0b6/contrib/multidig/README
new file mode 100644
index 00000000..f394e5e5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/README
@@ -0,0 +1,133 @@
+README for multidig 1.1
+ by Geoff Hutchison <ghutchis@wso.williams.edu>
+
+ Copyright (c) 1998-1999 The ht://Dig Group <http://www.htdig.org/>
+ Distributed under the terms of the GNU General Public License (GPL)
+ version 2 or later.
+--------------------------------
+
+This document is part of the "multidig script system" a system of
+shell scripts and some modified conf files that makes dealing with
+multiple databases easier for ht://Dig. It assumes that you know what
+ht://Dig is. If you don't know, see the website at
+<http://www.htdig.org/>
+
+This README is a bit rough around the edges. I don't know what people
+really want or need to know about the scripts. I expect a lot of
+questions. Hey, maybe I'm wrong. I'm always open to suggestions,
+criticisms, corrections, etc. E-mail me at <ghutchis@wso.williams.edu>
+
+--------------------------------
+
+INTRODUCTION:
+
+* Why write multidig?
+
+ There are many reasons I started the multidig system. The biggest
+were the complaints that ht://Dig didn't have much of an
+administration interface. If you're looking for one, multidig isn't
+it. Yet. The next biggest is that people wanted me to make dealing
+with multiple databases easier. If you're looking for this, you're in
+the right place.
+
+* Why should I bother with multidig?
+
+ If you already have a multiple-database setup and it's working
+smoothly, you probably don't want to bother. It was written the way
+*I* would organize a multiple-database setup. Not surprisingly, it
+might be more pain to convert to multidig than it's worth.
+ If you're planning a multiple-database setup or you have one and
+it's not working well, this will help. It hides most of the pain and
+suffering behind some shell scripts and generally automates life. :-)
+
+--------------------------------
+
+SETTING UP:
+
+* How do I install it?
+
+ It's pretty easy to install. It requires bash, or at least a
+Bourne-shell that supports the "source" builtin. Obviously, it also
+requires ht://Dig. :-)
+ Change any paths in the Makefile. Do a "make install" to install the
+scripts in the right place and the config files in the right
+place. The Makefile edits the scripts for you so the paths are consistent.
+
+* Now that it's in, how does it work?
+
+ The multidig script will replace the rundig script that comes with
+ht://Dig. Use it through a cron job or some other means of automating
+updates. It will run through all the db that multidig knows about, run
+htdig, htmerge, move the databases around, etc. As written it tries to
+index with the least disk space in the least time. Thus it keeps only
+the minimum files and does "update" digs.
+ After indexing all the db, it merges all the collections, trying to
+do the same thing, fastest speed, smallest disk and RAM
+requirements. It spits out a short status to STDOUT and a more
+complete report to the file referenced with the $REPORT option in
+multidig.conf. Adding a "-v" to the command-line makes everything more
+verbose.
+
+* Can I convert my previous multiple-db setup?
+
+ Yes. I'm assuming you have a config file for each database you've
+set up. In that case, put the databases into a directory with the same
+name as the .conf file and tack the name onto the db.list file in your
+config directory. This is multidig's list of all databases, so adding
+a line here will ensure it's indexed using multidig.
+
+* How do I add new URLs to databases or add new databases?
+
+ 1) New URLs: Run 'add-urls <db>' and either paste in URLs or
+ redirect a file or program.
+ 2) New DB: Run 'new-db <db>' to set up everything for that database.
+
+--------------------------------
+
+COLLECTIONS:
+
+* What's a collection?
+
+ Version 3.1.0 of ht://Dig added support for merging multiple
+databases together. Technically, you merge one database into
+another. Multidig makes this a bit easier. You set up a "collection"
+of other databases and the multidig script will merge them all
+together.
+
+* Fantastic! How do I define a collection?
+
+./new-collect <name>
+./add-collect <name>
+<insert dbs here>
+
+ The add-collect script will go through the list of dbs and make sure
+the multidig system actually knows about them. If not, it complains.
+
+* Can I just generate the collections from my databases?
+
+ Yup, run gen-collect. This is what the main multidig script runs.
+
+--------------------------------
+
+DIRECTORY LAYOUT:
+
+Here are the locations of files used by multidig:
+
+ $BASEDIR/bin
+ add-collect script for adding db to a collection
+ add-urls script for adding URLs to a db
+ gen-collect script for generating all collections
+ from their db (called by multidig)
+ multidig script for generating all db and collections
+ new-collect script for making a new collection
+ new-db script for making a new db
+ $BASEDIR/conf
+ db.conf template database config
+ used by new-collect and new-db
+ foo.conf database config for db foo
+ multidig.conf config for multidig paths and options
+ db.list list of all db, one per line
+ collect.list list of all collections, one per line
+ $BASEDIR/db
+ foo/foo.urls URLs used by foo db
+ foo/db.* actual foo databases
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect
new file mode 100644
index 00000000..d169ed84
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect
@@ -0,0 +1,49 @@
+#!@BASH@
+
+#
+# add-collect 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# syntax:
+# add-collect <collection>
+#
+# Reads new DB in from the standard input (either redirect or paste)
+# Ensures the DB actually exist before adding them to the collection
+#
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# Catch people who don't supply an argument
+if [ "$1" = "" ]; then
+ echo Syntax: add-colect \<collection\>
+ exit
+fi
+
+# Do we actually have a collection named as specified?
+TEST=`grep $1 $COLLECT_LIST`
+if [ "$TEST" = "" ]; then
+ # This may become annoying. If so, comment it out!
+ echo The collection $1 does not exist. Sorry.
+ echo The existing collections are:
+ cat $COLLECT_LIST
+else
+ # OK, now we have to make sure these are legal db
+ for db in `cat /dev/stdin`; do
+ DBTEST=`grep $db $DB_LIST`
+ if [ "$DBTEST" != "" ]; then
+ echo $db >>$DB_BASE/$1/$1.collect
+ else
+ # This may become annoying. If so, comment it out!
+ echo The database $db does not exist. Sorry.
+ echo The existing databases are:
+ cat $DB_LIST
+ fi
+ done
+fi
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls
new file mode 100644
index 00000000..15866e23
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls
@@ -0,0 +1,37 @@
+#!@BASH@
+
+#
+# add-urls 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# syntax:
+# add-urls <db>
+#
+# Reads new URLs in from the standard input (either redirect or paste)
+#
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# Catch people who don't supply an argument
+if [ "$1" = "" ]; then
+ echo Syntax: add-urls \<db\>
+ exit
+fi
+
+# Do we actually have a database named as specified?
+TEST=`grep $1 $DB_LIST`
+if [ "$TEST" = "" ]; then
+ # This may become annoying. If so, comment it out!
+ echo The database $1 does not exist. Sorry.
+ echo The existing databases are:
+ cat $DB_LIST
+else
+ cat /dev/stdin >>$DB_BASE/$1/$1.urls
+fi
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf b/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf
new file mode 100644
index 00000000..edacd723
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf
@@ -0,0 +1,26 @@
+#
+# db.conf file for the multidig system
+# (copied for each database used)
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+
+# Change this if you use a different global config file
+# Put most of your configuration options in this file
+# the db.conf files only define the URL list used and the directory for
+# storing the databases
+include: ${config_dir}/htdig.conf
+
+# Changed for each database. Places the databases in separate directories
+# for convenience and organization
+database_dir: @DB_BASE@/@DATABASE@
+
+# Each database has a separate list of starting URLs
+# This makes it easier to index a variety of categories
+start_url: `${database_dir}/@DATABASE@.urls`
+
+# Any database-specific config options should go here...
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect
new file mode 100644
index 00000000..f75e08ad
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect
@@ -0,0 +1,99 @@
+#!@BASH@
+
+#
+# gen-collect 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# Part of the "multidig script system"
+# a system of shell scripts and some modified conf files
+# that makes dealing with multiple databases easier for ht://Dig
+#
+# Syntax:
+# gen-collect [-v]
+#
+# Merges multiple databases into ``collected'' db
+# (This is done by multidig too, but this script lets you *just*
+# generate the collections.)
+#
+
+# This is useful for debugging info
+if [ "$1" = "-v" ]; then
+ verbose=-v
+fi
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# We may be called inside multidig, so we don't want to mess with the report.
+for collect in `cat $COLLECT_LIST`; do
+ # What's the conf file for this database?
+ CONF=$CONFIG_DIR/$collect.conf
+ echo Generating $collect at: `date`
+
+ # We want to replace the old .work files with the first database
+ # This ensures that we *only* get documents from the merged db
+ # and not old ones left around in our previous collected db
+ firstdb=`head -n 1 $DB_BASE/$collect/$collect.collect`
+ cp $DB_BASE/$firstdb/db.docdb $DB_BASE/$collect/db.docdb.work
+ cp $DB_BASE/$firstdb/db.docs.index $DB_BASE/$collect/db.docs.index.work
+ cp $DB_BASE/$firstdb/db.wordlist.work $DB_BASE/$collect/db.wordlist.work
+ cp $DB_BASE/$firstdb/db.words.db $DB_BASE/$collect/db.words.db.work
+ # Now we need to work out the number of remaining db in the collection
+ LENGTH=`wc -l $DB_BASE/$collect/$collect.collect | awk '{print $1;}'`
+ let NUM=LENGTH-1
+
+ for db in `tail -n $NUM $DB_BASE/$collect/$collect.collect`; do
+ if [ "$1" = "-v" ]; then
+ echo Merging db $db of collect $collect
+ fi
+ MERGE_CONF=$CONFIG_DIR/$db.conf
+ # There's a slight bug in the merge function.
+ # It's looking for db.wordlist, not .work. So let's copy it temporarily
+ cp $DB_BASE/$db/db.wordlist.work $DB_BASE/$db/db.wordlist
+ # Do the merging, using -d and -w to prevent normal merging
+ # (it would be a waste of time, we'd repeat it multiple times)
+ $BINDIR/htmerge $verbose -s -d -w -m $MERGE_CONF -a -c $CONF >>$REPORT
+ # And now remove the copy
+ rm $DB_BASE/$db/db.wordlist
+ done
+
+ # Now after merging in all of those databases
+ # we need to do the usual htmerge run
+ $BINDIR/htmerge -a $verbose -s -c $CONF >>$REPORT
+
+ if [ "$1" = "-v" ]; then
+ echo Moving files $collect at: `date`
+ fi
+ # If you don't have the space for backups, this step can be omitted
+ if [ $BACKUPS = "true" ]; then
+ cp $DB_BASE/$collect/db.docdb $DB_BASE/$collect/db.docdb.bak
+ cp $DB_BASE/$collect/db.docs.index $DB_BASE/$collect/db.docs.index.bak
+ # cp $DB_BASE/$collect/db.wordlist $DB_BASE/$collect/db.wordlist.bak
+ cp $DB_BASE/$collect/db.words.db $DB_BASE/$collect/db.words.db.bak
+ fi
+
+ # Move them because we don't want .work files around
+ # (Remember, we're generating using merging,
+ # so we want to make sure we don't have old stuff to gum up the works...
+ mv $DB_BASE/$collect/db.docdb.work $DB_BASE/$collect/db.docdb
+ mv $DB_BASE/$collect/db.docs.index.work $DB_BASE/$collect/db.docs.index
+ # mv $DB_BASE/$collect/db.wordlist.work $DB_BASE/$collect/db.wordlist
+ mv $DB_BASE/$collect/db.words.db.work $DB_BASE/$collect/db.words.db
+
+ # Make them world readable!
+ chmod 644 $DB_BASE/$collect/db.docdb
+ chmod 644 $DB_BASE/$collect/db.docs.index
+ # chmod 644 $DB_BASE/$collect/db.wordlist
+ chmod 644 $DB_BASE/$collect/db.words.db
+ if [ "$1" = "-v" ]; then
+ echo Done with $collect at: `date`
+ fi
+done
+
+# That's it!
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig
new file mode 100644
index 00000000..0b59136a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig
@@ -0,0 +1,93 @@
+#!@BASH@
+
+#
+# multidig 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# Part of the "multidig script system"
+# a system of shell scripts and some modified conf files
+# that makes dealing with multiple databases easier for ht://Dig
+#
+# Syntax:
+# multidig [-v]
+#
+# Performs all the digging, merging and so on needed
+# for indexing and updating multiple db
+# Merges multiple databases into ``collected'' db
+#
+
+# This is useful for debugging info
+if [ "$1" = "-v" ]; then
+ verbose=-v
+fi
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# Start indexing.
+rm $REPORT
+for db in `cat $DB_LIST`; do
+ echo Digging $db at: `date`
+ # What's the conf file for this database?
+ CONF=$CONFIG_DIR/$db.conf
+ if [ "$1" = "-v" ]; then
+ echo " Indexing $db at: `date`"
+ fi
+ $BINDIR/htdig -a $verbose -s -c $CONF >>$REPORT
+ if [ "$1" = "-v" ]; then
+ echo " Merging $db at: `date`"
+ fi
+ $BINDIR/htmerge -a $verbose -s -c $CONF >>$REPORT
+
+ if [ "$1" = "-v" ]; then
+ echo " Moving files $db at: `date`"
+ fi
+ # If you don't have the space for backups, this step can be omitted
+ if [ $BACKUPS = "true" ]; then
+ cp $DB_BASE/$db/db.docdb $DB_BASE/$db/db.docdb.bak
+ cp $DB_BASE/$db/db.docs.index $DB_BASE/$db/db.docs.index.bak
+ # cp $DB_BASE/$db/db.wordlist $DB_BASE/$db/db.wordlist.bak
+ cp $DB_BASE/$db/db.words.db $DB_BASE/$db/db.words.db.bak
+ fi
+
+ # Copy the db.docdb file, the .work file is needed for update digs
+ cp $DB_BASE/$db/db.docdb.work $DB_BASE/$db/db.docdb
+ # We don't do anything with the db.wordlist file because the
+ # .work file is needed for update digs and the non-work file isn't needed
+ # cp $DB_BASE/$db/db.wordlist.work $DB_BASE/$db/db.wordlist
+ # These .work files are never used, so let's just keep the active copy
+ mv $DB_BASE/$db/db.docs.index.work $DB_BASE/$db/db.docs.index
+ mv $DB_BASE/$db/db.words.db.work $DB_BASE/$db/db.words.db
+
+ # Make them world readable!
+ chmod 644 $DB_BASE/$db/db.docdb
+ chmod 644 $DB_BASE/$db/db.docdb.work
+ chmod 644 $DB_BASE/$db/db.docs.index
+ # chmod 644 $DB_BASE/$db/db.wordlist
+ chmod 644 $DB_BASE/$db/db.words.db
+ if [ "$1" = "-v" ]; then
+ echo " Done with $db at: `date`"
+ fi
+done
+# Now generate the collections by merging their component databases
+# We do this in our gen-collect script, so we won't do that here.
+$BINDIR/gen-collect $1
+
+if [ "$1" = "-v" ]; then
+ echo
+ fgrep "htdig:" $REPORT
+ echo
+ fgrep "htmerge:" $REPORT
+ echo
+ echo Total lines in $REPORT: `wc -l $REPORT`
+fi
+
+# You probably don't need to do this since the script will remove it next
+# time it's run. But you can do it anyway
+# rm $REPORT
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf
new file mode 100644
index 00000000..32164977
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf
@@ -0,0 +1,32 @@
+#!@BASH@
+#
+# multidig config 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+
+# You may wish to set some of these:
+# BASEDIR = base directory for ht://Dig installation
+# BINDIR = directory with ht://Dig binaries (i.e. htdig, htmerge)
+# DB_BASE = base directory for ht://Dig DB
+# (i.e. each DB gets its own directory off of this)
+# CONFIG_DIR = directory with ht://Dig config files
+# DB_LIST = file with list of databases
+# COLLECT_LIST = file with list of "collections" databases merged from others
+# DB_CONF = file copied by new-db and new-collect for .conf files
+# REPORT = temporary file used to generate a report for the dig
+# TMPDIR = a directory with lots of temporary space for the merging
+export BASEDIR=@BASEDIR@
+export BINDIR=@BINDIR@
+export DB_BASE=@DB_BASE@
+export CONFIG_DIR=@CONFIG_DIR@
+export DB_LIST=$CONFIG_DIR/db.list
+export COLLECT_LIST=$CONFIG_DIR/collect.list
+export DB_CONF=$CONFIG_DIR/db.conf
+export REPORT=$BASEDIR/multidig.report
+export TMPDIR=$DB_BASE
+export BACKUPS=true
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect
new file mode 100644
index 00000000..6647d447
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect
@@ -0,0 +1,39 @@
+#!@BASH@
+
+#
+# new-collect 1.1
+#
+# Copyright (c) 1998-2000 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# syntax:
+# new-collect <collection>
+#
+# Creates a new database directory and conf file with given name
+# Updates the global collect.list file
+#
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# Catch people who don't supply an argument
+if [ "$1" = "" ]; then
+ echo Syntax: new-collect \<collection\>
+ exit
+fi
+
+# Add the new collection to the collect.list file
+echo ${1:?You need to specify a collection} >>$COLLECT_LIST
+
+# Now make the appropriate database directory
+mkdir $DB_BASE/$1
+
+# And make a copy of the default (db.conf) conf file for the DB
+# Use sed to replace @DATABASE@ with the name of the database
+sed -e s%@DATABASE@%$1% $DB_CONF >$CONFIG_DIR/$1.conf
+# And make a blank file for the ${start_urls} directive
+touch $DB_BASE/$1/$1.collect
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db
new file mode 100644
index 00000000..1c4948f7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db
@@ -0,0 +1,39 @@
+#!@BASH@
+
+#
+# new-db 1.1
+#
+# Copyright (c) 1998-1999 The ht://Dig Group
+# Distributed under the terms of the GNU General Public License (GPL)
+# version 2 or later.
+# for the ht://Dig search system http://www.htdig.org/
+# and the multidig script system http://www.htdig.org/contrib/scripts/
+#
+# syntax:
+# new-db <db>
+#
+# Creates a new database directory and conf file with given name
+# Updates the global db.list file
+#
+
+# You may need to set the following:
+MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf
+source $MULTIDIG_CONF
+
+# Catch people who don't supply an argument
+if [ "$1" = "" ]; then
+ echo Syntax: new-db \<db\>
+ exit
+fi
+
+# Add the new database to the db.list file
+echo ${1:?You need to specify a database} >>$DB_LIST
+
+# Now make the appropriate database directory
+mkdir $DB_BASE/$1
+
+# And make a copy of the default (db.conf) conf file for the DB
+# Use sed to replace @DATABASE@ with the name of the database
+sed -e s%@DATABASE@%$1% $DB_CONF >$CONFIG_DIR/$1.conf
+# And make a blank file for the ${start_urls} directive
+touch $DB_BASE/$1/$1.urls
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl b/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl
new file mode 100755
index 00000000..63b775db
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl
@@ -0,0 +1,238 @@
+#!/usr/local/bin/perl
+
+# 1998/12/10
+# Added: push @allwords, $fields[$x]; <carl@dpiwe.tas.gov.au>
+# Replaced: matching patterns. they match words starting or ending with ()[]'`;:?.,! now, not when in between!
+# Gone: the variable $line is gone (using $_ now)
+#
+# 1998/12/11
+# Added: catdoc test (is catdoc runnable?) <carl@dpiwe.tas.gov.au>
+# Changed: push line semi-colon wrong. <carl@dpiwe.tas.gov.au>
+# Changed: matching works for end of lines now <carl@dpiwe.tas.gov.au>
+# Added: option to rigorously delete all punctuation <carl@dpiwe.tas.gov.au>
+#
+# 1999/02/09
+# Added: option to delete all hyphens <grdetil@scrc.umanitoba.ca>
+# Added: uses ps2ascii to handle PS files <grdetil@scrc.umanitoba.ca>
+# 1999/02/15
+# Added: check for some file formats <Frank.Richter@hrz.tu-chemnitz.de>
+# 1999/02/25
+# Added: uses pdftotext to handle PDF files <grdetil@scrc.umanitoba.ca>
+# Changed: generates a head record with punct. <grdetil@scrc.umanitoba.ca>
+# 1999/03/01
+# Added: extra checks for file "wrappers" <grdetil@scrc.umanitoba.ca>
+# & check for MS Word signature (no longer defaults to catdoc)
+# 1999/03/05
+# Changed: rejoin hyphenated words across lines <grdetil@scrc.umanitoba.ca>
+# (in PDFs) & remove multiple punct. chars. between words (all)
+# 1999/03/10
+# Changed: fix handling of minimum word length <grdetil@scrc.umanitoba.ca>
+# 1999/08/12
+# Changed: adapted for xpdf 0.90 release <grdetil@scrc.umanitoba.ca>
+# Added: uses pdfinfo to handle PDF titles <grdetil@scrc.umanitoba.ca>
+# Changed: keep hyphens by default, as htdig <grdetil@scrc.umanitoba.ca>
+# does, but change dashes to hyphens
+# 1999/09/09
+# Changed: fix to handle empty PDF title right <grdetil@scrc.umanitoba.ca>
+# 2000/01/12
+# Changed: "break" to "last" (no break in Perl) <wjones@tc.fluke.com>
+# Changed: code for parsing a line into a list of
+# words, to use "split", other streamlining.
+# 2001/07/12
+# Changed: fix "last" handling in dehyphenation <grdetil@scrc.umanitoba.ca>
+# Added: handle %xx codes in title from URL <grdetil@scrc.umanitoba.ca>
+# 2003/06/07
+# Changed: allow file names with spaces <lha@users.sourceforge.net>
+#########################################
+#
+# set this to your MS Word to text converter
+# get it from: http://www.fe.msk.ru/~vitus/catdoc/
+#
+$CATDOC = "/usr/local/bin/catdoc";
+#
+# set this to your WordPerfect to text converter, or /bin/true if none available
+# this nabs WP documents with .doc suffix, so catdoc doesn't see them
+#
+$CATWP = "/bin/true";
+#
+# set this to your RTF to text converter, or /bin/true if none available
+# this nabs RTF documents with .doc suffix, so catdoc doesn't see them
+#
+$CATRTF = "/bin/true";
+#
+# set this to your PostScript to text converter
+# get it from the ghostscript 3.33 (or later) package
+#
+$CATPS = "/usr/bin/ps2ascii";
+#
+# set this to your PDF to text converter, and pdfinfo tool
+# get it from the xpdf 0.90 package at http://www.foolabs.com/xpdf/
+#
+$CATPDF = "/usr/bin/pdftotext";
+$PDFINFO = "/usr/bin/pdfinfo";
+#$CATPDF = "/usr/local/bin/pdftotext";
+#$PDFINFO = "/usr/local/bin/pdfinfo";
+
+# need some var's
+$minimum_word_length = 3;
+$head = "";
+@allwords = ();
+@temp = ();
+$x = 0;
+#@fields = ();
+$calc = 0;
+$dehyphenate = 0;
+$title = "";
+#
+# okay. my programming style isn't that nice, but it works...
+
+#for ($x=0; $x<@ARGV; $x++) { # print out the args
+# print STDERR "$ARGV[$x]\n";
+#}
+
+# Read first bytes of file to check for file type (like file(1) does)
+open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n";
+read FILE,$magic,8;
+close FILE;
+
+if ($magic =~ /^\0\n/) { # possible MacBinary header
+ open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n";
+ read FILE,$magic,136; # let's hope parsers can handle them!
+ close FILE;
+}
+
+if ($magic =~ /%!|^\033%-12345/) { # it's PostScript (or HP print job)
+ $parser = $CATPS; # gs 3.33 leaves _temp_.??? files in .
+ $parsecmd = "(cd /tmp; $parser; rm -f _temp_.???) < \"$ARGV[0]\" |";
+# keep quiet even if PS gives errors...
+# $parsecmd = "(cd /tmp; $parser; rm -f _temp_.???) < \"$ARGV[0]\" 2>/dev/null |";
+ $type = "PostScript";
+ $dehyphenate = 0; # ps2ascii already does this
+ if ($magic =~ /^\033%-12345/) { # HP print job
+ open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n";
+ read FILE,$magic,256;
+ close FILE;
+ exit unless $magic =~ /^\033%-12345X\@PJL.*\n*.*\n*.*ENTER\s*LANGUAGE\s*=\s*POSTSCRIPT.*\n*.*\n*.*\n%!/
+ }
+} elsif ($magic =~ /%PDF-/) { # it's PDF (Acrobat)
+ $parser = $CATPDF;
+ $parsecmd = "$parser -raw \"$ARGV[0]\" - |";
+# to handle single-column, strangely laid out PDFs, use coalescing feature...
+# $parsecmd = "$parser \"$ARGV[0]\" - |";
+ $type = "PDF";
+ $dehyphenate = 1; # PDFs often have hyphenated lines
+ if (open(INFO, "$PDFINFO \"$ARGV[0]\" 2>/dev/null |")) {
+ while (<INFO>) {
+ if (/^Title:/) {
+ $title = $_;
+ $title =~ s/^Title:\s+//;
+ $title =~ s/\s+$//;
+ $title =~ s/\s+/ /g;
+ $title =~ s/&/\&amp\;/g;
+ $title =~ s/</\&lt\;/g;
+ $title =~ s/>/\&gt\;/g;
+ last;
+ }
+ }
+ close INFO;
+ }
+} elsif ($magic =~ /WPC/) { # it's WordPerfect
+ $parser = $CATWP;
+ $parsecmd = "$parser \"$ARGV[0]\" |";
+ $type = "WordPerfect";
+ $dehyphenate = 0; # WP documents not likely hyphenated
+} elsif ($magic =~ /^{\\rtf/) { # it's Richtext
+ $parser = $CATRTF;
+ $parsecmd = "$parser \"$ARGV[0]\" |";
+ $type = "RTF";
+ $dehyphenate = 0; # RTF documents not likely hyphenated
+} elsif ($magic =~ /\320\317\021\340/) { # it's MS Word
+ $parser = $CATDOC;
+ $parsecmd = "$parser -a -w \"$ARGV[0]\" |";
+ $type = "Word";
+ $dehyphenate = 0; # Word documents not likely hyphenated
+} else {
+ die "Can't determine type of file $ARGV[0]; content-type: $ARGV[1]; URL: $ARGV[2]\n";
+}
+# print STDERR "$ARGV[0]: $type $parsecmd\n";
+die "Hmm. $parser is absent or unwilling to execute.\n" unless -x $parser;
+
+
+# open it
+open(CAT, "$parsecmd") || die "Hmmm. $parser doesn't want to be opened using pipe.\n";
+while (<CAT>) {
+ while (/[A-Za-z\300-\377]-\s*$/ && $dehyphenate) {
+ $_ .= <CAT>;
+ last if eof;
+ s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s
+ }
+ $head .= " " . $_;
+# s/\s+[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+\s+|^[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+$/ /g; # replace reading-chars with space (only at end or begin of word, but allow multiple characters)
+## s/\s[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]\s|^[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]$/ /g; # replace reading-chars with space (only at end or begin of word)
+## s/[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]/ /g; # rigorously replace all by <carl@dpiwe.tas.gov.au>
+## s/[\-\255]/ /g; # replace hyphens with space
+# s/[\255]/-/g; # replace dashes with hyphens
+# @fields = split; # split up line
+# next if (@fields == 0); # skip if no fields (does it speed up?)
+# for ($x=0; $x<@fields; $x++) { # check each field if string length >= 3
+# if (length($fields[$x]) >= $minimum_word_length) {
+# push @allwords, $fields[$x]; # add to list
+# }
+# }
+
+ # Delete valid punctuation. These are the default values
+ # for valid_punctuation, and should be changed other values
+ # are specified in the config file.
+ tr{-\255._/!#$%^&'}{}d;
+ push @allwords, grep { length >= $minimum_word_length } split /\W+/;
+}
+
+close CAT;
+
+exit unless @allwords > 0; # nothing to output
+
+#############################################
+# print out the title, if it's set, and not just a file name
+if ($title !~ /^$/ && $title !~ /^[A-G]:[^\s]+\.[Pp][Dd][Ff]$/) {
+ print "t\t$title\n";
+} else { # otherwise generate a title
+ @temp = split(/\//, $ARGV[2]); # get the filename, get rid of basename
+ $temp[-1] =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie;
+ print "t\t$type Document $temp[-1]\n"; # print it
+}
+
+
+#############################################
+# print out the head
+$head =~ s/^\s+//; # remove leading and trailing space
+$head =~ s/\s+$//;
+$head =~ s/\s+/ /g;
+$head =~ s/&/\&amp\;/g;
+$head =~ s/</\&lt\;/g;
+$head =~ s/>/\&gt\;/g;
+print "h\t$head\n";
+#$calc = @allwords;
+#print "h\t";
+##if ($calc >100) { # but not more than 100 words
+## $calc = 100;
+##}
+#for ($x=0; $x<$calc; $x++) { # print out the words for the excerpt
+# print "$allwords[$x] ";
+#}
+#print "\n";
+
+
+#############################################
+# now the words
+#for ($x=0; $x<@allwords; $x++) {
+# $calc=int(1000*$x/@allwords); # calculate rel. position (0-1000)
+# print "w\t$allwords[$x]\t$calc\t0\n"; # print out word, rel. pos. and text type (0)
+#}
+$x = 0;
+for ( @allwords ) {
+ # print out word, rel. pos. and text type (0)
+ printf "w\t%s\t%d\t0\n", $_, 1000*$x++/@allwords;
+}
+
+$calc=@allwords;
+# print STDERR "# of words indexed: $calc\n";
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING
new file mode 100644
index 00000000..d60c31a9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile
new file mode 100644
index 00000000..5409f487
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile
@@ -0,0 +1,11 @@
+CC= gcc
+CFLAGS= -O2 -Wall
+
+rtf2html: rtf2html.c
+ $(CC) $(CFLAGS) -o rtf2html rtf2html.c
+
+install: rtf2html
+ cp rtf2html /usr/local/bin
+
+clean:
+ rm -f rtf2html
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README
new file mode 100644
index 00000000..9f3084d4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README
@@ -0,0 +1,16 @@
+rtf2html - a RTF to HTML conversion program
+
+This version of rtf2html has been developed by
+David Lippi <d.lippi@comune.prato.it> and Gabriele Bartolini
+<g.bartolini@comune.prato.it>, based on an earlier work
+by Chuck Shotton <cshotton@oac.hsc.uth.tmc.edu>
+(see http://www.w3.org/Tools/HTMLGeneration/rtf2html.html)
+and Dmitry Potapov <dpotapov@capitalsoft.com>.
+
+This version can handle character set recognition at run-time:
+currently, the ANSI Windows 1252 code and the Macintosh's are
+supported.
+
+For copyright details, see the file COPYING in your distribution
+or the GNU General Public License (GPL) version 2 or later
+<http://www.gnu.org/copyleft/gpl.html>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h
new file mode 100644
index 00000000..d2b40ba0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h
@@ -0,0 +1,257 @@
+unsigned char* charset1252[256] = {
+ "", /* 1 - 1 */
+ "", /* 2 - 2 */
+ "", /* 3 - 3 */
+ "", /* 4 - 4 */
+ "", /* 5 - 5 */
+ "", /* 6 - 6 */
+ "", /* 7 - 7 */
+ "", /* 8 - 8 */
+ "\t", /* 9 - 9 */
+ "\n", /* 10 - a */
+ " ", /* 11 - b */
+ " ", /* 12 - c */
+ "\r", /* 13 - d */
+ "", /* 14 - e */
+ "", /* 15 - f */
+ "", /* 16 - 10 */
+ "", /* 17 - 11 */
+ "", /* 18 - 12 */
+ "", /* 19 - 13 */
+ "", /* 20 - 14 */
+ "", /* 21 - 15 */
+ "", /* 22 - 16 */
+ "", /* 23 - 17 */
+ "", /* 24 - 18 */
+ "", /* 25 - 19 */
+ "", /* 26 - 1a */
+ "", /* 27 - 1b */
+ "", /* 28 - 1c */
+ "", /* 29 - 1d */
+ "", /* 30 - 1e */
+ "", /* 31 - 1f */
+ " ", /* 32 - 20 */
+ "!", /* 33 - 21 */
+ "\"", /* 34 - 22 */
+ "#", /* 35 - 23 */
+ "$", /* 36 - 24 */
+ "%", /* 37 - 25 */
+ "&amp;", /* 38 - 26 */
+ "'", /* 39 - 27 */
+ "(", /* 40 - 28 */
+ ")", /* 41 - 29 */
+ "*", /* 42 - 2a */
+ "+", /* 43 - 2b */
+ ",", /* 44 - 2c */
+ "-", /* 45 - 2d */
+ ".", /* 46 - 2e */
+ "/", /* 47 - 2f */
+ "0", /* 48 - 30 */
+ "1", /* 49 - 31 */
+ "2", /* 50 - 32 */
+ "3", /* 51 - 33 */
+ "4", /* 52 - 34 */
+ "5", /* 53 - 35 */
+ "6", /* 54 - 36 */
+ "7", /* 55 - 37 */
+ "8", /* 56 - 38 */
+ "9", /* 57 - 39 */
+ ":", /* 58 - 3a */
+ ";", /* 59 - 3b */
+ "<", /* 60 - 3c */
+ "=", /* 61 - 3d */
+ ">", /* 62 - 3e */
+ "?", /* 63 - 3f */
+ "@", /* 64 - 40 */
+ "A", /* 65 - 41 */
+ "B", /* 66 - 42 */
+ "C", /* 67 - 43 */
+ "D", /* 68 - 44 */
+ "E", /* 69 - 45 */
+ "F", /* 70 - 46 */
+ "G", /* 71 - 47 */
+ "H", /* 72 - 48 */
+ "I", /* 73 - 49 */
+ "J", /* 74 - 4a */
+ "K", /* 75 - 4b */
+ "L", /* 76 - 4c */
+ "M", /* 77 - 4d */
+ "N", /* 78 - 4e */
+ "O", /* 79 - 4f */
+ "P", /* 80 - 50 */
+ "Q", /* 81 - 51 */
+ "R", /* 82 - 52 */
+ "S", /* 83 - 53 */
+ "T", /* 84 - 54 */
+ "U", /* 85 - 55 */
+ "V", /* 86 - 56 */
+ "W", /* 87 - 57 */
+ "X", /* 88 - 58 */
+ "Y", /* 89 - 59 */
+ "Z", /* 90 - 5a */
+ "[", /* 91 - 5b */
+ "\\", /* 92 - 5c */
+ "]", /* 93 - 5d */
+ "^", /* 94 - 5e */
+ "_", /* 95 - 5f */
+ "`", /* 96 - 60 */
+ "a", /* 97 - 61 */
+ "b", /* 98 - 62 */
+ "c", /* 99 - 63 */
+ "d", /* 100 - 64 */
+ "e", /* 101 - 65 */
+ "f", /* 102 - 66 */
+ "g", /* 103 - 67 */
+ "h", /* 104 - 68 */
+ "i", /* 105 - 69 */
+ "j", /* 106 - 6a */
+ "k", /* 107 - 6b */
+ "l", /* 108 - 6c */
+ "m", /* 109 - 6d */
+ "n", /* 110 - 6e */
+ "o", /* 111 - 6f */
+ "p", /* 112 - 70 */
+ "q", /* 113 - 71 */
+ "r", /* 114 - 72 */
+ "s", /* 115 - 73 */
+ "t", /* 116 - 74 */
+ "u", /* 117 - 75 */
+ "v", /* 118 - 76 */
+ "w", /* 119 - 77 */
+ "x", /* 120 - 78 */
+ "y", /* 121 - 79 */
+ "z", /* 122 - 7a */
+ "{", /* 123 - 7b */
+ "|", /* 124 - 7c */
+ "}", /* 125 - 7d */
+ "~", /* 126 - 7e */
+ " ", /* 127 - 7f */
+ "&euro;", /* 128 - 80 */
+ " ", /* 129 - 81 */
+ "&sbquo;", /* 130 - 82 */
+ "&fnof;", /* 131 - 83 */
+ "&bdquo;", /* 132 - 84 */
+ "&hellip;", /* 133 - 85 */
+ "&dagger;", /* 134 - 86 */
+ "&Dagger;", /* 135 - 87 */
+ "&circ;", /* 136 - 88 */
+ "&permil;", /* 137 - 89 */
+ "&Scaron;", /* 138 - 8a */
+ "&lsaquo;", /* 139 - 8b */
+ "&OElig;", /* 140 - 8c */
+ " ", /* 141 - 8d */
+ "&Zcaron;", /* 142 - 8e */
+ " ", /* 143 - 8f */
+ " ", /* 144 - 90 */
+ "&lsquo;", /* 145 - 91 */
+ "&rsquo;", /* 146 - 92 */
+ "&ldquo;", /* 147 - 93 */
+ "&rdquo;", /* 148 - 94 */
+ "&bull;", /* 149 - 95 */
+ "&ndash;", /* 150 - 96 */
+ "&mdash;", /* 151 - 97 */
+ "&tilde;", /* 152 - 98 */
+ "&trade;", /* 153 - 99 */
+ "&scaron;", /* 154 - 9a */
+ "&rsaquo;", /* 155 - 9b */
+ "&oelig;", /* 156 - 9c */
+ " ", /* 157 - 9d */
+ "&zcaron;", /* 158 - 9e */
+ "&Yuml;", /* 159 - 9f */
+ "&nbsp;", /* 160 - a0 */
+ "&iexcl;", /* 161 - a1 */
+ "&cent;", /* 162 - a2 */
+ "&pound;", /* 163 - a3 */
+ "&curren;", /* 164 - a4 */
+ "&yen;", /* 165 - a5 */
+ "&brvbar;", /* 166 - a6 */
+ "&sect;", /* 167 - a7 */
+ "&uml;", /* 168 - a8 */
+ "&copy;", /* 169 - a9 */
+ "&ordf;", /* 170 - aa */
+ "&laquo;", /* 171 - ab */
+ "&not;", /* 172 - ac */
+ "&shy;", /* 173 - ad */
+ "&reg;", /* 174 - ae */
+ "&macr;", /* 175 - af */
+ "&deg;", /* 176 - b0 */
+ "&plusmn;", /* 177 - b1 */
+ "&sup2;", /* 178 - b2 */
+ "&sup3;", /* 179 - b3 */
+ "&acute;", /* 180 - b4 */
+ "&micro;", /* 181 - b5 */
+ "&para;", /* 182 - b6 */
+ "&middot;", /* 183 - b7 */
+ "&cedil;", /* 184 - b8 */
+ "&sup1;", /* 185 - b9 */
+ "&ordm;", /* 186 - ba */
+ "&raquo;", /* 187 - bb */
+ "&frac14;", /* 188 - bc */
+ "&frac12;", /* 189 - bd */
+ "&frac34;", /* 190 - be */
+ "&iquest;", /* 191 - bf */
+ "&Agrave;", /* 192 - c0 */
+ "&Aacute;", /* 193 - c1 */
+ "&Acirc;", /* 194 - c2 */
+ "&Atilde;", /* 195 - c3 */
+ "&Auml;", /* 196 - c4 */
+ "&Aring;", /* 197 - c5 */
+ "&AElig;", /* 198 - c6 */
+ "&Ccedil;", /* 199 - c7 */
+ "&Egrave;", /* 200 - c8 */
+ "&Eacute;", /* 201 - c9 */
+ "&Ecirc;", /* 202 - ca */
+ "&Euml;", /* 203 - cb */
+ "&Igrave;", /* 204 - cc */
+ "&Iacute;", /* 205 - cd */
+ "&Icirc;", /* 206 - ce */
+ "&Iuml;", /* 207 - cf */
+ "&ETH;", /* 208 - d0 */
+ "&Ntilde;", /* 209 - d1 */
+ "&Ograve;", /* 210 - d2 */
+ "&Oacute;", /* 211 - d3 */
+ "&Ocirc;", /* 212 - d4 */
+ "&Otilde;", /* 213 - d5 */
+ "&Ouml;", /* 214 - d6 */
+ "&times;", /* 215 - d7 */
+ "&Oslash;", /* 216 - d8 */
+ "&Ugrave;", /* 217 - d9 */
+ "&Uacute;", /* 218 - da */
+ "&Ucirc;", /* 219 - db */
+ "&Uuml;", /* 220 - dc */
+ "&Yacute;", /* 221 - dd */
+ "&THORN;", /* 222 - de */
+ "&szlig;", /* 223 - df */
+ "&agrave;", /* 224 - e0 */
+ "&aacute;", /* 225 - e1 */
+ "&acirc;", /* 226 - e2 */
+ "&atilde;", /* 227 - e3 */
+ "&auml;", /* 228 - e4 */
+ "&aring;", /* 229 - e5 */
+ "&aelig;", /* 230 - e6 */
+ "&ccedil;", /* 231 - e7 */
+ "&egrave;", /* 232 - e8 */
+ "&eacute;", /* 233 - e9 */
+ "&ecirc;", /* 234 - ea */
+ "&euml;", /* 235 - eb */
+ "&igrave;", /* 236 - ec */
+ "&iacute;", /* 237 - ed */
+ "&icirc;", /* 238 - ee */
+ "&iuml;", /* 239 - ef */
+ "&eth;", /* 240 - f0 */
+ "&ntilde;", /* 241 - f1 */
+ "&ograve;", /* 242 - f2 */
+ "&oacute;", /* 243 - f3 */
+ "&ocirc;", /* 244 - f4 */
+ "&otilde;", /* 245 - f5 */
+ "&ouml;", /* 246 - f6 */
+ "&divide;", /* 247 - f7 */
+ "&oslash;", /* 248 - f8 */
+ "&ugrave;", /* 249 - f9 */
+ "&uacute;", /* 250 - fa */
+ "&ucirc;", /* 251 - fb */
+ "&uuml;", /* 252 - fc */
+ "&yacute;", /* 253 - fd */
+ "&thorn;", /* 254 - fe */
+ "&yuml;" /* 255 - ff */
+};
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h
new file mode 100644
index 00000000..8c4aeca0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h
@@ -0,0 +1,257 @@
+unsigned char* mac[256] = {
+ "", /* 1 - 1 */
+ "", /* 2 - 2 */
+ "", /* 3 - 3 */
+ "", /* 4 - 4 */
+ "", /* 5 - 5 */
+ "", /* 6 - 6 */
+ "", /* 7 - 7 */
+ "", /* 8 - 8 */
+ "\t", /* 9 - 9 */
+ "\n", /* 10 - a */
+ " ", /* 11 - b */
+ " ", /* 12 - c */
+ "\r", /* 13 - d */
+ "", /* 14 - e */
+ "", /* 15 - f */
+ "", /* 16 - 10 */
+ "", /* 17 - 11 */
+ "", /* 18 - 12 */
+ "", /* 19 - 13 */
+ "", /* 20 - 14 */
+ "", /* 21 - 15 */
+ "", /* 22 - 16 */
+ "", /* 23 - 17 */
+ "", /* 24 - 18 */
+ "", /* 25 - 19 */
+ "", /* 26 - 1a */
+ "", /* 27 - 1b */
+ "", /* 28 - 1c */
+ "", /* 29 - 1d */
+ "", /* 30 - 1e */
+ "", /* 31 - 1f */
+ " ", /* 32 - 20 */
+ "!", /* 33 - 21 */
+ "\"", /* 34 - 22 */
+ "#", /* 35 - 23 */
+ "$", /* 36 - 24 */
+ "%", /* 37 - 25 */
+ "&amp;", /* 38 - 26 */
+ "'", /* 39 - 27 */
+ "(", /* 40 - 28 */
+ ")", /* 41 - 29 */
+ "*", /* 42 - 2a */
+ "+", /* 43 - 2b */
+ ",", /* 44 - 2c */
+ "-", /* 45 - 2d */
+ ".", /* 46 - 2e */
+ "/", /* 47 - 2f */
+ "0", /* 48 - 30 */
+ "1", /* 49 - 31 */
+ "2", /* 50 - 32 */
+ "3", /* 51 - 33 */
+ "4", /* 52 - 34 */
+ "5", /* 53 - 35 */
+ "6", /* 54 - 36 */
+ "7", /* 55 - 37 */
+ "8", /* 56 - 38 */
+ "9", /* 57 - 39 */
+ ":", /* 58 - 3a */
+ ";", /* 59 - 3b */
+ "<", /* 60 - 3c */
+ "=", /* 61 - 3d */
+ ">", /* 62 - 3e */
+ "?", /* 63 - 3f */
+ "@", /* 64 - 40 */
+ "A", /* 65 - 41 */
+ "B", /* 66 - 42 */
+ "C", /* 67 - 43 */
+ "D", /* 68 - 44 */
+ "E", /* 69 - 45 */
+ "F", /* 70 - 46 */
+ "G", /* 71 - 47 */
+ "H", /* 72 - 48 */
+ "I", /* 73 - 49 */
+ "J", /* 74 - 4a */
+ "K", /* 75 - 4b */
+ "L", /* 76 - 4c */
+ "M", /* 77 - 4d */
+ "N", /* 78 - 4e */
+ "O", /* 79 - 4f */
+ "P", /* 80 - 50 */
+ "Q", /* 81 - 51 */
+ "R", /* 82 - 52 */
+ "S", /* 83 - 53 */
+ "T", /* 84 - 54 */
+ "U", /* 85 - 55 */
+ "V", /* 86 - 56 */
+ "W", /* 87 - 57 */
+ "X", /* 88 - 58 */
+ "Y", /* 89 - 59 */
+ "Z", /* 90 - 5a */
+ "[", /* 91 - 5b */
+ "\\", /* 92 - 5c */
+ "]", /* 93 - 5d */
+ "^", /* 94 - 5e */
+ "_", /* 95 - 5f */
+ "`", /* 96 - 60 */
+ "a", /* 97 - 61 */
+ "b", /* 98 - 62 */
+ "c", /* 99 - 63 */
+ "d", /* 100 - 64 */
+ "e", /* 101 - 65 */
+ "f", /* 102 - 66 */
+ "g", /* 103 - 67 */
+ "h", /* 104 - 68 */
+ "i", /* 105 - 69 */
+ "j", /* 106 - 6a */
+ "k", /* 107 - 6b */
+ "l", /* 108 - 6c */
+ "m", /* 109 - 6d */
+ "n", /* 110 - 6e */
+ "o", /* 111 - 6f */
+ "p", /* 112 - 70 */
+ "q", /* 113 - 71 */
+ "r", /* 114 - 72 */
+ "s", /* 115 - 73 */
+ "t", /* 116 - 74 */
+ "u", /* 117 - 75 */
+ "v", /* 118 - 76 */
+ "w", /* 119 - 77 */
+ "x", /* 120 - 78 */
+ "y", /* 121 - 79 */
+ "z", /* 122 - 7a */
+ "{", /* 123 - 7b */
+ "&brvbar;", /* 124 - 7c */
+ "}", /* 125 - 7d */
+ "~", /* 126 - 7e */
+ " ", /* 127 - 7f */
+ "&euro;", /* 128 - 80 */
+ "&Aring;", /* 129 - 81 */
+ "&sbquo;", /* 130 - 82 */
+ "&fnof;", /* 131 - 83 */
+ "&bdquo;", /* 132 - 84 */
+ "&hellip;", /* 133 - 85 */
+ "&dagger;", /* 134 - 86 */
+ "&#0135;", /* 135 - 87 */
+ "&aacute;", /* 136 - 88 */
+ "&#0137;", /* 137 - 89 */
+ "&Scaron;", /* 138 - 8a */
+ "&lsaquo;", /* 139 - 8b */
+ "&OElig;", /* 140 - 8c */
+ "&ccedil;", /* 141 - 8d */
+ "&eacute;", /* 142 - 8e */
+ "&egrave;", /* 143 - 8f */
+ "&ecirc;", /* 144 - 90 */
+ "&#145;", /* 145 - 91 */
+ "&#146;", /* 146 - 92 */
+ "&igrave;", /* 147 - 93 */
+ "\"", /* 148 - 94 */
+ "&bull;", /* 149 - 95 */
+ "&ensp;", /* 150 - 96 */
+ "&emsp;", /* 151 - 97 */
+ "&tilde;", /* 152 - 98 */
+ "&trade;", /* 153 - 99 */
+ "&scaron;", /* 154 - 9a */
+ "&rsaquo;", /* 155 - 9b */
+ "&oelig;", /* 156 - 9c */
+ "&ugrave;", /* 157 - 9d */
+ "&zcaron;", /* 158 - 9e */
+ "&Yuml;", /* 159 - 9f */
+ "&nbsp;", /* 160 - a0 */
+ "&ordm;", /* 161 - a1 */
+ "&cent;", /* 162 - a2 */
+ "&pound;", /* 163 - a3 */
+ "&sect;", /* 164 - a4 */
+ "&yen;", /* 165 - a5 */
+ "&brvbar;", /* 166 - a6 */
+ "&sect;", /* 167 - a7 */
+ "&uml;", /* 168 - a8 */
+ "&copy;", /* 169 - a9 */
+ "&ordf;", /* 170 - aa */
+ "&laquo;", /* 171 - ab */
+ "&not;", /* 172 - ac */
+ "&shy;", /* 173 - ad */
+ "&reg;", /* 174 - ae */
+ "&macr;", /* 175 - af */
+ "&deg;", /* 176 - b0 */
+ "&plusmn;", /* 177 - b1 */
+ "&sup2;", /* 178 - b2 */
+ "&sup3;", /* 179 - b3 */
+ "&acute;", /* 180 - b4 */
+ "&micro;", /* 181 - b5 */
+ "&para;", /* 182 - b6 */
+ "&middot;", /* 183 - b7 */
+ "&ccedil;", /* 184 - b8 */
+ "&sup1;", /* 185 - b9 */
+ "&ordm;", /* 186 - ba */
+ "&raquo;", /* 187 - bb */
+ "&frac14;", /* 188 - bc */
+ "&frac12;", /* 189 - bd */
+ "&frac34;", /* 190 - be */
+ "&iquest;", /* 191 - bf */
+ "&Agrave;", /* 192 - c0 */
+ "&Aacute;", /* 193 - c1 */
+ "&Acirc;", /* 194 - c2 */
+ "&Atilde;", /* 195 - c3 */
+ "&Auml;", /* 196 - c4 */
+ "&Aring;", /* 197 - c5 */
+ "&AElig;", /* 198 - c6 */
+ "&Ccedil;", /* 199 - c7 */
+ "&Egrave;", /* 200 - c8 */
+ "&Eacute;", /* 201 - c9 */
+ "&Ecirc;", /* 202 - ca */
+ "&Agrave;", /* 203 - cb */
+ "&Igrave;", /* 204 - cc */
+ "&Iacute;", /* 205 - cd */
+ "&Icirc;", /* 206 - ce */
+ "&Iuml;", /* 207 - cf */
+ "&ETH;", /* 208 - d0 */
+ "&Ntilde;", /* 209 - d1 */
+ "\"", /* 210 - d2 */
+ "\"", /* 211 - d3 */
+ "'", /* 212 - d4 */
+ "&Otilde;", /* 213 - d5 */
+ "&Ouml;", /* 214 - d6 */
+ "&times;", /* 215 - d7 */
+ "&Oslash;", /* 216 - d8 */
+ "&Ugrave;", /* 217 - d9 */
+ "&Uacute;", /* 218 - da */
+ "&Ucirc;", /* 219 - db */
+ "&Uuml;", /* 220 - dc */
+ "&Yacute;", /* 221 - dd */
+ "&THORN;", /* 222 - de */
+ "&szlig;", /* 223 - df */
+ "&agrave;", /* 224 - e0 */
+ "&aacute;", /* 225 - e1 */
+ "&acirc;", /* 226 - e2 */
+ "&atilde;", /* 227 - e3 */
+ "&auml;", /* 228 - e4 */
+ "&aring;", /* 229 - e5 */
+ "&aelig;", /* 230 - e6 */
+ "&ccedil;", /* 231 - e7 */
+ "&egrave;", /* 232 - e8 */
+ "&eacute;", /* 233 - e9 */
+ "&ecirc;", /* 234 - ea */
+ "&euml;", /* 235 - eb */
+ "&igrave;", /* 236 - ec */
+ "&iacute;", /* 237 - ed */
+ "&icirc;", /* 238 - ee */
+ "&iuml;", /* 239 - ef */
+ "&eth;", /* 240 - f0 */
+ "&ntilde;", /* 241 - f1 */
+ "&ograve;", /* 242 - f2 */
+ "&oacute;", /* 243 - f3 */
+ "&ocirc;", /* 244 - f4 */
+ "&otilde;", /* 245 - f5 */
+ "&ouml;", /* 246 - f6 */
+ "&divide;", /* 247 - f7 */
+ "&oslash;", /* 248 - f8 */
+ "&ugrave;", /* 249 - f9 */
+ "&uacute;", /* 250 - fa */
+ "&ucirc;", /* 251 - fb */
+ "&uuml;", /* 252 - fc */
+ "&yacute;", /* 253 - fd */
+ "&thorn;", /* 254 - fe */
+ "&yuml;" /* 255 - ff */
+};
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c
new file mode 100644
index 00000000..d49140d4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c
@@ -0,0 +1,910 @@
+/* RTF2HTML.c, Chuck Shotton - 6/21/93 */
+/************************************************************************
+ * This program takes a stab at converting RTF (Rich Text Format) files
+ * into HTML. There are some limitations that keep RTF from being able to
+ * easily represent things like in-line images and anchors as styles. In
+ * particular, RTF styles apply to entire "paragraphs", so anchors or
+ * images in the middle of a text stream can't easily be represented by
+ * styles. The intent is to ultimately use something like embedded text
+ * color changes to represent these constructs.
+ *
+ * In the meantime, you can take existing Word documents, apply the
+ * correct style sheet, and convert them to HTML with this tool.
+ *
+ * AUTHOR: Chuck Shotton, UT-Houston Academic Computing,
+ * cshotton@oac.hsc.uth.tmc.edu
+ *
+ * Dmitry Potapov, CapitalSoft
+ * dpotapov@capitalsoft.com
+ *
+ * David Lippi, Comune di Prato, Italy
+ * d.lippi@comune.prato.it
+ *
+ * Gabriele Bartolini, Comune di Prato, Italy
+ * g.bartolini@comune.prato.it
+ *
+ * USAGE: rtf2html [rtf_filename]
+ *
+ * BEHAVIOR:
+ * rtf2html will open the specified RTF input file or read from
+ * standard input, writing converted HTML to standard output.
+ *
+ * NOTES:
+ * The RTF document must be formatted with a style sheet that has
+ * style numberings that conform to the style_mappings table
+ * defined in this source file. Characters are converted according
+ * to the ANSI Windows 1252 code or Macintosh.
+ *
+ * MODIFICATIONS:
+ * 6/21/93 : Chuck Shotton - created version 1.0.
+ * 11/26/98 : Dmitry Potapov - version 1.1 beta
+ * 05/07/04 : David Lippi, Gabriele Bartolini - version 1.2
+ *
+ * Copyright (C) 2004 Comune di Prato
+ *
+ * For copyright details, see the file COPYING in your distribution
+ * or the GNU General Public License (GPL) version 2 or later
+ * <http://www.gnu.org/copyleft/gpl.html>
+ *
+ ************************************************************************/
+
+/* Note, the source is formatted with 4-character tabs */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "charset1252.h"
+#include "charsetmac.h"
+
+#ifdef _MSC_VER
+# define strcasecmp _stricmp
+#endif
+
+#ifndef TRUE
+#define TRUE -1
+#define FALSE 0
+#endif
+
+#define MAX_LEVELS 40 /*defines the # of nested in-line styles (pairs of {})*/
+#define MAX_RTF_TOKEN 40
+
+#define MAX_INLINE_STYLES 5 /*defines # of in-line styles, bold, italic, etc.*/
+
+/* Bit set of the in-line styles currently open; bit i corresponds to
+ * StyleState value i (s_plain..s_hidden). */
+typedef struct tag_StyleState
+{
+ unsigned char s: MAX_INLINE_STYLES;
+} TStyleState;
+
+typedef enum { s_plain, s_bold, s_italic, s_underline, s_hidden, /*in-line styles*/
+ s_para, s_br, /*pseudo style*/
+ s_h0, s_h1, s_h2, s_h3, s_h4, s_h5, s_h6 /*heading styles*/
+} StyleState;
+
+/* Indexed by StyleState: [0] is the opening tag, [1] the closing tag. */
+char *styles[][2] = { /*HTML Start and end tags for styles*/
+ {"", ""},
+ {"<strong>", "</strong>"},
+ {"<em>", "</em>"},
+ {"", ""},
+ {"<!-- ", " -->"},
+ {"\n", "\n"}, /* {"\n<p>", "</p>\n"}, */
+ {"<br />\n",""},
+ {"", ""},
+ {"<h1>", "</h1>"},
+ {"<h2>", "</h2>"},
+ {"<h3>", "</h3>"},
+ {"<h4>", "</h4>"},
+ {"<h5>", "</h5>"},
+ {"<h6>", "</h6>"}
+};
+
+/* style_mappings maps the style numbers in a RTF style sheet into one of the*/
+/* (currently) six paragraph-oriented HTML styles (i.e. heading 1 through 6.)*/
+/* Additional styles for lists, etc. should be added here. Style info */
+/* ultimately should be read from some sort of config file into these tables.*/
+
+#define MAX_NAME_LEN 40
+char style_name[MAX_NAME_LEN];
+
+#define STYLE_NUMBER 7
+/* Style-sheet names recognized in the RTF \stylesheet group; slot i maps
+ * to HTML heading level i (slot 0 is "no heading"). */
+char *style_namings[STYLE_NUMBER] = {
+ "", "heading 1", "heading 2", "heading 3", "heading 4", "heading 5",
+ "heading 6"
+};
+/* style_mappings[i] holds the RTF style NUMBER (as a string) that maps to
+ * heading level i; filled by RTF_ParseStyle()/Initialize(). */
+char style_mappings[STYLE_NUMBER][MAX_RTF_TOKEN];
+char style_number[MAX_RTF_TOKEN];
+
+/* RTF tokens that mean something to the parser. All others are ignored. */
+
+/* Must stay in one-to-one correspondence with tokens[] below. */
+typedef enum {
+ t_start,
+ t_fonttbl, t_colortbl, t_stylesheet, t_info, t_s, t_b, t_ul, t_ulw,
+ t_uld, t_uldb, t_i, t_v, t_plain, t_par, t_pict, t_tab, t_bullet,
+ t_cell, t_row, t_line, t_endash, t_emdash, t_rquote,
+ t_end
+} TokenIndex;
+
+char *tokens[] = {
+ "###",
+ "fonttbl", "colortbl", "stylesheet", "info", "s", "b", "ul", "ulw",
+ "uld", "uldb", "i", "v", "plain", "par", "pict", "tab", "bullet",
+ "cell", "row", "line", "endash", "emdash", "rquote",
+ "###"
+};
+
+/* Style state saved per {} nesting level, and the currently open styles. */
+TStyleState style_state[MAX_LEVELS], curr_style;
+short curr_heading;
+
+/* Control-word handler; swapped between body and stylesheet parsing. */
+void (*RTF_DoControl)(FILE*,char*,char*);
+char isBody;
+char* title;
+//FILE* f;
+
+short level, /*current {} nesting level*/
+ skip_to_level,/*{} level to which parsing should skip (used to skip */
+ /* font tables, style sheets, color tables, etc.) */
+ gobble, /*Flag set to indicate all input should be discarded */
+ ignore_styles;/*Set to ignore inline style expansions after style use*/
+
+/* Charset */
+/* Active translation table: entry i holds the HTML text for char code i+1. */
+unsigned char** charset_table;
+
+#define CHARSET_DEFAULT 0 // Index of the default charset to use
+#define CHARSET_NUMBER 2 // Number of charset used
+#define CHARSET_MAX_LENGTH 20 // Max number of chars in the charset name
+// metadata used in rtf standard for the charset definition
+unsigned char *charset[CHARSET_NUMBER] = {
+ "ansi",
+ "mac"
+};
+// variable with the charset definition
+unsigned char **charset_variablename[CHARSET_NUMBER] = {
+ charset1252,
+ mac
+};
+
+/**************************************/
+
+/* Open the input RTF file, or fall back to stdin when filename is NULL.
+ * Side effect: sets the global `title` (used for the HTML <title>).
+ * Returns 1 on success, 0 if the file could not be opened. */
+int openfile (char * filename, FILE ** f)
+{
+ int rv = 1;
+
+ if (filename)
+ {
+ if (!(*f = fopen (filename, "r")))
+ {
+ fprintf (stderr, "\nError: Input file %s not found.\n", filename);
+ rv = 0;
+ }
+ else
+ {
+ title = filename;
+ }
+ }
+ else
+ {
+ *f = stdin;
+ title="STDIN";
+ }
+ return rv;
+}
+
+/**************************************/
+
+/* Close the input stream; returns fclose()'s result (0 on success). */
+int closefile (FILE * f)
+{
+ return fclose (f);
+}
+
+/**************************************/
+
+/* Read the next character, silently skipping CR and LF (RTF line breaks
+ * are not significant).  NOTE(review): fgetc()'s int is narrowed to char,
+ * so callers detect end of input with feof(f), not by comparing to EOF. */
+char RTF_GetChar( FILE* f )
+{
+ char ch;
+ do
+ {
+ ch = fgetc( f );
+ } while ((ch=='\r')||(ch=='\n'));
+ return ch;
+}
+
+/**************************************/
+
+/* Push one character back onto the stream (single-level pushback). */
+char RTF_UnGetChar(FILE* f, char ch)
+{
+ return ungetc(ch, f);
+}
+
+/**************************************/
+
+/* Emit a string to stdout unless output is currently suppressed (gobble). */
+void RTF_PutStr(char* s)
+{
+ if (gobble) return;
+ fputs(s, stdout);
+}
+
+/**************************************/
+
+/* Emit the HTML <head> section using the global `title`. */
+void RTF_PutHeader()
+{
+ RTF_PutStr("<head>\n<title>");
+ RTF_PutStr(title);
+ RTF_PutStr("</title>\n");
+ RTF_PutStr("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n");
+ RTF_PutStr("</head>\n");
+}
+
+/**************************************/
+
+/* Emit one body character, escaping HTML metacharacters.  On the first
+ * character of real content this lazily emits the <head> and opens <body>. */
+void RTF_PutChar(char ch)
+{
+ if (gobble) return;
+ if (!isBody)
+ {
+ RTF_PutHeader();
+ RTF_PutStr("<body>\n");
+ isBody=TRUE;
+ }
+ switch (ch) {
+ case '<':
+ RTF_PutStr("&lt;");
+ break;
+
+ case '>':
+ RTF_PutStr("&gt;");
+ break;
+
+ case '&':
+ RTF_PutStr("&amp;");
+ break;
+
+ default:
+ fputc(ch, stdout);
+ }
+}
+
+/**************************************/
+
+/* \plain handler: close every in-line style currently open in *s and
+ * clear the style bit set. */
+void RTF_PlainStyle (TStyleState* s)
+{
+ int i;
+ for(i=0;i<MAX_INLINE_STYLES;i++)
+ {
+ if(s->s & (1<<i))
+ RTF_PutStr(styles[i][1]);
+ }
+ s->s=0;
+}
+
+/**************************************/
+
+/* Open in-line style `style` (emit its start tag and set its bit) unless
+ * it is already open.  While ignore_styles is set (inside a mapped
+ * heading), only s_hidden is honoured. */
+void RTF_SetStyle(TStyleState* s, StyleState style)
+{
+ if( (!ignore_styles||(style==s_hidden)) && ((s->s&(1<<style))==0) )
+ {
+ RTF_PutStr(styles[style][0]);
+ s->s|=(1<<style);
+ }
+}
+
+/**************************************/
+
+/* '{' handler: save the current style state on the level stack.
+ * Aborts the program if nesting exceeds MAX_LEVELS. */
+void RTF_PushState(short* level)
+{
+ if(*level>=MAX_LEVELS)
+ {
+ fprintf(stderr,"Exceed maximum level\n");
+ exit(-1);
+ }
+ style_state[*level]=curr_style;
+ (*level)++;
+}
+
+/**************************************/
+
+/* '}' handler: restore the style state saved at the previous nesting
+ * level, emitting closing tags for styles that end here and re-opening
+ * tags for styles that were active in the outer scope.  Also cancels
+ * output suppression when we pop back to skip_to_level. */
+void RTF_PopState(short* level)
+{
+ int j;
+ TStyleState new_style;
+
+ if(*level<1)
+ {
+ fprintf(stderr,"RTF parse error: unexpected '}'\n");
+ exit(-1);
+ }
+ new_style = style_state[*level-1];
+ /*close off any in-line styles*/
+ for (j=0;j<MAX_INLINE_STYLES;j++)
+ {
+ if ( ((curr_style.s & (1<<j))!=0) && ((new_style.s & (1<<j))==0) )
+ {
+ curr_style.s &= ~(1<<j);
+ RTF_PutStr(styles[j][1]);
+ }
+ }
+
+ /*re-open styles that the outer scope still has active*/
+ for (j=0;j<MAX_INLINE_STYLES;j++)
+ {
+ if( ((curr_style.s & (1<<j))==0) && ((new_style.s & (1<<j))!=0) )
+ RTF_PutStr(styles[j][0]);
+ }
+ (*level)--;
+ curr_style = new_style;
+
+ if (*level == skip_to_level) {
+ skip_to_level = -1;
+ gobble = FALSE;
+ }
+}
+
+/**************************************/
+/* Map a style number into a HTML heading */
+
+/* Returns the heading level (1..6) whose recorded RTF style number
+ * matches string s, or 0 when s maps to no heading. */
+short RTF_MapStyle(char* s)
+{
+ int i;
+ for (i=0;i<7;i++)
+ if (!strcmp(style_mappings[i], s))
+ return (i);
+ return (0);
+}
+
+/**************************************/
+
+/* Record that RTF style `number` carries the style-sheet name `name`
+ * (e.g. "heading 1").  A trailing ';' on the name is stripped.  Names
+ * not present in style_namings[] are silently ignored. */
+void RTF_AddStyleMap(char* name, char* number)
+{
+ int i, len;
+ len=strlen(name);
+ if( name[len-1]==';') name[--len]=0;
+ for(i=0;i<STYLE_NUMBER;i++)
+ {
+ if(!strcasecmp(name,style_namings[i]))
+ {
+ strcpy(style_mappings[i],number);
+ return;
+ }
+ }
+}
+
+/**************************************/
+
+/* Append to `token` either the NUL-terminated string `ch` (is_string
+ * true) or the single character *ch (is_string false).  Input is
+ * silently dropped once the buffer is nearly full (MAX_NAME_LEN-1). */
+void RTF_BuildName(char* token, char* ch, unsigned is_string)
+{
+ int len;
+ char *p;
+ len = strlen(token);
+ if(len>=MAX_NAME_LEN-1)
+ return;
+ if (is_string)
+ {
+ for (p = ch; p && *p; ++p)
+ {
+ token[len]=*p;
+ ++len;
+ }
+ }
+ else
+ {
+ token[len] = *ch;
+ ++len;
+ }
+ token[len]='\0';
+}
+
+
+/**************************************/
+
+/* Reset a name buffer to the empty string. */
+void RTF_ClearName(char* token)
+{
+ token[0]=0;
+}
+
+/**************************************/
+
+/* Look up an RTF control word in tokens[]; returns its TokenIndex, or
+ * t_end when the word is not one we handle. */
+TokenIndex GetTokenIndex(char* control)
+{
+ TokenIndex i;
+
+ for (i=t_start; i<t_end; i++)
+ {
+ if(control[0]==tokens[i][0]) /* Added for fast compare */
+ {
+ if (!strcmp(control, tokens[i]))
+ {
+ break;
+ }
+ }
+ }
+ return i;
+}
+
+/**************************************/
+
+/* Control handler used while parsing the \stylesheet group: remembers
+ * the argument of each \s control word (the style number). */
+void RTF_DoStyleControl (FILE* f, char* control, char* arg)
+{
+ if(GetTokenIndex(control)==t_s)
+ {
+ strcpy(style_number,arg);
+ }
+}
+
+/**************************************/
+
+/* Convert one hex digit (0-9, A-Z, a-z) to its numeric value, or -1 on
+ * any other character. */
+int chartoi(char ch)
+{
+ if((ch>='0')&&(ch<='9'))
+ return ch-'0';
+ if((ch>='A')&&(ch<='Z'))
+ return ch-'A'+10;
+ if((ch>='a')&&(ch<='z'))
+ return ch-'a'+10;
+ return -1;
+}
+
+/**************************************/
+
+/* Collect the (optionally negative) numeric argument that follows an RTF
+ * control word into `arg`, starting from the already-read character ch.
+ * Oversized arguments are truncated to MAX_RTF_TOKEN-1 digits and the
+ * rest consumed.  A non-space terminator is pushed back; a space is
+ * swallowed, as the RTF syntax requires. */
+void RTF_BuildArg (FILE * f, char ch, char* arg)
+{
+ int i=0;
+
+ if(feof(f))
+ {
+ arg[0]=0;
+ return;
+ }
+ if(ch=='-')
+ {
+ arg[i++]='-';
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ {
+ arg[0]=0;
+ return;
+ }
+ }
+ for(;isdigit(ch);i++)
+ {
+ arg[i]=ch;
+ if(i>=MAX_RTF_TOKEN-1)
+ {
+ arg[MAX_RTF_TOKEN-1]=0;
+ while(isdigit(ch)) {
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ return;
+ }
+ break;
+ }
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ {
+ arg[i+1]=0;
+ return;
+ }
+ }
+ arg[i]=0;
+ if(!isspace(ch))
+ {
+ RTF_UnGetChar(f, ch);
+ }
+}
+
+/**************************************/
+
+/* Accumulate an alphabetic control word starting with ch, read its
+ * argument, and dispatch both through the current RTF_DoControl handler.
+ * Words longer than MAX_RTF_TOKEN-1 are consumed but not dispatched. */
+void RTF_BuildToken (FILE* f, char ch)
+{
+ int i;
+
+ for(i=1;;i++)
+ {
+ char token[MAX_RTF_TOKEN], arg[MAX_RTF_TOKEN];
+ token[i-1]=ch;
+ if(i>=MAX_RTF_TOKEN-1)
+ {
+ /* token too long: drain the rest of the word and its argument */
+ do {
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ return;
+ } while (isalpha(ch));
+ RTF_BuildArg(f, ch,arg);
+ return;
+ }
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ {
+ token[i]=0;
+ RTF_DoControl(f,token,"");
+ return;
+ }
+ if( !isalpha(ch) )
+ {
+ token[i]=0;
+ RTF_BuildArg(f, ch,arg);
+ RTF_DoControl(f,token,arg);
+ return;
+ }
+ }
+}
+
+/**************************************/
+
+/* Handle a '\' seen in the RTF input.  Decodes the literal escapes
+ * (\\, \{, \}), hex escapes (\'hh), the \* "skip unknown destination"
+ * marker, and otherwise dispatches control words via RTF_BuildToken().
+ * On return *pf is TRUE when *pch points at the HTML replacement text
+ * for a literal character; otherwise *pch is left untouched.
+ *
+ * FIX: charset_table[i] holds the text for character code i+1 (the
+ * tables start at entry "1", and the \'hh case below indexes ch-1), so
+ * the \\/\{/\} cases must index with code-1 as well; the previous raw
+ * indices 92/123/125 emitted the wrong characters.  Also guard \'00,
+ * which would otherwise index charset_table[-1]. */
+void RTF_backslash(FILE* f, char** pch, char* pf)
+{
+ int ch;
+ *pf=FALSE;
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ {
+ fprintf(stderr,"Unexpected end of file\n");
+ return;
+ }
+ switch (ch)
+ {
+ case '\\':
+ *pch=charset_table['\\'-1]; *pf=TRUE;
+ break;
+ case '{':
+ *pch=charset_table['{'-1]; *pf=TRUE;
+ break;
+ case '}':
+ *pch=charset_table['}'-1]; *pf=TRUE;
+ break;
+ case '*':
+ gobble = TRUE; /*perform no output, ignore commands 'til level-1*/
+ if(skip_to_level>level-1||skip_to_level==-1)
+ skip_to_level = level-1;
+ break;
+ case '\'':
+ {
+ char ch1, ch2;
+ ch1 = RTF_GetChar( f );
+ ch2 = RTF_GetChar( f );
+ if(!feof(f))
+ {
+ if(isxdigit(ch1)&&isxdigit(ch2))
+ {
+ ch = chartoi(ch1)*16+chartoi(ch2);
+ if(ch>0) /* \'00 would index charset_table[-1] */
+ {
+ *pch = charset_table[ch-1]; *pf=TRUE;
+ }
+ } else {
+ fprintf(stderr,"RTF Error: unexpected '%c%c' after \\\'\n",ch1,ch2);
+ }
+ }
+ break;
+ }
+ default:
+ if (isalpha(ch))
+ {
+ RTF_BuildToken(f, ch);
+ } else {
+ fprintf(stderr, "\nRTF Error: unexpected '%c' after \\.\n", ch);
+ }
+ break;
+ }
+}
+
+/**************************************/
+
+/* Parse the \stylesheet group: for each nested {...} entry, collect the
+ * style's display name (via RTF_backslash/RTF_BuildName) and its \s
+ * number (via the temporarily-installed RTF_DoStyleControl handler),
+ * then record the pair with RTF_AddStyleMap.  Returns when the group's
+ * closing '}' is reached, restoring the previous control handler and
+ * pushing the '}' back for the caller. */
+void RTF_ParseStyle(FILE * f)
+{
+ char ch, pf;
+ char *code;
+ int level0;
+ void (*PrevDoControl)(FILE*,char*,char*);
+
+ level0=level;
+ PrevDoControl=RTF_DoControl;
+ RTF_DoControl=RTF_DoStyleControl;
+
+ RTF_ClearName(style_name);
+ style_number[0]=0;
+ while (1)
+ {
+ ch = RTF_GetChar( f );
+ if(feof(f))
+ break;
+ switch (ch)
+ {
+ case '\\':
+ RTF_backslash(f, &code, &pf);
+ if(pf)
+ {
+ RTF_BuildName(style_name, code, 1);
+ } else {
+ RTF_ClearName(style_name);
+ }
+ break;
+
+ case '{':
+ level++;
+ RTF_ClearName(style_name);
+ break;
+
+ case '}':
+ if(level0+1==level)
+ {
+ /* end of one style entry: record name -> number */
+ if(style_number[0]!=0)
+ {
+ RTF_AddStyleMap(style_name,style_number);
+ style_number[0]=0;
+ }
+ } else if(level0==level) {
+ /* end of the whole stylesheet group */
+ RTF_DoControl=PrevDoControl;
+ RTF_UnGetChar(f, ch);
+ return;
+ }
+ level--;
+ RTF_ClearName(style_name);
+ break;
+
+ default:
+ RTF_BuildName(style_name, &ch, 0);
+ break;
+ }
+ } /* while */
+}
+
+/**************************************/
+/* Perform actions for RTF control words */
+
+/* Control handler for the document body: translates the RTF control
+ * words listed in tokens[] into HTML (styles, headings, paragraph and
+ * line breaks) and arranges for non-text groups (font table, colour
+ * table, stylesheet, info, pictures) to be skipped. */
+void RTF_DoBodyControl (FILE * f, char* control,char* arg)
+{
+ short style;
+
+ if (gobble) return;
+
+ switch (GetTokenIndex(control))
+ {
+ case t_stylesheet:
+ gobble = TRUE; /*perform no output, ignore commands 'til level-1*/
+ skip_to_level = level-1;
+ RTF_ParseStyle( f );
+ break;
+ case t_fonttbl: /*skip all of these and their contents!*/
+ case t_colortbl:
+ case t_info:
+ gobble = TRUE; /*perform no output, ignore commands 'til level-1*/
+ skip_to_level = level-1;
+ break;
+ case t_pict:
+ gobble = TRUE; /*perform no output, ignore commands 'til level-1*/
+ if(skip_to_level>=level || skip_to_level==-1)
+ skip_to_level = level-1;
+ break;
+
+
+ case t_s: /*Style*/
+ /* open the mapped heading tag; inline styles are suppressed
+ * until the paragraph ends (ignore_styles) */
+ if (!curr_heading)
+ {
+ style = RTF_MapStyle (arg);
+ if(style)
+ {
+ curr_heading = s_h0 + style;
+ RTF_PutStr(styles[curr_heading][0]);
+ ignore_styles = TRUE;
+ }
+ }
+ break;
+
+ case t_b: /*Bold*/
+ RTF_SetStyle(&curr_style,s_bold);
+ break;
+
+ case t_ulw:
+ case t_uld:
+ case t_uldb:
+ case t_ul: /*Underline, maps to "emphasis" HTML style*/
+ RTF_SetStyle(&curr_style,s_underline);
+ break;
+
+ case t_i: /*Italic*/
+ RTF_SetStyle(&curr_style,s_italic);
+ break;
+
+ case t_v: /* Hidden*/
+ RTF_SetStyle(&curr_style,s_hidden);
+ break;
+
+ case t_par: /*Paragraph*/
+ /* close an open heading, otherwise emit a paragraph break */
+ if (curr_heading!=s_plain) {
+ RTF_PutStr(styles[curr_heading][1]);
+ curr_heading = s_plain;
+ } else {
+ RTF_PutStr(styles[s_para][0]);
+ }
+ ignore_styles = FALSE;
+ break;
+
+ case t_plain: /*reset inline styles*/
+ RTF_PlainStyle(&curr_style);
+ break;
+ case t_cell:
+ case t_tab:
+ RTF_PutChar(' ');
+ break;
+ case t_endash:
+ case t_emdash:
+ RTF_PutChar('-');
+ break;
+ case t_line:
+ case t_row:
+ RTF_PutStr(styles[s_br][0]);
+ break;
+ case t_bullet:
+ RTF_PutChar('\xb7');
+ break;
+ case t_start:
+ case t_end:
+ break;
+ case t_rquote:
+ //RTF_PutStr("&rsquo;");
+ RTF_PutStr("'");
+ break;
+ }
+
+}
+
+/**************************************/
+/* RTF_Parse is a crude, ugly state machine that understands enough of */
+/* the RTF syntax to be dangerous. */
+
+/* Main body scanner: walks the input character by character, tracking
+ * {} nesting with RTF_PushState/RTF_PopState, routing '\' sequences to
+ * RTF_backslash, and emitting everything else through RTF_PutChar. */
+void RTF_ParseBody( FILE* f )
+{
+ char ch, pf;
+ char* code;
+
+ RTF_DoControl=RTF_DoBodyControl;
+ level = 0;
+ skip_to_level = -1;
+ gobble = FALSE;
+ ignore_styles = FALSE;
+
+ while (1)
+ {
+ ch = RTF_GetChar( f );
+ if (feof(f))
+ {
+ break;
+ }
+ switch (ch)
+ {
+ case '\\':
+ RTF_backslash(f, &code,&pf);
+ if(pf && code)
+ RTF_PutStr(code);
+ break;
+
+ case '{':
+ RTF_PushState(&level);
+ break;
+
+ case '}':
+ RTF_PopState(&level);
+ break;
+
+ default:
+ RTF_PutChar(ch);
+ break;
+ }
+ }/*while*/
+}
+
+/**************************************/
+
+/* Top-level conversion: emit the HTML wrapper and convert the whole
+ * stream.  The <head>/<body> opening is deferred to RTF_PutChar, so an
+ * input with no body text produces no <body> section.  Returns 0. */
+int RTF_Parse (FILE* f)
+{
+ RTF_PutStr("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n<html>\n");
+
+ isBody=FALSE;
+
+ RTF_ParseBody(f);
+
+ if (isBody) RTF_PutStr("</body>\n");
+
+ RTF_PutStr("</html>\n");
+
+ return 0;
+}
+
+/**************************************/
+
+/* Reset global parser state and install the default style-number ->
+ * heading mapping (255..250 map to headings 1..6), which is overridden
+ * when the document's own \stylesheet group is parsed. */
+void Initialize()
+{
+ int i;
+
+ for (i=0;i<MAX_LEVELS;i++)
+ style_state[i].s=s_plain;
+
+ curr_style.s=s_plain;
+ curr_heading = s_plain;
+
+ // Set default styles maping
+ style_mappings[0][0]=0;
+ for(i=1;i<=6;i++)
+ sprintf(style_mappings[i],"%d",256-i);
+}
+
+/**************************************/
+
+/* Scan the start of the RTF stream for the charset keyword (the second
+ * control word, e.g. \ansi or \mac) and point charset_table at the
+ * matching translation table, falling back to the default charset when
+ * the keyword is unknown.  The caller rewind()s the stream afterwards.
+ * Always returns 1.
+ *
+ * FIX: code[] was written without any bound (a long second control word
+ * overflowed the 20-byte buffer) and was compared with strcmp() while
+ * uninitialized/unterminated when the file ended before the third '\';
+ * both are now guarded. */
+int RTF_FindCharset(FILE * f)
+{
+ char ch;
+ char code[CHARSET_MAX_LENGTH];
+ int metadata = 0;
+ int i = 0;
+
+ while ( !feof(f) )
+ {
+ ch = RTF_GetChar( f );
+ if ( ch == '\\' )
+ {
+ metadata++;
+ }
+ if ( metadata == 2 ) // the second metadata is the charset used
+ {
+ if ( ch != '\\' && i < CHARSET_MAX_LENGTH-1 ) /* keep room for NUL */
+ {
+ code[i] = ch;
+ i++;
+ }
+ }
+ if ( metadata > 2 )
+ {
+ break;
+ }
+ }
+ code[i] = '\0'; /* terminate even on early EOF */
+
+
+ for ( i = 0; i < CHARSET_NUMBER ; i++)
+ {
+ if ( strcmp( (const char *)charset[i], (const char *) code ) == 0 )
+ {
+ charset_table = charset_variablename[i];
+ break;
+ };
+ }
+ if ( i == CHARSET_NUMBER )
+ {
+ charset_table = charset_variablename[CHARSET_DEFAULT];
+ }
+
+ return 1; // always true!
+}
+
+/**************************************/
+
+/* Entry point: handle --help/--version, otherwise open the named file,
+ * detect its charset, rewind, convert to HTML on stdout and close.
+ *
+ * FIX: the old rv chaining returned 0 (success) to the shell when
+ * openfile() FAILED, and skipped closefile() on the success path
+ * (RTF_Parse returns 0, so "if (rv) closefile" never ran).  Exit
+ * status is now the conventional 0 on success, 1 on failure, and the
+ * file is always closed once opened. */
+int main(int argc,char** argv)
+{
+ int ok = 1;
+ FILE *f = NULL;
+
+ Initialize();
+
+ if ( argc > 1)
+ {
+ if( strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-H")==0 )
+ {
+ printf("Use: %s [rtf_filename]\n",argv[0]);
+ } else if ( strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0 ) {
+ printf("rtf2html version 1.2\n");
+ }
+ else
+ {
+ ok = openfile(argv[1], &f);
+ if ( ok ) ok = RTF_FindCharset(f);
+ if ( ok )
+ {
+ rewind(f);
+ ok = (RTF_Parse(f) == 0);
+ }
+ if ( f ) ok = (closefile(f) == 0) && ok;
+ }
+ }
+ else
+ {
+ printf("Use: %s [rtf_filename]\n",argv[0]);
+ }
+ return ok ? 0 : 1;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh b/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh
new file mode 100644
index 00000000..a5884f2c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+CfgFile=/www/search.sbs.de/test/conf/htfig.conf
+BinDir=/www/search.sbs.de/test/bin
+CgiBinDir=/www/search.sbs.de/test/cgi-bin
+DataDir=/www/search.sbs.de/data/robot
+Date=`date +%y%m%d`
+
+date > $DataDir/$Date-runtime
+$BinDir/htdig -v -t -s -c $CfgFile >> $DataDir/$Date-robot
+$BinDir/htmerge -v -c $CfgFile >> $DataDir/$Date-robot
+date >> $DataDir/$Date-runtime
+
+$BinDir/whatsnew.pl -v > $DataDir/$Date-whatsnew
+sort $BinDir/urls | uniq > $DataDir/$Date-urls
+
+rm -f $DataDir/current-*
+ln -s $DataDir/$Date-runtime $DataDir/current-runtime
+ln -s $DataDir/$Date-robot $DataDir/current-robot
+ln -s $DataDir/$Date-urls $DataDir/current-urls
+
+$BinDir/status.pl -v > $DataDir/$Date-status
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README
new file mode 100644
index 00000000..9b94ec5d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README
@@ -0,0 +1,16 @@
+This is a small example to demonstrate the script_name attribute.
+
+
+Assuming that these files are located within your server's "search"
+directory, just add the following line to your htdig configuration
+file:
+
+script_name: /search/results.shtml
+
+You may also have to override the standard template files, using the
+search_results_header, search_results_footer, syntax_error_file and
+nothing_found_file attributes.
+
+
+(c) 1999, Hanno Mueller, http://www.hanno.de
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml
new file mode 100644
index 00000000..86e09563
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml
@@ -0,0 +1,17 @@
+<!--
+ -- script_name example using SSI
+ -- This is the results page.
+ -- Note the server side include directive calling /cgi-bin/htsearch.
+ -- The page's parameters will be handed over to htsearch.
+ -->
+
+<html><head><title>Search results (SHTML)</title></head>
+<body bgcolor="#eef7ff">
+<h2><img src="/htdig/htdig.gif">
+
+<!--#exec cgi="/cgi-bin/htsearch" -->
+
+<hr noshade size=4>
+<a href="http://www.htdig.org">
+<img src="/htdig/htdig.gif" border=0>ht://Dig</a>
+</body></html>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html
new file mode 100644
index 00000000..b6f80e97
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html
@@ -0,0 +1,53 @@
+<!--
+ -- script_name example using SSI
+ -- This is the standard search page (no dynamic stuff),
+ -- with two minor differences: The form uses the "get"
+ -- method and the "action" sends the form input to the
+ -- the dynamic results page.
+ -->
+
+<html>
+<head>
+<title>ht://Dig WWW Search</title>
+</head>
+<body bgcolor="#eef7ff">
+<h1>
+<a href="http://www.htdig.org"><IMG SRC="/htdig/htdig.gif" align=bottom alt="ht://Dig" border=0></a>
+WWW Site Search</H1>
+<hr noshade size=4>
+This search will allow you to search the contents of
+all the publicly available WWW documents at this site.
+<br>
+<p>
+<form method="get" action="/search/results.shtml">
+<font size=-1>
+Match: <select name=method>
+<option value=and>All
+<option value=or>Any
+<option value=boolean>Boolean
+</select>
+Format: <select name=format>
+<option value=builtin-long>Long
+<option value=builtin-short>Short
+</select>
+Sort by: <select name=sort>
+<option value=score>Score
+<option value=time>Time
+<option value=title>Title
+<option value=revscore>Reverse Score
+<option value=revtime>Reverse Time
+<option value=revtitle>Reverse Title
+</select>
+</font>
+<input type=hidden name=config value="htdig-ssi">
+<input type=hidden name=restrict value="">
+<input type=hidden name=exclude value="">
+<br>
+Search:
+<input type="text" size="30" name="words" value="">
+<input type="submit" value="Search">
+</form>
+<hr noshade size=4>
+</body>
+</html>
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE
new file mode 100644
index 00000000..b6a82833
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE
@@ -0,0 +1,2 @@
+These are the standard template files, minus the standard start and
+ending of the HTML that is already in the dynamic results page.
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html
new file mode 100644
index 00000000..67938f89
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html
@@ -0,0 +1,2 @@
+$(PAGEHEADER)
+$(PREVPAGE) $(PAGELIST) $(NEXTPAGE)
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html
new file mode 100644
index 00000000..41503364
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html
@@ -0,0 +1,22 @@
+Search results for '$(LOGICAL_WORDS)'</h2>
+<hr noshade size=4>
+<form method="get" action="$(CGI)">
+<font size=-1>
+<input type=hidden name=config value=$(CONFIG)>
+<input type=hidden name=restrict value="$(RESTRICT)">
+<input type=hidden name=exclude value="$(EXCLUDE)">
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort by: $(SORT)
+<br>
+Refine search:
+<input type="text" size="30" name="words" value="$(WORDS)">
+<input type="submit" value="Search">
+</select>
+</font>
+</form>
+<hr noshade size=1>
+<b>Documents $(FIRSTDISPLAYED) - $(LASTDISPLAYED) of $(MATCHES) matches.
+More <img src="/htdig/star.gif" alt="*">'s indicate a better match.
+</b>
+<hr noshade size=1>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html
new file mode 100644
index 00000000..57ea8dcc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html
@@ -0,0 +1,6 @@
+<dl><dt><strong><a href="$(URL)">$(TITLE)</a></strong>$(STARSLEFT)
+</dt><dd>$(EXCERPT)<br>
+<i><a href="$(URL)">$(URL)</a></i>
+<font size=-1>$(MODIFIED), $(SIZE) bytes</font>
+</dd></dl>
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html
new file mode 100644
index 00000000..840e4098
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html
@@ -0,0 +1,30 @@
+Search results</h1>
+<hr noshade size=4>
+<h2>No matches were found for '$(LOGICAL_WORDS)'</h2>
+<p>
+Check the spelling of the search word(s) you used.
+If the spelling is correct and you only used one word,
+try using one or more similar search words with "<b>Any</b>."
+</p><p>
+If the spelling is correct and you used more than one
+word with "<b>Any</b>," try using one or more similar search
+words with "<b>Any</b>."</p><p>
+If the spelling is correct and you used more than one
+word with "<b>All</b>," try using one or more of the same words
+with "<b>Any</b>."</p>
+<hr noshade size=4>
+<form method="get" action="$(CGI)">
+<font size=-1>
+<input type=hidden name=config value=$(CONFIG)>
+<input type=hidden name=restrict value="$(RESTRICT)">
+<input type=hidden name=exclude value="$(EXCLUDE)">
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort by: $(SORT)
+<br>
+Refine search:
+<input type="text" size="30" name="words" value="$(WORDS)">
+<input type="submit" value="Search">
+</select>
+</font>
+</form>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html
new file mode 100644
index 00000000..b5044b31
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html
@@ -0,0 +1 @@
+$(STARSRIGHT) <strong><a href="$(URL)">$(TITLE)</a></strong><br>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html
new file mode 100644
index 00000000..feddac71
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html
@@ -0,0 +1,27 @@
+Error in Boolean search for '$(LOGICAL_WORDS)'</h1>
+<hr noshade size=4>
+Boolean expressions need to be 'correct' in order for the search
+system to use them.
+The expression you entered has errors in it.<p>
+Examples of correct expressions are: <b>cat and dog</b>, <b>cat
+not dog</b>, <b>cat or (dog not nose)</b>.<br>Note that
+the operator <b>not</b> has the meaning of 'without'.
+<blockquote><b>
+$(SYNTAXERROR)
+</b></blockquote>
+<hr noshade size=4>
+<form method="get" action="$(CGI)">
+<font size=-1>
+<input type=hidden name=config value=$(CONFIG)>
+<input type=hidden name=restrict value="$(RESTRICT)">
+<input type=hidden name=exclude value="$(EXCLUDE)">
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort: $(SORT)
+<br>
+Refine search:
+<input type="text" size="30" name="words" value="$(WORDS)">
+<input type="submit" value="Search">
+</select>
+</font>
+</form>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/status.pl b/debian/htdig/htdig-3.2.0b6/contrib/status.pl
new file mode 100755
index 00000000..25ddeda8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/status.pl
@@ -0,0 +1,258 @@
+#!/usr/local/bin/perl
+#
+# status.pl v1.0 960413 Iain Lea (iain@sbs.de)
+#
+# ChangeLog
+# 960413 IL
+#
+# Produces a HTML 'Search Engine Status' page with last 5 runs
+# and 'Top 10' servers by #URLS indexed.
+#
+# Usage: status.pl [options]
+#	-h	help
+#	-F file	HTML footer
+#	-H file	HTML header
+#	-o file	HTML generated file
+#	-v	verbose
+#
+# TODO
+
+require 'timelocal.pl';
+require 'getopts.pl';
+require '/www/search.sbs.de/bin/sbs.pl';	# site library; presumably provides &ReadRobotInfo and &PrintFooterHTML used below — verify
+
+$DataDir = '/www/search.sbs.de/data/robot';	# holds current-runtime, current-robot and YYMMDD-info files
+$RunTimeFile = "$DataDir/current-runtime";	# two `date`-style lines: run start, then run end
+$RobotFile = "$DataDir/current-robot";	# htdig log: per-server document counts plus word total
+$IndexFile = '/www/search.sbs.de/test/db/db.wordlist';	# only stat()ed for its size
+
+$DefOutputFile = '/www/search.sbs.de/test/pub/status.html';
+$TmpFile = "/tmp/status.$$";	# NOTE(review): never used anywhere in this script
+$DefFooter = '';
+$DefHeader = '';
+$Verbose = 0;
+$Top10Servers = 10;	# number of rows in the "Top N servers" table
+
+&ParseCmdLine;
+
+print "Generating status.html...\n" if $Verbose;
+
+&ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile);
+&WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter);
+
+exit 1;	# NOTE(review): exits non-zero even on success — confirm callers expect this
+
+#############################################################################
+# Subroutines
+#
+
+sub ParseCmdLine	# parse -F/-H/-o/-v/-h switches into the global defaults
+{
+    &Getopts ('F:hH:o:v');
+
+    if ($opt_h ne "") {
+	print <<EndOfHelp
+Produce an HTML 'Status' page of last 5 runs and Top 10 servers by #URLS.
+
+Usage: $0 [options]
+	-h	help
+	-F file	HTML footer
+	-H file	HTML header
+	-o file	HTML generated file
+	-v	verbose
+
+EndOfHelp
+;
+	exit 0;
+    }
+    $DefFooter = $opt_F if ($opt_F ne "");	# BUGFIX: previously tested $opt_H, so -F without -H was silently ignored
+    $DefHeader = $opt_H if ($opt_H ne "");
+    $DefOutputFile = $opt_o if ($opt_o ne "");
+    $Verbose = 1 if ($opt_v ne "");
+}
+
+sub ReadDataFiles	# gather run times, index size and per-server URL counts; writes $DataDir/YYMMDD-info
+{
+    my ($RunTimeFile, $RobotFile, $IndexFile) = @_;
+    my ($IndexSize, $NumWords, $NumURLS, $NumServers);
+    my ($BegTime, $EndTime, $RunDate, $RunTime, $Key);
+    my (%Months) = (
+	'Jan', '0', 'Feb', '1', 'Mar', '2', 'Apr', '3', 'May', '4', 'Jun', '5',
+	'Jul', '6', 'Aug', '7', 'Sep', '8', 'Oct', '9', 'Nov', '10', 'Dec', '11' );
+
+    # RunDate : RunTime
+
+    open (TIME, "$RunTimeFile") || die "Error: $RunTimeFile - $!\n";	# expects two `date` lines: start, then end
+    while (<TIME>) {
+	chop;
+	if (! $EndTime && $BegTime) {	# second line: end of run
+	    # Sat Apr 13 12:57:52 MET DST 1996
+	    /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... ([0-9]{4}$)/;	# NOTE(review): captures go stale if this ever fails to match
+	    $EndTime = timelocal ($5, $4, $3, $2, $Months{$1}, $6 - 1900);	# timelocal wants year-1900
+	    $RunTime = $EndTime - $BegTime;
+	    $RunTime = sprintf ("%02d%02d", $RunTime/3600, ($RunTime%3600)/60);	# elapsed time as HHMM
+	    print "END=[$_] [$EndTime] [$RunTime]\n" if $Verbose;
+	}
+	if (! $BegTime) {	# first line: start of run
+	    # Sat Apr 13 12:57:52 MET DST 1996
+	    /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... ([0-9]{4}$)/;
+	    $Mon = $Months{$1};
+	    $Year = $6 - 1900;
+	    $BegTime = timelocal ($5, $4, $3, $2, $Mon, $Year);
+	    $RunDate = sprintf ("%02d%02d%02d", $Year, $Mon+1, $2);	# YYMMDD ($2 = day of month from the match above)
+	    print "BEG=[$_] [$BegTime] [$RunDate]\n" if $Verbose;
+	}
+    }
+    close (TIME);
+
+    # IndexSize : NumWords : NumURLS : NumServers
+
+    @StatData = stat ($IndexFile);
+    $IndexSize = $StatData[7];	# wordlist size in bytes
+    print "SIZE=[$IndexSize]\n" if $Verbose;
+
+    # NumWords : NumURLS : NumServers
+
+    $NumWords = $NumURLS = $NumServers = 0;
+
+    open (ROBOT, "$RobotFile") || die "Error: $RobotFile - $!\n";
+    while (<ROBOT>) {
+	if (/^htdig:\s+(.*)\s+([0-9]*)\s+documents$/) {	# "htdig: <server> <N> documents"
+	    $NumURLS += $2;
+	    $NumServers++;
+	    if ($2 > 0) {
+		$Key = sprintf ("%07d|%s", $2, $1);	# zero-padded count first, so a plain string sort orders numerically
+		$Top10ByName{$Key} = $2;
+	    }
+	    print "SERVER=[$1] DOCS=[$2]\n" if $Verbose;
+	} elsif (/^Read\s+([0-9]*)\s+words$/) {
+	    $NumWords = $1;
+	    print "WORDS=[$NumWords]\n" if $Verbose;
+	}
+    }
+    close (ROBOT);
+
+    # Write data to YYMMDD-info file
+
+    $InfoFile = "$DataDir/$RunDate-info";
+    $CurrFile = "$DataDir/current-info";
+
+    open (INFO, ">$InfoFile") || die "Error: $InfoFile - $!\n";
+    print "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n" if $Verbose;
+    print INFO "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n";
+    close (INFO);
+    unlink ($CurrFile);
+    symlink ($InfoFile, $CurrFile);	# repoint current-info at this run's file
+}
+
+sub WriteStatus	# render status.html: summary line, last-5-runs table, Top-10-servers table
+{
+    my ($DataDir, $OutFile, $Header, $Footer) = @_;
+
+    $RobotInfo = &ReadRobotInfo ("$DataDir/current-info");	# presumably defined in sbs.pl — verify
+
+    open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n";
+
+    &PrintBoilerPlate ($Header, 1);
+
+    print HTML <<EOT
+<p>
+<strong>$RobotInfo</strong>
+<p>
+<table border=2 width=400>
+<caption>Table of last 5 robot runs.</caption>
+<th>Run Date<th>Run Time<th># Servers<th># URL's<th># Words<th>Index (MB)
+<tr>
+EOT
+;
+    # read YYMMDD-info files
+    opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n";
+    @InfoFiles = grep (/^[0-9]{6}-info$/, readdir (DIR));
+    closedir (DIR);
+    @InfoFiles = reverse (sort (@InfoFiles));	# newest first: YYMMDD names sort chronologically
+
+    splice (@InfoFiles, 5) if (@InfoFiles > 5);	# BUGFIX: old slice @InfoFiles[0,1,2,3,4] padded undef entries when fewer than 5 runs existed, making the open below die
+    foreach $File (@InfoFiles) {
+	$File = "$DataDir/$File";
+	open (INFO, "$File") || die "Error: $File - $!\n";
+	chop (($_ = <INFO>));
+	($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers) = split (':');
+	$IndexSize = sprintf ("%.1f", $IndexSize / (1024*1024));	# bytes -> MB
+	$RunTime =~ /(..)(..)/;
+	$RunTime = "$1:$2";	# HHMM -> HH:MM
+	print HTML <<EOT
+<td align="center">$RunDate</td>
+<td align="center">$RunTime</td>
+<td align="right">$NumServers</td>
+<td align="right">$NumURLS</td>
+<td align="right">$NumWords</td>
+<td align="right">$IndexSize</td>
+<tr>
+EOT
+;
+	close (INFO);
+    }
+
+    print HTML <<EOT
+</table>
+<p>
+<p>
+<table border=2 width=400>
+<caption>Table of Top 10 servers listed by number of indexed documents.</caption>
+<th>Top 10 Servers<th># URL's
+<tr>
+EOT
+;
+    $NumServers = 0;
+    foreach $Key (reverse (sort (keys (%Top10ByName)))) {	# keys are "0000123|server", so this is numeric-descending
+	if ($NumServers < $Top10Servers) {
+	    $NumServers++;
+	    $NumURLS = $Top10ByName{$Key};
+	    $Key =~ /^[0-9]*\|(.*)$/;	# strip the padded count to recover the server name
+	    $Server = $1;
+	    $Server =~ s/:80$//;	# drop the default-port suffix
+	    print HTML <<EOT
+<td width="80%" align="left"><a href="http://$Server/">$Server</a></td>
+<td width="20%" align="right">$NumURLS</td>
+<tr>
+EOT
+;
+	}
+    }
+
+    print HTML "</table>\n";
+
+    &PrintBoilerPlate ($Footer, 0);
+
+    close (HTML);
+}
+
+sub PrintBoilerPlate	# copy $File verbatim into HTML if given+existing, else emit a built-in default header/footer
+{
+    my ($File, $IsHeader) = @_;
+
+    if ($File ne "" && -e $File) {
+	open (FILE, $File) || die "Error: $File - $!\n";
+	while (<FILE>) {
+	    print HTML;	# HTML filehandle is opened by the caller (WriteStatus)
+	}
+	close (FILE);
+    } else {
+	if ($IsHeader) {
+	    print HTML <<EOT
+<html>
+<head>
+<title>Search Engine Status</title>
+</head>
+<body>
+<h2>Search Engine Status</h2>
+<hr>
+<p>
+EOT
+;
+	} else {
+	    &PrintFooterHTML;	# presumably defined in sbs.pl — verify
+	}
+    }
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl b/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl
new file mode 100755
index 00000000..436c5eef
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl
@@ -0,0 +1,285 @@
+#!/usr/local/bin/perl
+
+##
+## urlindex.pl (C) 1995 Andrew Scherpbier
+##
+## This program will build an index of all the URLs in the
+## htdig document database.
+##
+
+use GDBM_File;
+require('SDSU_www.pl');	# site library; provides &www_logo_2 used below
+
+# $dbfile = "/gopher/www/htdig/sdsu3d.docdb";	# (dead assignment commented out — it was unconditionally overwritten below)
+$dbfile = "/tmp/db.docdb";
+$exclude = "rohan.sdsu.edu\\/home\\/";	# documents whose key matches this regex are skipped
+
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!";
+
+print "Reading...\n";
+
+##
+## Read in all the relevant data.
+##
+while (($key, $value) = each %docdb)
+{
+    next if $key =~ /^nextDocID/;
+    %record = parse_ref_record($value);
+    next if $record{"STATE"} == 1;	# BUGFIX(minor): numeric field; was a string 'eq' compare
+    next if $key =~ /$exclude/;
+
+    $title = $record{"TITLE"};
+
+    ##
+    ## Get rid of starting and trailing whitespace junk
+    ##
+    $title =~ s/^[ \t\n\r]*//;
+    $title =~ s/[ \t\n\r]*$//;
+
+    ##
+    ## If the title starts with 'the', it will be taken out and added
+    ## to the end of the title.  This means that a title like "The
+    ## Homepage of X" will become "Homepage of X, The"
+    ##
+    if ($title =~ /^the /i)
+    {
+	$title = substr($title, 4) . ", " . substr($title, 0, 3);
+    }
+    if ($title =~ /^SDSU /)
+    {
+	$title = substr($title, 5) . ", " . substr($title, 0, 4);
+    }
+    if ($title =~ /^San Diego State University /i)
+    {
+	$title = substr($title, 27) . ", " . substr($title, 0, 26);
+    }
+    $value = $title;
+    $value =~ tr/A-Z/a-z/;
+    $titles{$value} = "$title\001$key";	# lowercase sort key -> "Title\001URL"
+    push(@unsorted, $value);
+}
+
+$current = " ";	# letter whose per-letter index file is currently open (space = none yet)
+open(M, ">index.html");
+print M "<html><head><title>Index of all documents at SDSU</title></head>\n";
+print M "<body>\n";
+print M &www_logo_2("Index of all documents at SDSU");
+print M "<p>This is a list of WWW documents that were found while indexing all\n";
+print M "the publicly available WWW servers at San Diego State University.\n";
+print M "The documents are indexed by their titles.\n";
+print M "</p><h2>\n";
+
+$previous = "";
+
+print "Writing...\n";
+
+foreach $value (sort @unsorted)
+{
+    next if $value eq $previous;
+    $previous = $value;	# collapse duplicate titles
+    next if !($value =~ /^[a-zA-Z]/);	# only letters get index files (also keeps $first regex-safe below)
+
+    ($title, $url) = split('\001', $titles{$value}, 2);
+
+    $first = substr($title, 0, 1);
+    if ($current =~ /$first/i)
+    {
+	print F "<li><a href=\"$url\">$title</a></li>\n";
+    }
+    else
+    {
+	##
+	## New letter.  Open a new file for it
+	##
+	$current = $first;
+	$current =~ tr/a-z/A-Z/;
+	print F "</ul></body></html>\n" if defined fileno(F);	# BUGFIX: was "</li>" (the file opens a <ul>), and was printed even before any file had been opened on the first letter
+	close(F) if defined fileno(F);
+	open(F, ">index$current.html");
+	print F "<html><head><title>Index for $current</title></head>\n";
+	print F "<body>\n";
+	print F &www_logo_2("Index for $current");
+	print F "<ul>\n";
+	print F "<li><a href=\"$url\">$title</a></li>\n";
+
+	##
+	## Add a reference to the main index for this letter
+	##
+	print M " <a href=\"index$current.html\">$current</a>\n";
+
+	print "Index of $current\n";
+    }
+}
+
+print F "</ul></body></html>\n" if defined fileno(F); close(F);	# BUGFIX: the last letter's file never received its closing tags
+
+print M "</h2></body></html>\n";
+close(M);
+
+
+sub parse_ref_record	# decode one htdig docdb value: a stream of [1-byte tag][binary payload] fields
+{
+    local($value) = @_;
+    local(%rec, $length, $count, $result);
+
+    while (length($value) > 0)
+    {
+	$what = unpack("C", $value);	# 1-byte field tag
+	$value = substr($value, 1);
+	if ($what == 0)
+	{
+	    # ID
+	    $rec{"ID"} = unpack("i", $value);	# NOTE(review): all "i" fields assume native 32-bit ints as written by htdig on this host
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 1)
+	{
+	    # TIME
+	    $rec{"TIME"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 2)
+	{
+	    # ACCESSED
+	    $rec{"ACCESSED"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 3)
+	{
+	    # STATE
+	    $rec{"STATE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 4)
+	{
+	    # SIZE
+	    $rec{"SIZE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 5)
+	{
+	    # LINKS
+	    $rec{"LINKS"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 6)
+	{
+	    # IMAGESIZE
+	    $rec{"IMAGESIZE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 7)
+	{
+	    # HOPCOUNT
+	    $rec{"HOPCOUNT"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 8)
+	{
+	    # URL
+	    $length = unpack("i", $value);
+	    $rec{"URL"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 9)
+	{
+	    # HEAD
+	    $length = unpack("i", $value);
+	    $rec{"HEAD"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 10)
+	{
+	    # TITLE
+	    $length = unpack("i", $value);
+	    $rec{"TITLE"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 11)
+	{
+	    # DESCRIPTIONS
+	    $count = unpack("i", $value);
+	    $value = substr($value, 4);
+	    $result = "";
+	    foreach (1 .. $count)
+	    {
+		$length = unpack("i", $value);
+		$result = $result . unpack("x4 A$length", $value) . "";
+		$value = substr($value, 4 + $length);
+	    }
+	    chop $result;	# NOTE(review): the concat above appends an empty separator, so this chop drops the last char of the final entry — verify against the writer
+	    $rec{"DESCRIPTIONS"} = $result;
+	}
+	elsif ($what == 12)
+	{
+	    # ANCHORS
+	    $count = unpack("i", $value);
+	    $value = substr($value, 4);
+	    $result = "";
+	    foreach (1 .. $count)
+	    {
+		$length = unpack("i", $value);
+		$result = $result . unpack("x4 A$length", $value) . "";
+		$value = substr($value, 4 + $length);
+	    }
+	    chop $result;
+	    $rec{"ANCHORS"} = $result;
+	}
+	elsif ($what == 13)
+	{
+	    # EMAIL
+	    $length = unpack("i", $value);
+	    $rec{"EMAIL"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 14)
+	{
+	    # NOTIFICATION
+	    $length = unpack("i", $value);
+	    $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 15)
+	{
+	    # SUBJECT
+	    $length = unpack("i", $value);
+	    $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 16)
+	{
+	    # STRING (ignore, but unpack)
+	    $length = unpack("i", $value);
+	    $rec{"STRING"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 17)
+	{
+	    # METADSC
+	    $length = unpack("i", $value);
+	    $rec{"METADSC"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 18)
+	{
+	    # BACKLINKS
+	    $rec{"BACKLINKS"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 19)
+	{
+	    # SIGNATURE
+	    $rec{"SIG"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+    }
+    print "title = $rec{'TITLE'}\n";	# NOTE(review): debug trace emitted for every record — consider removing
+    return %rec;
+}
+
+
+
+
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl b/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl
new file mode 100755
index 00000000..e27e744c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl
@@ -0,0 +1,365 @@
+#!/usr/local/bin/perl
+#
+# whatsnew.pl v1.1 (C) 1996 Iain Lea
+# modified 26 Oct 1998 (c) 1998 Jacques Reynes
+#
+# ChangeLog
+# 960321 IL	Reversed sorting to show newest documents first
+# 981026 JR	Modified to work with Berkeley DB2.
+# 980204 GRH	Modified to work with changes in ht://Dig db format
+#
+# Produces a HTML 'Whats New' page with custom header and footer.
+#
+#	Title
+#	Descriptions
+#	URL
+#	Last modification date (in ctime format)
+#
+# The date is specified as yyyymmdd
+#
+# Usage: whatsnew.pl [options]
+#	-h	help
+#	-d date	base date [default: $DefDate]
+#	-n days	list documents newer than days old [default: $DefDays]
+#	-f file	database index [default: $DefIndex]
+#	-F file	HTML footer
+#	-H file	HTML header
+#	-o file	HTML generated file
+#	-v	verbose
+
+use BerkeleyDB;
+require 'timelocal.pl';
+require 'getopts.pl';
+
+$DefIndex = ' your data base .docdb';	# placeholder — must be edited (or overridden with -f) before use
+$DefOutputFile = ' your result file URL created in your web server whatsnew.html';	# placeholder — edit or override with -o
+$TmpFile = "/tmp/whatsnew.$$";	# intermediate sortable dump, removed by WriteWhatsNew
+$DefFooter = '';
+$DefHeader = '';
+$Verbose = 0;
+$NewNum = 0;	# count of matching documents; read by PrintBoilerPlate
+$DefDays = 3;
+chop (($DefDate = '19'.`date +%y%m%d`));	# NOTE(review): hardwires century "19" — yields the wrong date from 2000 onward
+
+&ParseCmdLine;
+
+$DefDate =~ /([0-9]{4})([0-9]{2})([0-9]{2})/;
+$When = timelocal (0, 0, 0, $3, $2 - 1, $1 - 1900)- ($DefDays * 86400);	# midnight of the base date, minus -n days
+$NewDate = localtime ($When);
+$dbfile = $DefIndex;	# NOTE(review): $dbfile is never used; ReadDatabase opens $DefIndex itself
+
+print "Generating 'Whats New' for documents newer than '$NewDate'...\n" if $Verbose;
+
+&ReadDatabase ($DefIndex, $TmpFile);
+&WriteWhatsNew ($TmpFile, $DefOutputFile, $DefHeader, $DefFooter);
+
+exit 1;	# NOTE(review): exits non-zero even on success — confirm callers expect this
+
+#############################################################################
+# Subroutines
+#
+
+sub ParseCmdLine	# parse -d/-n/-f/-F/-H/-o/-v/-h switches into the global defaults
+{
+    &Getopts ('d:f:F:hH:n:o:v');
+
+    if ($opt_h ne "") {
+	print <<EndOfHelp
+Produce an HTML 'Whats New' page with custom header & footer for database.
+
+Usage: $0 [options]
+	-h	help
+	-d date	base date [default: $DefDate]
+	-n days	list documents newer than days old [default: $DefDays]
+	-f file	database index [default: $DefIndex]
+	-F file	HTML footer
+	-H file	HTML header
+	-o file	HTML generated file
+	-v	verbose
+
+EndOfHelp
+;
+	exit 0;
+    }
+    $DefDate = $opt_d if ($opt_d ne "");
+    $DefDays = $opt_n if ($opt_n ne "");
+    $DefIndex = $opt_f if ($opt_f ne "");
+    $DefFooter = $opt_F if ($opt_F ne "");	# BUGFIX: previously tested $opt_H, so -F without -H was silently ignored
+    $DefHeader = $opt_H if ($opt_H ne "");
+    $DefOutputFile = $opt_o if ($opt_o ne "");
+    $Verbose = 1 if ($opt_v ne "");
+}
+
+sub ReadDatabase	# dump "time|url|title|descriptions" lines for documents newer than $When into $TmpFile
+{
+    my ($Index, $TmpFile) = @_;
+
+    tie (%docdb, 'BerkeleyDB::Btree', -Filename => $Index, -Flags => DB_RDONLY) or die "Error: $Index - $!";	# BUGFIX: '||' previously bound to the always-true DB_RDONLY constant, so a failed tie was never reported
+
+    open (TMP, ">$TmpFile") || die "Error: $TmpFile - $!\n";
+
+    while (($key, $value) = each %docdb)
+    {
+	next if $key =~ /^nextDocID/;
+	%rec = parse_ref_record ($value);
+	if ($rec{'TIME'} >= $When)	# $When is set by the main program: base date minus -n days
+	{
+	    $Line = "$rec{'TIME'}|$rec{'URL'}|$rec{'TITLE'}|$rec{'DESCRIPTIONS'}\n";
+	    print $Line if $Verbose;
+	    print TMP $Line;
+	    $NewNum++;	# global match counter, reported by PrintBoilerPlate
+	}
+    }
+
+    close (TMP);
+}
+
+sub WriteWhatsNew	# sort the temp dump newest-first and render it between header and footer
+{
+    my ($InFile, $OutFile, $Header, $Footer) = @_;
+
+    open (URLS, "sort -r $InFile |") || die "Error: $InFile - $!\n";	# lines begin with the epoch time, so reverse sort = newest first
+    open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n";
+
+    &PrintBoilerPlate ($Header, 1);
+
+    while (<URLS>) {
+	chop;
+	($Time, $URL, $Title, $Description) = split ('\|');	# fields as written by ReadDatabase
+	$Ctime = localtime ($Time);
+	if ($Verbose) {
+	    print <<EOT
+Title: $Title
+Description: $Description
+URL: $URL
+Modified: $Ctime
+
+EOT
+;
+	}
+	print HTML <<EOT
+<strong>Title:</strong> <a href="$URL">$Title</a>
+<strong>Description:</strong> $Description
+<strong>URL:</strong> $URL
+<strong>Modified:</strong> $Ctime
+
+EOT
+;
+    }
+
+    &PrintBoilerPlate ($Footer, 0);
+
+    close (HTML);
+    close (URLS);
+
+    unlink ($InFile);	# the temp dump is no longer needed
+}
+
+sub PrintBoilerPlate	# copy $File verbatim into HTML if given+existing, else emit a built-in default header/footer
+{
+    my ($File, $IsHeader) = @_;
+
+    if ($File ne "" && -e $File) {
+	open (FILE, $File) || die "Error: $File - $!\n";
+	while (<FILE>) {
+	    print HTML;	# HTML filehandle is opened by the caller (WriteWhatsNew)
+	}
+	close (FILE);
+    } else {
+	if ($IsHeader) {
+	    print HTML <<EOT
+<html>
+<head>
+<title>Whats New!</title>
+</head>
+<body>
+<h2>Whats New!</h2>
+<center>
+<a href="/whatsnew.html"><img src="/new.gif"></a>
+<a href="/"><img src="/home.gif"></a>
+<a href="/intranet.html"><img src="/search.gif"></a>
+<a href="mailto:Iain.Lea\@sbs.de"><img src="/contact.gif"></a>
+</center>
+<hr>
+<strong>Found $NewNum documents newer than '$NewDate'</strong>
+<pre>
+EOT
+;
+	} else {
+	    print HTML <<EOT
+</pre>
+<hr>
+<center>
+<a href="/whatsnew.html"><img src="/new.gif"></a>
+<a href="/"><img src="/home.gif"></a>
+<a href="/intranet.html"><img src="/search.gif"></a>
+<a href="mailto:Iain.Lea\@sbs.de"><img src="/contact.gif"></a>
+</center>
+</body>
+</html>
+EOT
+;
+	}
+    }
+}
+
+
+sub parse_ref_record	# decode one htdig docdb value: a stream of [1-byte tag][binary payload] fields
+{
+    local($value) = @_;
+    local(%rec, $length, $count, $result);
+
+    while (length($value) > 0)
+    {
+	$what = unpack("C", $value);	# 1-byte field tag
+	$value = substr($value, 1);
+	if ($what == 0)
+	{
+	    # ID
+	    $rec{"ID"} = unpack("i", $value);	# NOTE(review): all "i" fields assume native 32-bit ints as written by htdig on this host
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 1)
+	{
+	    # TIME
+	    $rec{"TIME"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 2)
+	{
+	    # ACCESSED
+	    $rec{"ACCESSED"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 3)
+	{
+	    # STATE
+	    $rec{"STATE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 4)
+	{
+	    # SIZE
+	    $rec{"SIZE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 5)
+	{
+	    # LINKS
+	    $rec{"LINKS"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 6)
+	{
+	    # IMAGESIZE
+	    $rec{"IMAGESIZE"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 7)
+	{
+	    # HOPCOUNT
+	    $rec{"HOPCOUNT"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 8)
+	{
+	    # URL
+	    $length = unpack("i", $value);
+	    $rec{"URL"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 9)
+	{
+	    # HEAD
+	    $length = unpack("i", $value);
+	    $rec{"HEAD"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 10)
+	{
+	    # TITLE
+	    $length = unpack("i", $value);
+	    $rec{"TITLE"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 11)
+	{
+	    # DESCRIPTIONS
+	    $count = unpack("i", $value);
+	    $value = substr($value, 4);
+	    $result = "";
+	    foreach (1 .. $count)
+	    {
+		$length = unpack("i", $value);
+		$result = $result . unpack("x4 A$length", $value) . "";
+		$value = substr($value, 4 + $length);
+	    }
+	    chop $result;	# NOTE(review): the concat above appends an empty separator, so this chop drops the last char of the final entry — verify against the writer
+	    $rec{"DESCRIPTIONS"} = $result;
+	}
+	elsif ($what == 12)
+	{
+	    # ANCHORS
+	    $count = unpack("i", $value);
+	    $value = substr($value, 4);
+	    $result = "";
+	    foreach (1 .. $count)
+	    {
+		$length = unpack("i", $value);
+		$result = $result . unpack("x4 A$length", $value) . "";
+		$value = substr($value, 4 + $length);
+	    }
+	    chop $result;
+	    $rec{"ANCHORS"} = $result;
+	}
+	elsif ($what == 13)
+	{
+	    # EMAIL
+	    $length = unpack("i", $value);
+	    $rec{"EMAIL"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 14)
+	{
+	    # NOTIFICATION
+	    $length = unpack("i", $value);
+	    $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 15)
+	{
+	    # SUBJECT
+	    $length = unpack("i", $value);
+	    $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 16)
+	{
+	    # STRING (ignore, but unpack)
+	    $length = unpack("i", $value);
+	    $rec{"STRING"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 17)
+	{
+	    # METADSC
+	    $length = unpack("i", $value);
+	    $rec{"METADSC"} = unpack("x4 A$length", $value);
+	    $value = substr($value, 4 + $length);
+	}
+	elsif ($what == 18)
+	{
+	    # BACKLINKS
+	    $rec{"BACKLINKS"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+	elsif ($what == 19)
+	{
+	    # SIGNATURE
+	    $rec{"SIG"} = unpack("i", $value);
+	    $value = substr($value, 4);
+	}
+    }
+    return %rec;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html
new file mode 100644
index 00000000..164b8e5f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html
@@ -0,0 +1,16 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html> <head>
+<title></title>
+</head>
+
+<body>
+<h1></h1>
+
+
+
+<hr>
+<address><a href="http://www.sdsu.edu/~turtle/">Andrew Scherpbier &lt;andrew@sdsu.edu&gt;</a></address>
+<!-- hhmts start -->
+Last modified: Wed Jul 5 10:26:36 PDT 1995
+<!-- hhmts end -->
+</body> </html>
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl
new file mode 100755
index 00000000..31402a23
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl
@@ -0,0 +1,54 @@
+#!/usr/local/bin/perl
+
+use GDBM_File;
+use BerkeleyDB;
+
+##
+## wordfreq.pl
+## (C) 1995 Andrew Scherpbier <andrew@sdsu.edu>
+##
+## Will generate a list of words and how frequently they are used
+##
+## updated to deal with Berkeley db files 1998 Iosif Fettich <ifettich@netsoft.ro>
+##
+
+
+$filetype = 'DB';	# default: treat the wordlist as a Berkeley db2 file
+
+if (not defined $ARGV[0] or defined ($ARGV[1]) and $ARGV[1] !~ /g/i) {	# usage: need a filename; an optional 2nd arg selects GDBM and must contain 'g'
+    print "\n\nThis program is used in conjunction with ht://Dig \n";
+    print "to determine the frequency of words in a database containing word references.\n\n";
+    print "Usage: $0 filename (to use a Berkeley db2 wordlist)\n";
+    print "       $0 filename g[dbm] (to use a GDBM wordlist)\n\n\n";
+    exit;
+}
+
+$filename = $ARGV[0];
+
+if ($filename =~ /gdbm$/i or $ARGV[1] =~ /g/i) {	# NOTE(review): $ARGV[1] may be undef here — warns under -w
+    $filetype = 'GDBM';
+}
+
+if ($filetype eq 'GDBM') {
+    tie %worddb, 'GDBM_File', $ARGV[0], GDBM_READER, 0
+	or die "Unable to open $ARGV[0] $!";
+} else {
+    tie %worddb, 'BerkeleyDB::Btree',
+	-Filename => $filename,
+	-Flags => DB_RDONLY
+	or die "Cannot open file $filename: $! $BerkeleyDB::Error\n" ;
+}
+
+while (($key, $value) = each %worddb)	# $key = word, $value = packed posting list
+{
+    $length = length($value) / 20;	# each posting is 20 bytes: five native 32-bit ints
+    $total = 0;
+    foreach $i (0 .. $length - 1)
+    {
+	($count, $id, $weight, $anchor, $location) =
+	    unpack("i i i i i", substr($value, $i * 20, 20));	# count, doc id, weight, anchor flag, location
+	$total += $count;
+    }
+    print "$total\t$key\n";	# total occurrences of the word across all documents
+}
+