summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl54
1 files changed, 54 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl
new file mode 100755
index 00000000..31402a23
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl
@@ -0,0 +1,54 @@
+#!/usr/local/bin/perl
+
+use GDBM_File;
+use BerkeleyDB;
+
+##
+## wordfreq.pl
+## (C) 1995 Andrew Scherpbier <andrew@sdsu.edu>
+##
+## Will generate a list of words and how frequently they are used
+##
+## updated to deal with Berkeley db files 1998 Iosif Fettich <ifettich@netsoft.ro>
+##
+
+
+$filetype = 'DB';
+
+if (not defined $ARGV[0] or defined ($ARGV[1]) and $ARGV[1] !~ /g/i) {
+ print "\n\nThis program is used in conjunction with ht://Dig \n";
+ print "to determine the frequency of words in a database containing word references.\n\n";
+ print "Usage: $0 filename (to use a Berkeley db2 wordlist)\n";
+ print " $0 filename g[dbm] (to use a GDBM wordlist)\n\n\n";
+ exit;
+}
+
+$filename = $ARGV[0];
+
+if ($filename =~ /gdbm$/i or $ARGV[1] =~ /g/i) {
+ $filetype = 'GDBM';
+}
+
+if ($filetype eq 'GDBM') {
+ tie %worddb, 'GDBM_File', $ARGV[0], GDBM_READER, 0
+ or die "Unable to open $ARGV[0] $!";
+} else {
+ tie %worddb, 'BerkeleyDB::Btree',
+ -Filename => $filename,
+ -Flags => DB_RDONLY
+ or die "Cannot open file $filename: $! $BerkeleyDB::Error\n" ;
+}
+
+while (($key, $value) = each %worddb)
+{
+ $length = length($value) / 20;
+ $total = 0;
+ foreach $i (0 .. $length - 1)
+ {
+ ($count, $id, $weight, $anchor, $location) =
+ unpack("i i i i i", substr($value, $i * 20, 20));
+ $total += $count;
+ }
+ print "$total\t$key\n";
+}
+