summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/.cvsignore7
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/Makefile.am51
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/Makefile.in544
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/Makefile.win3222
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/README11
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.cc927
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.h267
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCaseIsAStatements.h26
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordContext.cc107
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordContext.h101
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc582
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursor.h445
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc590
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.h163
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDB.cc71
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDB.h295
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc411
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBCache.h267
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.cc175
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.h114
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.cc97
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.h82
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc1024
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBPage.h508
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDead.cc123
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDead.h70
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDict.cc274
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDict.h252
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordKey.cc673
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordKey.h612
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.cc225
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.h170
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordList.cc436
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordList.h372
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordListMulti.cc599
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordListMulti.h252
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordListOne.cc485
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordListOne.h142
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordMeta.cc182
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordMeta.h87
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordMonitor.cc272
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordMonitor.h141
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordRecord.cc144
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordRecord.h198
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.cc51
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.h83
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordReference.cc88
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordReference.h263
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordStat.cc19
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordStat.h60
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordType.cc219
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordType.h157
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/word.desc15
53 files changed, 13551 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/.cvsignore b/debian/htdig/htdig-3.2.0b6/htword/.cvsignore
new file mode 100644
index 00000000..09dc8ef2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/.cvsignore
@@ -0,0 +1,7 @@
+Makefile
+*.lo
+*.la
+.purify
+.pure
+.deps
+.libs
diff --git a/debian/htdig/htdig-3.2.0b6/htword/Makefile.am b/debian/htdig/htdig-3.2.0b6/htword/Makefile.am
new file mode 100644
index 00000000..16c6d7bc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/Makefile.am
@@ -0,0 +1,51 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+include $(top_srcdir)/Makefile.config
+
+LOCAL_DEFINES =
+
+pkglib_LTLIBRARIES = libhtword.la
+
+libhtword_la_SOURCES = \
+ WordBitCompress.cc \
+ WordContext.cc \
+ WordCursor.cc \
+ WordDB.cc \
+ WordDBCompress.cc \
+ WordDBInfo.cc \
+ WordDBPage.cc \
+ WordKey.cc \
+ WordKeyInfo.cc \
+ WordList.cc \
+ WordMonitor.cc \
+ WordRecord.cc \
+ WordRecordInfo.cc \
+ WordReference.cc \
+ WordStat.cc \
+ WordType.cc
+
+libhtword_la_LDFLAGS = -release $(HTDIG_MAJOR_VERSION).$(HTDIG_MINOR_VERSION).$(HTDIG_MICRO_VERSION) ${extra_ldflags}
+
+pkginclude_HEADERS = \
+ WordBitCompress.h \
+ WordCaseIsAStatements.h \
+ WordContext.h \
+ WordCursor.h \
+ WordDB.h \
+ WordDBCompress.h \
+ WordDBInfo.h \
+ WordDBPage.h \
+ WordKey.h \
+ WordKeyInfo.h \
+ WordList.h \
+ WordMonitor.h \
+ WordRecord.h \
+ WordRecordInfo.h \
+ WordReference.h \
+ WordStat.h \
+ WordType.h
diff --git a/debian/htdig/htdig-3.2.0b6/htword/Makefile.in b/debian/htdig/htdig-3.2.0b6/htword/Makefile.in
new file mode 100644
index 00000000..f540671b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/Makefile.in
@@ -0,0 +1,544 @@
+# Makefile.in generated by automake 1.7.9 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# To compile with profiling do the following:
+#
+# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_triplet = @host@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+APACHE = @APACHE@
+APACHE_MODULES = @APACHE_MODULES@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CGIBIN_DIR = @CGIBIN_DIR@
+COMMON_DIR = @COMMON_DIR@
+CONFIG_DIR = @CONFIG_DIR@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATABASE_DIR = @DATABASE_DIR@
+DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+FIND = @FIND@
+GUNZIP = @GUNZIP@
+HAVE_SSL = @HAVE_SSL@
+HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@
+HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@
+HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@
+IMAGE_DIR = @IMAGE_DIR@
+IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
+MAKEINFO = @MAKEINFO@
+MV = @MV@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+RANLIB = @RANLIB@
+RRDTOOL = @RRDTOOL@
+SEARCH_DIR = @SEARCH_DIR@
+SEARCH_FORM = @SEARCH_FORM@
+SED = @SED@
+SENDMAIL = @SENDMAIL@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TAR = @TAR@
+TESTS_FALSE = @TESTS_FALSE@
+TESTS_TRUE = @TESTS_TRUE@
+TIME = @TIME@
+TIMEV = @TIMEV@
+USER = @USER@
+VERSION = @VERSION@
+YACC = @YACC@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+exec_prefix = @exec_prefix@
+extra_ldflags = @extra_ldflags@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \
+ -I$(top_srcdir)/include -I$(top_srcdir)/htlib \
+ -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \
+ -I$(top_srcdir)/htword \
+ -I$(top_srcdir)/db -I$(top_builddir)/db \
+ $(LOCAL_DEFINES) $(PROFILING)
+
+
+HTLIBS = $(top_builddir)/htnet/libhtnet.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/htlib/libht.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/db/libhtdb.la \
+ $(top_builddir)/htlib/libht.la
+
+
+
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+LOCAL_DEFINES =
+
+pkglib_LTLIBRARIES = libhtword.la
+
+libhtword_la_SOURCES = \
+ WordBitCompress.cc \
+ WordContext.cc \
+ WordCursor.cc \
+ WordDB.cc \
+ WordDBCompress.cc \
+ WordDBInfo.cc \
+ WordDBPage.cc \
+ WordKey.cc \
+ WordKeyInfo.cc \
+ WordList.cc \
+ WordMonitor.cc \
+ WordRecord.cc \
+ WordRecordInfo.cc \
+ WordReference.cc \
+ WordStat.cc \
+ WordType.cc
+
+
+libhtword_la_LDFLAGS = -release $(HTDIG_MAJOR_VERSION).$(HTDIG_MINOR_VERSION).$(HTDIG_MICRO_VERSION) ${extra_ldflags}
+
+pkginclude_HEADERS = \
+ WordBitCompress.h \
+ WordCaseIsAStatements.h \
+ WordContext.h \
+ WordCursor.h \
+ WordDB.h \
+ WordDBCompress.h \
+ WordDBInfo.h \
+ WordDBPage.h \
+ WordKey.h \
+ WordKeyInfo.h \
+ WordList.h \
+ WordMonitor.h \
+ WordRecord.h \
+ WordRecordInfo.h \
+ WordReference.h \
+ WordStat.h \
+ WordType.h
+
+subdir = htword
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+
+libhtword_la_LIBADD =
+am_libhtword_la_OBJECTS = WordBitCompress.lo WordContext.lo \
+ WordCursor.lo WordDB.lo WordDBCompress.lo WordDBInfo.lo \
+ WordDBPage.lo WordKey.lo WordKeyInfo.lo WordList.lo \
+ WordMonitor.lo WordRecord.lo WordRecordInfo.lo WordReference.lo \
+ WordStat.lo WordType.lo
+libhtword_la_OBJECTS = $(am_libhtword_la_OBJECTS)
+
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+depcomp =
+am__depfiles_maybe =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libhtword_la_SOURCES)
+HEADERS = $(pkginclude_HEADERS)
+
+DIST_COMMON = README $(pkginclude_HEADERS) $(srcdir)/Makefile.in \
+ $(top_srcdir)/Makefile.config Makefile.am
+SOURCES = $(libhtword_la_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign htword/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+pkglibLTLIBRARIES_INSTALL = $(INSTALL)
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(pkglibdir)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(pkglibdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(pkglibdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-pkglibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(pkglibdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(pkglibdir)/$$p; \
+ done
+
+clean-pkglibLTLIBRARIES:
+ -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" = "$$p" && dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libhtword.la: $(libhtword_la_OBJECTS) $(libhtword_la_DEPENDENCIES)
+ $(CXXLINK) -rpath $(pkglibdir) $(libhtword_la_LDFLAGS) $(libhtword_la_OBJECTS) $(libhtword_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+.cc.o:
+ $(CXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+.cc.obj:
+ $(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+
+.cc.lo:
+ $(LTCXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+pkgincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+install-pkgincludeHEADERS: $(pkginclude_HEADERS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(pkgincludedir)
+ @list='$(pkginclude_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(pkgincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(pkgincludedir)/$$f"; \
+ $(pkgincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(pkgincludedir)/$$f; \
+ done
+
+uninstall-pkgincludeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkginclude_HEADERS)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(pkgincludedir)/$$f"; \
+ rm -f $(DESTDIR)$(pkgincludedir)/$$f; \
+ done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+CTAGS = ctags
+CTAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/..
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(pkglibdir) $(DESTDIR)$(pkgincludedir)
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-pkgincludeHEADERS
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-info-am uninstall-pkgincludeHEADERS \
+ uninstall-pkglibLTLIBRARIES
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am install-man \
+ install-pkgincludeHEADERS install-pkglibLTLIBRARIES \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool pdf \
+ pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am \
+ uninstall-pkgincludeHEADERS uninstall-pkglibLTLIBRARIES
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/debian/htdig/htdig-3.2.0b6/htword/Makefile.win32 b/debian/htdig/htdig-3.2.0b6/htword/Makefile.win32
new file mode 100644
index 00000000..9f484eae
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/Makefile.win32
@@ -0,0 +1,22 @@
+
+TARGET = $(LIBDIR)/libhtword$(LIBSFX)
+
+# ----------------------------------------------------------------------------
+# add new library members to this list
+
+# ----------------------------------------------------------------------------
+
+include ../Makedefs.win32
+
+CXXSRC = WordBitCompress.cc WordContext.cc WordCursor.cc WordDB.cc \
+ WordDBCompress.cc WordDBInfo.cc WordDBPage.cc WordKey.cc \
+ WordKeyInfo.cc WordList.cc WordMonitor.cc WordRecord.cc \
+ WordRecordInfo.cc WordReference.cc WordStat.cc WordType.cc
+
+CPPFLAGS += -DHAVE_CONFIG_H -I../db -I../htcommon -I../htlib -I../htword
+
+$(TARGET): $(OBJDIRDEP) $(LIBDIRDEP) $(OBJS)
+ $(AR) $(ARFLAGS) $(OBJS)
+
+include ../Makerules.win32
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/README b/debian/htdig/htdig-3.2.0b6/htword/README
new file mode 100644
index 00000000..adb0e1af
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/README
@@ -0,0 +1,11 @@
+Files:
+
+WordDB : Interface to berkeley DB
+WordKey : key manipulation
+WordRecord : record manipulation
+WordReference : record and key manipulation
+WordStat : derived from WordReference -> per unique word statistics
+WordType : word normalisation and transformation (accents, lowercase, ...)
+WordList : inverted index interface (word insert, word delete, list browsing)
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.cc b/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.cc
new file mode 100644
index 00000000..ce4bdb54
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.cc
@@ -0,0 +1,927 @@
+//
+// WordBitCompress.cc
+//
+// BitStream: put and get bits into a buffer
+// *tagging: add tags to keep track of the position of data
+//          inside the bitstream for debugging purposes.
+// *freezing: saves current position. further inserts in the BitStream
+// aren't really done. This way you can try different
+// compression algorithms and chose the best.
+//
+// Compressor: BitStream with extended compression functionalities
+//
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordBitCompress.cc,v 1.5 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include"WordBitCompress.h"
+
+// ******** HtVector_byte (implementation)
+#define GType byte
+#define HtVectorGType HtVector_byte
+#include "HtVectorGenericCode.h"
+
+// ******** HtVector_charptr (implementation)
+#define GType charptr
+#define HtVectorGType HtVector_charptr
+#include "HtVectorGenericCode.h"
+
+
+
+// **************************************************
+// *************** misc functions *******************
+// **************************************************
+
+// return a temporary string that merges a name and a number
+char *
+label_str(const char *s,int n)
+{
+ static char buff[1000];
+ sprintf(buff,"%s%d",s,n);
+ return buff;
+}
+
+// display n bits of value v
+void
+show_bits(int v,int n/*=16*/)
+{
+ int i;
+ if(n>0)
+ {
+ for(i=0;i<n;i++)
+ {
+ printf("%c",( v&(1<<(n-i-1)) ? '1':'0' ) );
+ }
+ }
+ else
+ {
+ n=-n;
+ for(i=0;i<n;i++)
+ {
+ printf("%c",( v&(1<<(i)) ? '1':'0' ) );
+ }
+ }
+}
+
+
+
+// duplicate an array of unsigned int's
+unsigned int *
+duplicate(unsigned int *v,int n)
+{
+ unsigned int *res=new unsigned int[n];
+ CHECK_MEM(res);
+ memcpy((void *)res,(void *)v,n*sizeof(unsigned int));
+ return(res);
+}
+
+// quick sort compare function (for unsigned int's)
+int
+qsort_uint_cmp(const void *a,const void *b)
+{
+// printf("%12u %12u",*((unsigned int *)a),*((unsigned int *)b));
+ if((*((unsigned int *)a)) > (*((unsigned int *)b))) return 1;
+ else
+ if((*((unsigned int *)a)) < (*((unsigned int *)b))) return -1;
+ else
+ return 0;
+// return
+// (*((unsigned int *)a)) -
+// (*((unsigned int *)b)) ;
+}
+// quick sort an array of unsigned int's
+void
+qsort_uint(unsigned int *v,int n)
+{
+ qsort((void *)v,(unsigned int)n,sizeof(unsigned int),&qsort_uint_cmp);
+}
+
+// log in base 2 of v
+// log2(0) -> -1
+// log2(1) -> 0
+// log2(2) -> 1
+// log2(4) -> 2
+// ...
+// log2(8) -> 3
+// log2(7) -> 2
+int
+log2(unsigned int v)
+{
+ int res;
+ for(res=-1;v;res++){v>>=1;}
+ return(res);
+}
+
+
+
+
+// **************************************************
+// *************** VlengthCoder *******************
+// **************************************************
+//
+// Compress values into a bitstream based on their probability distribution
+// The probability distribution is reduced to a number of intervals.
+// Each interval (generally) has the same probability of occurring
+// values are then coded by: interval_number position_inside_interval
+// this can be seen as a modified version of Shannon-Fano encoding
+//
+// Here are some approximate calculations for estimating final coded size:
+//
+// n number of entries to code
+// nbits maximum size in bits of entries to code
+//
+// SUM_interval_bit_sizes -> depends on probability dist
+// total_size = table_size + coded_size
+// table_size = 2^nlev * NBITS_NBITS_VAL
+// coded_size = n * (nlev + SUM_interval_bit_sizes / 2^nlev )
+//
+// example1: flat probability distribution :
+// SUM_interval_bit_sizes = 2^nlev * log2( 2^nbits / 2^nlev) = 2^nlev * ( nbits - nlev )
+// => coded_size = n * ( nlev + nbits - nlev ) = n*nbits !!
+// => coded_size is the same as if we used no compression
+// this is normal, because it is not possible to compress random data
+//
+// example2: probability all focused in first interval except for one entry
+// SUM_interval_bit_sizes = 1 + nbits
+// the computations above are not valid because of integer roundofs
+// => coded_size would actually be = n * 1 + nbits
+// (but the code needs a few cleanups to obtain this value)
+//
+class VlengthCoder
+{
+ int nbits;// min number of bits to code all entries
+ int nlev;// split proba into 2^nlev parts
+ int nintervals;// number of intervals
+
+ int *intervals;
+ unsigned int *intervalsizes; // speedup
+ unsigned int *lboundaries; // speedup
+ BitStream &bs;
+
+// inline unsigned int intervalsize(int i)
+// {
+// unsigned int res=((intervals[i] > 0 ? pow2(intervals[i]-1) : 0));
+// if(intervalsizes[i]!=res){errr("intervalsizes");}
+// return res;
+// }
+ inline unsigned int intervalsize0(int i){return((intervals[i] > 0 ? pow2(intervals[i]-1) : 0));}
+
+public:
+ int verbose;
+
+ // find interval where value v resides
+ // fast version, this one recursively splits initial interval
+ inline int find_interval2(const unsigned int v,unsigned int &lboundary)
+ {
+ int i0=0;
+ int i1=nintervals;
+ int i;
+ for(;;)
+ {
+ if(i1==i0+1){break;}
+ i=(i0+i1)>>1;
+ lboundary=lboundaries[i];
+// if(verbose)printf("considering i0:%3d i1:%3d : i:%3d v:%12u lboundary:%12u (%12u - %12u)\n",i0,i1,i,v,lboundary,lboundaries[i0],lboundaries[i1]);
+ if(v<lboundary){i1=i;continue;}
+ else {i0=i;continue;}
+
+ }
+
+ lboundary=lboundaries[i0];
+// i=i0;
+// unsigned int sboundary=lboundary+intervalsizes[i];
+// if(!( (lboundary!=sboundary && v>=lboundary && v<sboundary) ||
+// (lboundary==sboundary && v==lboundary) ))
+// {
+// printf("interval fd:i0:%3d i1:%3d : i:%3d v:%12u lboundary:%12u (%12u - %12u)\n",i0,i1,i,v,lboundary,lboundaries[i0],lboundaries[i1]);
+// errr("bad interval");
+// }
+ return i0;
+ }
+
+ // find interval where value v resides
+ // slow version, this tries every interval
+ inline int find_interval(const unsigned int v,unsigned int &lboundary)
+ {
+ // SPEED CRITICAL SECTION
+ register int i;
+ register unsigned int sboundary=0;
+ lboundary=0;
+ for(i=0;i<nintervals-1;i++)
+ {
+// if(i>=nintervals){errr("code argh!");}
+ sboundary=lboundary+intervalsizes[i];
+// printf("nintervals:%3d i:%3d : %12u ... %12u : %12u\n",nintervals,i,lboundary,sboundary,v);
+ if( (lboundary!=sboundary && v>=lboundary && v<sboundary) ||
+ (lboundary==sboundary && v==lboundary) ){break;}
+ lboundary=sboundary;
+ }
+
+ return i;
+ }
+
+ // compress and insert a value into the bitstream
+ inline void code(unsigned int v)
+ {
+ unsigned int lboundary=0;
+ // SPEED CRITICAL SECTION
+ int i;
+// i=find_interval(v,lboundary);
+ i=find_interval2(v,lboundary);
+	// we're in the i'th interval;
+ bs.put_uint(i,nlev,"int");// store interval
+ const int bitsremaining=(intervals[i]>0 ? intervals[i]-1 : 0);
+// if(verbose>1)printf("v:%6d interval:%2d (%5d - %5d) bitsremaining:%2d ",v,i,lboundary,sboundary,bitsremaining);
+ v-=lboundary;
+// if(verbose>1)printf("remain:%6d totalbits:%2d\n",v,bitsremaining+nlev);
+ bs.put_uint(v,bitsremaining,"rem");
+ }
+ // get and uncompress a value from the bitstream
+ inline unsigned int get()
+ {
+ // SPEED CRITICAL SECTION
+ int i=bs.get_uint(nlev,"int");// get interval
+// if(verbose>1)printf("get:interval:%2d ",i);
+ const int bitsremaining=(intervals[i]>0 ? intervals[i]-1 : 0);
+// if(verbose>1)printf("bitsremain:%2d ",bitsremaining);
+ unsigned int v=bs.get_uint(bitsremaining,"rem");
+// if(verbose>1)printf("v0:%3d ",v);
+// unsigned int lboundary=0;
+ v+=lboundaries[i];
+// for(int j=0;j<i;j++){lboundary+=intervalsizes[j];}
+// v+=lboundary;
+// if(verbose>1)printf("lboundary:%5d v:%5d \n",lboundaries[i],v);
+ return(v);
+ }
+
+
+    // insert the packed probability distribution into the bitstream
+ void code_begin();
+    // get the packed probability distribution from the bitstream
+ void get_begin();
+
+ void make_lboundaries();
+
+ VlengthCoder(BitStream &nbs,int nverbose=0);
+
+ ~VlengthCoder()
+ {
+ delete [] lboundaries;
+ delete [] intervals;
+ delete [] intervalsizes;
+ }
+
+    // create VlengthCoder and its probability distribution from an array of values
+ VlengthCoder(unsigned int *vals,int n,BitStream &nbs,int nverbose=0);
+};
+
+void
+VlengthCoder::code_begin()
+{
+ int i;
+ bs.add_tag("VlengthCoder:Header");
+ bs.put_uint(nbits,NBITS_NBITS_VAL,"nbits");
+ bs.put_uint(nlev,5,"nlev");
+ for(i=0;i<nintervals;i++)
+ {
+ bs.put_uint(intervals[i],NBITS_NBITS_VAL,label_str("interval",i));
+ }
+}
+void
+VlengthCoder::get_begin()
+{
+ int i;
+ nbits=bs.get_uint(NBITS_NBITS_VAL,"nbits");
+ if(verbose>1)printf("get_begin nbits:%d\n",nbits);
+ nlev=bs.get_uint(5,"nlev");
+ if(verbose>1)printf("get_begin nlev:%d\n",nlev);
+ nintervals=pow2(nlev);
+
+ intervals=new int [nintervals];
+ CHECK_MEM(intervals);
+ intervalsizes=new unsigned int [nintervals];
+ CHECK_MEM(intervalsizes);
+ lboundaries=new unsigned int [nintervals+1];
+ CHECK_MEM(lboundaries);
+
+ for(i=0;i<nintervals;i++)
+ {
+ intervals[i]=bs.get_uint(NBITS_NBITS_VAL,label_str("interval",i));
+ intervalsizes[i]=intervalsize0(i);
+ if(verbose>1)printf("get_begin intervals:%2d:%2d\n",i,intervals[i]);
+ }
+ make_lboundaries();
+}
+void
+VlengthCoder::make_lboundaries()
+{
+ unsigned int lboundary=0;
+ for(int j=0;j<=nintervals;j++)
+ {
+ lboundaries[j]=lboundary;
+ if(j<nintervals){lboundary+=intervalsizes[j];}
+ }
+}
+
+VlengthCoder::VlengthCoder(BitStream &nbs,int nverbose/*=0*/):bs(nbs)
+{
+ verbose=nverbose;
+ nbits=0;
+ nlev=0;
+ nintervals=0;
+ intervals=NULL;
+}
+
+int debug_test_nlev=-1;
+
+VlengthCoder::VlengthCoder(unsigned int *vals,int n,BitStream &nbs,int nverbose/*=0*/):bs(nbs)
+{
+ verbose=nverbose;
+ unsigned int *sorted=duplicate(vals,n);
+ qsort_uint(sorted,n);
+
+ nbits=num_bits(HtMaxMin::max_v(vals,n));
+
+ // **** heuristics to determine best nlev
+ // force table size to be less than 1/10 of the maximum coded size
+ nlev=num_bits((n*nbits)/(10*NBITS_NBITS_VAL));
+ // sanity
+ if(nlev>=nbits){nlev=nbits-1;}
+ // nlev at least 1
+ if(nlev<1){nlev=1;}
+
+ if(debug_test_nlev>=0){nlev=debug_test_nlev;}
+ nintervals=pow2(nlev);
+ int i;
+
+ intervals=new int [nintervals];
+ CHECK_MEM(intervals);
+ intervalsizes=new unsigned int [nintervals];
+ CHECK_MEM(intervalsizes);
+ lboundaries=new unsigned int [nintervals+1];
+ CHECK_MEM(lboundaries);
+
+ if(verbose>1)printf("nbits:%d nlev:%d nintervals:%d \n",nbits,nlev,nintervals);
+
+ if(verbose>10)
+ {
+ printf("vals;\n");
+ for(i=0;i<n;i++)
+ {
+ printf("%12u ",vals[i]);
+ }
+ printf("\nsorted:\n");
+ for(i=0;i<n;i++)
+ {
+ printf("%12u ",sorted[i]);
+ }
+ printf("\n");
+ }
+
+ // find split boundaries
+ unsigned int lboundary=0;
+ unsigned int boundary;
+ for(i=0;i<nintervals-1;i++)
+ {
+ boundary=sorted[(n*(i+1))/nintervals];
+ intervals[i]=1+log2(boundary-lboundary);
+ intervalsizes[i]=intervalsize0(i);
+ if(0 || verbose>1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary);
+ lboundary+=intervalsizes[i];
+ }
+ boundary=sorted[n-1];
+ intervals[i]=1+log2(boundary-lboundary)+1;
+ intervalsizes[i]=intervalsize0(i);
+ if(0 || verbose>1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary);
+ if(0 || verbose>1)printf("\n");
+
+ make_lboundaries();
+
+ int SUM_interval_bit_sizes=0;
+ for(i=0;i<nintervals;i++)
+ {
+ SUM_interval_bit_sizes+=intervals[i];
+ }
+ if(verbose)printf("SUM_interval_bit_sizes:%d\n",SUM_interval_bit_sizes);
+ delete [] sorted;
+}
+
+
+// **************************************************
+// *************** BitStream ***********************
+// **************************************************
+
+void
+BitStream::put_zone(byte *vals,int n,const char *tag)
+{
+ add_tag(tag);
+ for(int i=0;i<(n+7)/8;i++){put_uint(vals[i],TMin(8,n-8*i),NULL);}
+}
+void
+BitStream::get_zone(byte *vals,int n,const char *tag)
+{
+ check_tag(tag);
+ for(int i=0;i<(n+7)/8;i++){vals[i]=get_uint(TMin(8,n-8*i));}
+}
+
+void
+BitStream::put_uint(unsigned int v,int n,const char *tag/*="NOTAG"*/)
+{
+ // SPEED CRITICAL SECTION
+ if(freezeon){bitpos+=n;return;}
+ add_tag(tag);
+
+ if(!n){return;}
+
+ // 1)
+ int bpos0= bitpos & 0x07;
+// printf("bpos0:%3d bitpos:%5d:%5d n:%4d val:%x\n",bpos0,bitpos,buff.size()*8,n,v);
+ if(bpos0 + n <8)
+ {
+// printf("simple case:");
+// ::show_bits(v,n);
+// printf("\n");
+ // simplest case: it all fits
+ buff.back()|=v<<bpos0;
+ bitpos+=n;
+ if(! (bitpos & 0x07) )
+ {buff.push_back(0);}// new byte
+ return;
+ }
+ else
+ {
+ const int ncentral=((bpos0 + n)>>3)-1;
+ // put first
+ buff.back()|=((v & 0xff)<<bpos0) & 0xff;
+ const int nbitsinfirstbyte=8-bpos0;
+
+// printf("normal case :(%x:%x)",((v & 0xff)<<bpos0) & 0xff,buff.back());
+// ::show_bits(((v & 0xff)<<bpos0) & 0xff,-8);
+// printf(" ");
+
+
+ v>>=nbitsinfirstbyte;
+// printf(" (v:%x)",v);
+ // put central
+ for(int i=ncentral;i;i--)
+ {
+ buff.push_back(0);
+ buff.back()= v & 0xff ;
+// ::show_bits(v & 0xff,-8);
+// printf(" ");
+ v>>=8;
+ }
+ // put last
+ const int nbitsremaining=n-( (ncentral<<3)+nbitsinfirstbyte );
+ if(nbitsremaining)
+ {
+ buff.push_back(0);
+ buff.back()=v & (pow2(nbitsremaining+1)-1);
+
+// printf(" (v:%x:%x)",v & (pow2(nbitsremaining+1)-1),buff.back());
+// ::show_bits(v & (pow2(nbitsremaining+1)-1),-nbitsremaining);
+// printf("\n");
+ }
+ if(!(nbitsremaining & 0x07)){buff.push_back(0);}
+ bitpos+=n;
+// printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining);
+
+ }
+// printf("cuurent put order:");
+// for(i=0;i<n;i++)
+// {
+// printf("%c",((v0& pow2(i) ? '1':'0')));
+// }
+// printf("\n");
+}
+
+
+
+
+unsigned int
+BitStream::get_uint(int n,const char *tag/*=NULL*/)
+{
+ // SPEED CRITICAL SECTION
+ if(check_tag(tag)==NOTOK){errr("BitStream::get(int) check_tag failed");}
+ if(!n){return 0;}
+
+ unsigned int res=0;
+
+ // 1)
+ int bpos0= bitpos & 0x07;
+
+// printf("bpos0:%3d bitpos:%5d n:%4d %s\n",bpos0,bitpos,n,tag);
+// printf("input:\n");
+// for(int j=0;j<(bpos0+n+7)/8;j++){printf("%x",buff[bitpos/8+j]);}
+// printf("\n");
+
+ if(bpos0 + n <8)
+ {
+ // simplest case: it all fits
+ res=(buff[bitpos>>3]>>bpos0) & (pow2(n)-1);
+ bitpos+=n;
+// printf("simple case:res:%x\n",res);
+ return res;
+ }
+ else
+ {
+ int bytepos=bitpos>>3;
+ const int ncentral=((bpos0 + n)>>3)-1;
+ // put first
+ res=(buff[bytepos]>>bpos0) & 0xff;
+// printf("normal case:res0:%x\n",res);
+
+ const int nbitsinfirstbyte=8-bpos0;
+
+ bytepos++;
+ // put central
+ if(ncentral)
+ {
+ unsigned int v=0;
+ for(int i=ncentral-1;i>=0;i--)
+ {
+ v|=buff[bytepos+i]&0xff;
+ if(i)v<<=8;
+// printf(" resC%d:v:%x\n",i,v);
+ }
+ bytepos+=ncentral;
+ res|=v<<nbitsinfirstbyte;
+// printf(" :resC:%x\n",res);
+ }
+ // put last
+ const int nbitsremaining=n-( (ncentral<<3)+nbitsinfirstbyte );
+ if(nbitsremaining)
+ {
+ res|=((unsigned int)(buff[bytepos] & (pow2(nbitsremaining)-1) )) << (nbitsinfirstbyte +((bytepos-(bitpos>>3)-1)<<3));
+// printf(" :resR:%x buff[%d]:%x %d\n",res,bytepos,buff[bytepos],
+// (nbitsinfirstbyte +((bytepos-(bitpos>>3)-1)<<3)));
+ }
+
+ bitpos+=n;
+// printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining);
+ return res;
+ }
+}
+#ifdef NOTDEF
+unsigned int
+BitStream::get(int n,const char *tag/*=NULL*/)
+{
+ if(check_tag(tag)==NOTOK){errr("BitStream::get(int) check_tag failed");}
+ unsigned int res=0;
+ for(int i=0;i<n;i++)
+ {
+ if(get()){res|=pow2(i);}
+ }
+ return(res);
+}
+#endif
+void
+BitStream::freeze()
+{
+ freeze_stack.push_back(bitpos);
+ freezeon=1;
+}
+
+int
+BitStream::unfreeze()
+{
+ int size0=bitpos;
+ bitpos=freeze_stack.back();
+ freeze_stack.pop_back();
+ size0-=bitpos;
+ if(freeze_stack.size()==0){freezeon=0;}
+ return(size0);
+}
+void
+BitStream::add_tag1(const char *tag)
+{
+ if(!use_tags){return;}
+ if(freezeon){return;}
+ if(!tag){return;}
+ tags.push_back(strdup(tag));
+ tagpos.push_back(bitpos);
+}
+
+int
+BitStream::check_tag1(const char *tag,int pos/*=-1*/)
+{
+ if(!use_tags){return OK;}
+ if(!tag){return OK;}
+ int found=-1;
+ int ok=0;
+ if(pos==-1){pos=bitpos;}
+ for(int i=0;i<tags.size();i++)
+ {
+ if(!strcmp(tags[i],tag))
+ {
+ found=tagpos[i];
+ if(tagpos[i]==pos){ok=1;break;}
+ }
+ }
+ if(!ok)
+ {
+ show();
+ if(found>=0)
+ {
+ printf("ERROR:BitStream:bitpos:%4d:check_tag: found tag %s at %d expected it at %d\n",bitpos,tag,found,pos);
+ }
+ else
+ {
+ printf("ERROR:BitStream:bitpos:%4d:check_tag: tag %s not found, expected it at %d\n",bitpos,tag,pos);
+ }
+ return(NOTOK);
+ }
+ return(OK);
+}
+
+int
+BitStream::find_tag(const char *tag)
+{
+ int i;
+ for(i=0;i<tags.size() && strcmp(tag,tags[i]);i++);
+ if(i==tags.size()){return -1;}
+ else{return i;}
+}
+int
+BitStream::find_tag(int pos,int posaftertag/*=1*/)
+{
+ int i;
+ for(i=0;i<tags.size() && tagpos[i]<pos;i++);
+ if(i==tags.size()){return -1;}
+ if(!posaftertag){return i;}
+ for(;tagpos[i]>pos && i>=0;i--);
+ return(i);
+}
+
+void
+BitStream::show_bits(int a,int n)
+{
+ for(int b=a;b<a+n;b++)
+ {
+ printf("%c",(buff[b/8] & (1<<(b%8)) ? '1' : '0'));
+ }
+}
+void
+BitStream::show(int a/*=0*/,int n/*=-1*/)
+{
+ int all=(n<0 ? 1 : 0);
+ if(n<0){n=bitpos-a;}
+ int i;
+
+ if(all)
+ {
+ printf("BitStream::Show: ntags:%d size:%4d buffsize:%6d ::: ",tags.size(),size(),buffsize());
+// for(i=0;i<tags.size();i++){printf("tag:%d:%s:pos:%d\n",i,tags[i],tagpos[i]);}
+ }
+
+ int t=find_tag(a,0);
+ if(t<0){show_bits(a,n);return;}
+ for(i=a;i<a+n;i++)
+ {
+ for(;t<tags.size() && tagpos[t]<i+1;t++)
+ {
+ printf("# %s:%03d:%03d #",tags[t],tagpos[t],n);
+ }
+ show_bits(i,1);
+ }
+ if(all){printf("\n");}
+
+}
+byte *
+BitStream::get_data()
+{
+ byte *res=(byte *)malloc(buff.size());
+ CHECK_MEM(res);
+ for(int i=0;i<buff.size();i++){res[i]=buff[i];}
+ return(res);
+}
+void
+BitStream::set_data(const byte *nbuff,int nbits)
+{
+ if(buff.size()!=1 || bitpos!=0)
+ {
+ printf("BitStream:set_data: size:%d bitpos:%d\n",buff.size(),bitpos);
+ errr("BitStream::set_data: valid only if BitStream is empty");
+ }
+ buff[0] = nbuff[0];
+ for(int i=1;i<(nbits+7)/8;i++){buff.push_back(nbuff[i]);}
+ bitpos=nbits;
+}
+
+
+
+// **************************************************
+// *************** Compressor ***********************
+// **************************************************
+
+
+void
+Compressor::put_uint_vl(unsigned int v,int maxn,const char *tag/*="NOTAG"*/)
+{
+ int nbits=num_bits(v);
+ put_uint(nbits,num_bits(maxn),tag);
+ if(nbits){put_uint(v,nbits,(char *)NULL);}
+}
+unsigned int
+Compressor::get_uint_vl(int maxn,const char *tag/*=NULL*/)
+{
+ int nbits=get_uint(num_bits(maxn),tag);
+ if(!nbits){return 0;}
+ else{return(get_uint(nbits,(char *)NULL));}
+}
+
+int
+Compressor::put_vals(unsigned int *vals,int n,const char *tag)
+{
+ int cpos=bitpos;
+ add_tag(tag);
+ if(n>=pow2(NBITS_NVALS)){errr("Compressor::put(uint *,nvals) : overflow: nvals>2^16");}
+ put_uint_vl(n,NBITS_NVALS,"size");
+ if(n==0){return NBITS_NVALS;}
+
+ int sdecr=2;
+ int sfixed=1;
+
+ int nbits=num_bits(HtMaxMin::max_v(vals,n));
+ if(verbose)printf("*********************put_vals:n:%3d nbits:%3d\n",n,nbits);
+
+ int i;
+ if(verbose)
+ {
+ printf("TTT:n:%3d nbits:%3d\n",n,nbits);
+ for(i=1;i<7;i++)
+ {
+ debug_test_nlev=i;
+ printf("trying nlev:%3d\n",debug_test_nlev);
+ freeze();
+ put_decr(vals,n);
+ int fndsz=unfreeze();
+ printf("TTT:nlev:%2d try size:%4d\n",i,fndsz);
+ }
+ debug_test_nlev=-1;
+ }
+
+ if(n>15 && nbits>3)
+ {
+ freeze();
+ put_decr(vals,n);
+ sdecr=unfreeze();
+
+ freeze();
+ put_fixedbitl(vals,n);
+ sfixed=unfreeze();
+ }
+
+ if(verbose)printf("put_vals:n:%3d sdecr:%6d sfixed:%6d rap:%f\n",n,sdecr,sfixed,sdecr/(float)sfixed);
+ if(sdecr<sfixed)
+ {
+ if(verbose)printf("put_vals: comptyp:0\n");
+ put_uint(0,2,"put_valsCompType");
+ put_decr(vals,n);
+ }
+ else
+ {
+ if(verbose)printf("put_vals: comptyp:1\n");
+ put_uint(1,2,"put_valsCompType");
+ put_fixedbitl(vals,n);
+ }
+
+ if(verbose)printf("------------------------------put_vals over\n");
+
+ return(bitpos-cpos);
+}
+
+int
+Compressor::get_vals(unsigned int **pres,const char *tag/*="BADTAG!"*/)
+{
+ if(check_tag(tag)==NOTOK){errr("Compressor::get_vals(unsigned int): check_tag failed");}
+ int n=get_uint_vl(NBITS_NVALS);
+ if(verbose>1)printf("get_vals n:%d\n",n);
+ if(!n){*pres=NULL;return 0;}
+
+ if(verbose)printf("get_vals: n:%3d\n",n);
+ unsigned int *res=new unsigned int[n];
+ CHECK_MEM(res);
+
+
+ int comptype=get_uint(2,"put_valsCompType");
+ if(verbose)printf("get_vals:comptype:%d\n",comptype);
+ switch(comptype)
+ {
+ case 0: get_decr(res,n);
+ break;
+ case 1: get_fixedbitl(res,n);
+ break;
+ default: errr("Compressor::get_vals invalid comptype");break;
+ }
+// get_fixedbitl(res,n);
+// get_decr(res,n);
+
+ *pres=res;
+ return(n);
+}
+
+
+int
+Compressor::put_fixedbitl(byte *vals,int n,const char *tag)
+{
+ int cpos=bitpos;
+ int i,j;
+ add_tag(tag);
+
+ put_uint_vl(n,NBITS_NVALS,"size");
+ if(n==0){return 0;}
+
+ byte maxv=vals[0];
+ for(i=1;i<n;i++)
+ {
+ byte v=vals[i];
+ if(v>maxv){maxv=v;}
+ }
+ int nbits=num_bits(maxv);
+ if(n>=pow2(NBITS_NVALS)){errr("Compressor::put_fixedbitl(byte *) : overflow: nvals>2^16");}
+ put_uint(nbits,NBITS_NBITS_CHARVAL,"nbits");
+ add_tag("data");
+ for(i=0;i<n;i++)
+ {
+ byte v=vals[i];
+ for(j=0;j<nbits;j++) {put(v&pow2(j));}
+ }
+ return(bitpos-cpos);
+}
+void
+Compressor::put_fixedbitl(unsigned int *vals,int n)
+{
+ int nbits=num_bits(HtMaxMin::max_v(vals,n));
+
+ put_uint_vl(nbits,NBITS_NBITS_VAL,"nbits");
+ add_tag("data");
+ if(verbose)printf("put_fixedbitl:nbits:%4d nvals:%6d\n",nbits,n);
+ for(int i=0;i<n;i++)
+ {
+ put_uint(vals[i],nbits,NULL);
+ }
+}
+
+void
+Compressor::get_fixedbitl(unsigned int *res,int n)
+{
+ int nbits=get_uint_vl(NBITS_NBITS_VAL);
+ if(verbose)printf("get_fixedbitl(uint):n%3d nbits:%2d\n",n,nbits);
+ int i;
+ for(i=0;i<n;i++)
+ {
+ res[i]=get_uint(nbits);
+ }
+}
+int
+Compressor::get_fixedbitl(byte **pres,const char *tag/*="BADTAG!"*/)
+{
+ if(check_tag(tag)==NOTOK){errr("Compressor::get_fixedbitl(byte *): check_tag failed");}
+ int n=get_uint_vl(NBITS_NVALS);
+ if(!n){*pres=NULL;return 0;}
+ int nbits=get_uint(NBITS_NBITS_CHARVAL);
+ if(verbose)printf("get_fixedbitl(byte):n%3d nbits:%2d\n",n,nbits);
+ int i;
+ byte *res=new byte[n];
+ CHECK_MEM(res);
+ for(i=0;i<n;i++)
+ {
+ res[i]=get_uint(nbits);
+ }
+ *pres=res;
+ return(n);
+}
+
+void
+Compressor::put_decr(unsigned int *vals,int n)
+{
+ VlengthCoder coder(vals,n,*this,verbose);
+ coder.code_begin();
+ int i;
+ for(i=0;i<n;i++){coder.code(vals[i]);}
+}
+void
+Compressor::get_decr(unsigned int *res,int n)
+{
+ VlengthCoder coder(*this,verbose);
+ coder.get_begin();
+ int i;
+ for(i=0;i<n;i++)
+ {
+ res[i]=coder.get();
+ if(verbose>1){printf("get_decr:got:%8d\n",res[i]);}
+ }
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.h b/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.h
new file mode 100644
index 00000000..19f2c336
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordBitCompress.h
@@ -0,0 +1,267 @@
+//
+// WordBitCompress.h
+//
+// BitStream: put and get bits into a buffer
+// *tagging: add tags to keep track of the position of data
+// inside the bitstream for debugging purposes.
+// *freezing: saves current position. further inserts in the BitStream
+// aren't really done. This way you can try different
+// compression algorithms and choose the best.
+//
+// Compressor: BitStream with extended functionalities
+//
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordBitCompress.h,v 1.7 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordBitCompress_h
+#define _WordBitCompress_h
+
+#include<stdio.h>
+#include<stdlib.h>
+#include"HtVector_int.h"
+#include"HtMaxMin.h"
+
+typedef unsigned char byte;
+// ******** HtVector_byte (header)
+#define GType byte
+#define HtVectorGType HtVector_byte
+#include "HtVectorGeneric.h"
+
+typedef char * charptr;
+// ******** HtVector_charptr (header)
+#define GType charptr
+#define HtVectorGType HtVector_charptr
+#include "HtVectorGeneric.h"
+
+
+// ******** Utility inline functions and macros
+
+// error checking
+#define FATAL_ABORT fflush(stdout);fprintf(stderr,"FATAL ERROR at file:%s line:%d !!!\n",__FILE__,__LINE__);fflush(stderr);(*(int *)NULL)=1
+#define errr(s) {fprintf(stderr,"FATAL ERROR:%s\n",s);FATAL_ABORT;}
+#define CHECK_MEM(p) if(!p) errr("mifluz: Out of memory!");
+// max/min of 2 values
+#define TMax(a,b) (((a)>(b)) ? (a) : (b))
+#define TMin(a,b) (((a)<(b)) ? (a) : (b))
+
+// compute integer log2
+// == minimum number of bits needed to code value
+inline int
+num_bits(unsigned int maxval )
+{
+ unsigned int mv=maxval;
+ int nbits;
+ for(nbits=0;mv;nbits++){mv>>=1;}
+ return(nbits);
+}
+// compute 2^x
+#define pow2(x) (1<<(x))
+
+
+// function declarations
+char *label_str(const char *s,int n);
+void show_bits(int v,int n=16);
+
+// unsigned short max_v(unsigned short *vals,int n);
+// unsigned int max_v(unsigned int *vals,int n);
+// unsigned short min_v(unsigned short *vals,int n);
+// unsigned int min_v(unsigned int *vals,int n);
+
+
+
+
+
+// **************************************************
+// *************** BitStream ***********************
+// **************************************************
+// compression is done in Compressor not in BitStream
+class BitStream
+{
+protected:
+
+ // the buffer where the bitstream is stored
+ HtVector_byte buff;
+
+ // current bit position within the buffer
+ int bitpos;
+
+ // tags for debugging
+ HtVector_int tagpos;
+ HtVector_charptr tags;
+ int use_tags;
+
+ // freezing the bitstream
+ HtVector_int freeze_stack;
+ int freezeon;
+public:
+ void freeze();
+ int unfreeze();
+
+ // puts a bit into the bitstream
+ inline void put(unsigned int v)
+ {
+ // SPEED CRITICAL SECTION
+ if(freezeon){bitpos++;return;}
+ if(v){buff.back()|=pow2(bitpos & 0x07);}
+ bitpos++;
+ if(!(bitpos & 0x07))// new byte
+ {
+ buff.push_back(0);
+ }
+ }
+ inline void put(unsigned int v,const char *tag)
+ {
+ if(!freezeon){add_tag(tag);}
+ put(v);
+ }
+
+ // gets a bit from the bitstream
+ inline byte get(const char *tag=(char*)NULL)
+ {
+ // SPEED CRITICAL SECTION
+ if(check_tag(tag)==NOTOK){errr("BitStream::get() check_tag failed");}
+ if(bitpos>=(buff.size()<<3)){errr("BitStream::get reading past end of BitStream!");}
+ byte res=buff[bitpos>>3] & pow2(bitpos & 0x07);
+// printf("get:res:%d bitpos:%5d/%d buff[%3d]=%x\n",res,bitpos,bitpos%8,bitpos/8,buff[bitpos/8]);
+ bitpos++;
+ return(res);
+ }
+
+ // get/put an integer using n bits
+ void put_uint(unsigned int v,int n,const char *tag=(char*)"NOTAG");
+ unsigned int get_uint( int n,const char *tag=(char*)NULL);
+
+ // get/put n bits of data stored in vals
+ void put_zone(byte *vals,int n,const char *tag);
+ void get_zone(byte *vals,int n,const char *tag);
+
+ //
+ inline void add_tag(const char *tag)
+ {
+ if(!use_tags || !tag || freezeon){return;}
+ add_tag1(tag);
+ }
+ void add_tag1(const char *tag);
+ inline int check_tag(const char *tag,int pos=-1)
+ {
+ if(!use_tags || !tag){return OK;}
+ return(check_tag1(tag,pos));
+ }
+ int check_tag1(const char *tag,int pos);
+ void set_use_tags(){use_tags=1;}
+ int find_tag(const char *tag);
+ int find_tag(int pos,int posaftertag=1);
+
+ void show_bits(int a,int n);
+ void show(int a=0,int n=-1);
+
+ // position accessors
+ int size(){return(bitpos);}
+ int buffsize(){return(buff.size());}
+
+ // get a copy of the buffer
+ byte *get_data();
+ // set the buffer from outside data (current buffer must be empty)
+ void set_data(const byte *nbuff,int nbits);
+
+ // use this for reading a BitStream after you have written in it
+ // (generally for debugging)
+ void rewind(){bitpos=0;}
+
+ ~BitStream()
+ {
+ int i;
+ for(i=0;i<tags.size();i++){free(tags[i]);}
+ }
+ BitStream(int size0)
+ {
+ buff.reserve((size0+7)/8);
+ init();
+ }
+ BitStream()
+ {
+ init();
+ }
+ private:
+ void init()
+ {
+ bitpos=0;
+ buff.push_back(0);
+ freezeon=0;
+ use_tags=0;
+ }
+};
+
+
+// **************************************************
+// *************** Compressor ***********************
+// **************************************************
+
+// Constants used by Compressor
+// number of bits to code the number of values in an array
+#define NBITS_NVALS 16
+// number of bits to code the values in an unsigned int array (=sizeof(unsigned int))
+#define NBITS_VAL 32
+// number of bits to code the number of bits used by an unsigned int value
+#define NBITS_NBITS_VAL 5
+// number of bits to code the number of bits used by a byte value
+#define NBITS_NBITS_CHARVAL 4
+
+class Compressor : public BitStream
+{
+public:
+ int verbose;
+ // get/put an integer using a variable number of bits
+ void put_uint_vl(unsigned int v,int maxn,const char *tag=(char*)"NOTAG");
+ unsigned int get_uint_vl( int maxn,const char *tag=(char*)NULL);
+
+ // get/put an integer checking for an expected value
+ void put_uint_ex(unsigned int v,unsigned int ex,int maxn,const char *tag=(char*)"NOTAG")
+ {
+ if(v==ex){put(1,tag);}
+ else{put(0,tag);put_uint(v,maxn,(char*)NULL);}
+ }
+ unsigned int get_uint_ex( unsigned int ex,int maxn,const char *tag=(char*)NULL)
+ {
+ if(get(tag)){return ex;}
+ else{return get_uint(maxn,(char*)NULL);}
+ }
+
+
+ // compress/decompress an array of unsigned ints (choosing best method)
+ int put_vals(unsigned int *vals,int n,const char *tag);
+ int get_vals(unsigned int **pres,const char *tag=(char*)"BADTAG!");
+
+ // compress/decompress an array of bytes (very simple)
+ int put_fixedbitl(byte *vals,int n,const char *tag);
+ int get_fixedbitl(byte **pres,const char *tag=(char*)"BADTAG!");
+
+ // compress/decompress an array of unsigned ints (very simple)
+ void get_fixedbitl(unsigned int *res,int n);
+ void put_fixedbitl(unsigned int *vals,int n);
+
+ // compress/decompress an array of unsigned ints (sophisticated)
+ void get_decr(unsigned int *res,int n);
+ void put_decr(unsigned int *vals,int n);
+
+ Compressor():BitStream()
+ {
+ verbose=0;
+ }
+ Compressor(int size0):BitStream(size0)
+ {
+ verbose=0;
+ }
+
+};
+
+
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCaseIsAStatements.h b/debian/htdig/htdig-3.2.0b6/htword/WordCaseIsAStatements.h
new file mode 100644
index 00000000..2046ee2f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCaseIsAStatements.h
@@ -0,0 +1,26 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// switch between unknown numerical types
+// example usage:
+// --------------------------
+// switch(word_key_info.sort[position].type)
+// {
+//#define STATEMENT(type) case WORD_ISA_##type:pool_##type[word_key_info.sort[position].index]=val;break
+//#include"WordCaseIsAStatements.h"
+// }
+// --------------------------
+#ifdef WORD_HAVE_TypeA
+ STATEMENT(TypeA);
+#endif /* WORD_HAVE_TypeA */
+#ifdef WORD_HAVE_TypeB
+ STATEMENT(TypeB);
+#endif /* WORD_HAVE_TypeB */
+#ifdef WORD_HAVE_TypeC
+ STATEMENT(TypeC);
+#endif /* WORD_HAVE_TypeC */
+#undef STATEMENT
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordContext.cc b/debian/htdig/htdig-3.2.0b6/htword/WordContext.cc
new file mode 100644
index 00000000..490c9361
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordContext.cc
@@ -0,0 +1,107 @@
+//
+// WordContext.cc
+//
+// WordContext: call Initialize for all classes that need to.
+// This will enable the Instance() static member
+// of each to return a properly allocated and configured
+// object.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordContext.cc,v 1.5 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+#include "WordContext.h"
+#include "WordType.h"
+#include "WordKeyInfo.h"
+#include "WordDBInfo.h"
+#include "WordRecord.h"
+#include "WordMonitor.h"
+
+void WordContext::Initialize(Configuration &config)
+{
+#if !defined(HAVE_LIBZ) || !defined(HAVE_ZLIB_H)
+ config.Add("wordlist_compress", "false");
+#endif
+
+ WordType::Initialize(config);
+ WordKeyInfo::Initialize(config);
+ WordRecordInfo::Initialize(config);
+ WordDBInfo::Initialize(config);
+ if(config.Boolean("wordlist_monitor"))
+ WordMonitor::Initialize(config);
+}
+
+Configuration *WordContext::Initialize(const ConfigDefaults* config_defaults /* = 0 */)
+{
+ Configuration *config = new Configuration();
+
+ if(config_defaults)
+ config->Defaults(config_defaults);
+
+ String filename;
+ //
+ // Check file pointed by MIFLUZ_CONFIG environment variable
+ //
+ if(getenv("MIFLUZ_CONFIG")) {
+ filename << getenv("MIFLUZ_CONFIG");
+ struct stat statbuf;
+ if(stat((char*)filename, &statbuf) < 0) {
+ if(errno != ENOENT) {
+ fprintf(stderr, "WordContext::Initialize: MIFLUZ_CONFIG could not stat %s\n", (char*)filename);
+ perror("");
+ }
+ filename.trunc();
+ }
+ }
+ //
+ // Check for ~/.mifluz
+ //
+ if(filename.empty()) {
+ const char* home = getenv("HOME");
+ if(home) {
+ filename << home << "/.mifluz";
+ struct stat statbuf;
+ if(stat((char*)filename, &statbuf) < 0) {
+ if(errno != ENOENT) {
+ fprintf(stderr, "WordContext::Initialize: could not stat %s\n", (char*)filename);
+ perror("");
+ }
+ filename.trunc();
+ }
+ }
+ }
+
+ if(!filename.empty())
+ config->Read(filename);
+
+ Initialize(*config);
+
+ if(filename.empty() && !config_defaults) {
+ delete config;
+ config = 0;
+ }
+
+ return config;
+}
+
+void WordContext::Finish()
+{
+ delete WordType::Instance();
+ delete WordKeyInfo::Instance();
+ delete WordRecordInfo::Instance();
+ delete WordDBInfo::Instance();
+ if(WordMonitor::Instance()) delete WordMonitor::Instance();
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordContext.h b/debian/htdig/htdig-3.2.0b6/htword/WordContext.h
new file mode 100644
index 00000000..9081175c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordContext.h
@@ -0,0 +1,101 @@
+//
+// WordContext.h
+//
+// NAME
+//
+// read configuration description and set up the mifluz context.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// Configuration* config = WordContext::Initialize();
+// ...
+// WordContext::Finish();
+//
+// DESCRIPTION
+//
+// The WordContext::Initialize() method initialize the global context
+// for the mifluz library. All other classes depend on it. It must
+// therefore be called before any other <i>mifluz</i> classes are used.
+//
+// CONFIGURATION
+//
+// wordlist_monitor {true|false} (default false)
+// If true create a <i>WordMonitor</i> instance to gather statistics and
+// build reports.
+//
+//
+// ENVIRONMENT
+//
+// <b>MIFLUZ_CONFIG</b> file name of configuration file read by
+// WordContext(3). Defaults to <b>~/.mifluz.</b>
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordContext.h,v 1.5 2004/05/28 13:15:26 lha Exp $
+//
+#ifndef _WordContext_h_
+#define _WordContext_h_
+
+#ifndef SWIG
+#include "Configuration.h"
+#endif /* SWIG */
+
+//
+// Short hand for calling Initialize for all classes
+// Word* that have a single instance (WordType, WordKeyInfo, WordRecordInfo).
+//
+class WordContext
+{
+ public:
+ //-
+ // Create environment. Must be called before any other class are used.
+ //
+ // When calling <b>Initialize</b> a second time, one must ensure
+ // that all WordList and WordCursor objects have been
+ // destroyed. WordList and WordCursor internal state depends on the
+ // current WordContext that will be lost by a second call.
+ // <br>
+ // For those interested by the internals, the <b>Initialize</b> function
+ // maintains a Berkeley DB environment (DB_ENV) in the following way:
+ //
+ // First invocation:
+ // <pre>
+ // Initialize -> new DB_ENV (thru WordDBInfo)
+ // </pre>
+ //
+ // Second invocation:
+ // <pre>
+ // Initialize -> delete DB_ENV -> new DB_ENV (thru WordDBInfo)
+ // </pre>
+ //
+ static void Initialize(Configuration &config);
+#ifndef SWIG
+ //-
+ // Build a <i>Configuration</i> object from the file pointed to by the
+ // MIFLUZ_CONFIG environment variable or ~/.mifluz.
+ // The <b>config_defaults</b> argument, if provided, is passed to
+ // the <i>Configuration</i> object using the <b>Defaults</b> method.
+ // The <b>Initialize(const Configuration &)</b> method is then called
+ // with the <i>Configuration</i> object.
+ //
+ // Refer to the <i>Configuration</i> description for more information.
+ //
+ //
+ static Configuration *Initialize(const ConfigDefaults* config_defaults = 0);
+#endif /* SWIG */
+ //-
+ // Destroy environment. Must be called after all other <i>mifluz</i>
+ // objects are destroyed.
+ //
+ static void Finish();
+};
+
+#endif // _WordContext_h_
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc
new file mode 100644
index 00000000..d0980e04
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc
@@ -0,0 +1,582 @@
+//
+// WordCursor.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursor.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "WordCursor.h"
+#include "WordStat.h"
+#include "WordList.h"
+
+#include <stdio.h>
+
+//
+// WordCursor implementation
+//
+
+// *****************************************************************************
+//
+int WordCursor::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
+{
+ //
+ // Record the search parameters; no database activity happens here.
+ // WalkInit() must still be called before any Walk* method is used.
+ // Always returns OK.
+ //
+ action = naction;
+ searchKey = nsearchKey;
+ callback = ncallback;
+ callback_data = ncallback_data;
+ words = nwords;
+ return OK;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::Clear()
+{
+ //
+ // Reset search parameters, results and cursor state back to the
+ // just-constructed state. Note that the collectRes list is dropped
+ // (pointer zeroed) without being deleted: the application owns the
+ // list obtained from GetResults().
+ //
+ searchKey.Clear();
+ action = 0;
+ callback = 0;
+ callback_data = 0;
+ ClearResult();
+ ClearInternal();
+ words = 0;
+
+ //
+ // Debugging section.
+ //
+ traceRes = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::ClearInternal()
+{
+ //
+ // Close the Berkeley DB cursor and forget the current position.
+ // cursor_get_flags is reset to DB_SET_RANGE so the next walk starts
+ // with a jump to the first candidate key instead of a sequential step.
+ //
+ cursor.Close();
+ key.trunc();
+ data.trunc();
+ prefixKey.Clear();
+ cursor_get_flags = DB_SET_RANGE;
+ searchKeyIsSameAsPrefix = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::ClearResult()
+{
+ //
+ // Forget accumulated results. collectRes is only zeroed, not deleted:
+ // the application is responsible for deallocating the list it got
+ // from GetResults().
+ //
+ collectRes = 0;
+ found.Clear();
+ status = OK;
+}
+
+int
+WordCursor::ContextRestore(const String& buffer)
+{
+ //
+ // Resume a walk from a saved position. <buffer> holds the ASCII
+ // representation of the last key found, as written by ContextSave().
+ // An empty buffer is a no-op and returns OK. Otherwise Seek() to the
+ // saved key, then consume it with WalkNext() so the caller resumes
+ // strictly after the saved position.
+ //
+ int ret = OK;
+ if(!buffer.empty()) {
+ WordKey key(buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ //
+ // Move to restored position so that next call to
+ // WalkNext will go above the restored position.
+ //
+ if((ret = WalkNext()) != OK)
+ return ret;
+ }
+ return ret;
+}
+
+// *****************************************************************************
+//
+// Walk and collect data from the word database.
+//
+// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
+// stored in a list and the list is returned.
+// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
+// is called for each WordReference found. No list is built and the
+// function returns a null pointer.
+//
+// The <searchKey> argument may be a fully qualified key, containing precise values for each
+// field of the key. It may also contain only some fields of the key. In both cases
+// all the word occurrences matching the fields set in the key are retrieved. It may
+// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
+// be *slow* if key is not a prefix because it forces a complete walk of the
+// index.
+//
+int
+WordCursor::Walk()
+{
+ //
+ // Run a complete walk: init, iterate until no more matches, cleanup.
+ // Returns OK only if iteration stopped because it reached the end
+ // (WORD_WALK_ATEND); any other termination cause yields NOTOK.
+ //
+ int ret;
+ if((ret = WalkInit()) != OK) return ret;
+ while((ret = WalkNext()) == OK)
+ ;
+ int ret1;
+ if((ret1 = WalkFinish()) != OK) return ret1;
+
+ return ret == WORD_WALK_ATEND ? OK : NOTOK;
+}
+
+int
+WordCursor::WalkInit()
+{
+ //
+ // Prepare the walk: reset state, open a Berkeley DB cursor on the
+ // inverted index, allocate the result list if collecting, and compute
+ // the first key to position on (either the prefix of searchKey or,
+ // when no usable prefix exists, the key right past the per-word
+ // statistics records).
+ //
+ int ret = OK;
+
+ ClearResult();
+ ClearInternal();
+
+ // NOTE(review): wordRef appears unused in this function — candidate
+ // for removal, left untouched here.
+ WordReference wordRef;
+
+ if((ret = cursor.Open(words->db.db)) != 0)
+ return ret;
+
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
+
+ if(action & HTDIG_WORDLIST_COLLECTOR) {
+ collectRes = new List;
+ }
+
+ const WordReference& last = WordStat::Last();
+
+ WordKey first_key;
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ //
+ // Move past the stat data
+ //
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is empty\n");
+ first_key = last.Key();
+
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is not a prefix\n");
+ prefixKey.Clear();
+ //
+ // Move past the stat data
+ //
+ first_key = last.Key();
+ } else {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: go to %s \n", (char*)prefixKey.Get());
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Init
+ //
+ found.Key().CopyFrom(first_key);
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursor::WalkRewind()
+{
+ //
+ // Reposition before the first matching entry without reopening the
+ // Berkeley DB cursor. Duplicates the positioning logic of WalkInit()
+ // (same prefix/stat-data computation, minus cursor.Open and result
+ // allocation) — keep the two in sync when modifying either.
+ //
+ const WordReference& last = WordStat::Last();
+
+ WordKey first_key;
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ first_key = last.Key();
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ prefixKey.Clear();
+ //
+ // Move past the stat data
+ //
+ first_key = last.Key();
+ } else {
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Rewind
+ //
+ found.Key().CopyFrom(first_key);
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursor::WalkNext()
+{
+ //
+ // Step the cursor until WalkNextStep either finds a true match (OK),
+ // reaches the end (WORD_WALK_ATEND) or fails. A return value of
+ // WORD_WALK_NOMATCH_FAILED means the entry examined was not a match:
+ // just retry.
+ //
+ int ret;
+ while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNext: got false match, retry\n");
+
+ return ret;
+}
+
+int
+WordCursor::WalkNextStep()
+{
+ //
+ // Advance the Berkeley DB cursor one step and check whether the entry
+ // it lands on matches the search criterion. Returns OK on a match
+ // (and collects it or calls the callback according to <action>),
+ // WORD_WALK_ATEND past the last possible match,
+ // WORD_WALK_NOMATCH_FAILED when the entry is not a match and the
+ // caller should retry, other WORD_WALK_* codes on failure.
+ //
+ status = OK;
+
+ {
+ int error;
+ if((error = cursor.Get(key, data, cursor_get_flags)) != 0) {
+ if(error == DB_NOTFOUND) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ } else {
+ return WORD_WALK_GET_FAILED;
+ }
+ }
+ }
+
+ //
+ // Next step operation is always sequential walk
+ //
+ cursor_get_flags = DB_NEXT;
+
+ found.Unpack(key, data);
+
+ if(traceRes) traceRes->Add(new WordReference(found));
+
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ //
+ // Don't bother to compare keys if we want to walk all the entries
+ //
+ if(!(searchKey.Empty())) {
+ // examples
+ // searchKey: aabc 1 ? ? ?
+ // prefixKey: aabc 1 ? ? ?
+
+ //
+ // Stop loop if we reach a record whose key does not
+ // match prefix key requirement, provided we have a valid
+ // prefix key.
+ // (ie. stop loop if we're past last possible match...)
+ //
+ if(!prefixKey.Empty() &&
+ !prefixKey.Equal(found.Key())) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ }
+
+ //
+ // Skip entries that do not exactly match the specified key.
+ //
+ if(!searchKeyIsSameAsPrefix &&
+ !searchKey.Equal(found.Key())) {
+ int ret;
+ switch((ret = SkipUselessSequentialWalking())) {
+ case OK:
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
+ return WORD_WALK_NOMATCH_FAILED;
+ break;
+ case WORD_WALK_ATEND:
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ break;
+ default:
+ fprintf(stderr, "WordCursor::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
+ return NOTOK;
+ break;
+ }
+ }
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ if(collectRes) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: collect\n");
+ collectRes->Add(new WordReference(found));
+ } else if(callback) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: calling callback\n");
+ int ret = (*callback)(words, cursor, &found, *(callback_data) );
+ //
+ // The callback function tells us that something went wrong, might
+ // as well stop walking.
+ //
+ if(ret != OK) {
+ // Fix: this diagnostic lacked the trailing newline every other
+ // log line has, garbling interleaved stderr output.
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: callback returned NOTOK\n");
+ return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND);
+ }
+ }
+
+ return OK;
+}
+
+int
+WordCursor::WalkFinish()
+{
+ //
+ // Release the Berkeley DB cursor acquired by WalkInit().
+ // Returns OK if the close succeeded, NOTOK otherwise.
+ //
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkFinish\n");
+
+ return cursor.Close() == 0 ? OK : NOTOK;
+}
+
+// *****************************************************************************
+//
+// Helper for SkipUselessSequentialWalking.
+// Undefine in foundKey all fields defined in searchKey
+// so that they are not considered by SetToFollowing.
+// It could become a method of WordKey but lacks generalisation and
+// from what I see it is a rather specific operation.
+//
+static inline void complement(WordKey& key, const WordKey& mask)
+{
+ int nfields = WordKey::NFields();
+ int i;
+ //
+ // Undefine in 'key' all fields defined in 'mask'
+ //
+ for(i = 0; i < nfields; i++) {
+ if(mask.IsDefined(i))
+ key.Undefined(i);
+ else
+ key.SetDefined(i);
+ }
+ //
+ // If searching for a prefix, we must allow the word in
+ // key to increment.
+ //
+ if(mask.IsDefinedWordSuffix()) {
+ key.UndefinedWordSuffix();
+ } else {
+ key.SetDefinedWordSuffix();
+ // Field 0 is presumably the word field, forced defined so it can
+ // be incremented by SetToFollowing — TODO confirm with WordKey layout.
+ key.SetDefined(0);
+ }
+}
+
+// *****************************************************************************
+//
+// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of
+// sequential iterating (DB_NEXT).
+// If it is decided that jump is a better move :
+// cursor_set_flags = DB_SET_RANGE
+// key = calculated next possible key
+// Else
+// do nothing
+// Return values
+// OK: skipping successful.
+// WORD_WALK_ATEND : no more possible match, reached the maximum
+// WORD_WALK_FAILED: general failure, occurs if called and no skipping
+// necessary.
+//
+// Sequential searching can waste time by searching all keys, for example:
+// If searching for Key: argh <DEF> <UNDEF> 10
+// Under normal circumstances we would do the following
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_NEXT
+// 3: argh 2 15 nomatch DB_NEXT
+// 4: argh 2 20 nomatch DB_NEXT
+// 5: argh 2 30 nomatch DB_NEXT
+// 6: argh 5 1 nomatch DB_NEXT
+// 7: argh 5 8 nomatch DB_NEXT
+// 8: argh 8 6 nomatch DB_NEXT
+//
+// But the optimal would be
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10
+// 3: argh 2 15
+// 4: argh 2 20
+// 5: argh 2 30
+// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10
+// 7: argh 5 8
+// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10
+//
+// That saves a lot of unnecessary hits. The underlying logic is a bit
+// more complex but you have the idea.
+//
+int
+WordCursor::SkipUselessSequentialWalking()
+{
+ //
+ // See the block comment above for the full rationale: instead of
+ // stepping sequentially (DB_NEXT) through entries that cannot match,
+ // compute the next possible matching key, pack it into <key> and set
+ // cursor_get_flags to DB_SET_RANGE so the next Get jumps there.
+ //
+ WordKey& foundKey = found.Key();
+
+ int nfields = WordKey::NFields();
+ int i;
+
+ //
+ // Find out how the searchKey and the foundKey differ.
+ //
+ int diff_field = 0;
+ int lower = 0;
+ if(!foundKey.Diff(searchKey, diff_field, lower)) {
+ //
+ // foundKey matches searchKey (no difference), don't
+ // skip, everything is fine. The caller of SkipUselessSequentialWalking
+ // is expected to avoid this case for efficiency.
+ //
+ return WORD_WALK_FAILED;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Undefine in foundKey all fields defined in searchKey
+ // so that they are not considered by SetToFollowing.
+ //
+ complement(foundKey, searchKey);
+
+ //
+ // If the key found is lower than the searched key when
+ // considering only the fields defined in the search key,
+ // we only need to enforce the key to get the match.
+ // Otherwise we need to increment the found key to jump
+ // properly.
+ //
+ if(lower) {
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
+ for(i = diff_field + 1; i < nfields; i++)
+ if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
+ } else {
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: increment the key to jump forward\n");
+ //
+ // diff_field - 1 is not really necessary because diff_field is undefined
+ // in foundKey and would therefore be ignored by SetToFollowing. We write
+ // diff_field - 1 to clearly state that incrementing begins just before the
+ // field for which a difference was found.
+ //
+ int ret;
+ if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
+ return ret;
+ }
+
+ //
+ // Copy all fields defined in searchKey into foundKey. This will copy
+ // searchKey in foundKey because all these fields have been
+ // previously undefined in foundKey.
+ //
+ foundKey.Merge(searchKey);
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Instruct Next function to jump to the calculated key
+ //
+ if(foundKey.Pack(key) == NOTOK) {
+ return WORD_WALK_FAILED;
+ }
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+// *****************************************************************************
+//
+// Copy defined fields in patch into foundKey and
+// initialize internal state so that WalkNext jumps to
+// this key next time it's called.
+//
+// Technically this means : Override latest key found (found data member)
+// with patch fields values, starting from the first field set in
+// patch up to the last. Pack the result in the key field and set
+// cursor_get_flags to DB_SET_RANGE.
+//
+int
+WordCursor::Seek(const WordKey& patch)
+{
+ //
+ // Build a fully defined position key by overriding searchKey with the
+ // fields defined in <patch>, pack it into <key> and arrange for the
+ // next Get to jump there (DB_SET_RANGE). Fails (NOTOK) if <patch> is
+ // empty or the resulting key is not fully defined.
+ //
+ int nfields = WordKey::NFields();
+ WordKey pos = searchKey;
+
+ if(patch.Empty()) {
+ fprintf(stderr, "WordCursor::Seek: empty patch is useless\n");
+ return NOTOK;
+ }
+
+ int i;
+ //
+ // Leave the most significant fields untouched
+ //
+ for(i = WORD_FIRSTFIELD; i < nfields; i++)
+ if(patch.IsDefined(i))
+ break;
+ //
+ // From the first value set in the patch to the end
+ // override.
+ //
+ for(; i < nfields; i++) {
+ if(patch.IsDefined(i))
+ pos.Set(i, patch.Get(i));
+ else
+ pos.Set(i, 0);
+ }
+
+ if(!pos.Filled()) {
+ fprintf(stderr, "WordCursor::Seek: only make sense if the resulting key is fully defined\n");
+ return NOTOK;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::Seek: seek to %s\n", (char*)pos.Get());
+
+ //
+ // Next move will jump to the patched key
+ //
+ pos.Pack(key);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int WordCursor::Noccurrence(unsigned int& noccurrence) const
+{
+ //
+ // Delegate the occurrence count for searchKey to the inverted index.
+ // Requires that Initialize() was called so that <words> is set.
+ // NOTE(review): the message says "call Prepare first" but the
+ // initializing method in this class is Initialize — verify intent.
+ //
+ if(!words) {
+ fprintf(stderr, "WordCursor::Noccurrence: words not set (call Prepare first)\n");
+ return NOTOK;
+ }
+ return words->Noccurrence(searchKey, noccurrence);
+}
+
+//
+// Convert the whole structure to an ascii string description
+//
+int WordCursor::Get(String& bufferout) const
+{
+ //
+ // Dump input parameters, output state and internal state into
+ // <bufferout> as a one-line ASCII description. Always returns OK.
+ //
+ String tmp;
+ bufferout.trunc();
+
+ searchKey.Get(tmp);
+ bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set");
+ found.Get(tmp);
+ bufferout << ", found = " << tmp << ", status = " << status;
+ prefixKey.Get(tmp);
+ bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
+
+ return OK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h
new file mode 100644
index 00000000..ba6e9732
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h
@@ -0,0 +1,445 @@
+//
+// WordList.h
+//
+// NAME
+//
+// search specification and results for WordList.
+//
+// SYNOPSIS
+//
+// #include <WordList.h>
+//
+// int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
+// {
+// ...
+// }
+//
+// Object* data = ...
+//
+// WordList *words = ...;
+//
+// WordCursor *search = words->Cursor(callback, data);
+// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"));
+// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"), callback, data);
+//
+// ...
+//
+// if(search->Walk() == NOTOK) bark;
+// List* results = search->GetResults();
+//
+// if(search->WalkNext() == OK)
+// dosomething(search->GetFound());
+//
+// DESCRIPTION
+//
+// WordCursor is an iterator on an inverted index. It is created by
+// asking a <i>WordList</i> object with the <i>Cursor.</i> There is
+// no other way to create a WordCursor object.
+// When the <i>Walk*</i> methods return,
+// the WordCursor object contains the result of the search and
+// status information that indicates if it reached the end of
+// the list (IsAtEnd() method).
+//
+// The <b>callback</b> function that is called each time a match is
+// found takes the following arguments:
+// <pre>
+// WordList* words pointer to the inverted index handle.
+// WordDBCursor& cursor to call Del() and delete the current match
+// WordReference* wordRef is the match
+// Object& data is the user data provided by the caller when
+// search began.
+// </pre>
+//
+// The <i>WordKey</i> object that specifies the search criterion
+// may be used as follows (assuming word is followed by DOCID and
+// LOCATION):
+//
+// Ex1: <b>WordKey("word <DEF> <UNDEF> <UNDEF>")</b> find all occurrences
+// of <i>word</i>.
+//
+// Ex2: <b>WordKey("meet <UNDEF> <UNDEF> <UNDEF>")</b> find all occurrences
+// starting with <i>meet</i>, including <i>meeting</i> etc.
+//
+// Ex3: <b>WordKey("meet <DEF> <UNDEF> 1")</b> find all occurrences of
+// <i>meet</i> that occur at LOCATION 1 in any DOCID. This can
+// be inefficient since the search has to scan all occurrences
+// of <i>meet</i> to find the ones that occur at LOCATION 1.
+//
+// Ex4: <b>WordKey("meet <DEF> 2 <UNDEF>")</b> find all occurrences of
+// <i>meet</i> that occur in DOCID 2, at any location.
+//
+// Interface functions are virtual so that a derivation of the
+// class is possible. Some functions are meant to be used by derived
+// classes such as the <b>Initialize</b> function. All data members
+// should be accessed using the corresponding accessor if possible.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursor.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordCursor_h_
+#define _WordCursor_h_
+
+#ifndef SWIG
+#include "htString.h"
+#include "WordKey.h"
+#include "WordDB.h"
+
+class WordList;
+class WordDBCursor;
+#endif /* SWIG */
+//
+// Possible values of the action argument of WordList::Walk
+// check walk function in WordList.cc for info on these:
+//
+#define HTDIG_WORDLIST_COLLECTOR 0x0001
+#define HTDIG_WORDLIST_WALKER 0x0002
+
+#ifndef SWIG
+//
+// Type of the callback argument in WordCursor
+//
+typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &);
+#endif /* SWIG */
+
+//
+// Possible values of the status member
+//
+//
+// WalkNext reached the end of the matches
+//
+#define WORD_WALK_ATEND 0x0001
+//
+// Failed to acquire Berkeley DB cursor
+//
+#define WORD_WALK_CURSOR_FAILED 0x0002
+//
+// Berkeley DB Get operation failed
+//
+#define WORD_WALK_GET_FAILED 0x0004
+//
+// Callback function returned NOTOK
+//
+#define WORD_WALK_CALLBACK_FAILED 0x0008
+//
+// WalkNextStep hit an entry that does not match the
+// searched key.
+//
+#define WORD_WALK_NOMATCH_FAILED 0x0010
+//
+// WordCursor contains undefined data
+//
+#define WORD_WALK_FAILED 0xffffffff
+
+//
+// Possible return values of the IsA() method
+//
+#define WORD_CURSOR 1
+#define WORD_CURSORS 2
+
+//
+// Wordlist::Walk uses WordCursor for :
+// state information : cursor
+// search term description
+// debug/trace/benchmarking
+// search result format description
+//
+class WordCursor
+{
+ public:
+#ifndef SWIG
+ //
+ // Private constructor. Creator of the object must then call Initialize()
+ // prior to using any other methods.
+ //
+ WordCursor() { Clear(); }
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursor(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, WordKey(), callback, callback_data, HTDIG_WORDLIST_WALKER); }
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursor(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { Clear(); Initialize(words, searchKey, 0, 0, action); }
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursor(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); }
+#endif /* SWIG */
+ virtual ~WordCursor() {}
+ //-
+ // Clear all data in object, set <b>GetResult()</b> data to NULL but
+ // do not delete it (the application is responsible for that).
+ //
+ virtual void Clear();
+ virtual void ClearInternal();
+ virtual void ClearResult();
+
+ //-
+ // Returns the type of the object. May be overloaded by
+ // derived classes to differentiate them at runtime.
+ // Returns WORD_CURSOR.
+ //
+ virtual int IsA() const { return WORD_CURSOR; }
+
+ //-
+ // Returns true if WalkNext() steps entries in strictly increasing
+ // order, false if it steps entries in random order.
+ //
+ virtual int Ordered() const { return 1; }
+
+ //-
+ // Optimize the cursor before starting a Walk.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Optimize() { return OK; }
+
+ //-
+ // Save in <b>buffer</b> all the information necessary to resume
+ // the walk at the point it left. The ASCII representation of the
+ // last key found (GetFound()) is written in <b>buffer</b> using the
+ // WordKey::Get method.
+ //
+ virtual int ContextSave(String& buffer) const { found.Get(buffer); return OK; }
+ //-
+ // Restore from buffer all the information necessary to
+ // resume the walk at the point it left. The <b>buffer</b> is expected
+ // to contain an ASCII representation of a WordKey (see WordKey::Set
+ // method). A <b>Seek</b> is done on the key and the object is prepared
+ // to jump to the next occurrence when <b>WalkNext</b> is called (the
+ // cursor_get_flags is set to <i>DB_NEXT.</i>
+ //
+ virtual int ContextRestore(const String& buffer);
+
+#ifndef SWIG
+ //-
+ // Walk and collect data from the index.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Walk();
+#endif /* SWIG */
+ //-
+ // Must be called before other Walk methods are used.
+ // Fill internal state according to input parameters
+ // and move before the first matching entry.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkInit();
+ //-
+ // Move before the first index matching entry.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkRewind();
+ //-
+ // Move to the next matching entry.
+ // At end of list, WORD_WALK_ATEND is returned.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkNext();
+#ifndef SWIG
+ //-
+ // Advance the cursor one step. The entry pointed to by the cursor may
+ // or may not match the requirements. Returns OK if entry pointed
+ // by cursor matches requirements. Returns NOTOK on
+ // failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry
+ // does not match requirements, it's safe to call WalkNextStep again
+ // until either OK or NOTOK is returned.
+ //
+ virtual int WalkNextStep();
+#endif /* SWIG */
+ //-
+ // Terminate Walk, free allocated resources.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkFinish();
+ //
+ // Find out if cursor should better jump to the next possible key
+ // (DB_SET_RANGE) instead of sequential iterating (DB_NEXT). If it
+ // is decided that jump is a better move : cursor_get_flags =
+ // DB_SET_RANGE key = calculated next possible key Else do nothing
+ // Return OK if skipping successful. Returns WORD_WALK_ATEND if no
+ // more possible match, reached the maximum. Returns
+ // WORD_WALK_FAILED on general failure, occurs if called and no
+ // skipping necessary.
+ //
+ int SkipUselessSequentialWalking();
+
+ //-
+ // Move before the inverted index position specified in <b>patch.</b>
+ // May only be called after a successful call to the <i>WalkNext</i>
+ // or <i>WalkNextStep</i> method.
+ // Copy defined fields from <b>patch</b> into a copy of the
+ // <i>found</i> data member and
+ // initialize internal state so that <i>WalkNext</i> jumps to
+ // this key next time it's called (cursor_get_flags set to DB_SET_RANGE).
+ // Returns OK if successful, NOTOK otherwise.
+ //
+ virtual int Seek(const WordKey& patch);
+
+ //-
+ // Returns true if cursor is positioned after the last possible
+ // match, false otherwise.
+ //
+ virtual int IsAtEnd() const { return status == WORD_WALK_ATEND; }
+
+ //
+ // Accessors for input parameters
+ //
+ //-
+ // Returns the search criterion.
+ //
+ WordKey& GetSearch() { return searchKey; }
+#ifndef SWIG
+ const WordKey& GetSearch() const { return searchKey; }
+#endif /* SWIG */
+ //-
+ // Returns the type of action when a matching entry
+ // is found.
+ //
+ int GetAction() const { return action; }
+ //
+ // Accessors for output parameters
+ //
+ //-
+ // Returns the list of WordReference found. The application
+ // is responsible for deallocation of the list.
+ //
+ List *GetResults() { return collectRes; }
+ //-
+ // For debugging purposes. Returns the list of WordReference hit
+ // during the search
+ // process. Some of them match the searched key, some don't.
+ // The application is responsible for deallocation of the list.
+ //
+ List *GetTraces() { return traceRes; }
+ //-
+ // For debugging purposes. Set the list of WordReference hit
+ // during the search process.
+ //
+ void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; }
+ //-
+ // Returns the last entry hit by the search. Only contains
+ // a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i>
+ // call was successful (i.e. returned OK).
+ //
+ const WordReference& GetFound() { return found; }
+ //-
+ // Returns the number of occurrences of the searched word
+ // in the inverted index in the <b>noccurrence</b> parameter.
+ // Returns OK on success, NOTOK on failure.
+ //
+ virtual int Noccurrence(unsigned int& noccurrence) const;
+
+#ifndef SWIG
+ //-
+ // Convert the whole structure to an ASCII string description
+ // Returns OK if successful, NOTOK otherwise.
+ //
+ virtual int Get(String& bufferout) const;
+ String Get() const { String tmp; Get(tmp); return tmp; }
+
+ protected:
+
+ //-
+ // Protected method. Derived classes should use this function to initialize
+ // the object if they do not call a WordCursor constructor in their own
+ // constructor. Initialization may occur after the object is created
+ // and must occur before a <b>Walk*</b> method is called. See the
+ // DESCRIPTION section for the semantics of the arguments.
+ // Return OK on success, NOTOK on error.
+ //
+ int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction);
+
+ //
+ // Input parameters
+ //
+ //-
+ // Input data. The key to be searched, see DESCRIPTION for more information.
+ //
+ WordKey searchKey;
+ //
+ // Input data. What to do when a WordReference is found.
+ // Can either be
+ // HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes
+ // HTDIG_WORDLIST_WALKER callback is called for each WordReference found
+ //
+ int action;
+
+ //
+ // Input data. Callback function called for each match found.
+ //
+ wordlist_walk_callback_t callback;
+ //
+ // Input data. Argument given to callback, contains arbitrary
+ // caller defined data.
+ //
+ Object *callback_data;
+
+ //
+ // Output parameters
+ //
+ //
+ // Output data. List of WordReference found in the search.
+ //
+ List *collectRes;
+
+ //-
+ // Output data. Last match found. Use GetFound() to retrieve it.
+ //
+ WordReference found;
+ //-
+ // Output data. WORD_WALK_ATEND if cursor is past last match,
+ // OK otherwise. (NOTE(review): the comment mentions GetStatus()
+ // but no such accessor is declared in this class — use IsAtEnd().)
+ //
+ int status;
+
+ //
+ // Debugging section. Do not use unless you know exactly what you do.
+ //
+ //
+ // Collect everything found while searching (not necessarily matching)
+ //
+ List *traceRes;
+
+ //
+ // Internal state
+ //
+ //
+ // The actual Berkeley DB cursor.
+ //
+ WordDBCursor cursor;
+ //
+ // The latest retrieved key and data
+ //
+ String key;
+ String data;
+ //
+ // The shortest prefix key computed from searchKey
+ //
+ WordKey prefixKey;
+ //-
+ // WalkNext leap is either DB_NEXT or DB_SET_RANGE.
+ //
+ int cursor_get_flags;
+ //
+ // True if search key is a prefix key
+ //
+ int searchKeyIsSameAsPrefix;
+ //-
+ // The inverted index used by this cursor.
+ //
+ WordList *words;
+#endif /* SWIG */
+};
+
+#endif /* _WordCursor_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc
new file mode 100644
index 00000000..011cfc9e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc
@@ -0,0 +1,590 @@
+//
+// WordCursorOne.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursorOne.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "WordCursorOne.h"
+#include "WordListOne.h"
+#include "WordDead.h"
+
+#include <stdio.h>
+
+//
+// WordCursorOne implementation
+//
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER);
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, searchKey, 0, 0, action);
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER);
+}
+
+// *****************************************************************************
+//
+int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
+{
+ action = naction;
+ searchKey = nsearchKey;
+ callback = ncallback;
+ callback_data = ncallback_data;
+ words = nwords;
+ cursor = ((WordListOne*)nwords)->db->Cursor();
+ return OK;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::Clear()
+{
+ searchKey.Clear();
+ action = 0;
+ callback = 0;
+ callback_data = 0;
+ ClearResult();
+ ClearInternal();
+ words = 0;
+
+ //
+ // Debugging section.
+ //
+ traceRes = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::ClearInternal()
+{
+ key.trunc();
+ data.trunc();
+ prefixKey.Clear();
+ cursor_get_flags = DB_SET_RANGE;
+ searchKeyIsSameAsPrefix = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::ClearResult()
+{
+ collectRes = 0;
+ found.Clear();
+ status = OK;
+}
+
+int
+WordCursorOne::ContextRestore(const String& buffer)
+{
+ int ret = OK;
+ if(!buffer.empty()) {
+ WordKey key(words->GetContext(), buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ //
+ // Move to restored position so that next call to
+ // WalkNext will go above the restored position.
+ //
+ if((ret = WalkNext()) != OK)
+ return ret;
+ }
+ return ret;
+}
+
+// *****************************************************************************
+//
+// Walk and collect data from the word database.
+//
+// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
+// stored in a list and the list is returned.
+// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
+// is called for each WordReference found. No list is built and the
+// function returns a null pointer.
+//
+// The <searchKey> argument may be a fully qualified key, containing precise values for each
+// field of the key. It may also contain only some fields of the key. In both cases
+// all the word occurrences matching the fields set in the key are retrieved. It may
+// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
+// be *slow* if key is not a prefix because it forces a complete walk of the
+// index.
+//
+int
+WordCursorOne::Walk()
+{
+ int ret;
+ if((ret = WalkInit()) != OK) return ret;
+ while((ret = WalkNext()) == OK)
+ ;
+ int ret1;
+ if((ret1 = WalkFinish()) != OK) return ret1;
+
+ return ret == WORD_WALK_ATEND ? OK : NOTOK;
+}
+
+int
+WordCursorOne::WalkInit()
+{
+ ClearResult();
+ ClearInternal();
+
+ WordReference wordRef(words->GetContext());
+
+ {
+ int ret;
+ if((ret = cursor->Open()) != 0)
+ return ret;
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
+
+ if(action & HTDIG_WORDLIST_COLLECTOR) {
+ collectRes = new List;
+ }
+
+ WordKey first_key(words->GetContext());
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ //
+ // Move past the stat data
+ //
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n");
+
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n");
+ prefixKey.Clear();
+ } else {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get());
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Init
+ //
+ found.Key() = first_key;
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkRewind()
+{
+ WordKey first_key(words->GetContext());
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ first_key.Clear();
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ prefixKey.Clear();
+ first_key.Clear();
+ } else {
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Rewind
+ //
+ found.Key() = first_key;
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkNext()
+{
+ int ret;
+ while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n");
+
+ return ret;
+}
+
+int
+WordCursorOne::WalkNextStep()
+{
+ status = OK;
+
+ {
+ int error;
+ if((error = cursor->Get(key, data, cursor_get_flags)) != 0) {
+ if(error == DB_NOTFOUND) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ } else {
+ return WORD_WALK_GET_FAILED;
+ }
+ }
+ }
+
+ //
+ // Next step operation is always sequential walk
+ //
+ cursor_get_flags = DB_NEXT;
+
+ found.Unpack(key, data);
+
+ if(words->Dead()->Exists(found.Key()))
+ return WORD_WALK_NOMATCH_FAILED;
+
+ if(traceRes) traceRes->Add(new WordReference(found));
+
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ //
+ // Don't bother to compare keys if we want to walk all the entries
+ //
+ if(!(searchKey.Empty())) {
+ // examples
+ // searchKey: aabc 1 ? ? ?
+ // prefixKey: aabc 1 ? ? ?
+
+ //
+ // Stop loop if we reach a record whose key does not
+ // match prefix key requirement, provided we have a valid
+ // prefix key.
+ // (ie. stop loop if we're past last possible match...)
+ //
+ if(!prefixKey.Empty() &&
+ !prefixKey.Equal(found.Key())) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ }
+
+ //
+ // Skip entries that do not exactly match the specified key.
+ //
+ if(!searchKeyIsSameAsPrefix &&
+ !searchKey.Equal(found.Key())) {
+ int ret;
+ switch((ret = SkipUselessSequentialWalking())) {
+ case OK:
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
+ return WORD_WALK_NOMATCH_FAILED;
+ break;
+ case WORD_WALK_ATEND:
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ break;
+ default:
+ fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
+ return NOTOK;
+ break;
+ }
+ }
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ if(collectRes) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n");
+ collectRes->Add(new WordReference(found));
+ } else if(callback) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n");
+ int ret = (*callback)(words, *cursor, &found, *(callback_data) );
+ //
+ // The callback function tells us that something went wrong, might
+ // as well stop walking.
+ //
+ if(ret != OK) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK");
+ return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND);
+ }
+ }
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkFinish()
+{
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n");
+
+ return cursor->Close() == 0 ? OK : NOTOK;
+}
+
+// *****************************************************************************
+//
+// Helper for SkipUselessSequentialWalking.
+// Undefine in foundKey all fields defined in searchKey
+// so that they are not considered by SetToFollowing.
+// It could become a method of WordKey but lacks generalisation and
+// from what I see it is a rather specific operation.
+//
+static inline void complement(WordContext* context, WordKey& key, const WordKey& mask)
+{
+ int nfields = context->GetKeyInfo().nfields;
+ int i;
+ //
+ // Undefine in 'key' all fields defined in 'mask'
+ //
+ for(i = 0; i < nfields; i++) {
+ if(mask.IsDefined(i))
+ key.Undefined(i);
+ else
+ key.SetDefined(i);
+ }
+}
+
+// *****************************************************************************
+//
+// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of
+// sequential iterating (DB_NEXT).
+// If it is decided that jump is a better move :
+// cursor_set_flags = DB_SET_RANGE
+// key = calculated next possible key
+// Else
+// do nothing
+// Return values
+// OK: skipping successful.
+// WORD_WALK_ATEND : no more possible match, reached the maximum
+// WORD_WALK_FAILED: general failure, occurs if called and no skipping
+// necessary.
+//
+// Sequential searching can waste time by searching all keys, for example:
+// If searching for Key: argh <DEF> <UNDEF> 10
+// Under normal circonstances we would do the following
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_NEXT
+// 3: argh 2 15 nomatch DB_NEXT
+// 4: argh 2 20 nomatch DB_NEXT
+// 5: argh 2 30 nomatch DB_NEXT
+// 6: argh 5 1 nomatch DB_NEXT
+// 7: argh 5 8 nomatch DB_NEXT
+// 8: argh 8 6 nomatch DB_NEXT
+//
+// But the optimal would be
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10
+// 3: argh 2 15
+// 4: argh 2 20
+// 5: argh 2 30
+// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10
+// 7: argh 5 8
+// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10
+//
+// That saves a lot of unnecessary hits. The underlying logic is a bit
+// more complex but you have the idea.
+//
+int
+WordCursorOne::SkipUselessSequentialWalking()
+{
+ WordKey& foundKey = found.Key();
+
+ int nfields = words->GetContext()->GetKeyInfo().nfields;
+ int i;
+
+ //
+ // Find out how the searchKey and the foundKey differ.
+ //
+ int diff_field = 0;
+ int lower = 0;
+ if(!foundKey.Diff(searchKey, diff_field, lower)) {
+ //
+ // foundKey matches searchKey (no difference), don't
+ // skip, everything is fine. The caller of SkipUselessSequentialWalking
+ // is expected to avoid this case for efficiency.
+ //
+ return WORD_WALK_FAILED;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Undefine in foundKey all fields defined in searchKey
+ // so that they are not considered by SetToFollowing.
+ //
+ complement(words->GetContext(), foundKey, searchKey);
+
+ //
+ // If the key found is lower than the searched key when
+ // considering only the fields defined in the search key,
+ // we only need to enforce the key to get the match.
+ // Otherwise we need to increment the found key to jump
+ // properly.
+ //
+ if(lower) {
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
+ for(i = diff_field + 1; i < nfields; i++)
+ if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
+ } else {
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n");
+ //
+ // diff_field - 1 is not really necessary because diff_field is undefined
+ // in foundKey and would therefore be ignored by SetToFollowing. We write
+ // diff_field - 1 to clearly state that incrementing begins just before the
+ // field for which a difference was found.
+ //
+ int ret;
+ if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
+ return ret;
+ }
+
+ //
+ // Copy all fields defined in searchKey into foundKey. This will copy
+ // searchKey in foundKey because all these fields have been
+ // previously undefined in foundKey.
+ //
+ foundKey.Merge(searchKey);
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Instruct Next function to jump to the calculated key
+ //
+ if(foundKey.Pack(key) == NOTOK) {
+ return WORD_WALK_FAILED;
+ }
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+// *****************************************************************************
+//
+// Copy defined fields in patch into foundKey and
+// initialize internal state so that WalkNext jumps to
+// this key next time it's called.
+//
+// Technically this means : Override latest key found (found data member)
+// with patch fields values, starting from the first field set in
+// patch up to the last. Pack the result in the key field and set
+// cursor_get_flags to DB_SET_RANGE.
+//
+int
+WordCursorOne::Seek(const WordKey& patch)
+{
+ int nfields = words->GetContext()->GetKeyInfo().nfields;
+ WordKey pos = searchKey;
+
+ if(patch.Empty()) {
+ fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n");
+ return NOTOK;
+ }
+
+ int i;
+ //
+ // Leave the most significant fields untouched
+ //
+ for(i = WORD_KEY_WORD + 1; i < nfields; i++)
+ if(patch.IsDefined(i))
+ break;
+ //
+ // From the first value set in the patch to the end
+ // override.
+ //
+ for(; i < nfields; i++) {
+ if(patch.IsDefined(i))
+ pos.Set(i, patch.Get(i));
+ else
+ pos.Set(i, 0);
+ }
+
+ if(!pos.Filled()) {
+ fprintf(stderr, "WordCursorOne::Seek: only make sense if the resulting key is fully defined\n");
+ return NOTOK;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get());
+
+ //
+ // Next move will jump to the patched key
+ //
+ pos.Pack(key);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+//
+// Convert the whole structure to an ascii string description
+//
+int WordCursorOne::Get(String& bufferout) const
+{
+ String tmp;
+ bufferout.trunc();
+
+ searchKey.Get(tmp);
+ bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set");
+ found.Get(tmp);
+ bufferout << ", found = " << tmp << ", status = " << status;
+ prefixKey.Get(tmp);
+ bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
+
+ return OK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.h b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.h
new file mode 100644
index 00000000..133ef59c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.h
@@ -0,0 +1,163 @@
+//
+// WordCursorOne.h
+//
+// NAME
+//
+// search and retrieve entries in a WordListOne object.
+//
+// SYNOPSIS
+//
+// #include <WordList.h>
+//
+// int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
+// {
+// ...
+// }
+//
+// Object* data = ...
+//
+// WordList *words = ...;
+//
+// WordCursor *search = words->Cursor(callback, data);
+// WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"));
+// WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"), callback, data);
+// WordCursor *search = words->Cursor(WordKey());
+//
+// ...
+//
+// if(search->Walk() == NOTOK) bark;
+// List* results = search->GetResults();
+//
+// search->WalkInit();
+// if(search->WalkNext() == OK)
+// dosomething(search->GetFound());
+// search->WalkFinish();
+//
+// DESCRIPTION
+//
+// WordCursorOne is a WordCursor derived class that implements search
+// in a WordListOne object. It currently is the only derived class of
+// the WordCursor object. Most of its behaviour is described in the
+// WordCursor manual page, only the behaviour specific to WordCursorOne
+// is documented here.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursorOne.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordCursorOne_h_
+#define _WordCursorOne_h_
+
+#ifndef SWIG
+#include "htString.h"
+#include "WordKey.h"
+#include "WordDB.h"
+#include "WordCursor.h"
+
+class WordList;
+class WordDBCursor;
+#endif /* SWIG */
+
+class WordCursorOne : public WordCursor
+{
+ public:
+#ifndef SWIG
+ //-
+ // Private constructor. Creator of the object must then call Initialize()
+ // prior to using any other methods.
+ //
+ WordCursorOne(WordList *words);
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data);
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursorOne(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER);
+ //-
+ // Private constructor. See WordList::Cursor method with same prototype for
+ // description.
+ //
+ WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data);
+#endif /* SWIG */
+ virtual ~WordCursorOne() {
+ if(cursor) delete cursor;
+ }
+ virtual void Clear();
+ virtual void ClearInternal();
+ virtual void ClearResult();
+
+ virtual inline int ContextSave(String& buffer) const { found.Get(buffer); return OK; }
+ virtual int ContextRestore(const String& buffer);
+
+#ifndef SWIG
+ virtual int Walk();
+#endif /* SWIG */
+ virtual int WalkInit();
+ virtual int WalkRewind();
+ virtual int WalkNext();
+#ifndef SWIG
+ virtual int WalkNextStep();
+#endif /* SWIG */
+ virtual int WalkFinish();
+ //
+ // Find out if cursor should better jump to the next possible key
+ // (DB_SET_RANGE) instead of sequential iterating (DB_NEXT). If it
+ // is decided that jump is a better move : cursor_set_flags =
+ // DB_SET_RANGE key = calculated next possible key Else do nothing
+  // Return OK if skipping successful. Returns WORD_WALK_ATEND if no
+ // more possible match, reached the maximum. Returns
+ // WORD_WALK_FAILED on general failure, occurs if called and no
+ // skipping necessary.
+ //
+ int SkipUselessSequentialWalking();
+
+ virtual int Seek(const WordKey& patch);
+
+#ifndef SWIG
+ virtual int Get(String& bufferout) const;
+ inline String Get() const { String tmp; Get(tmp); return tmp; }
+
+ protected:
+
+ int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction);
+
+ //
+ // Internal state
+ //
+ //
+ // The actual Berkeley DB cursor.
+ //
+ WordDBCursor* cursor;
+ //
+ // The latest retrieved key and data
+ //
+ String key;
+ String data;
+ //
+  // The shortened prefix key computed from searchKey
+ //
+ WordKey prefixKey;
+ //
+ // WalkNext leap is either DB_NEXT or DB_SET_RANGE.
+ //
+ int cursor_get_flags;
+ //
+ // True if search key is a prefix key
+ //
+ int searchKeyIsSameAsPrefix;
+#endif /* SWIG */
+};
+
+#endif /* _WordCursorOne_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDB.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDB.cc
new file mode 100644
index 00000000..5718afa5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDB.cc
@@ -0,0 +1,71 @@
+//
+// WordDB.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDB.cc,v 1.10 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "defaults.h"
+#include "WordDB.h"
+
+#include "../db/db.h"
+
+const char* dberror(int errval) {
+#define DB_MAX_ERROR (-DB_TXN_CKP + 1)
+ static const char* dbstr[DB_MAX_ERROR] = {
+ "",
+ "DB_INCOMPLETE",
+ "DB_KEYEMPTY",
+ "DB_KEYEXISTS",
+ "DB_LOCK_DEADLOCK",
+ "DB_LOCK_NOTGRANTED",
+ "DB_LOCK_NOTHELD",
+ "DB_NOTFOUND",
+ "DB_RUNRECOVERY",
+ "DB_DELETED",
+ "DB_NEEDSPLIT",
+ "DB_SWAPBYTES",
+ "DB_TXN_CKP",
+ };
+ if(errval < 0 && -errval < DB_MAX_ERROR)
+ return dbstr[-errval];
+ else
+ return strerror(errval);
+}
+
+int WordDB::Open(const String& filename, DBTYPE type, int flags, int mode) {
+ if(is_open) {
+ int error = 0;
+ if((error = Close()) != 0)
+ return error;
+ }
+
+ if(!dbenv) {
+ const char* progname = "WordDB";
+
+ //
+ // Environment initialization
+ //
+ // Output errors to the application's log.
+ //
+ db->set_errfile(db, stderr);
+ db->set_errpfx(db, progname);
+
+ }
+
+ int error = db->open(db, filename, NULL, type, (u_int32_t)flags, mode);
+
+ if(error == 0)
+ is_open = 1;
+
+ return error;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDB.h b/debian/htdig/htdig-3.2.0b6/htword/WordDB.h
new file mode 100644
index 00000000..e48ffc4d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDB.h
@@ -0,0 +1,295 @@
+//
+// WordDB.h
+//
+// WordDB: Interface to Berkeley DB
+// uses String and WordReference instead of Dbt, add some convenience
+// methods and implements string translation of Berkeley DB error codes.
+// It does not include the 'join' feature.
+// Beside this, the interface it identical to the Db class.
+// The next evolution for this set of class is to have a single object per
+// application so that they all share the same environment (transactions,
+// shared pool, database directory). This implies a static common object
+// that is referred to by each actual instance of WordDB. The static object
+// holds the DbEnv and DbInfo, the instances of WordDB only have an open
+// descriptor using the same DbEnv and DbInfo.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDB.h,v 1.7 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordDB_h_
+#define _WordDB_h_
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "db.h"
+#include "WordReference.h"
+#include "WordDBInfo.h"
+#include "htString.h"
+
+#define WORD_DBT_DCL(v) \
+ DBT v; \
+ memset((char*)&(v), '\0', sizeof(DBT))
+
+#define WORD_DBT_SET(v,d,s) \
+ v.data = (d); \
+ v.size = (s)
+
+#define WORD_DBT_INIT(v,d,s) \
+ WORD_DBT_DCL(v); \
+ WORD_DBT_SET(v,d,s)
+
+//
+// Encapsulate the Berkeley DB DB type
+//
+// Implements the same methods with String instead of Dbt.
+//
+// Add convenience methods taking WordReference instead of String
+//
+// The error model is *not* to use exceptions.
+//
+// To get a cursor use the Open method of WordDBCursor. I find this
+// more convenient than getting a cursor from WordDB.
+//
+// The WordDB has DbInfo and DbEnv members that can be set before
+// calling Open to configure it.
+//
+class WordDB {
+ public:
+ inline WordDB() { Alloc(); }
+ inline ~WordDB() { Dealloc(); }
+
+ inline int Alloc() {
+ db = 0;
+ is_open = 0;
+ dbenv = WordDBInfo::Instance()->dbenv;
+ return CDB_db_create(&db, dbenv, 0);
+ }
+
+ inline int Dealloc() {
+ int error = 0;
+ is_open = 0;
+ if(db)
+ error = db->close(db, 0);
+ else
+ fprintf(stderr, "WordDB::Dealloc: null db\n");
+ dbenv = 0;
+ db = 0;
+ return error;
+ }
+
+ int Open(const String& filename, DBTYPE type, int flags, int mode);
+
+ inline int Close() {
+ int error;
+ if((error = Dealloc()) != 0)
+ return error;
+ return Alloc();
+ }
+
+ inline int Fd(int *fdp) {
+ if(!is_open) return DB_UNKNOWN;
+ return db->fd(db, fdp);
+ }
+
+ inline int Stat(void *sp, void *(*db_malloc)(size_t), int flags) {
+ if(!is_open) return DB_UNKNOWN;
+ return db->stat(db, sp, db_malloc, (u_int32_t) flags);
+ }
+
+ inline int Sync(int flags) {
+ if(!is_open) return DB_UNKNOWN;
+ return db->sync(db, (u_int32_t) flags);
+ }
+
+ inline int get_byteswapped() const {
+ if(!is_open) return DB_UNKNOWN;
+ return db->get_byteswapped(db);
+ }
+
+ inline DBTYPE get_type() const {
+ if(!is_open) return DB_UNKNOWN;
+ return db->get_type(db);
+ }
+
+ //
+ // String arguments
+ //
+ inline int Put(DB_TXN *txn, const String& key, const String& data, int flags) {
+ WORD_DBT_INIT(rkey, (void*)key.get(), key.length());
+ WORD_DBT_INIT(rdata, (void*)data.get(), data.length());
+
+ return db->put(db, txn, &rkey, &rdata, flags);
+ }
+
+ inline int Get(DB_TXN *txn, String& key, String& data, int flags) const {
+ WORD_DBT_INIT(rkey, (void*)key.get(), (u_int32_t)key.length());
+ WORD_DBT_INIT(rdata, (void*)data.get(), (u_int32_t)data.length());
+
+ int error;
+ if((error = db->get(db, txn, &rkey, &rdata, 0)) != 0) {
+ if(error != DB_NOTFOUND)
+ fprintf(stderr, "WordDB::Get(%s,%s) using %d failed %s\n", (char*)key, (char*)data, flags, CDB_db_strerror(error));
+ } else {
+ //
+ // Only set arguments if found something.
+ //
+ key.set((const char*)rkey.data, (int)rkey.size);
+ data.set((const char*)rdata.data, (int)rdata.size);
+ }
+
+ return error;
+ }
+
+ inline int Del(DB_TXN *txn, const String& key) {
+ WORD_DBT_INIT(rkey, (void*)key.get(), (u_int32_t)key.length());
+
+ return db->del(db, txn, &rkey, 0);
+ }
+
+ //
+ // WordReference argument
+ //
+ inline int Put(const WordReference& wordRef, int flags) {
+ if(!is_open) return DB_UNKNOWN;
+
+ int ret;
+ String key;
+ String record;
+
+ if((ret = wordRef.Pack(key, record)) != OK) return DB_RUNRECOVERY;
+
+ return Put(0, key, record, flags);
+ }
+
+ inline int Del(const WordReference& wordRef) {
+ String key;
+
+ wordRef.Key().Pack(key);
+
+ return Del(0, key);
+ }
+
+ //
+ // Search entry matching wkey exactly, return key and data
+ // in wordRef.
+ //
+ inline int Get(WordReference& wordRef) const {
+ if(!is_open) return DB_UNKNOWN;
+
+ String data;
+ String key;
+
+ if(wordRef.Key().Pack(key) != OK) return DB_RUNRECOVERY;
+
+ int ret;
+ if((ret = Get(0, key, data, 0)) != 0)
+ return ret;
+
+ return wordRef.Unpack(key, data) == OK ? 0 : DB_RUNRECOVERY;
+ }
+
+ //
+  // Returns 0 if the key of wordRef matches an entry in the database.
+ // Could be implemented with Get but is not because we don't
+ // need to build a wordRef with the entry found in the base.
+ //
+ inline int Exists(const WordReference& wordRef) const {
+ if(!is_open) return DB_UNKNOWN;
+
+ String key;
+ String data;
+
+ if(wordRef.Key().Pack(key) != OK) return DB_RUNRECOVERY;
+
+ return Get(0, key, data, 0);
+ }
+
+ //
+ // Accessors
+ //
+ inline int set_bt_compare(int (*compare)(const DBT *, const DBT *)) {
+ return db->set_bt_compare(db, compare);
+ }
+
+ inline int set_pagesize(u_int32_t pagesize) {
+ return db->set_pagesize(db, pagesize);
+ }
+
+ //
+ // Accessors for description of the compression scheme
+ //
+ inline DB_CMPR_INFO* CmprInfo() { return dbenv->mp_cmpr_info; }
+ inline void CmprInfo(DB_CMPR_INFO* info) { dbenv->mp_cmpr_info = info; }
+
+ int is_open;
+ DB* db;
+ DB_ENV* dbenv;
+};
+
+//
+// Interface to DBC that uses String instead of DBT
+//
+class WordDBCursor {
+ public:
+ inline WordDBCursor() { cursor = 0; }
+ inline ~WordDBCursor() {
+ Close();
+ }
+
+ inline int Open(DB* db) {
+ Close();
+ return db->cursor(db, 0, &cursor, 0);
+ }
+
+ inline int Close() {
+ if(cursor) cursor->c_close(cursor);
+ cursor = 0;
+ return 0;
+ }
+
+ //
+ // String arguments
+ //
+ inline int Get(String& key, String& data, int flags) {
+ WORD_DBT_DCL(rkey);
+ WORD_DBT_DCL(rdata);
+ switch(flags & DB_OPFLAGS_MASK) {
+ case DB_SET_RANGE:
+ case DB_SET:
+ case DB_GET_BOTH:
+ WORD_DBT_SET(rkey, (void*)key.get(), key.length());
+ break;
+ }
+ int error;
+ if((error = cursor->c_get(cursor, &rkey, &rdata, (u_int32_t)flags)) != 0) {
+ if(error != DB_NOTFOUND)
+ fprintf(stderr, "WordDBCursor::Get(%d) failed %s\n", flags, CDB_db_strerror(error));
+ } else {
+ key.set((const char*)rkey.data, (int)rkey.size);
+ data.set((const char*)rdata.data, (int)rdata.size);
+ }
+ return error;
+ }
+
+ inline int Put(const String& key, const String& data, int flags) {
+ WORD_DBT_INIT(rkey, (void*)key.get(), (size_t)key.length());
+ WORD_DBT_INIT(rdata, (void*)data.get(), (size_t)data.length());
+ return cursor->c_put(cursor, &rkey, &rdata, (u_int32_t)flags);
+ }
+
+ inline int Del() {
+ return cursor->c_del(cursor, (u_int32_t)0);
+ }
+
+private:
+ DBC* cursor;
+};
+
+#endif /* _WordDB_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc
new file mode 100644
index 00000000..2f7a988a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc
@@ -0,0 +1,411 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "WordKey.h"
+#include "WordDB.h"
+#include "WordDBCache.h"
+#include "WordMeta.h"
+#include "ber.h"
+
+//
+// Add a key/data pair to the memory cache, flushing the cache to disk
+// if it is full.  Returns 0 on success, an error code otherwise.
+//
+int WordDBCaches::Add(char* key, int key_size, char* data, int data_size)
+{
+  int ret;
+  if((ret = cache.Allocate(key_size + data_size)) != 0) {
+    //
+    // Fix: errors other than ENOMEM (e.g. DB_RUNRECOVERY from a failed
+    // realloc) were previously ignored and Add was attempted anyway.
+    //
+    if(ret != ENOMEM) return ret;
+    // The memory cache is full: dump it to disk and retry once.
+    if((ret = CacheFlush()) != 0) return ret;
+    if((ret = cache.Allocate(key_size + data_size)) != 0) return ret;
+  }
+
+  return cache.Add(key, key_size, data, data_size);
+}
+
+//
+// Build the name of the next cache file (<index>CNNNNNNNN, NNNNNNNN
+// being the zero padded file serial number), record it in the `files'
+// btree and update the total size estimate.
+//
+int WordDBCaches::AddFile(String& filename)
+{
+  char tmp[32];
+  unsigned int serial;
+  words->Meta()->Serial(WORD_META_SERIAL_FILE, serial);
+  if(serial == WORD_META_SERIAL_INVALID)
+    return NOTOK;
+  filename = words->Filename();
+  //
+  // Fix: serial is unsigned, use %u (passing an unsigned value to %d
+  // is undefined once serial - 1 exceeds INT_MAX).
+  //
+  sprintf(tmp, "C%08u", serial - 1);
+  filename << tmp;
+
+  String dummy;
+  if(files->Put(0, filename, dummy, 0) != 0)
+    return NOTOK;
+
+  //
+  // Rough estimate of the cumulated size of the cache files: each
+  // dumped file is at most the size of the memory cache.
+  //
+  size = (cache.GetMax() / 1024) * serial;
+
+  return OK;
+}
+
+//
+// Sort the memory cache and dump it to a new cache file, then merge
+// the cache files if too many have accumulated.
+//
+// Fix over the original revision: the "cache" lock is now released on
+// the error paths instead of being leaked.
+//
+int WordDBCaches::CacheFlush()
+{
+  if(cache.Empty()) return OK;
+
+  if(cache.Sort() != OK) return NOTOK;
+  String filename;
+  int locking = 0;
+  if(!lock) {
+    words->Meta()->Lock("cache", lock);
+    locking = 1;
+  }
+  int ret = OK;
+  if(AddFile(filename) != OK || CacheWrite(filename) != OK) {
+    ret = NOTOK;
+  } else {
+    unsigned int serial;
+    words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial);
+    if(serial >= (unsigned int)file_max || Full())
+      if(Merge() != OK) ret = NOTOK;
+  }
+  if(locking) words->Meta()->Unlock("cache", lock);
+
+  return ret;
+}
+
+//
+// myqsort comparison function: order files by decreasing size
+// (biggest first).
+//
+// Fix: compare instead of subtracting -- the size fields are unsigned,
+// so b->size - a->size wraps around instead of going negative.
+//
+static int merge_cmp_size(WordDBCaches* , WordDBCacheFile* a, WordDBCacheFile* b)
+{
+  if(b->size > a->size) return 1;
+  if(b->size < a->size) return -1;
+  return 0;
+}
+
+//
+// Merge all the cache files listed in the `files' btree into a single
+// sorted file named <index>C00000000, by repeatedly merging the two
+// smallest files.
+//
+// Fixes over the original revision: the WordDBCacheFile array is
+// released on every exit path (it was never delete[]'d), the cursor is
+// released on the stat error path, and the "cache" lock is released on
+// the early return and error paths.
+//
+int WordDBCaches::Merge()
+{
+  if(CacheFlush() != OK) return NOTOK;
+
+  int locking = 0;
+  if(!lock) {
+    words->Meta()->Lock("cache", lock);
+    locking = 1;
+  }
+  unsigned int serial;
+  words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial);
+  if(serial <= 1) {
+    // Nothing to merge, but do not forget to release the lock.
+    if(locking) words->Meta()->Unlock("cache", lock);
+    return OK;
+  }
+
+  int ret = OK;
+  //
+  // heap lists all the files in decreasing size order (biggest first)
+  //
+  WordDBCacheFile* heap = new WordDBCacheFile[serial];
+  {
+    String filename;
+    String dummy;
+    WordDBCursor* cursor = files->Cursor();
+    struct stat stat_buf;
+    int i;
+    int error;
+    for(i = 0; (error = cursor->Get(filename, dummy, DB_NEXT)) == 0; i++) {
+      WordDBCacheFile& file = heap[i];
+      file.filename = filename;
+      if(stat((char*)file.filename, &stat_buf) == 0) {
+        file.size = stat_buf.st_size;
+      } else {
+        const String message = String("WordDBCaches::Merge: cannot stat ") + file.filename;
+        perror((const char*)message);
+        ret = NOTOK;
+        break;
+      }
+      cursor->Del();
+    }
+    delete cursor;
+    if(ret == OK)
+      myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this);
+  }
+
+  String tmpname = words->Filename() + String("C.tmp");
+
+  //
+  // Merge the two smallest files into a tmp file that replaces the
+  // second smallest, until only one file remains.
+  //
+  while(ret == OK && serial > 1) {
+    WordDBCacheFile* a = &heap[serial - 1];
+    WordDBCacheFile* b = &heap[serial - 2];
+
+    if(Merge(a->filename, b->filename, tmpname) != OK) {
+      ret = NOTOK;
+      break;
+    }
+
+    //
+    // Remove file a
+    //
+    if(unlink((char*)a->filename) != 0) {
+      const String message = String("WordDBCaches::Merge: unlink ") + a->filename;
+      perror((const char*)message);
+      ret = NOTOK;
+      break;
+    }
+
+    //
+    // Remove file b
+    //
+    if(unlink((char*)b->filename) != 0) {
+      const String message = String("WordDBCaches::Merge: unlink ") + b->filename;
+      perror((const char*)message);
+      ret = NOTOK;
+      break;
+    }
+
+    //
+    // Rename tmp file into file b
+    //
+    if(rename((char*)tmpname, (char*)b->filename) != 0) {
+      const String message = String("WordDBCaches::Merge: rename ") + tmpname + String(" ") + b->filename;
+      perror((const char*)message);
+      ret = NOTOK;
+      break;
+    }
+
+    //
+    // Update b file size. The size need not be an accurate number as
+    // long as it reflects the relative size of each file.
+    //
+    b->size += a->size;
+
+    serial--;
+    //
+    // update heap
+    //
+    myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this);
+  }
+
+  if(ret == OK) {
+    //
+    // Give the surviving file its canonical serial 0 name and record it.
+    //
+    String newname(words->Filename());
+    newname << "C00000000";
+
+    if(rename((char*)heap[0].filename, (char*)newname) != 0) {
+      const String message = String("WordDBCaches::Merge: rename ") + heap[0].filename + String(" ") + newname;
+      perror((const char*)message);
+      ret = NOTOK;
+    } else {
+      String dummy;
+      if(files->Put(0, newname, dummy, 0) != 0)
+        ret = NOTOK;
+      else
+        words->Meta()->SetSerial(WORD_META_SERIAL_FILE, serial);
+    }
+  }
+
+  delete [] heap;
+  if(locking) words->Meta()->Unlock("cache", lock);
+
+  return ret;
+}
+
+//
+// Merge the two sorted cache files <filea> and <fileb> into <tmpname>.
+// File format: a BER encoded entry count followed by the entries, each
+// entry being BER(key length), key bytes, BER(data length), data bytes.
+//
+// NOTE(review): the three fopen() results are not checked, and every
+// early NOTOK return leaks the three FILE pointers and the three
+// malloc'ed buffers -- callers treat NOTOK as fatal, but this should
+// be cleaned up.
+//
+int WordDBCaches::Merge(const String& filea, const String& fileb, const String& tmpname)
+{
+ FILE* ftmp = fopen((const char*)tmpname, "w");
+ FILE* fa = fopen((const char*)filea, "r");
+ FILE* fb = fopen((const char*)fileb, "r");
+
+ // Scratch buffers, grown on demand by ReadEntry/WriteEntry.
+ unsigned int buffertmp_size = 128;
+ unsigned char* buffertmp = (unsigned char*)malloc(buffertmp_size);
+ unsigned int buffera_size = 128;
+ unsigned char* buffera = (unsigned char*)malloc(buffera_size);
+ unsigned int bufferb_size = 128;
+ unsigned char* bufferb = (unsigned char*)malloc(bufferb_size);
+
+ // Read both entry counts and write their sum as the output header.
+ unsigned int entriesa_length;
+ if(ber_file2value(fa, entriesa_length) < 1) return NOTOK;
+ unsigned int entriesb_length;
+ if(ber_file2value(fb, entriesb_length) < 1) return NOTOK;
+
+ if(ber_value2file(ftmp, entriesa_length + entriesb_length) < 1) return NOTOK;
+
+ WordDBCacheEntry entrya;
+ WordDBCacheEntry entryb;
+
+ //
+ // Classical two-way merge: both inputs are sorted, repeatedly copy
+ // the smaller head entry (WordKey::Compare order) to the output.
+ //
+ if(entriesa_length > 0 && entriesb_length > 0) {
+
+ if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK;
+ if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK;
+
+ while(entriesa_length > 0 && entriesb_length > 0) {
+ if(WordKey::Compare(words->GetContext(), (const unsigned char*)entrya.key, entrya.key_size, (const unsigned char*)entryb.key, entryb.key_size) < 0) {
+ if(WriteEntry(ftmp, entrya, buffertmp, buffertmp_size) != OK) return NOTOK;
+ if(--entriesa_length > 0)
+ if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK;
+ } else {
+ if(WriteEntry(ftmp, entryb, buffertmp, buffertmp_size) != OK) return NOTOK;
+ if(--entriesb_length > 0)
+ if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK;
+ }
+ }
+ }
+
+ //
+ // Drain whichever input still has entries.
+ // NOTE(review): this loop always reads into buffera, even when the
+ // remaining file is fb; it works because ReadEntry rebinds entry to
+ // the buffer it fills, but confirm this asymmetry was intended.
+ //
+ if(entriesa_length > 0 || entriesb_length > 0) {
+ FILE* fp = entriesa_length > 0 ? fa : fb;
+ unsigned int& entries_length = entriesa_length > 0 ? entriesa_length : entriesb_length;
+ WordDBCacheEntry& entry = entriesa_length > 0 ? entrya : entryb;
+ while(entries_length > 0) {
+ if(WriteEntry(ftmp, entry, buffertmp, buffertmp_size) != OK) return NOTOK;
+ if(--entries_length > 0)
+ if(ReadEntry(fp, entry, buffera, buffera_size) != OK) return NOTOK;
+ }
+ }
+
+ free(buffera);
+ free(bufferb);
+ free(buffertmp);
+
+ fclose(fa);
+ fclose(fb);
+ fclose(ftmp);
+
+ return OK;
+}
+
+//
+// Merge all cache files into one, then insert every entry of the
+// resulting file into the inverted index <db> and reset the file
+// serial counter.
+//
+// Fixes over the original revision: the unused user_data local is
+// gone, fopen() is checked, and the lock, buffer and FILE are released
+// on the error paths.
+//
+int WordDBCaches::Merge(WordDB& db)
+{
+  int locking = 0;
+  if(!lock) {
+    words->Meta()->Lock("cache", lock);
+    locking = 1;
+  }
+  if(Merge() != OK) {
+    if(locking) words->Meta()->Unlock("cache", lock);
+    return NOTOK;
+  }
+
+  String filename;
+  String dummy;
+  WordDBCursor* cursor = files->Cursor();
+  if(cursor->Get(filename, dummy, DB_FIRST) != 0) {
+    delete cursor;
+    if(locking) words->Meta()->Unlock("cache", lock);
+    return NOTOK;
+  }
+  cursor->Del();
+  delete cursor;
+
+  FILE* fp = fopen((char*)filename, "r");
+  if(!fp) {
+    const String message = String("WordDBCaches::Merge: cannot open ") + filename;
+    perror((const char*)message);
+    if(locking) words->Meta()->Unlock("cache", lock);
+    return NOTOK;
+  }
+
+  unsigned int buffer_size = 128;
+  unsigned char* buffer = (unsigned char*)malloc(buffer_size);
+
+  int ret = OK;
+  unsigned int entries_length;
+  if(ber_file2value(fp, entries_length) < 1) ret = NOTOK;
+
+  if(ret == OK) {
+    WordDBCacheEntry entry;
+    unsigned int i;
+    for(i = 0; i < entries_length; i++) {
+      if(ReadEntry(fp, entry, buffer, buffer_size) != OK) {
+        ret = NOTOK;
+        break;
+      }
+      WORD_DBT_INIT(rkey, (void*)entry.key, entry.key_size);
+      WORD_DBT_INIT(rdata, (void*)entry.data, entry.data_size);
+      db.db->put(db.db, 0, &rkey, &rdata, 0);
+    }
+  }
+
+  free(buffer);
+  fclose(fp);
+
+  if(ret == OK) {
+    if(unlink((char*)filename) != 0) {
+      const String message = String("WordDBCaches::Merge: unlink ") + filename;
+      perror((const char*)message);
+      ret = NOTOK;
+    } else {
+      words->Meta()->SetSerial(WORD_META_SERIAL_FILE, 0);
+      size = 0;
+    }
+  }
+
+  if(locking) words->Meta()->Unlock("cache", lock);
+
+  return ret;
+}
+
+//
+// Dump the sorted memory cache into <filename>: a BER encoded entry
+// count followed by the entries, then empty the memory cache.
+//
+// Fixes over the original revision: the malformed diagnostic message
+// (stray ")") is repaired, and the FILE/buffer are released on the
+// error paths.
+//
+int WordDBCaches::CacheWrite(const String& filename)
+{
+  FILE* fp = fopen(filename, "w");
+  if(!fp) {
+    String message;
+    message << "WordDBCaches::CacheWrite(" << filename << "): ";
+    perror((char*)message);
+    return NOTOK;
+  }
+
+  int entries_length;
+  WordDBCacheEntry* entries;
+  int ret;
+  if((ret = cache.Entries(entries, entries_length)) != 0) {
+    fclose(fp);
+    return ret;
+  }
+
+  if(ber_value2file(fp, entries_length) < 1) {
+    fclose(fp);
+    return NOTOK;
+  }
+
+  unsigned int buffer_size = 1024;
+  unsigned char* buffer = (unsigned char*)malloc(buffer_size);
+  int error = OK;
+  int i;
+  for(i = 0; i < entries_length; i++) {
+    if(WriteEntry(fp, entries[i], buffer, buffer_size) != OK) {
+      error = NOTOK;
+      break;
+    }
+  }
+  free(buffer);
+  fclose(fp);
+
+  if(error != OK) return NOTOK;
+
+  // Only forget the in-memory entries once they are safely on disk.
+  cache.Flush();
+
+  return OK;
+}
+
+//
+// Append one entry to <fp>: BER(key length), key bytes, BER(data
+// length), data bytes.  <buffer> is a caller owned scratch buffer that
+// is grown (and handed back) as needed.
+//
+// Fix: the realloc result is now checked -- on failure the old buffer
+// was lost and the NULL result dereferenced.
+//
+int WordDBCaches::WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size)
+{
+  //
+  // 64 spare bytes is far more than the two BER encoded lengths need.
+  //
+  if(entry.key_size + entry.data_size + 64 > buffer_size) {
+    unsigned int nbuffer_size = entry.key_size + entry.data_size + 64;
+    unsigned char* nbuffer = (unsigned char*)realloc(buffer, nbuffer_size);
+    if(!nbuffer) {
+      fprintf(stderr, "WordDBCaches::WriteEntry: out of memory\n");
+      return NOTOK;
+    }
+    buffer = nbuffer;
+    buffer_size = nbuffer_size;
+  }
+
+  int p_size = buffer_size;
+  unsigned char* p = buffer;
+
+  int ber_len;
+  if((ber_len = ber_value2buf(p, p_size, entry.key_size)) < 1) {
+    fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for key %d\n", entry.key_size);
+    return NOTOK;
+  }
+  p += ber_len;
+  memcpy(p, entry.key, entry.key_size);
+  p += entry.key_size;
+
+  p_size -= ber_len + entry.key_size;
+
+  if((ber_len = ber_value2buf(p, p_size, entry.data_size)) < 1) {
+    fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for data %d\n", entry.data_size);
+    return NOTOK;
+  }
+  p += ber_len;
+  memcpy(p, entry.data, entry.data_size);
+  p += entry.data_size;
+
+  // One fwrite for the whole serialized entry.
+  if(fwrite((void*)buffer, p - buffer, 1, fp) != 1) {
+    perror("WordDBCaches::WriteEntry: cannot write entry ");
+    return NOTOK;
+  }
+
+  return OK;
+}
+
+//
+// Read one entry from <fp> (format written by WriteEntry).  The key
+// and data bytes are stored back to back in <buffer>, which is grown
+// (and handed back) as needed; entry.key/entry.data point into it.
+//
+// Fix: a failed realloc no longer overwrites <buffer> with NULL (which
+// leaked the old block and left the caller freeing NULL).
+//
+int WordDBCaches::ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size)
+{
+  if(ber_file2value(fp, entry.key_size) < 1) return NOTOK;
+
+  if(entry.key_size > buffer_size) {
+    unsigned char* nbuffer = (unsigned char*)realloc(buffer, buffer_size + entry.key_size);
+    if(!nbuffer) return NOTOK;
+    buffer = nbuffer;
+    buffer_size += entry.key_size;
+  }
+
+  if(fread((void*)buffer, entry.key_size, 1, fp) != 1) {
+    perror("WordDBCaches::ReadEntry(): cannot read key entry ");
+    return NOTOK;
+  }
+
+  if(ber_file2value(fp, entry.data_size) < 1) return NOTOK;
+
+  if(entry.data_size > 0) {
+    if(entry.data_size + entry.key_size > buffer_size) {
+      unsigned char* nbuffer = (unsigned char*)realloc(buffer, buffer_size + entry.data_size);
+      if(!nbuffer) return NOTOK;
+      buffer = nbuffer;
+      buffer_size += entry.data_size;
+    }
+
+    if(fread((void*)(buffer + entry.key_size), entry.data_size, 1, fp) != 1) {
+      perror("WordDBCaches::ReadEntry(): cannot read data entry ");
+      return NOTOK;
+    }
+  }
+
+  // The entry points into the (possibly reallocated) scratch buffer.
+  entry.key = (char*)buffer;
+  entry.data = (char*)(buffer + entry.key_size);
+
+  return OK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.h b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.h
new file mode 100644
index 00000000..c4c0a2e3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.h
@@ -0,0 +1,267 @@
+//
+// WordDBCache.h
+//
+// NAME
+// intermediate cache for WordList objects.
+//
+// SYNOPSIS
+//
+// Internal helper for the WordListOne object.
+//
+// DESCRIPTION
+//
+// To speed up bulk insertions, the WordDBCache allows them to remain in
+// memory as long as a given limit is not reached. The inserted entries
+// are then sorted and dumped into a file. When a given number of files
+// have been produced, they are merged into one. Eventually the resulting
+// list of entries is inserted into the WordList index.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBCache.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordDBCache_h_
+#define _WordDBCache_h_
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include "htString.h"
+#include "List.h"
+#include "db.h"
+#include "lib.h"
+#include "myqsort.h"
+#include "WordList.h"
+
+class WordDB;
+class WordLock;
+
+//
+// Minimum size of the pulsing cache
+//
+#define WORD_DB_CACHE_MINIMUM (500 * 1024)
+
+//
+// We could use DBT instead but it's more than two times bigger and
+// time saving by the most efficient way of memory space is the whole
+// point of the cache.
+//
+class WordDBCacheEntry {
+public:
+  char* key;                 // key bytes (not null terminated); may hold a pool offset, see WordDBCache
+  unsigned int key_size;     // key length in bytes
+  char* data;                // data bytes; may hold a pool offset, see WordDBCache
+  unsigned int data_size;    // data length in bytes
+};
+
+//
+// Memory cache of key/data pairs waiting to be sorted and dumped to a
+// cache file.  Entry descriptors live in `entries', the raw key/data
+// bytes are packed in `pool'.  Because both arrays may be realloc'ed,
+// Add() stores key/data positions as pool *offsets* cast to char*;
+// Absolute()/Relative() convert between offset and pointer form
+// (Sort() needs real pointers for the comparison function).
+//
+class WordDBCache {
+public:
+  inline WordDBCache(WordContext* ncontext) {
+    context = ncontext;
+
+    // Start with room for 1000 descriptors and the minimum pool.
+    entries = (WordDBCacheEntry*)malloc(1000 * sizeof(WordDBCacheEntry));
+    entries_length = 0;
+    entries_size = 1000;
+
+    pool = (char*)malloc(WORD_DB_CACHE_MINIMUM);
+    pool_length = 0;
+    pool_size = pool_max = WORD_DB_CACHE_MINIMUM;
+  }
+
+  inline ~WordDBCache() {
+    // The owner is expected to flush before destruction, otherwise the
+    // pending entries are silently lost (only a warning is printed).
+    if(pool_length > 0) {
+      fprintf(stderr, "WordDBCache::~WordDBCache: destructor called and cache not empty\n");
+    }
+    free(entries);
+    free(pool);
+  }
+
+  // Double the descriptor array.  Returns 0, or DB_RUNRECOVERY when
+  // realloc fails.
+  inline int ResizeEntries() {
+    entries_size *= 2;
+    entries = (WordDBCacheEntry*)realloc(entries, entries_size * sizeof(WordDBCacheEntry));
+    return entries ? 0 : DB_RUNRECOVERY;
+  }
+
+  // Grow the pool, never beyond pool_max.  Returns ENOMEM when the
+  // limit is reached: the caller is expected to flush and retry.
+  inline int ResizePool(int wanted) {
+    if(pool_size * 2 > pool_max) {
+      if(pool_max > pool_size && pool_max > wanted)
+        pool_size = pool_max;
+      else
+        return ENOMEM;
+    } else {
+      pool_size *= 2;
+    }
+    pool = (char*)realloc(pool, pool_size);
+    return pool ? 0 : DB_RUNRECOVERY;
+  }
+
+  // Ensure room for one more descriptor and `size' payload bytes.
+  inline int Allocate(int size) {
+    int ret;
+    if(entries_length >= entries_size)
+      if((ret = ResizeEntries()) != 0)
+        return ret;
+    if(pool_length + size >= pool_size) {
+      if((ret = ResizePool(pool_length + size)) != 0)
+        return ret;
+    }
+    return 0;
+  }
+
+  inline int GetMax() const { return pool_max; }
+
+  // Raise (never lower) the pool size limit.
+  inline int SetMax(int max) {
+    if(max > pool_max)
+      pool_max = max;
+    return 0;
+  }
+
+  // Set the comparison function used by Sort().
+  inline int SetCompare(int (*ncompare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) {
+    compare = ncompare;
+    return 0;
+  }
+
+  inline int Sort() {
+    // Entries are stored in offset form; convert to pointers first.
+    if(Absolute() != OK) return NOTOK;
+    //
+    // Reorder entries in increasing order
+    //
+    myqsort((void*)entries, entries_length, sizeof(WordDBCacheEntry), (myqsort_cmp)compare, (void*)context);
+    return 0;
+  }
+
+  // Turn key/data pointers back into pool offsets (survives a pool
+  // realloc).
+  inline int Relative() {
+    int i;
+    for(i = 0; i < entries_length; i++) {
+      entries[i].key = (char*)(entries[i].key - pool);
+      entries[i].data = (char*)(entries[i].data - pool);
+    }
+    return OK;
+  }
+
+  // Turn pool offsets into real pointers.
+  // NOTE(review): the (int) casts truncate pointers on LP64 platforms;
+  // the stored values are offsets < pool_size so they fit in an int,
+  // but this relies on implementation-defined conversions -- confirm.
+  inline int Absolute() {
+    int i;
+    for(i = 0; i < entries_length; i++) {
+      entries[i].key = pool + (int)(entries[i].key);
+      entries[i].data = pool + (int)(entries[i].data);
+    }
+    return OK;
+  }
+
+  // Expose the descriptor array (borrowed, not copied).
+  inline int Entries(WordDBCacheEntry*& nentries, int& nentries_length) {
+    nentries = entries;
+    nentries_length = entries_length;
+    return 0;
+  }
+
+  // Expose the raw byte pool (borrowed, not copied).
+  inline int Pool(char*& npool, int& npool_length) {
+    npool = pool;
+    npool_length = pool_length;
+    return OK;
+  }
+
+  // Append a key/data pair; positions are stored as pool offsets (see
+  // class comment).
+  inline int Add(char* key, int key_size, char* data, int data_size) {
+    int ret;
+    if((ret = Allocate(key_size + data_size)) != 0)
+      return ret;
+
+    entries[entries_length].key = (char*)pool_length;
+    entries[entries_length].key_size = key_size;
+    entries[entries_length].data = (char*)(pool_length + key_size);
+    entries[entries_length].data_size = data_size;
+    entries_length++;
+    memcpy(pool + pool_length, key, key_size);
+    memcpy(pool + pool_length + key_size, data, data_size);
+    pool_length += key_size + data_size;
+
+    return 0;
+  }
+
+  // Forget all entries; does not shrink the allocations.
+  inline int Flush() {
+    entries_length = 0;
+    pool_length = 0;
+    return 0;
+  }
+
+  inline int Empty() {
+    return entries_length <= 0;
+  }
+
+private:
+  WordDBCacheEntry* entries;   // entry descriptors
+  int entries_length;          // number of valid entries
+  int entries_size;            // allocated descriptor slots
+
+  char* pool;                  // packed key/data bytes
+  int pool_length;             // bytes used
+  int pool_size;               // bytes allocated
+  int pool_max;                // upper bound for pool_size
+
+  int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *);
+  WordContext *context;
+};
+
+//
+// One on-disk cache file: its name and an approximate byte size used
+// to order the files for merging.
+//
+class WordDBCacheFile : public Object
+{
+public:
+  WordDBCacheFile() : size(0) { }
+
+  String filename;
+  unsigned int size;
+};
+
+//
+// Set of cache files (tracked in the `files' btree) plus the memory
+// cache feeding them.  See WordDBCache.cc for the merging logic.
+//
+class WordDBCaches {
+ public:
+  inline WordDBCaches(WordList* nwords, int nfile_max, int size_hint, int nsize_max) : cache(nwords->GetContext()) {
+    words = nwords;
+
+    files = new WordDB(words->GetContext()->GetDBInfo());
+    files->Open(words->Filename(), "tmp", DB_BTREE, words->Flags(), 0666, WORD_DB_FILES);
+    file_max = nfile_max;
+    size_max = nsize_max;
+    size = 0;  // fix: was left uninitialized, yet Full() reads it
+    lock = 0;
+
+    cache.SetMax(size_hint / 2);
+  }
+
+  ~WordDBCaches() {
+    delete files;
+  }
+
+  // True when the estimated total size of the cache files reached
+  // size_max (0 means no limit).
+  int Full() const { return size_max > 0 ? size >= size_max : 0; }
+
+  // Add a key/data pair to the memory cache, flushing if needed.
+  int Add(char* key, int key_size, char* data, int data_size);
+  // Allocate the next cache file name and record it.
+  int AddFile(String& filename);
+
+  // Sort and dump the memory cache to a new file.
+  int CacheFlush();
+
+  // Merge all cache files into one.
+  int Merge();
+  // Merge two sorted cache files into tmpname.
+  int Merge(const String& filea, const String& fileb, const String& tmpname);
+  // Merge everything and insert the result into the inverted index.
+  int Merge(WordDB& db);
+
+  int CacheWrite(const String& filename);
+  int CacheCompare(int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) { cache.SetCompare(compare); return OK; }
+
+  // Serialization helpers for one cache entry.
+  int WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size);
+  int ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size);
+
+ private:
+  WordList* words;             // inverted index being fed
+
+  WordDB* files;               // btree listing the cache file names
+  int file_max;                // merge when this many files exist
+  int size_max;                // total size limit (0 = none)
+  int size;                    // estimated total size of cache files
+
+  WordLock* lock;              // "cache" lock held during flush/merge
+  WordDBCache cache;           // the memory cache
+};
+
+#endif /* _WordDBCache_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.cc
new file mode 100644
index 00000000..4fe9f738
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.cc
@@ -0,0 +1,175 @@
+//
+// WordDBCompress.h
+//
+// WordDBCompress: Implements specific compression scheme for
+// Berkeley DB pages containing WordReferences objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBCompress.cc,v 1.7 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <ctype.h>
+
+#include "WordDBPage.h"
+#include "WordDBCompress.h"
+#include "WordBitCompress.h"
+
+/*
+ * WordDBCompress: C-callbacks, actually called by Berkeley-DB
+ * they just call their WordDBCompress equivalents (by using user_data)
+ */
+extern "C"
+{
+
+// C callback invoked by Berkeley DB: dispatch to the WordDBCompress
+// object carried in user_data.
+static int WordDBCompress_compress_c(const u_int8_t* inbuff, int inbuff_length, u_int8_t** outbuffp, int* outbuff_lengthp, void *user_data)
+{
+  WordDBCompress* compressor = (WordDBCompress*)user_data;
+  if(!compressor) {
+    fprintf(stderr, "WordDBCompress_compress_c:: user_data is NULL");
+    return NOTOK;
+  }
+  return compressor->Compress((unsigned char*)inbuff, inbuff_length, (unsigned char**)outbuffp, outbuff_lengthp);
+}
+
+// C callback invoked by Berkeley DB: dispatch to the WordDBCompress
+// object carried in user_data.
+static int WordDBCompress_uncompress_c(const u_int8_t* inbuff, int inbuff_length, u_int8_t* outbuff, int outbuff_length, void *user_data)
+{
+  WordDBCompress* compressor = (WordDBCompress*)user_data;
+  if(!compressor) {
+    fprintf(stderr, "WordDBCompress_uncompress_c:: user_data is NULL");
+    return NOTOK;
+  }
+  return compressor->Uncompress((unsigned char *)inbuff, inbuff_length, (unsigned char*)outbuff, outbuff_length);
+}
+
+}
+
+// ***********************************************
+// *********** WordDBCompress *******************
+// ***********************************************
+
+// Default construction: no zlib fallback, debugging disabled.
+WordDBCompress::WordDBCompress() :
+  cmprInfo(0),
+  use_zlib(0),
+  zlib_level(0),
+  debug(0)
+{
+}
+
+
+// Construction with explicit zlib settings: when zlib is set the
+// zlib_flags of the DB_CMPR_INFO will carry level (see CmprInfo()).
+WordDBCompress::WordDBCompress(int zlib, int level) :
+  cmprInfo(0),
+  use_zlib(zlib),
+  zlib_level(level),
+  debug(0)
+{
+}
+
+
+//
+// Build a DB_CMPR_INFO describing this object, suitable as the
+// WordDB::CmprInfo argument.  The C callbacks dispatch back to this
+// object through user_data.
+//
+// NOTE(review): a new DB_CMPR_INFO is allocated on every call and the
+// previous cmprInfo pointer is overwritten without being freed --
+// confirm callers invoke this only once per object.
+//
+DB_CMPR_INFO* WordDBCompress::CmprInfo()
+{
+
+ DB_CMPR_INFO *cmpr_info = new DB_CMPR_INFO;
+
+ cmpr_info->user_data = (void *)this;
+ cmpr_info->compress = WordDBCompress_compress_c;
+ cmpr_info->uncompress = WordDBCompress_uncompress_c;
+ cmpr_info->coefficient = 3; // reduce page size by factor of 1<<3 = 8
+ cmpr_info->max_npages = 9;
+
+ // Non-zero zlib_flags selects zlib compression at that level.
+ if(use_zlib == 1)
+ cmpr_info->zlib_flags = zlib_level;
+ else
+ cmpr_info->zlib_flags = 0;
+
+ cmprInfo = cmpr_info;
+
+ return cmpr_info;
+}
+
+//
+// Compress the page <inbuff> of <inbuff_length> bytes; the compressed
+// bytes are returned in *outbuffp (the Compressor's buffer) and their
+// length in *outbuff_lengthp.  Returns 0.
+//
+int
+WordDBCompress::Compress(const u_int8_t *inbuff, int inbuff_length, u_int8_t **outbuffp, int *outbuff_lengthp)
+{
+ // Wrap the raw page; WordDBPage does the actual compression work.
+ WordDBPage pg(inbuff, inbuff_length);
+
+ if(debug > 2) {
+ printf("########################### WordDBCompress::Compress: #################################################\n");
+ pg.show();
+ printf("~~~~~~~~~~~~~\n");
+ }
+
+ // Debug mode: verify a compress/uncompress round trip first.
+ if(debug) TestCompress(inbuff, inbuff_length);
+
+ Compressor *res = pg.Compress(0, cmprInfo);
+
+ // Hand the compressed bitstream buffer to the caller.
+ (*outbuffp) = res->get_data();
+ (*outbuff_lengthp) = res->buffsize();
+
+ if(debug > 2) {
+ res->show();
+ printf("\n%%%%%%%% Final COMPRESSED size:%4d %f\n",res->size(),res->size()/8.0);
+ printf("*************************** #################################################\n");
+ }
+
+ delete res;
+ if(debug > 2) printf("WordDBCompress::Compress: final output size:%6d (inputsize:%6d)\n", (*outbuff_lengthp), inbuff_length);
+
+ // pg borrowed inbuff -- presumably unset_page() detaches it so the
+ // destructor leaves the caller's buffer alone (see WordDBPage).
+ pg.unset_page();
+
+ return(0);
+}
+
+//
+// Uncompress <inbuff> (<inbuff_length> bytes) into the caller supplied
+// <outbuff> of exactly <outbuff_length> bytes (the original page
+// size).  Returns 0.
+//
+int
+WordDBCompress::Uncompress(const u_int8_t *inbuff, int inbuff_length, u_int8_t *outbuff,int outbuff_length)
+{
+ if(debug > 2) printf("WordDBCompress::Uncompress:: %5d -> %5d\n", inbuff_length, outbuff_length);
+
+ // Allocate a working page of the final (uncompressed) size.
+ WordDBPage pg(outbuff_length);
+
+ if(debug > 2) printf("------------------------ WordDBCompress::Uncompress: --------------------------------\n");
+
+ // Feed the compressed bytes to a bitstream (sizes are in bits,
+ // hence the *8).
+ Compressor in(inbuff_length);
+ in.set_data(inbuff,inbuff_length*8);
+ in.rewind();
+
+ pg.Uncompress(&in,0);
+
+ // Copy the rebuilt page into the caller's buffer.
+ memcpy((void *)outbuff, (void *)pg.pg, outbuff_length);
+
+ if(debug > 2) printf("------------------------ WordDBCompress::Uncompress: END\n");
+
+ // DEBUGING / BENCHMARKING
+
+ pg.delete_page();
+ return(0);
+}
+
+// Debugging aid: run WordDBPage::TestCompress on the page buffer.
+int
+WordDBCompress::TestCompress(const u_int8_t* pagebuff, int pagebuffsize)
+{
+  WordDBPage page(pagebuff, pagebuffsize);
+  page.TestCompress(debug);
+  page.unset_page();
+  return 0;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.h b/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.h
new file mode 100644
index 00000000..0f5c1973
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBCompress.h
@@ -0,0 +1,114 @@
+//
+// WordDBCompress.h
+//
+// WordDBCompress: Implements specific compression scheme for
+// Berkeley DB pages containing WordReferences objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBCompress.h,v 1.6 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordDBCompress_h_
+#define _WordDBCompress_h_
+
+// ***********************************************
+// *************** WordDBCompress*****************
+// ***********************************************
+// Starting point for compression.
+//
+//
+// Compression HOW IT WORKS:
+//
+// ** General outline:
+//
+// BerkeleyDB pages are stored in a memory pool. When the memory pool
+// is full, least recently used pages are swapped to disk. Page
+// compression occurs at page in/out level. The
+// WordDBCompress_compress_c functions are C callbacks that are called
+// by the page compression code in BerkeleyDB. The C callbacks then
+// call the WordDBCompress compress/uncompress methods. The
+// WordDBCompress creates a WordDBPage which does the actual
+// compress/uncompress job.
+//
+// The WordDBPage compression/uncompression methods store/retrieve data
+// from a bitstream. BitStream is a simple bitstream, and Compressor is
+// a bitstream with added compression capabilities.
+//
+
+// Compression algorithm.
+//
+// Most DB pages are full of highly redundant data. The mifluz choice of
+// using one db entry per word makes the DB pages even more redundant.
+// But this choice also makes the pages have a very simple structure.
+//
+// Here is a real world example of what a page can look like:
+// (key structure: word + 4 numerical fields)
+//
+// "trois" 1 4482 1 10b
+// "trois" 1 4482 1 142
+// "trois" 1 4484 1 40
+// "trois" 1 449f 1 11e
+// "trois" 1 4545 1 11
+// "trois" 1 45d3 1 545
+// "trois" 1 45e0 1 7e5
+// "trois" 1 45e2 1 830
+// "trois" 1 45e8 1 545
+// "trois" 1 45fe 1 ec
+// "trois" 1 4616 1 395
+// "trois" 1 461a 1 1eb
+// "trois" 1 4631 1 49
+// "trois" 1 4634 1 48
+// .... etc ....
+//
+// To compress we chose to only code differences between successive entries.
+//
+// Differences in words are coded by 2 numbers and some letters:
+// - the position within the word of the first letter that changes
+// - the size of the new suffix
+// - the letters in the new suffix
+//
+// Only differences in successive numerical entries are stored.
+//
+// A flag is stored for each entry indicating which fields have changed.
+//
+// All this gives us a few numerical arrays which are themselves compressed
+// and sent to the bitstream.
+//
+//
+class WordDBCompress
+{
+ public:
+  WordDBCompress();
+  // (zlib, level): non-zero zlib selects zlib based compression at
+  // the given level (stored in the DB_CMPR_INFO zlib_flags).
+  WordDBCompress(int, int);
+
+  // Compress inbuff; the result buffer and its length are returned
+  // through outbuffp/outbuff_lengthp.  Returns 0 on success.
+  int Compress(const u_int8_t* inbuff, int inbuff_length, u_int8_t** outbuffp, int* outbuff_lengthp);
+  // Uncompress inbuff into the caller supplied outbuff of
+  // outbuff_length bytes.  Returns 0 on success.
+  int Uncompress(const u_int8_t* inbuff, int inbuff_length, u_int8_t* outbuff, int outbuff_length);
+
+  //
+  // Return a new DB_CMPR_INFO initialized with characteristics of the
+  // current object and suitable as WordDB::CmprInfo argument.
+  //
+  DB_CMPR_INFO *CmprInfo();
+
+ private:
+  DB_CMPR_INFO *cmprInfo;      // last DB_CMPR_INFO built by CmprInfo()
+
+  // zlib WordDB compression flags
+  int use_zlib;
+  int zlib_level;
+
+// Debugging / benchmarking level:
+  int debug;
+// 0 : no debug no check
+// 1 : TestCompress before each compression (but no debug within Compress Uncompress)
+// 2 : use_tags (BitStream) within TestCompress -> Compress Uncompress
+// 3 : verbose
+  int TestCompress(const u_int8_t* pagebuff, int pagebuffsize);
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.cc
new file mode 100644
index 00000000..b4fb1225
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.cc
@@ -0,0 +1,97 @@
+// WordDBInfo.cc
+//
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "db.h"
+#include "WordDBInfo.h"
+
+//
+// WordDBInfo implementation
+//
+
+WordDBInfo* WordDBInfo::instance = 0; // singleton, set by Initialize()
+
+//
+// Like standard function but allows easy breakpoint setting.
+// Registered with set_errcall: forwards Berkeley DB error messages
+// (prefixed with errpfx) to stderr.
+//
+static void message(const char *errpfx, char *msg)
+{
+ fprintf(stderr, "%s: %s\n", errpfx, msg);
+}
+
+//
+// Create the Berkeley DB environment according to the configuration
+// (see the CONFIGURATION section in WordDBInfo.h).  On any failure
+// dbenv may be left allocated but unopened; the destructor still
+// closes it.
+//
+WordDBInfo::WordDBInfo(const Configuration& config)
+{
+ dbenv = 0;
+
+ // No environment at all (WordKey-only usage, see header).
+ if(config.Boolean("wordlist_env_skip")) return;
+
+ int error;
+ if((error = CDB_db_env_create(&dbenv, 0)) != 0) {
+ fprintf(stderr, "WordDBInfo: CDB_db_env_create %s\n", CDB_db_strerror(error));
+ return;
+ }
+ dbenv->set_errpfx(dbenv, "WordDB");
+ dbenv->set_errcall(dbenv, message);
+ // NOTE(review): each set_verbose failure returns silently, leaving a
+ // partially configured, unopened environment -- confirm intended.
+ if(dbenv->set_verbose(dbenv, DB_VERB_CHKPOINT, 1) != 0)
+ return;
+ if(dbenv->set_verbose(dbenv, DB_VERB_DEADLOCK, 1) != 0)
+ return;
+ if(dbenv->set_verbose(dbenv, DB_VERB_RECOVERY, 1) != 0)
+ return;
+ if(dbenv->set_verbose(dbenv, DB_VERB_WAITSFOR, 1) != 0)
+ return;
+ // Memory pool cache size, 10MB by default; <= 0 keeps the DB default.
+ int cache_size = config.Value("wordlist_cache_size", 10*1024*1024);
+ if(cache_size > 0) {
+ if(dbenv->set_cachesize(dbenv, 0, cache_size, 1) != 0)
+ return;
+ }
+
+ char* dir = 0;
+ int flags = DB_CREATE;
+ if(config.Boolean("wordlist_env_share")) {
+ // Shared environment rooted at wordlist_env_dir.
+ const String& env_dir = config["wordlist_env_dir"];
+ if(env_dir.empty()) {
+ fprintf(stderr, "WordDB: wordlist_env_dir not specified\n");
+ return;
+ }
+ dir = strdup((const char*)env_dir);
+
+ if(config.Boolean("wordlist_env_cdb"))
+ flags |= DB_INIT_CDB;
+ else
+ flags |= DB_INIT_LOCK | DB_INIT_MPOOL;
+
+ } else {
+ // Private (single process) environment, dir stays NULL.
+ flags |= DB_PRIVATE | DB_INIT_LOCK | DB_INIT_MPOOL;
+ }
+
+ if((error = dbenv->open(dbenv, (const char*)dir, NULL, flags, 0666)) != 0)
+ dbenv->err(dbenv, error, "open %s", (dir ? dir : ""));
+ if(dir) free(dir);
+}
+
+WordDBInfo::~WordDBInfo()
+{
+ // dbenv is 0 when wordlist_env_skip was set or creation failed.
+ if(dbenv) dbenv->close(dbenv, 0);
+}
+
+// (Re)create the unique WordDBInfo instance from the configuration.
+void
+WordDBInfo::Initialize(const Configuration &config_arg)
+{
+  // Replace any previously created singleton; deleting 0 is a no-op.
+  delete instance;
+  instance = new WordDBInfo(config_arg);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.h b/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.h
new file mode 100644
index 00000000..86fa5576
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBInfo.h
@@ -0,0 +1,82 @@
+//
+// WordDBInfo.h
+//
+// NAME
+// inverted index usage environment.
+//
+// SYNOPSIS
+//
+// Only called through WordContext::Initialize()
+//
+// DESCRIPTION
+//
+// The inverted indexes may be shared among processes/threads and provide the
+// appropriate locking to prevent mistakes. In addition the memory cache
+// used by <i>WordList</i> objects may be shared by processes/threads,
+// greatly reducing the memory needs in multi-process applications.
+// For more information about the shared environment, check the Berkeley
+// DB documentation.
+//
+// CONFIGURATION
+//
+// wordlist_env_skip {true,false} (default false)
+// If true no environment is created at all. This must never
+// be used if a <i>WordList</i> object is created. It may be
+// useful if only <i>WordKey</i> objects are used, for instance.
+//
+// wordlist_env_share {true,false} (default false)
+// If true a sharable environment is open or created if none exist.
+//
+// wordlist_env_dir <directory> (default .)
+// Only valid if <i>wordlist_env_share</i> set to <i>true.</i>
+// Specify the directory in which the sharable environment will
+// be created. All
+// inverted indexes specified with a non-absolute pathname will be
+// created relative to this directory.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+
+#ifndef _WordDBInfo_h_
+#define _WordDBInfo_h_
+
+#include "Configuration.h"
+
+struct __db_env;
+
+class WordDBInfo
+{
+ public:
+  // Build the Berkeley DB environment from the configuration; see the
+  // CONFIGURATION section above.
+  WordDBInfo(const Configuration& config);
+  ~WordDBInfo();
+  //
+  // Unique instance handlers
+  //
+  static void Initialize(const Configuration& config);
+
+  // Return the unique instance, or 0 (with a message on stderr) when
+  // Initialize() was never called.
+  static WordDBInfo* Instance() {
+    if(instance) return instance;
+    fprintf(stderr, "WordDBInfo::Instance: no instance\n");
+    return 0;
+  }
+
+  //
+  // Berkeley DB environment; 0 when wordlist_env_skip is set.
+  //
+  struct __db_env *dbenv;
+
+  //
+  // Unique instance pointer
+  //
+  static WordDBInfo* instance;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc
new file mode 100644
index 00000000..eb43af30
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc
@@ -0,0 +1,1024 @@
+//
+// WordDBPage.cc
+//
+// WordDBPage: Implements specific compression scheme for
+// Berkeley DB pages containing WordReferences objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBPage.cc,v 1.5 2004/05/28 13:15:26 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include"WordDBPage.h"
+#include"WordDBCompress.h"
+#include<ctype.h>
+
+#define NBITS_CMPRTYPE 2
+#define CMPRTYPE_NORMALCOMRPESS 0
+#define CMPRTYPE_BADCOMPRESS 1
+
+// ***********************************************
+// ********** Compression Versions **************
+// ***********************************************
+
+// never change NBITS_COMPRESS_VERSION ! (otherwise version tracking will fail)
+#define NBITS_COMPRESS_VERSION 11
+
+// IMPORTANT: change these EVERY time you change something that affects the compression
+#define COMPRESS_VERSION 4
+static const char *version_label[]={"INVALID_VERSION_0","INVALID_VERSION_1","INVALID_VERSION_2","14 Dec 1999","3 Jan 2000",NULL};
+
+// returns the label of compression version v
+static const char *
+get_version_label(int v)
+{
+ // check if version number is ok
+ if(COMPRESS_VERSION <0 || COMPRESS_VERSION>((sizeof(version_label)/sizeof(*version_label))-1))
+ {
+ errr("get_version_label: version_label[COMPRESS_VERSION] is not valid, please update version_label");
+ }
+ if( v >= (int)((sizeof(version_label)/sizeof(*version_label))-1) )
+ {
+ return("INVALID_VERSION");
+ }
+ // return label
+ return(version_label[v]);
+}
+
+
+
+// ***********************************************
+// ********** WordDBPage ***********************
+// ***********************************************
+
+// checks if compression/decompression sequence is harmless
+int
+WordDBPage::TestCompress(int debuglevel)
+{
+ if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress BEGIN\n");}
+ int compress_debug=debuglevel-1;
+ // start by compressing this page
+ Compressor *res=Compress(compress_debug);
+
+ if(res)
+ {
+ int size=res->size();
+ // now uncompress into pageu
+ WordDBPage pageu(pgsz);
+ res->rewind();
+ pageu.Uncompress(res,compress_debug);
+
+ // comapre this page and pageu
+ int cmp=Compare(pageu);
+
+ // show some results
+ if(debuglevel>2)printf("TOTAL SIZE: %6d %8f\n",size,size/8.0);
+ // argh! compare failed somthing went wrong
+ // display the compress/decompress sequence and fail
+ if(cmp || size>8*1024*1000000000)
+ {
+ if(size>8*1024)
+ {
+ printf("---------------------------------------------------\n");
+ printf("-----------overflow:%5d------------------------------\n",size/8);
+ printf("---------------------------------------------------\n");
+ printf("---------------------------------------------------\n");
+ }
+ printf("################### ORIGINAL #########################################\n");
+ show();
+ printf("################### REDECOMPRESSED #########################################\n");
+ pageu.show();
+
+ // re-compress the page verbosely
+ Compressor *res2=Compress(2);
+ res2->rewind();
+ // re-uncompress the page verbosely
+ WordDBPage pageu2(pgsz);
+ pageu2.Uncompress(res2,2);
+ pageu2.show();
+ if(cmp){errr("Compare failed");}
+ delete res2;
+ }
+ pageu.delete_page();
+ delete res;
+
+ }else {errr("WordDBPage::TestCompress: Compress failed");}
+
+ if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress END\n");}
+ return OK;
+}
+
+// find position of first difference between 2 strings
+static int first_diff(const String &s1,const String &s2)
+{
+ int j;
+ for(j=0;j<s1.length() && j<s2.length() && s1[j]==s2[j];j++);
+ return(j);
+}
+
+// ******* Uncompress Compressor into this page
+int
+WordDBPage::Uncompress(Compressor *pin,int ndebug, DB_CMPR_INFO */*=NULL*/)
+{
+ debug=ndebug;
+ if(debug>1){verbose=1;}
+ if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: BEGIN\n");}
+
+
+ // ** first check if versions are OK
+ int read_version = pin->get_uint(NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ if(read_version != COMPRESS_VERSION)
+ {
+ fprintf(stderr,"WordDBPage::Uncompress: *** Compression version mismatch ***\n");
+ fprintf(stderr,"found version : %3d but using version : %3d\n",read_version,COMPRESS_VERSION);
+ fprintf(stderr,"found version label: %s\n",get_version_label(read_version));
+ fprintf(stderr,"using version label: %s\n",get_version_label(COMPRESS_VERSION));
+ fprintf(stderr,"Are you sure you're not reading an old DB with a newer version of the indexer??\n");
+ errr("WordDBPage::Uncompress: *** Compression version mismatch ***");
+ exit(1);
+ }
+
+
+ // ** now see if this page was a normal or uncorrectly compressed page
+ int cmprtype=pin->get_uint(NBITS_CMPRTYPE,"CMPRTYPE");
+ // two possible cases
+ switch(cmprtype)
+ {
+ case CMPRTYPE_NORMALCOMRPESS:// this was a normaly compressed page
+ Uncompress_main(pin);
+ break;
+ case CMPRTYPE_BADCOMPRESS:// this page did not compress correctly
+ pin->get_zone((byte *)pg,pgsz*8,"INITIALBUFFER");
+ break;
+ default:
+ errr("WordDBPage::Uncompress: CMPRTYPE incoherent");
+ }
+
+ if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: END\n");}
+ return OK;
+}
+
+// ******* Uncompress Compressor into this page
+// normally compressed page case
+int
+WordDBPage::Uncompress_main(Compressor *pin)
+{
+ if(!pin){errr("WordDBPage::Uncompress: no Compressor to uncompress from!!");}
+ Compressor &in=*((Compressor *)pin);
+ if(debug>0){in.set_use_tags();}
+ int i,j;
+ // number arrays used to reconstruct the original page
+ unsigned int **rnums=new unsigned int *[nnums];
+ CHECK_MEM(rnums);
+ // sizes of each array
+ int *rnum_sizes=new int[nnums];
+ CHECK_MEM(rnum_sizes);
+ // char differences between words
+ byte *rworddiffs=NULL;
+ int nrworddiffs;
+
+ // *********** read header
+ if(Uncompress_header(in)!=OK){return NOTOK;}
+
+ // get first key(s):
+ //type=5: key(0) stored seperately ... others are decompressed frome differences
+ //
+ //type=3: btikey(0) is particular (len=0) it is stored seperately
+ // btikey(1) stored seperately ... others are decompressed frome differences
+ //
+ int nkeysleft=nk;
+ if(nkeysleft>0)
+ {
+ WordDBKey key0=uncompress_key(in,0);
+ if(type==P_LBTREE){uncompress_data(in,0,key0.RecType());}
+ nkeysleft--;
+ }
+ if(nkeysleft>0 && type==P_IBTREE){uncompress_key(in,1);nkeysleft--;}
+
+ if(nkeysleft>0)
+ {
+ // ********* read numerical fields
+ Uncompress_vals_chaged_flags(in,&(rnums[0]),&(rnum_sizes[0]));
+ for(j=1;j<nnums;j++)
+ {
+ if(verbose)printf("field %2d : start position:%4d \n",j,in.size());
+ if(j==3 && verbose){in.verbose=2;}
+ rnum_sizes[j]=in.get_vals(&(rnums[j]),label_str("NumField",j));// ***
+ if(j==3 && verbose){in.verbose=0;}
+ if(verbose){printf("WordDBPage::Uncompress_main:got numfield:%2d:nvals:%4d\n",j,rnum_sizes[j]);}
+ }
+
+ // ********* read word differences
+ nrworddiffs=in.get_fixedbitl(&rworddiffs,"WordDiffs");
+
+
+ // ********* rebuild original page
+ Uncompress_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs);
+ Uncompress_show_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs);
+
+
+ for(i=0;i<nnums;i++){delete [] rnums[i];}
+ }
+ delete [] rnums;
+ delete [] rnum_sizes;
+ if(rworddiffs){delete [] rworddiffs;}
+ return 0;
+}
+void
+WordDBPage::Uncompress_vals_chaged_flags(Compressor &in,unsigned int **pcflags,int *pn)
+{
+ int n=in.get_uint_vl(NBITS_NVALS,"FlagsField");
+ unsigned int *cflags=new unsigned int[n];
+ unsigned int ex=0;
+ int nbits=num_bits(n);
+ for(int i=0;i<n;i++)
+ {
+ ex=in.get_uint(WordKey::NFields(),label_str("cflags",i));
+ cflags[i]=ex;
+ int rep=in.get("rep");
+ if(rep)
+ {
+ rep=in.get_uint_vl(nbits,NULL);
+ for(int k=1;k<=rep;k++){cflags[k+i]=ex;}
+ i+=rep;
+ }
+ }
+
+ *pn=n;
+ *pcflags=cflags;
+}
+int
+WordDBPage::Uncompress_header(Compressor &in)
+{
+ pg->lsn.file =in.get_uint_vl( 8*sizeof(pg->lsn.file ),"page:lsn.file");
+ pg->lsn.offset =in.get_uint_vl( 8*sizeof(pg->lsn.offset ),"page:lsn.offset");
+ pg->pgno =in.get_uint_vl( 8*sizeof(pg->pgno ),"page:pgno");
+ pg->prev_pgno =in.get_uint_vl( 8*sizeof(pg->prev_pgno ),"page:prev_pgno");
+ pg->next_pgno =in.get_uint_vl( 8*sizeof(pg->next_pgno ),"page:next_pgno");
+ pg->entries =in.get_uint_vl( 8*sizeof(pg->entries ),"page:entries");
+ pg->hf_offset =in.get_uint_vl( 8*sizeof(pg->hf_offset ),"page:hf_offset");
+ pg->level =in.get_uint_vl( 8*sizeof(pg->level ),"page:level");
+ pg->type =in.get_uint_vl( 8*sizeof(pg->type ),"page:type");
+
+ init();
+
+ if(verbose)
+ {
+ printf("************************************\n");
+ printf("******** WordDBPage::Uncompress: page header ***\n");
+ printf("************************************\n");
+ printf("page size:%d\n",(int)pgsz);
+ printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file );
+ printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset );
+ printf(" 08-11: Current page number. : %d\n", pg->pgno );
+ printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno );
+ printf(" 16-19: Next page number. : %d\n", pg->next_pgno );
+ printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries );
+ printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset );
+ printf(" 24: Btree tree level. : %d\n", pg->level );
+ printf(" 25: Page type. : %d\n", pg->type );
+ }
+ return OK;
+}
+void
+WordDBPage::Uncompress_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs)
+{
+ int irwordiffs=0;
+ int nfields=WordKey::NFields();
+ int *rnum_pos=new int[ nnums0];// current index count
+ CHECK_MEM(rnum_pos);
+
+ int ii,j;
+ for(j=0;j<nnums0;j++){rnum_pos[j]=0;}
+
+ int i0=0;
+ if(type==P_IBTREE){i0=1;}// internal pages have particular first key
+
+ WordDBKey pkey;
+ WordDBKey akey=get_WordDBKey(i0);
+
+ // reconstruct each key using previous key and coded differences
+ for(ii=i0;ii<nk;ii++)
+ {
+ WordDBRecord arec;
+ BINTERNAL bti;
+
+ if(type==P_LBTREE)
+ {
+ // **** get the data fields
+ arec.set_decompress(rnums,rnum_sizes,ii,CNDATADATA,CNDATASTATS0,CNDATASTATS1);
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");}
+ // ****** btree internal page specific
+ bti.pgno =rnums[CNBTIPGNO ][rnum_pos[CNBTIPGNO ]++];
+ bti.nrecs=rnums[CNBTINRECS][rnum_pos[CNBTINRECS]++];
+ }
+ // all that follows codes differences between succesive entries
+ // that is: Numerical key fields, Words
+ if(ii>i0)
+ {
+ unsigned int flags=rnums[CNFLAGS][rnum_pos[CNFLAGS]++];
+ int foundfchange=0;
+ // **** reconstruct the word
+ if(flags&pow2(nfields-1))// check flags to see if word has changed
+ {
+ foundfchange=1;
+ if(rnum_pos[CNWORDDIFFLEN]>=rnum_sizes[CNWORDDIFFLEN]){errr("WordDBPage::Uncompress read wrong num worddiffs");}
+ // get position of first character that changes in this word
+ int diffpos=rnums[CNWORDDIFFPOS][rnum_pos[CNWORDDIFFPOS]++];
+ // get size of changed part of the word
+ int difflen=rnums[CNWORDDIFFLEN][rnum_pos[CNWORDDIFFLEN]++];
+ int wordlen=diffpos+difflen;
+ char *str=new char [wordlen+1];
+ CHECK_MEM(str);
+ // copy the unchanged part into str from previos key's word
+ if(diffpos)strncpy(str,(char *)pkey.GetWord(),diffpos);
+ // copy the changed part from coded word differences
+ strncpy(str+diffpos,(char *)rworddiffs+irwordiffs,difflen);
+ str[wordlen]=0;
+ if(verbose)printf("key %3d word:\"%s\"\n",ii,str);
+ akey.SetWord(str);
+ irwordiffs+=difflen;
+ delete [] str;
+
+ }else{akey.SetWord(pkey.GetWord());}
+ // **** reconstruct the numerical key fields
+ for(j=1;j<nfields;j++)
+ {
+ // check flags to see if this field has changed
+ int changed=flags&pow2(j-1);
+ if(changed)
+ {
+ // this field's number
+ int k=CNFIELDS+j-1;
+ // current position within coded differences of this field
+ int indx=rnum_pos[k];
+ if(indx>=rnum_sizes[k]){errr("WordDBPage::Uncompress read wrong num of changes in a field");}
+ if(!foundfchange)
+ {
+ // this is the first field that changes in this key
+ // so difference is coded compared to value in pevious key
+ akey.Set(j,rnums[k][indx]+pkey.Get(j));
+ }
+ else
+ {
+ // this is NOT the first field that changes in this key
+ // so difference is coded from 0
+ akey.Set(j,rnums[k][indx]);
+ }
+ // we read 1 element from coded differences in this field
+ rnum_pos[k]++;
+ foundfchange=1;
+ }
+ else
+ {
+ // no changes found, just copy from previous key
+ if(!foundfchange){akey.Set(j,pkey.Get(j));}
+ else{akey.Set(j,0);}
+ }
+ }
+ }
+ // now insert key/data into page
+ if(type==P_LBTREE)
+ {
+ if(ii>i0)insert_key(akey);
+ if(ii>i0)insert_data(arec);
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");}
+ if(ii>i0)insert_btikey(akey,bti);
+ }
+ pkey=akey;
+ }
+ delete [] rnum_pos;
+}
+
+// display
+void
+WordDBPage::Uncompress_show_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs)
+{
+ int i,j;
+ if(verbose)
+ {
+ printf("WordDBPage::Uncompress_show_rebuild: rebuilt numerical fields\n");
+ for(j=0;j<nnums0;j++)
+ {
+ printf("resfield %2d %13s:",j,number_field_label(j));
+ for(i=0;i<rnum_sizes[j];i++)
+ {
+ printf("%4d ",rnums[j][i]);
+ }
+ printf("\n");
+ printf("diffield %2d:",j);
+ for(i=0;i<rnum_sizes[j];i++)
+ {
+ ;// printf("%2d:%d ",i,nums[j*nk+i] == rnums[j][i]);
+ }
+ printf("\n");
+ }
+ printf("reswordiffs:");
+ for(i=0;i<nrworddiffs;i++){printf("%c",(isalnum(rworddiffs[i]) ? rworddiffs[i] : '#'));}
+ printf("\n");
+ }
+}
+
+Compressor *
+WordDBPage::Compress(int ndebug, DB_CMPR_INFO *cmprInfo/*=NULL*/)
+{
+ debug=ndebug;
+ if(debug>1){verbose=1;}
+
+ Compressor *res=(Compressor *)new Compressor((cmprInfo ?
+ pgsz/(1<<(cmprInfo->coefficient)) :
+ pgsz/4));
+ CHECK_MEM(res);
+ if(debug>0){res->set_use_tags();}
+
+ res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ res->put_uint(CMPRTYPE_NORMALCOMRPESS,NBITS_CMPRTYPE,"CMPRTYPE");
+
+ if(verbose){printf("WordDBPage::Compress: trying normal compress\n");}
+ int cmpr_ok=Compress_main(*((Compressor *)res));
+
+ if(cmpr_ok!=OK || res->buffsize()>pgsz)
+ {
+ if(verbose){printf("WordDBCompress::Compress full compress failed ... not compressing at all\n");}
+ show();
+
+ if(res){delete res;}
+ res=new Compressor;
+ CHECK_MEM(res);
+
+ if(debug>0){res->set_use_tags();}
+
+ res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ res->put_uint(CMPRTYPE_BADCOMPRESS,NBITS_CMPRTYPE,"CMPRTYPE");
+
+ res->put_zone((byte *)pg,pgsz*8,"INITIALBUFFER");
+ }
+
+ if(verbose)
+ {
+ printf("WordDBPage::Compress: Final bitstream result\n");
+ res->show();
+ }
+ return res;
+};
+
+int
+WordDBPage::Compress_main(Compressor &out)
+{
+ if(debug>1){verbose=1;}
+ if(verbose){printf("WordDBPage::Compress_main: starting compression\n");}
+
+ if(pg->type!=5 && pg->type!=3){ printf("pg->type:%3d\n",pg->type);return NOTOK;}
+// if(pg->type==P_IBTREE){show();}
+
+
+ // *************** initialize data structures **************
+ int j;
+ // 0 -> changed/unchanged flags : 4bits
+ // 1..n -> numerical fields delta : ?bits (depending on field)
+ // n+1 -> word changed size : 1
+ int *nums =new int[nk*nnums];
+ CHECK_MEM(nums);
+ int *nums_pos=new int[ nnums];
+ CHECK_MEM(nums_pos);
+// int *cnsizes =new int[ nnums];
+ for(j=0;j<nnums;j++){nums_pos[j]=0;}
+// for(j=1;j<nfields;j++) {cnsizes[j]=word_key_info->sort[j].bits;}
+// cnsizes[CNFLAGS]=4;
+// cnsizes[CNWORDDIFFPOS ]=8;
+// cnsizes[CNWORDDIFFLEN ]=8;
+ HtVector_byte worddiffs;
+
+
+//bmt_START;
+ // *************** extract values and wordiffs **************
+ if(nk>0)
+ {
+ Compress_extract_vals_wordiffs(nums,nums_pos,nnums,worddiffs);
+ if(verbose)Compress_show_extracted(nums,nums_pos,nnums,worddiffs);
+ }
+
+ // *************** init compression **************
+
+//bmt_END;bmt_START;
+ Compress_header(out);
+
+ // *************** compress values and wordiffs **************
+
+ // compress first key(s)
+ int nkeysleft=nk;
+ if(nkeysleft>0)
+ {
+ compress_key(out,0);
+ if(type==P_LBTREE){compress_data(out,0);}
+ nkeysleft--;
+ }
+ if(nkeysleft>0 && type==P_IBTREE){compress_key(out,1);nkeysleft--;}
+
+ if(nkeysleft>0)
+ {
+//bmt_END;bmt_START;
+ // compress values
+ Compress_vals(out,nums,nums_pos,nnums);
+//bmt_END;bmt_START;
+
+ // compress worddiffs
+ int size=out.put_fixedbitl(worddiffs.begin(),worddiffs.size(),"WordDiffs");
+ if(verbose)printf("compressed wordiffs : %3d values: %4d bits %4f bytes\n",worddiffs.size(),size,size/8.0);
+//bmt_END;
+ }
+
+ // *************** cleanup **************
+
+ delete [] nums ;
+ delete [] nums_pos;
+
+ return OK;
+}
+
+void
+WordDBPage::Compress_extract_vals_wordiffs(int *nums,int *nums_pos,int ,HtVector_byte &worddiffs)
+{
+ WordDBKey pkey;
+
+ int ii,j;
+ int i0=0;
+ if(type==P_IBTREE){i0=1;}// internal pages have particular first key
+ for(ii=i0;ii<nk;ii++)
+ {
+ WordDBKey akey=get_WordDBKey(ii);
+
+ if(type==P_LBTREE)
+ {
+ // ****** WordRecord (data/stats)
+ // get word record
+ WordDBRecord arec(data(ii),akey.RecType());
+ // add record
+ if(arec.type==WORD_RECORD_STATS)
+ {
+ nums[CNDATASTATS0*nk+nums_pos[CNDATASTATS0]++]=arec.info.stats.noccurrence;
+ nums[CNDATASTATS1*nk+nums_pos[CNDATASTATS1]++]=arec.info.stats.ndoc;
+ }
+ else
+ if(arec.type==WORD_RECORD_DATA)
+ {
+ nums[CNDATADATA *nk+nums_pos[CNDATADATA ]++]=arec.info.data;
+ }
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Compress_extract_vals_wordiffs: unsupported type!=3");}
+ // ****** btree internal page specific
+ nums[CNBTIPGNO *nk+nums_pos[CNBTIPGNO ]++]=btikey(ii)->pgno ;
+ nums[CNBTINRECS*nk+nums_pos[CNBTINRECS]++]=btikey(ii)->nrecs;
+ }
+
+ // all that follows codes differences between succesive entries
+ // that is: Numerical key fields, Words
+ if(ii>i0)
+ {
+ // clear changed falgs
+ int iflag=CNFLAGS*nk+nums_pos[CNFLAGS]++;
+ nums[iflag]=0;
+
+ int foundfchange=0;
+ const String &aword=akey.GetWord();
+ const String &pword=pkey.GetWord();
+ if(!(aword==pword)){foundfchange=1;}
+
+ // check numerical fields for changes
+ // ******** sets CNFIELDS and some of CNFLAGS ************
+ for(j=1;j<akey.NFields();j++)
+ {
+ int diff=akey.Get(j)-(foundfchange ? 0 : pkey.Get(j));
+ if(diff)
+ {
+ foundfchange=1;
+ nums[iflag]|=pow2(j-1);
+ nums[ j*nk+nums_pos[j]++]=diff;
+ }
+ }
+
+ // ************ check word for changes
+ // ******** sets CNWORDDIFFPOS CNWORDDIFFLEN and some of CNFLAGS ************
+ if(!(aword==pword))
+ {
+ nums[iflag]|=pow2(akey.NFields()-1);
+ int fd=first_diff(aword,pword);
+ nums[CNWORDDIFFPOS*nk+nums_pos[CNWORDDIFFPOS]++]=fd;
+ nums[CNWORDDIFFLEN*nk+nums_pos[CNWORDDIFFLEN]++]=aword.length()-fd;
+ for(int s=fd;s<aword.length();s++){worddiffs.push_back(aword[s]);}
+ }
+ }
+ pkey=akey;
+ }
+// nums_pos[CNFLAGS]=nk-1;
+
+}
+
+void
+WordDBPage::Compress_vals_changed_flags(Compressor &out,unsigned int *cflags,int n)
+{
+ int size=out.size();
+ out.put_uint_vl(n,NBITS_NVALS,"FlagsField");
+ unsigned int ex=0;
+ int nbits=num_bits(n);
+ for(int i=0;i<n;i++)
+ {
+ ex=cflags[i];
+ out.put_uint(ex,WordKey::NFields(),label_str("cflags",i));
+ int k;
+ for(k=1;k+i<n;k++){if(ex!=cflags[i+k]){break;}}
+ k--;
+ if(k>0)
+ {
+ out.put(1,"rep");
+ out.put_uint_vl(k,nbits,NULL);
+ i+=k;
+ }
+ else
+ {out.put(0,"rep");}
+ }
+ size=out.size()-size;
+ if(verbose)printf("compressed flags %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",0,n,size,size/8.0,out.size());
+
+}
+
+void
+WordDBPage::Compress_vals(Compressor &out,int *nums,int *nums_pos,int nnums0)
+{
+ // the changed flags fields are particular
+ Compress_vals_changed_flags(out,(unsigned int *)(nums+0*nk),nums_pos[0]);
+
+
+ // compress the difference numbers for the numerical fields
+ for( int j=1;j<nnums0;j++)
+ {
+ int nv=nums_pos[j];
+ unsigned int *v=(unsigned int *)(nums+j*nk);
+ if((1 || j==3) && verbose){out.verbose=2;}
+ int size=out.put_vals(v,nv,label_str("NumField",j));
+ if((1 || j==3) && verbose){out.verbose=0;}
+ if(verbose)printf("compressed field %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",j,n,size,size/8.0,out.size());
+ }
+}
+
+void
+WordDBPage::Compress_header(Compressor &out)
+{
+// no smart compression ... for now
+ out.put_uint_vl(pg->lsn.file , 8*sizeof(pg->lsn.file ),"page:lsn.file");
+ out.put_uint_vl(pg->lsn.offset , 8*sizeof(pg->lsn.offset ),"page:lsn.offset");
+ out.put_uint_vl(pg->pgno , 8*sizeof(pg->pgno ),"page:pgno");
+ out.put_uint_vl(pg->prev_pgno , 8*sizeof(pg->prev_pgno ),"page:prev_pgno");
+ out.put_uint_vl(pg->next_pgno , 8*sizeof(pg->next_pgno ),"page:next_pgno");
+ out.put_uint_vl(pg->entries , 8*sizeof(pg->entries ),"page:entries");
+ out.put_uint_vl(pg->hf_offset , 8*sizeof(pg->hf_offset ),"page:hf_offset");
+ out.put_uint_vl(pg->level , 8*sizeof(pg->level ),"page:level");
+ out.put_uint_vl(pg->type , 8*sizeof(pg->type ),"page:type");
+}
+
+void
+WordDBPage::Compress_show_extracted(int *nums,int *nums_pos,int nnums0,HtVector_byte &worddiffs)
+{
+ int i,j;
+ int *cnindexe2=new int[ nnums0];
+ CHECK_MEM(cnindexe2);
+ for(j=0;j<nnums0;j++){cnindexe2[j]=0;}
+ for(j=0;j<nnums0;j++)
+ {
+ printf("%13s",number_field_label(j));
+ }
+ printf("\n");
+ int w=0;
+ int mx=(nk>worddiffs.size() ? nk : worddiffs.size());
+ for(i=0;i<mx;i++)
+ {
+ printf("%3d: ",i);
+ for(j=0;j<nnums0;j++)
+ {
+ int k=cnindexe2[j]++;
+ int nbits=(j ? 16:4);// just to show the flags field...
+ if(k<nums_pos[j])
+ {
+ int val=nums[j*nk+k];
+ if(nbits<8){show_bits(val,nbits);printf(" ");}
+ else
+ {
+ printf("|%12u",val);
+ }
+ }
+ else
+ {
+ if(nbits<8){printf(" ");}
+ else
+ {
+ printf("| ");
+ }
+ }
+ }
+ if(w<worddiffs.size()){printf(" %02x %c ",worddiffs[w],(isalnum(worddiffs[w]) ? worddiffs[w] : '#'));}
+ w++;
+ printf("\n");
+ }
+ delete [] cnindexe2;
+}
+
+// Compare two pages to check if equal
+int
+WordDBPage::Compare(WordDBPage &other)
+{
+ int res=0;
+ // Compare headers
+ if(other.pgsz != pgsz ){res++;printf("compare failed for pgsz \n");}
+ if(other.pg->lsn.file != pg->lsn.file ){res++;printf("compare failed for pg->lsn.file \n");}
+ if(other.pg->lsn.offset != pg->lsn.offset ){res++;printf("compare failed for pg->lsn.offset \n");}
+ if(other.pg->pgno != pg->pgno ){res++;printf("compare failed for pg->pgno \n");}
+ if(other.pg->prev_pgno != pg->prev_pgno ){res++;printf("compare failed for pg->prev_pgno \n");}
+ if(other.pg->next_pgno != pg->next_pgno ){res++;printf("compare failed for pg->next_pgno \n");}
+ if(other.pg->entries != pg->entries ){res++;printf("compare failed for pg->entries \n");}
+ if(other.pg->hf_offset != pg->hf_offset ){res++;printf("compare failed for pg->hf_offset \n");}
+ if(other.pg->level != pg->level ){res++;printf("compare failed for pg->level \n");}
+ if(other.pg->type != pg->type ){res++;printf("compare failed for pg->type \n");}
+ int i,k;
+ // double check header
+ if(memcmp((void *)pg,(void *)other.pg,sizeof(PAGE)-sizeof(db_indx_t)))
+ {
+ res++;
+ printf("compare failed in some unknown place in header:\n");
+ for(i=0;i<(int)(sizeof(PAGE)-sizeof(db_indx_t));i++)
+ {
+ printf("%3d: %3x %3x\n",i,((byte *)pg)[i],((byte *)other.pg)[i]);
+ }
+ }
+
+ // pg->type != 5 && !=3 pages are not really compressed: just memcmp
+ if(pg->type != 5 && pg->type != 3)
+ {
+ if(memcmp((void *)pg,(void *)other.pg,pgsz))
+ {
+ printf("compare:PAGETYPE:!=5 and memcmp failed\n");
+ res++;
+ printf("compare failed\n");
+ }
+ return(res);
+ }
+
+ // compare each key/data pair
+ for(i=0;i<(type==P_LBTREE ? pg->entries/2 : pg->entries);i++)
+ {
+ if(pg->type==P_LBTREE)
+ {
+ // compare keys
+ if(key(i)->len !=other.key(i)->len )
+ {
+ printf("compare:key(%2d) len : %2d != %2d\n",i,key(i)->len ,other.key(i)->len );
+ res++;
+ }
+ if(key(i)->type!=other.key(i)->type)
+ {
+ printf("compare:key(%2d) type: %2d != %2d\n",i,key(i)->type,other.key(i)->type);
+ res++;
+ }
+ if(memcmp(key(i)->data,other.key(i)->data,key(i)->len))
+ {
+ printf("compare :key(%2d)\n",i);
+ for(k=0;k<key(i)->len;k++)
+ {
+ int c=key(i)->data[k];
+ if(isalnum(c)){printf(" %c ",c);}
+ else{printf("%02x ",c);}
+ }
+ printf("\n");
+ for(k=0;k<key(i)->len;k++)
+ {
+ int c=other.key(i)->data[k];
+ if(isalnum(c)){printf(" %c ",c);}
+ else{printf("%02x ",c);}
+ }
+ printf("\n");
+ res++;printf("compare:key failed\n");
+ }
+ // compare data
+ if(data(i)->len !=other.data(i)->len )
+ {
+ printf("compare:data(%2d) len : %2d != %2d\n",i,data(i)->len ,other.data(i)->len );
+ res++;
+ }
+ if(data(i)->type!=other.data(i)->type)
+ {
+ printf("compare:data(%2d) type: %2d != %2d\n",i,data(i)->type,other.key(i)->type);
+ res++;
+ }
+ if(memcmp(data(i)->data,other.data(i)->data,data(i)->len))
+ {
+ printf("compare :data(%2d)\n",i);
+ for(k=0;k<data(i)->len;k++)
+ {
+ printf("%02x ",data(i)->data[k]);
+ }
+ printf("\n");
+ for(k=0;k<data(i)->len;k++)
+ {
+ printf("%02x ",other.data(i)->data[k]);
+ }
+ printf("\n");
+ res++;printf("compare:data failed\n");
+ }
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Compare: unsupported type!=3");}
+ if(btikey(i)->len != other.btikey(i)->len ||
+ btikey(i)->type != other.btikey(i)->type ||
+ btikey(i)->pgno != other.btikey(i)->pgno ||
+ btikey(i)->nrecs != other.btikey(i)->nrecs )
+ {
+ printf("compare:btikey(%2d) failed\n",i);
+ printf("this :len :%4d type :%4d pgno :%4d nrecs :%4d \n",btikey(i)->len,btikey(i)->type,
+ btikey(i)->pgno,btikey(i)->nrecs);
+ printf("other:len :%4d type :%4d pgno :%4d nrecs :%4d \n",other.btikey(i)->len,other.btikey(i)->type,
+ other.btikey(i)->pgno,other.btikey(i)->nrecs);
+ res++;
+
+ }
+ if(memcmp(btikey(i)->data,other.btikey(i)->data,btikey(i)->len))
+ {
+ printf("compare :btikey(%2d)\n",i);
+ for(k=0;k<btikey(i)->len;k++)
+ {
+ printf("%02x ",btikey(i)->data[k]);
+ }
+ printf("\n");
+ for(k=0;k<btikey(i)->len;k++)
+ {
+ printf("%02x ",other.btikey(i)->data[k]);
+ }
+ printf("\n");
+ res++;printf("compare:btikey failed\n");
+
+ }
+ }
+ }
+ if(pg->entries>0)
+ {
+ int smallestoffset=HtMaxMin::min_v(pg->inp,pg->entries);
+ int other_smallestoffset=HtMaxMin::min_v(other.pg->inp,other.pg->entries);
+ if(smallestoffset!=other_smallestoffset)
+ {
+ printf("compare fail:smallestoffset:%d other_smallestoffset:%d\n",smallestoffset,other_smallestoffset);
+ res++;
+ }
+ }
+
+ return(res);
+}
+
+// Bit stream description
+// | field[last] changed only | yes -> delta field[last]
+//
+
+// redo=0 ->
+// redo=1 -> oops, dont show!
+// redo=2 ->
+void
+WordDBPage::show()
+{
+ int i,j,dd,l;
+
+ printf("************************************\n");
+ printf("************************************\n");
+ printf("************************************\n");
+ printf("page size:%d\n",(int)pgsz);
+ printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file );
+ printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset );
+ printf(" 08-11: Current page number. : %d\n", pg->pgno );
+ printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno );
+ printf(" 16-19: Next page number. : %d\n", pg->next_pgno );
+ printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries );
+ printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset );
+ printf(" 24: Btree tree level. : %d\n", pg->level );
+ printf(" 25: Page type. : %d\n", pg->type );
+
+
+ printf("entry offsets:");
+ for(i=0;i<pg->entries;i++){printf("%4d ",pg->inp[i]);}
+ printf("\n");
+
+ if(pg->type ==5)
+ {
+
+ WordRecord dud;
+ WordKey prev;
+ int pagecl=0;
+ for(i=0;i<pg->entries;i++)
+ {
+ if( (i%2) && dud.type==WORD_RECORD_NONE){continue;}
+ printf("\n||%c:%3d:off:%03d:invoff:%4d:len:%2d:typ:%x:",i%2 ? 'D' : 'K',i,e_offset(i),pgsz-e_offset(i),entry(i)->len,entry(i)->type);
+ if(i>0)
+ {
+ l=entry(i)->len+3;
+ dd=(int)(e_offset(i-1))-l;
+ dd-=dd%4;
+ printf("% 5d:: ",(e_offset(i)-dd));
+ }
+ if(!(i%2))
+ {
+ WordDBKey tkey(entry(i));
+ int fieldchanged[10];
+ char *wordchange=NULL;
+ printf("\"");
+ printf("%s",(char *)tkey.GetWord());
+ printf("\"");
+ for(j=0;j<20-tkey.GetWord().length();j++){printf(" ");}
+ printf("|");
+ for(j=1;j<tkey.NFields();j++){printf("%4x ",tkey.Get(j));}
+ printf("|");
+
+ for(j=1;j<tkey.NFields();j++)
+ {
+ int diff=tkey.Get(j)-prev.Get(j);
+ if(diff<0){diff=tkey.Get(j);}
+ printf("%6d ",diff);
+ fieldchanged[j]=diff;
+ }
+
+ String &word=tkey.GetWord();
+ String &pword=prev.GetWord();
+ if(word==pword){printf(" 00 ===");fieldchanged[0]=0;}
+ else
+ {
+ int fd=first_diff(word,pword);
+ fieldchanged[0]=fd+1;
+ wordchange=((char *)word)+fd;
+ printf(" %2d %s",fd,((char *)word)+fd);
+ }
+
+ int keycl=tkey.NFields();
+ for(j=1;j<tkey.NFields();j++)
+ {
+ if(fieldchanged[j]){keycl+=WordKeyInfo::Instance()->sort[j].bits;}
+ }
+ if(fieldchanged[0]){keycl+=3;keycl+=8*strlen(wordchange);}
+ printf(" ::%2d %f",keycl,keycl/8.0);
+ pagecl+=keycl;
+ prev=tkey;
+ }
+ else
+ {
+ if(entry(i)->len>100){printf("WordDBPage::show: aaargh strange failing\n");return;}
+ for(j=0;j<entry(i)->len;j++)
+ {
+ printf("%02x ",entry(i)->data[j]);
+ }
+ }
+ }
+ printf("\n");
+ }
+ else
+ if(1)
+ {
+ int nn=0;
+ // dump hex
+ for(i=0;;i++)
+ {
+ printf("%5d: ",nn);
+ for(j=0;j<20;j++)
+ {
+ printf("%2x ",((byte *)pg)[nn++]);
+ if(nn>=pgsz){break;}
+ }
+ printf("\n");
+ if(nn>=pgsz){break;}
+ }
+ }
+ if(pg->type == 3)
+ {
+ for(i=0;i<pg->entries;i++)
+ {
+ BINTERNAL *bie=GET_BINTERNAL(pg,i);
+ printf("%3d: off:%4d:len:%3d :type:%3d :pgno:%4d: nrecs:%4d:: ",i,pg->inp[i],bie->len,bie->type,bie->pgno,bie->nrecs);
+ WordDBKey tkey(bie);
+ for(j=0;j<bie->len-tkey.GetWord().length();j++){printf("%2x ",bie->data[j]);}
+ printf(" : ");
+ for(j=1;j<tkey.NFields();j++){printf("%5d ",tkey.Get(j));}
+ printf("\"%s\"\n",(char *)tkey.GetWord());
+ }
+ }
+
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.h b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.h
new file mode 100644
index 00000000..1f23d5ff
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.h
@@ -0,0 +1,508 @@
+//
+// WordDBPage.h
+//
+// WordDBPage: Implements specific compression scheme for
+// Berkeley DB pages containing WordReferences objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBPage.h,v 1.8 2004/05/28 13:15:26 lha Exp $
+//
+//
+// Access to Berkeley DB internal
+//
+
+#ifndef _WordDBPage_h_
+#define _WordDBPage_h_
+
+extern "C"
+{
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "db_page.h"
+#include "common_ext.h"
+}
+
+#include "WordDBCompress.h"
+#include "WordBitCompress.h"
+#include "WordRecord.h"
+#include "WordKey.h"
+
+
+#define WORD_ALIGN_TO(v,a) ( (v)%(a) ? (v+((a)-(v)%(a))) : v)
+#define NBITS_KEYLEN 16
+#define NBITS_DATALEN 16
+
+// ***********************************************
+// *************** WordDBRecord *****************
+// ***********************************************
+
+// WordRecord with added functionalities to help with compression/decompression
+class WordDBRecord : public WordRecord
+{
+public:
+
+ // retreive WordRecord data/stats from coded numbers
+ void set_decompress(unsigned int **data,int *indexes,int i,int pdata,int pstat0,int pstat1)
+ {
+ if(i>=indexes[pstat0])
+ {// were pas the end of coded stats, so this can't be a stat
+ type=DefaultType();
+ if(type==WORD_RECORD_DATA){info.data=data[pdata][i-indexes[pstat0]];}
+ else{info.data=0;}
+ }
+ else
+ {// this is a stat
+ type=WORD_RECORD_STATS;
+ info.stats.noccurrence=data[pstat0][i];
+ info.stats.ndoc =data[pstat1][i];
+ }
+ }
+ WordDBRecord():WordRecord(){;}
+ WordDBRecord(byte *dat,int len,int rectyp):WordRecord()
+ {
+ type=(rectyp ? DefaultType() : WORD_RECORD_STATS);
+ Unpack(String((char *)dat,len));
+ }
+ WordDBRecord(BKEYDATA *ndata,int rectyp):WordRecord()
+ {// typ: 0->stat 1->data
+ type=(rectyp ? DefaultType() : WORD_RECORD_STATS);
+ Unpack(String((char *)ndata->data,ndata->len));
+ }
+};
+
+
+// ***********************************************
+// **************** WordDBKey *****************
+// ***********************************************
+
+// WordKey with added functionalities to help with compression/decompression
+class WordDBKey : public WordKey
+{
+ BKEYDATA *key;
+public:
+
+ int RecType(){return (GetWord()[0]!=1 ? 1 :0);}
+ WordDBKey():WordKey()
+ {
+ key=NULL;
+ }
+ WordDBKey(BKEYDATA *nkey):WordKey()
+ {
+ key=nkey;
+ Unpack(String((char *)key->data,key->len));
+ }
+ int is_null()
+ {
+ errr("UNUSED");
+ if(GetWord().length()==0)
+ {
+ for(int j=1;j<NFields();j++)
+ {if(Get(j)!=0){errr("WordDBKey::is_null key has 0 len word but is not null");}}
+ return 1;
+ }
+ return 0;
+ }
+ WordDBKey(BINTERNAL *nkey):WordKey()
+ {
+ key=NULL;
+ if(nkey->len==0)
+ {
+ ;// errr("WordDBKey::WordDBKey(BINTERNAL) : nkey->len==0");
+ }
+ else{Unpack(String((char *)nkey->data,nkey->len));}
+ }
+ WordDBKey(byte *data,int len):WordKey()
+ {
+ key=NULL;
+ if(!data || !len){errr("WordDBKey::WordDBKey(data,len) !data || !len");}
+ Unpack(String((char *)data,len));
+ }
+};
+
+
+// ***********************************************
+// **************** WordDBPage *****************
+// ***********************************************
+
+// encapsulation of Berkeley DB BTREE page.
+// this one knows how to compress/decompress itself
+class WordDBPage
+{
+ public:
+ int n; // number of entries
+ int nk; // number of keys
+ int type; // for now 3(btreeinternal) && 5(leave:normal case) are allowed
+ int pgsz;
+
+ PAGE *pg; // pointer to BerkeleyDB BTREE page structure
+
+ // assert this page is a leave
+ void isleave()
+ {
+ if(type!=P_LBTREE){errr("WordDBPage::isleave: trying leave specific on non leave");}
+ }
+
+ // assert this page is an internal (non-leave) page
+ void isintern()
+ {
+ if(type!=P_IBTREE){errr("WordDBPage::isintern: trying btreeinternal specific on non btreeinternal page type");}
+
+ }
+
+ // get the i'th key stored in this page
+ WordDBKey get_WordDBKey(int i)
+ {
+ if(type==P_LBTREE){return(WordDBKey(key(i)));}
+ else
+ if(type==P_IBTREE){return(WordDBKey(btikey(i)));}
+ else
+ {errr("WordDBPage:get_WordDBKey: bad page type");}
+ return WordDBKey();
+ }
+
+ // ******************* Accessors to packed entries ****************
+
+ // get the i'th key stored in this (internal==nonleave) page. (ptr to packed)
+ BINTERNAL *btikey(int i)
+ {
+ if(i<0 || i>=pg->entries){printf("btikey:%d\n",i);errr("WordDBPage::btikey out iof bounds");}
+ isintern();return(GET_BINTERNAL(pg,i ));
+ }
+ // get the i'th entry stored in this (nonleave) page. (ptr to packed)
+ // an entry can either be a key or a data entry
+ BKEYDATA *entry (int i)
+ {
+ if(i<0 || i>=pg->entries){printf("entry:%d\n",i);errr("WordDBPage::entry out iof bounds");}
+ isleave(); return(GET_BKEYDATA (pg,i ));
+ }
+ // get the i'th key stored in this (leave) page. (ptr to packed)
+ BKEYDATA *key (int i)
+ {
+ if(i<0 || 2*i>=pg->entries){printf("key:%d\n",i);errr("WordDBPage::key out iof bounds");}
+ isleave(); return(GET_BKEYDATA (pg,i*2 ));
+ }
+ // get the i'th data stored in this (leave) page. (ptr to packed)
+ BKEYDATA *data (int i)
+ {
+ if(i<0 || 2*i+1>=pg->entries){printf("data:%d\n",i);errr("WordDBPage::data out iof bounds");}
+ isleave(); return(GET_BKEYDATA (pg,i*2+1));
+ }
+
+
+ // ********************* Inserting entries into a page ***************
+
+ int insert_pos; // offset in page of last inserted entry
+ int insert_indx; // index of next entry to be inserted
+
+ int e_offset(int i) {return((int)(pg->inp[i]));}
+
+ // allocate space (in the db page) for adding an entry to this page
+ void *alloc_entry(int size)
+ {
+ size=WORD_ALIGN_TO(size,4);
+ int inp_pos=((byte *)&(pg->inp[insert_indx]))-(byte *)pg;
+ insert_pos-=size;
+ if(insert_pos<=inp_pos)
+ {
+ show();
+ printf("alloc_entry: allocating size:%4d entrynum:insert_indx:%4d at:insert_pos:%4d\n",size,insert_indx,insert_pos);
+ errr("WordDBPage::alloc_entry: PAGE OVERFLOW");
+ }
+ pg->inp[insert_indx++]=insert_pos;
+ return((void *)((byte *)pg+insert_pos));
+ }
+
+
+ // add a data entry to this page
+ void insert_data(WordDBRecord &wrec)
+ {
+ isleave();
+ if(!(insert_indx%2)){errr("WordDBPage::insert_data data must be an odd number!");}
+ String prec;
+ wrec.Pack(prec);
+ int len=prec.length();
+ int size=len+(sizeof(BKEYDATA)-1);
+
+ BKEYDATA *dat=(BKEYDATA *)alloc_entry(size);
+ dat->len=len;
+ dat->type=1;//!!!!!!!!!!!!!
+ memcpy((void *)dat->data,(void *)(char *)prec,len);
+ }
+ // add a key entry to this page
+ void insert_key(WordDBKey &ky)
+ {
+ isleave();
+ if(insert_indx%2){errr("WordDBPage::insert_key key must be an even number!");}
+ String pkey;
+ ky.Pack(pkey);
+ int keylen=pkey.length();
+ int size=keylen+(sizeof(BKEYDATA)-1);
+ BKEYDATA *bky=(BKEYDATA *)alloc_entry(size);
+ bky->len=keylen;
+ bky->type=1;// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ memcpy((void *)bky->data,(void *)(char *)pkey,keylen);
+ }
+ // add a key entry to this internal page
+ void insert_btikey(WordDBKey &ky,BINTERNAL &bti,int empty=0)
+ {
+ isintern();
+ int keylen=0;
+ String pkey;
+ if(!empty)
+ {
+ ky.Pack(pkey);
+ keylen=pkey.length();
+ }
+ int size=keylen+((byte *)&(bti.data))-((byte *)&bti);// pos of data field in BINTERNAL
+ if(empty)
+ {
+ if(verbose){printf("WordDBPage::insert_btikey: empty : BINTERNAL:%d datapos:%d keylen:%d size:%d alligned to:%d\n",(int)sizeof(BINTERNAL),
+ (int)(((byte *)&(bti.data))-((byte *)&bti)),
+ keylen,size,WORD_ALIGN_TO(size,4));}
+ }
+
+ BINTERNAL *btik=(BINTERNAL *)alloc_entry(size);
+ btik->len =(empty ? 0 : keylen);
+ btik->type=1;// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ btik->pgno =bti.pgno;
+ btik->nrecs=bti.nrecs;
+ if(!empty){memcpy((void *)btik->data,(void *)(char *)pkey,keylen);}
+// else
+// {btik->data[0]=0;}// just to avoid uninit memory read
+ }
+ int entry_struct_size()
+ {
+ return(type==P_IBTREE ? sizeof(BINTERNAL) : sizeof(BKEYDATA ) )-1;
+ }
+ int entry_size(int i)
+ {
+ return entry_struct_size() + (type==P_IBTREE ? btikey(i)->len : key(i)->len );
+ }
+
+
+
+
+
+ // ************** Comrpession/Uncompression ***************************
+
+ // The compression functions
+ void Compress_extract_vals_wordiffs(int *nums,int *nums_pos,int nnums,HtVector_byte &wordiffs);
+ void Compress_show_extracted(int *nums,int *nums_pos,int nnums,HtVector_byte &wordiffs);
+ void Compress_vals(Compressor &out,int *nums,int *nums_pos,int nnums);
+ void Compress_vals_changed_flags(Compressor &out,unsigned int *cflags,int n);
+ void Compress_header(Compressor &out);
+ int Compress_main(Compressor &out);
+ Compressor *Compress(int debug=0, DB_CMPR_INFO *cmprInfo=NULL);
+
+ // The uncompression functions
+ int Uncompress(Compressor *pin,int debug=0, DB_CMPR_INFO *cmprInfo=NULL);
+ int Uncompress_main(Compressor *pin);
+ void Uncompress_vals_chaged_flags(Compressor &in,unsigned int **pcflags,int *pn);
+ int Uncompress_header(Compressor &in);
+ void Uncompress_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums,byte *rworddiffs,int nrworddiffs);
+ void Uncompress_show_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums,byte *rworddiffs,int nrworddiffs);
+
+ int TestCompress(int debuglevel);
+ int Compare(WordDBPage &other);
+
+ // the following functions are use to compress/uncompress
+ // keys/data directly
+ // This is necesary for the first key/data elements of the page
+ void compress_key(Compressor &out,int i)
+ {
+ if(type==P_IBTREE)
+ {
+ int len=btikey(i)->len;
+ out.put_uint(len,NBITS_KEYLEN,label_str("seperatekey_len",i));
+ if(verbose){printf("WordDBPage::compress_key:compress(typ3):%d ::: sizeof(BINTERNAL):%d\n",len,(int)sizeof(BINTERNAL));}
+ out.put_uint(btikey(i)->len ,sizeof(btikey(i)->len )*8,label_str("seperatekey_bti_len" ,i));
+ out.put_uint(btikey(i)->type ,sizeof(btikey(i)->type )*8,label_str("seperatekey_bti_type" ,i));
+ out.put_uint(btikey(i)->pgno ,sizeof(btikey(i)->pgno )*8,label_str("seperatekey_bti_pgno" ,i));
+ out.put_uint(btikey(i)->nrecs,sizeof(btikey(i)->nrecs)*8,label_str("seperatekey_bti_nrecs",i));
+ if(len){out.put_zone((byte *)btikey(i)->data,8*len,label_str("seperatekey_btidata",i));}
+ }
+ else
+ {
+ int len=key(i)->len;
+ out.put_uint(len,NBITS_KEYLEN,label_str("seperatekey_len",i));
+ if(verbose){printf("WordDBPage::compress_key: compress(typ5):%d\n",len);}
+ out.put_zone((byte *)key(i)->data,8*len,label_str("seperatekey_data",i));
+ }
+ }
+ void compress_data(Compressor &out,int i)
+ {
+ int len=data(i)->len;
+ out.put_uint(len,NBITS_DATALEN,label_str("seperatedata_len",i));
+ if(verbose){printf("WordDBPage::compress_data: compressdata(typ5):%d\n",len);}
+ out.put_zone((byte *)data(i)->data,8*len,label_str("seperatedata_data",i));
+ }
+ WordDBKey uncompress_key(Compressor &in,int i)
+ {
+ WordDBKey res;
+ int len=in.get_uint(NBITS_KEYLEN,label_str("seperatekey_len",i));
+ if(verbose){printf("WordDBPage::uncompress_key: seperatekey:len:%d\n",len);}
+
+ if(type==P_IBTREE)
+ {
+ if(len==0 && i!=0){errr("WordDBPage::uncompress_key: keylen=0 && i!=0");}
+ BINTERNAL bti;
+ bti.len =in.get_uint(sizeof(bti.len )*8,label_str("seperatekey_bti_len" ,i));
+ bti.type =in.get_uint(sizeof(bti.type )*8,label_str("seperatekey_bti_type" ,i));
+ bti.pgno =in.get_uint(sizeof(bti.pgno )*8,label_str("seperatekey_bti_pgno" ,i));
+ bti.nrecs=in.get_uint(sizeof(bti.nrecs)*8,label_str("seperatekey_bti_nrecs",i));
+ if(len!=bti.len){errr("WordDBPage::uncompress_key: incoherence: len!=bti.len");}
+ if(len)
+ {
+ byte *gotdata=new byte[len];
+ CHECK_MEM(gotdata);
+ in.get_zone(gotdata,8*len,label_str("seperatekey_btidata",i));
+ res=WordDBKey(gotdata,len);
+ delete [] gotdata;
+ }
+ insert_btikey(res,bti,(len==0 ? 1:0));
+ }
+ else
+ {
+ byte *gotdata=new byte[len];
+ CHECK_MEM(gotdata);
+ in.get_zone(gotdata,8*len,label_str("seperatekey_data",i));
+ res=WordDBKey(gotdata,len);
+ insert_key(res);
+ delete [] gotdata;
+ }
+ return res;
+ }
+ WordDBRecord uncompress_data(Compressor &in,int i,int rectyp)
+ {
+ WordDBRecord res;
+ int len=in.get_uint(NBITS_DATALEN,label_str("seperatedata_len",i));
+ if(verbose)printf("uncompressdata:len:%d\n",len);
+ byte *gotdata=new byte[len];
+ CHECK_MEM(gotdata);
+ in.get_zone(gotdata,8*len,label_str("seperatedata_data",i));
+ res=WordDBRecord(gotdata,len,rectyp);
+ insert_data(res);
+ delete [] gotdata;
+ return res;
+ }
+
+
+ // exctracted numerical fields
+
+ const char* number_field_label(int j)
+ {
+ if(j>0 && j<WordKey::NFields()){return (char *)(WordKey::Info()->sort[j].name);}
+ if( j==CNFLAGS )return "CNFLAGS " ;
+ if( j==CNDATASTATS0 )return "CNDATASTATS0 " ;
+ if( j==CNDATASTATS1 )return "CNDATASTATS1 " ;
+ if( j==CNDATADATA )return "CNDATADATA " ;
+ if( j==CNBTIPGNO )return "CNBTIPGNO " ;
+ if( j==CNBTINRECS )return "CNBTINRECS " ;
+ if( j==CNWORDDIFFPOS )return "CNWORDDIFFPOS" ;
+ if( j==CNWORDDIFFLEN )return "CNWORDDIFFLEN" ;
+ return "BADFIELD";
+ }
+ // positions of different fileds in
+ // number arrays that are extracted
+ int CNFLAGS ;// FLAGS: which key-fields have changed
+ int CNFIELDS ;// first numerical field
+ int CNDATASTATS0 ;// word record - stats element 0
+ int CNDATASTATS1 ;// word record - stats element 1
+ int CNDATADATA ;// word record - data
+ int CNBTIPGNO ;// internal page: page pointed at by node
+ int CNBTINRECS ;// internal page: ??
+ int CNWORDDIFFPOS ;// position of first caracter that changed in word
+ int CNWORDDIFFLEN ;// number of chars that changed in word
+ int nnums ;
+
+
+ // ************** DEBUGING/BENCHMARKING ***************
+ void show();
+ int verbose;
+ int debug;
+
+
+ // ************** Initialization/Destruction *****************
+
+ // initialize when header is valid
+ void init()
+ {
+ type=pg->type;
+ n=pg->entries;
+ nk=(type==P_LBTREE ? n/2 : n);
+ insert_pos=pgsz;
+ insert_indx=0;
+ }
+
+ void init0()
+ {
+ CNFLAGS =0;
+ CNFIELDS =1;
+ CNDATASTATS0 = WordKey::NFields() ;
+ CNDATASTATS1 = WordKey::NFields() + 1;
+ CNDATADATA = WordKey::NFields() + 2;
+ CNBTIPGNO = WordKey::NFields() + 3;
+ CNBTINRECS = WordKey::NFields() + 4;
+ CNWORDDIFFPOS = WordKey::NFields() + 5;
+ CNWORDDIFFLEN = WordKey::NFields() + 6;
+ nnums=(CNWORDDIFFLEN+1);
+
+ pg=NULL;
+ pgsz=0;
+ n=0;
+ nk=0;
+ type=-1;
+ verbose=0;
+ debug=0;
+ insert_pos=pgsz;
+ insert_indx=0;
+ }
+
+ // db page was created here, destroy it
+ void delete_page()
+ {
+ if(!pg){errr("WordDBPage::delete_page: pg==NULL");}
+ delete [] pg;
+ pg=NULL;
+ }
+ // unlink db page from this encapsulation
+ void unset_page()
+ {
+ if(!pg){errr("WordDBPage::unset_page: pg==NULL");}
+ pg=NULL;
+ }
+ // the DB page must be unset or deleted
+ // before destroying this encapsulation
+ ~WordDBPage()
+ {
+ if(pg){errr("WordDBPage::~WordDBPage: page not empty");}
+ }
+ WordDBPage(int npgsz)
+ {
+ init0();
+ pgsz=npgsz;
+ pg=(PAGE *)(new byte[pgsz]);
+ CHECK_MEM(pg);
+ insert_pos=pgsz;
+ insert_indx=0;
+ }
+ WordDBPage(const u_int8_t* buff,int buff_length)
+ {
+ init0();
+ pg=(PAGE *)buff;
+ pgsz=buff_length;
+ insert_pos=pgsz;
+ insert_indx=0;
+ init();
+ }
+};
+
+
+#endif// _WordDBPage_h_
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDead.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDead.cc
new file mode 100644
index 00000000..ff5e5250
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDead.cc
@@ -0,0 +1,123 @@
+//
+// WordDead.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDead.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+
+#include "WordDead.h"
+#include "WordListOne.h"
+
+class WordDeadCursor {
+public:
+ WordDBCursor* cursor;
+};
+
+WordDead::~WordDead()
+{
+ delete db;
+ delete mask;
+}
+
+int WordDead::Initialize(WordList* nwords)
+{
+ words = nwords;
+ db = new WordDB(nwords->GetContext()->GetDBInfo());
+ mask = new WordKey(words->GetContext());
+ return OK;
+}
+
+int WordDead::Open()
+{
+ const String& filename = words->Filename();
+ int flags = words->Flags();
+
+ db->set_pagesize(words->Pagesize());
+
+ return db->Open(filename, "dead", DB_BTREE, flags, 0666, WORD_DB_DEAD) == 0 ? OK : NOTOK;
+}
+
+int WordDead::Remove()
+{
+ return db->Remove(words->Filename(), "dead") == 0 ? OK : NOTOK;
+}
+
+int WordDead::Close()
+{
+ return db->Close() == 0 ? OK : NOTOK;
+}
+
+int WordDead::Normalize(WordKey& key) const
+{
+ int nfields = words->GetContext()->GetKeyInfo().nfields;
+ int i;
+ //
+ // Undefine in 'key' all fields not defined in 'mask'
+ //
+ for(i = 0; i < nfields; i++) {
+ if(!mask->IsDefined(i))
+ key.Set(i, WORD_KEY_VALUE_INVALID);
+ }
+
+ return OK;
+}
+
+int WordDead::Exists(const WordKey& key) const
+{
+ WordKey tmp_key = key;
+
+ Normalize(tmp_key);
+
+ String coded;
+ String dummy;
+
+ tmp_key.Pack(coded);
+
+ return db->Get(0, coded, dummy, 0) == 0;
+}
+
+int WordDead::Put(const WordKey& key) const
+{
+ WordKey tmp_key = key;
+
+ Normalize(tmp_key);
+
+ String coded;
+ String dummy;
+
+ tmp_key.Pack(coded);
+
+ return db->Put(0, coded, dummy, 0) == 0 ? OK : NOTOK;
+}
+
+WordDeadCursor* WordDead::Cursor() const
+{
+ WordDeadCursor* cursor = new WordDeadCursor;
+ cursor->cursor = db->Cursor();
+
+ return cursor;
+}
+
+int WordDead::Next(WordDeadCursor* cursor, WordKey& key)
+{
+ String coded;
+ String dummy;
+ int ret = cursor->cursor->Get(coded, dummy, DB_NEXT);
+ if(ret != 0) {
+ delete cursor->cursor;
+ delete cursor;
+ } else {
+ key.Unpack(coded);
+ }
+ return ret;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDead.h b/debian/htdig/htdig-3.2.0b6/htword/WordDead.h
new file mode 100644
index 00000000..a9a6e2ed
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDead.h
@@ -0,0 +1,70 @@
+//
+// WordDead.h
+//
+// NAME
+//
+// list of documents that must be ignored and then deleted from the index.
+//
+// SYNOPSIS
+//
+// Helper for the WordList class.
+//
+// DESCRIPTION
+//
+// WordDead is a list of WordKey entries describing deleted documents.
+// All inverted index entries that match a WordKey entry of the WordDead
+// list are treated as if they do not appear in the inverted index.
+//
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDead.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordDead_h_
+#define _WordDead_h_
+
+#include <stdio.h>
+
+#include "htString.h"
+#include "WordDB.h"
+
+class WordList;
+class WordDeadCursor;
+
+class WordDead
+{
+ public:
+ WordDead() { words = 0; db = 0; mask = 0; }
+ ~WordDead();
+
+ int Initialize(WordList* words);
+
+ int Open();
+ int Remove();
+ int Close();
+
+ int Mask(const WordKey& nmask) { *mask = nmask; return OK; }
+
+ List* Words() const;
+
+ int Normalize(WordKey& key) const;
+ int Exists(const WordKey& key) const;
+ int Put(const WordKey& key) const;
+
+ WordDeadCursor* Cursor() const;
+ int Next(WordDeadCursor* cursor, WordKey& key);
+
+ private:
+ WordList* words;
+ WordDB* db;
+ WordKey* mask;
+};
+#endif /* _WordDead_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc
new file mode 100644
index 00000000..85bac6f5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc
@@ -0,0 +1,274 @@
+//
+// WordDict.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDict.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+
+#include "WordDict.h"
+#include "WordListOne.h"
+
+#define WORD_DICT_CURSOR_FIRST 1
+#define WORD_DICT_CURSOR_NEXT 2
+
+class WordDictCursor {
+public:
+ int info;
+ String prefix;
+ WordDBCursor* cursor;
+};
+
+WordDict::~WordDict()
+{
+ delete db;
+}
+
+int WordDict::Initialize(WordList* nwords)
+{
+ words = nwords;
+ db = new WordDB(nwords->GetContext()->GetDBInfo());
+ return OK;
+}
+
+int WordDict::Open()
+{
+ const String& filename = words->Filename();
+ int flags = words->Flags();
+
+ db->set_pagesize(words->Pagesize());
+
+ return db->Open(filename, "dict", DB_BTREE, flags, 0666, WORD_DB_DICT) == 0 ? OK : NOTOK;
+}
+
+int WordDict::Remove()
+{
+ return db->Remove(words->Filename(), "dict") == 0 ? OK : NOTOK;
+}
+
+int WordDict::Close()
+{
+ return db->Close() == 0 ? OK : NOTOK;
+}
+
+int WordDict::Serial(const String& word, unsigned int& serial)
+{
+ int ret;
+ WordDictRecord entry;
+ if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+ if(ret == DB_NOTFOUND) {
+ words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
+ if(entry.Put(db, word) != 0) return NOTOK;
+ }
+ serial = entry.id;
+
+ return OK;
+}
+
+int WordDict::SerialExists(const String& word, unsigned int& serial)
+{
+ int ret;
+ WordDictRecord entry;
+ if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+
+ serial = ret == DB_NOTFOUND ? WORD_DICT_SERIAL_INVALID : entry.id;
+
+ return OK;
+}
+
+int WordDict::SerialRef(const String& word, unsigned int& serial)
+{
+ int ret;
+ WordDictRecord entry;
+ if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+ if(ret == DB_NOTFOUND)
+ words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
+ entry.count++;
+ if(entry.Put(db, word) != 0) return NOTOK;
+ serial = entry.id;
+
+ return OK;
+}
+
+int WordDict::Noccurrence(const String& word, unsigned int& noccurrence) const
+{
+ if(word.empty()) {
+ fprintf(stderr, "WordDict::Noccurrence: null word\n");
+ return NOTOK;
+ }
+ WordDictRecord entry;
+ noccurrence = 0;
+ int ret;
+ if((ret = entry.Get(db, word)) != 0) {
+ if(ret != DB_NOTFOUND)
+ return NOTOK;
+ }
+ noccurrence = entry.count;
+
+ return OK;
+}
+
+int WordDict::Normalize(String& word) const
+{
+ const WordType& wtype = words->GetContext()->GetType();
+
+ return wtype.Normalize(word);
+}
+
+int WordDict::Incr(const String& word, unsigned int incr)
+{
+ int ret;
+ WordDictRecord entry;
+ if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+ if(ret == DB_NOTFOUND)
+ words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
+ entry.count += incr;
+ if(entry.Put(db, word) != 0) return NOTOK;
+ return OK;
+}
+
+int WordDict::Decr(const String& word, unsigned int decr)
+{
+ WordDictRecord entry;
+ int ret;
+ if((ret = entry.Get(db, word)) != 0) {
+ if(ret == DB_NOTFOUND)
+ fprintf(stderr, "WordDict::Unref(%s) Unref on non existing word occurrence\n", (const char*)word);
+ return NOTOK;
+ }
+ entry.count -= decr;
+ if(entry.count > 0)
+ ret = entry.Put(db, word) == 0 ? OK : NOTOK;
+ else
+ ret = entry.Del(db, word) == 0 ? OK : NOTOK;
+
+ return ret;
+}
+
+int WordDict::Put(const String& word, unsigned int noccurrence)
+{
+ int ret;
+ WordDictRecord entry;
+ if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+ if(ret == DB_NOTFOUND)
+ words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
+ entry.count = noccurrence;
+ if(entry.Put(db, word) != 0) return NOTOK;
+ return OK;
+}
+
+List *WordDict::Words() const
+{
+ String key;
+ String coded;
+ WordDBCursor* cursor = db->Cursor();
+ List* list = new List;
+
+ while(cursor->Get(key, coded, DB_NEXT) == 0)
+ list->Add(new String(key));
+
+ delete cursor;
+
+ return list;
+}
+
+int WordDict::Exists(const String& word) const
+{
+ String tmp_word = word;
+ String coded;
+
+ return db->Get(0, tmp_word, coded, 0) == 0;
+}
+
+WordDictCursor* WordDict::Cursor() const
+{
+ WordDictCursor* cursor = new WordDictCursor;
+ cursor->cursor = db->Cursor();
+
+ return cursor;
+}
+
+int WordDict::Next(WordDictCursor* cursor, String& word, WordDictRecord& record)
+{
+ String coded;
+ int ret = cursor->cursor->Get(word, coded, DB_NEXT);
+ if(ret != 0) {
+ delete cursor->cursor;
+ delete cursor;
+ } else {
+ record.Unpack(coded);
+ }
+ return ret;
+}
+
+WordDictCursor* WordDict::CursorPrefix(const String& prefix) const
+{
+ WordDictCursor* cursor = new WordDictCursor;
+ cursor->cursor = db->Cursor();
+ cursor->prefix = prefix;
+ cursor->info = WORD_DICT_CURSOR_FIRST;
+
+ return cursor;
+}
+
+int WordDict::NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record)
+{
+ String coded;
+ int ret;
+ if(cursor->info == WORD_DICT_CURSOR_FIRST) {
+ word = cursor->prefix;
+ ret = cursor->cursor->Get(word, coded, DB_SET_RANGE);
+ cursor->info = WORD_DICT_CURSOR_NEXT;
+ } else {
+ ret = cursor->cursor->Get(word, coded, DB_NEXT);
+ }
+ //
+ // Stop walking when 1) DB_NOTFOUND, 2) the word found is shorter than
+ // the required prefix, 3) the word found does not start with the
+ // required prefix.
+ //
+ if(ret != 0 ||
+ cursor->prefix.length() > word.length() ||
+ strncmp(cursor->prefix.get(), word.get(), cursor->prefix.length())) {
+ delete cursor->cursor;
+ delete cursor;
+ if(ret == 0) ret = DB_NOTFOUND;
+ } else {
+ record.Unpack(coded);
+ }
+ return ret;
+}
+
+int WordDict::Write(FILE* f)
+{
+ WordDBCursor* cursor = db->Cursor();
+ String key;
+ String coded;
+ unsigned int occurrence;
+ unsigned int id;
+
+ while(cursor->Get(key, coded, DB_NEXT) == 0) {
+ int offset = 0;
+ coded.ber_shift(offset, occurrence);
+ coded.ber_shift(offset, id);
+ fprintf(f, "%s %d %d\n", (char*)key, id, occurrence);
+ }
+
+ delete cursor;
+
+ return OK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDict.h b/debian/htdig/htdig-3.2.0b6/htword/WordDict.h
new file mode 100644
index 00000000..86b45717
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDict.h
@@ -0,0 +1,252 @@
+//
+// WordDict.h
+//
+// NAME
+//
+// manage and use an inverted index dictionary.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// WordList* words = ...;
+// WordDict* dict = words->Dict();
+//
+// DESCRIPTION
+//
+// WordDict maps strings to unique identifiers and frequency in the
+// inverted index. Whenever a new word is found, the WordDict class
+// can be asked to assign it a serial number. When doing so, an entry
+// is created in the dictionary with a frequency of zero. The application
+// may then increment or decrement the frequency to reflect the inverted
+// index content.
+//
+// The serial numbers range from 1 to 2^32 inclusive.
+//
+// A WordDict object is automatically created by the WordList object and
+// should not be created directly by the application.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDict.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordDict_h_
+#define _WordDict_h_
+
+#include <stdio.h>
+
+#ifndef SWIG
+#include "htString.h"
+#include "WordDB.h"
+
+class WordList;
+class WordDictCursor;
+
+#define WORD_DICT_SERIAL_INVALID 0
+
+class WordDictRecord {
+ public:
+ inline WordDictRecord() { count = 0; id = WORD_DICT_SERIAL_INVALID; }
+
+ inline int Unpack(const String& coded) {
+ int offset = 0;
+ coded.ber_shift(offset, count);
+ coded.ber_shift(offset, id);
+ return OK;
+ }
+
+ inline int Pack(String& coded) const {
+ int offset = 0;
+ coded.ber_push(offset, count);
+ coded.ber_push(offset, id);
+ return OK;
+ }
+
+ inline int Get(WordDB* db, const String& word) {
+ String tmp_word = word;
+ String coded(BER_MAX_BYTES * 2);
+ int ret;
+ if((ret = db->Get(0, tmp_word, coded, 0)) != 0) return ret;
+
+ Unpack(coded);
+
+ return ret;
+ }
+
+ inline int Put(WordDB* db, const String& word) {
+ String coded(BER_MAX_BYTES * 2);
+ Pack(coded);
+ return db->Put(0, word, coded, 0);
+ }
+
+ inline int Del(WordDB* db, const String& word) {
+ return db->Del(0, word);
+ }
+
+ inline unsigned int Count() { return count; }
+ inline unsigned int Id() { return id; }
+
+ unsigned int count;
+ unsigned int id;
+};
+#endif /* SWIG */
+
+class WordDict
+{
+ public:
+#ifndef SWIG
+ //-
+ // Private constructor.
+ //
+ WordDict() { words = 0; db = 0; }
+ ~WordDict();
+
+ //-
+ // Bind the object a WordList inverted index. Return OK on success,
+ // NOTOK otherwise.
+ //
+ int Initialize(WordList* words);
+
+ //-
+ // Open the underlying Berkeley DB sub-database. The enclosing
+ // file is given by the <i>words</i> data member. Return OK on success,
+ // NOTOK otherwise.
+ //
+ int Open();
+ //-
+ // Destroy the underlying Berkeley DB sub-database. Return OK on success,
+ // NOTOK otherwise.
+ //
+ int Remove();
+ //-
+ // Close the underlying Berkeley DB sub-database. Return OK on success,
+ // NOTOK otherwise.
+ //
+ int Close();
+
+ //-
+ // If the <b>word</b> argument exists in the dictionnary, return its
+ // serial number in the <b>serial</b> argument. If it does not already
+ // exists, assign it a serial number, create an entry with a frequency
+ // of zero and return the new serial in the <b>serial</b> argument.
+ // Return OK on success, NOTOK otherwise.
+ //
+ int Serial(const String& word, unsigned int& serial);
+ //-
+ // If the <b>word</b> argument exists in the dictionnary, return its
+ // serial number in the <b>serial</b> argument. If it does not exists
+ // set the <b>serial</b> argument to WORD_DICT_SERIAL_INVALID.
+ // Return OK on success, NOTOK otherwise.
+ //
+ int SerialExists(const String& word, unsigned int& serial);
+ //-
+ // Short hand for Serial() followed by Ref().
+ // Return OK on success, NOTOK otherwise.
+ //
+ int SerialRef(const String& word, unsigned int& serial);
+ //-
+ // Return the frequency of the <b>word</b> argument
+ // in the <b>noccurrence</b> argument.
+ // Return OK on success, NOTOK otherwise.
+ //
+ int Noccurrence(const String& word, unsigned int& noccurrence) const;
+#endif /* SWIG */
+
+ //-
+ // Short hand for words->GetContext()->GetType()->Normalize(word).
+ // Return OK on success, NOTOK otherwise.
+ //
+ int Normalize(String& word) const;
+
+ //-
+ // Short hand for Incr(word, 1)
+ //
+ int Ref(const String& word) { return Incr(word, 1); }
+ //-
+ // Add <b>incr</b> to the frequency of the <b>word</b>.
+ // Return OK on success, NOTOK otherwise.
+ //
+ int Incr(const String& word, unsigned int incr);
+ //-
+ // Short hand for Decr(word, 1)
+ //
+ int Unref(const String& word) { return Decr(word, 1); }
+ //-
+ // Subtract <b>decr</b> to the frequency of the <b>word</b>. If
+ // the frequency becomes lower or equal to zero, remove the entry
+ // from the dictionnary and lose the association between the word and its
+ // serial number.
+ // Return OK on success, NOTOK otherwise.
+ //
+ int Decr(const String& word, unsigned int decr);
+ //-
+ // Set the frequency of <b>word</b> with the value of the <b>noccurrence</b>
+ // argument.
+ //
+ int Put(const String& word, unsigned int noccurrence);
+
+ //-
+ // Return true if <b>word</b> exists in the dictionnary, false otherwise.
+ //
+ int Exists(const String& word) const;
+
+#ifndef SWIG
+ //-
+ // Return a pointer to the associated WordList object.
+ //
+ List* Words() const;
+
+ //-
+ // Return a cursor to sequentially walk the dictionnary using the
+ // <b>Next</b> method.
+ //
+ WordDictCursor* Cursor() const;
+ //-
+ // Return the next entry in the dictionnary. The <b>cursor</b> argument
+ // must have been created using the <i>Cursor</i> method. The word is
+ // returned in the <b>word</b> argument and the record is returned in
+ // the <b>record</b> argument.
+ // On success the function returns 0, at the end of the dictionnary it
+ // returns DB_NOTFOUND. The <b>cursor</b> argument is deallocated when
+ // the function hits the end of the dictionnary or an error occurs.
+ //
+ int Next(WordDictCursor* cursor, String& word, WordDictRecord& record);
+
+ //-
+ // Return a cursor to sequentially walk the entries of the dictionnary
+ // that start with the <b>prefix</b> argument, using the
+ // <b>NextPrefix</b> method.
+ //
+ WordDictCursor* CursorPrefix(const String& prefix) const;
+ //-
+ // Return the next prefix from the dictionnary. The <b>cursor</b> argument
+ // must have been created using the <i>CursorPrefix</i> method. The word is
+ // returned in the <b>word</b> argument and the record is returned in
+ // the <b>record</b> argument. The <b>word</b> is guaranteed to start with
+ // the prefix specified to the <b>CursorPrefix</b> method.
+ // On success the function returns 0, at the end of the dictionnary it
+ // returns DB_NOTFOUND. The <b>cursor</b> argument is deallocated when
+ // the function hits the end of the dictionnary or an error occurs.
+ //
+ int NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record);
+
+ //-
+ // Dump the complete dictionary in the file descriptor <b>f.</b> The
+ // format of the dictionary is <i>word serial frequency</i>, one by
+ // line.
+ //
+ int Write(FILE* f);
+
+ private:
+ WordList* words;
+ WordDB* db;
+#endif /* SWIG */
+};
+#endif /* _WordDict_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordKey.cc b/debian/htdig/htdig-3.2.0b6/htword/WordKey.cc
new file mode 100644
index 00000000..413faaac
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordKey.cc
@@ -0,0 +1,673 @@
+//
+// WordKey.cc
+//
+// WordKey: All the functions are implemented regardless of the actual
+// structure of the key using word_key_info.
+// WARNING: although it may seem that you can have two String
+// fields in the key, some code does not support that. This should
+// not be a problem since the goal of the WordKey class is to
+// implement the keys of an inverted index.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordKey.cc,v 1.9 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "WordKey.h"
+
+//
+// Returns OK if fields set in 'object' and 'other' are all equal.
+//
+// Fields not set in either 'object' or 'other' are ignored
+// completely. If the word suffix of 'object' is undefined, its word
+// is compared against the leading bytes of the 'other' word only,
+// i.e. as a prefix match.
+//
+// This function is useful to compare existing keys with a search
+// criterion that may be incomplete. For instance if we look for keys
+// that contain words starting with a given prefix or keys that
+// are located in a specific document, regardless of their location
+// in the document.
+//
+int WordKey::Equal(const WordKey& other) const
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // Walk the fields in sort order; fields undefined in either key
+  // are ignored completely. Stop at the first mismatch.
+  //
+  for(int field = 0; field < info.nfields; field++) {
+    if(!IsDefined(field) || !other.IsDefined(field))
+      continue;
+
+    if(info.sort[field].type == WORD_ISA_STRING) {
+      //
+      // Without a word suffix this key is a prefix search term:
+      // 'other' need only start with our word. With a suffix the
+      // words must match exactly.
+      //
+      if(!IsDefinedWordSuffix()) {
+        if(kword != other.kword.sub(0, kword.length()))
+          return 0;
+      } else if(kword != other.kword) {
+        return 0;
+      }
+    } else if(Get(field) != other.Get(field)) {
+      //
+      // Numerical field: values must match exactly.
+      //
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+//
+// Compare <a> and <b> in the Berkeley DB fashion.
+// <a> and <b> are packed keys.
+// Compares full WordKey, unlike Compare_WordOnly.
+//
+inline int
+WordKey::Compare(const char *a, int a_length, const char *b, int b_length)
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  if(a_length < info.num_length || b_length < info.num_length) {
+    fprintf(stderr, "WordKey::Compare: key length %d or %d < info.num_length = %d\n", a_length, b_length, info.num_length);
+    return NOTOK;
+  }
+
+  //
+  // First field: the word, stored at the head of the packed key. Its
+  // length is the total length minus the fixed-size numerical block.
+  // Compare byte by byte, then break ties on length.
+  //
+  const int p1_length = a_length - info.num_length;
+  const int p2_length = b_length - info.num_length;
+  {
+    int len = p1_length > p2_length ? p2_length : p1_length;
+    const unsigned char* p1 = (unsigned char *)a;
+    const unsigned char* p2 = (unsigned char *)b;
+
+    for (;len--; ++p1, ++p2) {
+      if (*p1 != *p2)
+        return (int)*p1 - (int)*p2;
+    }
+    if(p1_length != p2_length)
+      return p1_length - p2_length;
+  }
+
+  //
+  // Following fields: the numerical components, bit-packed after the
+  // word. Unpack each from both keys and compare in sort order.
+  //
+  for(int j = 1; j < info.nfields; j++)
+    {
+      WordKeyNum p1;
+      int a_index = info.sort[j].bytes_offset + p1_length;
+      WordKey::UnpackNumber((unsigned char *)&a[a_index],
+                            info.sort[j].bytesize,
+                            p1,
+                            info.sort[j].lowbits,
+                            info.sort[j].bits);
+
+      WordKeyNum p2;
+      int b_index = info.sort[j].bytes_offset + p2_length;
+      WordKey::UnpackNumber((unsigned char *)&b[b_index],
+                            info.sort[j].bytesize,
+                            p2,
+                            info.sort[j].lowbits,
+                            info.sort[j].bits);
+      //
+      // WordKeyNum is unsigned: the previous 'return p1 - p2' could
+      // wrap around and yield the wrong sign when the difference did
+      // not fit in an int. Return the sign explicitly instead; the
+      // bt_compare contract only requires <0 / 0 / >0.
+      //
+      if(p1 != p2)
+        return p1 < p2 ? -1 : 1;
+    }
+
+  //
+  // If we reach this point, everything compared equal
+  //
+  return 0;
+}
+//
+// Compare <a> and <b> in the Berkeley DB fashion.
+// <a> and <b> are packed keys.
+// Only compares "word" part of WordKey, unlike Compare.
+//
+inline int
+WordKey::Compare_WordOnly(const char *a, int a_length, const char *b, int b_length)
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  if(a_length < info.num_length || b_length < info.num_length) {
+    fprintf(stderr, "WordKey::Compare: key length %d or %d < info.num_length = %d\n", a_length, b_length, info.num_length);
+    return NOTOK;
+  }
+
+  //
+  // Compare only the leading string field (the word) byte by byte,
+  // then break ties on length. The trailing numerical block of each
+  // packed key is ignored.
+  //
+  const int word1_length = a_length - info.num_length;
+  const int word2_length = b_length - info.num_length;
+  const unsigned char* word1 = (unsigned char *)a;
+  const unsigned char* word2 = (unsigned char *)b;
+  int left = word1_length < word2_length ? word1_length : word2_length;
+
+  while(left-- > 0) {
+    if(*word1 != *word2)
+      return (int)*word1 - (int)*word2;
+    word1++;
+    word2++;
+  }
+
+  return word1_length - word2_length;
+}
+
+//
+// Compare <a> and <b> in the Berkeley DB fashion.
+// <a> and <b> are packed keys.
+// Compares full WordKey, unlike Compare_WordOnly.
+//
+int
+WordKey::Compare(const String& a, const String& b)
+{
+  //
+  // Delegate to the raw-buffer comparison using the packed bytes and
+  // lengths held by the String objects.
+  //
+  const char* a_data = a;
+  const char* b_data = b;
+  return WordKey::Compare(a_data, a.length(), b_data, b.length());
+}
+
+//
+// Compare <a> and <b> in the Berkeley DB fashion.
+// <a> and <b> are packed keys.
+// Only compares "word" part of WordKey, unlike Compare.
+//
+int
+WordKey::Compare_WordOnly(const String& a, const String& b)
+{
+  //
+  // Delegate to the raw-buffer word comparison using the packed
+  // bytes and lengths held by the String objects.
+  //
+  const char* a_data = a;
+  const char* b_data = b;
+  return WordKey::Compare_WordOnly(a_data, a.length(), b_data, b.length());
+}
+
+//
+// C comparison function interface for Berkeley DB (bt_compare)
+// Just call the static Compare function of WordKey. It is *critical*
+// that this function is as fast as possible. See the Berkeley DB
+// documentation for more information on the return values.
+// Compares full WordKey, unlike word_only_db_cmp.
+//
+int
+word_db_cmp(const DBT *a, const DBT *b)
+{
+  //
+  // Berkeley DB bt_compare callback: order two packed keys with the
+  // full WordKey comparison (word first, then numerical fields).
+  //
+  const char* a_data = (char*)a->data;
+  const char* b_data = (char*)b->data;
+  return WordKey::Compare(a_data, a->size, b_data, b->size);
+}
+
+//
+// C comparison function interface for Berkeley DB (bt_compare)
+// Just call the static Compare function of WordKey.
+// See the Berkeley DB
+// documentation for more information on the return values.
+// Only compares text part of the WordKey, unlike word_db_cmp.
+//
+int
+word_only_db_cmp(const DBT *a, const DBT *b)
+{
+  //
+  // Berkeley DB bt_compare callback: order two packed keys on their
+  // word part only, ignoring the numerical fields.
+  //
+  const char* a_data = (char*)a->data;
+  const char* b_data = (char*)b->data;
+  return WordKey::Compare_WordOnly(a_data, a->size, b_data, b->size);
+}
+
+//
+// Compare current key defined fields with other key defined fields only,
+// ignore fields that are not defined in key or other. Return 1 if different
+// 0 if equal. If different, position is set to the field number that differ,
+// lower is set to 1 if Get(position) is lower than other.Get(position) otherwise
+// lower is set to 0.
+//
+int WordKey::Diff(const WordKey& other, int& position, int& lower)
+{
+  position = -1;
+
+  //
+  // Word field: when <other> carries no word suffix it is a prefix
+  // search term, so only its leading bytes are compared.
+  //
+  if(IsDefined(0) && other.IsDefined(0)) {
+    int ret = 0;
+    if(other.IsDefinedWordSuffix())
+      ret = GetWord().compare(other.GetWord());
+    else
+      ret = strncmp((char*)GetWord(), (const char*)other.GetWord(), other.GetWord().length());
+    if(ret) {
+      position = 0;
+      //
+      // compare()/strncmp() follow the strcmp convention: a negative
+      // result means this key's word sorts before other's. The
+      // previous 'lower = ret > 0' contradicted both the documented
+      // contract (lower == 1 when Get(position) is lower) and the
+      // numerical branch below.
+      //
+      lower = ret < 0;
+    }
+  }
+
+  //
+  // Numerical fields: report the first field, defined in both keys,
+  // whose values differ.
+  //
+  if(position < 0) {
+    int nfields=WordKey::NFields();
+
+    int i;
+    for(i = 1; i < nfields; i++) {
+      if(IsDefined(i) && other.IsDefined(i) &&
+         Get(i) != other.Get(i)) {
+        lower = Get(i) < other.Get(i);
+        break;
+      }
+    }
+    if(i < nfields)
+      position = i;
+  }
+
+  return position >= 0;
+}
+
+//
+// Compare object and <other> using comparison of their packed form
+//
+int
+WordKey::PackEqual(const WordKey& other) const
+{
+  //
+  // Two keys are considered equal when their disk (packed)
+  // representations are byte-identical.
+  //
+  String packed_this;
+  String packed_other;
+
+  Pack(packed_this);
+  other.Pack(packed_other);
+
+  return packed_this == packed_other;
+}
+
+//
+// Implement ++ on a key.
+//
+// It behaves like arithmetic but follows these rules:
+// . Increment starts at field <position>
+// . If a field value overflows, increment field <position> - 1
+// . Undefined fields are ignored and their value untouched
+// . Incrementing the word field is done by appending \001
+// . When a field is incremented all fields to the left are set to 0
+// If position is not specified it is equivalent to NFields() - 1.
+// It returns OK if successfull, NOTOK if position out of range or
+// WORD_FOLLOWING_ATEND if the maximum possible value was reached.
+//
+// Examples assuming numerical fields are 8 bits wide:
+//
+// 0 1 2 3 OPERATION RESULT
+// ---------------------------------------------------------------------------------------
+// foo <DEF> 1 1 1 -> SetToFollowing(3) -> foo <DEF> 1 1 2
+// foo <DEF> 1 1 1 -> SetToFollowing(2) -> foo <DEF> 1 2 0
+// foo <DEF> 1 1 255 -> SetToFollowing(3) -> foo <DEF> 1 2 0
+// foo <DEF> 255 255 255 -> SetToFollowing(3) -> foo\001 <DEF> 0 0 0
+// foo <DEF> 255 1 1 -> SetToFollowing(1) -> foo\001 <DEF> 0 0 0
+// <UNDEF><UNDEF> 255 1 1 -> SetToFollowing(1) -> WORD_FOLLOWING_ATEND
+// foo <DEF> 1 <UNDEF> 255 -> SetToFollowing(3) -> foo <DEF> 2 <UNDEF> 0
+// foo <DEF><UNDEF><UNDEF> 255 -> SetToFollowing(3) -> foo\001 <DEF><UNDEF><UNDEF> 0
+//
+//
+int WordKey::SetToFollowing(int position /* = WORD_FOLLOWING_MAX */)
+{
+  //
+  // WORD_FOLLOWING_MAX means "increment the last field".
+  //
+  if(position == WORD_FOLLOWING_MAX)
+    position = NFields() - 1;
+
+  if(position < 0 || position >= NFields()) {
+    fprintf(stderr, "WordKey::SetToFollowing invalid position = %d\n", position);
+    return NOTOK;
+  }
+
+  //
+  // Starting at <position> and moving left, find the first defined
+  // field that can be incremented without overflowing. A field that
+  // would overflow is reset to 0 and the carry moves further left.
+  // Undefined fields are skipped and left untouched.
+  //
+  int carry_at = position;
+  for(; carry_at > 0; carry_at--) {
+    if(!IsDefined(carry_at))
+      continue;
+    if(!Overflow(carry_at, 1))
+      break;
+    Set(carry_at, 0);
+  }
+
+  if(carry_at == 0) {
+    //
+    // The carry reached the word field: "increment" the word by
+    // appending \001, the smallest possible suffix. If even the word
+    // is undefined, the maximum possible key was already reached.
+    //
+    if(!IsDefined(0))
+      return WORD_FOLLOWING_ATEND;
+    GetWord() << '\001';
+  } else {
+    Get(carry_at)++;
+  }
+
+  //
+  // Every defined field to the right of <position> restarts at 0.
+  //
+  for(int field = position + 1; field < NFields(); field++)
+    if(IsDefined(field)) Set(field, 0);
+
+  return OK;
+}
+
+//
+// Return true if the key may be used as a prefix for search.
+// In other words return true if the fields set in the key
+// are all contiguous, starting from the first field in sort order.
+//
+int
+WordKey::Prefix() const
+{
+  const WordKeyInfo& info = *WordKey::Info();
+  //
+  // If all fields are set, it can be considered as a prefix although
+  // it really is a fully qualified key.
+  //
+  if(Filled()) return OK;
+  //
+  // If the first field is not set this cannot be a prefix
+  //
+  if(!IsDefined(0)) return NOTOK;
+
+  int found_unset = 0;
+  if(!IsDefinedWordSuffix()) { found_unset = 1; }
+  //
+  // Walk the fields in sorting order: once an unset field has been
+  // seen, any later set field means the key is not a prefix.
+  //
+  for(int j = WORD_FIRSTFIELD; j < info.nfields; j++)
+    {
+      if(IsDefined(j)) {
+        //
+        // Fields set, then fields unset, then field set -> not a prefix.
+        //
+        if(found_unset) return NOTOK;
+      } else {
+        //
+        // Found an unset field: fine as long as no field is set later
+        // on. NOTE: the original code lacked these braces, so the
+        // 'else' bound to the inner if; defined fields were counted
+        // as unset (and unset ones never were), wrongly rejecting
+        // valid prefixes such as "word f1 f2 <UNDEF>".
+        //
+        found_unset++;
+      }
+    }
+
+  return OK;
+}
+
+//
+// Unset all fields past the first unset field.
+// Return OK on success, or NOTOK if the first field (the word) is
+// not set, i.e. there is no possible prefix.
+//
+int
+WordKey::PrefixOnly()
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // A completely filled key is its own prefix: nothing to unset.
+  //
+  if(Filled()) return OK;
+
+  //
+  // Without the word there is no possible prefix.
+  //
+  if(!IsDefined(0)) return NOTOK;
+
+  //
+  // An undefined word suffix counts as the first hole.
+  //
+  int found_unset = !IsDefinedWordSuffix() ? 1 : 0;
+
+  //
+  // Walk the fields in sort order and clear every defined field that
+  // follows the first undefined one.
+  //
+  for(int field = WORD_FIRSTFIELD; field < info.nfields; field++) {
+    if(!IsDefined(field)) {
+      found_unset = 1;
+    } else if(found_unset) {
+      Set(field, 0);
+      Undefined(field);
+    }
+  }
+
+  return OK;
+}
+
+//
+// Unpack from data and fill fields of object
+//
+int
+WordKey::Unpack(const char* string,int length)
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // A packed key is the word bytes followed by a fixed-size block
+  // (info.num_length bytes) holding the bit-packed numerical fields.
+  //
+  if(length < info.num_length) {
+    fprintf(stderr, "WordKey::Unpack: key record length < info.num_length\n");
+    return NOTOK;
+  }
+
+  int string_length = length - info.num_length;
+  SetWord(string, string_length);
+
+  //
+  // Decode each numerical field from its bit range in the trailing
+  // block and mark it defined.
+  //
+  for(int field = WORD_FIRSTFIELD; field < info.nfields; field++) {
+    WordKeyNum value = 0;
+    int index = string_length + info.sort[field].bytes_offset;
+    WordKey::UnpackNumber((unsigned char *)&string[index],
+                          info.sort[field].bytesize,
+                          value,
+                          info.sort[field].lowbits,
+                          info.sort[field].bits);
+    Set(field, value);
+  }
+
+  return OK;
+}
+
+//
+// Pack object into the <packed> string
+//
+int
+WordKey::Pack(String& packed) const
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // The packed form is the word bytes followed by info.num_length
+  // bytes holding the bit-packed numerical fields.
+  //
+  const int length = kword.length() + info.num_length;
+
+  char* buffer = (char*)malloc(length);
+  if(buffer == 0) {
+    fprintf(stderr, "WordKey::Pack: malloc returned 0\n");
+    return NOTOK;
+  }
+  //
+  // Zero the buffer first: PackNumber ORs partial bytes into place.
+  //
+  memset(buffer, '\0', length);
+  memcpy(buffer, kword.get(), kword.length());
+
+  for(int field = WORD_FIRSTFIELD; field < info.nfields; field++) {
+    int index = kword.length() + info.sort[field].bytes_offset;
+    WordKey::PackNumber(Get(field),
+                        &buffer[index],
+                        info.sort[field].bytesize,
+                        info.sort[field].lowbits,
+                        info.sort[field].lastbits);
+  }
+
+  packed.set(buffer, length);
+  free(buffer);
+
+  return OK;
+}
+
+//
+// Copy all fields set in <other> to object, only if
+// the field is not already set in <other>
+//
+int WordKey::Merge(const WordKey& other)
+{
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // Copy every field that is defined in <other> but still undefined
+  // in this key; fields already defined here are left untouched.
+  //
+  for(int field = 0; field < info.nfields; field++) {
+    if(IsDefined(field) || !other.IsDefined(field))
+      continue;
+
+    if(info.sort[field].type == WORD_ISA_STRING) {
+      SetWord(other.GetWord());
+      //
+      // SetWord() marks the suffix defined; undo that when the source
+      // key carried no word suffix.
+      //
+      if(!other.IsDefinedWordSuffix()) UndefinedWordSuffix();
+    } else {
+      Set(field, other.Get(field));
+    }
+  }
+
+  return OK;
+}
+
+//
+// Convert the whole structure to an ascii string description
+//
+int
+WordKey::Get(String& buffer) const
+{
+  buffer.trunc();
+  const WordKeyInfo& info = *WordKey::Info();
+
+  //
+  // Emit each field in sort order, tab separated. After field 0 (the
+  // word) a virtual field shows the word suffix status.
+  //
+  for(int field = 0; field < info.nfields; field++) {
+    if(!IsDefined(field)) {
+      buffer << "<UNDEF>";
+    } else if(info.sort[field].type == WORD_ISA_STRING) {
+      buffer << GetWord();
+    } else if(info.sort[field].type == WORD_ISA_NUMBER) {
+      buffer << Get(field);
+    } else {
+      fprintf(stderr, "WordKey::Get: invalid type %d for field %d\n", info.sort[field].type, field);
+      return NOTOK;
+    }
+
+    //
+    // Output virtual word suffix field
+    //
+    if(field == 0) {
+      if(IsDefined(field) && !IsDefinedWordSuffix()) {
+        buffer << "\t<UNDEF>";
+      } else {
+        buffer << "\t<DEF>";
+      }
+    }
+    buffer << "\t";
+  }
+  return OK;
+}
+
+String
+WordKey::Get() const
+{
+  //
+  // Convenience wrapper: return the ASCII form of the key by value.
+  //
+  String ascii;
+  Get(ascii);
+  return ascii;
+}
+
+//
+// Set a key from an ascii representation
+//
+int
+WordKey::Set(const String& buffer)
+{
+  //
+  // Split the ASCII description on tabs and spaces, then delegate to
+  // SetList().
+  //
+  StringList split(buffer, "\t ");
+  return SetList(split);
+}
+
+//
+// Set a key from list of fields
+//
+int
+WordKey::SetList(StringList& fields)
+{
+  const WordKeyInfo& info = *WordKey::Info();
+  int length = fields.Count();
+
+  //
+  // One token per key component plus one for the virtual word suffix
+  // field.
+  //
+  if(length < info.nfields + 1) {
+    fprintf(stderr, "WordKey::Set: expected at least %d fields and found %d (ignored)\n", info.nfields + 1, length);
+    return NOTOK;
+  }
+  if(length < 2) {
+    fprintf(stderr, "WordKey::Set: expected at least two fields in line\n");
+    return NOTOK;
+  }
+
+  Clear();
+  fields.Start_Get();
+
+  //
+  // First the word itself, then its suffix status.
+  //
+  int i = 0;
+  String* word = (String*)fields.Get_Next();
+  if(word == 0) {
+    fprintf(stderr, "WordKey::Set: failed to get word\n");
+    return NOTOK;
+  }
+  if(word->nocase_compare("<undef>") == 0)
+    UndefinedWord();
+  else
+    SetWord(*word);
+  i++;
+
+  String* suffix = (String*)fields.Get_Next();
+  if(suffix == 0) {
+    fprintf(stderr, "WordKey::Set: failed to get word suffix %d\n", i);
+    return NOTOK;
+  }
+  if(suffix->nocase_compare("<undef>") == 0)
+    UndefinedWordSuffix();
+  else
+    SetDefinedWordSuffix();
+
+  //
+  // Then one numerical token per remaining key component.
+  //
+  for(int j = WORD_FIRSTFIELD; i < info.nfields; i++, j++) {
+    String* field = (String*)fields.Get_Next();
+
+    if(field == 0) {
+      fprintf(stderr, "WordKey::Set: failed to retrieve field %d\n", i);
+      return NOTOK;
+    }
+
+    if(field->nocase_compare("<undef>") == 0) {
+      Undefined(j);
+    } else {
+      WordKeyNum value = strtoul(field->get(), 0, 10);
+      Set(j, value);
+    }
+  }
+
+  return OK;
+}
+
+int WordKey::Write(FILE* f) const
+{
+  //
+  // Print the ASCII form of the key (see Get()) on <f>.
+  //
+  String ascii;
+  Get(ascii);
+  fprintf(f, "%s", (char*)ascii);
+  return 0;
+}
+
+void WordKey::Print() const
+{
+  //
+  // Dump the ASCII form of the key on stderr.
+  //
+  Write(stderr);
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordKey.h b/debian/htdig/htdig-3.2.0b6/htword/WordKey.h
new file mode 100644
index 00000000..3890ad47
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordKey.h
@@ -0,0 +1,612 @@
+// WordKey.h
+//
+// NAME
+// inverted index key.
+//
+// SYNOPSIS
+//
+// #include <WordKey.h>
+//
+// #define DOCID 1
+// #define LOCATION 1
+//
+// WordKey key("word <DEF> 1 2");
+// key.Set(DOCID, 100);
+// key.SetWord("other");
+//
+// DESCRIPTION
+//
+// Describes the key used to store a entry in the inverted index.
+// The structure of a key is described by the <i>WordKeyInfo</i>
+// Each field in the key has a bit in the <b>set</b>
+// member that says if it is set or not. This bit allows to
+// say that a particular field is <i>undefined</i> regardless of
+// the actual value stored. The methods
+// <b>IsDefined, SetDefined</b> and <b>Undefined</b> are used to manipulate
+// the <i>defined</i> status of a field. The <b>Pack</b> and <b>Unpack</b>
+// methods are used to convert to and from the disk storage representation
+// of the key.
+//
+// Generic functions to manipulate the key should use the <i>WordKeyInfo</i>
+// information to work regardless of the actual structure of the key.
+//
+// Suffix definition: a word suffix is a kind of marker that says if
+// the word is a full word or only the beginning of a
+// word. If a word has a suffix then it's a full word. If it
+// has no suffix then it's only the beginning of a word.
+// This is mostly useful when specifying search keys. If a
+// search key word has no suffix, the search mechanism is
+// expected to return all words that begin with the word. If
+// the search key word has a suffix, only words that exactly
+// match the search key word will be returned.
+//
+// ASCII FORMAT
+//
+// The ASCII description is a string with fields separated by tabs or
+// white space.
+// <pre>
+// Example: Foo <DEF> 0 1 4 2
+// Field 1: The word as a string or <UNDEF> if not defined
+// Field 2: <DEF> if suffix defined, <UNDEF> if suffix undefined
+// Field 3 to nfield + 1: numerical value of the field or <UNDEF> if
+// not defined
+//
+// </pre>
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+
+#ifndef _WordKey_h_
+#define _WordKey_h_
+
+#ifndef SWIG
+#include "db.h"
+#include "htString.h"
+#include "StringList.h"
+#endif /* SWIG */
+
+//
+// WORDSUFFIX:
+//
+// field in set flag that says if a word is just a prefix (incomplete word)
+// WORD_KEY_WORDSUFFIX_DEFINED -> means that word IS complete (not a prefix)
+//
+#define WORD_KEY_WORDSUFFIX_DEFINED (1 << 30)
+#define WORD_KEY_WORD_DEFINED 1
+#define WORD_KEY_WORDFULLY_DEFINED ( WORD_KEY_WORDSUFFIX_DEFINED | WORD_KEY_WORD_DEFINED )
+
+//
+// Possible return values of Outbound/Overflow/Underflow methods
+//
+#define WORD_INBOUND 0
+#define WORD_OVERFLOW 1
+#define WORD_UNDERFLOW 2
+
+//
+// Possible return values of SetToFollowing
+//
+#define WORD_FOLLOWING_ATEND 0x0001
+//
+// Default value for position argument of SetToFollowing
+// meaning NFields() - 1
+//
+#define WORD_FOLLOWING_MAX -1
+
+//
+// Position of the first numerical field (just after the word)
+//
+#define WORD_FIRSTFIELD 1
+
+//
+// Unknown field position
+//
+#define WORD_KEY_UNKNOWN_POSITION -1
+
+#ifndef SWIG
+// C comparison function interface for Berkeley DB (bt_compare)
+//
+int word_db_cmp(const DBT *a, const DBT *b);
+int word_only_db_cmp(const DBT *a, const DBT *b);
+#endif /* SWIG */
+
+#ifndef SWIG
+#include"WordKeyInfo.h"
+#endif /* SWIG */
+
+//
+// Describe a word occurrence
+//
+// !!!!!!!DEBUGTMP
+#ifndef SWIG
+#define WORD_FATAL_ABORT fflush(stdout);fprintf(stderr,"FATAL ERROR at file:%s line:%d !!!\n",__FILE__,__LINE__);fflush(stderr);(*(int *)NULL)=1
+#define word_errr(s) {fprintf(stderr,"FATAL ERROR:%s\n",s);WORD_FATAL_ABORT;}
+#endif /* SWIG */
+class WordKey
+{
+ public:
+ //
+ // Constructors, destructors, copy and clear
+ //
+ //-
+ // Constructor. Build an empty key.
+ //
+ WordKey() { Initialize(); }
+#ifndef SWIG
+ //-
+ // Constructor. Initialize from an ASCII description of a key.
+ // See <i>ASCII FORMAT</i> section.
+ //
+ WordKey(const String& word)
+ {
+ Initialize();
+ Set(word);
+ }
+ //
+ // Copy constructor (needed because of the array pointer)
+ //
+ WordKey(const WordKey &other)
+ {
+ Initialize();
+ CopyFrom(other);
+ }
+#endif /* SWIG */
+ ~WordKey()
+ {
+ delete [] numerical_fields;
+ }
+#ifndef SWIG
+ protected:
+ //
+ // Constructor helper, allocate members and set to empty key
+ //
+ void Initialize()
+ {
+ if(!Info())
+ {
+ fprintf(stderr, "WordKey::WordKey used before word_key_info set\n");
+ word_errr("WordKey::initialize");
+ }
+
+ numerical_fields = new WordKeyNum[NFields()-1];
+ Clear();
+ }
+ public:
+ //
+ // Copy operator (needed because of the array pointer)
+ //
+ void operator =(const WordKey &other)
+ {
+ Clear();
+ CopyFrom(other);
+ }
+#endif /* SWIG */
+ //-
+ // Copy <b>other</b> into object.
+ //
+ void CopyFrom(const WordKey &other)
+ {
+ if(other.IsDefined(0)) { SetWord(other.GetWord()); }
+ for(int i=1;i<NFields();i++)
+ {
+ if(other.IsDefined(i))
+ {
+ Set(i, other.Get(i));
+ }
+ }
+ setbits=other.setbits;
+ }
+ //-
+ // Reset to empty key.
+ //
+ void Clear()
+ {
+ setbits = 0;
+ kword.trunc();
+ for(int i=0;i<NFields()-1;i++)
+ {
+ numerical_fields[i] = 0;
+ }
+ }
+
+#ifndef SWIG
+ //-
+ // Convenience function to access key structure
+ // information (see <i>WordKeyInfo(3)</i>).
+ //
+ static inline const WordKeyInfo *Info() { return WordKeyInfo::Instance(); }
+#endif /* SWIG */
+ //-
+ // Convenience functions to access the total number of fields
+ // in a key (see <i>WordKeyInfo(3)</i>).
+ //
+ static inline int NFields() { return Info()->nfields; }
+ //-
+ // Convenience functions to access the
+ // maximum possible value for field at <b>position.</b>
+ // in a key (see <i>WordKeyInfo(3)</i>).
+ //
+ static inline WordKeyNum MaxValue(int position) { return Info()->sort[position].MaxValue(); }
+
+ //
+ // Accessors
+ //
+ //-
+ // Returns the word as a const.
+ //
+#ifndef SWIG
+ inline const String& GetWord() const { return kword; }
+#endif /* SWIG */
+
+ //-
+ // Returns the word.
+ //
+ inline String& GetWord() { return kword; }
+ //-
+ // Set the word.
+ //
+ inline void SetWord(const String& arg) { kword = arg; setbits |= WORD_KEY_WORDFULLY_DEFINED; }
+ protected:
+ //-
+ // Set the word.
+ //
+ inline void SetWord(const char* arg, int arg_length) { kword.set(arg, arg_length); setbits |= WORD_KEY_WORDFULLY_DEFINED; }
+ public:
+ //-
+ // Change status of the word to <i>undefined.</i> Also undefines
+ // its suffix.
+ //
+ inline void UndefinedWord() { kword.trunc(); setbits &= ~WORD_KEY_WORDFULLY_DEFINED; }
+ //-
+ // Set the status of the word suffix to <i>undefined.</i>
+ //
+ inline void UndefinedWordSuffix() {setbits &= ~WORD_KEY_WORDSUFFIX_DEFINED;}
+ //-
+ // Set the status of the word suffix to <i>defined.</i>
+ //
+ inline void SetDefinedWordSuffix() {setbits |= WORD_KEY_WORDSUFFIX_DEFINED;}
+ //-
+ // Returns true if word suffix is <i>defined</i>, false otherwise.
+ //
+ inline int IsDefinedWordSuffix() const {return( (setbits & WORD_KEY_WORDSUFFIX_DEFINED) == WORD_KEY_WORDSUFFIX_DEFINED);}
+ //
+ // Get/Set numerical fields
+ //
+ //-
+ // Return value of numerical field at <b>position</b> as const.
+ //
+ inline WordKeyNum Get(int position) const
+ {
+ // if(position<1 || position>=NFields()){errr("Get: out of bounds");}
+ return(numerical_fields[position-1]);
+ }
+#ifndef SWIG
+ //-
+ // Return value of numerical field at <b>position.</b>
+ //
+ inline WordKeyNum& Get(int position)
+ {
+ return(numerical_fields[position-1]);
+ }
+ //-
+ // Return value of numerical field at <b>position</b> as const.
+ //
+ inline const WordKeyNum & operator[] (int position) const { return(numerical_fields[position-1]); }
+ //-
+ // Return value of numerical field at <b>position.</b>
+ //
+ inline WordKeyNum & operator[] (int position) { return(numerical_fields[position-1]); }
+#endif /* SWIG */
+ //-
+ // Set value of numerical field at <b>position</b> to <b>val.</b>
+ //
+ inline void Set(int position, WordKeyNum val)
+ {
+ // if(position<1 || position>=NFields()){errr("Set: out of bounds");}
+ SetDefined(position);
+ numerical_fields[position-1] = val;
+ }
+
+  //
+  // Key field value existence. Defined means the value of the field contains
+  // a valid value. Undefined means the value of the field is not valid.
+ //
+ //-
+ // Returns true if field at <b>position</b> is <i>defined</i>, false
+ // otherwise.
+ //
+ int IsDefined(int position) const { return setbits & (1 << position); }
+ //-
+ // Value in field <b>position</b> becomes <i>defined.</i>
+ //
+ void SetDefined(int position) { setbits |= (1 << position); }
+ //-
+ // Value in field <b>position</b> becomes <i>undefined.</i>
+ //
+ void Undefined(int position) { setbits &= ~(1 << position); }
+
+#ifndef SWIG
+ //
+ // Set and Get the whole structure from/to ASCII description
+ //-
+ // Set the whole structure from ASCII string in <b>bufferin.</b>
+ // See <i>ASCII FORMAT</i> section.
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ int Set(const String& bufferin);
+ int SetList(StringList& fields);
+ //-
+ // Convert the whole structure to an ASCII string description
+ // in <b>bufferout.</b>
+ // See <i>ASCII FORMAT</i> section.
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ int Get(String& bufferout) const;
+ //-
+ // Convert the whole structure to an ASCII string description
+ // and return it.
+ // See <i>ASCII FORMAT</i> section.
+ //
+ String Get() const;
+#endif /* SWIG */
+
+ //
+ // Storage format conversion
+ //
+#ifndef SWIG
+ //-
+ // Set structure from disk storage format as found in
+ // <b>string</b> buffer or length <b>length.</b>
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ int Unpack(const char* string, int length);
+ //
+ //-
+ // Set structure from disk storage format as found in
+ // <b>data</b> string.
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ inline int Unpack(const String& data) { return(Unpack(data,data.length())); }
+ //
+ //-
+ // Convert object into disk storage format as found in
+ // and place the result in <b>data</b> string.
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ int Pack(String& data) const;
+#endif /* SWIG */
+
+ //
+ // Transformations
+ //
+ //-
+ // Copy each <i>defined</i> field from other into the object, if
+ // the corresponding field of the object is not defined.
+ // Return OK if successfull, NOTOK otherwise.
+ //
+ int Merge(const WordKey& other);
+ //-
+ // Undefine all fields found after the first undefined field. The
+ // resulting key has a set of defined fields followed by undefined fields.
+ // Returns NOTOK if the word is not defined because the resulting key would
+ // be empty and this is considered an error. Returns OK on success.
+ //
+ int PrefixOnly();
+#ifndef SWIG
+ //-
+ // Implement ++ on a key.
+ //
+ // It behaves like arithmetic but follows these rules:
+ // <pre>
+ // . Increment starts at field <position>
+ // . If a field value overflows, increment field <b>position</b> - 1
+ // . Undefined fields are ignored and their value untouched
+ // . Incrementing the word field is done by appending \001
+ // . When a field is incremented all fields to the left are set to 0
+ // </pre>
+ // If position is not specified it is equivalent to NFields() - 1.
+ // It returns OK if successfull, NOTOK if <b>position</b> out of range or
+ // WORD_FOLLOWING_ATEND if the maximum possible value was reached.
+ //
+ int SetToFollowing(int position = WORD_FOLLOWING_MAX);
+#endif /* SWIG */
+
+ //
+ // Predicates
+ //
+ //-
+ // Return true if all the fields are <i>defined</i>, false otherwise.
+ //
+ int Filled() const { return setbits == (unsigned int) (((1 << NFields()) - 1) | WORD_KEY_WORDSUFFIX_DEFINED); }
+ //-
+ // Return true if no fields are <i>defined</i>, false otherwise.
+ //
+ int Empty() const { return setbits == 0; }
+ //-
+ // Return true if the object and <b>other</b> are equal.
+ // Only fields defined in both keys are compared.
+ //
+ int Equal(const WordKey& other) const;
+ //-
+  // Return true if the object and <b>other</b> are equal.
+  // All fields are compared. If a field is defined in one key
+  // and not defined in the other, the keys are not considered
+  // equal.
+  //
+ int ExactEqual(const WordKey& other) const {return(Equal(other) && other.setbits == setbits);}
+#ifndef SWIG
+ //-
+ // Return true if the object and <b>other</b> are equal.
+ // The packed string are compared. An <i>undefined</i> numerical field
+ // will be 0 and therefore undistinguishable from a <i>defined</i> field
+ // whose value is 0.
+ //
+ int PackEqual(const WordKey& other) const;
+ //-
+ // Return true if adding <b>increment</b> in field at <b>position</b> makes
+ // it overflow or underflow, false if it fits.
+ //
+ int Outbound(int position, int increment) {
+ if(increment < 0) return Underflow(position, increment);
+ else if(increment > 0) return Overflow(position, increment);
+ else return WORD_INBOUND;
+ }
+ //-
+ // Return true if adding positive <b>increment</b> to field at
+ // <b>position</b> makes it overflow, false if it fits.
+ //
+ int Overflow(int position, int increment) {
+ return MaxValue(position) - Get(position) < (WordKeyNum)increment ? WORD_OVERFLOW : WORD_INBOUND;
+ }
+ //-
+ // Return true if subtracting positive <b>increment</b> to field
+ // at <b>position</b> makes it underflow, false if it fits.
+ //
+ int Underflow(int position, int increment) {
+ return Get(position) < (WordKeyNum)(-increment) ? WORD_UNDERFLOW : WORD_INBOUND;
+ }
+#endif /* SWIG */
+ //-
+ // Return OK if the key may be used as a prefix for search.
+ // In other words return OK if the fields set in the key
+ // are all contiguous, starting from the first field.
+ // Otherwise returns NOTOK
+ //
+ int Prefix() const;
+
+#ifndef SWIG
+ //-
+ // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion.
+ // <b>a</b> and <b>b</b> are packed keys. The semantics of the
+ // returned int is as of strcmp and is driven by the key description
+ // found in <i>WordKeyInfo.</i>
+ //
+ static int Compare(const String& a, const String& b);
+ static int Compare_WordOnly(const String& a, const String& b);
+ //-
+ // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion.
+ // <b>a</b> and <b>b</b> are packed keys. The semantics of the
+ // returned int is as of strcmp and is driven by the key description
+ // found in <i>WordKeyInfo.</i>
+ //
+ static int Compare(const char *a, int a_length, const char *b, int b_length);
+ static int Compare_WordOnly(const char *a, int a_length, const char *b, int b_length);
+ //-
+ // Compare object defined fields with <b>other</b> key defined fields only,
+ // ignore fields that are not defined in object or <b>other.</b>
+ // Return 1 if different 0 if equal.
+ // If different, <b>position</b> is set to the field number that differ,
+ // <b>lower</b> is set to 1 if Get(<b>position</b>) is lower than
+ // other.Get(<b>position</b>) otherwise lower is set to 0.
+ //
+ int Diff(const WordKey& other, int& position, int& lower);
+
+ //-
+ // Print object in ASCII form on <b>f</b> (uses <i>Get</i> method).
+ // See <i>ASCII FORMAT</i> section.
+ //
+ int Write(FILE* f) const;
+#endif /* SWIG */
+ //-
+ // Print object in ASCII form on <b>stdout</b> (uses <i>Get</i> method).
+ // See <i>ASCII FORMAT</i> section.
+ //
+ void Print() const;
+
+#ifndef SWIG
+
+private:
+
+ //
+ // Convert a single number from and to disk storage representation
+ //
+ static int UnpackNumber(const unsigned char* from, const int from_size, WordKeyNum &res, const int lowbits, const int bits);
+ static int PackNumber(WordKeyNum from, char* to, int to_size, int lowbits, int lastbits);
+
+ //
+ // Data members
+ //
+ //
+ // Bit field for defined/undefined status of each key field
+ //
+ unsigned int setbits;
+ //
+ // Holds the numerical values of the key fields
+ //
+ WordKeyNum *numerical_fields;
+ //
+ // Holds the word key field
+ //
+ String kword;
+#endif /* SWIG */
+};
+
+#ifndef SWIG
+//
+// Build a byte mask with the <b> low bits set to 1. <b> may have a
+// value from 0 to 8; both 0 and 8 yield 0xff, i.e. all bits set.
+//
+#define WORD_BIT_MASK(b) ((b) == 0 ? 0xff : ((( 1 << (b)) - 1) & 0xff))
+#define WORD_BIT_MASK2(b) ((1<<(b)) -1)
+//
+// Decode integer found in <from> using <from_size> bytes. The integer starts at <lowbits> bit
+// in the first byte and occupies a total of <bits> bits. The resulting integer is stored in *<top>
+//
+inline int WordKey::UnpackNumber(const unsigned char* from, const int from_size, WordKeyNum& to, const int lowbits, const int bits)
+{
+  to = 0;
+  // The value starts <lowbits> bits into the first byte.
+  to = ((from[0] & 0xff) >> lowbits);
+
+  // Keep only the 8 - lowbits bits actually taken from the first byte
+  // (redundant with the shift above, but harmless).
+  if(lowbits) to &= WORD_BIT_MASK(8 - lowbits);
+
+  if(from_size == 1)
+    to &= WORD_BIT_MASK(bits);
+  else {
+    // Subsequent bytes contribute 8 bits each, shifted past the
+    // 8 - lowbits bits already taken from the first byte.
+    for(int i = 1; i < from_size; i++) {
+      to |= (from[i] & 0xff) << ((i - 1) * 8 + (8 - lowbits));
+    }
+  }
+
+  // Mask the result down to exactly <bits> bits.
+  if(bits < (int)(sizeof(WordKeyNum) * 8))
+    to &= ( 1 << bits ) - 1;
+
+  return OK;
+}
+
+//
+// Encode integer <from>, starting at bit <lowbits> in byte array <to>. It will span
+// <to_size> bytes and only the <lastbits> bits of the last byte (to[to_size - 1]) are
+// filled. See word_builder.pl for more information.
+//
+inline int WordKey::PackNumber(WordKeyNum from, char* to, int to_size, int lowbits, int lastbits)
+{
+  // first byte: OR into the upper 8 - lowbits bits, preserving the
+  // <lowbits> bits already written there by the previous field
+  // (assumes the buffer was zeroed beforehand, as Pack() does).
+  if(lowbits) {
+    to[0] |= ((from & WORD_BIT_MASK(8 - lowbits)) << lowbits) & 0xff;
+  } else {
+    to[0] = from & 0xff;
+  }
+  from >>= 8 - lowbits;
+
+  // following bytes
+  for(int i = 1; i < to_size; i++) {
+    to[i] = from & 0xff;
+    from >>= 8;
+  }
+
+  // clip the end off (clobbers anything left at the end of this byte)
+  if(lastbits) to[to_size - 1] &= WORD_BIT_MASK(lastbits);
+
+  return OK;
+}
+
+#undef WORD_BIT_MASK
+#endif /* SWIG */
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.cc b/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.cc
new file mode 100644
index 00000000..5a7adffc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.cc
@@ -0,0 +1,225 @@
+// WordKeyInfo.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include "WordKeyInfo.h"
+#include "StringList.h"
+
+#define WORDKEYFIELD_BITS_MAX 64
+
+//
+// WordKeyField implementation
+//
+//
+// Describe a numerical field: record its name and bit width and compute
+// its packed bit/byte layout, starting where the <previous> field ended
+// (offset 0 when <previous> is null).  Returns 0 on success, EINVAL when
+// the cumulated offset exceeds the maximum key size.
+//
+int WordKeyField::SetNum(WordKeyField *previous, char *nname, int nbits)
+{
+  type = WORD_ISA_NUMBER;
+  name.set(nname, strlen(nname));
+
+  bits = nbits;
+  bits_offset = (previous ? previous->bits_offset + previous->bits : 0 );
+
+  if(bits_offset < 0 ||
+     bits_offset > WORDKEYFIELD_BITS_MAX*WORD_KEY_MAX_NFIELDS) {
+    fprintf(stderr, "WordKeyField::WordKeyField: bits_offset: %d out of bounds\n", bits_offset);
+    return EINVAL;
+  }
+  // Derive the byte layout: first byte touched, number of bytes spanned,
+  // and the bit positions used within the first and last bytes.
+  bytes_offset = bits_offset / 8;
+  bytesize = ((bits_offset + bits - 1) / 8) - bytes_offset + 1;
+  lastbits = (bits_offset + bits) % 8;
+  lowbits = bits_offset % 8;
+
+  return 0;
+}
+
+//
+// Describe the one and only string field, conventionally named "Word".
+// Always succeeds and returns 0.
+//
+int WordKeyField::SetString()
+{
+  name.set("Word");
+  type = WORD_ISA_STRING;
+  return 0;
+}
+
+//
+// Tabulate for printing
+//
+//
+// Tabulate for printing: emit <n> columns of filler character <c>,
+// replacing every fourth column with a letter ('a', 'b', ...) so the
+// output doubles as a crude column ruler.
+//
+static void nprint(char c, int n)
+{
+  for(int i = 0; i < n; i++) {
+    if(!(i % 4)) {
+      printf("%c", 'a' + i / 4);
+    } else {
+      printf("%c", c);
+    }
+  }
+}
+
+//
+// Print object on standard output
+//
+//
+// Print a description of the field on standard output: just the type for
+// the string field ("Word"), the full bit/byte layout (aligned under the
+// nprint ruler) for numerical fields.
+//
+void
+WordKeyField::Show()
+{
+  // nocase_compare returns 0 on a match, so this branch is the "Word"
+  // string field.
+  if(!name.nocase_compare("Word")) {
+    printf("Word type: %2d\n", type);
+  } else {
+    nprint(' ',bits_offset);
+    printf("\"%s\" type:%2d lowbits:%2d lastbits:%2d\n",
+           (char *)name,
+           type,
+           lowbits,
+           lastbits);
+    nprint(' ',bits_offset);
+    printf("|---bytesize:%2d bytes_offset:%2d bits:%2d bits_offset:%2d\n", bytesize, bytes_offset, bits, bits_offset);
+  }
+}
+
+//
+// WordKeyInfo implementation
+//
+
+WordKeyInfo* WordKeyInfo::instance = 0;
+
+//
+// Build the key description from the wordlist_wordkey_description
+// configuration parameter.  When the parameter is missing the object is
+// left unusable (sort == NULL, nfields == -1) and an error is printed.
+//
+WordKeyInfo::WordKeyInfo(const Configuration& config)
+{
+  sort = NULL;
+  nfields = -1;
+  num_length = 0;
+
+  const String &keydesc = config["wordlist_wordkey_description"];
+
+  if(!keydesc.empty()) {
+    Set(keydesc);
+  } else {
+    fprintf(stderr, "WordKeyInfo::WordKeyInfo: didn't find key description in config\n");
+  }
+}
+
+//
+// Replace the unique WordKeyInfo instance with one built from
+// <config_arg>.  Any previous instance is destroyed.
+//
+void
+WordKeyInfo::Initialize(const Configuration &config_arg)
+{
+  if(instance != 0)
+    delete instance;
+  instance = new WordKeyInfo(config_arg);
+}
+
+//
+// Convenience wrapper around Initialize: build a temporary Configuration
+// holding only wordlist_wordkey_description = <desc> and initialize the
+// unique instance from it.  The temporary is parsed in the constructor,
+// so its destruction afterwards is harmless.
+//
+void
+WordKeyInfo::InitializeFromString(const String &desc)
+{
+  Configuration config;
+  config.Add("wordlist_wordkey_description", desc);
+  Initialize(config);
+}
+
+//
+// Allocate the sort[] array for <nnfields> fields and reset num_length.
+// Returns 0 on success, ENOMEM on allocation failure.
+//
+int
+WordKeyInfo::Alloc(int nnfields)
+{
+  //
+  // Release any previously allocated field array so that calling Alloc
+  // more than once (e.g. via repeated Set calls) does not leak memory.
+  //
+  if(sort) {
+    delete [] sort;
+    sort = 0;
+  }
+  nfields = nnfields;
+  //
+  // NOTE: with a standard-conforming operator new this check is dead
+  // (new[] throws std::bad_alloc instead of returning 0); kept for
+  // compilers built with non-throwing new.
+  //
+  if(!(sort = new WordKeyField[nfields])) {
+    fprintf(stderr, "WordKeyInfo::Alloc: cannot allocate\n");
+    return ENOMEM;
+  }
+  num_length = 0;
+  return 0;
+}
+
+//
+// Parse a key description of the form "Word/<name> <bits>[/...]" and
+// fill the sort[] array accordingly.  Returns 0 on success, EINVAL on a
+// malformed description, ENOMEM if allocation fails.
+//
+int
+WordKeyInfo::Set(const String &desc)
+{
+  int ret = 0;
+  StringList fields(desc, "/");
+
+  if(fields.Count() > WORD_KEY_MAX_NFIELDS) {
+    fprintf(stderr, "WordKeyInfo::Set: too many fields in %s, max is %d\n", (const char*)desc, WORD_KEY_MAX_NFIELDS);
+    return EINVAL;
+  }
+
+  if(fields.Count() <= 0) {
+    fprintf(stderr, "WordKeyInfo::Set: no fields\n");
+    return EINVAL;
+  }
+
+  if((ret = Alloc(fields.Count())))
+    return ret;
+
+  // <previous> tracks the last *numerical* field only: each numerical
+  // field is laid out starting where the previous numerical one ended.
+  WordKeyField* previous = 0;
+  int i;
+  for(i = 0; i < fields.Count(); i++) {
+    char* field = fields[i];
+    WordKeyField& key_field = sort[i];
+    if(!mystrcasecmp(field, "word")) {
+      //
+      // String field, mandatory in first position
+      //
+      if(i != 0) {
+        fprintf(stderr, "WordKeyInfo::Set: Word field must show in first position %s\n", (const char*)desc);
+        return EINVAL;
+      }
+      key_field.SetString();
+    } else {
+      //
+      // Numerical field: "<name> <bits>" separated by white space
+      //
+      StringList pair(field, "\t ");
+
+      if(pair.Count() != 2) {
+        fprintf(stderr, "WordKeyInfo::AddField: there must be exactly two strings separated by a white space (space or tab) in a field description (%s in key description %s)\n", field, (const char*)desc);
+        return EINVAL;
+      }
+
+      int bits = atoi(pair[1]);
+      char* name = pair[0];
+      key_field.SetNum(previous, name, bits);
+      previous = &key_field;
+    }
+  }
+
+  //
+  // Total length in bytes of the numerical fields.
+  // NOTE(review): assumes the last field is numerical -- a description
+  // ending with "Word" would leave num_length 0; confirm with callers.
+  //
+  num_length = sort[i - 1].bytes_offset + sort[i - 1].bytesize;
+
+  return ret;
+}
+
+//
+// Print a debugging description of the whole key layout on stderr: each
+// field's own Show() output, then an ASCII map with one character per
+// bit (the field index modulo 10, 'X' where two fields overlap, '_' for
+// unused bits) followed by a byte-position ruler.
+//
+void
+WordKeyInfo::Show()
+{
+  fprintf(stderr, "-----------------------------------------\n");
+  fprintf(stderr, "nfields:%3d num_length:%3d\n", nfields, num_length);
+  int i;
+  for(i = 0; i < nfields; i++)
+    sort[i].Show();
+
+  // One slot per possible bit of the packed key, '_' meaning unused.
+  char str[WORDKEYFIELD_BITS_MAX*WORD_KEY_MAX_NFIELDS];
+  memset(str, '_', WORDKEYFIELD_BITS_MAX*WORD_KEY_MAX_NFIELDS);
+
+  int last = 0;
+  int j;
+  for(j = 0; j < nfields; j++) {
+    for(i = 0; i < sort[j].bits; i++) {
+      char c = (j % 10) + '0';
+      int pos = sort[j].bits_offset + i;
+      // A slot already claimed by another field means the layout is
+      // inconsistent; flag it with 'X'.
+      if(str[pos] != '_') {
+        fprintf(stderr, "WordKeyInfo::Show: overlaping bits (field %d), bit %d\n", j, i);
+        c='X';
+      }
+      str[pos] = c;
+      if(last < pos) last = pos;
+    }
+  }
+  str[last + 1] = '\0';
+  fprintf(stderr, "%s (bits)\n",str);
+  fprintf(stderr, "^0 ^1 ^2 ^3 ^4 ^5 ^6 ^7\n");
+  fprintf(stderr, "0123456701234567012345670123456701234567012345670123456701234567\n");
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.h b/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.h
new file mode 100644
index 00000000..039dbf4f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.h
@@ -0,0 +1,170 @@
+// WordKeyInfo.h
+//
+// NAME
+// information on the key structure of the inverted index.
+//
+// SYNOPSIS
+//
+// Use the WordKey::NField() method instead.
+//
+// DESCRIPTION
+//
+// Describe the structure of the index key (<i>WordKey</i>).
+// The description includes the layout of the packed version
+// stored on disk.
+//
+// CONFIGURATION
+//
+// wordlist_wordkey_description <desc> (no default)
+// Describe the structure of the inverted index key.
+// In the following explanation of the <i><desc></i> format
+// mandatory words are
+// in bold and values that must be replaced in italic.
+// <br>
+// <b>Word</b>/<i>name bits</i>[/...]
+// <br>
+// The <i>name</i> is an alphanumerical symbolic name for the key field.
+// The <i>bits</i> is the number of bits required to store this field.
+// Note that all values are stored in unsigned integers (unsigned int).
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+
+#ifndef _WordKeyInfo_h_
+#define _WordKeyInfo_h_
+
+#include "Configuration.h"
+
+//
+// Type number associated to each possible type for a key element
+// (type field of struct WordKeyInfo).
+//
+#define WORD_ISA_NUMBER 1
+#define WORD_ISA_STRING 2
+
+//
+// Maximum number of fields in a key description
+//
+#define WORD_KEY_MAX_NFIELDS 20
+
+//
+// All numerical fields of the key are typed WordKeyNum.
+// Most of the code strongly assume that it is unsigned.
+// Mainly provided to be replaced by unsigned longlong WordKeyNum
+// for 64 bits machines.
+//
+typedef unsigned int WordKeyNum;
+
+//
+// Maximum number of bits in a field
+//
+#define WORD_KEY_MAXBITS ((int)(sizeof(WordKeyNum) * 8))
+#define WORD_KEY_MAXVALUE ((WordKeyNum)~(WordKeyNum)0)
+
+//
+// Description of a single field
+//
+//
+// Description of a single field
+//
+class WordKeyField
+{
+ public:
+  WordKeyField() {
+    type = lowbits = lastbits = bytesize = bytes_offset = bits = bits_offset = 0;
+  }
+
+  //
+  // Precompute information that will be needed to pack/unpack the key
+  // to/from disk.
+  //
+  // The <previous> field is used to compute the position of the field
+  // in packed string. <nname> is the symbolic name of the field
+  // <nbits> is the number of bits actually used in a number.
+  //
+  int SetNum(WordKeyField *previous, char *nname, int nbits);
+  //
+  // Set the one and only string field
+  //
+  int SetString();
+
+  //
+  // Maximum possible value for this field.  The shift is performed on a
+  // WordKeyNum (unsigned): shifting a plain int left by 31 bits, as the
+  // previous code did, is undefined behaviour.
+  //
+  WordKeyNum MaxValue() const {
+    return bits >= WORD_KEY_MAXBITS ? WORD_KEY_MAXVALUE : (((WordKeyNum)1 << bits) - 1);
+  }
+
+  //
+  // Debugging and printing
+  //
+  void Show();
+
+  String name;                  // Symbolic name of the field
+  int type;                     // WORD_ISA_{STRING|NUMBER}
+  //
+  // 01234567012345670123456701234567
+  // +-------+-------+-------+-------+--
+  //    100101010011100111101011110
+  //    ^^^                   ^^^^^^
+  //    |                     |
+  //    lowbits = 3           lastbits = 6
+  //
+  int lowbits;
+  int lastbits;
+  int bytesize;                 // Number of bytes involved
+  int bytes_offset;             // Offset of first byte from start
+  int bits;                     // Size of field in bits
+  int bits_offset;              // Offset of first bit from start
+};
+
+//
+// Description of the key structure
+//
+//
+// Description of the key structure
+//
+class WordKeyInfo
+{
+ public:
+  WordKeyInfo(const Configuration& config);
+  ~WordKeyInfo() { if(sort) delete [] sort; }
+
+  //
+  // Unique instance handlers
+  //
+  // Replace the unique instance with one built from <config>.
+  static void Initialize(const Configuration& config);
+  // Replace the unique instance with one built from description <desc>.
+  static void InitializeFromString(const String &desc);
+  // Return the unique instance, or 0 (with an error message) when
+  // Initialize was never called.
+  static WordKeyInfo* Instance() {
+    if(instance) return instance;
+    fprintf(stderr, "WordKeyInfo::Instance: no instance\n");
+    return 0;
+  }
+
+  // Allocate the sort[] array for <nnfields> fields.
+  int Alloc(int nnfields);
+  // Parse key description <desc> and fill sort[].
+  int Set(const String &desc);
+
+  // Print a debugging description of the key layout on stderr.
+  void Show();
+
+  //
+  // Array describing the fields, in sort order.
+  //
+  WordKeyField *sort;
+  //
+  // Total number of fields
+  //
+  int nfields;
+  //
+  // Total number of bytes used by numerical fields
+  //
+  int num_length;
+
+  //
+  // Unique instance pointer
+  //
+  static WordKeyInfo* instance;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordList.cc b/debian/htdig/htdig-3.2.0b6/htword/WordList.cc
new file mode 100644
index 00000000..566acb93
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordList.cc
@@ -0,0 +1,436 @@
+//
+// WordList.cc
+//
+// WordList: Interface to the word database. Previously, this wrote to
+// a temporary text file. Now it writes directly to the
+// word database.
+// NOTE: Some code previously attempted to directly read from
+// the word db. This will no longer work, so it's preferred to
+// use the access methods here.
+// Configuration parameter used:
+// wordlist_extend
+// wordlist_verbose 1 walk logic
+// wordlist_verbose 2 walk logic details
+// wordlist_verbose 3 walk logic lots of details
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordList.cc,v 1.13 2004/05/28 13:15:27 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordList.h"
+#include "WordReference.h"
+#include "WordRecord.h"
+#include "WordType.h"
+#include "WordStat.h"
+#include "Configuration.h"
+#include "htString.h"
+#include "HtPack.h"
+#include "HtTime.h"
+#include "WordDBCompress.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Build an inverted-index handler bound to no file yet; behaviour flags
+// are read from the configuration (see CONFIGURATION in WordList.h).
+//
+WordList::WordList(const Configuration& config_arg) :
+  wtype(config_arg),
+  config(config_arg)
+{
+  // The database itself hasn't been opened yet
+  isopen = 0;
+  isread = 0;
+  extended = config.Boolean("wordlist_extend");
+  verbose = config.Value("wordlist_verbose");
+  compressor = 0;
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Close the index (and free the compressor) if still open.
+//
+WordList::~WordList()
+{
+  Close();
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Open inverted index <filename>.  <mode> is O_RDONLY or O_RDWR,
+// optionally or'ed with O_TRUNC to reset an existing index.  When
+// word_only is true entries compare equal on the "word" part alone
+// (query-only usage).  Returns OK on success, NOTOK otherwise.
+//
+int WordList::Open(const String& filename, int mode, int word_only)
+{
+  int usecompress=0;
+
+  // If word_only, entries compare equal if the "word" part matches.
+  // This should only be used for querying the database, not writing it.
+  // It is needed by speling to test for the existence of words.
+  db.set_bt_compare(word_only ? word_only_db_cmp : word_db_cmp);
+
+  if(config.Value("wordlist_page_size", 0))
+    db.set_pagesize(config.Value("wordlist_page_size"));
+
+  if(config.Boolean("wordlist_compress") == 1) {
+    usecompress = DB_COMPRESS;
+    // Owned by this object; released in Close().
+    WordDBCompress* compressor = new WordDBCompress(
+      config.Boolean("wordlist_compress_zlib",0), config.Value("compression_level",0));
+
+    // compressor->debug = config.Value("wordlist_compress_debug");
+    SetCompressor(compressor);
+    db.CmprInfo(compressor->CmprInfo());
+  }
+
+  int flags = (mode & O_RDWR) ? DB_CREATE : DB_RDONLY;
+  if(mode & O_TRUNC) {
+    if(flags == DB_CREATE)
+      flags |= DB_TRUNCATE;
+    else
+      fprintf(stderr, "WordList::Open: O_TRUNC | O_RDONLY is meaningless\n");
+  }
+  flags |= usecompress;
+
+  int ret = db.Open(filename, DB_BTREE, flags, 0666) == 0 ? OK : NOTOK;
+
+  //
+  // Fixed: the original tested (mode & O_RDONLY), but POSIX defines
+  // O_RDONLY as 0 so that test was always false and isread was never
+  // set.  Read-only is the absence of O_RDWR, mirroring the flags
+  // computation above.
+  //
+  isread = !(mode & O_RDWR);
+  isopen = 1;
+
+  return ret;
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Close the underlying Berkeley DB file (if open) and release the
+// compressor allocated by Open.  Returns OK, or NOTOK if the db close
+// fails (in which case the compressor is deliberately left in place).
+//
+int WordList::Close()
+{
+  if(isopen) {
+    if(db.Close() != 0) return NOTOK;
+    isopen = 0;
+    isread = 0;
+  }
+
+  {
+    WordDBCompress* compressor = GetCompressor();
+    if(compressor) {
+      delete compressor;
+      SetCompressor(0);
+    }
+  }
+
+  return OK;
+}
+
+// ****************************************************************************
+//
+// ****************************************************************************
+//
+// Insert <arg> in the index.  <flags> is 0 (override any existing entry)
+// or DB_NOOVERWRITE (fail if the entry exists).  The word is normalized
+// before insertion and the per-word reference count incremented when a
+// new entry is created.  Returns OK on success, NOTOK on error.
+//
+int WordList::Put(const WordReference& arg, int flags)
+{
+  if (arg.Key().GetWord().length() == 0) {
+    fprintf(stderr, "WordList::Put(%s) word is zero length\n", (char*)arg.Get());
+    return NOTOK;
+  }
+  if (!arg.Key().Filled()) {
+    fprintf(stderr, "WordList::Put(%s) key is not fully defined\n", (char*)arg.Get());
+    return NOTOK;
+  }
+
+  WordReference wordRef(arg);
+  String word = wordRef.Key().GetWord();
+  if(wtype.Normalize(word) & WORD_NORMALIZE_NOTOK)
+    return NOTOK;
+  wordRef.Key().SetWord(word);
+
+  //
+  // The two case could be grouped in a more compact way.
+  // However, the resources consumption difference between
+  // a Put(DB_NOOVERWRITE) and Put(0) is huge (the first is 75%
+  // slower than the second). Check the db_put sources for the
+  // explanation.
+  //
+  int ret = NOTOK;
+  if(flags) {
+    //
+    // First attempt tells us if the key exists. If it
+    // does not we just increment the reference count.
+    // Otherwise, and only if flags does not contain DB_NOOVERWRITE,
+    // we override the key/record pair.
+    //
+    int error;
+    if((error = db.Put(wordRef, DB_NOOVERWRITE)) != 0) {
+      //
+      // Fixed: the original tested "flags == 0" here, which can never
+      // be true inside this if(flags) branch, so the override path
+      // described by the comment above was unreachable.
+      //
+      if(error == DB_KEYEXIST && !(flags & DB_NOOVERWRITE))
+        ret = db.Put(wordRef, 0) == 0 ? OK : NOTOK;
+    } else {
+      ret = Ref(wordRef);
+    }
+  } else {
+    if((ret = db.Put(wordRef, 0)) == 0)
+      ret = Ref(wordRef);
+  }
+
+  return ret;
+}
+
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Alias of Collect: list of occurrences matching wordRef's key exactly.
+//
+List *WordList::operator [] (const WordReference& wordRef)
+{
+  return Collect(wordRef);
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Collect all occurrences whose word begins with the word of <prefix>:
+// marking the word suffix undefined turns the exact match into a
+// prefix match.
+//
+List *WordList::Prefix (const WordReference& prefix)
+{
+  WordReference prefix2(prefix);
+  prefix2.Key().UndefinedWordSuffix();
+  return Collect(prefix2);
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Collect every entry of the index (an empty WordReference matches all).
+//
+List *WordList::WordRefs()
+{
+  return Collect(WordReference());
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Walk the index with a collector cursor and return the matching
+// WordReference objects in a List the caller must free.  Returns 0 on
+// error.
+//
+List *WordList::Collect(const WordReference& wordRef)
+{
+  WordCursor *search = Cursor(wordRef.Key(), HTDIG_WORDLIST_COLLECTOR);
+  // Fixed: the cursor was leaked on the error path.
+  if(search->Walk() != OK) {
+    delete search;
+    return 0;
+  }
+  List* result = search->GetResults();
+  delete search;
+  return result;
+}
+
+// *****************************************************************************
+//
+// Callback data dedicated to Dump and dump_word communication
+//
+// *****************************************************************************
+//
+// Callback data dedicated to WalkDelete and delete_word communication:
+// counts the entries successfully deleted during the walk.
+//
+class DeleteWordData : public Object
+{
+public:
+  DeleteWordData() { count = 0; }
+
+  // Number of entries deleted so far.
+  int count;
+};
+
+// *****************************************************************************
+//
+//
+// *****************************************************************************
+//
+// Walk callback: delete the entry under the cursor, decrement the
+// per-word reference count and bump the DeleteWordData counter.
+//
+static int delete_word(WordList *words, WordDBCursor &cursor, const WordReference *word, Object &data)
+{
+  if(words->Delete(cursor) == 0) {
+    words->Unref(*word);
+    ((DeleteWordData&)data).count++;
+    return OK;
+  } else {
+    fprintf(stderr, "WordList delete_word: deleting %s failed\n", (char*)word->Get());
+    return NOTOK;
+  }
+}
+
+// *****************************************************************************
+//
+// Delete all records matching wordRef, return the number of
+// deleted records.
+//
+// *****************************************************************************
+//
+// Delete all records matching wordRef, return the number of
+// deleted records.
+//
+int WordList::WalkDelete(const WordReference& wordRef)
+{
+  DeleteWordData data;
+  WordCursor *description = Cursor(wordRef.Key(), delete_word, &data);
+  description->Walk();
+  delete description;
+  return data.count;
+}
+
+// *****************************************************************************
+//
+//
+// *****************************************************************************
+//
+// Return the list of unique words (String objects) present in the index,
+// or 0 on error.  The cursor is first positioned at the first entry at
+// or after the WordStat::Last() sentinel (skipping the word count
+// records) and consecutive duplicate words are collapsed, relying on the
+// B-tree sort order.  The caller must free the returned list.
+//
+List *WordList::Words()
+{
+  List *list = 0;
+  String key;
+  String record;
+  WordReference lastWord;
+  WordDBCursor cursor;
+
+  if(cursor.Open(db.db) != 0) return 0;
+
+  //
+  // Move past the first word count record
+  //
+  const WordReference& last = WordStat::Last();
+  last.Pack(key, record);
+  if(cursor.Get(key, record, DB_SET_RANGE) != 0)
+    return 0;
+  list = new List;
+  do {
+    WordReference wordRef(key, record);
+    // Only emit a word the first time it is seen; entries for the same
+    // word are adjacent in the B-tree.
+    if(lastWord.Key().GetWord().empty() ||
+       wordRef.Key().GetWord() != lastWord.Key().GetWord())
+    {
+      list->Add(new String(wordRef.Key().GetWord()));
+      lastWord = wordRef;
+    }
+  } while (cursor.Get(key, record, DB_NEXT) == 0);
+
+  return list;
+}
+
+// *****************************************************************************
+//
+// Returns the reference count for word in <count> arg
+//
+// *****************************************************************************
+//
+// Return in <noccurrence> the reference count for the word of <key>.
+// A missing statistics record is not an error: the count is simply 0.
+// Returns OK on success, NOTOK on a database error.
+//
+int WordList::Noccurrence(const WordKey& key, unsigned int& noccurrence) const
+{
+  noccurrence = 0;
+  WordStat stat(key.GetWord());
+  int ret;
+  if((ret = db.Get(stat)) != 0) {
+    if(ret != DB_NOTFOUND)
+      return NOTOK;
+  } else {
+    noccurrence = stat.Noccurrence();
+  }
+
+  return OK;
+}
+
+// *****************************************************************************
+//
+// Increment reference count for wordRef
+//
+// *****************************************************************************
+//
+// Increment the reference count for the word of wordRef.  A no-op (OK)
+// when extended statistics are disabled.  A missing statistics record is
+// created with count 1 (DB_NOTFOUND leaves the count at 0 before the
+// increment).  Returns OK on success, NOTOK on a database error.
+//
+int WordList::Ref(const WordReference& wordRef)
+{
+  if(!extended) return OK;
+
+  WordStat stat(wordRef.Key().GetWord());
+  int ret;
+  if((ret = db.Get(stat)) != 0 && ret != DB_NOTFOUND)
+    return NOTOK;
+
+  stat.Noccurrence()++;
+
+  return db.Put(stat, 0) == 0 ? OK : NOTOK;
+}
+
+// *****************************************************************************
+//
+// Decrement reference count for wordRef
+//
+// *****************************************************************************
+//
+// Decrement the reference count for the word of wordRef; the statistics
+// record is removed entirely when the count drops to 0.  A no-op (OK)
+// when extended statistics are disabled.  Returns NOTOK when the record
+// is missing, already 0, or on a database error.
+//
+int WordList::Unref(const WordReference& wordRef)
+{
+  if(!extended) return OK;
+
+  WordStat stat(wordRef.Key().GetWord());
+  int ret;
+  if((ret = db.Get(stat)) != 0) {
+    if(ret == DB_NOTFOUND)
+      fprintf(stderr, "WordList::Unref(%s) Unref on non existing word occurrence\n", (char*)wordRef.Get());
+    return NOTOK;
+  }
+
+  if(stat.Noccurrence() == 0) {
+    fprintf(stderr, "WordList::Unref(%s) Unref on 0 occurrences word\n", (char*)wordRef.Get());
+    return NOTOK;
+  }
+  stat.Noccurrence()--;
+
+  if(stat.Noccurrence() > 0) {
+    ret = db.Put(stat, 0) == 0 ? OK : NOTOK;
+  } else
+    ret = db.Del(stat) == 0 ? OK : NOTOK;
+  return ret;
+}
+
+
+// *****************************************************************************
+//
+// streaming operators for ascii dumping and reading a list
+// *****************************************************************************
+//
+// Callback data for Write: carries the destination stdio stream
+// (not owned) to the walk callback.
+//
+class FileOutData : public Object
+{
+public:
+  FILE* f;
+  FileOutData(FILE* f_arg) : f(f_arg) { }
+};
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Walk callback: print the ASCII form of each entry, one per line, on
+// the stream carried by FileOutData.
+//
+static int
+wordlist_walk_callback_file_out(WordList *, WordDBCursor& , const WordReference *word, Object &data)
+{
+  fprintf(((FileOutData&)data).f, "%s\n", (char*)word->Get());
+  return OK;
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Dump the whole index on <f> as one ASCII WordReference per line.
+// Always returns 0; the Walk result is ignored.
+//
+int
+WordList::Write(FILE* f)
+{
+  WordKey empty;
+  FileOutData data(f);
+  WordCursor *description = Cursor(empty, wordlist_walk_callback_file_out, (Object *)&data);
+  description->Walk();
+  delete description;
+  return 0;
+}
+
+// *****************************************************************************
+//
+// *****************************************************************************
+//
+// Read ASCII WordReference descriptions from <f> and insert them in the
+// index.  Lines longer than the buffer are joined, a trailing backslash
+// continues a line, invalid descriptions and empty lines are ignored.
+// Returns the number of entries successfully inserted.
+//
+int
+WordList::Read(FILE* f)
+{
+  WordReference word;
+#define WORD_BUFFER_SIZE 1024
+  char buffer[WORD_BUFFER_SIZE + 1];
+  String line;
+  int line_number = 0;
+  int inserted = 0;
+
+  while(fgets(buffer, WORD_BUFFER_SIZE, f)) {
+    line_number++;
+    int buffer_length = strlen(buffer);
+    //
+    // Fixed: guard against a zero-length read (e.g. a NUL byte at the
+    // start of the input) before touching buffer[buffer_length - 1],
+    // which would otherwise read out of bounds.
+    //
+    int eol = buffer_length > 0 && buffer[buffer_length - 1] == '\n';
+
+    if(eol) buffer[--buffer_length] = '\0';
+
+    line.append(buffer, buffer_length);
+    //
+    // Join big lines
+    //
+    if(!eol) continue;
+    //
+    // If line ends with a \ continue
+    //
+    if(line.last() == '\\') {
+      line.chop(1);
+      continue;
+    }
+
+    if(!line.empty()) {
+      if(word.Set(line) != OK) {
+        fprintf(stderr, "WordList::Read: line %d : %s\n", line_number, (char*)line);
+        fprintf(stderr, " cannot build WordReference (ignored)\n");
+      } else {
+        if(Insert(word) != OK) {
+          fprintf(stderr, "WordList::Read: line %d : %s\n", line_number, (char*)line);
+          fprintf(stderr, " insert failed (ignored)\n");
+        } else {
+          inserted++;
+        }
+        if(verbose) fprintf(stderr, "WordList::Read: inserting %s\n", (char*)word.Get());
+      }
+
+      line.trunc();
+    }
+  }
+  return inserted;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordList.h b/debian/htdig/htdig-3.2.0b6/htword/WordList.h
new file mode 100644
index 00000000..1aa87864
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordList.h
@@ -0,0 +1,372 @@
+//
+// WordList.h
+//
+// NAME
+//
+// manage and use an inverted index file.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// Configuration* config;
+// WordReference wordRef;
+// ...
+// WordList* words = new WordList(config)
+//
+// delete words;
+//
+// DESCRIPTION
+//
+// WordList is the <i>mifluz</i> equivalent of a database handler. Each
+// WordList object is bound to an inverted index file and implements the
+// operations to create it, fill it with word occurrences and search
+// for an entry matching a given criterion.
+//
+// CONFIGURATION
+//
+// wordlist_extend {true|false} (default false)
+// If <b>true</b> maintain reference count of unique
+// words. The <b>Noccurrence</b> method gives access to this count.
+//
+// wordlist_verbose <number> (default 0)
+// Set the verbosity level of the WordList class.
+// <br>
+// 1 walk logic
+// <br>
+// 2 walk logic details
+// <br>
+// 3 walk logic lots of details
+//
+// wordlist_page_size <bytes> (default 8192)
+// Berkeley DB page size (see Berkeley DB documentation)
+//
+// wordlist_cache_size <bytes> (default 500K)
+// Berkeley DB cache size (see Berkeley DB documentation)
+// Cache makes a huge difference in performance. It must be at least 2%
+// of the expected total data size. Note that if compression is activated
+// the data size is eight times larger than the actual file size. In this
+// case the cache must be scaled to 2% of the data size, not 2%
+// of the file size. See <b>Cache tuning</b> in the mifluz guide for
+// more hints.
+//
+// wordlist_compress {true|false} (default false)
+// Activate compression of the index. The resulting index is eight times
+// smaller than the uncompressed index.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordList.h,v 1.10 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordList_h_
+#define _WordList_h_
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#ifndef SWIG
+#include "Dictionary.h"
+#include "List.h"
+#include "htString.h"
+#include "WordRecord.h"
+#include "WordReference.h"
+#include "WordType.h"
+#include "WordDB.h"
+#include "WordDBCompress.h"
+#include "Configuration.h"
+#include "WordCursor.h"
+#endif /* SWIG */
+
+class List;
+class WordList;
+class WordDBCursor;
+
+//
+// Inverted index interface
+//
+class WordList
+{
+public:
+  //-
+  // Constructor. Build inverted index handling object using
+  // run time configuration parameters listed in the <b>CONFIGURATION</b>
+  // section.
+  //
+  WordList(const Configuration& config_arg);
+  virtual ~WordList();
+
+  //-
+  // Insert <b>wordRef</b> in index. It is an error to insert
+  // the same <b>wordRef</b> twice. This requires a lookup in the index
+  // prior to the insertion.
+  // Returns OK on success, NOTOK on error.
+  //
+  int Insert(const WordReference& wordRef) { return Put(wordRef, DB_NOOVERWRITE); }
+  //-
+  // Insert <b>wordRef</b> in index. If the <i>Key()</i> part of
+  // the <b>wordRef</b> exists in the index, override it.
+  // Returns OK on success, NOTOK on error.
+  //
+  int Override(const WordReference& wordRef) { return Put(wordRef, 0); }
+#ifndef SWIG
+  // Backend of Insert and Override; flags is 0 or DB_NOOVERWRITE.
+  int Put(const WordReference& wordRef, int flags);
+#endif /* SWIG */
+
+  //-
+  // Returns OK if <b>wordRef</b> exists in the index, NOTOK otherwise.
+  //
+  int Exists(const WordReference& wordRef) { return db.Exists(wordRef) == 0 ? OK : NOTOK; }
+#ifndef SWIG
+  //-
+  // Returns OK if <b>word</b> exists in the index, NOTOK otherwise.
+  //
+  int Exists(const String& word) { return Exists(WordReference(word)); }
+#endif /* SWIG */
+
+  //
+  // Delete permanently
+  //
+  //-
+  // Delete all entries in the index whose key matches the
+  // <i>Key()</i> part of <b>wordRef</b>, using the <i>Walk</i>
+  // method.
+  // Returns the number of entries successfully deleted.
+  //
+  int WalkDelete(const WordReference& wordRef);
+  //-
+  // Delete the entry in the index that exactly matches the
+  // <i>Key()</i> part of <b>wordRef.</b>
+  // Returns OK if deletion is successful, NOTOK otherwise.
+  //
+  int Delete(const WordReference& wordRef) {
+    if(db.Del(wordRef) == 0)
+      return Unref(wordRef);
+    else
+      return NOTOK;
+  }
+#ifdef SWIG
+%name(DeleteCursor)
+#endif /* SWIG */
+  //-
+  // Delete the inverted index entry currently pointed to by the
+  // <b>cursor.</b>
+  // Returns 0 on success, Berkeley DB error code on error. This
+  // is mainly useful when implementing a callback function for
+  // a <b>WordCursor.</b>
+  //
+  int Delete(WordDBCursor& cursor) { return cursor.Del(); }
+
+  //-
+  // Open inverted index <b>filename.</b> <b>mode</b>
+  // may be <i>O_RDONLY</i> or <i>O_RDWR.</i> If mode is
+  // <i>O_RDWR</i> it can be or'ed with <i>O_TRUNC</i> to reset
+  // the content of an existing inverted index.
+  // If word_only is true, entries will compare equal if the "word" part
+  // of the key is equal, even if the numeric fields aren't. (What are the
+  // numeric fields, anyway??)
+  // Return OK on success, NOTOK otherwise.
+  //
+  int Open(const String& filename, int mode, int word_only=false);
+  //-
+  // Close inverted index.
+  //
+  int Close();
+
+  //
+  // These return a list of all the WordReference * matching
+  // the constraint.
+  //-
+  // Returns the list of word occurrences exactly matching the
+  // <i>Key()</i> part of <b>wordRef.</b> The <i>List</i> returned
+  // contains pointers to <i>WordReference</i> objects. It is
+  // the responsibility of the caller to free the list. See List.h
+  // header for usage.
+  //
+  List *Find(const WordReference& wordRef) { return (*this)[wordRef]; }
+  //-
+  // Returns the list of word occurrences exactly matching the
+  // <b>word.</b> The <i>List</i> returned
+  // contains pointers to <i>WordReference</i> objects. It is
+  // the responsibility of the caller to free the list. See List.h
+  // header for usage.
+  //
+  List *FindWord(const String& word) { return (*this)[word]; }
+#ifndef SWIG
+  //-
+  // Alias to the <b>Find</b> method.
+  //
+  List *operator [] (const WordReference& wordRef);
+  //-
+  // Alias to the <b>FindWord</b> method.
+  //
+  List *operator [] (const String& word) { return (*this)[WordReference(word)]; }
+#endif /* SWIG */
+  //-
+  // Returns the list of word occurrences matching the <i>Key()</i>
+  // part of <b>wordRef.</b> In the <i>Key()</i>, the string
+  // (accessed with <i>GetWord()</i>) matches any string that begins
+  // with it. The <i>List</i> returned contains pointers to
+  // <i>WordReference</i> objects. It is the responsibility of the
+  // caller to free the list.
+  //
+  List *Prefix (const WordReference& prefix);
+#ifndef SWIG
+  //-
+  // Returns the list of word occurrences matching the
+  // <b>word.</b> In the <i>Key()</i>, the string (accessed with
+  // <i>GetWord()</i>) matches any string that begins with it. The
+  // <i>List</i> returned contains pointers to <i>WordReference</i>
+  // objects. It is the responsibility of the caller to free the
+  // list.
+  //
+  List *Prefix (const String& prefix) { return this->Prefix(WordReference(prefix)); }
+#endif /* SWIG */
+
+  //
+  // Iterate over the complete database.
+  //
+#ifndef SWIG
+  //-
+  // Returns a list of all unique words contained in the inverted
+  // index. The <i>List</i> returned contains pointers to
+  // <i>String</i> objects. It is the responsibility of the caller
+  // to free the list. See List.h header for usage.
+  //
+  List *Words();
+#endif /* SWIG */
+  //-
+  // Returns a list of all entries contained in the
+  // inverted index. The <i>List</i> returned contains pointers to
+  // <i>WordReference</i> objects. It is the responsibility of
+  // the caller to free the list. See List.h header for usage.
+  //
+  List *WordRefs();
+
+#ifndef SWIG
+  //-
+  // Create a cursor that searches all the occurrences in the
+  // inverted index and call <b>ncallback</b> with
+  // <b>ncallback_data</b> for every match.
+  //
+  WordCursor *Cursor(wordlist_walk_callback_t callback, Object *callback_data) { return new WordCursor(this, callback, callback_data); }
+#endif /* SWIG */
+  //-
+  // Create a cursor that searches all the occurrences in the
+  // inverted index and that match <b>nsearchKey.</b> If
+  // <b>naction</b> is set to HTDIG_WORDLIST_WALKER calls
+  // <b>searchKey.callback</b> with <b>searchKey.callback_data</b>
+  // for every match. If <b>naction</b> is set to
+  // HTDIG_WORDLIST_COLLECT push each match in <b>searchKey.collectRes</b>
+  // data member as a <b>WordReference</b> object. It is the responsibility
+  // of the caller to free the <b>searchKey.collectRes</b> list.
+  //
+  WordCursor *Cursor(const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { return new WordCursor(this, searchKey, action); }
+#ifndef SWIG
+  //-
+  // Create a cursor that searches all the occurrences in the
+  // inverted index and that match <b>nsearchKey</b> and calls
+  // <b>ncallback</b> with <b>ncallback_data</b> for every match.
+  //
+  WordCursor *Cursor(const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { return new WordCursor(this, searchKey, callback, callback_data); }
+#endif /* SWIG */
+
+  //
+  // Update/get global word statistics statistics
+  //
+  //-
+  // Add one to the reference count for the string contained
+  // in the <i>Key().GetWord()</i> part of <b>wordRef.</b>
+  // Returns OK on success, NOTOK otherwise.
+  //
+  int Ref(const WordReference& wordRef);
+  //-
+  // Subtract one from the reference count for the string contained
+  // in the <i>Key().GetWord()</i> part of <b>wordRef.</b>
+  // Returns OK on success, NOTOK otherwise.
+  //
+  int Unref(const WordReference& wordRef);
+#ifndef SWIG
+  //-
+  // Return in <b>noccurrence</b> the number of occurrences of the
+  // string contained in the <i>GetWord()</i> part of <b>key.</b>
+  // Returns OK on success, NOTOK otherwise.
+  //
+  int Noccurrence(const WordKey& key, unsigned int& noccurrence) const;
+
+  //
+  // Accessors
+  //
+  //
+  // Get the word type description object
+  //
+  const WordType& GetWordType() const { return wtype; }
+#endif /* SWIG */
+  //-
+  // Return the <i>Configuration</i> object used to initialize
+  // the <i>WordList</i> object.
+  //
+  const Configuration& GetConfiguration() const { return config; }
+
+#ifndef SWIG
+  //
+  // Input/Output
+  //
+  //-
+  // Write on file descriptor <b>f</b> an ASCII description of the
+  // index. Each line of the file contains a <i>WordReference</i>
+  // ASCII description.
+  // Returns 0 on success, not 0 otherwise.
+  //
+  int Write(FILE* f);
+  //
+  //-
+  // Read <i>WordReference</i> ASCII descriptions from <b>f</b>,
+  // returns the number of inserted WordReference or < 0 if an error
+  // occurs. Invalid descriptions are ignored as well as empty
+  // lines.
+  //
+  int Read(FILE* f);
+
+#endif /* SWIG */
+  //
+  // Retrieve WordReferences from the database.
+  // Backend of WordRefs, operator[], Prefix...
+  //
+  List *Collect(const WordReference& word);
+#ifndef SWIG
+  //
+  // Compressor object accessors
+  //
+  WordDBCompress *GetCompressor() { return compressor; }
+  void SetCompressor(WordDBCompress* compressor_arg) { compressor = compressor_arg; }
+
+  const WordType wtype;
+  const Configuration& config;
+
+  // Set by Open(), cleared by Close().
+  int isopen;
+  // Intended to be true when the index was opened read-only.
+  // NOTE(review): set from (mode & O_RDONLY) in Open, but POSIX defines
+  // O_RDONLY as 0, so this is never set -- verify before relying on it.
+  int isread;
+
+  //
+  // If true enable extended functionalities of WordList such
+  // as per-word statistics. Read from wordlist_extended configuration
+  // parameter.
+  //
+  int extended;
+
+
+  // Berkeley DB handle and optional page compressor allocated by Open()
+  // and released by Close().
+  WordDB db;
+  WordDBCompress *compressor;
+  // Verbosity level read from wordlist_verbose.
+  int verbose;
+#endif /* SWIG */
+};
+
+#endif /* _WordList_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.cc b/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.cc
new file mode 100644
index 00000000..032cb97c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.cc
@@ -0,0 +1,599 @@
+//
+// WordListMulti.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordListMulti.cc,v 1.6 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordListMulti.h"
+#include "WordListOne.h"
+#include "myqsort.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+class WordDBMulti : public Object
+{
+public:
+ WordDBMulti() { words = 0; size = 0; mode = 0; }
+
+ WordListOne *words;
+ String filename;
+ int mode;
+ unsigned int size;
+};
+
+// *****************************************************************************
+//
+WordListMulti::WordListMulti(WordContext* ncontext)
+{
+ dbs = new List;
+ context = ncontext;
+ // The database itself hasn't been opened yet
+ isopen = 0;
+ Configuration& config = context->GetConfiguration();
+ extended = config.Boolean("wordlist_extend");
+ verbose = config.Value("wordlist_verbose");
+
+ file_max = config.Value("wordlist_multi_max", 50);
+ if(file_max < 4) file_max = 4;
+
+ file_min = config.Value("wordlist_multi_min", 4);
+ if(file_min < 2) file_min = 2;
+
+ if(file_max < file_min) file_max = file_min * 2;
+
+ put_max = config.Value("wordlist_multi_put_max", 1000);
+ if(put_max < 50) put_max = 50;
+
+ compressor = 0;
+ serial = 0;
+}
+
+// *****************************************************************************
+//
+WordListMulti::~WordListMulti()
+{
+ Close();
+}
+
+// *****************************************************************************
+//
+int WordListMulti::Open(const String& nfilename, int mode)
+{
+ filename = nfilename;
+
+ char tmp[32];
+ struct stat stat_buf;
+ int i;
+ //
+ // Open existing indexes
+ //
+ for(i = 0; i < file_max; i++) {
+ String filename_one(filename);
+ sprintf(tmp, "%08d", i);
+ filename_one << tmp;
+ if(stat((char*)filename_one, &stat_buf) == 0) {
+ WordDBMulti* db = new WordDBMulti();
+ db->words = new WordListOne(context);
+ db->filename = filename_one;
+ db->mode = mode;
+ dbs->Push(db);
+ } else {
+ break;
+ }
+ }
+ serial = i;
+ //
+  // If no indexes exist and read-only, abort
+ //
+ if(i == 0 && (flags & DB_RDONLY)) {
+ fprintf(stderr, "WordListMulti::Open(%s, O_RDONLY): no index found\n", (char*)filename);
+ return NOTOK;
+ }
+
+ isopen = 1;
+
+ //
+  // If no indexes exist and read/write, create the first
+ //
+ if(i == 0)
+ if(AddIndex() != OK) return NOTOK;
+
+ WordDBMulti* db = (WordDBMulti*)dbs->Last();
+ if(db->words->Open(db->filename, mode) != OK)
+ return NOTOK;
+
+ return OK;
+}
+
+// *****************************************************************************
+//
+int WordListMulti::Close()
+{
+ if(isopen) {
+ WordDBMulti* db;
+ ListCursor cursor;
+ for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
+ delete db->words;
+ }
+ dbs->Destroy();
+ isopen = 0;
+ filename.trunc();
+ }
+ return OK;
+}
+
+// ****************************************************************************
+//
+unsigned int WordListMulti::Size() const
+{
+ unsigned int size = 0;
+ if(isopen) {
+ WordDBMulti* db;
+ ListCursor cursor;
+ for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
+ if(!db->words->isopen) {
+ if(db->words->Open(db->filename, O_RDONLY) != OK) return 0;
+ size += db->words->Size();
+ if(db->words->Close() != OK) return 0;
+ } else {
+ size += db->words->Size();
+ }
+ }
+ }
+ return size;
+}
+
+int WordListMulti::AddIndex()
+{
+  if(Flags() & DB_RDONLY) return NOTOK;
+
+ if(serial >= file_max)
+ Merge();
+
+ char tmp[32];
+
+ String filename_one(filename);
+ sprintf(tmp, "%08d", serial);
+ filename_one << tmp;
+ serial++;
+
+ WordDBMulti* db = new WordDBMulti();
+ db->words = new WordListOne(context);
+ db->words->extended = extended;
+ db->filename = filename_one;
+ dbs->Push(db);
+
+ return OK;
+}
+
+static int merge_cmp_size(WordListMulti*, WordDBMulti* a, WordDBMulti* b)
+{
+  return (b->size > a->size) - (b->size < a->size);
+}
+
+static int merge_cmp_filename(WordListMulti*, WordDBMulti* a, WordDBMulti* b)
+{
+ return a->filename.compare(b->filename);
+}
+
+int WordListMulti::Merge()
+{
+ if(Flags() & DB_RDONLY) return NOTOK;
+
+ Configuration& config = context->GetConfiguration();
+ int use_compress = config.Boolean("wordlist_compress");
+
+ WordDBMulti* db = (WordDBMulti*)dbs->Last();
+ if(db->words->Close() != OK) return NOTOK;
+
+ //
+ // heap lists all the files in decreasing size order (biggest first)
+ //
+ WordDBMulti* heap = new WordDBMulti[serial];
+ {
+ int i;
+ WordDBMulti* db;
+ ListCursor cursor;
+ for(i = 0, dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor)); i++) {
+ if(db->words->Open(db->filename, O_RDONLY) != OK) return NOTOK;
+ db->size = db->words->Size();
+ if(db->words->Close() != OK) return NOTOK;
+
+ heap[i] = *db;
+ }
+ dbs->Destroy();
+ myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_size, (void*)this);
+ }
+
+ String tmpname = filename;
+ tmpname << ".tmp";
+
+ while(serial > file_min) {
+ WordDBMulti* a = &heap[serial - 1];
+ WordDBMulti* b = &heap[serial - 2];
+
+ WordListOne tmp(context);
+ tmp.extended = 0;
+
+ if(a->words->Open(a->filename, O_RDONLY) != OK) return NOTOK;
+ if(b->words->Open(b->filename, O_RDONLY) != OK) return NOTOK;
+ if(tmp.Open(tmpname, O_RDWR) != OK) return NOTOK;
+    if(tmp.db->CacheP() && tmp.db->CacheOff() != 0) return NOTOK;
+
+ WordDBCursor* cursora = a->words->db->Cursor();
+ WordDBCursor* cursorb = b->words->db->Cursor();
+
+ if(cursora->Open() != 0) return NOTOK;
+ String keya;
+ String dataa;
+
+ if(cursorb->Open() != 0) return NOTOK;
+ String keyb;
+ String datab;
+
+ int reta;
+ int retb;
+
+ reta = cursora->Get(keya, dataa, DB_NEXT);
+ retb = cursorb->Get(keyb, datab, DB_NEXT);
+
+ //
+ // Merge while there are entries in both indexes
+ //
+ while(reta == 0 && retb == 0) {
+ //
+ // If keya lower than keyb
+ //
+ if(WordKey::Compare(context, keya, keyb) < 0) {
+ if(tmp.db->Put(0, keya, dataa, 0) != 0) return NOTOK;
+ reta = cursora->Get(keya, dataa, DB_NEXT);
+ } else {
+ if(tmp.db->Put(0, keyb, datab, 0) != 0) return NOTOK;
+ retb = cursorb->Get(keyb, datab, DB_NEXT);
+ }
+ }
+
+ //
+ // Sanity check
+ //
+ if((reta != 0 && reta != DB_NOTFOUND) ||
+ (retb != 0 && retb != DB_NOTFOUND))
+ return NOTOK;
+
+ //
+ // Flush the remaining entries from the index that is
+ // not yet empty.
+ //
+ if(reta != DB_NOTFOUND || retb != DB_NOTFOUND) {
+ String key = reta == 0 ? keya : keyb;
+      String data = reta == 0 ? dataa : datab;
+ WordDBCursor* cursor = reta == 0 ? cursora : cursorb;
+ int ret = 0;
+ while(ret == 0) {
+ if(tmp.db->Put(0, key, data, 0) != 0) return NOTOK;
+ ret = cursor->Get(key, data, DB_NEXT);
+ }
+ if(ret != DB_NOTFOUND)
+ return NOTOK;
+ }
+
+ delete cursora;
+ delete cursorb;
+
+ a->words->Close();
+ b->words->Close();
+ tmp.Close();
+
+ //
+ // Remove file a
+ //
+ if(unlink((char*)a->filename) != 0) {
+ const String message = String("WordListMulti::Merge: unlink ") + a->filename;
+ perror((const char*)message);
+ return NOTOK;
+ }
+ if(use_compress) {
+ if(unlink((char*)(a->filename + String("_weakcmpr"))) != 0) {
+ const String message = String("WordListMulti::Merge: unlink ") + a->filename + String("_weakcmpr");
+ perror((const char*)message);
+ return NOTOK;
+ }
+ }
+
+ //
+ // Remove file b
+ //
+ if(unlink((char*)b->filename) != 0) {
+ const String message = String("WordListMulti::Merge: unlink ") + b->filename;
+ perror((const char*)message);
+ return NOTOK;
+ }
+ if(use_compress) {
+ if(unlink((char*)(b->filename + String("_weakcmpr"))) != 0) {
+ const String message = String("WordListMulti::Merge: unlink ") + b->filename + String("_weakcmpr");
+ perror((const char*)message);
+ return NOTOK;
+ }
+ }
+
+ //
+ // Rename tmp file into file b
+ //
+ if(rename((char*)tmpname, (char*)b->filename) != 0) {
+ const String message = String("WordListMulti::Merge: rename ") + tmpname + String(" ") + b->filename;
+ perror((const char*)message);
+ return NOTOK;
+ }
+ if(use_compress) {
+ if(rename((char*)(tmpname + String("_weakcmpr")), (char*)(b->filename + String("_weakcmpr"))) != 0) {
+ const String message = String("WordListMulti::Merge: rename ") + tmpname + String("_weakcmpr ") + b->filename + String("_weakcmpr");
+ perror((const char*)message);
+ return NOTOK;
+ }
+ }
+
+ //
+ // Update b file size. The size need not be accurate number as long
+ // as it reflects the relative size of each file.
+ //
+ b->size += a->size;
+
+ //
+ // The 'a' index is no longer in use
+ //
+ delete a->words;
+
+ serial--;
+ //
+ // update heap
+ //
+ myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_size, (void*)this);
+ }
+
+ //
+ // Rename the indexes so that they are in increasing order
+ // and push them in the list of active indexes.
+ //
+ myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_filename, (void*)this);
+ int i;
+ for(i = 0; i < serial; i++) {
+ WordDBMulti* db = new WordDBMulti();
+ *db = heap[i];
+
+ String newname(filename);
+ char tmp[32];
+ sprintf(tmp, "%08d", i);
+ newname << tmp;
+
+ //
+ // Rename if not equal
+ //
+ if(db->filename.compare(newname)) {
+ //
+ // Rename db index into newname
+ //
+ if(rename((char*)db->filename, (char*)newname) != 0) {
+ const String message = String("WordListMulti::Merge: rename ") + db->filename + String(" ") + newname;
+ perror((const char*)message);
+ return NOTOK;
+ }
+ if(use_compress) {
+ if(rename((char*)(db->filename + String("_weakcmpr")), (char*)(newname + String("_weakcmpr"))) != 0) {
+ const String message = String("WordListMulti::Merge: rename ") + db->filename + String("_weakcmpr ") + newname + String("_weakcmpr");
+ perror((const char*)message);
+ return NOTOK;
+ }
+ }
+
+ db->filename = newname;
+ }
+
+ dbs->Push(db);
+ }
+
+ return OK;
+}
+
+// ****************************************************************************
+//
+int WordListMulti::Override(const WordReference& arg)
+{
+ WordDBMulti* db = (WordDBMulti*)dbs->Last();
+
+ if(db->words->Size() > put_max) {
+ if(db->words->Close() != OK) return NOTOK;
+ if(AddIndex() != OK) return NOTOK;
+ db = (WordDBMulti*)dbs->Last();
+ if(db->words->Open(db->filename, db->mode) != OK) return NOTOK;
+ }
+
+ return db->words->Override(arg);
+}
+
+// *****************************************************************************
+int WordListMulti::Exists(const WordReference& )
+{
+ return 0;
+}
+
+// *****************************************************************************
+//
+List *WordListMulti::operator [] (const WordReference& )
+{
+ return 0;
+#if 0
+ return Collect(wordRef);
+#endif
+}
+
+// *****************************************************************************
+//
+List *WordListMulti::Prefix (const WordReference& )
+{
+ return 0;
+#if 0
+ WordReference prefix2(prefix);
+ prefix2.Key().UndefinedWordSuffix();
+ return Collect(prefix2);
+#endif
+}
+
+// *****************************************************************************
+//
+List *WordListMulti::WordRefs()
+{
+ return 0;
+#if 0
+ return Collect(WordReference(context));
+#endif
+}
+
+// *****************************************************************************
+//
+List *WordListMulti::Collect(const WordReference&)
+{
+ return 0;
+#if 0
+ WordCursor *search = Cursor(wordRef.Key(), HTDIG_WORDLIST_COLLECTOR);
+ if(search->Walk() != OK) return 0;
+ List* result = search->GetResults();
+ delete search;
+ return result;
+#endif
+}
+
+// *****************************************************************************
+//
+// Delete all records matching wordRef, return the number of
+// deleted records.
+//
+int WordListMulti::WalkDelete(const WordReference& )
+{
+ return 0;
+#if 0
+ DeleteWordData data;
+ WordCursor *description = Cursor(wordRef.Key(), delete_word, &data);
+ description->Walk();
+ delete description;
+ return data.count;
+#endif
+}
+
+int WordListMulti::Delete(const WordReference& )
+{
+ return NOTOK;
+}
+
+// *****************************************************************************
+//
+//
+List *WordListMulti::Words()
+{
+ return 0;
+#if 0
+ List *list = 0;
+ String key;
+ String record;
+ WordReference lastWord(context);
+ WordDBCursor* cursor = db.Cursor();
+
+ if(!cursor) return 0;
+
+ //
+ // Move past the first word count record
+ //
+ const WordReference& last = WordStat::Last(context);
+ last.Pack(key, record);
+ if(cursor->Get(key, record, DB_SET_RANGE) != 0)
+ return 0;
+ list = new List;
+ do {
+ WordReference wordRef(context, key, record);
+ if(lastWord.Key().GetWord().empty() ||
+ wordRef.Key().GetWord() != lastWord.Key().GetWord())
+ {
+ list->Add(new String(wordRef.Key().GetWord()));
+ lastWord = wordRef;
+ }
+ } while (cursor->Get(key, record, DB_NEXT) == 0);
+
+ return list;
+#endif
+}
+
+// *****************************************************************************
+//
+// Returns the reference count for word in <count> arg
+//
+int WordListMulti::Noccurrence(const String& , unsigned int& ) const
+{
+ return 0;
+#if 0
+ noccurrence = 0;
+ WordStat stat(context, key.GetWord());
+ int ret;
+ if((ret = db.Get(stat)) != 0) {
+ if(ret != DB_NOTFOUND)
+ return NOTOK;
+ } else {
+ noccurrence = stat.Noccurrence();
+ }
+
+ return OK;
+#endif
+}
+
+// *****************************************************************************
+//
+// Increment reference count for wordRef
+//
+int WordListMulti::Ref(const WordReference& )
+{
+ return NOTOK;
+}
+
+// *****************************************************************************
+//
+// Decrement reference count for wordRef
+//
+int WordListMulti::Unref(const WordReference& )
+{
+ return NOTOK;
+}
+
+// *****************************************************************************
+//
+int WordListMulti::AllRef() {
+ if(!extended) return OK;
+
+ Merge();
+
+ WordDBMulti* db;
+ ListCursor cursor;
+ for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
+ if(!db->words->isopen) {
+ if(db->words->Open(db->filename, O_RDWR) != OK) return NOTOK;
+ if(db->words->Close() != OK) return NOTOK;
+ }
+ }
+
+ return OK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.h b/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.h
new file mode 100644
index 00000000..2aede10f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordListMulti.h
@@ -0,0 +1,252 @@
+//
+// WordList.h
+//
+// NAME
+//
+// manage and use an inverted index file.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// Configuration* config;
+// WordReference wordRef;
+// ...
+// WordList* words = new WordList(config)
+//
+// delete words;
+//
+// DESCRIPTION
+//
+// WordList is the <i>mifluz</i> equivalent of a database handler. Each
+// WordList object is bound to an inverted index file and implements the
+// operations to create it, fill it with word occurrences and search
+// for an entry matching a given criterion.
+//
+// CONFIGURATION
+//
+// wordlist_extend {true|false} (default false)
+// If <b>true</b> maintain reference count of unique
+// words. The <b>Noccurrence</b> method gives access to this count.
+//
+// wordlist_verbose <number> (default 0)
+// Set the verbosity level of the WordList class.
+// <br>
+// 1 walk logic
+// <br>
+// 2 walk logic details
+// <br>
+// 3 walk logic lots of details
+//
+// wordlist_page_size <bytes> (default 8192)
+// Berkeley DB page size (see Berkeley DB documentation)
+//
+// wordlist_cache_size <bytes> (default 500K)
+// Berkeley DB cache size (see Berkeley DB documentation)
+// Cache makes a huge difference in performance. It must be at least 2%
+// of the expected total data size. Note that if compression is activated
+// the data size is eight times larger than the actual file size. In this
+// case the cache must be scaled to 2% of the data size, not 2%
+// of the file size. See <b>Cache tuning</b> in the mifluz guide for
+// more hints.
+//
+// wordlist_compress {true|false} (default false)
+// Activate compression of the index. The resulting index is eight times
+// smaller than the uncompressed index.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordListMulti.h,v 1.4 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordListMulti_h_
+#define _WordListMulti_h_
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#ifndef SWIG
+#include "WordList.h"
+#include "WordCursorOne.h"
+//#include "WordCursorMulti.h"
+#endif /* SWIG */
+
+class WordContext;
+
+//
+// Inverted index interface
+//
+class WordListMulti : public WordList
+{
+ public:
+ //-
+ // Constructor. Build inverted index handling object using
+ // run time configuration parameters listed in the <b>CONFIGURATION</b>
+ // section.
+ //
+ WordListMulti(WordContext* ncontext);
+ virtual ~WordListMulti();
+
+#ifndef SWIG
+ virtual int Override(const WordReference& wordRef);
+#endif /* SWIG */
+
+ //-
+ // Returns OK if <b>wordRef</b> exists in the index, NOTOK otherwise.
+ //
+ virtual int Exists(const WordReference& wordRef);
+
+ //
+ // Delete permanently
+ //
+ //-
+ // Delete all entries in the index whose key matches the
+ // <i>Key()</i> part of <b>wordRef</b>, using the <i>Walk</i>
+ // method.
+ // Returns the number of entries successfully deleted.
+ //
+ virtual int WalkDelete(const WordReference& wordRef);
+ //-
+ // Delete the entry in the index that exactly matches the
+ // <i>Key()</i> part of <b>wordRef.</b>
+  // Returns OK if deletion is successful, NOTOK otherwise.
+ //
+ virtual int Delete(const WordReference& wordRef);
+
+ //-
+ // Open inverted index <b>filename.</b> <b>mode</b>
+ // may be <i>O_RDONLY</i> or <i>O_RDWR.</i> If mode is
+ // <i>O_RDWR</i> it can be or'ed with <i>O_TRUNC</i> to reset
+ // the content of an existing inverted index.
+ // Return OK on success, NOTOK otherwise.
+ //
+ virtual int Open(const String& filename, int mode);
+ //-
+ // Close inverted index.
+ // Return OK on success, NOTOK otherwise.
+ //
+ virtual int Close();
+ //-
+ // Return the size of the index in pages.
+ //
+ virtual unsigned int Size() const;
+ int AddIndex();
+ int Merge();
+
+ //-
+ // Alias to the <b>Find</b> method.
+ //
+ virtual List *operator [] (const WordReference& wordRef);
+ //-
+ // Returns the list of word occurrences matching the <i>Key()</i>
+ // part of <b>wordRef.</b> In the <i>Key()</i>, the string
+ // (accessed with <i>GetWord()</i>) matches any string that begins
+ // with it. The <i>List</i> returned contains pointers to
+ // <i>WordReference</i> objects. It is the responsibility of the
+ // caller to free the list.
+ //
+ virtual List *Prefix (const WordReference& prefix);
+
+ //
+ // Iterate over the complete database.
+ //
+#ifndef SWIG
+ //-
+ // Returns a list of all unique words contained in the inverted
+ // index. The <i>List</i> returned contains pointers to
+ // <i>String</i> objects. It is the responsibility of the caller
+ // to free the list. See List.h header for usage.
+ //
+ virtual List *Words();
+#endif /* SWIG */
+ //-
+ // Returns a list of all entries contained in the
+ // inverted index. The <i>List</i> returned contains pointers to
+ // <i>WordReference</i> objects. It is the responsibility of
+ // the caller to free the list. See List.h header for usage.
+ //
+ virtual List *WordRefs();
+
+#ifndef SWIG
+ //-
+ // Create a cursor that searches all the occurrences in the
+ // inverted index and call <b>ncallback</b> with
+ // <b>ncallback_data</b> for every match.
+ //
+ virtual inline WordCursor *Cursor(wordlist_walk_callback_t callback, Object *callback_data) { return new WordCursorOne(this, callback, callback_data); }
+#endif /* SWIG */
+ //-
+ // Create a cursor that searches all the occurrences in the
+ // inverted index and that match <b>nsearchKey.</b> If
+ // <b>naction</b> is set to HTDIG_WORDLIST_WALKER calls
+ // <b>searchKey.callback</b> with <b>searchKey.callback_data</b>
+ // for every match. If <b>naction</b> is set to
+ // HTDIG_WORDLIST_COLLECT push each match in <b>searchKey.collectRes</b>
+ // data member as a <b>WordReference</b> object. It is the responsibility
+ // of the caller to free the <b>searchKey.collectRes</b> list.
+ //
+ virtual inline WordCursor *Cursor(const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { return new WordCursorOne(this, searchKey, action); }
+#ifndef SWIG
+ //-
+ // Create a cursor that searches all the occurrences in the
+ // inverted index and that match <b>nsearchKey</b> and calls
+ // <b>ncallback</b> with <b>ncallback_data</b> for every match.
+ //
+ virtual inline WordCursor *Cursor(const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { return new WordCursorOne(this, searchKey, callback, callback_data); }
+#endif /* SWIG */
+
+ //
+ // Update/get global word statistics statistics
+ //
+ //-
+ // Add one to the reference count for the string contained
+ // in the <i>Key().GetWord()</i> part of <b>wordRef.</b>
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Ref(const WordReference& wordRef);
+ //-
+  // Subtract one from the reference count for the string contained
+ // in the <i>Key().GetWord()</i> part of <b>wordRef.</b>
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Unref(const WordReference& wordRef);
+ virtual int AllRef();
+
+#ifndef SWIG
+ //-
+ // Return in <b>noccurrence</b> the number of occurrences of the
+ // string contained in the <i>GetWord()</i> part of <b>key.</b>
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Noccurrence(const String& key, unsigned int& noccurrence) const;
+ virtual int Write(FILE* f) { return NOTOK; }
+ virtual int Read(FILE* f) { return NOTOK; }
+
+ virtual WordKey Key(const String& bufferin) { abort(); return WordKey(0); }
+
+ virtual WordReference Word(const String& bufferin, int exists = 0) { abort(); return WordReference(0); }
+
+#endif /* SWIG */
+ //
+ // Retrieve WordReferences from the database.
+ // Backend of WordRefs, operator[], Prefix...
+ //
+ virtual List *Collect(const WordReference& word);
+#ifndef SWIG
+ List* dbs;
+ int serial;
+ int file_max;
+ int file_min;
+ unsigned int put_max;
+#endif /* SWIG */
+};
+
+#endif /* _WordListMulti_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordListOne.cc b/debian/htdig/htdig-3.2.0b6/htword/WordListOne.cc
new file mode 100644
index 00000000..34e0019a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordListOne.cc
@@ -0,0 +1,485 @@
+//
+// WordListOne.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordListOne.cc,v 1.6 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordListOne.h"
+#include "WordReference.h"
+#include "WordRecord.h"
+#include "WordType.h"
+#include "WordContext.h"
+#include "Configuration.h"
+#include "htString.h"
+#include "HtTime.h"
+#include "WordDBCompress.h"
+#include "WordDBCache.h"
+#include "WordDead.h"
+#include "WordMeta.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+
+// *****************************************************************************
+//
+WordListOne::WordListOne(WordContext* ncontext)
+{
+ context = ncontext;
+ db = new WordDB(ncontext->GetDBInfo());
+ dict = new WordDict();
+ dict->Initialize(this);
+ meta = new WordMeta();
+ meta->Initialize(this);
+ dead = new WordDead();
+ dead->Initialize(this);
+
+ // The database itself hasn't been opened yet
+ isopen = 0;
+ Configuration& config = context->GetConfiguration();
+ extended = config.Boolean("wordlist_extend");
+ verbose = config.Value("wordlist_verbose");
+ compressor = 0;
+ caches = 0;
+ flags = 0;
+}
+
+// *****************************************************************************
+//
+WordListOne::~WordListOne()
+{
+ BatchEnd();
+ Close();
+ delete dead;
+ delete meta;
+ delete dict;
+ delete db;
+}
+
+static int word_db_qcmp(WordContext* context, const WordDBCacheEntry *a, const WordDBCacheEntry *b)
+{
+ return WordKey::Compare(context, (const unsigned char*)a->key, a->key_size, (const unsigned char*)b->key, b->key_size);
+}
+
+// *****************************************************************************
+//
+int WordListOne::Open(const String& nfilename, int mode)
+{
+ filename = nfilename;
+
+ int usecompress = 0;
+ Configuration& config = context->GetConfiguration();
+
+ if(config.Boolean("wordlist_compress") == 1) {
+ usecompress = DB_COMPRESS;
+ WordDBCompress* compressor = new WordDBCompress(context);
+ // compressor->debug = config.Value("wordlist_compress_debug");
+ SetCompressor(compressor);
+
+ context->GetDBInfo().dbenv->mp_cmpr_info = compressor->CmprInfo();
+ context->GetDBInfo().dbenv->flags |= DB_ENV_CMPR;
+ }
+
+ flags = (mode & O_RDWR) ? DB_CREATE : DB_RDONLY;
+ flags |= usecompress;
+ if(mode & O_TRUNC) {
+ if(mode & O_RDWR) {
+ unlink((char*)filename);
+ } else
+ fprintf(stderr, "WordListOne::Open: O_TRUNC | O_RDONLY is meaningless\n");
+ }
+
+ WordLock* lock;
+ Meta()->Lock("open", lock);
+
+ db->set_bt_compare(word_db_cmp, (void*)context);
+
+ if(config.Boolean("wordlist_cache_inserts", 0)) {
+ int size = config.Value("wordlist_cache_size", 0);
+ if(size / 2 < WORD_DB_CACHE_MINIMUM)
+ size = 0;
+ else
+ size /= 2;
+
+ db->CacheOn(context, size);
+ db->CacheCompare(word_db_qcmp);
+ }
+
+ db->set_pagesize(Pagesize());
+
+ int ret = db->Open(filename, "index", DB_BTREE, flags, 0666, WORD_DB_INDEX) == 0 ? OK : NOTOK;
+ if(ret == NOTOK) return ret;
+ if(dict->Open() != OK) return NOTOK;
+ if(meta->Open() != OK) return NOTOK;
+ if(dead->Open() != OK) return NOTOK;
+
+ isopen = 1;
+
+ Meta()->Unlock("open", lock);
+
+ return ret;
+}
+
+// *****************************************************************************
+//
+int WordListOne::Close()
+{
+ if(isopen) {
+ if(db->Close() != 0) return NOTOK;
+ if(dict->Close() != 0) return NOTOK;
+ if(meta->Close() != 0) return NOTOK;
+ if(dead->Close() != 0) return NOTOK;
+ isopen = 0;
+ }
+
+ {
+ WordDBCompress* compressor = GetCompressor();
+ if(compressor) {
+ delete compressor;
+ SetCompressor(0);
+ }
+ delete context->GetDBInfo().dbenv->mp_cmpr_info;
+ context->GetDBInfo().dbenv->mp_cmpr_info = 0;
+ context->GetDBInfo().dbenv->flags &= ~DB_ENV_CMPR;
+ }
+
+ return OK;
+}
+
+// ****************************************************************************
+//
+unsigned int WordListOne::Size() const
+{
+ return db->Size();
+}
+
+// ****************************************************************************
+//
+int WordListOne::Override(const WordReference& arg)
+{
+ if (arg.GetWord().length() == 0) {
+ fprintf(stderr, "WordListOne::Override(%s) word is zero length\n", (char*)arg.Get());
+ return NOTOK;
+ }
+ if (!arg.Key().Filled()) {
+ fprintf(stderr, "WordListOne::Override(%s) key is not fully defined\n", (char*)arg.Get());
+ return NOTOK;
+ }
+
+ WordType& wtype = context->GetType();
+ WordReference wordRef(arg);
+ String word = wordRef.GetWord();
+ if(wtype.Normalize(word) & WORD_NORMALIZE_NOTOK)
+ return NOTOK;
+ wordRef.SetWord(word);
+ unsigned int wordid = 0;
+ if(dict->SerialRef(word, wordid) != OK) return NOTOK;
+ wordRef.Key().Set(WORD_KEY_WORD, wordid);
+
+ int ret = NOTOK;
+
+ if(caches) {
+ String key;
+ String record;
+ if(wordRef.Pack(key, record) != OK)
+ return NOTOK;
+ ret = caches->Add(key.get(), key.length(), record.get(), record.length()) == 0 ? OK : NOTOK;
+ if(caches->Full()) caches->Merge(*db);
+ } else {
+ ret = db->Put(wordRef, 0) == 0 ? OK : NOTOK;
+ }
+
+ return ret;
+}
+
+
+// *****************************************************************************
+//
+List *WordListOne::operator [] (const WordReference& wordRef)
+{
+ return Collect(wordRef);
+}
+
+// *****************************************************************************
+//
+List *WordListOne::Prefix (const WordReference& prefix)
+{
+ List* result = new List();
+ WordDictCursor* cursor = Dict()->CursorPrefix(prefix.GetWord());
+ String word;
+ WordDictRecord record;
+ WordReference prefix2(prefix);
+ while(Dict()->NextPrefix(cursor, word, record) == 0) {
+ prefix2.Key().Set(WORD_KEY_WORD, record.Id());
+ List* tmp_result = Collect(prefix2);
+ while(tmp_result->Count() > 0) {
+ WordReference* entry = (WordReference*)tmp_result->Shift(LIST_REMOVE_RELEASE);
+ entry->SetWord(word);
+ result->Push(entry);
+ }
+ delete tmp_result;
+ }
+ return result;
+}
+
+// *****************************************************************************
+//
+List *WordListOne::WordRefs()
+{
+ return Collect(WordReference(context));
+}
+
+// *****************************************************************************
+//
+List *WordListOne::Collect(const WordReference& wordRef)
+{
+ WordCursor *search = Cursor(wordRef.Key(), HTDIG_WORDLIST_COLLECTOR);
+ if(search->Walk() != OK) return 0;
+ List* result = search->GetResults();
+ delete search;
+ return result;
+}
+
+// *****************************************************************************
+//
+int
+WordListOne::Read(FILE* f)
+{
+ WordReference wordRef(context);
+#define WORD_BUFFER_SIZE 1024
+ char buffer[WORD_BUFFER_SIZE + 1];
+ String line;
+ int line_number = 0;
+ int inserted = 0;
+
+ BatchStart();
+
+ String key;
+ String record;
+
+ while(fgets(buffer, WORD_BUFFER_SIZE, f)) {
+ line_number++;
+ int buffer_length = strlen(buffer);
+ int eol = buffer[buffer_length - 1] == '\n';
+
+ if(eol) buffer[--buffer_length] = '\0';
+
+ line.append(buffer, buffer_length);
+ //
+ // Join big lines
+ //
+ if(!eol) continue;
+ //
+ // If line ends with a \ continue
+ //
+ if(line.last() == '\\') {
+ line.chop(1);
+ continue;
+ }
+
+ if(!line.empty()) {
+ StringList fields(line, "\t ");
+
+ //
+ // Convert the word to a wordid
+ //
+ String* word = (String*)fields.Get_First();
+ unsigned int wordid;
+ if(dict->SerialRef(*word, wordid) != OK) return NOTOK;
+ word->trunc();
+ (*word) << wordid;
+
+ if(wordRef.SetList(fields) != OK) {
+ fprintf(stderr, "WordList::Read: line %d : %s\n", line_number, (char*)line);
+ fprintf(stderr, " cannot build WordReference (ignored)\n");
+ } else {
+ if(wordRef.Pack(key, record) != OK) {
+ fprintf(stderr, "WordList::Read: line %d : %s\n", line_number, (char*)line);
+ fprintf(stderr, " pack failed (ignored)\n");
+ } else {
+ caches->Add(key.get(), key.length(), record.get(), record.length());
+ inserted++;
+ }
+ if(verbose && (inserted % 10000 == 0)) fprintf(stderr, "WordList::Read: inserted %d entries\n", inserted);
+ if(verbose > 1) fprintf(stderr, "WordList::Read: inserting %s\n", (char*)wordRef.Get());
+ }
+
+ line.trunc();
+ }
+ }
+
+ BatchEnd();
+
+ return inserted;
+}
+
+// *****************************************************************************
+//
+// streaming operators for ascii dumping and reading a list
+class FileOutData : public Object
+{
+public:
+ FILE* f;
+ String word;
+ FileOutData(FILE* f_arg) : f(f_arg) { }
+};
+
+// *****************************************************************************
+//
+static int
+wordlist_walk_callback_file_out(WordList *, WordDBCursor& , const WordReference *wordRef, Object &ndata)
+{
+ FileOutData& data = (FileOutData&)ndata;
+ ((WordReference*)wordRef)->SetWord(data.word);
+ fprintf(data.f, "%s\n", (char*)wordRef->Get());
+ return OK;
+}
+
+int WordListOne::Write(FILE* f)
+{
+ FileOutData data(f);
+ WordDictCursor* cursor = dict->Cursor();
+ int ret;
+ String word;
+ WordDictRecord wordinfo;
+ while((ret = dict->Next(cursor, word, wordinfo)) == 0) {
+ WordKey key(context);
+ key.Set(WORD_KEY_WORD, wordinfo.Id());
+ data.word = word;
+ WordCursor *search = Cursor(key, wordlist_walk_callback_file_out, (Object *)&data);
+ search->Walk();
+ delete search;
+ }
+ return ret == DB_NOTFOUND ? OK : NOTOK;
+}
+
+
+// *****************************************************************************
+//
+// Callback data dedicated to Dump and dump_word communication
+//
+class DeleteWordData : public Object
+{
+public:
+ DeleteWordData() { count = 0; }
+
+ int count;
+};
+
+// *****************************************************************************
+//
+//
+static int delete_word(WordList *words, WordDBCursor &cursor, const WordReference *word, Object &data)
+{
+ WordListOne *words_one = (WordListOne*)words;
+ if(words_one->DeleteCursor(cursor) == 0) {
+ ((DeleteWordData&)data).count++;
+ return OK;
+ } else {
+ fprintf(stderr, "WordList delete_word: deleting %s failed\n", (char*)word->Get());
+ return NOTOK;
+ }
+}
+
+// *****************************************************************************
+//
+// Delete all records matching wordRef, return the number of
+// deleted records.
+//
+int WordListOne::WalkDelete(const WordReference& wordRef)
+{
+ DeleteWordData data;
+ WordKey key = wordRef.Key();
+
+ if(key.IsDefined(WORD_KEY_WORD)) {
+ WordCursor *description = Cursor(key, delete_word, &data);
+ description->Walk();
+ delete description;
+ dict->Decr(wordRef.GetWord(), data.count);
+ } else {
+ WordDictCursor* cursor = dict->Cursor();
+ int ret;
+ String word;
+ WordDictRecord wordinfo;
+ int total = 0;
+ while((ret = dict->Next(cursor, word, wordinfo)) == 0) {
+ key.Set(WORD_KEY_WORD, wordinfo.Id());
+ WordCursor *search = Cursor(key, delete_word, &data);
+ search->Walk();
+ delete search;
+ dict->Decr(word, data.count);
+ total += data.count;
+ data.count = 0;
+ }
+ data.count = total;
+ }
+ return data.count;
+}
+
+// *****************************************************************************
+//
+// Returns the reference count for word in <count> arg
+//
+int WordListOne::Noccurrence(const String& word, unsigned int& noccurrence) const
+{
+ return dict->Noccurrence(word, noccurrence);
+}
+
+WordKey WordListOne::Key(const String& bufferin)
+{
+ WordKey key(context);
+ StringList fields(bufferin, "\t ");
+ String* field = (String*)fields.Get_First();
+ unsigned int wordid;
+ Dict()->Serial(*field, wordid);
+ field->trunc();
+ (*field) << wordid;
+ key.SetList(fields);
+ return key;
+}
+
+WordReference WordListOne::Word(const String& bufferin, int exists /* = 1 */)
+{
+ WordReference wordRef(context);
+ StringList fields(bufferin, "\t ");
+ String* field = (String*)fields.Get_First();
+ if(context->GetType().Normalize(*field) & WORD_NORMALIZE_NOTOK) {
+ fprintf(stderr, "WordListOne::Word: cannot normalize word\n");
+ }
+ String word = *field;
+ unsigned int wordid;
+ if(exists)
+ Dict()->SerialExists(word, wordid);
+ else
+ Dict()->Serial(word, wordid);
+ field->trunc();
+ (*field) << wordid;
+ wordRef.SetList(fields);
+ wordRef.SetWord(word);
+ return wordRef;
+}
+
+void
+WordListOne::BatchEnd()
+{
+ if(caches) {
+ caches->Merge(*db);
+ WordList::BatchEnd();
+ }
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordListOne.h b/debian/htdig/htdig-3.2.0b6/htword/WordListOne.h
new file mode 100644
index 00000000..4d51fc81
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordListOne.h
@@ -0,0 +1,142 @@
+//
+// WordListOne.h
+//
+// NAME
+//
+// manage and use an inverted index file.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// WordContext context;
+//
+// WordList* words = context->List();
+// WordList* words = WordListOne(&context);
+//
+// DESCRIPTION
+//
+// WordList is the <i>mifluz</i> equivalent of a database handler. Each
+// WordList object is bound to an inverted index file and implements the
+// operations to create it, fill it with word occurrences and search
+// for an entry matching a given criterion.
+//
+// The general behaviour of WordListOne is described in the WordList
+// manual page. It is preferred to create a WordListOne instance by
+// setting the <i>wordlist_multi</i> configuration parameter to false
+// and calling the <b>WordContext::List</b> method.
+//
+// Only the methods that differ from WordList are listed here.
+// All the methods of WordList are implemented by WordListOne and
+// you should refer to the manual page for more information.
+//
+// The <b>Cursor</b> methods all return a WordCursorOne instance
+// cast to a WordCursor object.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordListOne.h,v 1.4 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordListOne_h_
+#define _WordListOne_h_
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "WordList.h"
+#include "WordCursorOne.h"
+#include "WordDict.h"
+#include "WordMeta.h"
+#include "WordDead.h"
+
+class WordContext;
+
+//
+// Inverted index interface
+//
+class WordListOne : public WordList
+{
+ public:
+ //-
+ // Constructor. Build inverted index handling object using
+ // run time configuration parameters listed in the <b>CONFIGURATION</b>
+ // section of the <b>WordList</b> manual page.
+ //
+ WordListOne(WordContext* ncontext);
+ virtual ~WordListOne();
+
+ virtual int Override(const WordReference& wordRef);
+
+ //-
+ // True (OK) only when the entry is present in the index *and* not
+ // marked as dead.
+ virtual inline int Exists(const WordReference& wordRef) {
+ return (!Dead()->Exists(wordRef.Key()) && db->Exists(wordRef) == 0) ? OK : NOTOK; }
+
+ virtual int WalkDelete(const WordReference& wordRef);
+ //-
+ // Remove the entry from the index; on success also drop one
+ // dictionary reference for the word.
+ virtual inline int Delete(const WordReference& wordRef) {
+ if(db->Del(wordRef) == 0)
+ return dict->Unref(wordRef.GetWord());
+ else
+ return NOTOK;
+ }
+ //-
+ // Delete the inverted index entry currently pointed to by the
+ // <b>cursor.</b>
+ // Returns 0 on success, Berkeley DB error code on error. This
+ // is mainly useful when implementing a callback function for
+ // a <b>WordCursor.</b>
+ //
+ int DeleteCursor(WordDBCursor& cursor) { return cursor.Del(); }
+
+ virtual int Open(const String& filename, int mode);
+ virtual int Close();
+ virtual unsigned int Size() const;
+ virtual int Pagesize() const {
+ Configuration& config = context->GetConfiguration();
+
+ return config.Value("wordlist_page_size", 0);
+ }
+
+ virtual inline WordDict *Dict() { return dict; }
+ virtual inline WordMeta *Meta() { return meta; }
+ virtual inline WordDead *Dead() { return dead; }
+
+ virtual List *operator [] (const WordReference& wordRef);
+ virtual List *Prefix (const WordReference& prefix);
+
+ virtual List *Words() { return dict->Words(); }
+ virtual List *WordRefs();
+
+ // All three Cursor variants return a WordCursorOne upcast to
+ // WordCursor; the caller owns and must delete the returned object.
+ virtual inline WordCursor *Cursor(wordlist_walk_callback_t callback, Object *callback_data) { return new WordCursorOne(this, callback, callback_data); }
+ virtual inline WordCursor *Cursor(const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { return new WordCursorOne(this, searchKey, action); }
+ virtual inline WordCursor *Cursor(const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { return new WordCursorOne(this, searchKey, callback, callback_data); }
+
+ virtual WordKey Key(const String& bufferin);
+
+ virtual WordReference Word(const String& bufferin, int exists = 0);
+
+ virtual void BatchEnd();
+
+ virtual int Noccurrence(const String& key, unsigned int& noccurrence) const;
+
+ virtual int Write(FILE* f);
+
+ virtual inline int WriteDict(FILE* f) { return dict->Write(f); }
+
+ virtual int Read(FILE* f);
+
+ virtual List *Collect(const WordReference& word);
+
+ // Public by design: the cooperating objects that implement the
+ // index. db: the inverted index itself; dict: word <-> serial id
+ // mapping; meta: per-index metadata (serial counters, locks);
+ // dead: entries marked deleted but not yet purged.
+ WordDB *db;
+ WordDict *dict;
+ WordMeta *meta;
+ WordDead *dead;
+};
+
+#endif /* _WordListOne_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordMeta.cc b/debian/htdig/htdig-3.2.0b6/htword/WordMeta.cc
new file mode 100644
index 00000000..66741a4e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordMeta.cc
@@ -0,0 +1,182 @@
+//
+// WordMeta.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordMeta.cc,v 1.4 2004/05/28 13:15:28 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+
+extern "C" {
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "mp.h"
+}
+
+#include "WordMeta.h"
+#include "WordListOne.h"
+
+#define WORD_META_SERIAL_SIZE (WORD_META_SERIAL_FILE + 1)
+
+// Thin wrapper around a Berkeley DB lock; the lock starts out in the
+// invalid state and is filled in by WordMeta::Lock.
+class WordLock {
+public:
+ WordLock() { lock.off = LOCK_INVALID; }
+
+ DB_LOCK lock;
+};
+
+//
+// Total size of structure must *NOT* be over 256 bytes.
+//
+// On-page layout of the meta page: the standard Berkeley DB page
+// header followed by one counter per WORD_META_SERIAL_* slot.
+typedef struct _WordMetaInfo {
+ DBMETA meta;
+ unsigned int serials[WORD_META_SERIAL_SIZE];
+} WordMetaInfo;
+
+// Private implementation state: the memory pool file backing the meta
+// database, the page number of the meta page, and a pointer to that
+// page while it is pinned in the pool.
+class WordMetaImp
+{
+public:
+ WordMetaImp() {
+ mpf = 0;
+ pgno = PGNO_INVALID;
+ info = 0;
+ }
+
+ DB_MPOOLFILE *mpf;
+ db_pgno_t pgno;
+ WordMetaInfo *info;
+};
+
+// Destructor: release the implementation object and the database
+// handle.
+// NOTE(review): Close() is not called here — presumably the caller is
+// expected to have closed the database first; confirm at call sites.
+WordMeta::~WordMeta()
+{
+ delete imp;
+ delete db;
+}
+
+// Bind this meta object to the word list <nwords> and allocate the
+// database handle and private state. Always returns OK.
+int WordMeta::Initialize(WordList* nwords)
+{
+ words = nwords;
+ db = new WordDB(nwords->GetContext()->GetDBInfo());
+ imp = new WordMetaImp();
+ return OK;
+}
+
+// Open (or create) the "meta" btree inside the word list file. On
+// first use a fresh meta page is allocated in the memory pool, zeroed,
+// and its page number is recorded under the "pgno" key so that later
+// opens can find the page again. Returns OK or NOTOK.
+int WordMeta::Open()
+{
+ const String& filename = words->Filename();
+ int flags = words->Flags();
+
+ db->set_pagesize(words->Pagesize());
+
+ if(db->Open(filename, "meta", DB_BTREE, flags, 0666, WORD_DB_DICT) != 0)
+ return NOTOK;
+
+ // Borrow the memory pool file handle from the underlying DB.
+ imp->mpf = db->db->mpf;
+
+ int ret;
+ String kpgno("pgno");
+
+ if((ret = db->Get(0, kpgno, imp->pgno, 0)) != 0 && ret != DB_NOTFOUND)
+ return NOTOK;
+
+ /*
+ * First time thru, create the meta page and initialize it.
+ */
+ if(ret == DB_NOTFOUND) {
+ if(CDB_memp_fget(imp->mpf, &imp->pgno, DB_MPOOL_NEW, (void**)&imp->info) != 0)
+ return NOTOK;
+ memset((char*)imp->info, '\0', sizeof(WordMetaInfo));
+ imp->info->meta.type = P_INVALID;
+ imp->info->meta.pgno = imp->pgno;
+ if(CDB_memp_fput(imp->mpf, (void*)imp->info, DB_MPOOL_DIRTY) != 0)
+ return NOTOK;
+
+ // Remember where the meta page lives for subsequent opens.
+ if(db->Put(0, kpgno, imp->pgno, 0) != 0)
+ return NOTOK;
+ }
+
+ return OK;
+}
+
+// Close the meta database. Returns OK on success, NOTOK otherwise.
+int WordMeta::Close()
+{
+ return db->Close() == 0 ? OK : NOTOK;
+}
+
+// Increment the serial counter <what> (a WORD_META_SERIAL_* index) and
+// return the new value in <serial> — so the first value ever handed
+// out is 1, and WORD_META_SERIAL_INVALID (0) is never returned on
+// success. The page is pinned, updated and written back dirty. On
+// failure <serial> is left as WORD_META_SERIAL_INVALID.
+int WordMeta::Serial(int what, unsigned int& serial)
+{
+ serial = WORD_META_SERIAL_INVALID;
+ if(CDB_memp_fget(imp->mpf, &imp->pgno, 0, (void**)&imp->info) != 0)
+ return NOTOK;
+ serial = ++imp->info->serials[what];
+ if(CDB_memp_fput(imp->mpf, (void*)imp->info, DB_MPOOL_DIRTY) != 0)
+ return NOTOK;
+
+ return OK;
+}
+
+// Read the current value of serial counter <what> without modifying
+// it; the page is pinned for the read and released clean.
+int WordMeta::GetSerial(int what, unsigned int& serial)
+{
+ serial = WORD_META_SERIAL_INVALID;
+ if(CDB_memp_fget(imp->mpf, &imp->pgno, 0, (void**)&imp->info) != 0)
+ return NOTOK;
+ serial = imp->info->serials[what];
+ if(CDB_memp_fput(imp->mpf, (void*)imp->info, 0) != 0)
+ return NOTOK;
+
+ return OK;
+}
+
+// Overwrite serial counter <what> with <serial>; the page is written
+// back dirty.
+int WordMeta::SetSerial(int what, unsigned int serial)
+{
+ if(CDB_memp_fget(imp->mpf, &imp->pgno, 0, (void**)&imp->info) != 0)
+ return NOTOK;
+ imp->info->serials[what] = serial;
+ if(CDB_memp_fput(imp->mpf, (void*)imp->info, DB_MPOOL_DIRTY) != 0)
+ return NOTOK;
+
+ return OK;
+}
+
+// Acquire a write lock named <resource> from the Berkeley DB lock
+// subsystem. On success a newly allocated WordLock is returned via
+// <lock> (owned by the caller, to be released with Unlock); on failure
+// <lock> is set to null and NOTOK is returned.
+int WordMeta::Lock(const String& resource, WordLock*& lock)
+{
+ lock = new WordLock;
+ DB_ENV* dbenv = words->GetContext()->GetDBInfo().dbenv;
+ u_int32_t id;
+ if(CDB_lock_id(dbenv, &id) != 0) {
+ delete lock;
+ lock = 0;
+ return NOTOK;
+ }
+ // NOTE(review): obj is not zeroed before use; only size/data are
+ // set. Confirm the CDB_lock_get implementation ignores the other
+ // DBT fields.
+ DBT obj;
+ obj.size = resource.length();
+ obj.data = (void*)resource.get();
+ if(CDB_lock_get(dbenv, id, 0, &obj, DB_LOCK_WRITE, &lock->lock) != 0) {
+ delete lock;
+ lock = 0;
+ return NOTOK;
+ }
+ return OK;
+}
+
+// Release a lock previously obtained with Lock(). The WordLock object
+// is always deleted and <lock> reset to null, even on failure.
+int WordMeta::Unlock(const String& resource, WordLock*& lock)
+{
+ DB_ENV* dbenv = words->GetContext()->GetDBInfo().dbenv;
+
+ int ret = CDB_lock_put(dbenv, &lock->lock);
+
+ delete lock;
+ lock = 0;
+
+ return ret == 0 ? OK : NOTOK;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordMeta.h b/debian/htdig/htdig-3.2.0b6/htword/WordMeta.h
new file mode 100644
index 00000000..5bcc7f48
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordMeta.h
@@ -0,0 +1,87 @@
+//
+// WordMeta.h
+//
+// NAME
+//
+// abstract class to manage and use an inverted index file.
+//
+// SYNOPSIS
+//
+// #include <mifluz.h>
+//
+// WordContext context;
+//
+// WordMeta* words = context->Meta();
+//
+// delete words;
+//
+// DESCRIPTION
+//
+// WordMeta is the <i>mifluz</i> equivalent of a database handler. Each
+// WordMeta object is bound to an inverted index file and implements the
+// operations to create it, fill it with word occurrences and search
+// for an entry matching a given criterion.
+//
+// WordMeta is an abstract class and cannot be instantiated.
+// The <b>Meta</b> method of the class WordContext will create
+// an instance using the appropriate derived class, either WordMetaOne
+// or WordMetaMulti. Refer to the corresponding manual pages for
+// more information on their specific semantic.
+//
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordMeta.h,v 1.4 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordMeta_h_
+#define _WordMeta_h_
+
+#include <stdio.h>
+
+#include "htString.h"
+#include "WordDB.h"
+
+class WordContext;
+class WordLock;
+class WordMetaImp;
+
+//
+// Serial number range [1-2^32]
+//
+#define WORD_META_SERIAL_INVALID 0
+
+#define WORD_META_SERIAL_WORD 0
+#define WORD_META_SERIAL_FILE 1
+
+class WordMeta
+{
+ public:
+ WordMeta() { words = 0; db = 0; imp = 0; }
+ ~WordMeta();
+
+ // Two-step construction: bind to the word list and allocate the
+ // database handle and private state.
+ int Initialize(WordList* words);
+
+ int Open();
+ int Close();
+
+ // Serial counters (indexed by WORD_META_SERIAL_*): Serial()
+ // increments and returns the new value, GetSerial() reads,
+ // SetSerial() overwrites.
+ int Serial(int what, unsigned int& serial);
+ int GetSerial(int what, unsigned int& serial);
+ int SetSerial(int what, unsigned int serial);
+
+ // Named write locks implemented with the Berkeley DB lock
+ // subsystem; the WordLock handed out by Lock() must be released
+ // with Unlock().
+ int Lock(const String& resource, WordLock*& lock);
+ int Unlock(const String& resource, WordLock*& lock);
+
+ private:
+ WordList *words;
+ WordDB *db;
+ WordMetaImp *imp;
+};
+#endif /* _WordMeta_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.cc b/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.cc
new file mode 100644
index 00000000..d5f342fd
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.cc
@@ -0,0 +1,272 @@
+//
+// WordMonitor.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordMonitor.cc,v 1.7 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#include "StringList.h"
+#include "WordMonitor.h"
+
+#define WORD_MONITOR_RRD 1
+#define WORD_MONITOR_READABLE 2
+
+// Unique instance pointer, created by WordMonitor::Initialize.
+WordMonitor* WordMonitor::instance = 0;
+
+// Human readable labels for the WORD_MONITOR_* counters: index N names
+// values[N] and must stay in sync with the defines in WordMonitor.h.
+// Empty strings are skipped when reporting; the trailing 0 is the end
+// sentinel.
+// NOTE(review): should be const char* — string literals must not be
+// written through.
+char* WordMonitor::values_names[WORD_MONITOR_VALUES_SIZE] = {
+ "",
+ "C.Write",
+ "C.Read",
+ "C.Compress 1/1",
+ "C.Compress 1/2",
+ "C.Compress 1/3",
+ "C.Compress 1/4",
+ "C.Compress 1/5",
+ "C.Compress 1/6",
+ "C.Compress 1/7",
+ "C.Compress 1/8",
+ "C.Compress 1/9",
+ "C.Compress 1/10",
+ "C.Compress 1/>10",
+ "C.P_IBTREE",
+ "C.P_LBTREE",
+ "C.P_UNKNOWN",
+ "C.Put",
+ "C.Get (0)",
+ "C.Get (NEXT)",
+ "C.Get (SET_RANGE)",
+ "C.Get (Other)",
+ "G.LEVEL",
+ "G.PGNO",
+ "C.CMP",
+ 0
+};
+
+// Build the monitor from run time configuration. Reporting only
+// happens when wordlist_monitor_period is non-zero;
+// wordlist_monitor_output selects the target file (empty = stderr) and
+// the output style ("rrd" or readable).
+// NOTE(review): when the period is 0 — or the output description has
+// no fields — the 'output' member is left uninitialized, and the
+// destructor later reads it; confirm and fix upstream.
+WordMonitor::WordMonitor(const Configuration &config)
+{
+ memset((char*)values, '\0', sizeof(unsigned int) * WORD_MONITOR_VALUES_SIZE);
+ memset((char*)old_values, '\0', sizeof(unsigned int) * WORD_MONITOR_VALUES_SIZE);
+ started = elapsed = time(0);
+ output_style = WORD_MONITOR_READABLE;
+ if((period = config.Value("wordlist_monitor_period"))) {
+ const String& desc = config.Find("wordlist_monitor_output");
+ StringList fields(desc, ',');
+
+ if(fields.Count() > 0) {
+ char* filename = fields[0];
+ if(filename[0] == '\0')
+ output = stderr;
+ else {
+ // Append mode: successive runs accumulate in the same file.
+ output = fopen(filename, "a");
+ if(!output) {
+ fprintf(stderr, "WordMonitor::WordMonitor: cannot open %s for writing ", filename);
+ perror("");
+ output = stderr;
+ return;
+ }
+ }
+ if(fields.Count() > 1) {
+ char* style = fields[1];
+ if(!mystrcasecmp(style, "rrd"))
+ output_style = WORD_MONITOR_RRD;
+ else
+ output_style = WORD_MONITOR_READABLE;
+ }
+ }
+ TimerStart();
+ }
+}
+
+// Stop the timer (which emits a final report) and close the output
+// file unless it is stderr.
+// NOTE(review): see constructor — 'output' may be uninitialized here
+// when monitoring was disabled.
+WordMonitor::~WordMonitor()
+{
+ TimerStop();
+ if(output != stderr)
+ fclose(output);
+}
+
+// (Re)create the unique WordMonitor instance from <config_arg>,
+// replacing any previous one.
+void
+WordMonitor::Initialize(const Configuration &config_arg)
+{
+ if(instance != 0)
+ delete instance;
+ instance = new WordMonitor(config_arg);
+}
+
+// Format a report of all counters: absolute value, per-second rate
+// since startup, and delta / per-second rate since the previous report
+// (readable style), or a colon separated list of raw values prefixed
+// with the timestamp (rrd style).
+// Although declared const, the method refreshes old_values (constness
+// is cast away in the final memcpy) so the next call reports deltas
+// relative to this one. The local String deliberately shadows the
+// 'output' FILE* member.
+const String
+WordMonitor::Report() const
+{
+ String output;
+ int i;
+ time_t now = time(0);
+
+ if(output_style == WORD_MONITOR_RRD)
+ output << (int)now << ":";
+
+ for(i = 0; i < WORD_MONITOR_VALUES_SIZE; i++) {
+ if(!values_names[i]) break;
+ if(values_names[i][0]) {
+ if(output_style == WORD_MONITOR_READABLE) {
+ output << values_names[i] << ": " << values[i];
+ // Rates are only meaningful once at least a second passed.
+ if((now - elapsed) > 0) {
+ output << ", per sec : " << (int)(values[i] / (now - started));
+ output << ", delta : " << (values[i] - old_values[i]);
+ output << ", per sec : " << (int)((values[i] - old_values[i]) / (now - elapsed));
+ }
+ output << "|";
+ } else if(output_style == WORD_MONITOR_RRD) {
+ output << values[i] << ":";
+ }
+ }
+ }
+ memcpy((char*)old_values, (char*)values, sizeof(unsigned int) * WORD_MONITOR_VALUES_SIZE);
+ return output;
+}
+
+// SIGALRM handler: forward the tick to the unique monitor instance,
+// complaining (but not crashing) if none exists.
+static void handler_alarm(int signal)
+{
+ WordMonitor* monitor = WordMonitor::Instance();
+ if(!monitor) {
+ fprintf(stderr, "WordMonitor::handler_alarm: no instance\n");
+ return;
+ }
+ monitor->TimerClick(signal);
+}
+
+// Install the SIGALRM handler, print the report header and arm the
+// periodic timer. Refuses to start when the period is under 5 seconds.
+// If another SIGALRM handler is already installed it is put back, a
+// message is printed and monitoring is disabled: the timer cannot be
+// shared with the host application.
+void
+WordMonitor::TimerStart()
+{
+ if(period < 5) {
+ fprintf(stderr, "WordMonitor::TimerStart: wordlist_monitor_period must be > 5 (currently %d) otherwise monitoring is not accurate\n", period);
+ return;
+ }
+
+#ifndef _MSC_VER /* _WIN32 */
+ struct sigaction action;
+ struct sigaction old_action;
+ memset((char*)&action, '\0', sizeof(struct sigaction));
+ memset((char*)&old_action, '\0', sizeof(struct sigaction));
+ action.sa_handler = handler_alarm;
+ if(sigaction(SIGALRM, &action, &old_action) != 0) {
+ fprintf(stderr, "WordMonitor::TimerStart: installing SIGALRM ");
+ perror("");
+ }
+
+ if(old_action.sa_handler != SIG_DFL) {
+ fprintf(stderr, "WordMonitor::TimerStart: found an installed action while installing SIGALRM, restoring old action\n");
+ if(sigaction(SIGALRM, &old_action, NULL) != 0) {
+ fprintf(stderr, "WordMonitor::TimerStart: installing old SIGALRM ");
+ perror("");
+ }
+ return;
+ }
+#endif
+
+ fprintf(output, "----------------- WordMonitor starting -------------------\n");
+ if(output_style == WORD_MONITOR_RRD) {
+ // Emit the column header once so the rrd consumer can map values.
+ fprintf(output, "Started:%ld\n", started);
+ fprintf(output, "Period:%d\n", period);
+ fprintf(output, "Time:");
+ int i;
+ for(i = 0; i < WORD_MONITOR_VALUES_SIZE; i++) {
+ if(!values_names[i]) break;
+ if(values_names[i][0])
+ fprintf(output, "%s:", values_names[i]);
+ }
+ fprintf(output, "\n");
+ }
+ fflush(output);
+ TimerClick(0);
+}
+
+// Called on each SIGALRM (signal != 0) and once at startup (signal ==
+// 0): print a report if at least one period elapsed since the last
+// one, then re-arm the alarm.
+void
+WordMonitor::TimerClick(int signal)
+{
+ if(signal) {
+ //
+ // Do not report if less than <period> since last report.
+ //
+ if(time(0) - elapsed >= period) {
+ fprintf(output, "%s\n", (const char*)Report());
+ elapsed = time(0);
+ fflush(output);
+ }
+ }
+#ifndef _MSC_VER /* _WIN32 */
+ alarm(period);
+#endif
+}
+
+// Cancel the alarm, restore the default SIGALRM disposition and emit a
+// final report. No-op when monitoring was never started (period == 0);
+// on Windows (_MSC_VER) nothing is printed at all.
+void
+WordMonitor::TimerStop()
+{
+ if(period > 0) {
+#ifndef _MSC_VER /* _WIN32 */
+ alarm(0);
+ struct sigaction action;
+ memset((char*)&action, '\0', sizeof(struct sigaction));
+ action.sa_handler = SIG_DFL;
+ if(sigaction(SIGALRM, &action, NULL) != 0) {
+ fprintf(stderr, "WordMonitor::TimerStart: resetting SIGALRM to SIG_DFL ");
+ perror("");
+ }
+
+ // Make sure last report is at least one second older than the previous one.
+ //
+ if(time(0) - elapsed < 1)
+ sleep(2);
+ fprintf(output, "%s\n", (const char*)Report());
+ fprintf(output, "----------------- WordMonitor finished -------------------\n");
+#endif
+ }
+}
+
+//
+// C interface to WordMonitor instance
+//
+// Each wrapper is a no-op (or returns 0) when no monitor instance
+// exists, so C code may call them unconditionally.
+extern "C" {
+ void word_monitor_click()
+ {
+ WordMonitor* monitor = WordMonitor::Instance();
+#ifndef _MSC_VER /* _WIN32 */
+ if(monitor)
+ monitor->TimerClick(SIGALRM);
+#endif
+ }
+ void word_monitor_add(int index, unsigned int value)
+ {
+ WordMonitor* monitor = WordMonitor::Instance();
+ if(monitor)
+ monitor->Add(index, value);
+ }
+ void word_monitor_set(int index, unsigned int value)
+ {
+ WordMonitor* monitor = WordMonitor::Instance();
+ if(monitor)
+ monitor->Set(index, value);
+ }
+ unsigned int word_monitor_get(int index)
+ {
+ WordMonitor* monitor = WordMonitor::Instance();
+ if(monitor)
+ return monitor->Get(index);
+ else
+ return 0;
+ }
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.h b/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.h
new file mode 100644
index 00000000..c1ce3c7e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordMonitor.h
@@ -0,0 +1,141 @@
+//
+// WordMonitor.h
+//
+// NAME
+// monitoring classes activity.
+//
+// SYNOPSIS
+//
+// Only called thru WordContext::Initialize()
+//
+// DESCRIPTION
+//
+// The test directory contains a <i>benchmark-report</i> script used to generate
+// and archive graphs from the output of <i>WordMonitor</i>.
+//
+// CONFIGURATION
+//
+// wordlist_monitor_period <sec> (default 0)
+// If the value <b>sec</b> is a positive integer, set a timer to
+// print reports every <b>sec</b> seconds. The timer is set using
+// the ALRM signal and will fail if the calling application already
+// has a handler on that signal.
+//
+// wordlist_monitor_output <file>[,{rrd,readable}] (default stderr)
+// Print reports on <b>file</b> instead of the default <b>stderr</b>.
+// If <b>type</b> is set to <b>rrd</b> the output is fit for the
+// <i>benchmark-report</i> script. Otherwise it is a (hardly :-) readable
+// string.
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordMonitor.h,v 1.5 2004/05/28 13:15:28 lha Exp $
+//
+#ifndef _WordMonitor_h_
+#define _WordMonitor_h_
+
+#include <stdio.h>
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+// Counter indices into WordMonitor::values; must stay in sync with
+// the values_names table in WordMonitor.cc.
+#define WORD_MONITOR_WRITE 1
+#define WORD_MONITOR_READ 2
+#define WORD_MONITOR_COMPRESS_01 3
+#define WORD_MONITOR_COMPRESS_02 4
+#define WORD_MONITOR_COMPRESS_03 5
+#define WORD_MONITOR_COMPRESS_04 6
+#define WORD_MONITOR_COMPRESS_05 7
+#define WORD_MONITOR_COMPRESS_06 8
+#define WORD_MONITOR_COMPRESS_07 9
+#define WORD_MONITOR_COMPRESS_08 10
+#define WORD_MONITOR_COMPRESS_09 11
+#define WORD_MONITOR_COMPRESS_10 12
+#define WORD_MONITOR_COMPRESS_MORE 13
+#define WORD_MONITOR_PAGE_IBTREE 14
+#define WORD_MONITOR_PAGE_LBTREE 15
+#define WORD_MONITOR_PAGE_UNKNOWN 16
+#define WORD_MONITOR_PUT 17
+#define WORD_MONITOR_GET 18
+#define WORD_MONITOR_GET_NEXT 19
+#define WORD_MONITOR_GET_SET_RANGE 20
+#define WORD_MONITOR_GET_OTHER 21
+#define WORD_MONITOR_LEVEL 22
+#define WORD_MONITOR_PGNO 23
+#define WORD_MONITOR_CMP 24
+
+// Array size leaves room for growth beyond the 24 indices used above.
+#define WORD_MONITOR_VALUES_SIZE 50
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ // C callable wrappers around the unique WordMonitor instance (see
+ // WordMonitor.cc); safe to call when no instance exists.
+ void word_monitor_click();
+ void word_monitor_add(int index, unsigned int value);
+ void word_monitor_set(int index, unsigned int value);
+ unsigned int word_monitor_get(int index);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+
+#include "Configuration.h"
+#include "htString.h"
+
+class WordMonitor {
+ public:
+ WordMonitor(const Configuration &config);
+ ~WordMonitor();
+
+ //
+ // Unique instance handlers
+ //
+ static void Initialize(const Configuration& config);
+ static WordMonitor* Instance() { return instance; }
+
+ // Unchecked counter access: index must be a WORD_MONITOR_* value.
+ void Add(int index, unsigned int value) { values[index] += value; }
+ void Set(int index, unsigned int value) { values[index] = value; }
+ unsigned int Get(int index) { return values[index]; }
+
+ const String Report() const;
+
+ void TimerStart();
+ void TimerClick(int signal);
+ void TimerStop();
+
+ private:
+ // Current counter values and the snapshot taken at the last report
+ // (used to compute deltas).
+ unsigned int values[WORD_MONITOR_VALUES_SIZE];
+ unsigned int old_values[WORD_MONITOR_VALUES_SIZE];
+ time_t started; // time the monitor was created
+ time_t elapsed; // time of the last report
+ int period; // seconds between reports, 0 = disabled
+ FILE* output; // report target (only set when period != 0)
+ int output_style; // WORD_MONITOR_RRD or WORD_MONITOR_READABLE
+ static char* values_names[WORD_MONITOR_VALUES_SIZE];
+
+ //
+ // Unique instance pointer
+ //
+ static WordMonitor* instance;
+};
+
+#endif /* __cplusplus */
+
+#endif /* _WordMonitor_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordRecord.cc b/debian/htdig/htdig-3.2.0b6/htword/WordRecord.cc
new file mode 100644
index 00000000..6f5ea443
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordRecord.cc
@@ -0,0 +1,144 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+// WordRecord.cc
+//
+// WordRecord: data portion of the inverted index database
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "WordRecord.h"
+
+//
+// WordRecord implementation
+//
+
+//
+// Convert the whole structure to an ascii string description
+//
+// Fill <buffer> with the ASCII form of the record according to its
+// type: the data integer for DATA, "noccurrence\tndoc" for STATS, the
+// empty string for NONE. Returns OK, or NOTOK on an unknown type.
+int
+WordRecord::Get(String& buffer) const
+{
+ buffer.trunc();
+
+ switch(type) {
+
+ case WORD_RECORD_DATA:
+ buffer << info.data;
+ break;
+
+ case WORD_RECORD_STATS:
+ buffer << info.stats.noccurrence << "\t";
+ buffer << info.stats.ndoc;
+ break;
+
+ case WORD_RECORD_NONE:
+ break;
+
+ default:
+ fprintf(stderr, "WordRecord::Get: unknown type %d\n", type);
+ return NOTOK;
+ break;
+ }
+
+ return OK;
+}
+
+// Convenience overload: return the ASCII form by value. Errors from
+// Get(String&) are silently ignored (an empty string is returned).
+String
+WordRecord::Get() const
+{
+ String tmp;
+ Get(tmp);
+ return tmp;
+}
+
+//
+// Set a record from an ascii representation
+//
+int
+WordRecord::Set(const String& buffer)
+{
+ // Split on tabs/spaces and delegate the parsing to SetList.
+ StringList fields(buffer, "\t ");
+ return SetList(fields);
+}
+
+// Consume leading fields of <fields> according to the record type: one
+// integer for DATA, two integers (noccurrence then ndoc) for STATS,
+// nothing for NONE. Consumed fields are removed from the list so the
+// caller can continue parsing. Returns NOTOK when a required field is
+// missing; an unknown type is only reported on stderr and returns OK.
+int
+WordRecord::SetList(StringList& fields)
+{
+ int i = 0;
+
+ switch(type)
+ {
+
+ case WORD_RECORD_DATA:
+ {
+ String* field = (String*)fields.Get_First();
+
+ if(field == 0) {
+ fprintf(stderr, "WordRecord::Set: failed to retrieve field %d\n", i);
+ return NOTOK;
+ }
+ info.data = (unsigned int)atoi(field->get());
+ fields.Remove(field);
+ i++;
+ }
+ break;
+
+ case WORD_RECORD_STATS:
+ {
+ String* field = (String*)fields.Get_First();
+
+ if(field == 0) {
+ fprintf(stderr, "WordRecord::Set: failed to retrieve field %d\n", i);
+ return NOTOK;
+ }
+ info.stats.noccurrence = (unsigned int)atoi(field->get());
+ fields.Remove(field);
+ i++;
+
+ field = (String*)fields.Get_First();
+
+ if(field == 0) {
+ fprintf(stderr, "WordRecord::Set: failed to retrieve field %d\n", i);
+ return NOTOK;
+ }
+ info.stats.ndoc = (unsigned int)atoi(field->get());
+ fields.Remove(field);
+ i++;
+ }
+ break;
+
+ case WORD_RECORD_NONE:
+ break;
+
+ default:
+ fprintf(stderr, "WordRecord::Set: unknown type %d\n", type);
+ break;
+ }
+
+ return OK;
+}
+
+// Write the ASCII form of the record to <f>. Always returns 0.
+int
+WordRecord::Write(FILE* f) const
+{
+ String tmp;
+ Get(tmp);
+ fprintf(f, "%s", (char*)tmp);
+ return 0;
+}
+
+// Debugging helper: dump the record to stderr.
+void
+WordRecord::Print() const
+{
+ Write(stderr);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordRecord.h b/debian/htdig/htdig-3.2.0b6/htword/WordRecord.h
new file mode 100644
index 00000000..feeff089
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordRecord.h
@@ -0,0 +1,198 @@
+//
+// WordRecord.h
+//
+// NAME
+// inverted index record.
+//
+// SYNOPSIS
+//
+// #include <WordRecord.h>
+//
+// WordRecord record();
+// if(record.DefaultType() == WORD_RECORD_DATA) {
+// record.info.data = ...
+// }
+//
+// DESCRIPTION
+//
+// The record can only contain one integer, if the default record
+// type (see CONFIGURATION in <i>WordKeyInfo</i>) is set to <i>DATA.</i>
+// If the default type is set to <i>NONE</i> the record does not contain
+// any usable information.
+//
+// ASCII FORMAT
+//
+// If default type is <i>DATA</i> it is the decimal representation of
+// an integer. If default type is <i>NONE</i> it is the empty string.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordRecord.h,v 1.10 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordRecord_h_
+#define _WordRecord_h_
+
+#ifndef SWIG
+#include "HtPack.h"
+#include "StringList.h"
+#include "Configuration.h"
+#include "WordRecordInfo.h"
+#endif /* SWIG */
+
+/* And this is how we will compress this structure, for disk
+ storage. See HtPack.h (If there's a portable method by
+ which this format string does not have to be specified at
+ all, it should be preferred. For now, at least it is kept
+ here, together with the actual struct declaration.)
+
+   Since none of the values are negative, we want to use
+   unsigned chars and unsigned short ints when possible. */
+
+#ifndef SWIG
+#define WORD_RECORD_DATA_FORMAT "u"
+#define WORD_RECORD_STATS_FORMAT "u2"
+#endif /* SWIG */
+
+//
+// Statistical information on a word
+//
+class WordRecordStat {
+ public:
+ unsigned int noccurrence; // total number of occurrences of the word
+ unsigned int ndoc; // number of documents (semantics defined by WordStat)
+};
+
+//
+// The data members of WordRecord. Should really be a union but
+// is quite difficult to handle properly for scripting language
+// interfaces. Being a class rather than a union, both members are
+// always present; WordRecord::type says which one is meaningful.
+//
+class WordRecordStorage {
+ public:
+ //
+ // Arbitrary data
+ //
+ unsigned int data;
+ //
+ // Statistical data used by WordStat
+ //
+ WordRecordStat stats;
+};
+
+//
+// Describe the data associated with a key (WordKey)
+//
+// If type is:
+// WORD_RECORD_DATA info.data is valid
+// WORD_RECORD_STATS info.stats is valid
+// WORD_RECORD_NONE nothing valid
+//
+class WordRecord
+{
+ public:
+ WordRecord() { Clear(); }
+
+ // Reset to an all-zero record of the configured default type.
+ void Clear() { memset((char*)&info, '\0', sizeof(info)); type = DefaultType(); }
+
+#ifndef SWIG
+ //
+ // Convenience functions to access record structure information (see WordRecordInfo.h)
+ //
+ static inline const WordRecordInfo* Info() { return WordRecordInfo::Instance(); }
+#endif /* SWIG */
+ static inline int DefaultType() { return Info()->default_type; }
+
+#ifndef SWIG
+ // Serialize the record into <packed> with htPack according to the
+ // record type (empty string for NONE). Returns OK or NOTOK.
+ int Pack(String& packed) const {
+ switch(type) {
+
+ case WORD_RECORD_DATA:
+ packed = htPack(WORD_RECORD_DATA_FORMAT, (char *)&info.data);
+ break;
+
+ case WORD_RECORD_STATS:
+ packed = htPack(WORD_RECORD_STATS_FORMAT, (char *)&info.stats);
+ break;
+
+ case WORD_RECORD_NONE:
+ packed.trunc();
+ break;
+
+ default:
+ fprintf(stderr, "WordRecord::Pack: unknown type %d\n", type);
+ return NOTOK;
+ break;
+ }
+ return OK;
+ }
+
+ // Inverse of Pack: decode <packed> into the member matching the
+ // current record type, checking the decoded length.
+ int Unpack(const String& packed) {
+ String decompressed;
+
+ switch(type) {
+
+ case WORD_RECORD_DATA:
+ decompressed = htUnpack(WORD_RECORD_DATA_FORMAT, packed);
+ if(decompressed.length() != sizeof(info.data)) {
+ fprintf(stderr, "WordRecord::Unpack: decoding mismatch\n");
+ return NOTOK;
+ }
+ memcpy((char*)&info.data, (char*)decompressed, sizeof(info.data));
+ break;
+
+ case WORD_RECORD_STATS:
+ decompressed = htUnpack(WORD_RECORD_STATS_FORMAT, packed);
+ if(decompressed.length() != sizeof(info.stats)) {
+ fprintf(stderr, "WordRecord::Unpack: decoding mismatch\n");
+ return NOTOK;
+ }
+ memcpy((char*)&info.stats, (char*)decompressed, sizeof(info.stats));
+ break;
+
+ case WORD_RECORD_NONE:
+ break;
+
+ default:
+ // NOTE(review): the message says "Pack" but this is Unpack.
+ fprintf(stderr, "WordRecord::Pack: unknown type %d\n", (int)type);
+ return NOTOK;
+ break;
+ }
+
+ return OK;
+ }
+#endif /* SWIG */
+
+#ifndef SWIG
+ //
+ // Set the whole structure from ASCII string description
+ //
+ int Set(const String& bufferin);
+ int SetList(StringList& fields);
+ //
+ // Convert the whole structure to an ASCII string description
+ //
+ int Get(String& bufferout) const;
+ String Get() const;
+#endif /* SWIG */
+
+#ifndef SWIG
+ //
+ // Print object in ASCII form on FILE (uses Get)
+ //
+ int Write(FILE* f) const;
+#endif /* SWIG */
+ void Print() const;
+
+ unsigned char type; // one of the WORD_RECORD_* values
+ WordRecordStorage info; // member selected by 'type'
+};
+
+#endif /* _WordRecord_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.cc b/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.cc
new file mode 100644
index 00000000..a9a25385
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.cc
@@ -0,0 +1,51 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+//
+// WordRecordInfo.cc
+//
+// WordRecord: data portion of the inverted index database
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Configuration.h"
+#include "WordRecordInfo.h"
+
+// Unique instance pointer, created by WordRecordInfo::Initialize.
+WordRecordInfo* WordRecordInfo::instance = 0;
+
+//
+// WordRecordInfo implementation
+//
+// (Re)create the unique instance from the configuration, replacing
+// any previous one.
+void
+WordRecordInfo::Initialize(const Configuration &config)
+{
+ if(instance != 0)
+ delete instance;
+ instance = new WordRecordInfo(config);
+}
+
+// Map the wordlist_wordrecord_description configuration value to the
+// default record type: "data" -> DATA, "none" or empty -> NONE.
+// Any other value is reported on stderr and leaves the type INVALID.
+WordRecordInfo::WordRecordInfo(const Configuration& config)
+{
+ default_type = WORD_RECORD_INVALID;
+ const String &recorddesc = config["wordlist_wordrecord_description"];
+ if(!recorddesc.nocase_compare("data"))
+ {
+ default_type = WORD_RECORD_DATA;
+ }
+ else
+ if(!recorddesc.nocase_compare("none") || recorddesc.empty())
+ {
+ default_type = WORD_RECORD_NONE;
+ }
+ else
+ {
+ fprintf(stderr, "WordRecordInfo::WordRecordInfo: invalid wordlist_wordrecord_description: %s\n", (const char*)recorddesc);
+ }
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.h b/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.h
new file mode 100644
index 00000000..7f4f59ff
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordRecordInfo.h
@@ -0,0 +1,83 @@
+//
+// WordRecordInfo.h
+//
+// NAME
+// information on the record structure of the inverted index.
+//
+// SYNOPSIS
+//
+// Only called thru WordContext::Initialize()
+//
+// DESCRIPTION
+//
+// The structure of a record is very limited. It can contain
+// at most two integer (int) values.
+//
+// CONFIGURATION
+//
+// wordlist_wordrecord_description {NONE|DATA} (no default)
+// NONE: the record is empty
+// <br>
+// DATA: the record contains two integers (int)
+//
+//
+// END
+//
+// WordRecord: Record for storing word information in the word database
+// Each word occurrence is stored as a separate key/record pair.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordRecordInfo.h,v 1.4 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordRecordInfo_h_
+#define _WordRecordInfo_h_
+
+//
+// Possible values of the type data field
+//
+#define WORD_RECORD_INVALID 0
+#define WORD_RECORD_DATA 1
+#define WORD_RECORD_STATS 2
+#define WORD_RECORD_NONE 3
+
+#ifndef SWIG
+//
+// Meta information about WordRecord
+//
+// wordlist_wordrecord_description: DATA
+// use WordRecordStorage::data for each word occurrence
+// wordlist_wordrecord_description: NONE
+// or
+// wordlist_wordrecord_description not specified
+// the data associated with each word occurrence is empty
+//
+class WordRecordInfo
+{
+ public:
+  // Build from the configuration; normally only called via Initialize().
+  WordRecordInfo(const Configuration& config);
+  //
+  // Unique instance handlers
+  //
+  // (Re)create the unique instance from the configuration.
+  static void Initialize(const Configuration& config);
+  // Return the unique instance, or 0 (with a message on stderr)
+  // if Initialize() was never called.
+  static WordRecordInfo* Instance() {
+    if(instance) return instance;
+    fprintf(stderr, "WordRecordInfo::Instance: no instance\n");
+    return 0;
+  }
+
+  // One of the WORD_RECORD_* constants; WORD_RECORD_INVALID when the
+  // configured description was not recognized.
+  int default_type;
+
+  //
+  // Unique instance pointer
+  //
+  static WordRecordInfo* instance;
+};
+#endif /* SWIG */
+
+#endif /* _WordRecordInfo_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordReference.cc b/debian/htdig/htdig-3.2.0b6/htword/WordReference.cc
new file mode 100644
index 00000000..320ff418
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordReference.cc
@@ -0,0 +1,88 @@
+//
+// WordReference.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordReference.cc,v 1.8 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordReference.h"
+
+//
+// Merge the key part with other's key and take other's record as is.
+// Return value is whatever WordKey::Merge reports.
+//
+int WordReference::Merge(const WordReference& other)
+{
+  record = other.record;
+  return key.Merge(other.Key());
+}
+
+//
+// Fill the structure from its ASCII representation: the buffer is
+// split on tabs and spaces, then handed to SetList.
+//
+int
+WordReference::Set(const String& buffer)
+{
+  StringList split(buffer, "\t ");
+  return SetList(split);
+}
+
+//
+// Fill the key, then the record, from the list of ASCII fields.
+// Return OK on success, NOTOK as soon as either part fails to parse.
+//
+int
+WordReference::SetList(StringList& fields)
+{
+  Clear();
+  if(key.SetList(fields) != OK)
+    return NOTOK;
+  if(record.SetList(fields) != OK)
+    return NOTOK;
+  return OK;
+}
+
+//
+// Convert the whole structure to an ASCII string description:
+// the key description immediately followed by the record description.
+// Return OK on success, NOTOK if either conversion fails.
+//
+int
+WordReference::Get(String& buffer) const
+{
+  buffer.trunc();
+
+  String piece;
+  if(key.Get(piece) != OK) return NOTOK;
+  buffer.append(piece);
+
+  if(record.Get(piece) != OK) return NOTOK;
+  buffer.append(piece);
+
+  return OK;
+}
+
+// Return an ASCII description of the object.
+// NOTE(review): unlike Get(String&), this converts only the key part
+// and ignores the record — confirm whether that asymmetry is intended.
+String
+WordReference::Get() const
+{
+  String tmp;
+  key.Get(tmp);
+  return tmp;
+}
+
+// Print the object in ASCII form on f. Always returns 0.
+// NOTE(review): only the key part is written (record is ignored),
+// which differs from the full Get(String&) conversion — confirm.
+int WordReference::Write(FILE* f) const
+{
+  String tmp;
+  key.Get(tmp);
+  fprintf(f, "%s", (char*)tmp);
+  return 0;
+}
+
+// Print the object in ASCII form on stderr (delegates to Write).
+void WordReference::Print() const
+{
+  Write(stderr);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordReference.h b/debian/htdig/htdig-3.2.0b6/htword/WordReference.h
new file mode 100644
index 00000000..b6e1215d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordReference.h
@@ -0,0 +1,263 @@
+//
+// WordReference.h
+//
+// NAME
+// inverted index occurrence.
+//
+// SYNOPSIS
+//
+// #include <WordReference.h>
+//
+// WordReference wordRef("word");
+// WordReference wordRef();
+// WordReference wordRef(WordKey("key <DEF> 1 2"), WordRecord());
+//
+// WordKey& key = wordRef.Key();
+// WordRecord& record = wordRef.Record();
+//
+// wordRef.Clear();
+//
+// DESCRIPTION
+//
+// A <i>WordReference</i> object is an aggregate of a <i>WordKey</i> object
+// and a <i>WordRecord</i> object.
+//
+// ASCII FORMAT
+//
+// The ASCII description is a string with fields separated by tabs or
+// white space. It is made of the ASCII description of a
+// <i>WordKey</i> object immediately followed by the ASCII
+// description of a <i>WordRecord</i> object. See the corresponding
+// manual pages for more information.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordReference.h,v 1.7 2004/05/28 13:15:28 lha Exp $
+//
+#ifndef _WordReference_h_
+#define _WordReference_h_
+
+#ifndef SWIG
+#include "htString.h"
+#include "WordRecord.h"
+#include "WordKey.h"
+#endif /* SWIG */
+
+//
+// Describe the WordKey/WordRecord pair
+//
+//
+// Aggregate of a WordKey and a WordRecord. The SWIG guards keep the
+// scripting-language bindings restricted to a safe subset of the API.
+//
+class WordReference : public Object
+{
+ public:
+  //
+  // Construction/Destruction
+  //-
+  // Constructor. Build an object with empty key and empty record.
+  //
+  WordReference() {}
+#ifndef SWIG
+  //-
+  // Constructor. Build an object from disk representation of <b>key</b>
+  // and <b>record</b>.
+  //
+  WordReference(const String& key0, const String& record0) {
+    Unpack(key0, record0);
+  }
+  //-
+  // Constructor. Build an object with key word set to <b>word</b>
+  // and otherwise empty and empty record.
+  //
+  WordReference(const String& word) {
+    Clear();
+    key.SetWord(word);
+  }
+#endif /* SWIG */
+  ~WordReference() {}
+
+  //-
+  // Reset to empty key and record
+  //
+  void Clear() { key.Clear(); record.Clear(); }
+
+  //
+  // Accessors
+  //-
+  // Return the key object.
+  //
+  WordKey& Key() { return key; }
+#ifndef SWIG
+  //-
+  // Return the key object as const.
+  //
+  const WordKey& Key() const { return key; }
+#endif /* SWIG */
+  //-
+  // Return the record object.
+  //
+  WordRecord& Record() { return record; }
+#ifndef SWIG
+  //-
+  // Return the record object as const.
+  //
+  const WordRecord& Record() const { return record; }
+#endif /* SWIG */
+
+  //
+  // Conversion
+  //
+#ifdef SWIG
+%name(SetKey)
+#endif /* SWIG */
+  //-
+  // Copy <b>arg</b> in the key part of the object.
+  //
+  void Key(const WordKey& arg) { key = arg; }
+#ifndef SWIG
+  //-
+  // Set key structure from disk storage format as found in
+  // <b>packed</b> string.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int KeyUnpack(const String& packed) { return key.Unpack(packed); }
+  //
+  //-
+  // Convert key object into disk storage format as found in
+  // return the resulting string.
+  //
+  String KeyPack() const { String tmp; key.Pack(tmp); return tmp; }
+  //-
+  // Convert key object into disk storage format as found in
+  // and place the result in <b>packed</b> string.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int KeyPack(String& packed) const { return key.Pack(packed); }
+#endif /* SWIG */
+
+#ifdef SWIG
+%name(SetRecord)
+#endif /* SWIG */
+  //-
+  // Copy <b>arg</b> in the record part of the object.
+  //
+  void Record(const WordRecord& arg) { record = arg; }
+#ifndef SWIG
+  //-
+  // Set record structure from disk storage format as found in
+  // <b>packed</b> string.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int RecordUnpack(const String& packed) { return record.Unpack(packed); }
+  //-
+  // Convert record object into disk storage format as found in
+  // return the resulting string.
+  //
+  String RecordPack() const { String tmp; record.Pack(tmp); return tmp; }
+  //-
+  // Convert record object into disk storage format as found in
+  // and place the result in <b>packed</b> string.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int RecordPack(String& packed) const { return record.Pack(packed); }
+
+  //-
+  // Short hand for KeyPack(<b>ckey</b>) RecordPack(<b>crecord</b>).
+  //
+  inline int Pack(String& ckey, String& crecord) const {
+    if(key.Pack(ckey) == NOTOK) return NOTOK;
+    if(record.Pack(crecord) == NOTOK) return NOTOK;
+    return OK;
+  }
+  //-
+  // Short hand for KeyUnpack(<b>ckey</b>) RecordUnpack(<b>crecord</b>).
+  //
+  int Unpack(const String& ckey, const String& crecord) {
+    if(key.Unpack(ckey) == NOTOK) return NOTOK;
+    if(record.Unpack(crecord) == NOTOK) return NOTOK;
+    return OK;
+  }
+#endif /* SWIG */
+
+  //
+  // Transformations
+  //
+  //-
+  // Merge key with other.Key() using the <i>WordKey::Merge</i> method:
+  // key.Merge(other.Key()).
+  // See the corresponding manual page for details. Copy other.record
+  // into the record part of the object.
+  //
+  int Merge(const WordReference& other);
+#ifndef SWIG
+  //-
+  // Copy <b>master</b> before merging with <b>master.</b>Merge(<b>slave</b>)
+  // and return the copy. Prevents alteration of <b>master</b>.
+  //
+  static WordReference Merge(const WordReference& master, const WordReference& slave) {
+    WordReference tmp(master);
+    tmp.Merge(slave);
+    return tmp;
+  }
+#endif /* SWIG */
+
+#ifndef SWIG
+  // Case-insensitive comparison on the key words (Object interface).
+  int compare(Object *to) { String word(((WordReference *) to)->key.GetWord()); return key.GetWord().nocase_compare(word); }
+#endif /* SWIG */
+
+#ifndef SWIG
+  //
+  // Set the whole structure from ASCII string description
+  //
+  //-
+  // Set the whole structure from ASCII string in <b>bufferin</b>.
+  // See <i>ASCII FORMAT</i> section.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int Set(const String& bufferin);
+  int SetList(StringList& fields);
+  //-
+  // Convert the whole structure to an ASCII string description
+  // in <b>bufferout.</b>
+  // See <i>ASCII FORMAT</i> section.
+  // Return OK if successfull, NOTOK otherwise.
+  //
+  int Get(String& bufferout) const;
+  //-
+  // Convert the whole structure to an ASCII string description
+  // and return it.
+  // See <i>ASCII FORMAT</i> section.
+  //
+  String Get() const;
+#endif /* SWIG */
+
+  //
+  // Debuging
+  //
+#ifndef SWIG
+  //-
+  // Print object in ASCII form on <b>f</b>.
+  // NOTE(review): the implementation writes only the key part — see
+  // WordReference.cc; confirm against the <i>ASCII FORMAT</i> section.
+  //
+  int Write(FILE* f) const;
+#endif /* SWIG */
+  //-
+  // Print object in ASCII form on <b>stderr</b> (uses <i>Write</i> method).
+  // See <i>ASCII FORMAT</i> section.
+  //
+  void Print() const;
+
+ protected:
+
+#ifndef SWIG
+  // The two halves of the inverted index entry.
+  WordKey key;
+  WordRecord record;
+#endif /* SWIG */
+};
+
+#endif /* _WordReference_h_ */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordStat.cc b/debian/htdig/htdig-3.2.0b6/htword/WordStat.cc
new file mode 100644
index 00000000..cd9cb358
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordStat.cc
@@ -0,0 +1,19 @@
+//
+// WordStat.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordStat.cc,v 1.5 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordStat.h"
+
+// Lazily-created sentinel returned by WordStat::Last(); see WordStat.h.
+WordReference* WordStat::word_stat_last = 0;
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordStat.h b/debian/htdig/htdig-3.2.0b6/htword/WordStat.h
new file mode 100644
index 00000000..b2889687
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordStat.h
@@ -0,0 +1,60 @@
+//
+// WordStat.h
+//
+// WordStat: Kind of record that holds statistics about each distinct word
+// in the database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordStat.h,v 1.5 2004/05/28 13:15:28 lha Exp $
+//
+#ifndef _WordStat_h_
+#define _WordStat_h_
+
+#include "WordReference.h"
+
+class WordStat : public WordReference
+{
+ public:
+  //
+  // Construction/Destruction
+  //
+  // Every constructor tags the record as statistics so these entries
+  // are distinguishable from regular word occurrences.
+  WordStat() { record.type = WORD_RECORD_STATS; }
+  WordStat(const String& key_arg, const String& record_arg) : WordReference(key_arg, record_arg) {
+    record.type = WORD_RECORD_STATS;
+  }
+  // The "\001" prefix keeps statistics keys in a byte range of their
+  // own, below every regular word in sort order.
+  WordStat(const String& word) {
+    Clear();
+    key.SetWord(String("\001") + word);
+    record.type = WORD_RECORD_STATS;
+  }
+
+  ~WordStat() {}
+
+  //
+  // Accessors
+  //
+  // Occurrence count of the word: read-only and writable reference.
+  unsigned int Noccurrence() const { return record.info.stats.noccurrence; }
+  unsigned int &Noccurrence() { return record.info.stats.noccurrence; }
+
+  //
+  // Return upper boundary key of reference count records
+  //
+  // Lazily builds a shared "\002" sentinel, one byte above every
+  // "\001"-prefixed statistics key. The object is allocated once and
+  // never freed (lives for the whole process).
+  static inline const WordReference& Last() {
+    if(!word_stat_last)
+      word_stat_last = new WordReference("\002");
+    return *word_stat_last;
+  }
+
+ protected:
+
+  static WordReference* word_stat_last;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordType.cc b/debian/htdig/htdig-3.2.0b6/htword/WordType.cc
new file mode 100644
index 00000000..355f1380
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordType.cc
@@ -0,0 +1,219 @@
+//
+// WordType.cc
+//
+// WordType: Wrap some attributes to make is...() type
+// functions and other common functions without having to manage
+// the attributes or the exact attribute combination semantics.
+// Configuration parameter used:
+// valid_punctuation,extra_word_characters,minimum_word_length,
+// maximum_word_length,allow_numbers,bad_word_list
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordType.cc,v 1.9 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <ctype.h>
+#include <stdio.h>
+
+#include "WordType.h"
+
+// Unique instance pointer, created/replaced by WordType::Initialize().
+WordType* WordType::instance = 0;
+
+//
+// (Re)create the unique WordType instance from the configuration.
+// Any previous instance is destroyed first.
+//
+void
+WordType::Initialize(const Configuration &config_arg)
+{
+  if(instance != 0) {
+    delete instance;
+    //
+    // Clear the pointer before reallocation so that a constructor
+    // throwing below cannot leave a dangling instance behind.
+    //
+    instance = 0;
+  }
+  instance = new WordType(config_arg);
+}
+
+// Build the character classification table and the bad-word list from
+// the configuration: valid_punctuation, extra_word_characters,
+// minimum_word_length, maximum_word_length, allow_numbers,
+// bad_word_list.
+WordType::WordType(const Configuration &config)
+{
+  const String valid_punct = config["valid_punctuation"];
+  const String extra_word_chars = config["extra_word_characters"];
+
+  minimum_length = config.Value("minimum_word_length", 3);
+  maximum_length = config.Value("maximum_word_length", 12);
+  allow_numbers = config.Boolean("allow_numbers", 0);
+
+  extra_word_characters = extra_word_chars;
+  valid_punctuation = valid_punct;
+  other_chars_in_word = extra_word_chars;
+  other_chars_in_word.append(valid_punct);
+
+  // Build the per-byte type lookup table. Index 0 (NUL) stays 0 and
+  // the loop starts at 1, so strchr cannot falsely match the
+  // terminating NUL of the configured character strings.
+  chrtypes[0] = 0;
+  for (int i = 1; i < 256; i++)
+  {
+    chrtypes[i] = 0;
+    if (isalpha(i))
+      chrtypes[i] |= WORD_TYPE_ALPHA;
+    if (isdigit(i))
+      chrtypes[i] |= WORD_TYPE_DIGIT;
+    if (iscntrl(i))
+      chrtypes[i] |= WORD_TYPE_CONTROL;
+    if (strchr(extra_word_chars, i))
+      chrtypes[i] |= WORD_TYPE_EXTRA;
+    if (strchr(valid_punct, i))
+      chrtypes[i] |= WORD_TYPE_VALIDPUNCT;
+  }
+
+  {
+    const String filename = config["bad_word_list"];
+    // Best effort: a missing or unreadable bad_word_list file is
+    // silently ignored (fl stays NULL and the loop never runs).
+    FILE *fl = fopen(filename, "r");
+    char buffer[1000];
+    char *word;
+    String new_word;
+
+    // Read in the badwords file (it's just a text file)
+    while (fl && fgets(buffer, sizeof(buffer), fl))
+    {
+      word = strtok(buffer, "\r\n \t");
+      if (word && *word)
+      {
+        int flags;
+        new_word = word;
+        // Bad words are stored in normalized form; words the
+        // normalizer rejects outright are reported and skipped.
+        if((flags = Normalize(new_word)) & WORD_NORMALIZE_NOTOK) {
+          fprintf(stderr, "WordType::WordType: reading bad words from %s found %s, ignored because %s\n", (const char*)filename, word, (char*)NormalizeStatus(flags & WORD_NORMALIZE_NOTOK));
+        } else {
+          badwords.Add(new_word, 0);
+        }
+      }
+    }
+
+    if (fl)
+      fclose(fl);
+  }
+}
+
+// Destructor: members clean up after themselves, nothing to do here.
+WordType::~WordType()
+{
+}
+
+//
+// Normalize a word according to configuration specifications and
+// builting transformations.
+// *EVERY* word inserted in the inverted index goes thru this. If
+// a word is rejected by Normalize there is 0% chance to find it
+// in the word database.
+//
+// Returns a bitmask of WORD_NORMALIZE_* flags; the word argument is
+// modified in place (lowercased, stripped, possibly truncated).
+// Rejection flags (WORD_NORMALIZE_NOTOK subset) cause an early return,
+// so later checks are not reported for an already-rejected word.
+//
+int
+WordType::Normalize(String& word) const
+{
+  int status = WORD_NORMALIZE_GOOD;
+
+  //
+  // Reject empty strings, always
+  //
+  if(word.empty())
+    return status | WORD_NORMALIZE_NULL;
+
+  //
+  // Always convert to lowercase
+  //
+  if(word.lowercase())
+    status |= WORD_NORMALIZE_CAPITAL;
+
+  //
+  // Remove punctuation characters according to configuration
+  //
+  if(StripPunctuation(word))
+    status |= WORD_NORMALIZE_PUNCTUATION;
+
+  //
+  // Truncate words too long according to configuration
+  //
+  // NOTE(review): maximum_length/minimum_length are int while
+  // word.length() is presumably unsigned — the comparisons rely on the
+  // configured lengths being non-negative; confirm.
+  if(word.length() > maximum_length) {
+    word.chop(word.length() - maximum_length);
+    status |= WORD_NORMALIZE_TOOLONG;
+  }
+
+  //
+  // Reject words too short according to configuration
+  //
+  if(word.length() < minimum_length)
+    return status | WORD_NORMALIZE_TOOSHORT;
+
+  //
+  // Reject if contains control characters
+  //
+  // Also records whether at least one "strict" (word-forming)
+  // character is present; digits only count when allow_numbers is set.
+  int alpha = 0;
+  for(const unsigned char *p = (const unsigned char*)(const char*)(char *)word; *p; p++) {
+    if(IsStrictChar(*p) && (allow_numbers || !IsDigit(*p))) {
+      alpha = 1;
+    } else if(IsControl(*p)) {
+      return status | WORD_NORMALIZE_CONTROL;
+    }
+  }
+
+  //
+  // Reject if contains no alpha characters (according to configuration)
+  //
+  if(!alpha) return status | WORD_NORMALIZE_NOALPHA;
+
+  //
+  // Reject if listed in config[bad_word_list]
+  //
+  if(badwords.Exists(word))
+    return status | WORD_NORMALIZE_BAD;
+
+  //
+  // Accept and report the transformations that occured
+  //
+  return status;
+}
+
+//
+// Convert the integer status returned by Normalize into a readable
+// string: one label per set flag, or "GOOD" when none is set.
+//
+String
+WordType::NormalizeStatus(int flags)
+{
+  static const struct {
+    int flag;
+    const char* label;
+  } names[] = {
+    { WORD_NORMALIZE_TOOLONG,     "TOOLONG " },
+    { WORD_NORMALIZE_TOOSHORT,    "TOOSHORT " },
+    { WORD_NORMALIZE_CAPITAL,     "CAPITAL " },
+    { WORD_NORMALIZE_NUMBER,      "NUMBER " },
+    { WORD_NORMALIZE_CONTROL,     "CONTROL " },
+    { WORD_NORMALIZE_BAD,         "BAD " },
+    { WORD_NORMALIZE_NULL,        "NULL " },
+    { WORD_NORMALIZE_PUNCTUATION, "PUNCTUATION " },
+    { WORD_NORMALIZE_NOALPHA,     "NOALPHA " },
+  };
+
+  String description;
+  for(unsigned int i = 0; i < sizeof(names)/sizeof(names[0]); i++) {
+    if(flags & names[i].flag)
+      description << names[i].label;
+  }
+
+  if(description.empty()) description << "GOOD";
+
+  return description;
+}
+
+//
+// Non-destructive tokenizer using external int as pointer into String
+// does word separation by our rules (so it can be subclassed too)
+//
+// Skips non-word characters, then collects IsChar() characters into
+// the returned token; current is advanced past the consumed input and
+// an empty String is returned at end of input.
+// (tokens is deliberately passed by value to match the virtual
+// declaration in WordType.h.)
+//
+String
+WordType::WordToken(const String tokens, int &current) const
+{
+  unsigned char text = tokens[current];
+  String ret;
+
+  // Skip leading characters that cannot start a word.
+  while (text && !IsStrictChar(text))
+    text = tokens[++current];
+
+  if (text)
+  {
+    // Accumulate the word body; punctuation is allowed inside (IsChar
+    // is wider than IsStrictChar).
+    while (text && IsChar(text))
+    {
+      ret << text;
+      text = tokens[++current];
+    }
+  }
+  return ret;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordType.h b/debian/htdig/htdig-3.2.0b6/htword/WordType.h
new file mode 100644
index 00000000..8406104e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordType.h
@@ -0,0 +1,157 @@
+//
+// WordType.h
+//
+// WordType: Wrap some attributes to make is...() type
+// functions and other common functions without having to manage
+// the attributes or the exact attribute combination semantics.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordType.h,v 1.5 2004/05/28 13:15:28 lha Exp $
+//
+
+#ifndef _WordType_h
+#define _WordType_h
+
+#include "htString.h"
+#include "Configuration.h"
+//
+// Return values of Normalize, to get them in string form use NormalizeStatus
+//
+#define WORD_NORMALIZE_GOOD 0x0000
+#define WORD_NORMALIZE_TOOLONG 0x0001
+#define WORD_NORMALIZE_TOOSHORT 0x0002
+#define WORD_NORMALIZE_CAPITAL 0x0004
+#define WORD_NORMALIZE_NUMBER 0x0008
+#define WORD_NORMALIZE_CONTROL 0x0010
+#define WORD_NORMALIZE_BAD 0x0020
+#define WORD_NORMALIZE_NULL 0x0040
+#define WORD_NORMALIZE_PUNCTUATION 0x0080
+#define WORD_NORMALIZE_NOALPHA 0x0100
+
+//
+// Under these conditions the word is said to be invalid.
+// Some conditions (NUMBER,TOOSHORT and BAD) depends on the configuration
+// parameters.
+//
+#define WORD_NORMALIZE_NOTOK (WORD_NORMALIZE_TOOSHORT| \
+ WORD_NORMALIZE_NUMBER| \
+ WORD_NORMALIZE_CONTROL| \
+ WORD_NORMALIZE_BAD| \
+ WORD_NORMALIZE_NULL| \
+ WORD_NORMALIZE_NOALPHA)
+
+class WordType
+{
+public:
+  //
+  // Constructors
+  //
+  // Build from configuration parameters: valid_punctuation,
+  // extra_word_characters, minimum_word_length, maximum_word_length,
+  // allow_numbers and bad_word_list.
+  WordType(const Configuration& config);
+
+  //
+  // Destructor
+  //
+  virtual ~WordType();
+
+  //
+  // Unique instance handlers
+  //
+  // (Re)create the unique instance from the configuration.
+  static void Initialize(const Configuration& config);
+  // Return the unique instance, or 0 (with a message on stderr)
+  // if Initialize() was never called.
+  static WordType* Instance() {
+    if(instance) return instance;
+    fprintf(stderr, "WordType::Instance: no instance\n");
+    return 0;
+  }
+
+  //
+  // Predicates (implemented inline below via the chrtypes[] table)
+  //
+  virtual int IsChar(int c) const;
+  virtual int IsStrictChar(int c) const;
+  virtual int IsDigit(int c) const;
+  virtual int IsControl(int c) const;
+
+  //
+  // Transformations
+  //
+  virtual int StripPunctuation(String &s) const;
+  virtual int Normalize(String &s) const;
+
+  //
+  // Splitting
+  //
+  virtual String WordToken(const String s, int &pointer) const;
+
+  //
+  // Error handling
+  //
+  // Convert a Normalize() flag set into a readable string.
+  static String NormalizeStatus(int flags);
+
+private:
+
+  String valid_punctuation; // The same as the attribute.
+  String extra_word_characters; // Likewise.
+  String other_chars_in_word; // Attribute "valid_punctuation" plus
+    // "extra_word_characters".
+  char chrtypes[256]; // quick lookup table for types
+  int minimum_length; // Minimum word length
+  int maximum_length; // Maximum word length
+  int allow_numbers; // True if a word may contain numbers
+  Dictionary badwords; // List of excluded words
+
+  //
+  // Unique instance pointer
+  //
+  static WordType* instance;
+};
+
+// Bits to set in chrtypes[]:
+#define WORD_TYPE_ALPHA 0x01
+#define WORD_TYPE_DIGIT 0x02
+#define WORD_TYPE_EXTRA 0x04
+#define WORD_TYPE_VALIDPUNCT 0x08
+#define WORD_TYPE_CONTROL 0x10
+
+// One for characters that when put together are a word
+// (including punctuation).
+inline int
+WordType::IsChar(int c) const
+{
+  return (chrtypes[(unsigned char)c] & (WORD_TYPE_ALPHA|WORD_TYPE_DIGIT|WORD_TYPE_EXTRA|WORD_TYPE_VALIDPUNCT)) != 0;
+}
+
+// Similar, but no punctuation characters: only those that may
+// start a word.
+inline int
+WordType::IsStrictChar(int c) const
+{
+  return (chrtypes[(unsigned char)c] & (WORD_TYPE_ALPHA|WORD_TYPE_DIGIT|WORD_TYPE_EXTRA)) != 0;
+}
+
+// Reimplementation of isdigit() using the lookup table chrtypes[]
+// (locale state was captured once at construction time).
+inline int
+WordType::IsDigit(int c) const
+{
+  return (chrtypes[(unsigned char)c] & WORD_TYPE_DIGIT) != 0;
+}
+
+// Similar to IsDigit, but for iscntrl()
+inline int
+WordType::IsControl(int c) const
+{
+  return (chrtypes[(unsigned char)c] & WORD_TYPE_CONTROL) != 0;
+}
+
+// Let caller get rid of getting and holding a configuration parameter.
+// Removes every configured valid_punctuation character from s;
+// returns whatever String::remove reports.
+inline int
+WordType::StripPunctuation(String &s) const
+{
+  return s.remove(valid_punctuation);
+}
+
+
+#endif /* _WordType_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htword/word.desc b/debian/htdig/htdig-3.2.0b6/htword/word.desc
new file mode 100644
index 00000000..7de66973
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/word.desc
@@ -0,0 +1,15 @@
+#
+# Structure of a key
+#
+
+nfields 4
+
+#NAME SIZE SORTPOSITION
+
+Location 16 3
+
+Flags 8 2
+
+DocID 32 1
+
+Word 0 0