summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/test/search.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/search.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/test/search.cc3543
1 files changed, 3543 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/search.cc b/debian/htdig/htdig-3.2.0b6/test/search.cc
new file mode 100644
index 00000000..4f5e9690
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/search.cc
@@ -0,0 +1,3543 @@
+//
+// search.cc
+//
+// search: Sample implementation of search algorithms using
+// a mifluz inverted index.
+//
+// Each class is documented in the class definition. Before
+// each method declaration a comment explains the semantic of
+// the method. In the method definition comments in the code
+// may contain additional information.
+//
+// Each virtual function is documented in the base class, not
+// in the derived classes except for semantic differences.
+//
+// The class tree is:
+//
+// WordKeySemantic
+//
+// WordExclude
+// WordExcludeMask
+// WordPermute
+//
+// WordSearch
+//
+// WordMatch
+//
+// WordTree
+// WordTreeOperand
+// WordTreeOptional
+// WordTreeOr
+// WordTreeAnd
+// WordTreeNear
+// WordTreeMandatory
+// WordTreeNot
+// WordTreeLiteral
+//
+// WordParser
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: search.cc,v 1.9 2004/05/28 13:15:29 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include <htconfig.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif /* HAVE_GETOPT_H */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+#include <stdlib.h>
+
+#include <htString.h>
+#include <WordList.h>
+#include <WordContext.h>
+#include <WordCursor.h>
+
+//
+// Verbosity level set with -v (++)
+//
+static int verbose = 0;
+
+// ************************* Document definition implementation ***********
+
+#define TAG 1
+#define SERVER 2
+#define URL 3
+#define LOCATION 4
+
+// *********************** WordKeySemantic implementation ********************
+//
+// NAME
+//
+// encapsulate WordKey semantic for document and location
+//
+// SYNOPSIS
+//
+// #include <WordKeySemantic.h>
+//
+// #define SERVER 1
+// #define URL 2
+// #define LOCATION 3
+//
+// static int document[] = {
+// SERVER,
+// URL
+// };
+//
+// WordKeySemantic semantic;
+// semantic.Initialize(document, sizeof(document)/sizeof(int), LOCATION);
+//
+// DESCRIPTION
+//
+// Encapsulate the semantic of a WordKey object fields. It defines
+// what a document and a location are. It implements the set of
+// operation that a search needs to perform given the fact that it
+// implements a search whose purpose is to retrieve a document and
+// wants to implement proximity search based on a word location.
+//
+//
+// END
+//
+// A document is a set of fields in a given order.
+// A location is a field.
+// The actual fields used to implement WordKeySemantic methods are
+// set with the Initialize method.
+//
+class WordKeySemantic {
+public:
+ WordKeySemantic();
+ ~WordKeySemantic();
+
+ //-
+ // Set the actual field numbers that define what a document is and
+ // what a location is. The <b>document_arg<b> is a list of WordKey field
+ // positions of length <b>document_length_arg</b> that must be adjacent.
+ // The <b>location_arg</b> is the WordKey field position of the word
+ // location within a document.
+ // Return OK on success, NOTOK on failure.
+ //
+ int Initialize(int* document_arg, int document_length_arg, int location_arg);
+
+ //
+ // These functions and only these know what a document is.
+ // This should really be a class containing function pointers and be
+ // given as argument to the search algorithm.
+ //
+ //-
+ // Copy the document in <b>from</b> into <b>to.</b>
+ //
+ void DocumentSet(const WordKey& from, WordKey& to);
+ //-
+ // Increment the document in <b>key</b> using the <i>SetToFollowing</i>
+ // method of WordKey. <b>uniq</b> is the WordKey position at which the
+ // increment starts.
+ //
+ void DocumentNext(WordKey& key, int uniq);
+ //-
+ // Compare the document fields defined in both <b>a</b> and <b>b</b>
+ // and return the difference a - b, as in strcmp. If all document
+ // fields in <b>a</b> or <b>b</b> are undefined return 1.
+ //
+ int DocumentCompare(const WordKey& a, const WordKey& b);
+ //-
+ // Set all document fields to 0.
+ //
+ int DocumentClear(WordKey& key);
+
+ //
+ // These functions and only these know what a location is.
+ // This should really be a class containing function pointers and be
+ // given as argument to the search algorithm.
+ //
+ //-
+ // Copy the document and location in <b>from</b> into <b>to.</b>
+ //
+ void LocationSet(const WordKey& from, WordKey& to);
+ //-
+ // Increment the document and location in <b>key</b>
+ // using the <i>SetToFollowing</i>
+ // method of WordKey.
+ //
+ void LocationNext(WordKey& key);
+ //-
+ // Compare <b>expected</b> location to <b>actual</b> location. Compares equal
+ // as long as expected location is at a maximum distance of <b>proximity</b>
+ // of actual. If <b>actual</b> only has undefined field, return > 0.
+ // <b>expected</b> must always be the lowest possible bound.
+ // <b>actual</b> is tolerated if it is greater than <b>actual</b> but not
+ // greater than <b>proximity</b> if <b>proximity</b> > 0 or abs(<b>proximity</b>) * 2 if
+ // <b>proximity</b> < 0.
+ // Return the difference expected - actual.
+ //
+ int LocationCompare(const WordKey& expected, const WordKey& actual, int proximity = 0);
+ //-
+ // <b>key</b> is the expected location of a searched key.
+ // LocationNearLowest modifies <b>key</b> to add tolerance accroding to
+ // <b>proximity</b>.
+ //
+ // The idea is that <b>key</b> will be the lowest possible match for
+ // for the <b>proximity</b> range. If <proxmity> is positive, <b>key</b>
+ // is already the lowest possible match since we accept [0 proximity].
+ // If <b>proximity</b> is negative, substract it since we accept
+ // [-proximity proximity].
+ //
+ // For better understanding see the functions in which it is used.
+ //
+ void LocationNearLowest(WordKey& key, int proximity);
+
+ //-
+ // Undefined the location field in <b>key.</b>.
+ //
+ void Location2Document(WordKey& key);
+
+protected:
+ int* document;
+ int document_length;
+ int location;
+};
+
+WordKeySemantic::WordKeySemantic()
+{
+ int nfields = WordKey::NFields();
+ document = new int[nfields];
+ document_length = 0;
+ location = -1;
+}
+
+WordKeySemantic::~WordKeySemantic()
+{
+ if(document) delete [] document;
+}
+
+int WordKeySemantic::Initialize(int* document_arg, int document_length_arg, int location_arg)
+{
+ memcpy((char*)document, (char*)document_arg, document_length_arg * sizeof(int));
+ document_length = document_length_arg;
+ location = location_arg;
+ return OK;
+}
+
+void WordKeySemantic::DocumentSet(const WordKey& from, WordKey& to)
+{
+ to.Clear();
+ for(int i = 0; i < document_length; i++)
+ to.Set(document[i], from.Get(document[i]));
+}
+
+int WordKeySemantic::DocumentCompare(const WordKey& a, const WordKey& b)
+{
+ int ret = 1;
+ for(int i = 0; i < document_length; i++) {
+ int idx = document[i];
+ if((a.IsDefined(idx) && b.IsDefined(idx)) &&
+ (ret = a.Get(idx) - b.Get(idx)) != 0) return ret;
+ }
+ return ret;
+}
+
+int WordKeySemantic::DocumentClear(WordKey& key)
+{
+ for(int i = 0; i < document_length; i++)
+ key.Set(document[i], 0);
+ return 0;
+}
+
+void WordKeySemantic::DocumentNext(WordKey& key, int uniq)
+{
+ if(uniq)
+ key.SetToFollowing(uniq);
+ else
+ key.SetToFollowing(document[document_length-1]);
+}
+
+
+void WordKeySemantic::LocationSet(const WordKey& from, WordKey& to)
+{
+ DocumentSet(from, to);
+ to.Set(location, from.Get(location));
+}
+
+int WordKeySemantic::LocationCompare(const WordKey& expected, const WordKey& actual, int proximity)
+{
+ int ret = 1;
+ if((ret = DocumentCompare(expected, actual)) != 0) return ret;
+ //
+ // Only compare location if defined.
+ //
+ if((expected.IsDefined(location) && actual.IsDefined(location)) &&
+ (ret = expected.Get(location) - actual.Get(location))) {
+ if(proximity < 0) {
+ //
+ // -N means ok if in range [-N +N]
+ //
+ proximity *= 2;
+ if(ret < 0 && ret >= proximity)
+ ret = 0;
+ } else {
+ //
+ // N means ok if in range [0 +N]
+ //
+ if(ret < 0 && ret >= -proximity)
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+void WordKeySemantic::LocationNext(WordKey& key)
+{
+ key.SetToFollowing(location);
+}
+
+void WordKeySemantic::LocationNearLowest(WordKey& key, int proximity)
+{
+ if(proximity < 0) {
+ if(key.Underflow(location, proximity))
+ key.Get(location) = 0;
+ else
+ key.Get(location) += proximity;
+ }
+}
+
+void WordKeySemantic::Location2Document(WordKey& key)
+{
+ key.Undefined(location);
+}
+
+// ************************* WordExclude implementation ********************
+//
+// NAME
+//
+// permute bits in bit field
+//
+// SYNOPSIS
+//
+// #include <WordExclude.h>
+//
+// #define BITS 5
+//
+// WordExclude permute;
+// permute.Initialize(BITS);
+// while(permute.Next() == WORD_EXCLUDE_OK)
+// ...
+//
+// DESCRIPTION
+//
+// Count from 1 to the specified maximum. A variable++ loop does the same.
+// The <b>WordExclude</b> class counts in a specific order.
+// It first step thru all the permutations containing only 1 bit set, in
+// increasing order. Then thru all the permutations containing 2 bits set,
+// in increasing order. As so forth until the maximum number is reached.
+// See the <b>Permute</b> method for more information.
+//
+//
+// END
+
+//
+// Helper that displays an unsigned int in binary/hexa/decimal
+//
+static inline void show_bits(unsigned int result)
+{
+ int i;
+ for(i = 0; i < 10; i++) {
+ fprintf(stderr, "%c", (result & (1 << i)) ? '1' : '0');
+ }
+ fprintf(stderr, " (0x%08x - %15d)\n", result, result);
+}
+
+//
+// WordExclude methods return values
+//
+#define WORD_EXCLUDE_OK 1
+#define WORD_EXCLUDE_END 2
+
+//
+// Maximum number of bits
+//
+#define WORD_EXCLUDE_MAX (sizeof(unsigned int) * 8)
+
+//
+// Convert a position <p> in a <l> bits mask into a bit offset (from 0)
+//
+#define WORD_EXCLUDE_POSITION2BIT(l,p) ((l) - (p) - 1)
+
+class WordExclude {
+public:
+ //-
+ // Reset the generator and prepare it for <b>length</b> bits generation.
+ // The <b>length</b> cannot be greater than <i>WORD_EXCLUDE_MAX.</i>
+ // Returns OK if no error occurs, NOTOK otherwise.
+ //
+ virtual int Initialize(unsigned int length);
+ //-
+ // Move to next exclude mask. Returns WORD_EXCLUDE_OK if successfull,
+ // WORD_EXCLUDE_END if at the end of the permutations. It starts by
+ // calling <i>Permute</i> with one bit set, then two and up to
+ // <i>Maxi()</i> included. The last permutation only generates one
+ // possibility since all the bits are set.
+ //
+ virtual int Next();
+ //-
+ // Exclude bit for <b>position</b> starts at most significant bit. That is
+ // position 0 exclude bit is most significant bit of the current mask.
+ // Returns true if position is excluded, false otherwise.
+ //
+ virtual inline unsigned int Excluded(int position) { return mask & (1 << WORD_EXCLUDE_POSITION2BIT(maxi, position)); }
+ //-
+ // Returns how many bits are not excluded with current mask.
+ //
+ virtual inline int NotExcludedCount() const { return maxi - bits; }
+ //-
+ // Returns how many bits are excluded with current mask.
+ //
+ virtual inline int ExcludedCount() const { return bits; }
+ //
+ // Save and restore in string
+ //
+ //-
+ // Write an ascii representation of the WordExclude object in <b>buffer.</b>
+ // Each bit is represented by the character 0 or 1. The most significant
+ // bit is the last character in the string. For instance
+ // 1000 is the string representation of a WordExclude object initialized
+ // with length = 4 after the first <i>Next</i> operation.
+ //
+ virtual void Get(String& buffer) const;
+ //-
+ // Initialize the object from the string representation in <b>buffer.</b>
+ // Returns OK on success, NOTOK on failure.
+ //
+ virtual int Set(const String& buffer);
+
+ //-
+ // Generate all the permutations
+ // containing <i>n</i> bits in a <b>bits</b> bit word in increasing order.
+ // The <b>mask</b> argument is originally filled by the caller
+ // with the <i>n</i> least significant bits set. A call to Permute
+ // generates the next permutation immediately greater (numerically)
+ // than the one contained in <b>mask</b>.
+ //
+ // Permute returns the next permutation or 0 if it reached the
+ // maximum.
+ //
+ // To understand the algorithm, imagine 1 is a ball and 0 a space.
+ //
+ // When playing the game you start with a rack of <b>bits</b> slots filled
+ // with <i>n</i> balls all on the left side. You end the game when all
+ // the balls are on the right side.
+ //
+ // Sarting from the left, search for the first ball that has an empty
+ // space to the right. While searching remove all the balls you find.
+ // Place a ball in the empty space you found, at the right of the last
+ // ball removed. Sarting from the left, fill all empty spaces with
+ // the removed balls. Repeat until all balls are to the right.
+ //
+ // Here is a sample generated by repeated calls to WordExclude::Permute:
+ // (left most bit is least significant)
+ // <pre>
+ // mask = 1111100000
+ // while(mask = WordExclude::Permute(mask, 7))
+ // show_bits(mask)
+ //
+ // 1111100000 (0x0000001f - 31)
+ // 1111010000 (0x0000002f - 47)
+ // 1110110000 (0x00000037 - 55)
+ // 1101110000 (0x0000003b - 59)
+ // 1011110000 (0x0000003d - 61)
+ // 0111110000 (0x0000003e - 62)
+ // 1111001000 (0x0000004f - 79)
+ // 1110101000 (0x00000057 - 87)
+ // 1101101000 (0x0000005b - 91)
+ // 1011101000 (0x0000005d - 93)
+ // 0111101000 (0x0000005e - 94)
+ // 1110011000 (0x00000067 - 103)
+ // 1101011000 (0x0000006b - 107)
+ // 1011011000 (0x0000006d - 109)
+ // 0111011000 (0x0000006e - 110)
+ // 1100111000 (0x00000073 - 115)
+ // 1010111000 (0x00000075 - 117)
+ // 0110111000 (0x00000076 - 118)
+ // 1001111000 (0x00000079 - 121)
+ // 0101111000 (0x0000007a - 122)
+ // 0011111000 (0x0000007c - 124)
+ // </pre>
+ // A recursive implementation would be:
+ // <pre>
+ // /* Recursive */
+ // void permute(unsigned int result, int bits_count, int bits_toset)
+ // {
+ // if(bits_toset <= 0 || bits_count <= 0) {
+ // if(bits_toset <= 0)
+ // do_something(result);
+ // } else {
+ // permute(result, bits_count - 1, bits_toset);
+ // permute(result | (1 << (bits_count - 1)), bits_count - 1, bits_toset - 1);
+ // }
+ // }
+ // </pre>
+ // Which is more elegant but not practical at all in our case.
+ //
+ inline unsigned int Permute(unsigned int mask, unsigned int bits);
+
+ //-
+ // Return the current bit field value.
+ //
+ virtual inline unsigned int& Mask() { return mask; }
+ virtual inline unsigned int Mask() const { return mask; }
+
+ virtual inline unsigned int& Maxi() { return maxi; }
+ virtual inline unsigned int Maxi() const { return maxi; }
+
+ virtual inline unsigned int& Bits() { return bits; }
+ virtual inline unsigned int Bits() const { return bits; }
+
+private:
+ unsigned int mask;
+ unsigned int maxi;
+ unsigned int bits;
+};
+
+int WordExclude::Initialize(unsigned int length)
+{
+ if(length > WORD_EXCLUDE_MAX) {
+ fprintf(stderr, "WordExclude::Initialize: length must be < %d\n", (int)WORD_EXCLUDE_MAX);
+ return NOTOK;
+ }
+
+ mask = 0;
+ bits = 0;
+ maxi = length;
+
+ return OK;
+}
+
+inline unsigned int WordExclude::Permute(unsigned int mask, unsigned int bits)
+{
+ unsigned int bits_cleared = 0;
+ unsigned int j;
+ for(j = 0; j < bits; j++) {
+ if(mask & (1 << j)) {
+ bits_cleared++;
+ mask &= ~(1 << j);
+ } else {
+ if(bits_cleared) {
+ bits_cleared--;
+ mask |= (1 << j);
+ break;
+ }
+ }
+ }
+
+ if(j >= bits)
+ return 0;
+
+ for(j = 0; j < bits_cleared; j++)
+ mask |= (1 << j);
+
+ return mask;
+}
+
+int WordExclude::Next()
+{
+ mask = Permute(mask, maxi);
+
+ int ret = WORD_EXCLUDE_OK;
+
+ if(mask == 0) {
+ bits++;
+ if(bits > maxi)
+ ret = WORD_EXCLUDE_END;
+ else {
+ unsigned int i;
+ for(i = 0; i < bits; i++)
+ mask |= (1 << i);
+ ret = WORD_EXCLUDE_OK;
+ }
+ }
+
+ if(verbose > 2) show_bits(mask);
+
+ return ret;
+}
+
+void WordExclude::Get(String& buffer) const
+{
+ buffer.trunc();
+ unsigned int i;
+ for(i = 0; i < maxi; i++) {
+ buffer << ((mask & (1 << i)) ? '1' : '0');
+ }
+}
+
+int WordExclude::Set(const String& buffer)
+{
+ if(Initialize(buffer.length()) == NOTOK)
+ return NOTOK;
+ unsigned int i;
+ for(i = 0; i < maxi; i++) {
+ if(buffer[i] == '1') {
+ mask |= (1 << i);
+ bits++;
+ }
+ }
+ return OK;
+}
+
+// ************************* WordExcludeMask implementation *******************
+//
+// NAME
+//
+// WordExclude specialization that ignore some bits
+//
+// SYNOPSIS
+//
+// #include <WordExcludeMask.h>
+//
+// #define BITS 9
+// #define IGNORE 0x0f0
+// #define IGNORE_MASK 0x050
+//
+// WordExcludeMask permute;
+// permute.Initialize(BITS, IGNORE, IGNORE_MASK);
+// while(permute.Next() == WORD_EXCLUDE_OK)
+// ...
+//
+// DESCRIPTION
+//
+// Only perform WordExclude operations on the bits that are not set in
+// <i>ignore.</i> The bits of <i>ignore_mask</i> that are set in
+// <i>ignore</i> are untouched. In the synopsis section, for instance,
+// bits 1,2,3,4 and 9 will be permuted and the bits 5,6,7,8 will be
+// left untouched.
+//
+//
+// END
+//
+#define WORD_EXCLUDE_IGNORED (-1)
+
+class WordExcludeMask : public WordExclude {
+public:
+ //-
+ // <b>ignore</b> gives the mask of bits to ignore. The actual WordExclude
+ // operations are made on a number of bits that is <b>length</b> - (the number
+ // of bits set in <b>ignore).</b>
+ // The <b>ignore_mask_arg</b> contains the actual values of the bits ignored by
+ // the <b>ignore</b> argument.
+ //
+ virtual inline int Initialize(unsigned int length, unsigned int ignore, unsigned int ignore_mask_arg) {
+ ignore_mask = ignore_mask_arg;
+ ignore_maxi = length;
+ unsigned int maxi = 0;
+ unsigned int i;
+ for(i = 0, ignore_bits = 0; i < length; i++) {
+ if(ignore & (1 << i)) {
+ bit2bit[i] = WORD_EXCLUDE_IGNORED;
+ if(ignore_mask & (1 << i)) ignore_bits++;
+ } else {
+ bit2bit[i] = maxi++;
+ }
+ }
+
+ return WordExclude::Initialize(maxi);
+ }
+
+ virtual inline unsigned int Excluded(int position) {
+ position = WORD_EXCLUDE_POSITION2BIT(ignore_maxi, position);
+ if(bit2bit[position] == WORD_EXCLUDE_IGNORED)
+ return ignore_mask & (1 << position);
+ else
+ return WordExclude::Mask() & (1 << bit2bit[position]);
+ }
+
+ virtual inline int NotExcludedCount() const {
+ return ignore_maxi - ignore_bits - WordExclude::Bits();
+ }
+
+ virtual inline int ExcludedCount() const {
+ return ignore_bits - WordExclude::Bits();
+ }
+
+ //-
+ // The semantic is the same as the Get method of Wordexclude
+ // except that ignored bits are assigned 3 and 2 instead of 1 and 0
+ // respectively.
+ //
+ virtual void Get(String& buffer) const;
+ //-
+ // The semantic is the same as the Get method of Wordexclude
+ // except that ignored bits are assigned 3 and 2 instead of 1 and 0
+ // respectively.
+ //
+ virtual int Set(const String& buffer);
+
+ virtual inline unsigned int Mask() const {
+ unsigned int ret = ignore_mask;
+ unsigned int i;
+ for(i = 0; i < ignore_maxi; i++) {
+ if(bit2bit[i] != WORD_EXCLUDE_IGNORED) {
+ if(WordExclude::Mask() & (1 << bit2bit[i]))
+ ret |= (1 << i);
+ }
+ }
+ return ret;
+ }
+
+ virtual inline unsigned int Maxi() const { return ignore_maxi; }
+
+ virtual inline unsigned int Bits() const { return ignore_bits + WordExclude::Bits(); }
+
+private:
+ unsigned int ignore_mask;
+ unsigned int ignore_maxi;
+ unsigned int ignore_bits;
+ int bit2bit[WORD_EXCLUDE_MAX];
+};
+
+void WordExcludeMask::Get(String& buffer) const
+{
+ buffer.trunc();
+ unsigned int i;
+ for(i = 0; i < ignore_maxi; i++) {
+ if(bit2bit[i] == WORD_EXCLUDE_IGNORED)
+ buffer << ((ignore_mask & (1 << i)) ? '3' : '2');
+ else
+ buffer << ((WordExclude::Mask() & (1 << bit2bit[i])) ? '1' : '0');
+ }
+}
+
+int WordExcludeMask::Set(const String& buffer)
+{
+ WordExclude::Initialize(0);
+
+ unsigned int& maxi = WordExclude::Maxi();
+ unsigned int& mask = WordExclude::Mask();
+ unsigned int& bits = WordExclude::Bits();
+ ignore_mask = 0;
+ ignore_bits = 0;
+ ignore_maxi = buffer.length();
+
+ unsigned int i;
+ for(i = 0; i < ignore_maxi; i++) {
+ if(buffer[i] == '1' || buffer[i] == '0') {
+ if(buffer[i] == '1') {
+ mask |= (1 << maxi);
+ bits++;
+ }
+ bit2bit[i] = maxi;
+ maxi++;
+ } else if(buffer[i] == '3' || buffer[i] == '2') {
+ if(buffer[i] == '3') {
+ ignore_mask |= (1 << i);
+ ignore_bits++;
+ }
+ bit2bit[i] = WORD_EXCLUDE_IGNORED;
+ }
+ }
+
+ return OK;
+}
+
+// ************************* WordPermute implementation ********************
+//
+// NAME
+//
+// WordExclude specialization with proximity toggle
+//
+// SYNOPSIS
+//
+// #include <WordPermute.h>
+//
+// #define BITS 5
+//
+// WordPermute permute;
+// permute.Initialize(BITS);
+// while(permute.Next() == WORD_EXCLUDE_OK)
+// if(permute.UseProximity()) ...
+//
+// DESCRIPTION
+//
+// Each WordExclude permutation is used twice by Next. Once with
+// the proximity flag set and once with the proximity flag cleared.
+// If the length of the bit field (length argument of Initialize) is
+// lower or equal to 1, then the proximity flag is always false.
+//
+//
+// END
+//
+// WordPermute methods return values
+//
+#define WORD_PERMUTE_OK WORD_EXCLUDE_OK
+#define WORD_PERMUTE_END WORD_EXCLUDE_END
+
+//
+// Use or don't use proximity flag
+//
+#define WORD_PERMUTE_PROXIMITY_NO 0
+#define WORD_PERMUTE_PROXIMITY_TOGGLE 1
+#define WORD_PERMUTE_PROXIMITY_ONLY 2
+
+//
+// Deals with word exclusion and proximity permutations for
+// the implementation of the Optional retrieval model.
+//
+class WordPermute : public WordExcludeMask {
+public:
+ //-
+ // The <b>nuse_proximity</b> may be set to the following:
+ //
+ // WORD_PERMUTE_PROXIMITY_NO so that the object behaves as
+ // WordExcludeMask and Proximity() always return false.
+ //
+ // WORD_PERMUTE_PROXIMITY_TOGGLE so that each permutation is issued twice:
+ // once with the proximity flag set (Proximity() method) and once with
+ // the proximity flag cleared.
+ //
+ // WORD_PERMUTE_PROXIMITY_ONLY so that the object behaves as
+ // WordExcludeMask and Proximity() always return true.
+ //
+ virtual inline int Initialize(unsigned int length, unsigned int ignore, unsigned int ignore_mask_arg, int nuse_proximity) {
+ use_proximity = nuse_proximity;
+ switch(use_proximity) {
+ case WORD_PERMUTE_PROXIMITY_NO:
+ proximity = 0;
+ break;
+ case WORD_PERMUTE_PROXIMITY_TOGGLE:
+ //
+ // Don't bother to try proximity search if only one word
+ // is involved.
+ //
+ proximity = length > 1;
+ break;
+ case WORD_PERMUTE_PROXIMITY_ONLY:
+ proximity = 1;
+ break;
+ default:
+ fprintf(stderr, "WordPermute::Initialize: unexpected use_proximity = %d\n", use_proximity);
+ return 0;
+ }
+ return WordExcludeMask::Initialize(length, ignore, ignore_mask_arg);
+ }
+
+ //-
+ // Return true if the proximity flag is set, false if it is
+ // cleared.
+ //
+ inline int Proximity() {
+ switch(use_proximity) {
+ case WORD_PERMUTE_PROXIMITY_NO:
+ return 0;
+ break;
+ case WORD_PERMUTE_PROXIMITY_TOGGLE:
+ return proximity;
+ break;
+ case WORD_PERMUTE_PROXIMITY_ONLY:
+ return 1;
+ break;
+ default:
+ fprintf(stderr, "WordPermute::Proximity: unexpected use_proximity = %d\n", use_proximity);
+ return 0;
+ break;
+ }
+ }
+
+ //-
+ // Return WORD_PERMUTE_PROXIMITY_NO, WORD_PERMUTE_PROXIMITY_TOGGLE or
+ // WORD_PERMUTE_PROXIMITY_ONLY.
+ //
+ inline int UseProximity() { return use_proximity; }
+
+ //-
+ // Find the next permutation. If <b>WORD_PERMUTE_PROXIMITY_TOGGLE<b> was
+ // specified in Initialize each permutation is issued twice (see
+ // Proximity() to differentiate them), except when the mask
+ // only contains one non exluded bit (NotExcludeCount() <= 1).
+ // In both case the last permutation with all bits excluded
+ // (i.e. when NotExcludedCount() <= 0) is never returned because
+ // it is useless.
+ //
+ virtual int Next() {
+ if(Maxi() <= 0)
+ return WORD_PERMUTE_END;
+
+ int ret = WORD_PERMUTE_OK;
+ int check_useless = 0;
+ if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) {
+ //
+ // Move to next permutation as follows:
+ // exclude mask 1 + use proximity
+ // exclude mask 1 + don't use proximity
+ // exclude mask 2 + use proximity
+ // exclude mask 2 + don't use proximity
+ // and so on.
+ // If only one word is involved never use proximity.
+ //
+ if(proximity) {
+ proximity = 0;
+ } else {
+ proximity = 1;
+ if((ret = WordExcludeMask::Next()) == WORD_PERMUTE_OK) {
+ //
+ // Do not toggle proximity for only one non excluded word
+ //
+ if(NotExcludedCount() <= 1)
+ proximity = 0;
+ check_useless = 1;
+ } else if(ret == WORD_PERMUTE_END)
+ proximity = 0;
+ }
+ } else {
+ ret = WordExcludeMask::Next();
+ check_useless = 1;
+ }
+
+ if(check_useless && ret == WORD_PERMUTE_OK) {
+ //
+ // If no bits are ignored or all ignore_mask bits are set to
+ // one, the last permutation has all exclude bits set, which
+ // is useless. Just skip it and expect to be at the end of
+ // all permutations.
+ //
+ if(NotExcludedCount() <= 0) {
+ ret = WordExcludeMask::Next();
+ if(ret != WORD_PERMUTE_END) {
+ fprintf(stderr, "WordPermute::Next: expected WORD_PERMUTE_END\n");
+ ret = NOTOK;
+ }
+ }
+ }
+
+ return ret;
+ }
+
+ //-
+ // The semantic is the same as the Get method of Wordexclude
+ // but a letter T is appended to the string if the proximity
+ // flag is set, or F is appended to the string if the proximity
+ // is clear.
+ //
+ virtual inline void Get(String& buffer) const {
+ WordExcludeMask::Get(buffer);
+ if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE)
+ buffer << (proximity ? 'T' : 'F');
+ }
+
+ //-
+ // The semantic is the same as the Get method of Wordexclude
+ // but if the string end with a T the proximity flag is set
+ // and if the string end with a F the proximity flag is cleared.
+ //
+ virtual inline int Set(const String& buffer) {
+ if(buffer.length() < 1) {
+ fprintf(stderr, "WordPermute::Set: buffer length < 1\n");
+ return NOTOK;
+ }
+ int ret = OK;
+ if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) {
+ if((ret = WordExcludeMask::Set(buffer.sub(0, buffer.length() - 1))) == OK)
+ proximity = buffer.last() == 'T';
+ } else {
+ ret = WordExcludeMask::Set(buffer);
+ }
+
+ return ret;
+ }
+
+protected:
+ int use_proximity;
+ int proximity;
+};
+
+// ************************* WordTree implementation ********************
+//
+// NAME
+//
+// Base class for query resolution nodes
+//
+// SYNOPSIS
+//
+// #include <WordTree.h>
+//
+// class WordTreeMethod : public WordTree {
+// ...
+// };
+//
+// DESCRIPTION
+//
+// The WordTree class is derived from the WordCursor class and implement
+// the basic operations and data structures needed for query resolution.
+// It is the common base class of all the classes that actually implement
+// a query resolution. The derived classes must be implemented to follow
+// the WordCursor semantic for Walk* operations.
+//
+//
+// END
+//
+
+#define WORD_WALK_REDO 0x1000
+#define WORD_WALK_RESTART 0x2000
+#define WORD_WALK_NEXT 0x4000
+
+//
+// Return values of CursorsObeyProximity method
+//
+#define WORD_SEARCH_NOPROXIMITY 1
+
+//
+// operand values
+//
+#define WORD_TREE_OR 1
+#define WORD_TREE_AND 2
+#define WORD_TREE_NEAR 3
+#define WORD_TREE_OPTIONAL 4
+#define WORD_TREE_LITERAL 5
+#define WORD_TREE_MANDATORY 6
+#define WORD_TREE_NOT 7
+
+#define WORD_TREE_OP_SIZE 20
+
+//
+// Default proximity is to search for adjacent words in order
+//
+#ifndef WORD_SEARCH_DEFAULT_PROXIMITY
+#define WORD_SEARCH_DEFAULT_PROXIMITY 1
+#endif /* WORD_SEARCH_DEFAULT_PROXIMITY */
+
+static char* operator_name[WORD_TREE_OP_SIZE] = {
+ "",
+ "or",
+ "and",
+ "near",
+ "optional",
+ "literal",
+ "mandatory",
+ "not",
+ 0
+};
+
+class WordTree : public WordCursor {
+public:
+ WordTree() {
+ proximity = 0;
+ uniq = 0;
+ }
+
+ virtual int ContextSaveList(StringList& list) const {
+ return OK;
+ }
+
+ virtual int ContextRestoreList(StringList& list) {
+ return OK;
+ }
+
+ //-
+ // Initialize the object. <b>words</b> is used to initialize the
+ // WordCursor base class, <b>document, document_length</b> and
+ // <b>location</b> are used to initialize the WordKeySemantic data
+ // member. The <b>nuniq</b> is the WordKey field position used by
+ // the WordKeySemantic::DocumentNext function. The <b>nproximity</b>
+ // is the proximity factor used by the WordKeySemantic::LocationCompare
+ // method.
+ // Return OK on success, NOTOK on failure.
+ //
+ virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) {
+ int ret;
+ proximity = nproximity;
+ uniq = nuniq;
+ if((ret = key_semantic.Initialize(document, document_length, location)) != OK)
+ return ret;
+ WordKey key;
+ if(!scope.empty()) {
+ if(key.Set(scope) != OK) {
+ fprintf(stderr, "WordTree::Prepare: setting scope %s failed\n", (char*)scope);
+ return NOTOK;
+ }
+ }
+ key.SetWord(search);
+ return WordCursor::Initialize(words, key, 0, 0, HTDIG_WORDLIST_WALKER);
+ }
+
+ //-
+ // Return a copy of the last document found.
+ //
+ WordKey GetDocument() {
+ WordKey found;
+ key_semantic.DocumentSet(GetFound().Key(), found);
+ return found;
+ }
+
+ //-
+ // Store in the <i>info</i> data member textual information about
+ // the latest match found.
+ //
+ virtual void SetInfo() { info = GetFound().Key().GetWord(); }
+
+ //-
+ // Return a copy of the <i>info</i> data member. Should be
+ // called after SetInfo().
+ //
+ String GetInfo() { return info; }
+
+ //-
+ // Sort WordTree data members (if any) in ascending frequency order.
+ // Return OK on success, NOTOK on failure.
+ //
+ virtual int AscendingFrequency() { return OK; }
+
+ //-
+ // Delete WordTree data members (if any) that have a zero frequency.
+ // The number of data members deleted is returned in <b>stripped</b>.
+ // Return OK on success, NOTOK on failure.
+ //
+ virtual int StripNonExistent(unsigned int& stripped) {
+ stripped = 0;
+ return OK;
+ }
+
+ //
+ // Input
+ //
+ //-
+ // Proximity factor. See WordKeySemantic::LocationCompare.
+ //
+ int proximity;
+ //-
+ // Uniq WordKey field position. See WordKeySemantic::DocumentNext.
+ //
+ int uniq;
+ //-
+ // Semantic of the WordKey object.
+ //
+ WordKeySemantic key_semantic;
+ //-
+ // Textual representation of the search scope.
+ //
+ String scope;
+ //-
+ // Original search criterion that may be different from the
+ // WordCursor::searchKey data member.
+ //
+ String search;
+
+ //
+ // Internal state
+ //
+ //-
+ // Textual information about the latest match.
+ //
+ String info;
+};
+
+// ************************* WordTreeLiteral implementation ****************
+
+class WordTreeLiteral : public WordTree {
+public:
+ //-
+ // Constructor. The search criterion is <b>string</b> and the
+ // scope is <b>nscope.</b>.
+ //
+ WordTreeLiteral(const char* string, const char* nscope = "") {
+ search.set((char*)string);
+ scope.set((char*)nscope);
+ }
+
+ //-
+ // Returns WORD_TREE_LITERAL.
+ //
+ int IsA() const { return WORD_TREE_LITERAL; }
+
+ virtual int WalkRewind();
+ //-
+ // Only return a match for each distinct document.
+ //
+ virtual int WalkNext();
+ virtual int Seek(const WordKey& patch);
+
+ //-
+ // If scope is set the <b>bufferout</b> is filled with
+ // <pre>
+ // ( word "scope" )
+ // </pre>
+ // otherwise the <b>bufferout</b> only contains the word.
+ //
+ virtual int Get(String& bufferout) const {
+ if(scope.empty())
+ bufferout << search;
+ else
+ bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" " << search << " )";
+ return OK;
+ }
+
+protected:
+ WordKey current_document;
+};
+
+int WordTreeLiteral::WalkRewind()
+{
+ current_document.Clear();
+ return WordCursor::WalkRewind();
+}
+
+int WordTreeLiteral::WalkNext()
+{
+ int ret;
+ do {
+ ret = WordCursor::WalkNext();
+ if(verbose > 3) fprintf(stderr, "WordTreeLiteral::WalkNext: reached %s\n", (char*)GetDocument().Get());
+ } while(ret == OK &&
+ key_semantic.DocumentCompare(current_document, GetDocument()) == 0);
+
+ if(ret == OK)
+ current_document = GetDocument();
+ else
+ current_document.Clear();
+
+ return ret;
+}
+
+int WordTreeLiteral::Seek(const WordKey& position)
+{
+ current_document.Clear();
+ return WordCursor::Seek(position);
+}
+
+// ************************* WordTreeOperand implementation ****************
+//
+// NAME
+//
+// Base class for boolean query resolution nodes
+//
+// SYNOPSIS
+//
+// #include <WordTree.h>
+//
+// class WordTreeMethod : public WordTreeOperand {
+// ...
+// };
+//
+// DESCRIPTION
+//
+// The WordTreeOperand class is derived from WordTree and implemet
+// the basic operations and data structures needed for query resultion
+// of boolean operators. It contains a list of WordTree objects (the
+// operands or cursors) and redefine the basic WordCursor methods
+// to operate on all of them according to the logic defined by the
+// derived class.
+//
+//
+// END
+//
+
+//
+// Helper for debugging that returns the string representation
+// of the return codes.
+//
+static char* ret2str(int ret)
+{
+ if(ret == WORD_WALK_REDO)
+ return "REDO";
+
+ if(ret == WORD_WALK_RESTART)
+ return "RESTART";
+
+ if(ret == WORD_WALK_NEXT)
+ return "NEXT";
+
+ if(ret == OK)
+ return "OK";
+
+ if(ret == NOTOK)
+ return "NOTOK";
+
+ if(ret == WORD_WALK_ATEND)
+ return "ATEND";
+
+ return "???";
+}
+
+class WordTreeOperand : public WordTree
+{
+public:
+ //-
+ // Constructor. The scope is <b>nscope</b>.
+ //
+ WordTreeOperand(const char* nscope) {
+ scope.set((char*)nscope);
+ }
+ //-
+ // Free the objects pointed by <i>cursors</i> with delete as well
+ // as the <i>cursors</i> array itself with delete [].
+ //
+ virtual ~WordTreeOperand();
+
+ virtual void Clear() {
+ cursors = 0;
+ cursors_length = 0;
+ WordCursor::Clear();
+ }
+
+ //-
+ // Recursively call Optimize on each <i>cursors</i>.
+ //
+ virtual int Optimize();
+
+ //-
+ // Change the <i>permutation</i> data member ignore mask according
+ // to WORD_TREE_MANDATORY and WORD_TREE_NOT nodes found in
+ // <i>cursors</i>. MANDATORY and NOT nodes are reduced (replaced
+ // by their first child cursor. For each MANDATORY and NOT nodes
+ // the bit (see WordExcludeMask for information)
+ // corresponding to their position is ignored (set in the <b>ignore</b>
+ // argument of the WordExcludeMask::Initialize function. For NOT
+ // nodes, the bit corresponding to their position is set in
+ // the <b>ignore_mask</b> of the WordExcludeMask::Initialize function
+ // (i.e. implementing a <i>not</i> operation).
+ // The <b>proximity</b> argument may be WORD_PERMUTE_PROXIMITY_TOGGLE or
+ // WORD_PERMUTE_PROXIMITY_NO.
+ // Returns OK on success, NOTOK on failure.
+ //
+ int OptimizeOr(int proximity);
+
+ virtual int ContextSave(String& buffer) const {
+ StringList list;
+ int ret;
+ if((ret = ContextSaveList(list)) != OK)
+ return ret;
+
+ buffer.trunc();
+ String* element;
+ list.Start_Get();
+ while((element = (String*)list.Get_Next())) {
+ buffer << (*element) << ';';
+ }
+ //
+ // Trim last ;
+ //
+ buffer.chop(1);
+
+ return OK;
+ }
+
+ virtual int ContextSaveList(StringList& list) const {
+ //
+ // Apply to each cursor
+ //
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ if(cursors[i]->ContextSaveList(list) == NOTOK)
+ return NOTOK;
+ return OK;
+ }
+
+ virtual int ContextRestore(const String& buffer) {
+ if(!buffer.empty()) {
+ StringList list(buffer, ";");
+ return ContextRestoreList(list);
+ } else {
+ return OK;
+ }
+ }
+
+ virtual int ContextRestoreList(StringList& list) {
+ //
+ // Apply to each cursor
+ //
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ if(cursors[i]->ContextRestoreList(list) == NOTOK)
+ return NOTOK;
+ return OK;
+ }
+
+ //-
+ // Recursively call WalkInit on each <i>cursors</i>.
+ //
+ virtual int WalkInit();
+ //-
+ // Recursively call WalkRewind on each <i>cursors</i>.
+ // Reset the <i>pos</i> data member with WordKeySemantic::DocumentClear.
+ //
+ virtual int WalkRewind();
+ //-
+ // Recursively call WalkFinish on each <i>cursors</i>.
+ //
+ virtual int WalkFinish();
+ //-
+ // Recursively call Seek on each <i>cursors</i>.
+ // Save the <b>patch</b> argument in the <i>pos</i> data
+ // member.
+ //
+ virtual int Seek(const WordKey& patch);
+
+ //-
+ // The number of occurrence of a WordTreeOperand is the sum of the
+ // number of occurrence of each term.
+ //
+ virtual int Noccurrence(unsigned int& noccurrence) const {
+ noccurrence = 0;
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++) {
+ unsigned int frequency;
+ if(cursors[i]->Noccurrence(frequency) != OK)
+ return NOTOK;
+ noccurrence += frequency;
+ }
+ return OK;
+ }
+
+ //-
+ // The <b>bufferout</b> argument is filled with a lisp like representation
+ // of the tree starting at this node.
+ //
+ virtual int Get(String& bufferout) const {
+ bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" ";
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ bufferout << cursors[i]->Get() << " ";
+ bufferout << " )";
+ return OK;
+ }
+
+ //-
+ // Call Prepare on each <i>cursors</i>. Set the <i>search</i> member
+ // with an textual representation of the tree starting at this node.
+ //
+ virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) {
+ int ret;
+ if((ret = WordTree::Prepare(words, nuniq, nproximity, document, document_length, location)) != OK)
+ return ret;
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++) {
+ if((ret = cursors[i]->Prepare(words, nuniq, nproximity, document, document_length, location)) != OK)
+ return ret;
+ }
+ return Get(GetSearch().GetWord());
+ }
+
+ //-
+ // The current cursor offset (set by Seek for instance). It
+ // duplicates the function of the WordCursor <i>key</i> data member
+ // because the data type is different (WordKey instead of String).
+ //
+ WordKey pos;
+ //-
+ // Sub nodes array.
+ //
+ WordTree** cursors;
+ //-
+ // Number of valid entries in the <i>cursors</i> member.
+ //
+ unsigned int cursors_length;
+ //-
+ // Permutation generator with proximity toggle
+ //
+ WordPermute permutation;
+};
+
+WordTreeOperand::~WordTreeOperand()
+{
+ if(cursors) {
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ delete cursors[i];
+ free(cursors);
+ }
+}
+
+int
+WordTreeOperand::Optimize()
+{
+ //
+ // Apply to each cursor
+ //
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ if(cursors[i]->Optimize() == NOTOK)
+ return NOTOK;
+ return OK;
+}
+
+int WordTreeOperand::OptimizeOr(int proximity)
+{
+ unsigned int ignore = 0;
+ unsigned int ignore_mask = 0;
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++) {
+ int reduce;
+ //
+ // Set ignore & ignore_mask if cursor is NOT or MANDATORY
+ //
+ switch(cursors[i]->IsA()) {
+ case WORD_TREE_MANDATORY:
+ ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
+ reduce = 1;
+ break;
+ case WORD_TREE_NOT:
+ ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
+ ignore_mask |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i));
+ reduce = 1;
+ break;
+ default:
+ reduce = 0;
+ break;
+ }
+ //
+ // Replace the NOT or MANDATORY node by its only child
+ //
+ if(reduce) {
+ WordTreeOperand* old = (WordTreeOperand*)cursors[i];
+ cursors[i] = old->cursors[0];
+ old->cursors[0] = 0;
+ old->cursors_length--;
+ if(old->cursors_length > 0) {
+ fprintf(stderr, "WordTreeOptional::OptimizeOr: too many cursors\n");
+ return NOTOK;
+ }
+ delete old;
+ }
+ }
+ return permutation.Initialize(cursors_length, ignore, ignore_mask, proximity);
+}
+
+int
+WordTreeOperand::WalkInit()
+{
+ unsigned int i;
+ int ret = WORD_WALK_ATEND;
+ for(i = 0; i < cursors_length; i++)
+ if((ret = cursors[i]->WalkInit()) != OK)
+ return ret;
+ return (status = ret);
+}
+
+int
+WordTreeOperand::WalkRewind()
+{
+ unsigned int i;
+ int ret = OK;
+ for(i = 0; i < cursors_length; i++)
+ if((ret = cursors[i]->WalkRewind()) != OK)
+ return ret;
+ status = OK;
+ key_semantic.DocumentClear(pos);
+ cursor_get_flags = DB_SET_RANGE;
+ found.Clear();
+ return ret;
+}
+
+int
+WordTreeOperand::WalkFinish()
+{
+ unsigned int i;
+ int ret = OK;
+ for(i = 0; i < cursors_length; i++)
+ if((ret = cursors[i]->WalkFinish()) != OK)
+ return ret;
+ return ret;
+}
+
+int
+WordTreeOperand::Seek(const WordKey& patch)
+{
+ pos.CopyFrom(patch);
+ cursor_get_flags = DB_SET_RANGE;
+
+ unsigned int i;
+ int ret = OK;
+ for(i = 0; i < cursors_length; i++)
+ if((ret = cursors[i]->Seek(patch)) != OK &&
+ ret != WORD_WALK_ATEND)
+ return ret;
+ status = OK;
+ return OK;
+}
+
+// ************************* WordTreeOptional implementation ****************
+
+class WordTreeOptional : public WordTreeOperand {
+ public:
+ WordTreeOptional(const char* nscope) : WordTreeOperand(nscope) { }
+
+ //-
+ // Return WORD_TREE_OPTIONAL
+ //
+ virtual int IsA() const { return WORD_TREE_OPTIONAL; }
+
+ virtual int Optimize();
+
+ virtual int ContextSaveList(StringList& list) const;
+
+ virtual int ContextRestoreList(StringList& list);
+
+ //-
+ // Multipass walk of the occurrences according to the <i>permutation</i>
+ // data member specifications. First search for documents containing
+ // all occurrences near to each other. Then documents that
+ // contain all occurrences far appart. Then ignore the most frequent
+ // search criterion and search for documents that contain all the others
+ // near to each other. The logic goes on until there only remains the
+ // most frequent word.
+ //
+ virtual int WalkNext();
+ //-
+ // Only seek the first non excluded cursor. The implementation
+ // of WalkNext makes it useless to seek the others.
+ //
+ virtual int Seek(const WordKey& position);
+
+ virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) {
+ int ret;
+ if((ret = permutation.Initialize(cursors_length, 0, 0, WORD_PERMUTE_PROXIMITY_TOGGLE)) != OK)
+ return ret;
+ return WordTreeOperand::Prepare(words, nuniq, nproximity, document, document_length, location);
+ }
+
+ virtual void SetInfo();
+
+ virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_TOGGLE; }
+
+ virtual int UsePermutation() const { return 1; }
+
+ //-
+ // Returns true if all cursors must have a frequency > 0, false otherwise.
+ //
+ virtual int AllOrNothing() const { return 0; }
+
+ //-
+ // Comparison between <b>cursor</b> and <b>constraint</b> is made
+ // with WordKeySemantic::LocationCompare using the <b>proximity</b>
+ // argument. If <b>master</b> is NULL it is set to point to <b>
+ // <b>cursor</b>.
+ //
+ // Return WORD_WALK_NEXT if <b>cursor</b> is at <b>constraint</b> and
+ // set <b>constraint</b> if <b>cursor</b> is <b>master</b>.
+ //
+ // Return WORD_WALK_REDO if <b>cursor</b> is above <b>constraint</b> and
+ // call cursor.WalkNext().
+ //
+ // Return WORD_WALK_RESTART if <b>cursor</b> is below <b>constraint</b> and
+ // set <b>constraint</b> from <b>cursor</b> using
+ // WordKeySemantic::DocumentSet if <b>cursor</b> is not <b>master</b>
+ // otherwise also set location of <b>constraint</b> using
+ // WordKeySemantic::LocationSet and call WordKeySemantic::LocationNext
+ // on <b>constraint.</b>
+ //
+ // Return WORD_WALK_ATEND if no more match possible.
+ //
+ // Return NOTOK on failure.
+ //
+ int SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity);
+ //-
+ // Comparison between <b>cursor</b> and <b>document</b> is made
+ // with WordKeySemantic::DocumentCompare.
+ //
+ // Return WORD_WALK_NEXT if <b>cursor</b> is above <b>document.</b>
+ //
+ // Return WORD_WALK_REDO if <b>cursor</b> is below <b>document</b>
+ // and call cursor.WalkNext().
+ //
+ // Return WORD_WALK_RESTART if <b>cursor</b> is at <b>document</b>
+ // and call WordKeySemantic::DocumentNext method on <b>document.</b>
+ //
+ // Return WORD_WALK_ATEND if no more match possible.
+ //
+ // Return NOTOK on failure.
+ //
+ int SearchCursorNot(WordTree& cursor, WordKey& document);
+ //-
+ // Comparison between <b>cursor</b> and <b>document</b> is made
+ // with WordKeySemantic::DocumentCompare.
+ //
+ // Return WORD_WALK_NEXT if <b>cursor</b> is at <b>document.</b>.
+ //
+ // Return WORD_WALK_REDO if <b>cursor</b> is below <b>document</b>
+ //
+ // Return WORD_WALK_RESTART if <b>cursor</b> is above <b>document</b>
+ // and call WordKeySemantic::DocumentNext method on <b>document.</b>
+ //
+ // Return WORD_WALK_ATEND if no more match possible.
+ //
+ // Return NOTOK on failure.
+ //
+ //
+ int SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation);
+ //
+ // We know that :
+ // 1) document does not contain any excluded words.
+ // 2) contains at least one occurrence of each non excluded word.
+ // The logic, although very similar to WordSearchNear::SearchOne
+ // is therefore simpler. We ignore all excluded cursors and
+ // return WORD_SEARCH_NOPROXIMITY as soon as a cursor move outside
+ // <document>.
+ //
+ //-
+ // If <b>document</b> contains words that match proximity
+ // requirement, return OK. Return WORD_SEARCH_NOPROXIMITY if proximity
+ // requirement cannot be matched for <document>.
+ //
+ int CursorsObeyProximity(WordKey& document);
+
+ //-
+ // Sort the <i>cursors</i> in ascending frequency order using the
+ // Noccurrence method on each cursor.
+ // Return OK on success, NOTOK on failure.
+ //
+ virtual int AscendingFrequency();
+ //-
+ // Delete all elements of the <i>cursors</i> array that have a
+ // zero frequency. The <i>cursors</i> array is shrinked and the
+ // <i>cursors_length</i> set accordingly. Returns the number of
+ // deletions in the <b>stripped</i> argument.
+ // Return OK on success, NOTOK on failure.
+ //
+ virtual int StripNonExistent(unsigned int& stripped);
+};
+
+int WordTreeOptional::Optimize()
+{
+ int ret;
+ if((ret = WordTreeOperand::Optimize()) != OK)
+ return ret;
+
+ if(UseProximity() != WORD_PERMUTE_PROXIMITY_ONLY) {
+ if((ret = AscendingFrequency()) != OK)
+ return ret;
+ }
+
+ unsigned int stripped;
+ if((ret = StripNonExistent(stripped)) != OK)
+ return ret;
+
+ if(AllOrNothing() && stripped) {
+ //
+ // One word is missing and everything is lost,
+ // Just kill the remaining cursors.
+ //
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ delete cursors[i];
+ cursors_length = 0;
+
+ return OK;
+ } else {
+ return OptimizeOr(UseProximity());
+ }
+}
+
+int WordTreeOptional::ContextSaveList(StringList& list) const
+{
+ int ret;
+ if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
+ return ret;
+
+ if(UsePermutation()) {
+ String* buffer = new String();
+ permutation.Get(*buffer);
+
+ list.Add(buffer);
+ }
+
+ {
+ String* buffer = new String();
+ if((ret = WordCursor::ContextSave(*buffer)) != OK)
+ return ret;
+
+ list.Add(buffer);
+ }
+
+ return OK;
+}
+
+int WordTreeOptional::ContextRestoreList(StringList& list)
+{
+ int ret;
+ if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
+ return ret;
+
+ if(UsePermutation()) {
+ char* buffer = list[0];
+ if((ret = permutation.Set(buffer)) != OK)
+ return ret;
+ list.Remove(0);
+ }
+
+ {
+ char* buffer = list[0];
+ if(!buffer) return NOTOK;
+ WordKey key(buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ cursor_get_flags = DB_NEXT;
+
+ list.Remove(0);
+ }
+
+ return OK;
+}
+
+int WordTreeOptional::WalkNext()
+{
+ WordKey& constraint = pos;
+ //
+ // Set constraint with all 0
+ //
+ if(constraint.Empty())
+ key_semantic.DocumentClear(constraint);
+
+ //
+ // Advance cursors to next constraint, if not at the
+ // beginning of the search.
+ //
+ int ret = OK;
+ int match_ok = 0;
+ do {
+ //
+ // Advance cursors so that next call fetches another constraint
+ //
+ if(cursor_get_flags == DB_NEXT)
+ key_semantic.DocumentNext(constraint, uniq);
+
+ if((ret = Seek(constraint)) != OK)
+ return ret;
+
+ int near = permutation.Proximity();
+ WordTree* first = 0;
+ for(unsigned int i = 0; i < cursors_length;) {
+ WordTree& cursor = *(cursors[i]);
+ near = permutation.Proximity();
+ int excluded = permutation.Excluded(i);
+ if(verbose) fprintf(stderr, "WordTreeOptional::WalkNext: %s excluded = %s, proximity = %s\n", (char*)cursor.GetSearch().GetWord(), (excluded ? "yes" : "no"), (near ? "yes" : "no" ));
+
+ int ret;
+ if(excluded) {
+ ret = SearchCursorNot(cursor, constraint);
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: Not -> %s\n", ret2str(ret));
+ } else {
+ if(near) {
+ ret = SearchCursorNear(cursor, first, constraint, proximity);
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: Near -> %s\n", ret2str(ret));
+ } else {
+ ret = SearchCursorAnd(cursor, constraint, permutation);
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: And -> %s\n", ret2str(ret));
+ }
+ }
+
+ switch(ret) {
+ case WORD_WALK_ATEND:
+ if(UsePermutation()) {
+ //
+ // The search is over with this permutation, try another one.
+ //
+ switch(permutation.Next()) {
+ //
+ // No permutations left, the end
+ //
+ case WORD_PERMUTE_END:
+ return (status = WORD_WALK_ATEND);
+ break;
+
+ //
+ // Sart over with this permutation
+ //
+ case WORD_PERMUTE_OK:
+ if(WalkRewind() != OK)
+ return NOTOK;
+ break;
+ }
+ first = 0;
+ i = 0;
+ } else {
+ return (status = WORD_WALK_ATEND);
+ }
+ break;
+ case WORD_WALK_REDO:
+ break;
+ case WORD_WALK_RESTART:
+ first = 0;
+ i = 0;
+ break;
+ case WORD_WALK_NEXT:
+ i++;
+ break;
+ case NOTOK:
+ default:
+ return ret;
+ break;
+ }
+ }
+
+ cursor_get_flags = DB_NEXT;
+
+ SetInfo();
+
+ //
+ // Save possible result, i.e. first non excluded cursor
+ //
+ for(unsigned int i = 0; i < cursors_length; i++) {
+ WordTree& cursor = *(cursors[i]);
+ if(!permutation.Excluded(i)) {
+ found.Key().CopyFrom(cursor.GetFound().Key());
+ break;
+ }
+ }
+
+ match_ok = 1;
+ //
+ // Only bother if near and non near search are involved
+ //
+ if(UseProximity() == WORD_PERMUTE_PROXIMITY_TOGGLE) {
+ //
+ // If we reach this point in the function and
+ // either proximity search is active or there is
+ // only one word involved, the match is valid.
+ // Otherwise it may be excluded, see below.
+ //
+ if(!near && permutation.NotExcludedCount() > 1) {
+ //
+ // If not using proximity, a match that fits the proximity
+ // requirements must be skipped because it was matched by
+ // the previous permutation (see WordPermute).
+ //
+ switch(CursorsObeyProximity(constraint)) {
+ case OK:
+ match_ok = 0;
+ break;
+ case WORD_SEARCH_NOPROXIMITY:
+ match_ok = 1;
+ break;
+ default:
+ case NOTOK:
+ return NOTOK;
+ break;
+ }
+ }
+ }
+ } while(!match_ok && ret == OK);
+
+ return ret;
+}
+
+int WordTreeOptional::Seek(const WordKey& position)
+{
+ pos.CopyFrom(position);
+ cursor_get_flags = DB_SET_RANGE;
+ status = OK;
+
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++) {
+ if(!permutation.Excluded(i)) {
+ WordTree& cursor = *(cursors[i]);
+ return cursor.Seek(position);
+ }
+ }
+
+ fprintf(stderr, "WordTreeOptional::Seek: failed\n");
+ return NOTOK;
+}
+
+
+void WordTreeOptional::SetInfo()
+{
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ cursors[i]->SetInfo();
+
+ info.trunc();
+
+ for(i = 0; i < cursors_length; i++) {
+ WordTree& cursor = *(cursors[i]);
+
+ if(!permutation.Excluded(i))
+ info << cursor.info << " ";
+ }
+
+ info << (permutation.Proximity() ? "proximity" : "");
+}
+
+int WordTreeOptional::SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity)
+{
+ int is_master = master == 0 || master == &cursor;
+ if(master == 0) master = &cursor;
+ const WordKey& masterKey = master->GetFound().Key();
+
+ int direction = key_semantic.LocationCompare(constraint, cursor.GetFound().Key(), proximity);
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorNear: LocationCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(constraint.Get()), (char*)(cursor.GetFound().Key().Get()), direction);
+
+ //
+ // If the cursor is in the authorized locations, consider
+ // next cursor
+ //
+ if(direction == 0) {
+ //
+ // master cursor makes the rules for location : its location
+ // is the base to calculate other words mandatory loacations.
+ //
+ if(is_master)
+ key_semantic.LocationSet(cursor.GetFound().Key(), constraint);
+ //
+ // Fix location constraint to accomodate proximity tolerance.
+ //
+ key_semantic.LocationNearLowest(constraint, proximity);
+ return WORD_WALK_NEXT;
+
+ //
+ // If current location is above cursor location
+ //
+ } else if(direction > 0) {
+ //
+ // Move the cursor up to the location.
+ //
+ cursor.Seek(constraint);
+ if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorNear: leap to %s\n", (char*)constraint.Get());
+ int ret;
+ if((ret = cursor.WalkNext()) == OK) {
+ //
+ // Remove the location constraint for the master word
+ // so that it matches and then enforce location for other
+ // keys.
+ //
+ if(is_master)
+ key_semantic.Location2Document(constraint);
+ //
+ // Reconsider the situation for this cursor
+ //
+ return WORD_WALK_REDO;
+ } else {
+ return ret;
+ }
+
+ //
+ // If current location is lower than cursor location,
+ // meaning that the cursor found no match for the current
+ // location.
+ //
+ } else if(direction < 0) {
+ //
+ // The cursor document becomes the current document.
+ // The master cursor is forced to catch up.
+ //
+ key_semantic.DocumentSet(cursor.GetDocument(), constraint);
+ //
+ // It is possible that this cursor document is the same
+ // as the master cursor document (if this cursor hit in the
+ // same document but a higher location). In this case we must
+ // increase the location of the master cursor otherwise it will
+ // match without moving and loop forever.
+ //
+ if(!is_master && key_semantic.DocumentCompare(masterKey, constraint) == 0) {
+ key_semantic.LocationSet(masterKey, constraint);
+ key_semantic.LocationNext(constraint);
+ }
+ //
+ // Since the current location changed, start over.
+ //
+ return WORD_WALK_RESTART;
+ } else {
+ fprintf(stderr, "WordTreeOptional::WordCursorNear: reached unreachable statement\n");
+ return NOTOK;
+ }
+ return NOTOK;
+}
+
+int WordTreeOptional::SearchCursorNot(WordTree& cursor, WordKey& document)
+{
+ int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorNot: DocumentCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(document.Get()), (char*)(cursor.GetFound().Key().Get()), direction);
+
+ //
+ // If the cursor is above the current document
+ // (being at the end of walk is being above all documents).
+ //
+ // Means that the cursor is positioned in an acceptable document
+ // and proceed to the next cursor.
+ //
+ if(direction < 0 || cursor.IsAtEnd()) {
+ return WORD_WALK_NEXT;
+
+ //
+ // If the cursor is below current document
+ //
+ } else if(direction > 0) {
+ //
+ // Move the cursor up to the document
+ //
+ cursor.Seek(document);
+ if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorNot: leap to %s\n", (char*)document.Get());
+ int ret;
+ if((ret = cursor.WalkNext()) != OK && ret != WORD_WALK_ATEND)
+ return NOTOK;
+ //
+ // It is expected in this case that the cursor has moved above
+ // the current document and another visit in the loop will
+ // tell us.
+ //
+ return WORD_WALK_REDO;
+
+ //
+ // If the cursor matches the current document.
+ //
+ // Means that the current document is not a possible match
+ // since it is pointed by this cursor.
+ //
+ } else if(direction == 0) {
+ //
+ // The cursor does not give any hint on a possible
+ // next document, just go to the next possible one.
+ //
+ key_semantic.DocumentNext(document, uniq);
+ //
+ // Since the current document changed, start over.
+ //
+ return WORD_WALK_RESTART;
+ } else {
+ fprintf(stderr, "WordTreeOptional::WordCursorNot: reached unreachable statement\n");
+ return NOTOK;
+ }
+ return NOTOK;
+}
+
+int WordTreeOptional::SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation)
+{
+ int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorAnd: DocumentCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(document.Get()), (char*)(cursor.GetFound().Key().Get()), direction);
+
+ //
+ // If the cursor is in the current document.
+ //
+ // Means that the cursor is positioned in an acceptable document
+ // and proceed to the next cursor.
+ //
+ if(direction == 0) {
+ return WORD_WALK_NEXT;
+
+ //
+ // If the cursor is below current document
+ //
+ } else if(direction > 0) {
+ //
+ // Move the cursor up to the document
+ //
+ cursor.Seek(document);
+ if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorAnd: leap to %s\n", (char*)document.Get());
+ int ret;
+ if((ret = cursor.WalkNext()) == OK)
+ return WORD_WALK_REDO;
+ else
+ return ret;
+
+ //
+ // If the cursor is above current document.
+ //
+ // Means the the current document is not a possible match
+ // since it will never reach it because it's already
+ // above it.
+ //
+ } else if(direction < 0) {
+ //
+ // The cursor document becomes the current document.
+ //
+ key_semantic.DocumentSet(cursor.GetDocument(), document);
+
+ //
+ // Since the current document changed, start over.
+ //
+ return WORD_WALK_RESTART;
+ } else {
+ fprintf(stderr, "WordTreeOptional::WordCursorAnd: reached unreachable statement\n");
+ return NOTOK;
+ }
+ return NOTOK;
+}
+
+int WordTreeOptional::CursorsObeyProximity(WordKey& document)
+{
+ //
+ // Run if more than one word is involved, proximity
+ // is always true if there is only one word.
+ //
+ if(permutation.NotExcludedCount() <= 1) return OK;
+
+ WordKey location;
+
+ //
+ // The first non excluded cursor contains anchor location.
+ //
+ unsigned int master_index = 0;
+ for(unsigned int i = 0; i < cursors_length; i++) {
+ if(!permutation.Excluded(i)) {
+ master_index = i;
+ break;
+ }
+ }
+ const WordKey& masterKey = cursors[master_index]->GetFound().Key();
+ key_semantic.DocumentSet(masterKey, location);
+
+ for(unsigned int i = 0; i < cursors_length;) {
+ if(permutation.Excluded(i)) {
+ i++;
+ continue;
+ }
+
+ WordTree& cursor = *(cursors[i]);
+ if(cursor.IsAtEnd()) return WORD_SEARCH_NOPROXIMITY;
+ // if(cursor.status & WORD_WALK_FAILED) return NOTOK;
+
+ //
+ // If the cursor moved outside of the tested document,
+ // no proximity match is possible.
+ //
+ if(key_semantic.DocumentCompare(cursor.GetFound().Key(), document) != 0)
+ return WORD_SEARCH_NOPROXIMITY;
+
+ int direction = key_semantic.LocationCompare(location, cursor.GetFound().Key(), proximity);
+
+ //
+ // If the cursor is in the authorized locations, consider
+ // next cursor
+ //
+ if(direction == 0) {
+ //
+ // master cursor makes the rules for location : its location
+ // is the base to calculate other words mandatory loacations.
+ //
+ if(i == master_index)
+ key_semantic.LocationSet(cursor.GetFound().Key(), location);
+ //
+ // Fix location constraint to accomodate proximity tolerance.
+ //
+ key_semantic.LocationNearLowest(location, proximity);
+ i++;
+
+ //
+ // If current location is greater than cursor location
+ //
+ } else if(direction > 0) {
+ //
+ // Move the cursor up to the location.
+ //
+ cursor.Seek(location);
+ if(verbose > 1) fprintf(stderr, "WordTreeOptional::CursorsObeyProximity: leap to %s\n", (char*)location.Get());
+ int ret;
+ if((ret = cursor.WalkNext()) != OK) {
+ if(ret == WORD_WALK_ATEND) {
+ return WORD_SEARCH_NOPROXIMITY;
+ } else {
+ return NOTOK;
+ }
+ }
+ //
+ // Remove the location constraint for the master word
+ // so that it matches and then enforce location for other
+ // keys.
+ //
+ if(i == master_index)
+ key_semantic.Location2Document(location);
+ //
+ // Reconsider the situation for this cursor
+ //
+
+ //
+ // If current location is lower than cursor location,
+ // meaning that the cursor found no match in the current
+ // document.
+ //
+ } else if(direction < 0) {
+ //
+ // Move to next master key, if possible.
+ //
+ if(i != master_index) {
+ key_semantic.LocationSet(masterKey, location);
+ key_semantic.LocationNext(location);
+ }
+ //
+ // Since the current location changed, start over.
+ //
+ i = 0;
+ }
+ }
+
+ return OK;
+}
+
+//
+// Helper class for AscendingFrequency method
+//
+class WordSort {
+public:
+ unsigned int frequency;
+ WordTree *cursor;
+};
+
+//
+// Helper function for AscendingFrequency method
+//
+static int ascending_frequency(const void *a, const void *b)
+{
+ const WordSort& a_cursor = *(WordSort*)a;
+ const WordSort& b_cursor = *(WordSort*)b;
+
+ return a_cursor.frequency - b_cursor.frequency;
+}
+
+int WordTreeOptional::AscendingFrequency()
+{
+ //
+ // Reorder cursors
+ //
+ WordSort *tmp = new WordSort[cursors_length];
+
+ memset((char*)tmp, '\0', cursors_length * sizeof(WordSort));
+
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++) {
+ unsigned int frequency;
+ if(cursors[i]->Noccurrence(frequency) != OK) {
+ delete [] tmp;
+ return NOTOK;
+ }
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::AscendingFrequency: %s occurs %d times\n", (char*)cursors[i]->GetSearch().Get(), frequency);
+ tmp[i].frequency = frequency;
+ tmp[i].cursor = cursors[i];
+ }
+
+ memset((char*)cursors, '\0', cursors_length * sizeof(WordTree*));
+
+ qsort((void *)tmp, cursors_length, sizeof(WordSort), &ascending_frequency);
+
+ for(i = 0; i < cursors_length; i++)
+ cursors[i] = tmp[i].cursor;
+
+ delete [] tmp;
+ return OK;
+}
+
+int WordTreeOptional::StripNonExistent(unsigned int& stripped)
+{
+ stripped = 0;
+
+ WordTree** tmp = new WordTree*[cursors_length];
+ memset((char*)tmp, '\0', cursors_length * sizeof(WordTree*));
+
+ unsigned int from;
+ unsigned int to;
+
+ for(to = from = 0; from < cursors_length; from++) {
+ unsigned int frequency;
+ if(cursors[from]->Noccurrence(frequency) != OK) {
+ delete [] tmp;
+ return NOTOK;
+ }
+
+ if(verbose > 2) fprintf(stderr, "WordTreeOptional::StripNonExistent: %s occurs %d times\n", (char*)cursors[from]->GetSearch().Get(), frequency);
+ if(frequency > 0) {
+ tmp[to++] = cursors[from];
+ } else {
+ delete cursors[from];
+ stripped++;
+ }
+ }
+
+ memset((char*)cursors, '\0', cursors_length * sizeof(WordTree*));
+
+ cursors_length = to;
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ cursors[i] = tmp[i];
+
+ delete [] tmp;
+
+ return OK;
+}
+
+// ************************* WordTreeOr implementation ********************
+
+class WordTreeOr : public WordTreeOperand {
+ public:
+ WordTreeOr(const char* nscope) : WordTreeOperand(nscope) { }
+
+ //-
+ // Return WORD_TREE_OR
+ //
+ virtual int IsA() const { return WORD_TREE_OR; }
+
+ virtual int Optimize();
+
+ virtual int ContextSaveList(StringList& list) const;
+
+ virtual int ContextRestoreList(StringList& list);
+
+ virtual void SetInfo();
+
+ virtual int WalkNext();
+
+ virtual int UsePermutation() const { return 0; }
+
+ virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; }
+};
+
+int WordTreeOr::Optimize()
+{
+ int ret;
+ if((ret = WordTreeOperand::Optimize()) != OK)
+ return ret;
+
+ if((ret = AscendingFrequency()) != OK)
+ return ret;
+
+ unsigned int stripped;
+ if((ret = StripNonExistent(stripped)) != OK)
+ return ret;
+
+ return OptimizeOr(WORD_PERMUTE_PROXIMITY_NO);
+}
+
+int WordTreeOr::ContextSaveList(StringList& list) const
+{
+ int ret;
+ if((ret = WordTreeOperand::ContextSaveList(list)) != OK)
+ return ret;
+
+ {
+ String* buffer = new String();
+ permutation.Get(*buffer);
+
+ list.Add(buffer);
+ }
+
+ {
+ String* buffer = new String();
+ if((ret = WordCursor::ContextSave(*buffer)) != OK)
+ return ret;
+
+ list.Add(buffer);
+ }
+
+ return OK;
+}
+
+int WordTreeOr::ContextRestoreList(StringList& list)
+{
+ int ret;
+ if((ret = WordTreeOperand::ContextRestoreList(list)) != OK)
+ return ret;
+
+ {
+ char* buffer = list[0];
+ if((ret = permutation.Set(buffer)) != OK)
+ return ret;
+ list.Remove(0);
+ }
+
+ {
+ char* buffer = list[0];
+ if(!buffer) return NOTOK;
+ WordKey key(buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ cursor_get_flags = DB_NEXT;
+
+ list.Remove(0);
+ }
+
+ return OK;
+}
+
+void WordTreeOr::SetInfo()
+{
+ unsigned int i;
+ for(i = 0; i < cursors_length; i++)
+ cursors[i]->SetInfo();
+
+ info.trunc();
+
+ for(i = 0; i < cursors_length; i++) {
+ WordTree& cursor = *(cursors[i]);
+
+ if(!permutation.Excluded(i) &&
+ !cursor.IsAtEnd() &&
+ key_semantic.DocumentCompare(cursor.GetFound().Key(), GetFound().Key()) == 0) {
+ info << cursor.info << " ";
+ }
+ }
+}
+
+int WordTreeOr::WalkNext()
+{
+ WordKey& constraint = pos;
+ //
+ // Set constraint with all 0
+ //
+ if(constraint.Empty())
+ key_semantic.DocumentClear(constraint);
+
+ WordKey candidate;
+ int match_ok;
+ do {
+ int ret;
+ unsigned int i;
+ candidate.Clear();
+ //
+ // Advance cursors so that next call fetches another constraint
+ //
+ if(cursor_get_flags == DB_NEXT)
+ key_semantic.DocumentNext(constraint, uniq);
+
+ if((ret = Seek(constraint)) != OK)
+ return ret;
+
+ match_ok = 1;
+ //
+ // All non excluded cursors are about to move
+ // at or beyond constraint. Search for the one (candidate) that
+ // is located at the lowest location beyond the constraint.
+ //
+ for(i = 0; i < cursors_length; i++) {
+ if(permutation.Excluded(i))
+ continue;
+ WordTree& cursor = *(cursors[i]);
+
+ switch((ret = cursor.WalkNext())) {
+ case WORD_WALK_ATEND:
+ //
+ // Constraint is above all matches for this cursor
+ //
+ break;
+ case OK:
+ //
+ // If candidate is not set or current cursor is below
+ // the current candidate, the curent cursor document becomes
+ // the candidate.
+ //
+ if(candidate.Empty() ||
+ key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) > 0) {
+ key_semantic.DocumentSet(cursor.GetDocument(), candidate);
+ }
+ break;
+ default:
+ return ret;
+ break;
+ }
+ }
+
+ //
+ // No candidate ? It's the end of the match list.
+ //
+ if(candidate.Empty())
+ return WORD_WALK_ATEND;
+
+ found.Key().CopyFrom(candidate);
+
+ SetInfo();
+
+ if(permutation.ExcludedCount() > 0) {
+ if((ret = Seek(candidate)) != OK)
+ return ret;
+
+ //
+ // Restart loop if candidate matches an excluded cursor.
+ //
+ for(i = 0; i < cursors_length && match_ok; i++) {
+ if(!permutation.Excluded(i))
+ continue;
+ WordTree& cursor = *(cursors[i]);
+
+ switch((ret = cursor.WalkNext())) {
+ case WORD_WALK_ATEND:
+ //
+ // This excluded cursor can't match the candidate, fine.
+ //
+ break;
+ case OK:
+ //
+ // This excluded cursor matches candidate therefore it's
+ // not a valid candidate. Restart search with this candidate
+ // as the constraint.
+ //
+ if(key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) == 0) {
+ constraint = candidate;
+ match_ok = 0;
+ }
+ break;
+ default:
+ return ret;
+ break;
+ }
+
+ }
+ }
+
+ cursor_get_flags = DB_NEXT;
+
+ } while(!match_ok);
+
+ constraint = candidate;
+
+ return OK;
+}
+
+// ************************* WordTreeAnd implementation ********************
+
+class WordTreeAnd : public WordTreeOptional {
+ public:
+ WordTreeAnd(const char* nscope) : WordTreeOptional(nscope) { }
+
+ //-
+ // Return WORD_TREE_AND
+ //
+ virtual int IsA() const { return WORD_TREE_AND; }
+
+ virtual int UsePermutation() const { return 0; }
+
+ virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; }
+
+ virtual int AllOrNothing() const { return 1; }
+};
+
+// ************************* WordTreeNear implementation ********************
+
+class WordTreeNear : public WordTreeOptional {
+ public:
+ WordTreeNear(const char* nscope) : WordTreeOptional(nscope) { }
+
+ //-
+ // Return WORD_TREE_NEAR
+ //
+ virtual int IsA() const { return WORD_TREE_NEAR; }
+
+ virtual int UsePermutation() const { return 0; }
+
+ virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_ONLY; }
+
+ virtual int AllOrNothing() const { return 1; }
+};
+
+// ************************* WordTreeMandatory implementation ***************
+
+class WordTreeMandatory : public WordTreeOperand {
+ public:
+ WordTreeMandatory(const char* nscope) : WordTreeOperand(nscope) { }
+
+ //-
+ // Return WORD_TREE_MANDATORY
+ //
+ virtual int IsA() const { return WORD_TREE_MANDATORY; }
+};
+
+// ************************* WordTreeNot implementation ***************
+
+class WordTreeNot : public WordTreeOperand {
+ public:
+ WordTreeNot(const char* nscope) : WordTreeOperand(nscope) { }
+
+ //-
+ // Return WORD_TREE_NOT
+ //
+ virtual int IsA() const { return WORD_TREE_NOT; }
+};
+
+// ************************* WordMatch implementation ********************
+
+//
+// Return value of the Search method, tells us which document
+// matched and why.
+//
+class WordMatch {
+public:
+
+ //-
+ // Return a textual representation of the object.
+ //
+ String Get() const;
+
+ //-
+ // The document that matched
+ //
+ WordKey match;
+ //-
+ // An ascii description of why it matched.
+ //
+ String info;
+};
+
+String WordMatch::Get() const
+{
+ String tmp;
+ match.Get(tmp);
+ if(!info.empty())
+ tmp << "(" << info << ")";
+ return tmp;
+}
+
+// ************************* WordSearch implementation ********************
+//
+// NAME
+//
+// Solve a query from a WordTree syntax tree
+//
+// SYNOPSIS
+//
+// #include <WordSearch.h>
+//
+// WordTree* expr = get_query();
+// WordSearch search;
+// search.limit_count = NUMBER_OF_RESULTS;
+// WordMatch* search.Search(expr);
+// ...
+//
+// DESCRIPTION
+//
+// The WordSearch class is a wrapper to query an inverted index
+// using a WordTree syntax tree.
+//
+// END
+//
+class WordSearch {
+public:
+ WordSearch();
+
+ //-
+ // Perform a search from the <b>expr</b> specifications.
+ // Restore the context from <i>context_in</i> on <b>expr</b>.
+ // Then skip (using WalkNext) <i>limit_bottom</i> entries.
+ // Then collect in a WordMatch array of size <i>limit_count</i>
+ // each match returned by WalkNext. When finished store
+ // the context (ContextSave) in <i>context_out</i>.
+ // It is the responsibility of the caller to free the WordMatch
+ // array. If no match are found a null pointer is returned.
+ //
+ WordMatch *Search();
+
+ //
+ // Search backend, only run the WalkNext loop but does not
+ // allocate/deallocate data.
+ //
+ int SearchLoop(WordTree *expr);
+
+ //
+ // Return a context description string to resume the
+ // search at a given point.
+ //
+ const String& Context() const { return context_out; }
+
+ //
+ // Input
+ //
+ unsigned int limit_bottom;
+ unsigned int limit_count;
+ String context_in;
+ WordTree* expr;
+
+ //
+ // Output
+ //
+ WordMatch* matches;
+ unsigned int matches_size;
+ unsigned int matches_length;
+ String context_out;
+};
+
+WordSearch::WordSearch()
+{
+ //
+ // Input
+ //
+ limit_bottom = 0;
+ limit_count = 0;
+ context_in.trunc();
+ expr = 0;
+
+ //
+ // Output
+ //
+ matches = 0;
+ matches_size = 0;
+ matches_length = 0;
+ context_out.trunc();
+}
+
+WordMatch *WordSearch::Search()
+{
+ int ret = 0;
+
+ if(verbose) fprintf(stderr, "WordSearch::Search: non optimized expression %s\n", (char*)expr->Get());
+ if(expr->Optimize() != OK)
+ return 0;
+ if(verbose) fprintf(stderr, "WordSearch::Search: optimized expression %s\n", (char*)expr->Get());
+
+
+ //
+ // Build space for results
+ //
+ matches_size = limit_count + 1;
+ matches = new WordMatch[matches_size];
+ matches_length = 0;
+
+ //
+ // Move to first possible position.
+ //
+ if(expr->WalkInit() != OK)
+ goto end;
+
+ if(expr->ContextRestore(context_in) == NOTOK)
+ goto end;
+ ret = SearchLoop(expr);
+ //
+ // Don't bother saving the context if at end of
+ // search (WORD_WALK_ATEND) or error (NOTOK)
+ //
+ if(ret == OK && expr->ContextSave(context_out) == NOTOK)
+ goto end;
+
+end:
+ expr->WalkFinish();
+
+ if(ret == NOTOK || matches_length <= 0) {
+ delete [] matches;
+ matches = 0;
+ }
+
+ return matches;
+}
+
+int WordSearch::SearchLoop(WordTree *expr)
+{
+ int ret = OK;
+ unsigned int i;
+ //
+ // Skip the first <limit_bottom> documents
+ //
+ {
+ for(i = 0; i < limit_bottom; i++) {
+ if((ret = expr->WalkNext()) != OK)
+ return ret;
+ }
+ }
+ //
+ // Get documents up to <limit_count> or exhaustion
+ //
+ for(matches_length = 0; matches_length < limit_count; matches_length++) {
+ if((ret = expr->WalkNext()) != OK) {
+ break;
+ } else {
+ matches[matches_length].match = expr->GetDocument();
+ if(expr->IsA() != WORD_TREE_LITERAL)
+ matches[matches_length].info = ((WordTreeOperand*)expr)->GetInfo();
+ if(verbose) fprintf(stderr, "WordSearch::Search: match %s\n", (char*)matches[matches_length].match.Get());
+ }
+ }
+
+ if(ret == WORD_WALK_ATEND)
+ matches[matches_length].match.Clear();
+
+ return ret;
+}
+
+// ************************* WordParser implementation ********************
+//
+// NAME
+//
+// Textual query parser for test purpose
+//
+// SYNOPSIS
+//
+// #include <WordParser.h>
+//
+// WordParser parser;
+// WordTree* expr = parser.Parse("( or \"scope1\" a query )");
+// ...
+// delete expr;
+//
+// DESCRIPTION
+//
+// The WordParser class implement a lisp-like parser for queries
+// implemented by the WordTree derived classes. The syntax is rigid
+// and should not be used for human input. The generic syntax of an
+// expression is
+// <pre>
+// ( operator "scope" operand [operand ...] )
+// </pre>
+// The parenthesis must <b>always</b> be surrounded by white space otherwise
+// the parser will be lost. The separator is white space and newline.
+// Tabulation may be used in scope to separate key fields.
+//
+// As a special case a single word is strictly equivalent
+// to
+// <pre>
+// ( literal "" word )
+// </pre>
+//
+// Operators can be lower case or upper case. There is almost no syntax
+// checking and it's the responsibility of the caller to associate meaningfull
+// operands. For instance ( near ( not foo ) bar ) is meaningless.
+//
+// OPERATORS
+//
+// <dl>
+//
+// <dt> optional
+// <dd> WordTreeOptional
+//
+// <dt> or
+// <dd> WordTreeOr
+//
+// <dt> and
+// <dd> WordTreeAnd
+//
+// <dt> near
+// <dd> WordTreeNear
+//
+// <dt> not,forbiden
+// <dd> WordTreeNot
+//
+// <dt> mandatory
+// <dd> WordTreeMandatory
+//
+// <dt> literal
+// <dd> WordTreeLiteral
+//
+// </dl>
+//
+//
+// END
+
+//
+// Possible values of the info argument of ParseOperands
+//
+#define WORD_TREE_MANY 0x01
+#define WORD_TREE_ONE 0x02
+#define WORD_TREE_TWO 0x04
+
+class WordParser {
+public:
+ WordTree *Parse(const String& expr);
+ WordTree *ParseList(StringList& terms);
+
+ WordTree *ParseExpr(StringList& terms);
+ WordTree *ParseUnary(StringList& terms);
+ WordTree *ParseConj(StringList& terms);
+ void ParseOperands(StringList& terms, int info, WordTreeOperand* expr);
+ WordTree *ParseLiteral(StringList& terms);
+ char *ParseScope(StringList& terms);
+
+ void Shift(StringList& terms);
+ char *Term(StringList& terms);
+};
+
+WordTree *WordParser::Parse(const String& expr)
+{
+ StringList terms(expr, " \n");
+ return ParseList(terms);
+}
+
+WordTree *WordParser::ParseList(StringList& terms)
+{
+ WordTree *expr = ParseExpr(terms);
+ return expr;
+}
+
+WordTree *WordParser::ParseExpr(StringList& terms)
+{
+ WordTree *expr = 0;
+ char* term = strdup(Term(terms));
+ if(!strcmp(term, "(")) {
+ Shift(terms);
+ expr = ParseExpr(terms);
+ } else if(!strcmp(term, ")")) {
+ //
+ // At end of expression, return null
+ //
+ } else if(!mystrcasecmp(term, "optional") ||
+ !mystrcasecmp(term, "or") ||
+ !mystrcasecmp(term, "and") ||
+ !mystrcasecmp(term, "near")) {
+ expr = ParseConj(terms);
+ } else if(!mystrcasecmp(term, "not") ||
+ !mystrcasecmp(term, "mandatory") ||
+ !mystrcasecmp(term, "forbiden")) {
+ expr = ParseUnary(terms);
+ } else {
+ expr = ParseLiteral(terms);
+ }
+ free(term);
+ return expr;
+}
+
+WordTree *WordParser::ParseUnary(StringList& terms)
+{
+ int op = 0;
+ if(!mystrcasecmp(Term(terms), "mandatory"))
+ op = WORD_TREE_MANDATORY;
+ else if(!mystrcasecmp(Term(terms), "forbiden") ||
+ !mystrcasecmp(Term(terms), "not"))
+ op = WORD_TREE_NOT;
+
+ Shift(terms);
+ char* scope = ParseScope(terms);
+ WordTreeOperand *expr = 0;
+ switch(op) {
+ case WORD_TREE_MANDATORY:
+ expr = new WordTreeMandatory(scope);
+ break;
+ case WORD_TREE_NOT:
+ expr = new WordTreeNot(scope);
+ break;
+ default:
+ fprintf(stderr, "WordParser::ParseUnary: unexpected operator %d\n", op);
+ exit(1);
+ break;
+ }
+ free(scope);
+ ParseOperands(terms, WORD_TREE_ONE, expr);
+ return expr;
+}
+
+WordTree *WordParser::ParseConj(StringList& terms)
+{
+ int op = 0;
+ if(!mystrcasecmp(Term(terms), "optional"))
+ op = WORD_TREE_OPTIONAL;
+ else if(!mystrcasecmp(Term(terms), "or"))
+ op = WORD_TREE_OR;
+ else if(!mystrcasecmp(Term(terms), "and"))
+ op = WORD_TREE_AND;
+ else if(!mystrcasecmp(Term(terms), "near"))
+ op = WORD_TREE_NEAR;
+
+ Shift(terms);
+ char* scope = ParseScope(terms);
+ WordTreeOperand *expr = 0;
+ switch(op) {
+ case WORD_TREE_OR:
+ expr = new WordTreeOr(scope);
+ break;
+ case WORD_TREE_OPTIONAL:
+ expr = new WordTreeOptional(scope);
+ break;
+ case WORD_TREE_AND:
+ expr = new WordTreeAnd(scope);
+ break;
+ case WORD_TREE_NEAR:
+ expr = new WordTreeNear(scope);
+ break;
+ default:
+ fprintf(stderr, "WordParser::ParseOrAnd: unexpected operator %d\n", op);
+ exit(1);
+ break;
+ }
+ free(scope);
+ ParseOperands(terms, WORD_TREE_MANY, expr);
+ return expr;
+}
+
+void WordParser::ParseOperands(StringList& terms, int info, WordTreeOperand* expr)
+{
+ unsigned int operands_length = 0;
+ unsigned int operands_size = 1;
+ WordTree **operands = (WordTree**)malloc(operands_size * sizeof(WordTree*));
+ WordTree *subexpr = 0;
+ while((subexpr = ParseExpr(terms))) {
+ operands_length++;
+ if((info & WORD_TREE_ONE) && operands_length > 1) {
+ fprintf(stderr, "WordParser::ParseOperands: expected only one operands\n");
+ exit(1);
+ } else if((info & WORD_TREE_TWO) && operands_length > 2) {
+ fprintf(stderr, "WordParser::ParseOperands: expected only two operands\n");
+ exit(1);
+ }
+ if(operands_length > operands_size) {
+ operands_size = operands_length * 2;
+ operands = (WordTree**)realloc(operands, operands_size * sizeof(WordTree*));
+ }
+ operands[operands_length - 1] = subexpr;
+ }
+ //
+ // Discard close parenthesis
+ //
+ if(strcmp(Term(terms), ")")) {
+ fprintf(stderr, "WordParser::ParseOperands: expected close parenthesis\n");
+ exit(1);
+ }
+ Shift(terms);
+
+ expr->cursors = operands;
+ expr->cursors_length = operands_length;
+}
+
+WordTree *WordParser::ParseLiteral(StringList& terms)
+{
+ char* term = strdup(Term(terms));
+ char* scope = 0;
+ if(!mystrcasecmp(term, "literal")) {
+ Shift(terms);
+ scope = ParseScope(terms);
+ free(term);
+ term = strdup(Term(terms));
+ Shift(terms);
+ } else {
+ scope = strdup("");
+ }
+ WordTreeLiteral *expr = new WordTreeLiteral(term, scope);
+ Shift(terms);
+ free(scope);
+ free(term);
+ return expr;
+}
+
+char *WordParser::ParseScope(StringList& terms)
+{
+ char *scope = Term(terms);
+ int scope_length = strlen(scope);
+
+ //
+ // Remove surrounding quotes, if any
+ //
+ if(scope_length > 0) {
+ if(scope[scope_length - 1] == '"')
+ scope[--scope_length] = '\0';
+ if(scope[0] == '"')
+ scope++;
+ }
+
+ scope = strdup(scope);
+
+ Shift(terms);
+
+ return scope;
+}
+
+char *WordParser::Term(StringList& terms)
+{
+ char *term = terms[0];
+ if(!term) {
+ fprintf(stderr, "WordParser::Term: unexpected end of expression\n");
+ exit(1);
+ }
+ return term;
+}
+
+void WordParser::Shift(StringList& terms)
+{
+ terms.Shift(LIST_REMOVE_DESTROY);
+}
+
+// ************************* main loop implementation ********************
+
+//
+// Store all options from the command line
+//
+class params_t
+{
+public:
+ char* dbfile;
+ char* find;
+ unsigned int bottom;
+ unsigned int count;
+ char* context;
+ int uniq_server;
+ int proximity;
+ int nop;
+ int exclude;
+};
+
+//
+// Explain options
+//
+static void usage();
+//
+// Torture WordExclude* classes
+//
+static void exclude_test();
+
+int main(int ac, char **av)
+{
+ int c;
+ extern char *optarg;
+ params_t params;
+
+ params.dbfile = strdup("test");
+ params.find = 0;
+ params.bottom = 0;
+ params.count = 10;
+ params.context = 0;
+ params.uniq_server = 0;
+ params.proximity = WORD_SEARCH_DEFAULT_PROXIMITY;
+ params.nop = 0;
+ params.exclude = 0;
+
+ while ((c = getopt(ac, av, "vB:f:b:c:C:SP:ne")) != -1)
+ {
+ switch (c)
+ {
+ case 'v':
+ verbose++;
+ break;
+ case 'B':
+ free(params.dbfile);
+ params.dbfile = strdup(optarg);
+ break;
+ case 'f':
+ params.find = strdup(optarg);
+ break;
+ case 'b':
+ params.bottom = (unsigned int)atoi(optarg);
+ break;
+ case 'c':
+ params.count = (unsigned int)atoi(optarg);
+ break;
+ case 'C':
+ params.context = strdup(optarg);
+ break;
+ case 'P':
+ params.proximity = atoi(optarg);
+ break;
+ case 'S':
+ params.uniq_server = SERVER;
+ break;
+ case 'n':
+ params.nop = 1;
+ break;
+ case 'e':
+ params.exclude = 1;
+ break;
+ case '?':
+ usage();
+ break;
+ }
+ }
+
+ if(params.exclude) {
+ exclude_test();
+ exit(0);
+ }
+
+ if(!params.find)
+ usage();
+
+ Configuration* config = WordContext::Initialize();
+ if(!config) {
+ fprintf(stderr, "search: no config file found\n");
+ exit(1);
+ }
+
+ //
+ // Forward command line verbosity to htword library.
+ //
+ if(verbose > 1) {
+ String tmp;
+ tmp << (verbose - 1);
+ config->Add("wordlist_verbose", tmp);
+ }
+
+ //
+ // Prepare the index (-B).
+ //
+ WordList words(*config);
+ words.Open(params.dbfile, O_RDONLY);
+
+ //
+ // Try the query parser alone
+ //
+ if(params.nop) {
+ WordTree* expr = WordParser().Parse(params.find);
+ printf("%s\n", (char*)expr->Get());
+ exit(0);
+ }
+
+ //
+ // Build a syntax tree from the expression provided by user
+ //
+ WordTree* expr = WordParser().Parse(params.find);
+
+ //
+ // Define the semantic of the key
+ //
+ {
+#define DOCUMENT_LENGTH 3
+ static int document[DOCUMENT_LENGTH] = {
+ TAG,
+ SERVER,
+ URL
+ };
+ int document_length = DOCUMENT_LENGTH;
+ int location = LOCATION;
+ if(expr->Prepare(&words, params.uniq_server, params.proximity, document, document_length, location) != OK)
+ exit(1);
+ }
+
+ WordSearch* search = new WordSearch();
+
+ //
+ // Forward query options to WordSearch object
+ //
+ search->limit_bottom = params.bottom; // -b
+ search->limit_count = params.count; // -c
+ if(params.context) // -C
+ search->context_in.set(params.context, strlen(params.context));
+
+ //
+ // Perform the search (-f)
+ //
+ search->expr = expr;
+ WordMatch* matches = search->Search();
+
+ //
+ // Display results, if any.
+ //
+ if(matches) {
+ int i;
+ for(i = 0; !matches[i].match.Empty(); i++)
+ printf("match: %s\n", (char*)matches[i].Get());
+ const String& context = search->Context();
+ if(!context.empty())
+ printf("context: %s\n", (const char*)context);
+ delete [] matches;
+ } else {
+ printf("match: none\n");
+ }
+
+ //
+ // Cleanup
+ //
+ delete search;
+ if(params.context) free(params.context);
+ if(params.find) free(params.find);
+ if(params.dbfile) free(params.dbfile);
+ delete expr;
+
+ words.Close();
+ delete config;
+}
+
+static void exclude_test()
+{
+ static unsigned int expected[] = {
+ 0x00000001,
+ 0x00000002,
+ 0x00000004,
+ 0x00000008,
+ 0x00000010,
+ 0x00000003,
+ 0x00000005,
+ 0x00000006,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000c,
+ 0x00000011,
+ 0x00000012,
+ 0x00000014,
+ 0x00000018,
+ 0x00000007,
+ 0x0000000b,
+ 0x0000000d,
+ 0x0000000e,
+ 0x00000013,
+ 0x00000015,
+ 0x00000016,
+ 0x00000019,
+ 0x0000001a,
+ 0x0000001c,
+ 0x0000000f,
+ 0x00000017,
+ 0x0000001b,
+ 0x0000001d,
+ 0x0000001e,
+ 0x0000001f
+ };
+
+ //
+ // WordExclude
+ //
+ if(verbose) fprintf(stderr, "exclude_test: testing WordExclude\n");
+ {
+ WordExclude exclude;
+ exclude.Initialize(5);
+ int count = 0;
+ while(exclude.Next() == WORD_EXCLUDE_OK) {
+ if(expected[count] != exclude.Mask()) {
+ fprintf(stderr, "exclude_test: WordExclude iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask());
+ exit(1);
+ }
+ count++;
+ }
+ if(count != sizeof(expected)/sizeof(unsigned int)) {
+ fprintf(stderr, "exclude_test: WordExclude expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count);
+ exit(1);
+ }
+ }
+
+ //
+ // WordExcludeMask without ignore bits behaves exactly the same
+ // as WordExclude.
+ //
+ if(verbose) fprintf(stderr, "exclude_test: testing WordExcludeMask behaving like WordExclude\n");
+ {
+ WordExcludeMask exclude;
+ exclude.Initialize(5, 0, 0);
+ int count = 0;
+ while(exclude.Next() == WORD_EXCLUDE_OK) {
+ if(expected[count] != exclude.Mask()) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 1 iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask());
+ exit(1);
+ }
+ count++;
+ }
+ if(count != sizeof(expected)/sizeof(unsigned int)) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 1 expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count);
+ exit(1);
+ }
+ }
+
+ //
+ // WordExcludeMask
+ //
+ if(verbose) fprintf(stderr, "exclude_test: testing WordExcludeMask\n");
+ {
+ static unsigned int expected[] = {
+ 0x00000102,
+ 0x00000108,
+ 0x00000120,
+ 0x00000180,
+ 0x0000010a,
+ 0x00000122,
+ 0x00000128,
+ 0x00000182,
+ 0x00000188,
+ 0x000001a0,
+ 0x0000012a,
+ 0x0000018a,
+ 0x000001a2,
+ 0x000001a8,
+ 0x000001aa
+ };
+ static unsigned int excluded[] = {
+ 1,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 0,
+ 1,
+ 0,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1
+ };
+
+ WordExcludeMask exclude;
+ unsigned int ignore = 0x155;
+ unsigned int ignore_mask = 0x100;
+ exclude.Initialize(9, ignore, ignore_mask);
+ if(verbose) {
+ fprintf(stderr, "exclude_test: ignore\n");
+ show_bits(ignore);
+ fprintf(stderr, "exclude_test: ignore_mask\n");
+ show_bits(ignore_mask);
+ }
+ if(exclude.NotExcludedCount() != 8) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 2 expected NoExcludedCount = 8 but got %d\n", exclude.NotExcludedCount());
+ exit(1);
+ }
+ int count = 0;
+ while(exclude.Next() == WORD_EXCLUDE_OK) {
+ if(expected[count] != exclude.Mask()) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask());
+ exit(1);
+ }
+ //
+ // Test Excluded() method on ignored bit
+ // Is bit 5 set ? (9 - 4) = 5 (counting from 1)
+ //
+ if(exclude.Excluded(4)) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d bit 5 was set 0x%08x\n", count, exclude.Mask());
+ exit(1);
+ }
+ //
+ // Test Excluded() method on variable bit
+ // Is bit 2 set ? (9 - 2) = 7 (counting from 1)
+ //
+ if((exclude.Excluded(7) && !excluded[count]) ||
+ (!exclude.Excluded(7) && excluded[count])) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d expected bit 2 %s but was %s in 0x%08x\n", count, (excluded[count] ? "set" : "not set"), (excluded[count] ? "not set" : "set"), expected[count]);
+ exit(1);
+ }
+ count++;
+ }
+ if(count != sizeof(expected)/sizeof(unsigned int)) {
+ fprintf(stderr, "exclude_test: WordExcludeMask 2 expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count);
+ exit(1);
+ }
+ }
+
+ {
+ WordExclude exclude;
+ String ascii("110101");
+ String tmp;
+ exclude.Set(ascii);
+ exclude.Get(tmp);
+ if(tmp != ascii) {
+ fprintf(stderr, "exclude_test: WordExclude::Get/Set expected %s but got %s\n", (char*)ascii, (char*)tmp);
+ exit(1);
+ }
+ if(exclude.Mask() != 0x2b) {
+ fprintf(stderr, "exclude_test: WordExclude::Mask expected 0x2b but got 0x%02x\n", exclude.Mask());
+ exit(1);
+ }
+ }
+ {
+ WordExcludeMask exclude;
+ String ascii("12031");
+ String tmp;
+ exclude.Set(ascii);
+ exclude.Get(tmp);
+ if(tmp != ascii) {
+ fprintf(stderr, "exclude_test: WordExcludeMask::Get/Set expected %s but got %s\n", (char*)ascii, (char*)tmp);
+ exit(1);
+ }
+ if(exclude.Mask() != 0x19) {
+ fprintf(stderr, "exclude_test: WordExcludeMask::Mask expected 0x19 but got 0x%02x\n", exclude.Mask());
+ exit(1);
+ }
+ }
+}
+
+// *****************************************************************************
+// void usage()
+// Display program usage information
+//
+static void usage()
+{
+ printf("usage:\tsearch -f words [options]\n");
+ printf("\tsearch -e\n");
+ printf("Options:\n");
+ printf("\t-v\t\tIncreases the verbosity.\n");
+ printf("\t-B dbfile\tUse <dbfile> as a db file name (default test).\n");
+ printf("\t-f expr\t\tLisp like search expression.\n");
+ printf("\t\t\tSee WordParser comments in source for more information.\n");
+ printf("\t-b number\tSkip number documents before retrieving.\n");
+ printf("\t-c number\tRetrieve number documents at most.\n");
+ printf("\t-n\t\tOnly parse the search expression and print it.\n");
+ printf("\t-P proximity\tUse with near/optional, proximity tolerance is <proximity>\n");
+ printf("\t\t\tif negative order of terms is not meaningful\n");
+ printf("\t\t\t(default 1).\n");
+ printf("\t-C context\tResume search at <context>.\n");
+ printf("\t-S\t\tReturn at most one match per server.\n");
+ printf("\n");
+ printf("\t-e\t\tRun tests on WordExclude and WordExcludeMask.\n");
+ exit(1);
+}