summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc582
1 files changed, 582 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc
new file mode 100644
index 00000000..d0980e04
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc
@@ -0,0 +1,582 @@
+//
+// WordCursor.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursor.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "WordCursor.h"
+#include "WordStat.h"
+#include "WordList.h"
+
+#include <stdio.h>
+
+//
+// WordCursor implementation
+//
+
+// *****************************************************************************
+//
+int WordCursor::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
+{
+ action = naction;
+ searchKey = nsearchKey;
+ callback = ncallback;
+ callback_data = ncallback_data;
+ words = nwords;
+ return OK;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::Clear()
+{
+ searchKey.Clear();
+ action = 0;
+ callback = 0;
+ callback_data = 0;
+ ClearResult();
+ ClearInternal();
+ words = 0;
+
+ //
+ // Debugging section.
+ //
+ traceRes = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::ClearInternal()
+{
+ cursor.Close();
+ key.trunc();
+ data.trunc();
+ prefixKey.Clear();
+ cursor_get_flags = DB_SET_RANGE;
+ searchKeyIsSameAsPrefix = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursor::ClearResult()
+{
+ collectRes = 0;
+ found.Clear();
+ status = OK;
+}
+
+int
+WordCursor::ContextRestore(const String& buffer)
+{
+ int ret = OK;
+ if(!buffer.empty()) {
+ WordKey key(buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ //
+ // Move to restored position so that next call to
+ // WalkNext will go above the restored position.
+ //
+ if((ret = WalkNext()) != OK)
+ return ret;
+ }
+ return ret;
+}
+
+// *****************************************************************************
+//
+// Walk and collect data from the word database.
+//
+// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
+// stored in a list and the list is returned.
+// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
+// is called for each WordReference found. No list is built and the
+// function returns a null pointer.
+//
+// The <searchKey> argument may be a fully qualified key, containing precise values for each
+// field of the key. It may also contain only some fields of the key. In both cases
+// all the word occurrences matching the fields set in the key are retrieved. It may
+// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
+// be *slow* if key is not a prefix because it forces a complete walk of the
+// index.
+//
+int
+WordCursor::Walk()
+{
+ int ret;
+ if((ret = WalkInit()) != OK) return ret;
+ while((ret = WalkNext()) == OK)
+ ;
+ int ret1;
+ if((ret1 = WalkFinish()) != OK) return ret1;
+
+ return ret == WORD_WALK_ATEND ? OK : NOTOK;
+}
+
+int
+WordCursor::WalkInit()
+{
+ int ret = OK;
+
+ ClearResult();
+ ClearInternal();
+
+ WordReference wordRef;
+
+ if((ret = cursor.Open(words->db.db)) != 0)
+ return ret;
+
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
+
+ if(action & HTDIG_WORDLIST_COLLECTOR) {
+ collectRes = new List;
+ }
+
+ const WordReference& last = WordStat::Last();
+
+ WordKey first_key;
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ //
+ // Move past the stat data
+ //
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is empty\n");
+ first_key = last.Key();
+
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is not a prefix\n");
+ prefixKey.Clear();
+ //
+ // Move past the stat data
+ //
+ first_key = last.Key();
+ } else {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: go to %s \n", (char*)prefixKey.Get());
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Init
+ //
+ found.Key().CopyFrom(first_key);
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursor::WalkRewind()
+{
+ const WordReference& last = WordStat::Last();
+
+ WordKey first_key;
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ first_key = last.Key();
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ prefixKey.Clear();
+ //
+ // Move past the stat data
+ //
+ first_key = last.Key();
+ } else {
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Rewind
+ //
+ found.Key().CopyFrom(first_key);
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursor::WalkNext()
+{
+ int ret;
+ while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNext: got false match, retry\n");
+
+ return ret;
+}
+
+int
+WordCursor::WalkNextStep()
+{
+ status = OK;
+
+ {
+ int error;
+ if((error = cursor.Get(key, data, cursor_get_flags)) != 0) {
+ if(error == DB_NOTFOUND) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ } else {
+ return WORD_WALK_GET_FAILED;
+ }
+ }
+ }
+
+ //
+ // Next step operation is always sequential walk
+ //
+ cursor_get_flags = DB_NEXT;
+
+ found.Unpack(key, data);
+
+ if(traceRes) traceRes->Add(new WordReference(found));
+
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ //
+ // Don't bother to compare keys if we want to walk all the entries
+ //
+ if(!(searchKey.Empty())) {
+ // examples
+ // searchKey: aabc 1 ? ? ?
+ // prefixKey: aabc 1 ? ? ?
+
+ //
+ // Stop loop if we reach a record whose key does not
+ // match prefix key requirement, provided we have a valid
+ // prefix key.
+ // (ie. stop loop if we're past last possible match...)
+ //
+ if(!prefixKey.Empty() &&
+ !prefixKey.Equal(found.Key())) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ }
+
+ //
+ // Skip entries that do not exactly match the specified key.
+ //
+ if(!searchKeyIsSameAsPrefix &&
+ !searchKey.Equal(found.Key())) {
+ int ret;
+ switch((ret = SkipUselessSequentialWalking())) {
+ case OK:
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
+ return WORD_WALK_NOMATCH_FAILED;
+ break;
+ case WORD_WALK_ATEND:
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ break;
+ default:
+ fprintf(stderr, "WordCursor::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
+ return NOTOK;
+ break;
+ }
+ }
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ if(collectRes) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: collect\n");
+ collectRes->Add(new WordReference(found));
+ } else if(callback) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: calling callback\n");
+ int ret = (*callback)(words, cursor, &found, *(callback_data) );
+ //
+ // The callback function tells us that something went wrong, might
+ // as well stop walking.
+ //
+ if(ret != OK) {
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: callback returned NOTOK");
+ return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND);
+ }
+ }
+
+ return OK;
+}
+
+int
+WordCursor::WalkFinish()
+{
+ if(words->verbose) fprintf(stderr, "WordCursor::WalkFinish\n");
+
+ return cursor.Close() == 0 ? OK : NOTOK;
+}
+
+// *****************************************************************************
+//
+// Helper for SkipUselessSequentialWalking.
+// Undefine in foundKey all fields defined in searchKey
+// so that they are not considered by SetToFollowing.
+// It could become a method of WordKey but lacks generalisation and
+// from what I see it is a rather specific operation.
+//
+static inline void complement(WordKey& key, const WordKey& mask)
+{
+ int nfields = WordKey::NFields();
+ int i;
+ //
+ // Undefine in 'key' all fields defined in 'mask'
+ //
+ for(i = 0; i < nfields; i++) {
+ if(mask.IsDefined(i))
+ key.Undefined(i);
+ else
+ key.SetDefined(i);
+ }
+ //
+ // If searching for a prefix, we must allow the word in
+ // key to increment.
+ //
+ if(mask.IsDefinedWordSuffix()) {
+ key.UndefinedWordSuffix();
+ } else {
+ key.SetDefinedWordSuffix();
+ key.SetDefined(0);
+ }
+}
+
+// *****************************************************************************
+//
+// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of
+// sequential iterating (DB_NEXT).
+// If it is decided that jump is a better move :
+// cursor_set_flags = DB_SET_RANGE
+// key = calculated next possible key
+// Else
+// do nothing
+// Return values
+// OK: skipping successfull.
+// WORD_WALK_ATEND : no more possible match, reached the maximum
+// WORD_WALK_FAILED: general failure, occurs if called and no skipping
+// necessary.
+//
+// Sequential searching can waste time by searching all keys, for example:
+// If searching for Key: argh <DEF> <UNDEF> 10
+// Under normal circonstances we would do the following
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_NEXT
+// 3: argh 2 15 nomatch DB_NEXT
+// 4: argh 2 20 nomatch DB_NEXT
+// 5: argh 2 30 nomatch DB_NEXT
+// 6: argh 5 1 nomatch DB_NEXT
+// 7: argh 5 8 nomatch DB_NEXT
+// 8: argh 8 6 nomatch DB_NEXT
+//
+// But the optimal would be
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10
+// 3: argh 2 15
+// 4: argh 2 20
+// 5: argh 2 30
+// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10
+// 7: argh 5 8
+// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10
+//
+// That saves a lot of unecessary hit. The underlying logic is a bit
+// more complex but you have the idea.
+//
+int
+WordCursor::SkipUselessSequentialWalking()
+{
+ WordKey& foundKey = found.Key();
+
+ int nfields = WordKey::NFields();
+ int i;
+
+ //
+ // Find out how the searchKey and the foundKey differ.
+ //
+ int diff_field = 0;
+ int lower = 0;
+ if(!foundKey.Diff(searchKey, diff_field, lower)) {
+ //
+ // foundKey matches searchKey (no difference), don't
+ // skip, everything is fine. The caller of SkipUselessSequentialWalking
+ // is expected to avoid this case for efficiency.
+ //
+ return WORD_WALK_FAILED;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Undefine in foundKey all fields defined in searchKey
+ // so that they are not considered by SetToFollowing.
+ //
+ complement(foundKey, searchKey);
+
+ //
+ // If the key found is lower than the searched key when
+ // considering only the fields defined in the search key,
+ // we only need to enforce the key to get the match.
+ // Otherwise we need to increment the found key to jump
+ // properly.
+ //
+ if(lower) {
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
+ for(i = diff_field + 1; i < nfields; i++)
+ if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
+ } else {
+ if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: increment the key to jump forward\n");
+ //
+ // diff_field - 1 is not really necessary because diff_field is undefined
+ // in foundKey and would therefore be ignored by SetToFollowing. We write
+ // diff_field - 1 to clearly state that incrementing begins just before the
+ // field for which a difference was found.
+ //
+ int ret;
+ if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
+ return ret;
+ }
+
+ //
+ // Copy all fields defined in searchKey into foundKey. This will copy
+ // searchKey in foundKey because all these fields have been
+ // previously undefined in foundKey.
+ //
+ foundKey.Merge(searchKey);
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Instruct Next function to jump to the calculated key
+ //
+ if(foundKey.Pack(key) == NOTOK) {
+ return WORD_WALK_FAILED;
+ }
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+// *****************************************************************************
+//
+// Copy defined fields in patch into foundKey and
+// initialize internal state so that WalkNext jumps to
+// this key next time it's called.
+//
+// Technically this means : Override latest key found (found data member)
+// with patch fields values, starting from the first field set in
+// patch up to the last. Pack the result in the key field and set
+// cursor_get_flags to DB_SET_RANGE.
+//
+int
+WordCursor::Seek(const WordKey& patch)
+{
+ int nfields = WordKey::NFields();
+ WordKey pos = searchKey;
+
+ if(patch.Empty()) {
+ fprintf(stderr, "WordCursor::Seek: empty patch is useless\n");
+ return NOTOK;
+ }
+
+ int i;
+ //
+ // Leave the most significant fields untouched
+ //
+ for(i = WORD_FIRSTFIELD; i < nfields; i++)
+ if(patch.IsDefined(i))
+ break;
+ //
+ // From the first value set in the patch to the end
+ // override.
+ //
+ for(; i < nfields; i++) {
+ if(patch.IsDefined(i))
+ pos.Set(i, patch.Get(i));
+ else
+ pos.Set(i, 0);
+ }
+
+ if(!pos.Filled()) {
+ fprintf(stderr, "WordCursor::Seek: only make sense if the resulting key is fully defined\n");
+ return NOTOK;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursor::Seek: seek to %s\n", (char*)pos.Get());
+
+ //
+ // Next move will jump to the patched key
+ //
+ pos.Pack(key);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int WordCursor::Noccurrence(unsigned int& noccurrence) const
+{
+ if(!words) {
+ fprintf(stderr, "WordCursor::Noccurrence: words not set (call Prepare first)\n");
+ return NOTOK;
+ }
+ return words->Noccurrence(searchKey, noccurrence);
+}
+
+//
+// Convert the whole structure to an ascii string description
+//
+int WordCursor::Get(String& bufferout) const
+{
+ String tmp;
+ bufferout.trunc();
+
+ searchKey.Get(tmp);
+ bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set");
+ found.Get(tmp);
+ bufferout << ", found = " << tmp << ", status = " << status;
+ prefixKey.Get(tmp);
+ bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
+
+ return OK;
+}