diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc new file mode 100644 index 00000000..d0980e04 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.cc @@ -0,0 +1,582 @@ +// +// WordCursor.cc +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordCursor.cc,v 1.4 2004/05/28 13:15:26 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include "WordCursor.h" +#include "WordStat.h" +#include "WordList.h" + +#include <stdio.h> + +// +// WordCursor implementation +// + +// ***************************************************************************** +// +int WordCursor::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction) +{ + action = naction; + searchKey = nsearchKey; + callback = ncallback; + callback_data = ncallback_data; + words = nwords; + return OK; +} + +// ***************************************************************************** +// +void +WordCursor::Clear() +{ + searchKey.Clear(); + action = 0; + callback = 0; + callback_data = 0; + ClearResult(); + ClearInternal(); + words = 0; + + // + // Debugging section. + // + traceRes = 0; +} + +// ***************************************************************************** +// +void +WordCursor::ClearInternal() +{ + cursor.Close(); + key.trunc(); + data.trunc(); + prefixKey.Clear(); + cursor_get_flags = DB_SET_RANGE; + searchKeyIsSameAsPrefix = 0; +} + +// ***************************************************************************** +// +void +WordCursor::ClearResult() +{ + collectRes = 0; + found.Clear(); + status = OK; +} + +int +WordCursor::ContextRestore(const String& buffer) +{ + int ret = OK; + if(!buffer.empty()) { + WordKey key(buffer); + if((ret = Seek(key)) != OK) + return ret; + // + // Move to restored position so that next call to + // WalkNext will go above the restored position. + // + if((ret = WalkNext()) != OK) + return ret; + } + return ret; +} + +// ***************************************************************************** +// +// Walk and collect data from the word database. +// +// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are +// stored in a list and the list is returned. +// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function +// is called for each WordReference found. No list is built and the +// function returns a null pointer. +// +// The <searchKey> argument may be a fully qualified key, containing precise values for each +// field of the key. It may also contain only some fields of the key. In both cases +// all the word occurrences matching the fields set in the key are retrieved. It may +// be fast if key is a prefix (see WordKey::Prefix for a definition). It may +// be *slow* if key is not a prefix because it forces a complete walk of the +// index. +// +int +WordCursor::Walk() +{ + int ret; + if((ret = WalkInit()) != OK) return ret; + while((ret = WalkNext()) == OK) + ; + int ret1; + if((ret1 = WalkFinish()) != OK) return ret1; + + return ret == WORD_WALK_ATEND ? OK : NOTOK; +} + +int +WordCursor::WalkInit() +{ + int ret = OK; + + ClearResult(); + ClearInternal(); + + WordReference wordRef; + + if((ret = cursor.Open(words->db.db)) != 0) + return ret; + + if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get()); + + if(action & HTDIG_WORDLIST_COLLECTOR) { + collectRes = new List; + } + + const WordReference& last = WordStat::Last(); + + WordKey first_key; + // + // Move the cursor to start walking and do some sanity checks. + // + if(searchKey.Empty()) { + // + // Move past the stat data + // + if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is empty\n"); + first_key = last.Key(); + + } else { + prefixKey = searchKey; + // + // If the key is a prefix, the start key is + // the longest possible prefix contained in the key. If the + // key does not contain any prefix, start from the beginning + // of the file. + // + if(prefixKey.PrefixOnly() == NOTOK) { + if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: at start of keys because search key is not a prefix\n"); + prefixKey.Clear(); + // + // Move past the stat data + // + first_key = last.Key(); + } else { + if(words->verbose) fprintf(stderr, "WordCursor::WalkInit: go to %s \n", (char*)prefixKey.Get()); + first_key = prefixKey; + } + } + + first_key.Pack(key); + // + // Allow Seek immediately after Init + // + found.Key().CopyFrom(first_key); + + status = OK; + searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +int +WordCursor::WalkRewind() +{ + const WordReference& last = WordStat::Last(); + + WordKey first_key; + // + // Move the cursor to start walking and do some sanity checks. + // + if(searchKey.Empty()) { + first_key = last.Key(); + } else { + prefixKey = searchKey; + // + // If the key is a prefix, the start key is + // the longest possible prefix contained in the key. If the + // key does not contain any prefix, start from the beginning + // of the file. + // + if(prefixKey.PrefixOnly() == NOTOK) { + prefixKey.Clear(); + // + // Move past the stat data + // + first_key = last.Key(); + } else { + first_key = prefixKey; + } + } + + first_key.Pack(key); + // + // Allow Seek immediately after Rewind + // + found.Key().CopyFrom(first_key); + + status = OK; + searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +int +WordCursor::WalkNext() +{ + int ret; + while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED) + if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNext: got false match, retry\n"); + + return ret; +} + +int +WordCursor::WalkNextStep() +{ + status = OK; + + { + int error; + if((error = cursor.Get(key, data, cursor_get_flags)) != 0) { + if(error == DB_NOTFOUND) { + if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + } else { + return WORD_WALK_GET_FAILED; + } + } + } + + // + // Next step operation is always sequential walk + // + cursor_get_flags = DB_NEXT; + + found.Unpack(key, data); + + if(traceRes) traceRes->Add(new WordReference(found)); + + if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get()); + + // + // Don't bother to compare keys if we want to walk all the entries + // + if(!(searchKey.Empty())) { + // examples + // searchKey: aabc 1 ? ? ? + // prefixKey: aabc 1 ? ? ? + + // + // Stop loop if we reach a record whose key does not + // match prefix key requirement, provided we have a valid + // prefix key. + // (ie. stop loop if we're past last possible match...) + // + if(!prefixKey.Empty() && + !prefixKey.Equal(found.Key())) { + if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + } + + // + // Skip entries that do not exactly match the specified key. + // + if(!searchKeyIsSameAsPrefix && + !searchKey.Equal(found.Key())) { + int ret; + switch((ret = SkipUselessSequentialWalking())) { + case OK: + if(words->verbose > 1) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get()); + return WORD_WALK_NOMATCH_FAILED; + break; + case WORD_WALK_ATEND: + if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + break; + default: + fprintf(stderr, "WordCursor::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret); + return NOTOK; + break; + } + } + } + + if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get()); + + if(collectRes) { + if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: collect\n"); + collectRes->Add(new WordReference(found)); + } else if(callback) { + if(words->verbose > 2) fprintf(stderr, "WordCursor::WalkNextStep: calling callback\n"); + int ret = (*callback)(words, cursor, &found, *(callback_data) ); + // + // The callback function tells us that something went wrong, might + // as well stop walking. + // + if(ret != OK) { + if(words->verbose) fprintf(stderr, "WordCursor::WalkNextStep: callback returned NOTOK"); + return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND); + } + } + + return OK; +} + +int +WordCursor::WalkFinish() +{ + if(words->verbose) fprintf(stderr, "WordCursor::WalkFinish\n"); + + return cursor.Close() == 0 ? OK : NOTOK; +} + +// ***************************************************************************** +// +// Helper for SkipUselessSequentialWalking. +// Undefine in foundKey all fields defined in searchKey +// so that they are not considered by SetToFollowing. +// It could become a method of WordKey but lacks generalisation and +// from what I see it is a rather specific operation. +// +static inline void complement(WordKey& key, const WordKey& mask) +{ + int nfields = WordKey::NFields(); + int i; + // + // Undefine in 'key' all fields defined in 'mask' + // + for(i = 0; i < nfields; i++) { + if(mask.IsDefined(i)) + key.Undefined(i); + else + key.SetDefined(i); + } + // + // If searching for a prefix, we must allow the word in + // key to increment. + // + if(mask.IsDefinedWordSuffix()) { + key.UndefinedWordSuffix(); + } else { + key.SetDefinedWordSuffix(); + key.SetDefined(0); + } +} + +// ***************************************************************************** +// +// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of +// sequential iterating (DB_NEXT). +// If it is decided that jump is a better move : +// cursor_set_flags = DB_SET_RANGE +// key = calculated next possible key +// Else +// do nothing +// Return values +// OK: skipping successfull. +// WORD_WALK_ATEND : no more possible match, reached the maximum +// WORD_WALK_FAILED: general failure, occurs if called and no skipping +// necessary. +// +// Sequential searching can waste time by searching all keys, for example: +// If searching for Key: argh <DEF> <UNDEF> 10 +// Under normal circonstances we would do the following +// +// DATA STATUS ACTION +// 1: argh 1 10 match DB_NEXT +// 2: argh 2 11 nomatch DB_NEXT +// 3: argh 2 15 nomatch DB_NEXT +// 4: argh 2 20 nomatch DB_NEXT +// 5: argh 2 30 nomatch DB_NEXT +// 6: argh 5 1 nomatch DB_NEXT +// 7: argh 5 8 nomatch DB_NEXT +// 8: argh 8 6 nomatch DB_NEXT +// +// But the optimal would be +// +// DATA STATUS ACTION +// 1: argh 1 10 match DB_NEXT +// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10 +// 3: argh 2 15 +// 4: argh 2 20 +// 5: argh 2 30 +// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10 +// 7: argh 5 8 +// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10 +// +// That saves a lot of unecessary hit. The underlying logic is a bit +// more complex but you have the idea. +// +int +WordCursor::SkipUselessSequentialWalking() +{ + WordKey& foundKey = found.Key(); + + int nfields = WordKey::NFields(); + int i; + + // + // Find out how the searchKey and the foundKey differ. + // + int diff_field = 0; + int lower = 0; + if(!foundKey.Diff(searchKey, diff_field, lower)) { + // + // foundKey matches searchKey (no difference), don't + // skip, everything is fine. The caller of SkipUselessSequentialWalking + // is expected to avoid this case for efficiency. + // + return WORD_WALK_FAILED; + } + + if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get()); + + // + // Undefine in foundKey all fields defined in searchKey + // so that they are not considered by SetToFollowing. + // + complement(foundKey, searchKey); + + // + // If the key found is lower than the searched key when + // considering only the fields defined in the search key, + // we only need to enforce the key to get the match. + // Otherwise we need to increment the found key to jump + // properly. + // + if(lower) { + if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n"); + for(i = diff_field + 1; i < nfields; i++) + if(foundKey.IsDefined(i)) foundKey.Set(i, 0); + } else { + if(words->verbose > 1) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: increment the key to jump forward\n"); + // + // diff_field - 1 is not really necessary because diff_field is undefined + // in foundKey and would therefore be ignored by SetToFollowing. We write + // diff_field - 1 to clearly state that incrementing begins just before the + // field for which a difference was found. + // + int ret; + if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK) + return ret; + } + + // + // Copy all fields defined in searchKey into foundKey. This will copy + // searchKey in foundKey because all these fields have been + // previously undefined in foundKey. + // + foundKey.Merge(searchKey); + + if(words->verbose > 2) fprintf(stderr, "WordCursor::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get()); + + // + // Instruct Next function to jump to the calculated key + // + if(foundKey.Pack(key) == NOTOK) { + return WORD_WALK_FAILED; + } + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +// ***************************************************************************** +// +// Copy defined fields in patch into foundKey and +// initialize internal state so that WalkNext jumps to +// this key next time it's called. +// +// Technically this means : Override latest key found (found data member) +// with patch fields values, starting from the first field set in +// patch up to the last. Pack the result in the key field and set +// cursor_get_flags to DB_SET_RANGE. +// +int +WordCursor::Seek(const WordKey& patch) +{ + int nfields = WordKey::NFields(); + WordKey pos = searchKey; + + if(patch.Empty()) { + fprintf(stderr, "WordCursor::Seek: empty patch is useless\n"); + return NOTOK; + } + + int i; + // + // Leave the most significant fields untouched + // + for(i = WORD_FIRSTFIELD; i < nfields; i++) + if(patch.IsDefined(i)) + break; + // + // From the first value set in the patch to the end + // override. + // + for(; i < nfields; i++) { + if(patch.IsDefined(i)) + pos.Set(i, patch.Get(i)); + else + pos.Set(i, 0); + } + + if(!pos.Filled()) { + fprintf(stderr, "WordCursor::Seek: only make sense if the resulting key is fully defined\n"); + return NOTOK; + } + + if(words->verbose > 2) fprintf(stderr, "WordCursor::Seek: seek to %s\n", (char*)pos.Get()); + + // + // Next move will jump to the patched key + // + pos.Pack(key); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +int WordCursor::Noccurrence(unsigned int& noccurrence) const +{ + if(!words) { + fprintf(stderr, "WordCursor::Noccurrence: words not set (call Prepare first)\n"); + return NOTOK; + } + return words->Noccurrence(searchKey, noccurrence); +} + +// +// Convert the whole structure to an ascii string description +// +int WordCursor::Get(String& bufferout) const +{ + String tmp; + bufferout.trunc(); + + searchKey.Get(tmp); + bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set"); + found.Get(tmp); + bufferout << ", found = " << tmp << ", status = " << status; + prefixKey.Get(tmp); + bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags; + + return OK; +} |