diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/word.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/test/word.cc | 1075 |
1 files changed, 1075 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/word.cc b/debian/htdig/htdig-3.2.0b6/test/word.cc new file mode 100644 index 00000000..f0f571b1 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.cc @@ -0,0 +1,1075 @@ +// +// word.cc +// +// word: Implement tests for the word database related classes. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: word.cc,v 1.19 2004/05/28 13:15:30 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif + +#include "WordKey.h" +#include "WordList.h" +#include "WordContext.h" +#include "Configuration.h" + +static ConfigDefaults config_defaults[] = { + { "word_db", "test", 0 }, + { 0 } +}; + +static Configuration* config = 0; + +typedef struct +{ + int key; + int list; + int skip; + int compress; + int env; +} params_t; + +static void usage(); +static void doword(params_t* params); +static void dolist(params_t* params); +static void dokey(params_t* params); +static void doskip(params_t* params); +static void doenv(params_t* params); +static void pack_show_wordreference(const WordReference& wordRef); +static void pack_show_key(const String& key); + +static int verbose = 0; + +// ***************************************************************************** +// int main(int ac, char **av) +// + +int main(int ac, char **av) +{ + int c; + params_t params; + + params.key = 0; + params.list = 0; + params.skip = 0; + params.env = 0; + params.compress = 0; + + while ((c = getopt(ac, av, "ve:klbszw:")) != -1) + { + switch (c) + { + case 'v': + verbose++; + break; + case 'k': + params.key = 1; + break; + case 'l': + params.list = 1; + break; + case 's': + params.skip = 1; + break; + case 'e': + params.env = atoi(optarg); + break; + case 'z': + params.compress = 1; + break; + case '?': + usage(); + break; + } + } + + doword(¶ms); + + return 0; +} + +// +// mifluz.conf structure +// +#define WORD_DOCID 1 +#define WORD_FLAGS 2 +#define WORD_LOCATION 3 + +static void doword(params_t* params) +{ + if(params->key) { + if(verbose) fprintf(stderr, "Test WordKey class\n"); + dokey(params); + } + + if(params->list || params->skip || params->env) { + config = WordContext::Initialize(config_defaults); + if(params->compress) { + config->Add("wordlist_compress", "true"); + } + if(verbose > 2) { + String tmp; + tmp << (verbose - 2); + config->Add("wordlist_verbose", tmp); + } + if(params->env) { + config->Add("wordlist_env_share", "true"); + config->Add("wordlist_env_dir", "."); + } + + WordContext::Initialize(*config); + } + + + if(params->list) { + if(verbose) fprintf(stderr, "Test WordList class\n"); + dolist(params); + } + + if(params->skip) { + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking method\n"); + doskip(params); + } + + if(params->env) { + if(verbose) fprintf(stderr, "Test WordList with shared env\n"); + doenv(params); + } +} + +static void dolist(params_t*) +{ + static char* word_list[] = { + "The", // DocID = 1 + "quick", // DocID = 2 + "brown", // DocID = 3 + "fox", // DocID = 4 + "jumps", // DocID = 5 + "over", // DocID = 6 + "the", // DocID = 7 + "lazy", // DocID = 8 + "dog", // DocID = 9 + 0 + }; + + // + // Most simple case. Insert a few words and + // search them, using exact match. + // + { + + // setup a new wordlist + WordList words(*config); + if(verbose)WordKeyInfo::Instance()->Show(); + words.Open((*config)["word_db"], O_RDWR); + + + // create entries from word_list + WordReference wordRef; + wordRef.Key().Set(WORD_FLAGS, 67); + unsigned int location = 0; + unsigned int anchor = 0; + unsigned int docid = 1; + if(verbose) fprintf(stderr, "Inserting\n"); + + for(char** p = word_list; *p; p++) { + if(verbose > 4) fprintf(stderr, "inserting word: %s\n", *p); + wordRef.Key().SetWord(*p); + wordRef.Key().Set(WORD_DOCID, docid); + wordRef.Key().Set(WORD_LOCATION, location); + wordRef.Record().info.data = anchor; + if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get()); + if(verbose > 2) pack_show_wordreference(wordRef); + words.Insert(wordRef); + location += strlen(*p); + anchor++; + docid++; + } + words.Close(); + + location = anchor = 0; + docid = 1; + + if(verbose) fprintf(stderr, "Searching\n"); + + // reopen wordlist + words.Open((*config)["word_db"], O_RDONLY); + // check if each word (from word_list) is there + for(char** p = word_list; *p; p++) + { + // recreate wordref from each word + wordRef.Key().SetWord(*p); + wordRef.Key().Set(WORD_LOCATION, location); + wordRef.Record().info.data = anchor; + wordRef.Key().Set(WORD_DOCID, docid); + + location += strlen(*p); + anchor++; + docid++; + + // + // Skip first word because we don't want to deal with upper/lower case at present. + // + if(p == word_list) continue; + + // check if wordref is in wordlist + if(verbose) fprintf(stderr, "searching for %s ... ", *p); + if(verbose > 2) pack_show_wordreference(wordRef); + if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get()); + // find matches in wordlist + List *result = words[wordRef]; + if(!result) { + fprintf(stderr, "dolist: words[wordRef] returned null pointer\n"); + exit(1); + } + result->Start_Get(); + int count = 0; + WordReference* found; + // loop through found matches + while((found = (WordReference*)result->Get_Next())) + { + if(wordRef.Key().GetWord() != found->Key().GetWord()) + { + fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord()); + exit(1); + } + count++; + } + if(count != 1) { + fprintf(stderr, "dolist: simple: searching %s, got %d matches instead of 1\n", (char*)wordRef.Key().GetWord(), count); + exit(1); + } + if(verbose) fprintf(stderr, "done\n"); + + delete result; + + } + } + // + // Print all records as sorted within Berkeley DB with number + // of occurrences. + // + if(verbose) { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + List *result = words.Words(); + if(result == 0) { + fprintf(stderr, "dolist: getting all words failed\n"); + exit(1); + } + result->Start_Get(); + int count = 0; + String* found; + while((found = (String*)result->Get_Next())) { + unsigned int noccurrence; + WordKey key; + key.SetWord(*found); + words.Noccurrence(key, noccurrence); + fprintf(stderr, "%s (%d)\n", (char*)(*found), noccurrence); + count++; + } + if(count != 8) { + fprintf(stderr, "dolist: getting all words, got %d matches instead of 8\n", count); + exit(1); + } + + delete result; + } + // + // Search all occurrences of 'the' + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef; + wordRef.Key().SetWord("the"); + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'the' failed\n"); + exit(1); + } else if(noccurrence != 2) { + fprintf(stderr, "dolist: get ref count of 'the' failed, got %d instead of 2\n", noccurrence); + exit(1); + } + List *result = words[wordRef]; + result->Start_Get(); + int count = 0; + WordReference* found; + while((found = (WordReference*)result->Get_Next())) { + if(wordRef.Key().GetWord() != found->Key().GetWord()) { + fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord()); + exit(1); + } + if(verbose) fprintf(stderr, "%s\n", (char*)found->Get()); + count++; + } + if(count != 2) { + fprintf(stderr, "dolist: searching occurrences of '%s', got %d matches instead of 2\n", (char*)wordRef.Key().GetWord(), count); + exit(1); + } + + delete result; + } + // + // Delete all occurrences of 'the' + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef("the"); + if(verbose) { + fprintf(stderr, "**** Delete test:\n"); + words.Write(stderr); + fprintf(stderr, "**** Delete test:\n"); + } + int count; + if((count = words.WalkDelete(wordRef)) != 2) { + fprintf(stderr, "dolist: delete occurrences of 'the', got %d deletion instead of 2\n", count); + exit(1); + } + + List* result = words[wordRef]; + if(result->Count() != 0) { + fprintf(stderr, "dolist: unexpectedly found 'the' \n"); + exit(1); + } + delete result; + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'thy' failed\n"); + exit(1); + } else if(noccurrence != 0) { + fprintf(stderr, "dolist: get ref count of 'thy' failed, got %d instead of 0\n", noccurrence); + exit(1); + } + } + // + // Delete all words in document 5 (only one word : jumps) + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef; + wordRef.Key().Set(WORD_DOCID, 5); + int count; + if((count = words.WalkDelete(wordRef)) != 1) { + fprintf(stderr, "dolist: delete occurrences in DocID 5, %d deletion instead of 1\n", count); + exit(1); + } + + wordRef.Clear(); + wordRef.Key().SetWord("jumps"); + List* result = words[wordRef]; + if(result->Count() != 0) { + fprintf(stderr, "dolist: unexpectedly found 'jumps' \n"); + exit(1); + } + delete result; + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'jumps' failed\n"); + exit(1); + } else if(noccurrence != 0) { + fprintf(stderr, "dolist: get ref count of 'jumps' failed, got %d instead of 0\n", noccurrence); + exit(1); + } + } +} + +#define WORD_BIT_MASK(b) ((b) == 32 ? 0xffffffff : (( 1 << (b)) - 1)) + +// +// See WordKey.h +// Tested: Pack, Unpack, Compare (both forms), accessors, meta information +// +static void +dokey(params_t* params) +{ + static char *key_descs[] = { + "Word/DocID 5/Flags 8/Location 19", + "Word/DocID 3/Location 2/Flags 11", + "Word/DocID 3/Flags 8/Location 5", + "Word/DocID 3/Flags 14/Location 7", + "Word/DocID 3/Flags 9/Location 7/Foo1 13/Foo2 16", + 0, + }; + char** key_desc; + + for(key_desc = key_descs; *key_desc; key_desc++) { + WordKeyInfo::InitializeFromString(*key_desc); + + if(verbose) + WordKeyInfo::Instance()->Show(); + + WordKey word; + word.SetWord("aword"); + int j; + for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) { + WordKeyNum value = (0xdededede & word.MaxValue(j)); + word.Set(j, value); + } + if(verbose > 1) fprintf(stderr, "WORD: %s\n", (char*)word.Get()); + + String packed; + word.Pack(packed); + + WordKey other_word; + other_word.Unpack(packed); + if(verbose > 1) fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get()); + + int failed = 0 ; + for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) { + if(word.Get(j) != other_word.Get(j)) { + failed = 1; + break; + } + } + if(word.GetWord() != other_word.GetWord() || + !word.IsDefined(0) || + !other_word.IsDefined(0)) + failed = 1; + + if(failed) { + fprintf(stderr, "Original and packed/unpacked not equal\n"); + WordKeyInfo::Instance()->Show(); + fprintf(stderr, "WORD: %s\n", (char*)word.Get()); + pack_show_key(packed); + fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get()); + exit(1); + } + + // + // Compare in packed form + // + if(!word.PackEqual(other_word)) + { + fprintf(stderr, "dokey: %s not equal (object compare)\n", *key_desc); + exit(1); + } + + // + // Pack the other_word + // + String other_packed; + + other_word.Pack(other_packed); + // + // The two (word and other_word) must compare equal + // using the alternate comparison (fast) interface. + // + if(WordKey::Compare(packed, other_packed) != 0) { + fprintf(stderr, "dokey: %s not equal (fast compare)\n", *key_desc); + exit(1); + } + + word.SetWord("Test string"); + word.Set(WORD_DOCID,1); + other_word.SetWord("Test string"); + word.Pack(packed); + // + // Add one char to the word, they must not compare equal and + // the difference must be minus one. + // + other_word.GetWord().append("a"); + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != -1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different length, expected -1 got %d\n", *key_desc, ret); + exit(1); + } + } + other_word.SetWord("Test string"); + + // + // Change T to S + // the difference must be one. + // + { + String& tmp = other_word.GetWord(); + tmp[tmp.indexOf('T')] = 'S'; + } + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != 1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different letter (S instead of T), expected 1 got %d\n", *key_desc, ret); + exit(1); + } + } + other_word.SetWord("Test string"); + + // + // Substract one to the first numeric field + // The difference must be one. + // + other_word.Set(WORD_DOCID,word.Get(WORD_DOCID) - 1); + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != 1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different numeric field, expected 1 got %d\n", *key_desc, ret); + exit(1); + } + } + } + // + // WordKey::Diff function + // + { + WordKey word("Test1 <DEF> 1 2 3 4 5"); + WordKey other_word("Sest1 <DEF> 1 2 3 4 5"); + // + // Diff must say that field 0 differ and that word is lower than other_word + // + { + int position = 0; + int lower = 0; + if(!word.Diff(other_word, position, lower)) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff failed\n"); + exit(1); + } + if(position != 0 || lower != 1) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff expected position = 0 and lower = 1 but got position = %d and lower = %d\n", position, lower); + exit(1); + } + } + // + // Only compare prefix + // + other_word.SetWord("Test"); + other_word.UndefinedWordSuffix(); + other_word.Set(WORD_DOCID, 5); + { + int position = 0; + int lower = 0; + if(!word.Diff(other_word, position, lower)) { + fprintf(stderr, "dokey: diff failed\n"); + exit(1); + } + if(position != 1 || lower != 1) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff expected position = 1 and lower = 1 but got position = %d and lower = %d\n", position, lower); + exit(1); + } + } + // + // Same key have no diff + // + { + int position = 0; + int lower = 0; + if(word.Diff(word, position, lower)) { + fprintf(stderr, "dokey: diff found when comparing %s with itself\n", (char*)word.Get()); + exit(1); + } + } + } +} + +static void pack_show_key(const String& key) +{ + int i; + char c; + + for(i=0; i < key.length(); i++) { + c = (isprint(key[i]) ? key[i] : '#'); + fprintf(stderr, "%c-%2x ", c, key[i]); + } + fprintf(stderr, "\n"); + + for(i = 0; i < key.length(); i++) { + int j; + for(j = 0; j < 8; j++) + fprintf(stderr, "%c", (key[j] & (1<<(j))) ? '1' : '0'); + } + + fprintf(stderr, "\n"); + fprintf(stderr, "^0 ^1 ^2 ^3 ^4 ^5 ^6 ^7\n"); + fprintf(stderr, "0123456701234567012345670123456701234567012345670123456701234567\n"); +} + +static void pack_show_wordreference(const WordReference& wordRef) +{ + String key; + String record; + + wordRef.Pack(key, record); + + fprintf(stderr, "key = "); + for(int i = 0; i < key.length(); i++) { + fprintf(stderr, "0x%02x(%c) ", key[i] & 0xff, key[i]); + } + fprintf(stderr, " record = "); + for(int i = 0; i < record.length(); i++) { + fprintf(stderr, "0x%02x(%c) ", record[i] & 0xff, record[i]); + } + fprintf(stderr, "\n"); +} + + + +//***************************************************************************** +// void doskip() +// Test SkipUselessSequentialWalking in WordList class +// +static void doskip_normal(params_t*); +static void doskip_harness(params_t*); +static void doskip_overflow(params_t*); +static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string); + +static void doskip(params_t* params) +{ + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking normal\n"); + doskip_normal(params); + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking harness\n"); + doskip_harness(params); + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking overflow\n"); + doskip_overflow(params); +} + +static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string) +{ + const WordKey& found = search.GetFound().Key(); + ((WordKey&)found).Set(found_string); + if(search.SkipUselessSequentialWalking() == NOTOK) { + fprintf(stderr, "doskip_try: SkipUselessSequentialWalking NOTOK searching %s at step %s expecting %s\n", (char*)search.GetSearch().Get(), (char*)found.Get(), (char*)expected_string); + exit(1); + } + + WordKey expected(expected_string); + if(!found.ExactEqual(expected)) { + fprintf(stderr, "doskip_try: expected %s but got %s\n", (char*)expected.Get(), (char*)found.Get()); + exit(1); + } +} + +// +// Create artificial WordCursor context +// in which SkipUselessSequentialWalking calls SetToFollowing +// that triggers overflow condition. +// +static void doskip_overflow(params_t*) +{ +#define WORD_FIELD1 1 +#define WORD_FIELD2 2 +#define WORD_FIELD3 3 + + static ConfigDefaults config_defaults[] = { + { "wordlist_wordkey_description", "Word/FIELD1 32/FIELD2 8/FIELD3 16", 0 }, + { 0 } + }; + Configuration config; + config.Defaults(config_defaults); + if(verbose > 2) config.Add("wordlist_verbose", "3"); + WordContext::Initialize(config); + { + WordList* words = new WordList(config); + + // + // Looking for zebra at location 3 + // + WordKey key("zebra <UNDEF> <UNDEF> <UNDEF> 3"); + WordCursor *search = words->Cursor(key); + + { + // + // Pretend we found zebra <DEF> 3 <MAX> 7 + // That is a valid candidate for SkipUselessSequentialWalking + // + String found; + found << "zebra <DEF> 3 " << WordKey::MaxValue(WORD_FIELD2) << " 7"; + + // + // Overflow on FIELD2 must trigger ++ on FIELD1 + // + String expected("zebra <DEF> 4 0 3"); + doskip_try(*words, *search, found, expected); + } + + { + // + // Prented we found zebra <DEF> <MAX> <MAX> 7 + // That is a valid candidate for SkipUselessSequentialWalking + // + String found; + found << "zebra <DEF> " << WordKey::MaxValue(WORD_FIELD1) << " " << WordKey::MaxValue(WORD_FIELD2) << " 7"; + + // + // Overflow on FIELD2 must trigger append \001 on word Word + // + String expected("zebra\001 <DEF> 0 0 3"); + doskip_try(*words, *search, found, expected); + + // + // Cannot increment, SkipUselessSequentialWalking returns NOTOK + // + ((WordKey&)search->GetSearch()).SetDefinedWordSuffix(); + ((WordReference&)search->GetFound()).Key().Set(found); + if(search->SkipUselessSequentialWalking() != WORD_WALK_ATEND) { + fprintf(stderr, "doskip_overflow: SkipUselessSequentialWalking expected NOTOK & WORD_WALK_ATEND searching %s\n", (char*)key.Get()); + exit(1); + } + + } + + delete search; + words->Close(); + delete words; + } + + // + // Restore default configuration + // + WordContext::Initialize(*::config); + +#undef WORD_FIELD1 +#undef WORD_FIELD2 +#undef WORD_FIELD3 +} + +// +// Create artificial WordCursor contexts +// that covers all possible behaviour of SkipUselessSequentialWalking. +// +static void doskip_harness(params_t*) +{ +#define WORD_FIELD1 1 +#define WORD_FIELD2 2 +#define WORD_FIELD3 3 +#define WORD_FIELD4 4 +#define WORD_FIELD5 5 + + static ConfigDefaults config_defaults[] = { + { "wordlist_wordkey_description", "Word/FIELD1 8/FIELD2 8/FIELD3 8/FIELD4 8/FIELD5 8", 0 }, + { 0 } + }; + Configuration config; + config.Defaults(config_defaults); + if(verbose > 2) config.Add("wordlist_verbose", "3"); + WordContext::Initialize(config); + { + WordList* words = new WordList(config); + + // + // Searching + // + // z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF> + // + // in data set + // + // DATA SEE STATUS OPERATION + // zebra <DEF> 1 5 1 4 3 found next + // zebra <DEF> 1 6 1 4 3 a nomatch skip to zebra <DEF> 2 5 0 4 0 + // zebra <DEF> 1 6 2 4 3 ignore + // zebra <DEF> 2 3 1 4 3 ignore + // zebra <DEF> <MAX> 6 1 4 3 b nomatch skip to zebra\001 <DEF> 0 5 0 4 0 + // zebra <DEF> <MAX> 7 1 4 3 ignore + // zebra <DEF> <MAX> 8 1 4 3 ignore + // zebra <DEF> <MAX> 9 1 4 3 ignore + // zippo <DEF> 0 3 1 4 3 ignore + // zippo <DEF> 8 5 1 1 3 c nomatch skip to zippo <DEF> 8 5 1 4 0 + // zippo <DEF> 8 5 1 2 3 ignore + // zippo <DEF> 8 5 1 2 5 ignore + // zippo <DEF> 8 5 1 2 9 ignore + // zippo <DEF> 8 5 1 3 9 ignore + // zorro <DEF> 3 5 <MAX> 6 3 d nomatch skip to zorro <DEF> 4 5 0 4 0 + // zorro <DEF> 3 5 <MAX> 6 5 ignore + // zorro <DEF> 3 5 <MAX> 8 5 ignore + // zorro <DEF> 4 5 2 4 3 found + // + // legend: status is what WalkNextStep function says about the key + // nomatch means searchKey.Equal(found.Key()) is false + // found means searchKey.Equal(found.Key()) is true + // ignore means we jump over it + // operation is the next operation decided by WalkNextStep + // always skip if SkipUselessSequentialWalking is called. + // In general SkipUselessSequentialWalking is not always + // called on nomatch. But it is always called if the + // search key is not a prefix key, which is our case. + // see is a reference to the list bellow + // + // a) Needless to search for keys in which the FIELD1 is equal to 1 since + // the FIELD2 is greater than the searched value. Any key with the FIELD1 + // set to 1 that follow this one will have a FIELD2 greater than the searched + // value (5) since the keys are sorted in ascending order. + // The next possible key is the one that has FIELD1++. + // + // b) Same logic as before but, the FIELD1 has already reached its maximum value + // and can't be incremented. zebra will therefore be incremented by appending + // a \001 to it. This is only possible since we search for words beginning + // with z (z <UNDEF>). We would not do that if searching for (zebra <DEF>). + // + // c) The found key does not match the constraint (FIELD4 is lower than the searched + // value). We only need to set FIELD4 to the searched value to jump to the + // match. No incrementation in this case. + // + // d) The FIELD4 is greater than the searched value, making this a lot similar + // to the b) case since the FIELD3 value is <MAX>. However FIELD2 matches + // the search key, it is therefore useless to increment it. We must ignore + // it and increment FIELD1. + // + // Looking for zebra with flags 5 + // + WordKey key("z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>"); + WordCursor *search = words->Cursor(key); + +#define WORD_NTEST 4 + + static char* found_strings[WORD_NTEST]; + static char* expected_strings[WORD_NTEST]; + + int i = 0; + char tmp[1024]; + + // + // See a) in comment above + // + found_strings[i] = strdup("zebra <DEF> 1 6 1 4 3"); + expected_strings[i] = strdup("zebra <DEF> 2 5 0 4 0"); + i++; + + // + // See b) in comment above + // + sprintf(tmp, "zebra <DEF> %d 6 1 4 3", WordKey::MaxValue(WORD_FIELD1)); + found_strings[i] = strdup(tmp); + expected_strings[i] = strdup("zebra\001 <DEF> 0 5 0 4 0"); + i++; + + // + // See c) in comment above + // + found_strings[i] = strdup("zippo <DEF> 8 5 1 1 3"); + expected_strings[i] = strdup("zippo <DEF> 8 5 1 4 0"); + i++; + + // + // See d) in comment above + // + sprintf(tmp, "zorro <DEF> 3 5 %d 6 3", WordKey::MaxValue(WORD_FIELD3)); + found_strings[i] = strdup(tmp); + expected_strings[i] = strdup("zorro <DEF> 4 5 0 4 0"); + i++; + + for(i = 0; i < WORD_NTEST; i++) { + doskip_try(*words, *search, found_strings[i], expected_strings[i]); + free(found_strings[i]); + free(expected_strings[i]); + } + + delete search; + words->Close(); + delete words; + } + + // + // Restore default configuration + // + WordContext::Initialize(*::config); + +#undef WORD_FIELD1 +#undef WORD_FIELD2 +#undef WORD_FIELD3 +#undef WORD_FIELD4 +#undef WORD_FIELD5 +} + +int +get_int_array(char *s,int **plist,int &n) +{ + int i=0; + for(n=0;s[i];n++) + { + for(;s[i] && !isalnum(s[i]);i++); + if(!s[i]){break;} + for(;s[i] && isalnum(s[i]);i++); + } + if(!n){*plist=NULL;return(NOTOK);} + int *list=new int[n]; + *plist=list; + int j; + i=0; + for(j=0;s[i];j++) + { + for(;s[i] && !isalnum(s[i]);i++); + if(!s[i]){break;} + list[j]=atoi(s+i); + for(;s[i] && isalnum(s[i]);i++); + } + return(OK); +} +class SkipTestEntry +{ +public: + char *searchkey; + char *goodorder; + void GetSearchKey(WordKey &searchKey) + { + searchKey.Set((String)searchkey); + if(verbose) fprintf(stderr, "GetSearchKey: string: %s got: %s\n", (char*)searchkey, (char*)searchKey.Get()); + } + int Check(WordList &WList) + { + WordKey empty; + WordReference srchwrd; + GetSearchKey(srchwrd.Key()); + Object o; + if(verbose) fprintf(stderr, "checking SkipUselessSequentialWalking on: %s\n", (char*)srchwrd.Get()); + if(verbose) fprintf(stderr, "walking all:\n"); + List *all = WList.WordRefs(); + if(verbose) fprintf(stderr, "walking search: searching for: %s\n", (char*)srchwrd.Get()); + + WordCursor *search = WList.Cursor(srchwrd.Key(), HTDIG_WORDLIST_COLLECTOR); + search->SetTraces(new List); + search->Walk(); + List *wresw = search->GetResults(); + List *wres = search->GetTraces(); + wresw->Start_Get(); + wres->Start_Get(); + WordReference *found; + WordReference *correct; + int i; + int ngoodorder; + int *goodorder_a; + get_int_array(goodorder,&goodorder_a,ngoodorder); + for(i=0;(found = (WordReference*)wres->Get_Next());i++) + { + if(i>=ngoodorder) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! i>=ngoodorder\n"); + exit(1); + } + if(verbose) fprintf(stderr, "Check actual %d'th walked: %s\n", i, (char*)found->Get()); + correct = (WordReference*)all->Nth(goodorder_a[i]); + if(verbose) fprintf(stderr, "Check correct %d : %s\n", goodorder_a[i], (char*)correct->Get()); + if(!correct->Key().Equal(found->Key())) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! at position: %d\n", i); + exit(1); + } + } + if(i<ngoodorder) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! n<ngoodorder\n"); + exit(1); + } + + delete [] goodorder_a; + delete wresw; + delete wres; + delete all; + delete search; + return OK; + } +}; + +SkipTestEntry SkipTestEntries[]= +{ + { + "et <DEF> <UNDEF> 0 10 ", + "3 4 5 9 10 12 13 14" + }, + { + "et <UNDEF> 20 0 <UNDEF> ", + "3 4 5 6 7 8 9 14 17", + }, +}; + +static void doskip_normal(params_t*) +{ + if(verbose > 0) fprintf(stderr, "doing SkipUselessSequentialWalking test\n"); + // read db into WList from file: skiptest_db.txt + if(verbose) fprintf(stderr, "WList config:minimum_word_length: %d\n", config->Value("minimum_word_length")); + WordList WList(*config); + WList.Open((*config)["word_db"], O_RDWR); + // now check walk order for a few search terms + int i; + if(verbose) fprintf(stderr, "number of entries: %d\n", (int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry))); + for(i=0;i<(int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry));i++) { + if(SkipTestEntries[i].Check(WList) == NOTOK) { + fprintf(stderr, "SkipUselessSequentialWalking test failed on SkipTestEntry number: %d\n", i); + exit(1); + } + } + WList.Close(); +} + +static void doenv(params_t* params) +{ + WordReference wordRef; + WordKey& key = wordRef.Key(); + key.Set("the <def> 1 2 3"); + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + int i; + for(i = params->env; i < 10000; i += 2) { + key.Set(WORD_DOCID, i); + if(words.Insert(wordRef) != OK) { + fprintf(stderr, "doenv: cannot insert %d\n", i); + exit(1); + } + } + for(i = params->env; i < 10000; i += 2) { + key.Set(WORD_DOCID, i); + if(words.Exists(wordRef) != OK) { + fprintf(stderr, "doenv: cannot find %d\n", i); + exit(1); + } + } + words.Close(); +} + +//***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + printf("usage: word [options]\n"); + printf("Options:\n"); + printf("\t-v\t\tIncreases the verbosity\n"); + printf("\t-k\t\tTest WordKey\n"); + printf("\t-l\t\tTest WordList\n"); + printf("\t-e n\t\tTest WordList with shared environnement, process number <n>\n"); + printf("\t-s\t\tTest WordList::SkipUselessSequentialWalking\n"); + printf("\t-z\t\tActivate compression test (use with -s, -b or -l)\n"); + exit(0); +} |