diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc | 957 |
1 files changed, 957 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc new file mode 100644 index 00000000..8c410784 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc @@ -0,0 +1,957 @@ +// +// htsearch.cc +// +// htsearch: The main search CGI. Parses the CGI input, reads the config files +// and calls the necessary code to put together the result lists +// and the final display. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htsearch.cc,v 1.72 2004/05/28 13:15:24 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "htsearch.h" +#include "WeightWord.h" +#include "parser.h" +#include "Display.h" +#include "../htfuzzy/Fuzzy.h" +#include "cgi.h" +#include "WordRecord.h" +#include "HtWordList.h" +#include "StringList.h" +#include "IntObject.h" +#include "HtURLCodec.h" +#include "HtURLRewriter.h" +#include "WordContext.h" +#include "HtRegex.h" +#include "Collection.h" + +#include <time.h> +#include <ctype.h> +#include <signal.h> + + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +typedef void (*SIGNAL_HANDLER) (...); + +// ResultList *htsearch(const String&, List &, Parser *); +void htsearch(Collection *, List &, Parser *); + +void setupWords(char *, List &, int, Parser *, String &); +void createLogicalWords(List &, String &, String &); +void reportError(char *); +void convertToBoolean(List &words); +void doFuzzy(WeightWord *, List &, List &); +void addRequiredWords(List &, StringList &); +void usage(); + +int debug = 0; +int minimum_word_length = 3; +StringList boolean_keywords; + +StringList collectionList; // List of databases to search on + +// reconised word prefixes (for field-restricted search and per-word fuzzy +// algorithms) in *descending* alphabetical order. +// Don't use a dictionary structure, as setup time outweights saving. +struct {char *name; unsigned int flag; } colonPrefix [] = +{ + { "url", FLAG_URL }, + { "title", FLAG_TITLE }, + { "text", FLAG_PLAIN }, // FLAG_TEXT is 0, i.e. *no* flag... + { "link", FLAG_LINK_TEXT }, + { "keyword", FLAG_KEYWORDS }, + { "hidden", FLAG_HIDDEN }, + { "heading", FLAG_HEADING }, + { "exact", FLAG_EXACT }, + { "descr", FLAG_DESCRIPTION }, +// { "cap", FLAG_CAPITAL }, + { "author", FLAG_AUTHOR }, + { "", 0 }, +}; + +//***************************************************************************** +// int main() +// +int +main(int ac, char **av) +{ + int c; + extern char *optarg; + int override_config=0; + // List searchWords; + List *searchWords = NULL; + String configFile = DEFAULT_CONFIG_FILE; + int pageNumber = 1; + HtRegex limit_to; + HtRegex exclude_these; + String logicalWords; + String origPattern; + String logicalPattern; + // StringMatch searchWordsPattern; + StringMatch *searchWordsPattern = NULL; + StringList requiredWords; + int i; + Dictionary selected_collections; // Multiple database support + + // + // Parse command line arguments + // + while ((c = getopt(ac, av, "c:dv")) != -1) + { + switch (c) + { + case 'c': + // The default is obviously to do this securely + // but if people want to shoot themselves in the foot... +#ifndef ALLOW_INSECURE_CGI_CONFIG + if (!getenv("REQUEST_METHOD")) + { +#endif + configFile = optarg; + override_config=1; +#ifndef ALLOW_INSECURE_CGI_CONFIG + } +#endif + break; + case 'v': + debug++; + break; + case 'd': + debug++; + break; + case '?': + usage(); + break; + } + } + + // + // The total search can NEVER take more than 5 minutes. + // +#ifndef _MSC_VER /* _WIN32 */ + alarm(5 * 60); +#endif + + // + // Parse the CGI parameters. + // + char none[] = ""; + cgi input(optind < ac ? av[optind] : none); + + // Multiple databases may be specified for search. + // Identify all databases specified with the "config=" parameter. + if (input.exists("config")) + { + collectionList.Create(input["config"], " \t\001|"); + } + if (collectionList.Count() == 0) + collectionList.Add(""); // use default config + String errorMsg = ""; + String originalWords = input["words"]; + originalWords.chop(" \t\r\n"); + + HtConfiguration* config= HtConfiguration::config(); + + // Iterate over all specified collections (databases) + for (int cInd=0; errorMsg.empty() && cInd < collectionList.Count(); cInd++) + { + // Each collection is handled in an iteration. Reset the following so + // that we start with a clean slate. + // + logicalWords = 0; + origPattern = 0; + logicalPattern = 0; + searchWords = new List; + searchWordsPattern = new StringMatch; + + char *config_name = collectionList[cInd]; + if (config_name && config_name[0] == '\0') + config_name = NULL; // use default config + + // + // Setup the configuration database. First we read the compiled defaults. + // Then we override those with defaults read in from the configuration + // file, and finally we override some attributes with information we + // got from the HTML form. + // + config->Defaults(&defaults[0]); + // To allow . in filename while still being 'secure', + // e.g. htdig-f.q.d.n.conf + if (!override_config && config_name + && (strstr(config_name, "./") == NULL)) + { + char *configDir = getenv("CONFIG_DIR"); + if (configDir) + { + configFile = configDir; + } + else + { + configFile = CONFIG_DIR; + } + if (strlen(config_name) == 0) + configFile = DEFAULT_CONFIG_FILE; + else + configFile << '/' << config_name << ".conf"; + } + if (access((char*)configFile, R_OK) < 0) + { + reportError(form("Unable to read configuration file '%s'", + configFile.get())); + } + config->Read(configFile); + + // Initialize htword library (key description + wordtype...) + WordContext::Initialize(*config); + + if (input.exists("method")) + config->Add("match_method", input["method"]); + if (input.exists("format")) + config->Add("template_name", input["format"]); + + if (input.exists("matchesperpage")) + { + // minimum check for a valid int value of "matchesperpage" cgi variable + if (atoi(input["matchesperpage"]) > 0) + config->Add("matches_per_page", input["matchesperpage"]); + } + + if (input.exists("page")) + pageNumber = atoi(input["page"]); + if (input.exists("config")) + config->Add("config", input["config"]); + if (input.exists("restrict")) + config->Add("restrict", input["restrict"]); + if (input.exists("exclude")) + config->Add("exclude", input["exclude"]); + if (input.exists("keywords")) + config->Add("keywords", input["keywords"]); + requiredWords.Create(config->Find("keywords"), " \t\r\n\001"); + if (input.exists("sort")) + config->Add("sort", input["sort"]); + + // Changes added 3-31-99, by Mike Grommet + // Check form entries for starting date, and ending date + // Each date consists of a month, day, and year + + if (input.exists("startmonth")) + config->Add("startmonth", input["startmonth"]); + if (input.exists("startday")) + config->Add("startday", input["startday"]); + if (input.exists("startyear")) + config->Add("startyear", input["startyear"]); + + if (input.exists("endmonth")) + config->Add("endmonth", input["endmonth"]); + if (input.exists("endday")) + config->Add("endday", input["endday"]); + if (input.exists("endyear")) + config->Add("endyear", input["endyear"]); + + // END OF CHANGES BY MIKE GROMMET + + + minimum_word_length = config->Value("minimum_word_length", minimum_word_length); + + StringList form_vars(config->Find("allow_in_form"), " \t\r\n"); + for (i= 0; i < form_vars.Count(); i++) + { + if (input.exists(form_vars[i])) + config->Add(form_vars[i], input[form_vars[i]]); + } + + // + // Compile the URL limit patterns. + // + + if (config->Find("restrict").length()) + { + // Create a temporary list from either the configuration + // file or the input parameter + StringList l(config->Find("restrict"), " \t\r\n\001|"); + limit_to.setEscaped(l); + String u = l.Join('|'); + config->Add("restrict", u); // re-create the config attribute + } + if (config->Find("exclude").length()) + { + // Create a temporary list from either the configuration + // file or the input parameter + StringList l(config->Find("exclude"), " \t\r\n\001|"); + exclude_these.setEscaped(l); + String u = l.Join('|'); + config->Add("exclude", u); // re-create the config attribute + } + + // + // Check url_part_aliases and common_url_parts for + // errors. + String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + // for htsearch, use search_rewrite_rules attribute for HtURLRewriter. + config->AddParsed("url_rewrite_rules", "${search_rewrite_rules}"); + url_part_errors = HtURLRewriter::instance()->ErrMsg(); + if (url_part_errors.length() != 0) + reportError(form("Invalid url_rewrite_rules: %s", + url_part_errors.get())); + + // Load boolean_keywords from configuration + // they should be placed in this order: + // 0 1 2 + // and or not + boolean_keywords.Destroy(); + boolean_keywords.Create(config->Find("boolean_keywords"), + "| \t\r\n\001"); + if (boolean_keywords.Count() != 3) + reportError("boolean_keywords attribute should have three entries"); + + Parser *parser = new Parser(); + + // + // Parse the words to search for from the argument list. + // This will produce a list of WeightWord objects. + // + setupWords(originalWords, *searchWords, + strcmp(config->Find("match_method"), "boolean") == 0, + parser, origPattern); + + // + // Convert the list of WeightWord objects to a pattern string + // that we can compile. + // + createLogicalWords(*searchWords, logicalWords, logicalPattern); + + // + // Assemble the full pattern for excerpt matching and highlighting + // + origPattern += logicalPattern; + searchWordsPattern->IgnoreCase(); + searchWordsPattern->IgnorePunct(); + searchWordsPattern->Pattern(logicalPattern); // this should now be enough + //searchWordsPattern.Pattern(origPattern); + //if (debug > 2) + // cout << "Excerpt pattern: " << origPattern << "\n"; + + // + // If required keywords were given in the search form, we will + // modify the current searchWords list to include the required + // words. + // + if (requiredWords.Count() > 0) + { + addRequiredWords(*searchWords, requiredWords); + } + + // + // Perform the actual search. The function htsearch() is used for this. + // The Dictionary it returns is then passed on to the Display object to + // actually render the results in HTML. + // + const String word_db = config->Find("word_db"); + if (access(word_db, R_OK) < 0) + { + reportError(form("Unable to read word database file '%s'\nDid you run htdig?", + word_db.get())); + } + // ResultList *results = htsearch((char*)word_db, searchWords, parser); + + String doc_index = config->Find("doc_index"); + if (access((char*)doc_index, R_OK) < 0) + { + reportError(form("Unable to read document index file '%s'\nDid you run htdig?", + doc_index.get())); + } + + const String doc_db = config->Find("doc_db"); + if (access(doc_db, R_OK) < 0) + { + reportError(form("Unable to read document database file '%s'\nDid you run htdig?", + doc_db.get())); + } + + const String doc_excerpt = config->Find("doc_excerpt"); + if (access(doc_excerpt, R_OK) < 0) + { + reportError(form("Unable to read document excerpts '%s'\nDid you run htdig?", + doc_excerpt.get())); + } + + // Multiple database support + Collection *collection = new Collection((char*)configFile, + word_db.get(), doc_index.get(), doc_db.get(), doc_excerpt.get()); + + // Perform search within the collection. Each collection stores its + // own result list. + htsearch(collection, *searchWords, parser); + collection->setSearchWords(searchWords); + collection->setSearchWordsPattern(searchWordsPattern); + selected_collections.Add(configFile, collection); + + if (parser->hadError()) + errorMsg = parser->getErrorMessage(); + + delete parser; + } + + // Display display(doc_db, 0, doc_excerpt); + Display display(&selected_collections); + if (display.hasTemplateError()) + { + reportError(form("Unable to read template file '%s'\nDoes it exist?", + (const char*)config->Find("template_name"))); + return 0; + } + display.setOriginalWords(originalWords); + // display.setResults(results); + // display.setSearchWords(&searchWords); + display.setLimit(&limit_to); + display.setExclude(&exclude_these); + // display.setAllWordsPattern(searchWordsPattern); + display.setCGI(&input); + display.setLogicalWords(logicalWords); + if (!errorMsg.empty()) + display.displaySyntaxError(errorMsg); + else + display.display(pageNumber); + + // delete results; + // delete parser; + return 0; +} + +//***************************************************************************** +void +createLogicalWords(List &searchWords, String &logicalWords, String &wm) +{ + String pattern; + int i; + int wasHidden = 0; + int inPhrase = 0; + + for (i = 0; i < searchWords.Count(); i++) + { + WeightWord *ww = (WeightWord *) searchWords[i]; + if (!ww->isHidden) + { + if (strcmp((char*)ww->word, "&") == 0 && wasHidden == 0) + logicalWords << ' ' << boolean_keywords[AND] << ' '; + else if (strcmp((char*)ww->word, "|") == 0 && wasHidden == 0) + logicalWords << ' ' << boolean_keywords[OR] << ' '; + else if (strcmp((char*)ww->word, "!") == 0 && wasHidden == 0) + logicalWords << ' ' << boolean_keywords[NOT] << ' '; + else if (strcmp((char*)ww->word, "\"") == 0 && wasHidden == 0) + { + if (inPhrase) + logicalWords.chop(' '); + inPhrase = !inPhrase; + logicalWords << "\""; + } + else if (wasHidden == 0) + { + logicalWords << ww->word; + if (inPhrase) + logicalWords << " "; + } + wasHidden = 0; + } + else + wasHidden = 1; + // generate patterns to search for and highlight in excerpt + if (ww->weight > 0 // Ignore boolean syntax stuff + && (!ww->isIgnore || inPhrase)) // Ignore bad/short words + { // but highlight them in phrases + char spacer = inPhrase ? ' ' : '|'; + if (wm.length()) + wm << spacer; + wm << ww->word; + if (!ww->isIgnore) // ignore bad/short words for searching + { + if (pattern.length()) + pattern << spacer; + pattern << ww->word; + } + } + } + + if (debug) + { + cerr << "LogicalWords: " << logicalWords << endl; + cerr << "Pattern: " << pattern << endl; + cerr << "Highlight Pattern: " << wm << endl; + } +} + +void +dumpWords(List &words, char *msg = "") +{ + if (debug) + { + cerr << msg << ": '"; + for (int i = 0; i < words.Count(); i++) + { + WeightWord *ww = (WeightWord *) words[i]; + cerr << ww->word << ':' << ww->isHidden << ' '; + } + cerr << "'\n"; + } +} + +//***************************************************************************** +// void setupWords(char *allWords, List &searchWords, +// int boolean, Parser *parser, String &originalPattern) +// +void +setupWords(char *allWords, List &searchWords, int boolean, Parser *parser, + String &originalPattern) +{ + HtConfiguration* config= HtConfiguration::config(); + List tempWords; + int i; + + // + // Parse the words we need to search for. It should be a list of words + // with optional 'and' and 'or' between them. The list of words + // will be put in the searchWords list and at the same time in the + // String pattern separated with '|'. + // + + // + // Convert the string to a list of WeightWord objects. The special + // characters '(' and ')' will be put into their own WeightWord objects. + // + unsigned char *pos = (unsigned char*) allWords; + unsigned char t; + String word; + const String prefix_suffix = config->Find("prefix_match_character"); + + while (*pos) + { + while (1) + { + if (debug > 3) + cerr << "setupWords: " << pos << endl; + t = *pos++; + if (isspace(t)) + { + continue; + } + else if (t == '"') + { + tempWords.Add(new WeightWord("\"", -1.0)); + break; + } + else if (boolean && (t == '(' || t == ')')) + { + char s[2]; + s[0] = t; + s[1] = '\0'; + tempWords.Add(new WeightWord(s, -1.0)); + break; + } + else if (HtIsWordChar(t) || + (strchr(prefix_suffix, t) != NULL) || + (t >= 161 && t <= 255)) + { + unsigned int fieldFlag = 0; + word = 0; + do // while recognised prefix, followed by ':' + { + while (t && (HtIsWordChar(t) || + (strchr(prefix_suffix, t) != NULL) || + (t >= 161 && t <= 255))) + { + word << (char) t; + t = *pos++; + } + if (debug > 2) + cerr << "word: " << word << endl; + if (t == ':') // e.g. "author:word" to search + { // only in author + word.lowercase(); + t = *pos++; + if (t && (HtIsWordChar (t) || + (strchr(prefix_suffix, t) != NULL) || + (t >= 161 && t <= 255))) + { + int i, cmp; + const char *w = word.get(); + // linear search of known prefixes, with "" flag. + for (i = 0; (cmp = mystrcasecmp (w, colonPrefix[i].name)) < 0; i++) + ; + if (debug > 2) + cerr << "field: "<< colonPrefix[i].name << endl; + if (cmp == 0) // if prefix found... + { + fieldFlag |= colonPrefix [i].flag; + word = 0; + } + } + } + } while (!word.length() && t); + pos--; + if (!t && !word.length()) // query ended with junk chars + break; + + if (boolean && (mystrcasecmp(word.get(), "+") == 0 + || mystrcasecmp(word.get(), boolean_keywords[AND]) == 0)) + { + tempWords.Add(new WeightWord("&", -1.0)); + } + else if (boolean && + mystrcasecmp(word.get(), boolean_keywords[OR]) == 0) + { + tempWords.Add(new WeightWord("|", -1.0)); + } + else if (boolean && (mystrcasecmp(word.get(), "-") == 0 + || mystrcasecmp(word.get(), boolean_keywords[NOT]) == 0)) + { + tempWords.Add(new WeightWord("!", -1.0)); + } + else + { + // Add word to excerpt matching list + originalPattern << word << "|"; + WeightWord *ww = new WeightWord(word, 1.0, fieldFlag); + if(HtWordNormalize(word) & WORD_NORMALIZE_NOTOK) + ww->isIgnore = 1; + tempWords.Add(ww); + } + break; + } + } + } + + dumpWords(tempWords, "tempWords"); + + // + // If the user specified boolean expression operators, the whole + // expression has to be syntactically correct. If not, we need + // to report a syntax error. + // + if (boolean) + { + if (!parser->checkSyntax(&tempWords)) + { + for (i = 0; i < tempWords.Count(); i++) + { + searchWords.Add(tempWords[i]); + } + tempWords.Release(); + return; +// reportError("Syntax error"); + } + } + else + { + convertToBoolean(tempWords); + } + + dumpWords(tempWords, "Boolean"); + + // + // We need to assign weights to the words according to the search_algorithm + // configuration attribute. + // For algorithms other than exact, we need to also do word lookups. + // + StringList algs(config->Find("search_algorithm"), " \t"); + List algorithms; + String name, weight; + double fweight; + Fuzzy *fuzzy = 0; + + // + // Generate the list of algorithms to use and associate the given + // weights with them. + // + for (i = 0; i < algs.Count(); i++) + { + name = strtok(algs[i], ":"); + weight = strtok(0, ":"); + if (name.length() == 0) + name = "exact"; + if (weight.length() == 0) + weight = "1"; + fweight = atof((char*)weight); + + fuzzy = Fuzzy::getFuzzyByName(name, *config); + if (fuzzy) + { + if (debug > 1) + cerr << "Adding algorithm " << name.get() << endl; + fuzzy->setWeight(fweight); + fuzzy->openIndex(); + algorithms.Add(fuzzy); + } else if (debug) + cerr << "Unknown fuzzy search algorithm " << name.get() << endl; + } + + dumpWords(searchWords, "initial"); + + // + // For each of the words, apply all the algorithms. + // + int in_phrase = 0; // If we get into a phrase, we don't want to fuzz. + for (i = 0; i < tempWords.Count(); i++) + { + WeightWord *ww = (WeightWord *) tempWords[i]; + if (ww->weight > 0 && !ww->isIgnore && !in_phrase) +// I think that should be: +// if (ww->weight > 0 && !ww->isIgnore && !in_phrase && !ww->isExact) + { + // + // Apply all the algorithms to the word. + // + if (debug) + cerr << "Fuzzy on: " << ww->word << endl; + doFuzzy(ww, searchWords, algorithms); + delete ww; + } + else if (ww->word.length() == 1 && ww->word[0] == '"') + { + in_phrase = !in_phrase; + if (debug) + cerr << "Add: " << ww->word << endl; + searchWords.Add(ww); + } + else + { + // + // This is '(', ')', '&', or '|'. These will be automatically + // transfered to the searchWords list. + // + if (debug) + cerr << "Add: " << ww->word << endl; + searchWords.Add(ww); + } + dumpWords(searchWords, "searchWords"); + } + tempWords.Release(); +} + + +//***************************************************************************** +void +doFuzzy(WeightWord *ww, List &searchWords, List &algorithms) +{ + List fuzzyWords; + List weightWords; + Fuzzy *fuzzy = 0; + WeightWord *newWw = 0; + String *word = 0; + + algorithms.Start_Get(); + while ((fuzzy = (Fuzzy *) algorithms.Get_Next())) + { + if (debug > 1) + cerr << " " << fuzzy->getName(); + fuzzy->getWords(ww->word, fuzzyWords); + fuzzyWords.Start_Get(); + while ((word = (String *) fuzzyWords.Get_Next())) + { + if (debug > 1) + cerr << " " << word->get(); + // (should be a "copy with changed weight" constructor...) + newWw = new WeightWord(word->get(), fuzzy->getWeight()); + newWw->isExact = ww->isExact; + newWw->isHidden = ww->isHidden; + newWw->flags = ww->flags; + weightWords.Add(newWw); + } + if (debug > 1) + cerr << endl; + fuzzyWords.Destroy(); + } + + // + // We now have a list of substitute words. They need to be added + // to the searchWords. + // + if (weightWords.Count()) + { + if (weightWords.Count() > 1) + searchWords.Add(new WeightWord("(", -1.0)); + for (int i = 0; i < weightWords.Count(); i++) + { + if (i > 0) + searchWords.Add(new WeightWord("|", -1.0)); + searchWords.Add(weightWords[i]); + } + if (weightWords.Count() > 1) + searchWords.Add(new WeightWord(")", -1.0)); + } + else // if no fuzzy matches, add exact word, but give it tiny weight + { + searchWords.Add(new WeightWord(ww->word.get(), 0.000001)); + } + weightWords.Release(); +} + + +//***************************************************************************** +// void convertToBoolean(List &words) +// +void +convertToBoolean(List &words) +{ + HtConfiguration* config= HtConfiguration::config(); + List list; + int i; + int do_and = strcmp(config->Find("match_method"), "and") == 0; + int in_phrase = 0; + + String quote = "\""; + + if (words.Count() == 0) + return; + list.Add(words[0]); + + // We might start off with a phrase match + if (((WeightWord *) words[0])->word == quote) + in_phrase = 1; + + for (i = 1; i < words.Count(); i++) + { + if (do_and && !in_phrase) + list.Add(new WeightWord("&", -1.0)); + else if (!in_phrase) + list.Add(new WeightWord("|", -1.0)); + + if (((WeightWord *) words[i])->word == quote) + in_phrase = !in_phrase; + + list.Add(words[i]); + } + words.Release(); + + for (i = 0; i < list.Count(); i++) + { + words.Add(list[i]); + } + list.Release(); +} + + +//***************************************************************************** +// Dictionary *htsearch(char *wordfile, List &searchWords, Parser *parser) +// This returns a dictionary indexed by document ID and containing a +// List of HtWordReference objects. +// +void +htsearch(Collection *collection, List &searchWords, Parser *parser) +{ + // + // Pick the database type we are going to use + // + ResultList *matches = new ResultList; + if (searchWords.Count() > 0) + { + // parser->setDatabase(wordfile); + parser->setCollection(collection); + parser->parse(&searchWords, *matches); + } + + collection->setResultList(matches); + // return matches; +} + + +//***************************************************************************** +// Modify the search words list to include the required words as well. +// This is done by putting the existing search words in parenthesis and +// appending the required words separated with "and". +void +addRequiredWords(List &searchWords, StringList &requiredWords) +{ + HtConfiguration* config= HtConfiguration::config(); + static int any_keywords = config->Boolean("any_keywords", 0); + if (requiredWords.Count() == 0) + return; + if (searchWords.Count() > 0) + { + searchWords.Insert(new WeightWord("(", -1.0), 0); + searchWords.Add(new WeightWord(")", -1.0)); + searchWords.Add(new WeightWord("&", -1.0)); + } + if (requiredWords.Count() == 1) + { + searchWords.Add(new WeightWord(requiredWords[0], 1.0)); + } + else + { + searchWords.Add(new WeightWord("(", -1.0)); + searchWords.Add(new WeightWord(requiredWords[0], 1.0)); + for (int i = 1; i < requiredWords.Count(); i++) + { + if (any_keywords) + searchWords.Add(new WeightWord("|", -1.0)); + else + searchWords.Add(new WeightWord("&", -1.0)); + searchWords.Add(new WeightWord(requiredWords[i], 1.0)); + } + searchWords.Add(new WeightWord(")", -1.0)); + } +} + + +//***************************************************************************** +// Report an error. Since we don' know if we are running as a CGI or not, +// we will assume this is the first thing returned by a CGI program. +// +void +reportError(char *msg) +{ + HtConfiguration* config= HtConfiguration::config(); + cout << "Content-type: text/html\r\n\r\n"; + cout << "<html><head><title>htsearch error</title></head>\n"; + cout << "<body bgcolor=\"#ffffff\">\n"; + cout << "<h1>ht://Dig error</h1>\n"; + cout << "<p>htsearch detected an error. Please report this to the\n"; + cout << "webmaster of this site by sending an e-mail to:\n"; + cout << "<a href=\"mailto:" << config->Find("maintainer") << "\">"; + cout << config->Find("maintainer") << "</a>\n"; + cout << "The error message is:</p>\n"; + cout << "<pre>\n" << msg << "\n</pre>\n</body></html>\n"; + exit(1); +} + +//***************************************************************************** +// void usage() +// Display program usage information--assumes we're running from a cmd line +// +void usage() +{ + cout << "usage: htsearch [-v][-d][-c configfile] [query_string]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-v -d\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + cout << "\tquery_string\tA CGI-style query string can be given as a single\n"; + cout << "\t\targument, and is only used if the REQUEST_METHOD environment\n"; + cout << "\t\tvariable is not set. If no query_string is given, and\n"; + cout << "\t\tREQUEST_METHOD is not set, htsearch will prompt for the query.\n\n"; + exit(0); +} |