summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc265
1 files changed, 265 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc b/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc
new file mode 100644
index 00000000..f7597c8e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_htfuzzy.cc
@@ -0,0 +1,265 @@
+//----------------------------------------------------------------
+//
+// libhtdig_htfuzzy.cc
+//
+// 1/25/2002 created from htfuzzy.cc
+//
+// Neal Richter nealr@rightnow.com
+//
+// libhtdig_htfuzzy.cc
+//
+// htfuzzy: Create one or more ``fuzzy'' indexes into the main word database.
+// These indexes can be used by htsearch to perform a search that uses
+// other algorithms than exact word match.
+//
+// This program is meant to be run after htmerge has created the word
+// database.
+//
+// For each fuzzy algorithm, there will be a separate database. Each
+// database is simply a mapping from the fuzzy key to a list of words
+// in the main word database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: libhtdig_htfuzzy.cc,v 1.5 2004/05/28 13:15:29 lha Exp $
+//
+//----------------------------------------------------------------
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+extern "C"
+{
+#include "libhtdig_api.h"
+}
+
+#include "libhtdig_log.h"
+
+
+//#include "htfuzzy.h" //NOT USED
+
+#include "Fuzzy.h"
+#include "Accents.h"
+#include "Soundex.h"
+#include "Endings.h"
+#include "Metaphone.h"
+#include "Synonym.h"
+#include "htString.h"
+#include "List.h"
+#include "Dictionary.h"
+#include "defaults.h"
+#include "HtWordList.h"
+#include "WordContext.h"
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+#include "HtConfiguration.h"
+#include "HtWordList.h"
+
+#include <stdlib.h>
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#include <stdio.h>
+
+
+
+extern int debug;
+
+static HtConfiguration * config = NULL;
+
+
+//*****************************************************************************
+// int main(int ac, char **av)
+//
+//int main(int ac, char **av)
+
+int htfuzzy_index(htfuzzy_parameters_struct * htfuzzy_parms)
+{
+ String configFile = DEFAULT_CONFIG_FILE;
+ int ret = 0;
+
+ //
+ // Parse command line arguments
+ //
+
+ debug = htfuzzy_parms->debug;
+ if (debug != 0)
+ {
+ ret = logOpen(htfuzzy_parms->logFile);
+
+ if (ret == FALSE)
+ {
+ fprintf(stderr, "htdig: Error opening file [%s]. Error:[%d], %s\n",
+ htfuzzy_parms->logFile, errno, strerror(errno));
+ }
+ }
+
+
+ configFile = htfuzzy_parms->configFile;
+
+ config = HtConfiguration::config();
+
+ //
+ // Determine what algorithms to use
+ //
+ List wordAlgorithms;
+ List noWordAlgorithms;
+
+ if (htfuzzy_parms->algorithms_flag & HTDIG_ALG_SOUNDEX)
+ {
+ wordAlgorithms.Add(new Soundex(*config));
+ }
+ else if (htfuzzy_parms->algorithms_flag & HTDIG_ALG_METAPHONE)
+ {
+ wordAlgorithms.Add(new Metaphone(*config));
+ }
+ else if (htfuzzy_parms->algorithms_flag & HTDIG_ALG_ACCENTS)
+ {
+ wordAlgorithms.Add(new Accents(*config));
+ }
+ else if (htfuzzy_parms->algorithms_flag & HTDIG_ALG_ENDINGS)
+ {
+ noWordAlgorithms.Add(new Endings(*config));
+ }
+ else if (htfuzzy_parms->algorithms_flag & HTDIG_ALG_SYNONYMS)
+ {
+ noWordAlgorithms.Add(new Synonym(*config));
+ }
+
+
+ if (wordAlgorithms.Count() == 0 && noWordAlgorithms.Count() == 0)
+ {
+ logEntry(form("htfuzzy: No algorithms specified\n"));
+ }
+
+ //
+ // Find and parse the configuration file.
+ //
+ config->Defaults(&defaults[0]);
+ if (access((char *) configFile, R_OK) < 0)
+ {
+ reportError(form("[HTFUZZY] Unable to find configuration file '%s'", configFile.get()));
+ }
+ config->Read(configFile);
+
+ // Initialize htword library (key description + wordtype...)
+ WordContext::Initialize(*config);
+
+ Fuzzy *fuzzy;
+ if (wordAlgorithms.Count() > 0)
+ {
+ //
+ // Open the word database so that we can grab the words from it.
+ //
+ HtWordList worddb(*config);
+ if (worddb.Open(config->Find("word_db"), O_RDONLY) == OK)
+ {
+ //
+ // Go through all the words in the database
+ //
+ List *words = worddb.Words();
+ String *key;
+ Fuzzy *fuzzy = 0;
+ String word, fuzzyKey;
+ int count = 0;
+
+ words->Start_Get();
+ while ((key = (String *) words->Get_Next()))
+ {
+ word = *key;
+ wordAlgorithms.Start_Get();
+ while ((fuzzy = (Fuzzy *) wordAlgorithms.Get_Next()))
+ {
+ fuzzy->addWord(word);
+ }
+ count++;
+ if ((count % 100) == 0 && debug)
+ {
+ //cout << "htfuzzy: words: " << count << '\n';
+ }
+ }
+ if (debug)
+ {
+ logEntry(form("htfuzzy: total words: %d\n", count));
+ logEntry(form("htfuzzy: Writing index files...\n"));
+ }
+
+ //
+ // All the information is now in memory.
+ // Write all of it out to the individual databases
+ //
+ wordAlgorithms.Start_Get();
+ while ((fuzzy = (Fuzzy *) wordAlgorithms.Get_Next()))
+ {
+ fuzzy->writeDB();
+ }
+ worddb.Close();
+ words->Destroy();
+ delete words;
+ if (fuzzy)
+ delete fuzzy;
+ }
+ else
+ {
+ reportError(form("[htfuzzy] Unable to open word database %s", config->Find("word_db").get()));
+ }
+ }
+ if (noWordAlgorithms.Count() > 0)
+ {
+ noWordAlgorithms.Start_Get();
+ while ((fuzzy = (Fuzzy *) noWordAlgorithms.Get_Next()))
+ {
+ if (debug)
+ {
+ logEntry(form( "htfuzzy: Selected algorithm: %s\n", fuzzy->getName()));
+ }
+ if (fuzzy->createDB(*config) == NOTOK)
+ {
+ logEntry(form("htfuzzy: Could not create database for algorithm: %s\n", fuzzy->getName()));
+ }
+ }
+ }
+
+ if (debug)
+ {
+ logEntry("htfuzzy: Done.\n");
+ }
+
+ if (debug != 0)
+ {
+ ret = logClose();
+
+ if (ret == FALSE)
+ {
+ fprintf(stderr, "htfuzzy: Error closing file [%s]. Error:[%d], %s\n",
+ htfuzzy_parms->logFile, errno, strerror(errno));
+ }
+ }
+
+
+ delete config;
+
+ return 0;
+}
+
+