summary refs log tree commit diff stats
path: root/debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc')
-rw-r--r-- debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc 265
1 files changed, 265 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc
new file mode 100644
index 00000000..5a3789db
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/htfuzzy.cc
@@ -0,0 +1,265 @@
+//
+// htfuzzy.cc
+//
+// htfuzzy: Create one or more ``fuzzy'' indexes into the main word database.
+// These indexes can be used by htsearch to perform a search that uses
+// other algorithms than exact word match.
+//
+// This program is meant to be run after htmerge has created the word
+// database.
+//
+// For each fuzzy algorithm, there will be a separate database. Each
+// database is simply a mapping from the fuzzy key to a list of words
+// in the main word database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: htfuzzy.cc,v 1.20 2004/05/28 13:15:20 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "htfuzzy.h"
+#include "Fuzzy.h"
+#include "Accents.h"
+#include "Soundex.h"
+#include "Endings.h"
+#include "Metaphone.h"
+#include "Synonym.h"
+#include "htString.h"
+#include "List.h"
+#include "Dictionary.h"
+#include "defaults.h"
+#include "HtWordList.h"
+#include "WordContext.h"
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#elif HAVE_GETOPT_LOCAL
+#include <getopt_local.h>
+#endif
+
+int debug = 0;     // verbosity level; main() adds one per -v option
+
+// Prints the command-line help and exits; defined further down in this file.
+void usage();
+
+//*****************************************************************************
+// int main(int ac, char **av)
+//   Usage: htfuzzy [-c configfile] [-v] algorithm ...
+//   soundex, metaphone and accents receive every word of the word
+//   database through addWord() and are then written with writeDB();
+//   endings and synonyms build their databases through createDB().
+//   Returns 0, also when createDB() reports NOTOK.  Fatal errors leave
+//   through reportError() (exit status 1) or usage() (exit status 0).
+//
+int
+main(int ac, char **av)
+{
+    int c, i;
+    extern char *optarg;
+    extern int optind;
+    String configFile = DEFAULT_CONFIG_FILE;
+
+    //
+    // Parse command line arguments
+    //
+    while ((c = getopt(ac, av, "c:v")) != -1)
+    {
+        switch (c)
+        {
+            case 'c':
+                configFile = optarg;
+                break;
+
+            case 'v':
+                debug++;
+                break;
+
+            default:
+                usage();    // exits, so control never falls out of the switch
+        }
+    }
+
+    HtConfiguration* config= HtConfiguration::config();
+    //
+    // Determine what algorithms to use
+    //
+    List wordAlgorithms;    // algorithms fed from the word database
+    List noWordAlgorithms;  // algorithms built with createDB() only
+    for (i = optind; i < ac; i++)
+    {
+        if (mystrcasecmp(av[i], "soundex") == 0)
+        {
+            wordAlgorithms.Add(new Soundex(*config));
+        }
+        else if (mystrcasecmp(av[i], "metaphone") == 0)
+        {
+            wordAlgorithms.Add(new Metaphone(*config));
+        }
+        else if (mystrcasecmp(av[i], "accents") == 0)
+        {
+            wordAlgorithms.Add(new Accents(*config));
+        }
+        else if (mystrcasecmp(av[i], "endings") == 0)
+        {
+            noWordAlgorithms.Add(new Endings(*config));
+        }
+        else if (mystrcasecmp(av[i], "synonyms") == 0)
+        {
+            noWordAlgorithms.Add(new Synonym(*config));
+        }
+        else
+        {
+            reportError(form("'%s' is not a supported algorithm", av[i]));
+        }
+    }
+    if (wordAlgorithms.Count() == 0 && noWordAlgorithms.Count() == 0)
+    {
+        cout << "htfuzzy: No algorithms specified\n";
+        usage();
+    }
+
+    //
+    // Find and parse the configuration file.
+    //
+    config->Defaults(&defaults[0]);
+    if (access((char*)configFile, R_OK) < 0)
+    {
+        reportError(form("Unable to find configuration file '%s'",
+                         configFile.get()));
+    }
+    config->Read(configFile);
+
+    // Initialize htword library (key description + wordtype...)
+    WordContext::Initialize(*config);
+
+    Fuzzy *fuzzy = 0;   // cursor shared by both algorithm loops below
+    if (wordAlgorithms.Count() > 0)
+    {
+        // Open the word database so that we can grab the words from it.
+        HtWordList worddb(*config);
+        if (worddb.Open(config->Find("word_db"), O_RDONLY) == OK)
+        {
+            // Go through all the words in the database
+            List *words = worddb.Words();
+            String *key;
+            String word;
+            int count = 0;
+
+            words->Start_Get();
+            while ((key = (String *) words->Get_Next()))
+            {
+                word = *key;
+                wordAlgorithms.Start_Get();
+                while ((fuzzy = (Fuzzy *) wordAlgorithms.Get_Next()))
+                {
+                    fuzzy->addWord(word);
+                }
+                count++;
+                if ((count % 100) == 0 && debug)
+                {
+                    cout << "htfuzzy: words: " << count << '\n';
+                    cout.flush();
+                }
+            }
+            if (debug)
+            {
+                cout << "htfuzzy: total words: " << count << "\n";
+                cout << "htfuzzy: Writing index files...\n";
+            }
+
+            //
+            // All the information is now in memory.
+            // Write all of it out to the individual databases
+            //
+            wordAlgorithms.Start_Get();
+            while ((fuzzy = (Fuzzy *) wordAlgorithms.Get_Next()))
+            {
+                fuzzy->writeDB();
+            }
+            worddb.Close();
+            words->Destroy();
+            delete words;
+            // No delete through fuzzy here: the loop above only ends once
+            // Get_Next() has returned null, so fuzzy never points at an object.
+        }
+        else
+        {
+            reportError(form("Unable to open word database %s", config->Find("word_db").get()));
+        }
+    }
+    if (noWordAlgorithms.Count() > 0)
+    {
+        noWordAlgorithms.Start_Get();
+        while ((fuzzy = (Fuzzy *) noWordAlgorithms.Get_Next()))
+        {
+            if (debug)
+            {
+                cout << "htfuzzy: Selected algorithm: " << fuzzy->getName()
+                     << endl;
+            }
+            if (fuzzy->createDB(*config) == NOTOK)
+            {
+                cout << "htfuzzy: Could not create database for algorithm: "
+                     << fuzzy->getName() << endl;
+            }
+        }
+    }
+
+    if (debug)
+    {
+        cout << "htfuzzy: Done.\n";
+    }
+
+    return 0;
+}
+
+
+//*****************************************************************************
+// void usage()
+//   Print the synopsis, the supported algorithm names and the option
+//   descriptions on standard output, then terminate with exit status 0.
+//
+void
+usage()
+{
+    cout << "usage: htfuzzy [-c configfile][-v] algorithm ...\n"
+         << "This program is part of ht://Dig " << VERSION << "\n\n"
+         // Names accepted as positional arguments.
+         << "Supported algorithms:\n"
+            "\tsoundex\n"
+            "\tmetaphone\n"
+            "\taccents\n"
+            "\tendings\n"
+            "\tsynonyms\n"
+            "\n"
+         // Flags handled by getopt() in main().
+         << "Options:\n"
+            "\t-c configfile\n"
+            "\t\tUse the specified configuration file instead of the\n"
+            "\t\tdefault.\n\n"
+            "\t-v\tVerbose mode. This increases the verbosity of the\n"
+            "\t\tprogram. Using more than 2 is probably only useful\n"
+            "\t\tfor debugging purposes.\n\n";
+    exit(0);
+}
+
+
+//*****************************************************************************
+// void reportError(char *msg)
+//   Prints "htfuzzy: <msg>" and an empty line on stdout, then exits with 1.
+//
+void
+reportError(char *msg)
+{
+    cout << "htfuzzy: " << msg;
+    cout << "\n\n";
+    exit(1);
+}