summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/httools/htdump.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/httools/htdump.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/httools/htdump.cc200
1 files changed, 200 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/httools/htdump.cc b/debian/htdig/htdig-3.2.0b6/httools/htdump.cc
new file mode 100644
index 00000000..c52dbda9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/httools/htdump.cc
@@ -0,0 +1,200 @@
+//
+// htdump.cc
+//
+// htdump: A utility to create ASCII text versions of the document
+// and/or word databases. These can be used by external programs,
+// edited, or used as a platform and version-independent form of the DB.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: htdump.cc,v 1.6 2004/05/28 13:15:25 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WordContext.h"
+#include "HtURLCodec.h"
+#include "HtWordList.h"
+#include "HtConfiguration.h"
+#include "DocumentDB.h"
+#include "defaults.h"
+
+#include <errno.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#elif HAVE_GETOPT_LOCAL
+#include <getopt_local.h>
+#endif
+
+// Verbosity level.  Incremented once for every -v given on the command
+// line and handed to DocumentDB::DumpDB() when the documents are dumped.
+int verbose = 0;
+
+// Print the command-line help text on standard output and exit(0).
+void usage();
+// Print "htdump: <msg>" on standard output and exit(1).
+void reportError(char *msg);
+
+//*****************************************************************************
+// int main(int ac, char **av)
+//   Program entry point.  Parses the command line, loads the
+//   configuration file, optionally redirects the database attributes to
+//   their ".work" variants, and then dumps the document database and/or
+//   the word database to the files named by the "doc_list" and
+//   "word_dump" configuration attributes.
+//
+//   Returns 0 when it reaches the end; configuration problems terminate
+//   the process through reportError().
+//
+int main(int ac, char **av)
+{
+    extern char *optarg;
+
+    int     dump_docs = 1;          // cleared by -d
+    int     dump_words = 1;         // cleared by -w
+    int     use_work_files = 0;     // bumped by -a
+    String  configfile = DEFAULT_CONFIG_FILE;
+    int     opt;
+
+    //
+    // Command line parsing
+    //
+    while ((opt = getopt(ac, av, "vdwc:a")) != -1)
+    {
+        if (opt == 'c')
+            configfile = optarg;
+        else if (opt == 'v')
+            verbose++;
+        else if (opt == 'a')
+            use_work_files++;
+        else if (opt == 'w')
+            dump_words = 0;
+        else if (opt == 'd')
+            dump_docs = 0;
+        else if (opt == '?')
+            usage();                // does not return
+    }
+
+    //
+    // Load the built-in defaults first, then the configuration file
+    // on top of them.
+    //
+    HtConfiguration *config = HtConfiguration::config();
+    config->Defaults(&defaults[0]);
+
+    const int config_readable = (access((char*)configfile, R_OK) >= 0);
+    if (!config_readable)
+    {
+        reportError(form("Unable to find configuration file '%s'",
+                         configfile.get()));
+    }
+
+    config->Read(configfile);
+
+    //
+    // Check url_part_aliases and common_url_parts for
+    // errors.
+    //
+    String codec_errors = HtURLCodec::instance()->ErrMsg();
+    if (codec_errors.length() != 0)
+    {
+        reportError(form("Invalid url_part_aliases or common_url_parts: %s",
+                         codec_errors.get()));
+    }
+
+    //
+    // With -a every database attribute that has a value gets ".work"
+    // appended, so the methods called below operate on the alternate
+    // set of files.
+    //
+    if (use_work_files)
+    {
+        static const char *const work_attrs[] =
+        {
+            "word_db", "doc_db", "doc_index", "doc_excerpt"
+        };
+        const int n_attrs = sizeof(work_attrs) / sizeof(work_attrs[0]);
+
+        for (int i = 0; i < n_attrs; i++)
+        {
+            String path = config->Find(work_attrs[i]);
+            if (path.length() == 0)
+                continue;           // attribute not set: leave it alone
+
+            path << ".work";
+            config->Add(work_attrs[i], path);
+        }
+    }
+
+    //
+    // Document database dump.  Any previous dump file is removed
+    // before the database is opened.
+    //
+    if (dump_docs)
+    {
+        const String doc_list = config->Find("doc_list");
+        unlink(doc_list);
+
+        DocumentDB docs;
+        const String doc_db = config->Find("doc_db");
+        const String doc_index = config->Find("doc_index");
+        const String doc_excerpt = config->Find("doc_excerpt");
+
+        if (docs.Read(doc_db, doc_index, doc_excerpt) == OK)
+        {
+            docs.DumpDB(doc_list, verbose);
+            docs.Close();
+        }
+    }
+
+    //
+    // Word database dump.  Any previous dump file is removed before
+    // the database is opened read-only.
+    //
+    if (dump_words)
+    {
+        // Initialize htword
+        WordContext::Initialize(*config);
+
+        const String word_dump = config->Find("word_dump");
+        unlink(word_dump);
+
+        HtWordList words(*config);
+        if (words.Open(config->Find("word_db"), O_RDONLY) == OK)
+        {
+            words.Dump(word_dump);
+            words.Close();
+        }
+    }
+
+    return 0;
+}
+
+
+//*****************************************************************************
+// void usage()
+//   Display program usage information on standard output and terminate
+//   the process with exit status 0.  This function never returns.
+//
+void usage()
+{
+    cout << "usage: htdump [-v][-d][-w][-a][-c configfile]\n";
+    cout << "This program is part of ht://Dig " << VERSION << "\n\n";
+    cout << "Options:\n";
+    cout << "\t-v\tVerbose mode.  This increases the verbosity of the\n";
+    cout << "\t\tprogram.  Using more than 2 is probably only useful\n";
+    cout << "\t\tfor debugging purposes.  The default verbose mode\n";
+    cout << "\t\tgives a progress on what it is doing and where it is.\n\n";
+    cout << "\t-d\tDo NOT dump the document database.\n\n";
+    cout << "\t-w\tDo NOT dump the word database.\n\n";
+    cout << "\t-a\tUse alternate work files.\n";
+    cout << "\t\tTells htdump to append .work to the database files \n";
+    cout << "\t\tallowing it to operate on a second set of databases.\n";
+    cout << "\t-c configfile\n";
+    // Wording fix: the help text used to read "instead on the default".
+    cout << "\t\tUse the specified configuration file instead of the\n";
+    cout << "\t\tdefault.\n\n";
+    exit(0);
+}
+
+
+//*****************************************************************************
+// void reportError(char *msg)
+//   Write "htdump: " followed by msg and a blank line to standard
+//   output, then terminate the process with exit status 1.
+//   This function never returns.
+//
+void reportError(char *msg)
+{
+    cout << "htdump: ";
+    cout << msg;
+    cout << "\n\n";
+    exit(1);
+}