summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc51
1 files changed, 51 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc
new file mode 100644
index 00000000..cacb9342
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc
@@ -0,0 +1,51 @@
+//
+// HtWordType.cc
+//
+// functions for determining valid words/characters
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtWordType.cc,v 1.11 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtWordType.h"
+#include "WordType.h"
+
+// Asks the shared WordType instance whether `c` passes its IsChar() test
+// and hands that answer back to the caller unchanged.
+int HtIsWordChar(char c)
+{
+    const int verdict = WordType::Instance()->IsChar(c);
+    return verdict;
+}
+// Asks the shared WordType instance whether `c` passes its IsStrictChar()
+// test and hands that answer back to the caller unchanged.
+int HtIsStrictWordChar(char c)
+{
+    const int verdict = WordType::Instance()->IsStrictChar(c);
+    return verdict;
+}
+// Runs WordType::Normalize() from the shared WordType instance on `w`
+// (passed by non-const reference) and returns its result code as-is.
+int HtWordNormalize(String &w)
+{
+    const int status = WordType::Instance()->Normalize(w);
+    return status;
+}
+// Runs WordType::StripPunctuation() from the shared WordType instance on `w`
+// (passed by non-const reference) and returns its result as-is.
+int HtStripPunctuation(String &w)
+{
+    const int status = WordType::Instance()->StripPunctuation(w);
+    return status;
+}
+
+
+// Tokenizer in the manner of strtok(), using our own word-separation rules.
+//
+// Pass the buffer on the first call and a null pointer on later calls to
+// continue from where the previous call stopped.  Like strtok(), it is
+// destructive: a '\0' is written over the character that ends each token.
+// The resume position is kept in a function-local static, so interleaving
+// the tokenization of two different buffers will make them interfere.
+//
+// A token begins at the first character accepted by HtIsStrictWordChar()
+// and extends for as long as HtIsWordChar() accepts characters.
+//
+// Returns a pointer to the start of the next token inside the buffer, or 0
+// when no further token is found.
+char *
+HtWordToken(char *str)
+{
+    static unsigned char *resume = 0;
+
+    // A null argument means "carry on with the buffer from last time".
+    unsigned char *cursor = str ? (unsigned char *)str : resume;
+    char *token = 0;
+
+    if (cursor)
+    {
+        // Skip ahead to the first character that may begin a word.
+        for (; *cursor; cursor++)
+        {
+            if (HtIsStrictWordChar(*cursor))
+                break;
+        }
+
+        if (*cursor)
+        {
+            token = (char *)cursor;
+
+            // Consume the run of word characters that makes up the token.
+            for (; *cursor && HtIsWordChar(*cursor); cursor++)
+                ;
+
+            // Terminate the token in place and step past the terminator,
+            // unless the token already runs to the end of the buffer.
+            if (*cursor)
+            {
+                *cursor = '\0';
+                cursor++;
+            }
+        }
+    }
+
+    resume = cursor;
+    return token;
+}