diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc new file mode 100644 index 00000000..cacb9342 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc @@ -0,0 +1,51 @@ +// +// HtWordType.h +// +// functions for determining valid words/characters +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: HtWordType.cc,v 1.11 2004/05/28 13:15:21 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "HtWordType.h" +#include "WordType.h" + +int HtIsWordChar(char c) { return WordType::Instance()->IsChar(c); } +int HtIsStrictWordChar(char c) { return WordType::Instance()->IsStrictChar(c); } +int HtWordNormalize(String &w) { return WordType::Instance()->Normalize(w); } +int HtStripPunctuation(String &w) { return WordType::Instance()->StripPunctuation(w); } + + +// much like strtok(), and destructive of the source string like strtok(), +// but does word separation by our rules. +char * +HtWordToken(char *str) +{ + unsigned char *text = (unsigned char *)str; + char *ret = 0; + static unsigned char *prev = 0; + + if (!text) + text = prev; + while (text && *text && !HtIsStrictWordChar(*text)) + text++; + if (text && *text) + { + ret = (char *)text; + while (*text && HtIsWordChar(*text)) + text++; + if (*text) + *text++ = '\0'; + } + prev = text; + return ret; +} |