diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htdig/Parsable.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htdig/Parsable.cc | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htdig/Parsable.cc b/debian/htdig/htdig-3.2.0b6/htdig/Parsable.cc new file mode 100644 index 00000000..049362a8 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htdig/Parsable.cc @@ -0,0 +1,96 @@ +// +// Parsable.cc +// +// Parsable: Base class for file parsers (HTML, PDF, ExternalParser ...) +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: Parsable.cc,v 1.9 2004/05/28 13:15:15 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "Parsable.h" +#include "htdig.h" +#include "defaults.h" + + +//***************************************************************************** +// Parsable::Parsable() +// +Parsable::Parsable() +{ + HtConfiguration* config= HtConfiguration::config(); + contents = 0; + max_head_length = config->Value("max_head_length", 0); + max_description_length = config->Value("max_description_length", 50); + max_meta_description_length = config->Value("max_meta_description_length", 0); + + max_keywords = config->Value("max_keywords", -1); + if (max_keywords < 0) + max_keywords = (int) ((unsigned int) ~1 >> 1); + minimum_word_length = config->Value("minimum_word_length", 3); +} + + +//***************************************************************************** +// Parsable::~Parsable() +// +Parsable::~Parsable() +{ + delete contents; +} + + +//***************************************************************************** +// void Parsable::setContents(char *data, int length) +// This will set the contents of the parsable object. +// +void +Parsable::setContents(char *data, int length) +{ + delete contents; + contents = new String(data, length); +} + +//***************************************************************************** +// void Parsable::addString(char *s, int& wordindex, int slot) +// Add all words in string s in "heading level" slot, incrementing wordindex +// along the way. String s is corrupted. +// +void +Parsable::addString(Retriever& retriever, char *s, int& wordindex, int slot) +{ + char *w = HtWordToken(s); + while (w) + { + if (strlen(w) >= minimum_word_length) + retriever.got_word(w, wordindex++, slot); // slot for img_alt + w = HtWordToken(0); + } + w = '\0'; +} + +//***************************************************************************** +// void Parsable::addKeywordString(char *s, int& wordindex) +// Add all words in string s as keywords, incrementing wordindex +// along the way. String s is corrupted. +// +void +Parsable::addKeywordString(Retriever& retriever, char *s, int& wordindex) +{ + char *w = HtWordToken(s); + while (w) + { + if (strlen(w) >= minimum_word_length && ++keywordsCount <= max_keywords) + retriever.got_word(w, wordindex++, 9); + w = HtWordToken(0); + } + w = '\0'; +} |