diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc new file mode 100644 index 00000000..6d7f97d8 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc @@ -0,0 +1,184 @@ +// +// SplitMatches.cc +// +// SplitMatches: +// Holds a list of lists with the matches, as specified in +// search_results_order. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 2000-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: SplitMatches.cc,v 1.6 2004/05/28 13:15:24 lha Exp $ + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "StringList.h" +#include "HtRegex.h" +#include "SplitMatches.h" + +#include <stdio.h> +#include <ctype.h> + +// This class is only used in private members of SplitMatches. +// The OO-right thing would be to nest this inside the private +// declaration of SplitMatches, but that would cause portability +// problems according to +// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>. +// +// It is used as a container for a key (String) and a list. +// +class MatchArea : public Object +{ +public: + // Construct from a string applicable to StringMatch. + MatchArea(const String &); + + ~MatchArea(); + + // Does this item match? + // Fail if template is empty, since explicit "*" maps to empty template + inline bool Match(char *s) + { return match.match(s, 0, 0) != 0; } + + // Return the contained list. + List *MatchList() { return &myList; } + +private: + HtRegex match; + List myList; + + // These member functions are not supposed to be implemented, but + // mentioned here as private so the compiler will not generate them if + // someone puts in buggy code that would use them. + MatchArea(); + MatchArea(const MatchArea &); + void operator= (const MatchArea &); +}; + +MatchArea::MatchArea(const String &url_regex) +{ + // We do not want to "install" the catch-the-rest pattern as a real + // pattern; it must always return false for the "Match" operator. + if (strcmp("*", url_regex.get()) != 0) + { + StringList l(url_regex.get(),'|'); + match.setEscaped(l); + } +} + +MatchArea::~MatchArea() +{ +} + +SplitMatches::SplitMatches(Configuration &config) +{ + char *config_item = "search_results_order"; + + StringList sl(config[config_item], "\t \r\n"); + + mySubAreas = new List(); + myDefaultList = 0; + + // Parse each as in TemplateList::createFromString. + for (int i = 0; i < sl.Count(); i++) + { + String sub_area_pattern = sl[i]; + MatchArea *match_item = new MatchArea(sub_area_pattern); + mySubAreas->Add(match_item); + + // If this is the magic catch-rest sub-area-pattern, we want to + // use its list-pointer to store all URLs that do not match + // anything else. + // We will iterate over a list where one of the patterns is + // known to not match, but that's a small penalty for keeping + // the code simple. + if (strcmp("*", sub_area_pattern.get()) == 0) + myDefaultList = match_item->MatchList(); + } + + // If we did not have a catch-the-rest pattern, install one at the + // end of the list. + if (myDefaultList == 0) + { + MatchArea *match_item = new MatchArea(String("*")); + mySubAreas->Add(match_item); + + myDefaultList = match_item->MatchList(); + } +} + +SplitMatches::~SplitMatches() +{ + // myDefaultList is a pointer to one of the items in mySubAreas and + // must not be explicitly deleted here. + + delete mySubAreas; +} + +void +SplitMatches::Add(ResultMatch *match, char *url) +{ + List *area_list = mySubAreas; + MatchArea *area_item; + + area_list->Start_Get(); + + // This is a linear search. If there's a problem with that, we + // can improve it. For now, a list with tens of areas seems lots, + // and break-even with a more clever search-scheme is probably in + // the hundreds. + while ((area_item = (MatchArea *) area_list->Get_Next())) + { + // Use the first match only. + if (area_item->Match(url)) + { + area_item->MatchList()->Add(match); + return; + } + } + + // We'll get here if no match was found, so we add to the + // catch-the-rest list. + myDefaultList->Add(match); +} + +// Just a simple iterator function. +List * +SplitMatches::Get_Next() +{ + MatchArea *next_area = (MatchArea *) mySubAreas->Get_Next(); + List *next_area_list = 0; + + if (next_area != 0) + next_area_list = next_area->MatchList(); + + return next_area_list; +} + +// Rip out the sub-areas lists and concatenate them into one list. +List * +SplitMatches::JoinedLists() +{ + + // We make a new list here, so we don't have to worry about + // mySubAreas being dangling or null. + List *all_areas = new List(); + List *sub_areas = mySubAreas; + MatchArea *area; + + sub_areas->Start_Get(); + + while ((area = (MatchArea *) sub_areas->Get_Next())) + { + // "Destructively" move the contents of the list, + // leaving the original list empty. + all_areas->AppendList(*(area->MatchList())); + } + + return all_areas; +} |