summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htlib
diff options
context:
space:
mode:
authorSlávek Banko <slavek.banko@axis.cz>2021-11-05 13:28:23 +0100
committerSlávek Banko <slavek.banko@axis.cz>2021-11-05 13:28:23 +0100
commit8c787c3591c1c885b91a54128835b400858c5cca (patch)
treeeca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/htlib
parentfe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff)
downloadextra-dependencies-8c787c35.tar.gz
extra-dependencies-8c787c35.zip
DEB htdig: Added to repository.
Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htlib')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/.cvsignore7
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Configuration.cc390
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Configuration.h229
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/DB2_db.cc379
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/DB2_db.h53
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Database.cc53
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Database.h91
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Dictionary.cc416
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Dictionary.h99
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtCodec.cc30
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtCodec.h37
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.cc1419
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.h533
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtHeap.cc198
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtHeap.h92
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.cc66
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.h34
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc450
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtPack.h39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRandom.h49
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegex.cc105
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegex.h86
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.cc137
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.h57
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.cc141
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.h58
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.cc84
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.h39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtTime.h128
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVector.cc310
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVector.h137
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.cc91
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.h247
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVectorGenericCode.h262
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVector_String.h23
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtVector_int.h21
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.cc437
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.h70
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc51
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/HtWordType.h27
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/IntObject.cc37
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/IntObject.h39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/List.cc511
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/List.h190
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Makefile.am68
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Makefile.in579
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Makefile.win3234
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Object.cc76
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Object.h47
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/ParsedString.cc202
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/ParsedString.h41
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Queue.cc112
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Queue.h52
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.cc90
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.h47
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Stack.cc111
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/Stack.h52
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/String.cc726
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/StringList.cc192
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/StringList.h73
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/StringMatch.cc601
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/StringMatch.h116
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/String_fmt.cc54
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/ber.h85
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/clib.h49
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/filecopy.cc126
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/filecopy.h40
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/getcwd.c278
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/getopt_local.c122
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/getopt_local.h50
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/good_strtok.cc46
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/good_strtok.h24
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/gregex.h568
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/htString.h260
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/lib.h90
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/libdefs.h109
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/malloc.c39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/md5.cc41
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/md5.h3
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/memcmp.c72
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/memcpy.c144
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/memmove.c143
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.c534
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.h86
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/mktime.c535
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/myqsort.c260
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/myqsort.h23
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/raise.c39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/realloc.c146
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/regex.c7924
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/regex_win32.c5742
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/regex_win32.h548
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/snprintf.c75
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/strcasecmp.cc101
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/strerror.c86
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/strptime.cc377
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/timegm.c142
-rw-r--r--debian/htdig/htdig-3.2.0b6/htlib/vsnprintf.c58
98 files changed, 30190 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/.cvsignore b/debian/htdig/htdig-3.2.0b6/htlib/.cvsignore
new file mode 100644
index 00000000..09dc8ef2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/.cvsignore
@@ -0,0 +1,7 @@
+Makefile
+*.lo
+*.la
+.purify
+.pure
+.deps
+.libs
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Configuration.cc b/debian/htdig/htdig-3.2.0b6/htlib/Configuration.cc
new file mode 100644
index 00000000..56912736
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Configuration.cc
@@ -0,0 +1,390 @@
+//
+// Configuration.cc
+//
+// Configuration: This class provides an object lookup table. Each object
+// in the Configuration is indexed with a string. The objects
+// can be returned by mentioning their string index. Values may
+// include files with `/path/to/file` or other configuration
+// variables with ${variable}
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Configuration.cc,v 1.20 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include "Configuration.h"
+#include "htString.h"
+#include "ParsedString.h"
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <locale.h>
+
+
+//*********************************************************************
+// Configuration::Configuration()
+//
+Configuration::Configuration()
+    : separators("=:"),
+      allow_multiple(0)
+{
+    // Nothing else to set up: the lookup table starts out empty, "=" and
+    // ":" are the initial name/value separators, allow_multiple is cleared.
+}
+
+
+//*********************************************************************
+// void Configuration::NameValueSeparators(char *s)
+//
+void Configuration::NameValueSeparators(const String& s)
+{
+    // Every character of s is accepted by Add(const String&) as the
+    // delimiter between a name and its value.
+    this->separators = s;
+}
+
+
+//*********************************************************************
+// Add an entry to the configuration table.
+//
+void Configuration::Add(const String& str_arg)
+{
+    //
+    // str_arg is a whitespace separated list of items, each of the form
+    //   name             stored with the value "true"
+    //   name<sep>value   value may be "double quoted", 'single quoted',
+    //                    or a bare word ending at the next blank
+    // where <sep> is any character of the separators member.  Names are
+    // converted to lower case.  Parsing stops at the first item that does
+    // not start with a letter.
+    //
+    // All <ctype.h> calls receive the character as unsigned char: passing
+    // a negative plain char (e.g. an accented letter) is undefined.
+    //
+    const char* str = str_arg;
+    String name, value;
+
+    while (str && *str)
+    {
+        while (isspace((unsigned char)*str))
+            str++;
+        name = 0;
+        if (!isalpha((unsigned char)*str))
+            break;
+        // Some isalnum() implementations don't allow all the letters that
+        // isalpha() does, e.g. accented ones.  They're not POSIX.2 compliant
+        // but we won't punish them with an infinite loop...
+        if (!isalnum((unsigned char)*str))
+            break;
+        while (isalnum((unsigned char)*str) || *str == '-' || *str == '_')
+            name << *str++;
+
+        name.lowercase();
+
+        //
+        // We have the name.  Let's see if we will get a value
+        //
+        while (isspace((unsigned char)*str))
+            str++;
+        if (!*str)
+        {
+            //
+            // End of string.  We need to store the name as a boolean TRUE
+            //
+            Add(name, "true");
+            return;
+        }
+
+        if (!strchr((char*)separators, *str))
+        {
+            //
+            // We are now at a new name.  The previous one needs to be set
+            // to boolean TRUE
+            //
+            Add(name, "true");
+            continue;
+        }
+
+        //
+        // We now need to deal with the value
+        //
+        str++;      // Skip the separator
+        while (isspace((unsigned char)*str))
+            str++;
+        if (!*str)
+        {
+            //
+            // End of string reached.  The value must be blank
+            //
+            Add(name, "");
+            break;
+        }
+        value = 0;
+        if (*str == '"')
+        {
+            //
+            // A double quoted value: everything up to the closing quote
+            // (or the end of the string if the quote is never closed).
+            //
+            str++;
+            while (*str && *str != '"')
+            {
+                value << *str++;
+            }
+            Add(name, value);
+            if (*str == '"')
+                str++;
+            continue;
+        }
+        else if (*str == '\'')
+        {
+            // A single quoted value, handled like the double quoted one.
+            str++;
+            while (*str && *str != '\'')
+            {
+                value << *str++;
+            }
+            Add(name, value);
+            if (*str == '\'')
+                str++;
+            continue;
+        }
+        else
+        {
+            //
+            // A non-quoted string.  This string will terminate at the
+            // next blank
+            //
+            while (*str && !isspace((unsigned char)*str))
+            {
+                value << *str++;
+            }
+            Add(name, value);
+            continue;
+        }
+    }
+}
+
+
+//*********************************************************************
+// Add an entry to the configuration table, without allowing variable
+// or file expansion of the value.
+//
+void Configuration::Add(const String& name, const String& value)
+{
+    // Prefix every '$', '`' and '\' with a backslash so the stored
+    // ParsedString hands the value back literally instead of expanding it.
+    String escaped;
+    for (const char *p = value.get(); *p; p++)
+    {
+        if (strchr("$`\\", *p))
+            escaped << '\\';
+        escaped << *p;
+    }
+    dcGlobalVars.Add(name, new ParsedString(escaped));
+}
+
+
+//*********************************************************************
+// Add an entry to the configuration table, allowing parsing for variable
+// or file expansion of the value.
+//
+void Configuration::AddParsed(const String& name, const String& value)
+{
+    ParsedString *parsed = new ParsedString(value);
+    const int is_locale = (mystrcasecmp(name, "locale") == 0);
+
+    if (is_locale)
+    {
+        // The "locale" attribute takes effect immediately; what gets
+        // stored is the string setlocale() reports back.
+        String applied(setlocale(LC_ALL, parsed->get(dcGlobalVars)));
+        parsed->set(applied);
+
+        // Keep time formatting in the "C" locale so that dates placed in
+        // If-Modified-Since HTTP headers stay in the form servers expect.
+        setlocale(LC_TIME, "C");
+    }
+
+    dcGlobalVars.Add(name, parsed);
+}
+
+
+//*********************************************************************
+// Remove an entry from both the hash table and from the list of keys.
+//
+int Configuration::Remove(const String& name)
+{
+    // Plain delegation: the result is whatever the dictionary reports.
+    int removed = dcGlobalVars.Remove(name);
+    return removed;
+}
+
+
+//*********************************************************************
+// char *Configuration::Find(const char *name) const
+// Retrieve a variable from the configuration database. This variable
+// will be parsed and a new String object will be returned.
+//
+const String Configuration::Find(const String& name) const
+{
+    ParsedString *entry = (ParsedString *) dcGlobalVars[name];
+
+    if (!entry)
+    {
+        // Unknown attribute: the caller gets an empty String.
+#ifdef DEBUG
+        fprintf (stderr, "Could not find configuration option %s\n", (const char*)name);
+#endif
+        return 0;
+    }
+
+    // Expand the stored value against the other attributes on every lookup.
+    return entry->get(dcGlobalVars);
+}
+
+//-
+// Return 1 if the value of configuration attribute <b>name</b> has
+// been set, 0 otherwise
+int Configuration::Exists(const String& name) const
+{
+    // Plain delegation to the underlying dictionary.
+    int present = dcGlobalVars.Exists(name);
+    return present;
+}
+
+//*********************************************************************
+Object *Configuration::Get_Object(char *name)
+{
+    // Hands back the raw dictionary entry for name, without expansion.
+    Object *entry = dcGlobalVars[name];
+    return entry;
+}
+
+
+//*********************************************************************
+//
+int Configuration::Value(const String& name, int default_value) const
+{
+    // Look the attribute up, then let String do the integer conversion.
+    const String text = Find(name);
+    return text.as_integer(default_value);
+}
+
+
+//*********************************************************************
+//
+double Configuration::Double(const String& name, double default_value) const
+{
+    // Look the attribute up, then let String do the floating point conversion.
+    const String text = Find(name);
+    return text.as_double(default_value);
+}
+
+
+//*********************************************************************
+// int Configuration::Boolean(char *name, int default_value)
+//
+int Configuration::Boolean(const String& name, int default_value) const
+{
+    const String text = Find(name);
+
+    // Missing or empty attribute: nothing to interpret.
+    if (!text[0])
+        return default_value;
+
+    if (text.nocase_compare("true") == 0 ||
+        text.nocase_compare("yes") == 0 ||
+        text.nocase_compare("1") == 0)
+        return 1;
+
+    if (text.nocase_compare("false") == 0 ||
+        text.nocase_compare("no") == 0 ||
+        text.nocase_compare("0") == 0)
+        return 0;
+
+    // Any other spelling is not recognised and falls back to the default.
+    return default_value;
+}
+
+
+//*********************************************************************
+//
+const String Configuration::operator[](const String& name) const
+{
+    // Subscript syntax is only a shorthand for Find().
+    const String found = Find(name);
+    return found;
+}
+
+
+//*********************************************************************
+//
+int Configuration::Read(const String& filename)
+{
+    //
+    // Read name/value pairs from filename and store each with AddParsed().
+    // "include" lines read the named file recursively; a relative name is
+    // resolved against the directory of the current file.  Returns NOTOK
+    // if the file cannot be opened, OK otherwise (the result of a nested
+    // include is not checked).
+    //
+    FILE* in = fopen((const char*)filename, "r");
+
+    if(!in) {
+        fprintf(stderr, "Configuration::Read: cannot open %s for reading : ", (const char*)filename);
+        perror("");
+        return NOTOK;
+    }
+
+#define CONFIG_BUFFER_SIZE (50*1024)
+    //
+    // Make the line buffer large so that we can read long lists of start
+    // URLs.
+    //
+    char        buffer[CONFIG_BUFFER_SIZE + 1];
+    char        blank[1] = { '\0' };    // writable stand-in for an empty value
+    char        *current;
+    String      line;
+    String      name;
+    char        *value;
+    int         len;
+    int         more = 1;               // cleared once fgets() reports end of input
+
+    while (more)
+    {
+        if (fgets(buffer, CONFIG_BUFFER_SIZE, in))
+        {
+            size_t got = strlen(buffer);
+            line << buffer;
+            //
+            // No newline and not at end of file: the physical line did not
+            // fit in the buffer.  Keep collecting instead of parsing a
+            // truncated line.
+            //
+            if ((got == 0 || buffer[got - 1] != '\n') && !feof(in))
+                continue;
+        }
+        else
+        {
+            //
+            // End of input.  A line may still be pending when the file
+            // ended right after a continuation backslash; process it
+            // rather than dropping it.
+            //
+            more = 0;
+            if (line.length() == 0)
+                break;
+        }
+
+        line.chop("\r\n");
+        if (more && line.last() == '\\')
+        {
+            line.chop(1);
+            continue;           // Append the next line to this one
+        }
+
+        current = line.get();
+        if (*current == '#' || *current == '\0')
+        {
+            line = 0;
+            continue;           // Comments and blank lines are skipped
+        }
+
+        //
+        // strtok() returns NULL when the line holds nothing but
+        // separators; such a line has no attribute name, so skip it.
+        //
+        char *key = strtok(current, ": =\t");
+        if (!key)
+        {
+            line = 0;
+            continue;
+        }
+        name = key;
+        value = strtok(0, "\r\n");
+        if (!value)
+            value = blank;      // Blank value
+
+        //
+        // Skip any whitespace before the actual text
+        //
+        while (*value == ' ' || *value == '\t')
+            value++;
+        len = (int)strlen(value) - 1;
+        //
+        // Skip any whitespace after the actual text
+        //
+        while (len >= 0 && (value[len] == ' ' || value[len] == '\t'))
+        {
+            value[len] = '\0';
+            len--;
+        }
+
+        if (mystrcasecmp((char*)name, "include") == 0)
+        {
+            ParsedString ps(value);
+            String       str(ps.get(dcGlobalVars));
+            if (str[0] != '/')          // Given file name not fully qualified
+            {
+                str = filename;         // so strip dir. name from current one
+                len = str.lastIndexOf('/') + 1;
+                if (len > 0)
+                    str.chop(str.length() - len);
+                else
+                    str = "";           // No slash in current filename
+                str << ps.get(dcGlobalVars);
+            }
+            Read(str);
+            line = 0;
+            continue;
+        }
+
+        AddParsed(name, value);
+        line = 0;
+    }
+    fclose(in);
+    return OK;
+}
+
+
+//*********************************************************************
+// void Configuration::Defaults(ConfigDefaults *array)
+//
+void Configuration::Defaults(const ConfigDefaults *array)
+{
+    // Walk the table up to the terminating entry (null name) and store
+    // each name/value pair with expansion enabled.
+    const ConfigDefaults *entry = array;
+    while (entry->name)
+    {
+        AddParsed(entry->name, entry->value);
+        entry++;
+    }
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Configuration.h b/debian/htdig/htdig-3.2.0b6/htlib/Configuration.h
new file mode 100644
index 00000000..2628617d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Configuration.h
@@ -0,0 +1,229 @@
+//
+// Configuration.h
+//
+// NAME
+//
+// reads the configuration file and manages it in memory.
+//
+// SYNOPSIS
+//
+// #include <Configuration.h>
+//
+// Configuration config;
+//
+// ConfigDefaults config_defaults[] = {
+// { "verbose", "true" },
+// { 0, 0 }
+// };
+//
+// config.Defaults(config_defaults);
+//
+// config.Read("~/.myconfig") ;
+//
+// config.Add("sync", "false");
+//
+// if(config["sync"]) ...
+// if(config.Value("rate") < 50) ...
+// if(config.Boolean("sync")) ...
+//
+// DESCRIPTION
+//
+// The primary purpose of the <b>Configuration</b> class is to parse
+// a configuration file and allow the application to modify the internal
+// data structure. All values are strings and are converted by the
+// appropriate accessors. For instance the <b>Boolean</b> method will
+// return numerical true (not zero) if the string either contains
+// a number that is different from zero or the string <i>true</i>.
+//
+// The <i>ConfigDefaults</i> type is a structure of char pointers,
+// the first two being the name of the configuration attribute and its
+// value. The end of the array is the first entry that contains a null
+// pointer instead of the attribute name. Numerical
+// values must be in strings. For instance:
+// <pre>
+// ConfigDefaults config_defaults[] = {
+// { "wordlist_compress", "true" },
+// { "wordlist_page_size", "8192" },
+// { 0, 0 }
+// };
+// </pre>
+// Returns the configuration (object of type <i>Configuration</i>)
+// built if a file was found or config_defaults
+// provided, 0 otherwise.
+// The additional
+// fields of the <b>ConfigDefault</b> are purely informative.
+//
+// FILE FORMAT
+//
+// This configuration file is a plain ASCII text file. Each line in
+// the file is either a comment or contains an attribute.
+// Comment lines are blank lines or lines that start with a '#'.
+// Attributes consist of a variable name and an associated
+// value:
+//
+// <pre>
+// &lt;name&gt;:&lt;whitespace&gt;&lt;value&gt;&lt;newline&gt;
+// </pre>
+//
+// The &lt;name&gt; contains any alphanumeric character or
+// underline (_). The &lt;value&gt; can include any character
+// except newline. It cannot start with spaces or tabs since
+// those are considered part of the whitespace after the colon.
+// Trailing spaces and tabs are stripped from the value as well,
+// so a value cannot end with whitespace either.
+//
+// It is possible to split the &lt;value&gt; across several
+// lines of the configuration file by ending each line with a
+// backslash (\). The backslash and the line break are removed and
+// the following line is appended as is, including its leading whitespace.
+//
+// A configuration file can include another file, by using the special
+// &lt;name&gt;, <tt>include</tt>. The &lt;value&gt; is taken as
+// the file name of another configuration file to be read in at
+// this point. If the given file name is not fully qualified, it is
+// taken relative to the directory in which the current configuration
+// file is found. Variable expansion is permitted in the file name.
+// Multiple include statements, and nested includes are also permitted.
+//
+// <pre>
+// include: common.conf
+// </pre>
+//
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Configuration.h,v 1.11 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef _Configuration_h_
+#define _Configuration_h_
+
+#include "Dictionary.h"
+#include "htString.h"
+
+//
+// One row of a table of default attributes. Configuration::Defaults()
+// reads only <i>name</i> and <i>value</i> and stops at the first row
+// whose <i>name</i> is a null pointer.
+//
+struct ConfigDefaults
+{
+ char *name; // Name of the attribute
+ char *value; // Default value
+ char *type; // Type of the value (string, integer, boolean)
+ char *programs; // Whitespace separated list of programs/modules using this attribute
+ char *block; // Configuration block this can be used in (can be blank)
+ char *version; // Version that introduced the attribute
+ char *category; // Attribute category (to split documentation)
+ char *example; // Example usage of the attribute (HTML)
+ char *description; // Long description of the attribute (HTML)
+};
+
+
+//
+// Table of named configuration attributes. Values are kept as strings
+// and converted on request by the typed accessors.
+//
+class Configuration : public Object
+{
+public:
+ //-
+ // Constructor
+ //
+ Configuration();
+#ifndef SWIG
+ //-
+ // Copy constructor: copies the attribute table, the separators and
+ // the allow_multiple flag.
+ //
+ Configuration(const Configuration& config) :
+ dcGlobalVars(config.dcGlobalVars),
+ separators(config.separators)
+ {
+ allow_multiple = config.allow_multiple;
+ }
+#endif /* SWIG */
+ //-
+ // Destructor
+ //
+ ~Configuration() {}
+
+ //
+ // Adding and deleting items to and from the Configuration
+ //
+#ifndef SWIG
+ //-
+ // Parse <b>str</b> as a whitespace separated list of <i>name</i> or
+ // <i>name</i>&lt;separator&gt;<i>value</i> items and add each of them.
+ // A name given without a value is stored with the value <b>true</b>.
+ // Values may be enclosed in single or double quotes.
+ //
+ void Add(const String& str);
+#endif /* SWIG */
+ //-
+ // Add configuration item <b>name</b> to the configuration and associate
+ // it with <b>value</b>. The value is stored literally: no variable
+ // or file expansion is applied to it.
+ //
+ void Add(const String& name, const String& value);
+ //-
+ // Same as Add(name, value) but the value is subject to variable and
+ // file expansion when it is looked up. Adding the attribute
+ // <b>locale</b> also calls setlocale(3).
+ //
+ void AddParsed(const String& name, const String& value);
+ //-
+ // Remove the <b>name</b> from the configuration.
+ //
+ int Remove(const String& name);
+
+ //-
+ // Let the Configuration know how to parse name value pairs.
+ // Each character of string <b>s</b> is a valid separator between
+ // the <i>name</i> and the <i>value.</i>
+ //
+ void NameValueSeparators(const String& s);
+
+ //-
+ // Read name/value configuration pairs from the file <b>filename</b>.
+ //
+ virtual int Read(const String& filename);
+
+ //-
+ // Return the value of configuration attribute <b>name</b> as a
+ // <i>String</i>.
+ //
+ const String Find(const String& name) const;
+
+ //-
+ // Return 1 if the value of configuration attribute <b>name</b> has
+ // been set, 0 otherwise
+ int Exists(const String& name) const;
+
+#ifndef SWIG
+ //-
+ // Alias to the <b>Find</b> method.
+ //
+ const String operator[](const String& name) const;
+#endif /* SWIG */
+ //-
+ // Return the value associated with the configuration attribute
+ // <b>name</b>, converted to integer using the atoi(3) function.
+ // If the attribute is not found in the configuration and
+ // a <b>default_value</b> is provided, return it.
+ //
+ int Value(const String& name, int default_value = 0) const;
+ //-
+ // Return the value associated with the configuration attribute
+ // <b>name</b>, converted to double using the atof(3) function.
+ // If the attribute is not found in the configuration and
+ // a <b>default_value</b> is provided, return it.
+ //
+ double Double(const String& name, double default_value = 0) const;
+ //-
+ // Return 1 if the value associated to <b>name</b> is
+ // either <b>1, yes</b> or <b>true</b>.
+ // Return 0 if the value associated to <b>name</b> is
+ // either <b>0, no</b> or <b>false</b>.
+ // Return <b>default_value</b> if the attribute is missing, empty
+ // or holds any other text. The comparison ignores case.
+ //
+ int Boolean(const String& name, int default_value = 0) const;
+ //-
+ // Return the object stored in the table under <b>name</b>,
+ // without expanding it.
+ //
+ Object *Get_Object(char *name);
+
+ //-
+ // Load configuration attributes from the <i>name</i> and <i>value</i>
+ // members of the <b>array</b> argument.
+ //
+ void Defaults(const ConfigDefaults *array);
+
+protected:
+ Dictionary dcGlobalVars; // attribute name -> stored value
+ String separators; // characters accepted between name and value
+ int allow_multiple; // set to 0 by the constructor; not used in this file
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.cc b/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.cc
new file mode 100644
index 00000000..46127056
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.cc
@@ -0,0 +1,379 @@
+//
+// DB2_db.cc
+//
+// DB2_db: Implements the Berkeley B-Tree database as a Database object
+// (including duplicate values to allow duplicate word entries)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: DB2_db.cc,v 1.26 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#include "DB2_db.h"
+#include "HtConfiguration.h"
+
+// Default cache size in kilobytes.
+// Maybe this should be a config option, just for easy testing and
+// for determining the best system performance
+// NOTE: page size is 1KB - do not change!!
+#define CACHE_SIZE_IN_KB 64
+
+//*****************************************************************************
+// DB2_db::DB2_db()
+//
+DB2_db::DB2_db()
+{
+    // Start out closed, with no custom comparison or prefix callbacks.
+    _prefix = 0;
+    _compare = 0;
+    isOpen = 0;
+}
+
+
+//*****************************************************************************
+// DB2_db::~DB2_db()
+//
+DB2_db::~DB2_db()
+{
+    // Release cursor, database and environment if still open.
+    this->Close();
+}
+
+
+//*****************************************************************************
+//
+int
+DB2_db::Open(const char *filename, int flags, int mode)
+{
+    //
+    // Create the environment, the database handle and a cursor.
+    // Returns OK with isOpen set on success.  On any failure every handle
+    // created so far is released before NOTOK is returned: Close() cannot
+    // be used for that because it does nothing while isOpen is 0.
+    //
+
+    //
+    // Initialize the database environment.
+    //
+    if((dbenv = db_init((char *)NULL)) == 0) return NOTOK;
+
+    if(CDB_db_create(&dbp, dbenv, 0) != 0)
+    {
+        (void)(dbenv->close(dbenv, 0));
+        dbenv = 0;
+        return NOTOK;
+    }
+
+    if(_compare) dbp->set_bt_compare(dbp, _compare);
+    if(_prefix) dbp->set_bt_prefix(dbp, _prefix);
+
+    //
+    // Open the database.  The result is mirrored into errno, as callers
+    // may rely on that to report the failure.
+    //
+    int open_rc = dbp->open(dbp, filename, NULL, db_type, flags, mode);
+    errno = open_rc;
+    if(open_rc == 0)
+    {
+        //
+        // Acquire a cursor for the database.
+        //
+        if ((seqrc = dbp->cursor(dbp, NULL, &dbcp, 0)) == 0)
+        {
+            isOpen = 1;
+            return OK;
+        }
+        seqerr = seqrc;
+    }
+
+    //
+    // Failure: discard the database handle and the environment.
+    //
+    (void)(dbp->close)(dbp, 0);
+    dbp = 0;
+    (void)(dbenv->close(dbenv, 0));
+    dbenv = 0;
+    errno = open_rc;    // the close calls may have disturbed errno
+    return NOTOK;
+}
+
+
+//*****************************************************************************
+// int DB2_db::Close()
+//
+int
+DB2_db::Close()
+{
+    // Nothing to release when the database was never opened or has
+    // already been closed.
+    if (!isOpen)
+        return OK;
+
+    // Tear down in reverse order of creation: cursor, database, environment.
+    (void)(dbcp->c_close)(dbcp);
+    (void)(dbp->close)(dbp, 0);
+    (void)(dbenv->close(dbenv, 0));
+    dbenv = 0;
+
+    isOpen = 0;
+    return OK;
+}
+
+
+//*****************************************************************************
+// char *DB2_db::Get_Next(String &item, String &key)
+//
+//
+// Sequential read with one record of read-ahead. The record handed
+// back is the one already held in skey/data (fetched by Start_Seq,
+// Start_Get or the previous Get_Next); the cursor is then advanced
+// with DB_NEXT and the following record is stored for the next call.
+// Returns a pointer to the returned key (owned by lkey), or 0 when the
+// database is not open or the previous cursor operation failed.
+//
+char *
+DB2_db::Get_Next(String &item, String &key)
+{
+ if (isOpen && !seqrc)
+ {
+ //
+ // Return values
+ //
+ key = skey;
+ lkey = skey;
+ item = data;
+
+ //
+ // Search for the next record
+ //
+ DBT local_key;
+ DBT local_data;
+
+ memset(&local_key, 0, sizeof(DBT));
+ memset(&local_data, 0, sizeof(DBT));
+
+ local_key.data = skey.get();
+ local_key.size = skey.length();
+
+ seqrc = dbcp->c_get(dbcp, &local_key, &local_data, DB_NEXT);
+ seqerr = seqrc;
+
+ // Only replace the held record when the cursor really moved; on
+ // failure seqrc stays non-zero and the next call returns 0.
+ if(!seqrc) {
+ data = 0;
+ data.append((char*)local_data.data, (int)local_data.size);
+ skey = 0;
+ skey.append((char*)local_key.data, (int)local_key.size);
+ }
+
+ // lkey still holds the key of the record returned by this call.
+ return lkey.get();
+ }
+ else
+ return 0;
+}
+
+//*****************************************************************************
+// void DB2_db::Start_Seq()
+//
+void
+DB2_db::Start_Seq(const String& key)
+{
+    DBT search_key;
+    DBT found;
+
+    memset(&search_key, 0, sizeof(DBT));
+    memset(&found, 0, sizeof(DBT));
+
+    // The requested key is remembered even when the database is closed.
+    skey = key;
+    search_key.data = skey.get();
+    search_key.size = skey.length();
+
+    if (!isOpen || !dbp)
+        return;
+
+    //
+    // DB_SET_RANGE positions the cursor on the first key that is equal to
+    // or greater than the requested one, so partial keys work too: with
+    // book, books and bookstore stored, asking for "boo" lands on "book",
+    // whereas DB_SET would find nothing.
+    //
+    seqrc = dbcp->c_get(dbcp, &search_key, &found, DB_SET_RANGE);
+    seqerr = seqrc;
+    if (seqrc)
+        return;
+
+    // Keep the record so the first Get_Next() call can hand it out.
+    data = 0;
+    data.append((char*)found.data, (int)found.size);
+    skey = 0;
+    skey.append((char*)search_key.data, (int)search_key.size);
+}
+
+//*****************************************************************************
+// void DB2_db::Start_Get()
+//
+void
+DB2_db::Start_Get()
+{
+    DBT first_key;
+    DBT first_data;
+
+    memset(&first_key, 0, sizeof(DBT));
+    memset(&first_data, 0, sizeof(DBT));
+
+    if (!isOpen || !dbp)
+        return;
+
+    //
+    // Position the cursor on the very first record of the database
+    // (DB_FIRST); no search key is involved.
+    //
+    seqrc = dbcp->c_get(dbcp, &first_key, &first_data, DB_FIRST);
+    seqerr = seqrc;
+    if (seqrc)
+        return;
+
+    // Keep the record so the first Get_Next() call can hand it out.
+    data = 0;
+    data.append((char*)first_data.data, (int)first_data.size);
+    skey = 0;
+    skey.append((char*)first_key.data, (int)first_key.size);
+}
+
+//*****************************************************************************
+// int DB2_db::Put(const String &key, const String &data)
+//
+int
+DB2_db::Put(const String &key, const String &data)
+{
+    if (!isOpen)
+        return NOTOK;
+
+    DBT k, d;
+    memset(&k, 0, sizeof(DBT));
+    memset(&d, 0, sizeof(DBT));
+
+    k.data = (char*)key.get();
+    k.size = key.length();
+    d.data = (char*)data.get();
+    d.size = data.length();
+
+    //
+    // Flags of 0 mean "replace an existing record", unless DB_DUP was
+    // configured somewhere else.
+    //
+    int rc = (dbp->put)(dbp, NULL, &k, &d, 0);
+    if (rc == 0)
+        return OK;
+    return NOTOK;
+}
+
+
+//*****************************************************************************
+// int DB2_db::Get(const String &key, String &data)
+//
+int
+DB2_db::Get(const String &key, String &data)
+{
+    //
+    // Fetch the record stored under key into data.  Returns OK when the
+    // record was found; NOTOK when the lookup failed or the database is
+    // not open (in both cases data is left untouched).
+    //
+    DBT k, d;
+
+    memset(&k, 0, sizeof(DBT));
+    memset(&d, 0, sizeof(DBT));
+
+    //
+    // Same guard as Put(): dbp is not a usable handle before Open()
+    // succeeded or after Close().
+    //
+    if (!isOpen)
+        return NOTOK;
+
+    //
+    // k arg of get should be const but is not. Harmless cast.
+    //
+    k.data = (char*)key.get();
+    k.size = key.length();
+
+    int rc = dbp->get(dbp, NULL, &k, &d, 0);
+    if (rc)
+        return NOTOK;
+
+    data = 0;
+    data.append((char *)d.data, d.size);
+    return OK;
+}
+
+
+//*****************************************************************************
+// int DB2_db::Exists(const String &key)
+//
+int
+DB2_db::Exists(const String &key)
+{
+    // A closed database reports 0.
+    if (!isOpen)
+        return 0;
+
+    // Otherwise the answer is the status of a lookup (OK / NOTOK); the
+    // fetched record itself is thrown away.
+    // NOTE(review): confirm callers compare the result against OK rather
+    // than treating it as a plain true/false value.
+    String scratch;
+    return Get(key, scratch);
+}
+
+
+//*****************************************************************************
+// int DB2_db::Delete(const String &key)
+//
+int
+DB2_db::Delete(const String &key)
+{
+    // A closed database reports 0 without touching anything.
+    if (!isOpen)
+        return 0;
+
+    DBT target;
+    memset(&target, 0, sizeof(DBT));
+    target.data = (char*)key.get();
+    target.size = key.length();
+
+    // The raw status of the database del call is returned unchanged.
+    return (dbp->del)(dbp, NULL, &target, 0);
+}
+
+
+//*****************************************************************************
+// DB2_db *DB2_db::getDatabaseInstance()
+//
+DB2_db *
+DB2_db::getDatabaseInstance(DBTYPE)
+{
+    // The requested type is not looked at here; the caller receives a
+    // freshly allocated, unopened instance.
+    DB2_db *instance = new DB2_db();
+    return instance;
+}
+
+//*****************************************************************************
+// void Error(const char *error_prefix, char *message);
+//
+void Error(const char *error_prefix, char *message)
+{
+    // Error callback installed by db_init(): reports the message on
+    // stderr.  Also a convenient place for a debugger breakpoint.
+    FILE *out = stderr;
+    fprintf(out, "%s: %s\n", error_prefix, message);
+}
+
+//******************************************************************************
+
+/*
+ * db_init --
+ * Initialize the environment. Only returns a pointer
+ */
+DB_ENV *
+DB2_db::db_init(char *home)
+{
+    //
+    // Create and open a private database environment rooted at home
+    // (may be NULL).  Returns the environment handle, or 0 on failure
+    // after reporting the error; no handle is left behind in that case.
+    //
+    DB_ENV *env = 0;    // local handle; the member dbenv is set by the caller
+    const char *progname = "DB2 problem...";
+
+    int error = CDB_db_env_create(&env, 0);
+    if(error != 0) {
+        fprintf(stderr, "DB2_db: CDB_db_env_create %s\n", CDB_db_strerror(error));
+        return 0;
+    }
+
+    env->set_errpfx(env, progname);
+    env->set_errcall(env, &Error);
+
+    error = env->open(env, (const char*)home, NULL, DB_CREATE | DB_PRIVATE | DB_INIT_LOCK | DB_INIT_MPOOL, 0666);
+    if(error != 0) {
+        env->err(env, error, "open %s", (home ? home : ""));
+        // A handle whose open failed must still be closed to be released.
+        (void)env->close(env, 0);
+        return 0;
+    }
+
+    return env;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.h b/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.h
new file mode 100644
index 00000000..48a6e837
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/DB2_db.h
@@ -0,0 +1,53 @@
+//
+// DB2_db.h
+//
+// DB2_db: Implements the Berkeley B-Tree database as a Database object
+// (including duplicate values to allow duplicate word entries)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: DB2_db.h,v 1.11 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef _DB2_db_h_
+#define _DB2_db_h_
+
+#include "Database.h"
+#include <db.h>
+#include <fcntl.h>
+
+class DB2_db : public Database // Berkeley DB backed implementation of the Database interface
+{
+ //
+ // Construction/Destruction
+ // (the constructor is protected; getDatabaseInstance() is the public way to obtain an object)
+protected:
+ DB2_db();
+public:
+ ~DB2_db();
+
+ static DB2_db *getDatabaseInstance(DBTYPE type); // returns a newly allocated DB2_db
+
+ virtual int OpenReadWrite(const char *filename, int mode) { return Open(filename, DB_CREATE, mode); } // Open() with DB_CREATE
+ virtual int OpenRead(const char *filename) { return Open(filename, DB_RDONLY, 0666); } // Open() with DB_RDONLY
+ virtual int Close();
+ virtual int Get(const String &, String &);
+ virtual int Put(const String &, const String &);
+ virtual int Exists(const String &);
+ virtual int Delete(const String &);
+
+ virtual void Start_Get();
+ virtual char *Get_Next(String &item, String &key);
+ virtual void Start_Seq(const String& key);
+
+private:
+ DB_ENV *db_init(char *); // creates and opens the DB environment; 0 on failure
+
+ int Open(const char *filename, int flags, int mode); // shared by OpenReadWrite() and OpenRead()
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Database.cc b/debian/htdig/htdig-3.2.0b6/htlib/Database.cc
new file mode 100644
index 00000000..968c6630
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Database.cc
@@ -0,0 +1,53 @@
+//
+// Database.cc
+//
+// Database: Class which defines the interface to a generic,
+// simple database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Database.cc,v 1.12 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Database.h"
+#include "DB2_db.h"
+
+//*****************************************************************************
+// Database::Database()
+//   Puts every handle, callback and status field into a known "nothing open"
+//   state so that no member is ever read while indeterminate.  The String
+//   members (skey, data, lkey) are default-constructed.
+//
+Database::Database()
+    : isOpen(0),
+      dbp(0),
+      dbcp(0),
+      dbenv(0),
+      _compare(0),
+      _prefix(0),
+      seqrc(0),
+      seqerr(0),
+      db_type(DB_BTREE)
+{
+}
+
+
+//*****************************************************************************
+// Database::~Database()
+//   Empty: the base class itself does not close dbp, dbcp or dbenv.
+Database::~Database()
+{
+}
+
+
+//*****************************************************************************
+// Database *Database::getDatabaseInstance(DBTYPE type)
+//   Factory: creates the DB2_db implementation and records the requested type on it.
+Database *
+Database::getDatabaseInstance(DBTYPE type = DB_BTREE) // NOTE(review): this default appears only on the definition, so it is not visible through the declaration in Database.h
+{
+ Database* db = DB2_db::getDatabaseInstance(type);
+
+ db->db_type = type; // remember which type the caller asked for
+
+ return db;
+}
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Database.h b/debian/htdig/htdig-3.2.0b6/htlib/Database.h
new file mode 100644
index 00000000..552de594
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Database.h
@@ -0,0 +1,91 @@
+//
+// Database.h
+//
+// Database: Class which defines the interface to a generic,
+// simple database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Database.h,v 1.15 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef _Database_h_
+#define _Database_h_
+
+#include "Object.h"
+#include "htString.h"
+
+#include <db.h>
+
+// Database Types
+// defined in db.h
+// #define DB_BTREE 1
+// #define DB_HASH 2
+#ifndef GDBM_HASH
+#define GDBM_HASH 2
+#endif
+
+
+class Database : public Object
+{
+ //
+ // Make sure no one can actually create an object of this type or
+ // the derived types. The static getDatabaseInstance() method needs to be
+ // used.
+ //
+protected:
+ Database();
+public:
+ ~Database();
+
+ //
+ // Since the constructor is protected, the only way to actually get
+ // a Database object is through the following class method.
+ // The idea here is that the particular type of database used by
+ // all the programs is to be defined in one place.
+ //
+ static Database *getDatabaseInstance(DBTYPE type);
+
+ //
+ // Common interface
+ //
+ virtual int OpenReadWrite(const char *filename, int mode = 0666) = 0;
+ virtual int OpenRead(const char *filename) = 0;
+ void SetCompare(int (*func)(const DBT *a, const DBT *b)) { _compare = func; } // only stores the callback
+ void SetPrefix(size_t (*func)(const DBT *a, const DBT *b)) { _prefix = func; } // only stores the callback
+ virtual int Close() = 0;
+ virtual int Put(const String &key, const String &data) = 0;
+ virtual int Get(const String &key, String &data) = 0;
+ virtual int Exists(const String &key) = 0;
+ virtual int Delete(const String &key) = 0;
+
+ virtual void Start_Get() = 0;
+ virtual char *Get_Next() { String item; String key; return Get_Next(item, key); } // item and key are discarded
+ virtual char *Get_Next(String &item) { String key; return Get_Next(item, key); } // key is discarded
+ virtual char *Get_Next(String &item, String &key) = 0;
+ virtual void Start_Seq(const String& str) = 0;
+ virtual char *Get_Next_Seq() { return Get_Next(); } // forwards to Get_Next()
+
+protected:
+ int isOpen;
+ DB *dbp; // database
+ DBC *dbcp; // cursor
+
+ String skey; // Next key to search for iterator
+ String data; // Next data to return for iterator
+ String lkey; // Contains the last key returned by iterator
+
+ DB_ENV *dbenv; // database environment
+ int (*_compare)(const DBT *a, const DBT *b); // Key comparison
+ size_t (*_prefix)(const DBT *a, const DBT *b); // Key reduction
+
+ int seqrc;
+ int seqerr;
+ DBTYPE db_type; // type passed to getDatabaseInstance()
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.cc b/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.cc
new file mode 100644
index 00000000..8c0da019
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.cc
@@ -0,0 +1,416 @@
+//
+// Dictionary.cc
+//
+// Dictionary: This class provides an object lookup table.
+// Each object in the dictionary is indexed with a string.
+// The objects can be returned by mentioning their
+// string index.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Dictionary.cc,v 1.16 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Dictionary.h"
+
+#include <stdlib.h>
+
+class DictionaryEntry // one node in a hash-bucket chain of a Dictionary
+{
+public:
+ unsigned int hash; // hash code of key, stored when the entry is added
+ char *key; // released with free() by the destructor
+ Object *value; // deleted by the destructor unless release() cleared it
+ DictionaryEntry *next; // next entry in the same bucket, or NULL
+
+ ~DictionaryEntry();
+ void release();
+};
+
+DictionaryEntry::~DictionaryEntry() // releases both the key and the value
+{
+ free(key); // key is released with free(), not delete
+ delete value; // no-op when release() has set value to NULL
+}
+
+void
+DictionaryEntry::release() // detaches the value so the destructor leaves it alone
+{
+ value = NULL; // Prevent the value from being deleted
+}
+
+
+//*********************************************************************
+// Default constructor: 101 buckets with a load factor of 10.
+Dictionary::Dictionary()
+{
+ init(101, 10.0f); // rehash threshold becomes (int)(101 * 10.0) = 1010 entries
+}
+
+Dictionary::Dictionary(int initialCapacity, float loadFactor) // caller chooses bucket count and load factor
+{
+ init(initialCapacity, loadFactor); // init() replaces non-positive values with 101 / 0.75
+}
+
+Dictionary::Dictionary(int initialCapacity) // caller chooses bucket count; load factor is 0.75
+{
+ init(initialCapacity, 0.75f); // note: the default constructor uses 10.0f instead
+}
+
+// Copy constructor: builds a table with the same bucket count and load factor
+// as `other`, then adds every key of `other` together with its value pointer.
+//
+// NOTE(review): the copy is shallow -- both dictionaries end up pointing at
+// the same value objects, and each one deletes its values on destruction
+// unless Release() is called on one of them first.
+Dictionary::Dictionary(const Dictionary& other)
+{
+    // tableLength is maintained by init() and rehash(), so it is the
+    // dependable size to copy from.
+    init(other.tableLength, other.loadFactor);
+
+    DictionaryCursor cursor;
+    const char* key;
+    for (other.Start_Get(cursor); (key = other.Get_Next(cursor)) != NULL;) {
+        Add(key, other[key]);
+    }
+}
+
+//*********************************************************************
+// Destructor: deletes every entry (and its value), then the bucket array.
+Dictionary::~Dictionary()
+{
+ Destroy(); // deletes the entries and their values
+
+ delete [] table; // the bucket array itself
+}
+
+
+//*********************************************************************
+// Empties the dictionary, deleting every entry together with the value it
+// holds.  The bucket array is kept, so the dictionary stays usable.
+//
+void
+Dictionary::Destroy()
+{
+    for (int bucket = 0; bucket < tableLength; bucket++)
+    {
+        DictionaryEntry *entry = table[bucket];
+        while (entry != NULL)
+        {
+            // Fetch the successor before the node goes away.
+            DictionaryEntry *following = entry->next;
+            delete entry;
+            entry = following;
+        }
+        table[bucket] = NULL;
+    }
+    count = 0;
+}
+
+//*********************************************************************
+// Empties the dictionary like Destroy(), but leaves the stored value
+// objects alive: each entry is detached from its value before deletion.
+//
+void
+Dictionary::Release()
+{
+    for (int bucket = 0; bucket < tableLength; bucket++)
+    {
+        DictionaryEntry *entry = table[bucket];
+        while (entry != NULL)
+        {
+            DictionaryEntry *following = entry->next;
+            entry->release();   // keep the value out of the destructor's reach
+            delete entry;
+            entry = following;
+        }
+        table[bucket] = NULL;
+    }
+    count = 0;
+}
+
+
+//*********************************************************************
+// Allocates an empty bucket table and derives the rehash threshold.
+// A non-positive capacity falls back to 101 buckets, a non-positive load
+// factor to 0.75.
+//
+void
+Dictionary::init(int initialCapacity, float loadFactor)
+{
+    if (initialCapacity <= 0)
+        initialCapacity = 101;
+    if (loadFactor <= 0.0)
+        loadFactor = 0.75f;
+
+    // The parameters shadow the members of the same name, so the members
+    // must be qualified explicitly to be assigned.
+    Dictionary::initialCapacity = initialCapacity;
+    Dictionary::loadFactor = loadFactor;
+
+    table = new DictionaryEntry*[initialCapacity];
+    for (int i = 0; i < initialCapacity; i++)
+    {
+        table[i] = NULL;
+    }
+    threshold = (int)(initialCapacity * loadFactor);
+    tableLength = initialCapacity;
+    count = 0;
+}
+
+//*********************************************************************
+// Computes the hash code of key.
+//  - A non-empty key that strtol() consumes completely as a base-10 number
+//    hashes to that number (converted to unsigned int).
+//  - Any other key is hashed over its last 15 characters (the whole key
+//    when it is shorter than 16) with h = h * 37 + c.
+//  - A null key hashes to 0.
+//
+unsigned int
+Dictionary::hashCode(const char *key) const
+{
+    if (key == NULL)
+        return 0;
+
+    char *end;
+    long conv_key = strtol(key, &end, 10);
+    if (*key && !*end)  // Conversion succeeded
+        return conv_key;
+
+    // Hash the key in place; no temporary copy is needed.
+    size_t length = strlen(key);
+    const char *p = key;
+    if (length >= 16)
+    {
+        p += length - 15;
+        length = 15;
+    }
+
+    unsigned int h = 0;
+    for (; length > 0; length--)
+    {
+        h = (h*37) + *p++;
+    }
+    return h;
+}
+
+//*********************************************************************
+// Add an entry to the hash table.  This will replace the
+// data associated with an already existing key: the previous value is
+// deleted, unless it is the very object being added again.
+// The dictionary takes ownership of obj; the key is copied.
+//
+void
+Dictionary::Add(const String& name, Object *obj)
+{
+    unsigned int hash = hashCode(name);
+    int index = hash % tableLength;
+    DictionaryEntry *e;
+
+    for (e = table[index]; e != NULL; e = e->next)
+    {
+        if (e->hash == hash && strcmp(e->key, name) == 0)
+        {
+            // Re-adding the object that is already stored must not destroy
+            // it, otherwise the entry would keep a dangling pointer.
+            if (e->value != obj)
+                delete e->value;
+            e->value = obj;
+            return;
+        }
+    }
+
+    if (count >= threshold)
+    {
+        // Grow the table first; the bucket index changes, so start over.
+        rehash();
+        Add(name, obj);
+        return;
+    }
+
+    // New key: push a fresh entry onto the front of its bucket chain.
+    e = new DictionaryEntry();
+    e->hash = hash;
+    e->key = strdup(name);
+    e->value = obj;
+    e->next = table[index];
+    table[index] = e;
+    count++;
+}
+
+
+//*********************************************************************
+// Remove an entry from the hash table.  The entry is deleted, which also
+// deletes its value.  Returns 1 when the key was found, 0 otherwise.
+//
+int
+Dictionary::Remove(const String& name)
+{
+    if (count == 0)
+        return 0;
+
+    const unsigned int wanted = hashCode(name);
+    const int bucket = wanted % tableLength;
+
+    // `link` is whichever pointer currently refers to the candidate: the
+    // bucket head first, then the `next` field of each predecessor.
+    for (DictionaryEntry **link = &table[bucket]; *link != NULL; link = &(*link)->next)
+    {
+        DictionaryEntry *candidate = *link;
+        if (candidate->hash != wanted || strcmp(candidate->key, name) != 0)
+            continue;
+
+        *link = candidate->next;        // unlink
+        count--;
+        delete candidate;
+        return 1;
+    }
+    return 0;
+}
+
+
+//*********************************************************************
+// Returns the value stored under name, or NULL when the key is absent
+// (a key that maps to a NULL value also yields NULL).
+//
+Object *Dictionary::Find(const String& name) const
+{
+    if (count == 0)
+        return NULL;
+
+    const unsigned int wanted = hashCode(name);
+    DictionaryEntry *entry = table[wanted % tableLength];
+
+    // Walk the bucket chain until the key matches or the chain ends.
+    while (entry != NULL &&
+           !(entry->hash == wanted && strcmp(entry->key, name) == 0))
+    {
+        entry = entry->next;
+    }
+
+    return entry != NULL ? entry->value : NULL;
+}
+
+
+//*********************************************************************
+// Indexing operator: same result as Find(name).
+Object *Dictionary::operator[](const String& name) const
+{
+ return Find(name);
+}
+
+
+//*********************************************************************
+// Returns 1 when an entry with the given key is present, 0 otherwise.
+// Unlike Find(), a key whose value is NULL still counts as present.
+//
+int Dictionary::Exists(const String& name) const
+{
+    if (count == 0)
+        return 0;
+
+    const unsigned int wanted = hashCode(name);
+    const DictionaryEntry *entry = table[wanted % tableLength];
+
+    while (entry != NULL)
+    {
+        if (entry->hash == wanted && strcmp(entry->key, name) == 0)
+            return 1;
+        entry = entry->next;
+    }
+    return 0;
+}
+
+
+//*********************************************************************
+// Grows the bucket table to 2 * max(count, current length) + 1 buckets and
+// relinks every existing entry into it using its stored hash.
+//
+void
+Dictionary::rehash()
+{
+    DictionaryEntry **const previousTable = table;
+    const int previousLength = tableLength;
+
+    const int basis = (count > previousLength) ? count : previousLength;
+    const int grownLength = basis * 2 + 1;
+
+    DictionaryEntry **grownTable = new DictionaryEntry*[grownLength];
+    for (int slot = 0; slot < grownLength; slot++)
+        grownTable[slot] = NULL;
+
+    table = grownTable;
+    tableLength = grownLength;
+    threshold = (int) (grownLength * loadFactor);
+
+    // Old buckets are visited from last to first and each entry is pushed
+    // onto the front of its new chain.
+    for (int bucket = previousLength - 1; bucket >= 0; bucket--)
+    {
+        DictionaryEntry *node = previousTable[bucket];
+        while (node != NULL)
+        {
+            DictionaryEntry *following = node->next;
+            int slot = node->hash % grownLength;
+            node->next = grownTable[slot];
+            grownTable[slot] = node;
+            node = following;
+        }
+    }
+
+    delete [] previousTable;
+}
+
+
+//*********************************************************************
+// Resets cursor so that the next Get_Next()/Get_NextElement() starts at bucket 0.
+void
+Dictionary::Start_Get(DictionaryCursor& cursor) const
+{
+ cursor.currentTableIndex = -1; // incremented to 0 before the first bucket is examined
+ cursor.currentDictionaryEntry = NULL;
+}
+
+
+//*********************************************************************
+// Advances cursor to the next entry and returns its key, or NULL once all entries were visited.
+char *
+Dictionary::Get_Next(DictionaryCursor& cursor) const
+{
+ while (cursor.currentDictionaryEntry == NULL ||
+ cursor.currentDictionaryEntry->next == NULL) // chain not started or exhausted: move to the next bucket
+ {
+ cursor.currentTableIndex++;
+
+ if (cursor.currentTableIndex >= tableLength)
+ {
+ cursor.currentTableIndex--; // stay on the last bucket so a repeated call reports the end again
+ return NULL;
+ }
+
+ cursor.currentDictionaryEntry = table[cursor.currentTableIndex];
+
+ if (cursor.currentDictionaryEntry != NULL)
+ {
+ return cursor.currentDictionaryEntry->key; // first entry of a non-empty bucket
+ }
+ }
+
+ cursor.currentDictionaryEntry = cursor.currentDictionaryEntry->next; // step along the current chain
+ return cursor.currentDictionaryEntry->key;
+}
+
+//*********************************************************************
+// Same traversal as Get_Next(), but returns the entry's value (NULL once all entries were visited).
+Object *
+Dictionary::Get_NextElement(DictionaryCursor& cursor) const
+{
+ while (cursor.currentDictionaryEntry == NULL ||
+ cursor.currentDictionaryEntry->next == NULL) // chain not started or exhausted: move to the next bucket
+ {
+ cursor.currentTableIndex++;
+
+ if (cursor.currentTableIndex >= tableLength)
+ {
+ cursor.currentTableIndex--; // stay on the last bucket so a repeated call reports the end again
+ return NULL;
+ }
+
+ cursor.currentDictionaryEntry = table[cursor.currentTableIndex];
+
+ if (cursor.currentDictionaryEntry != NULL)
+ {
+ return cursor.currentDictionaryEntry->value; // first entry of a non-empty bucket
+ }
+ }
+
+ cursor.currentDictionaryEntry = cursor.currentDictionaryEntry->next; // step along the current chain
+ return cursor.currentDictionaryEntry->value; // NOTE: a stored NULL value cannot be told apart from the end of the traversal
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.h b/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.h
new file mode 100644
index 00000000..8ff6cf1c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Dictionary.h
@@ -0,0 +1,99 @@
+//
+// Dictionary.h
+//
+// Dictionary: This class provides an object lookup table.
+// Each object in the dictionary is indexed with a string.
+// The objects can be returned by mentioning their
+// string index.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Dictionary.h,v 1.10 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef _Dictionary_h_
+#define _Dictionary_h_
+
+#include "Object.h"
+#include "htString.h"
+#include "List.h"
+
+class Dictionary;
+class DictionaryEntry;
+
+class DictionaryCursor { // traversal position inside a Dictionary
+ public:
+ //
+ // Support for the Start_Get and Get_Next routines
+ //
+ int currentTableIndex; // bucket being traversed; -1 right after Start_Get()
+ DictionaryEntry *currentDictionaryEntry; // entry returned last, or NULL
+};
+
+class Dictionary : public Object
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ Dictionary();
+ Dictionary(const Dictionary& other);
+ Dictionary(int initialCapacity);
+ Dictionary(int initialCapacity, float loadFactor);
+ ~Dictionary();
+
+ //
+ // Adding and deleting items to and from the dictionary
+ //
+ void Add(const String& name, Object *obj);
+ int Remove(const String& name);
+
+ //
+ // Searching can be done with the Find() member or the array indexing
+ // operator
+ //
+ Object *Find(const String& name) const;
+ Object *operator[](const String& name) const;
+ int Exists(const String& name) const;
+
+ //
+ // We want to be able to go through all the entries in the
+ // dictionary in sequence. To do this, we have the same
+ // traversal interface as the List class
+ //
+ void Start_Get() { Start_Get(cursor); } // uses the dictionary's built-in cursor
+ void Start_Get(DictionaryCursor& cursor) const;
+ //
+ // Get the next key
+ //
+ char *Get_Next() { return Get_Next(cursor); } // uses the dictionary's built-in cursor
+ char *Get_Next(DictionaryCursor& cursor) const;
+ //
+ // Get the next entry
+ //
+ Object *Get_NextElement() { return Get_NextElement(cursor); } // uses the dictionary's built-in cursor
+ Object *Get_NextElement(DictionaryCursor& cursor) const;
+ void Release(); // empties the dictionary without deleting the values
+ void Destroy(); // empties the dictionary and deletes the values
+ int Count() const { return count; }
+
+private:
+ DictionaryEntry **table; // array of bucket chain heads
+ int tableLength; // number of buckets in table
+ int initialCapacity;
+ int count; // number of stored entries
+ int threshold; // entry count at which Add() triggers rehash()
+ float loadFactor;
+
+ DictionaryCursor cursor; // cursor behind the argument-less traversal calls
+
+ void rehash();
+ void init(int, float);
+ unsigned int hashCode(const char *key) const;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.cc
new file mode 100644
index 00000000..69b2682c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.cc
@@ -0,0 +1,30 @@
+//
+// HtCodec.cc
+//
+// HtCodec: Provide a generic means to take a String, code
+// it, and return the encoded string. And vice versa.
+//
+// Keep constructor and destructor in a file of its own.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtCodec.cc,v 1.6 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtCodec.h"
+
+HtCodec::HtCodec() // nothing to initialise in the abstract base
+{ }
+
+HtCodec::~HtCodec() // virtual (see HtCodec.h); nothing to release here
+{ }
+
+// End of HtCodec.cc
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.h b/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.h
new file mode 100644
index 00000000..b3c56bf6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtCodec.h
@@ -0,0 +1,37 @@
+//
+// HtCodec.h
+//
+// HtCodec: Provide a generic means to take a String, code
+// it, and return the encoded string. And vice versa.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtCodec.h,v 1.6 2004/05/28 13:15:20 lha Exp $
+//
+#ifndef __HtCodec_h
+#define __HtCodec_h
+
+#include "htString.h"
+
+class HtCodec : public Object // abstract interface: derived classes supply encode() and decode()
+{
+public:
+ HtCodec();
+ virtual ~HtCodec();
+
+ // Code what's in this string.
+ virtual String encode(const String &) const = 0;
+
+ // Decode what's in this string.
+ virtual String decode(const String &) const = 0;
+
+private:
+ HtCodec(const HtCodec &); // Not supposed to be implemented.
+ void operator= (const HtCodec &); // Not supposed to be implemented.
+};
+
+#endif /* __HtCodec_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.cc
new file mode 100644
index 00000000..3ab40e47
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.cc
@@ -0,0 +1,1419 @@
+//
+// HtDateTime.cc
+//
+// HtDateTime: Parse, split, compare and format dates and times.
+// Uses locale.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtDateTime.cc,v 1.20 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtDateTime.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+#ifndef HAVE_STRPTIME
+// mystrptime() declared in lib.h, defined in htlib/strptime.cc
+#define strptime(s,f,t) mystrptime(s,f,t)
+#else /* HAVE_STRPTIME */
+#ifndef HAVE_STRPTIME_DECL
+extern "C" {
+extern char *strptime(const char *__s, const char *__fmt, struct tm *__tp);
+}
+#endif /* HAVE_STRPTIME_DECL */
+#endif /* HAVE_STRPTIME */
+
+///////
+ // Static local variable : Visible only here !!!
+///////
+
+#define MAXSTRTIME 256 // Max length of my_strtime
+
+static struct tm Ht_tm;
+static char my_strtime[MAXSTRTIME];
+
+
+///////
+ // Recognized Date Formats
+///////
+
+// RFC1123: Sun, 06 Nov 1994 08:49:37 GMT
+#define RFC1123_FORMAT "%a, %d %b %Y %H:%M:%S %Z"
+#define LOOSE_RFC1123_FORMAT "%d %b %Y %H:%M:%S"
+
+// RFC850 : Sunday, 06-Nov-94 08:49:37 GMT
+#define RFC850_FORMAT "%A, %d-%b-%y %H:%M:%S %Z"
+#define LOOSE_RFC850_FORMAT "%d-%b-%y %H:%M:%S"
+
+// ANSI C's asctime() format : Sun Nov 6 08:49:37 1994
+#define ASCTIME_FORMAT "%a %b %e %H:%M:%S %Y"
+#define LOOSE_ASCTIME_FORMAT "%b %e %H:%M:%S %Y"
+
+// ISO8601 : 1994-11-06 08:49:37 GMT
+#define ISO8601_FORMAT "%Y-%m-%d %H:%M:%S %Z"
+
+// ISO8601 (short version): 1994-11-06
+#define ISO8601_SHORT_FORMAT "%Y-%m-%d"
+
+// Timestamp : 19941106084937
+#define TIMESTAMP_FORMAT "%Y%m%d%H%M%S"
+
+
+
+///////
+ // Initialization
+///////
+
+const int HtDateTime::days[] = { 31, 28, 31, 30, 31, 30,
+ 31, 31, 30, 31, 30, 31};
+
+
+/////// // Input Formats // ///////
+
+///////
+ // Generalized date/time parser for "LOOSE" formats
+ // - converts LOOSE RFC850 or RFC1123 date string into a time value
+ // - converts SHORT ISO8601 date string into a time value
+ // - autodetects which of these formats is used
+ // - assumes midnight if time portion omitted
+ // We've had problems using strptime() and timegm() on a few platforms
+ // while parsing these formats, so this is an attempt to sidestep them.
+ //
+ // Returns 0 if parsing failed, or returns number of characters parsed
+ // in date string otherwise, and sets Ht_t field to time_t value.
+///////
+#define EPOCH 1970
+
+// <ctype.h> classifiers require an argument representable as unsigned char;
+// these wrappers make them safe for plain (possibly negative) char values.
+static inline int ParseIsDigit(char c) { return isdigit((unsigned char) c); }
+static inline int ParseIsSpace(char c) { return isspace((unsigned char) c); }
+
+// Maps the three characters at s to a month number (1-12), or 0 when they
+// do not form an accepted month abbreviation or fewer than three characters
+// remain.  Only as many letters as are needed to tell the months apart are
+// examined, in either case.
+static int ParseMonthAbbrev(const char *s)
+{
+    if (!s[0] || !s[1] || !s[2])        // never look past the terminator
+        return 0;
+
+    switch (s[0]) {
+    case 'J': case 'j':
+        switch (s[1]) {
+        case 'A': case 'a':
+            return 1;
+        case 'U': case 'u':
+            switch (s[2]) {
+            case 'N': case 'n': return 6;
+            case 'L': case 'l': return 7;
+            default: return 0;
+            }
+        default:
+            return 0;
+        }
+    case 'F': case 'f':
+        return 2;
+    case 'M': case 'm':
+        switch (s[1]) {
+        case 'A': case 'a':
+            switch (s[2]) {
+            case 'R': case 'r': return 3;
+            case 'Y': case 'y': return 5;
+            default: return 0;
+            }
+        default:
+            return 0;
+        }
+    case 'A': case 'a':
+        switch (s[1]) {
+        case 'P': case 'p': return 4;
+        case 'U': case 'u': return 8;
+        default: return 0;
+        }
+    case 'S': case 's':
+        return 9;
+    case 'O': case 'o':
+        return 10;
+    case 'N': case 'n':
+        return 11;
+    case 'D': case 'd':
+        return 12;
+    default:
+        return 0;
+    }
+}
+
+// Parses a LOOSE RFC850 / RFC1123 or ISO8601 date string (see the comment
+// block above) and stores the result in Ht_t.
+// Returns the number of characters consumed from date, or 0 on failure
+// (Ht_t is left untouched in that case).
+int HtDateTime::Parse(const char *date)
+{
+    const char *s;
+    const char *t;
+    int day, month, year, hour, minute, second;
+
+    //
+    // Three possible time designations:
+    //      Tuesday, 01-Jul-97 16:48:02 GMT    (RFC850)
+    // or
+    //      Thu, 01 May 1997 00:40:42 GMT      (RFC1123)
+    // or
+    //      1997-05-01 00:40:42 GMT            (ISO8601)
+    //
+    // We strip off the weekday because we don't need it, and
+    // because some servers send invalid weekdays!
+    // (Some don't even send a weekday, but we'll be flexible...)
+
+    s = date;
+    while (*s && *s != ',')
+        s++;
+    if (*s)
+        s++;
+    else
+        s = date;
+    while (ParseIsSpace(*s))
+        s++;
+
+    // check for ISO8601 format
+    month = 0;
+    t = s;
+    while (ParseIsDigit(*t))
+        t++;
+    if (t > s && *t == '-' && ParseIsDigit(t[1]))
+        day = -1;
+    else {
+        // not ISO8601, so try RFC850 or RFC1123
+        // get day...
+        if (!ParseIsDigit(*s))
+            return 0;
+        day = 0;
+        while (ParseIsDigit(*s))
+            day = day * 10 + (*s++ - '0');
+        if (day > 31)
+            return 0;
+        while (*s == '-' || ParseIsSpace(*s))
+            s++;
+
+        // get month (always a three character abbreviation)...
+        month = ParseMonthAbbrev(s);
+        if (month == 0)
+            return 0;
+        s += 3;
+        while (*s == '-' || ParseIsSpace(*s))
+            s++;
+    }
+
+    // get year...
+    if (!ParseIsDigit(*s))
+        return 0;
+    year = 0;
+    while (ParseIsDigit(*s))
+        year = year * 10 + (*s++ - '0');
+    if (year < 69)
+        year += 2000;
+    else if (year < 1900)
+        year += 1900;
+    else if (year >= 19100)     // seen some programs do it, why not check?
+        year -= (19100-2000);
+    while (*s == '-' || ParseIsSpace(*s))
+        s++;
+
+    if (day < 0) {      // still don't have day, so it's ISO8601 format
+        // get month...
+        if (!ParseIsDigit(*s))
+            return 0;
+        month = 0;
+        while (ParseIsDigit(*s))
+            month = month * 10 + (*s++ - '0');
+        if (month < 1 || month > 12)
+            return 0;
+        while (*s == '-' || ParseIsSpace(*s))
+            s++;
+
+        // get day...
+        if (!ParseIsDigit(*s))
+            return 0;
+        day = 0;
+        while (ParseIsDigit(*s))
+            day = day * 10 + (*s++ - '0');
+        if (day < 1 || day > 31)
+            return 0;
+        while (*s == '-' || ParseIsSpace(*s))
+            s++;
+    }
+
+    // optionally get hour...
+    hour = 0;
+    while (ParseIsDigit(*s))
+        hour = hour * 10 + (*s++ - '0');
+    if (hour > 23)
+        return 0;
+    while (*s == ':' || ParseIsSpace(*s))
+        s++;
+
+    // optionally get minute...
+    minute = 0;
+    while (ParseIsDigit(*s))
+        minute = minute * 10 + (*s++ - '0');
+    if (minute > 59)
+        return 0;
+    while (*s == ':' || ParseIsSpace(*s))
+        s++;
+
+    // optionally get second...
+    second = 0;
+    while (ParseIsDigit(*s))
+        second = second * 10 + (*s++ - '0');
+    if (second > 59)
+        return 0;
+    while (*s == ':' || ParseIsSpace(*s))
+        s++;
+
+    // Assign the new value to time_t field
+    //
+    // Calculate date as seconds since 01 Jan 1970 00:00:00 GMT
+    // This is based somewhat on the date calculation code in NetBSD's
+    // cd9660_node.c code, for which I was unable to find a reference.
+    // It works, though!
+    //
+    Ht_t = (time_t) (((((367L*year - 7L*(year+(month+9)/12)/4
+                                   - 3L*(((year)+((month)+9)/12-1)/100+1)/4
+                                   + 275L*(month)/9 + day) -
+                        (367L*EPOCH - 7L*(EPOCH+(1+9)/12)/4
+                                    - 3L*((EPOCH+(1+9)/12-1)/100+1)/4
+                                    + 275L*1/9 + 1))
+                       * 24 + hour) * 60 + minute) * 60 + second);
+
+    // cerr << "Date string '" << date << "' converted to time_t "
+    //      << (int)Ht_t << ", used " << (s-date) << " characters\n";
+
+    return s-date;
+}
+
+///////
+ // Personalized format such as C strftime function
+ // Overloaded version 1
+ // It ignores, for now, Time Zone values
+///////
+
+// Parses buf according to a strptime-style format and stores the result.
+// Switches the object to GM time first.  The LOOSE RFC850 / RFC1123 and the
+// SHORT ISO8601 formats are tried with the built-in Parse() before falling
+// back to strptime().
+// Returns a pointer just past the parsed text, or a null pointer when
+// strptime() could not parse buf (the stored time is then left unchanged).
+char *HtDateTime::SetFTime(const char *buf, const char *format)
+{
+
+    char *p;
+    int r;
+
+    ToGMTime();     // This must be set cos strptime always stores in GM
+
+    p = (char *) buf;
+    if (*format == '%')             // skip any unexpected white space
+        while (isspace((unsigned char) *p))     // ctype needs an unsigned char value
+            p++;
+
+    // Special handling for LOOSE/SHORT formats...
+    if ((strcmp(format, LOOSE_RFC850_FORMAT) == 0 ||
+         strcmp(format, LOOSE_RFC1123_FORMAT) == 0 ||
+         strcmp(format, ISO8601_SHORT_FORMAT) == 0) &&
+        (r = Parse(p)) > 0)
+        return p+r;
+
+    p = (char *) strptime (p, (char *) format, & Ht_tm);
+
+#ifdef TEST_HTDATETIME
+//    ViewStructTM(& Ht_tm);
+#endif
+
+    // Assign the new value to time_t value, but only when the parse
+    // succeeded: after a failure Ht_tm holds stale or partial data.
+    if (p != 0)
+        SetDateTime(Ht_tm);
+
+    return p;
+
+}
+
+
+///////
+ // C asctime() standard format
+///////
+
+void HtDateTime::SetAscTime(char *s)
+{
+ // Parses s with ASCTIME_FORMAT, which expects a leading weekday.
+ // Unfortunately, I cannot think of an easy test to
+ // see if we have a weekday *FIX*
+ SetFTime(s, ASCTIME_FORMAT);
+
+}
+
+///////
+ // RFC1123 standard Date format
+ // Sun, 06 Nov 1994 08:49:37 GMT
+///////
+
+void HtDateTime::SetRFC1123(char *s)
+{
+    // Expected fields after the optional "<weekday>," prefix:
+    // day of month, abbreviated month name, four digit year,
+    // hour (00-23), minute (00-59), seconds (00-59), time zone name.
+
+    // Everything up to and including the first comma is the weekday;
+    // it is dropped when present.
+    char *comma = strchr(s, ',');
+
+    SetFTime(comma ? comma + 1 : s, LOOSE_RFC1123_FORMAT);
+}
+
+
+///////
+   //   RFC850 standard Date format
+   //   Sunday, 06-Nov-94 08:49:37 GMT
+///////
+
+void HtDateTime::SetRFC850(char *s)
+{
+    // Expected fields after the optional "<weekday>," prefix:
+    // day of month, abbreviated month name, year within century,
+    // hour (00-23), minute (00-59), seconds (00-59), time zone name.
+
+    // Everything up to and including the first comma is the weekday;
+    // it is dropped when present.
+    char *comma = strchr(s, ',');
+
+    SetFTime(comma ? comma + 1 : s, LOOSE_RFC850_FORMAT);
+}
+
+
+///////
+ // ISO8601 standard Date format
+ // 1994-11-06 08:49:37 GMT
+///////
+
+void HtDateTime::SetISO8601(char *s)
+{
+ // Parses s with ISO8601_FORMAT; the fields are:
+ // year as ccyy;
+ // month ( 01 - 12)
+ // day of the month
+ // hour ( 00 - 23)
+ // minute ( 00 - 59)
+ // seconds ( 00 - 59);
+ // time zone name;
+
+ SetFTime(s, ISO8601_FORMAT);
+
+}
+
+
+///////
+ // Timestamp Date format (MySQL) without timezone
+ // 19941106084937
+///////
+
+void HtDateTime::SetTimeStamp(char *s)
+{
+ // Parses s with TIMESTAMP_FORMAT; the fields are:
+ // year as ccyy;
+ // month ( 01 - 12)
+ // day of the month
+ // hour ( 00 - 23)
+ // minute ( 00 - 59)
+ // seconds ( 00 - 59);
+
+ SetFTime(s, TIMESTAMP_FORMAT);
+
+}
+
+
+///////
+ // Default date and time format for the locale
+///////
+
+void HtDateTime::SetDateTimeDefault(char *s)
+{
+ // "%c" is the locale's date and time representation
+ SetFTime(s, "%c");
+
+}
+
+
+
+
+/////// // Output Formats // ///////
+
+
+///////
+ // Personalized format such as C strftime function
+ // Overloaded version 1
+///////
+
+size_t HtDateTime::GetFTime(char *s, size_t max, const char *format) const
+{
+ // Refresh static struct tm variable
+ // (local or GM time, depending on local_time)
+ RefreshStructTM();
+
+ return strftime(s, max, format, & Ht_tm); // strftime's result is passed straight back to the caller
+
+}
+
+///////
+ // Personalized format such as C strftime function
+ // Overloaded version 2 - The best to be used outside
+ // for temporary uses
+///////
+
+char *HtDateTime::GetFTime(const char *format) const
+{
+    // Format into the shared static buffer; a zero result from the
+    // three-argument overload is reported as a null pointer.
+    const size_t produced = GetFTime(my_strtime, MAXSTRTIME, format);
+
+    return produced ? (char *) my_strtime : 0;
+}
+
+///////
+ // RFC1123 standard Date format
+ // Sun, 06 Nov 1994 08:49:37 GMT
+///////
+
+char *HtDateTime::GetRFC1123() const
+{
+ // Formats the stored time with RFC1123_FORMAT into my_strtime:
+ // abbreviated weekday name;
+ // day of the month;
+ // abbreviated month name;
+ // year as ccyy;
+ // hour ( 00 - 23);
+ // minute ( 00 - 59);
+ // seconds ( 00 - 59);
+ // time zone name;
+
+ GetFTime(my_strtime, MAXSTRTIME, RFC1123_FORMAT);
+
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // RFC850 standard Date format
+ // Sunday, 06-Nov-94 08:49:37 GMT
+///////
+
+char *HtDateTime::GetRFC850() const
+{
+ // Formats the stored time with RFC850_FORMAT into my_strtime:
+ // full weekday name
+ // day of the month
+ // abbreviated month name
+ // year within century ( 00 - 99 )
+ // hour ( 00 - 23)
+ // minute ( 00 - 59)
+ // seconds ( 00 - 59);
+ // time zone name;
+
+ GetFTime(my_strtime, MAXSTRTIME, RFC850_FORMAT);
+
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // C asctime() standard format
+///////
+
+char *HtDateTime::GetAscTime() const
+{
+ // Formats the stored time with ASCTIME_FORMAT into my_strtime
+ GetFTime(my_strtime, MAXSTRTIME, ASCTIME_FORMAT);
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // ISO8601 standard Date format
+ // 1994-11-06 08:49:37 GMT
+///////
+
+char *HtDateTime::GetISO8601() const
+{
+ // Formats the stored time with ISO8601_FORMAT into my_strtime:
+ // year as ccyy;
+ // month ( 01 - 12)
+ // day of the month
+ // hour ( 00 - 23)
+ // minute ( 00 - 59)
+ // seconds ( 00 - 59);
+ // time zone name;
+
+ GetFTime(my_strtime, MAXSTRTIME, ISO8601_FORMAT);
+
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // ISO8601 standard Date format (short version, date only)
+ // 1994-11-06
+///////
+
+char *HtDateTime::GetShortISO8601() const
+{
+ // Formats the stored time with ISO8601_SHORT_FORMAT into my_strtime:
+ // year as ccyy;
+ // month ( 01 - 12)
+ // day of the month
+
+ GetFTime(my_strtime, MAXSTRTIME, ISO8601_SHORT_FORMAT);
+
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // Timestamp Date format (MySQL) without timezone
+ // 19941106084937
+///////
+
+char *HtDateTime::GetTimeStamp() const
+{
+ // Formats the stored time with TIMESTAMP_FORMAT into my_strtime:
+ // year as ccyy;
+ // month ( 01 - 12)
+ // day of the month
+ // hour ( 00 - 23)
+ // minute ( 00 - 59)
+ // seconds ( 00 - 59);
+
+ GetFTime(my_strtime, MAXSTRTIME, TIMESTAMP_FORMAT);
+
+ return (char *)my_strtime; // shared static buffer: overwritten by the next call that formats into it
+
+}
+
+///////
+ // Default date and time format for the locale
+///////
+
+char *HtDateTime::GetDateTimeDefault() const
+{
+    // "%c" is strftime's date-and-time representation for the locale.
+    // The result lives in my_strtime and is overwritten by the next
+    // formatting call.
+    char *out = (char *)my_strtime;
+
+    GetFTime(out, MAXSTRTIME, "%c");
+    return out;
+}
+
+///////
+ // Default date format for the locale
+///////
+
+char *HtDateTime::GetDateDefault() const
+{
+    // "%x" is strftime's date representation for the locale.
+    // The result lives in my_strtime and is overwritten by the next
+    // formatting call.
+    char *out = (char *)my_strtime;
+
+    GetFTime(out, MAXSTRTIME, "%x");
+    return out;
+}
+
+///////
+ // Default time format for the locale
+///////
+
+char *HtDateTime::GetTimeDefault() const
+{
+    // "%X" is strftime's time representation for the locale.
+    // The result lives in my_strtime and is overwritten by the next
+    // formatting call.
+    char *out = (char *)my_strtime;
+
+    GetFTime(out, MAXSTRTIME, "%X");
+    return out;
+}
+
+
+
+///////
+ // Set the static struct tm depending on localtime status
+///////
+
+
+void HtDateTime::RefreshStructTM() const
+{
+    // Breaks Ht_t down into Ht_tm: with localtime() when the object is
+    // in local-time mode, with gmtime() otherwise.
+    const struct tm *broken_down = local_time ? localtime(&Ht_t)
+                                              : gmtime(&Ht_t);
+
+    if (broken_down)
+        memcpy(&Ht_tm, broken_down, sizeof(struct tm));
+    else
+    {
+        // localtime()/gmtime() return a null pointer when the time_t
+        // value cannot be converted; copying from it would crash.
+        // Leave a well-defined, all-zero structure instead.
+        memset(&Ht_tm, 0, sizeof(struct tm));
+    }
+}
+
+
+// Set the date time from a struct tm pointer
+
+void HtDateTime::SetDateTime(struct tm *ptm)
+{
+    // In local-time mode *ptm is converted with mktime(); otherwise
+    // it goes through HtTimeGM(), the timegm()-like wrapper.
+    Ht_t = local_time ? mktime(ptm) : HtTimeGM(ptm);
+}
+
+
+// Set time to now
+
+void HtDateTime::SettoNow()
+{
+    // time() stores the current calendar time directly into Ht_t.
+    time(&Ht_t);
+}
+
+
+// Sets the date by passing specific values
+// The values are referred to the GM date time
+// Returns false if it fails
+
+bool HtDateTime::SetGMDateTime ( int year, int mon, int mday,
+        int hour, int min, int sec)
+{
+    // Sets the object from individual GM (UTC) calendar fields.
+    //   year : 4-digit year, or a 2-digit year that is expanded with
+    //          Year_From2To4digits()
+    //   mon  : 1..12        mday : day of month, checked by isAValidDay()
+    //   hour : 0..23        min  : 0..59        sec : 0..59
+    // Returns true on success. Returns false, leaving the object
+    // untouched, as soon as one field fails validation.
+
+    if ( ! isAValidYear (year) ) return false;
+
+    if ( year < 100 )
+        year = Year_From2To4digits (year);   // later checks need 4 digits
+
+    if ( ! isAValidMonth (mon) ) return false;
+    if ( ! isAValidDay (mday, mon, year) ) return false;
+
+    if ( hour < 0 || hour >= 24 ) return false;
+    if ( min < 0 || min >= 60 ) return false;
+    if ( sec < 0 || sec >= 60 ) return false;
+
+    // Start from an all-zero structure so that no indeterminate field
+    // (tm_wday, platform-specific extras) reaches mktime()/timegm().
+    struct tm tm_tmp;
+    memset(&tm_tmp, 0, sizeof(tm_tmp));
+
+    tm_tmp.tm_year  = year - 1900;
+    tm_tmp.tm_mon   = mon - 1;
+    tm_tmp.tm_mday  = mday;
+    tm_tmp.tm_hour  = hour;
+    tm_tmp.tm_min   = min;
+    tm_tmp.tm_sec   = sec;
+    tm_tmp.tm_yday  = 0;    // day of the year (to be ignored)
+    tm_tmp.tm_isdst = 0;    // default for GM (to be ignored)
+
+    // The fields are GM values, so the conversion must run in GM mode;
+    // a local-time object is switched temporarily and then restored.
+    if (isLocalTime())
+    {
+        ToGMTime();
+        SetDateTime(&tm_tmp);
+        ToLocalTime();
+    }
+    else
+        SetDateTime(&tm_tmp);
+
+    return true;
+}
+
+
+///////
+ // Gets a struct tm from the value stored in the object
+ // It's a protected method. Not visible outside the class
+///////
+
+struct tm &HtDateTime::GetStructTM() const
+{
+    // Rebuild Ht_tm from Ht_t (honouring the local_time flag) and hand
+    // out a reference to it. The referenced structure is rewritten by
+    // every later refresh.
+    RefreshStructTM();
+
+    struct tm &current = Ht_tm;
+    return current;
+}
+
+
+struct tm &HtDateTime::GetGMStructTM() const
+{
+    // Fill Ht_tm with the GM breakdown of Ht_t, whatever the
+    // local_time flag says, and return a reference to it.
+    struct tm &gm = Ht_tm;
+
+    GetGMStructTM (gm);
+    return gm;
+}
+
+void HtDateTime::GetGMStructTM(struct tm & t) const
+{
+    // Copies the GM (UTC) breakdown of Ht_t into the caller's t.
+    const struct tm *gm = gmtime(& Ht_t);
+
+    if (gm)
+        memcpy(& t, gm, sizeof(struct tm));
+    else
+    {
+        // gmtime() returns a null pointer when the value cannot be
+        // converted; give the caller an all-zero structure rather than
+        // copying from a null pointer.
+        memset(& t, 0, sizeof(struct tm));
+    }
+}
+
+
+///////
+ // Is a leap year?
+///////
+
+bool HtDateTime::LeapYear (int y)
+{
+    // Gregorian rule: every 4th year is a leap year, except century
+    // years, which are leap years only when divisible by 400.
+    if (y % 4 != 0) return false;
+    if (y % 100 != 0) return true;
+
+    return y % 400 == 0;
+}
+
+
+///////
+ // Is a valid year number?
+///////
+
+bool HtDateTime::isAValidYear (int y)
+{
+    // Accepted years:
+    //   0..99      - 2-digit form, expanded by Year_From2To4digits()
+    //                to 1970..2069
+    //   1970..2069 - 4-digit form
+    // The upper bound is inclusive: 2-digit year 69 expands to 2069,
+    // so rejecting 2069 would make that year pass here in its short
+    // form and fail every later check done on the expanded value.
+
+    if (y >= 0 && y < 100) return true;   // 2 digits year number
+
+    return y >= 1970 && y <= 2069;
+}
+
+
+///////
+ // Is a valid month number?
+///////
+
+bool HtDateTime::isAValidMonth (int m)
+{
+    // Months are numbered 1 (January) to 12 (December).
+    return !(m < 1 || m > 12);
+}
+
+
+///////
+ // Is a valid day?
+///////
+
+bool HtDateTime::isAValidDay (int d, int m, int y)
+{
+    // True when d is a real day of month m (1..12) in year y.
+    // y may be given with 2 or 4 digits; year and month are validated
+    // first.
+
+    if ( ! isAValidYear (y) ) return false;
+    if ( ! isAValidMonth (m) ) return false;
+
+    // Length of the month from the days[] table ...
+    int last_day = days [m - 1];
+
+    // ... except February of a leap year, which has 29 days.
+    if (m == 2)
+    {
+        if ( y < 100 ) y = Year_From2To4digits (y);
+        if ( LeapYear (y) ) last_day = 29;
+    }
+
+    return d >= 1 && d <= last_day;
+}
+
+
+///////
+ // Comparison methods
+///////
+
+
+int HtDateTime::DateTimeCompare (const HtDateTime & right) const
+{
+    // Orders by date first; the time of day is consulted only when
+    // the two dates are equal.
+    // Returns <0, 0 or >0 as produced by DateCompare()/TimeCompare().
+    const int by_date = DateCompare(right);
+
+    return by_date ? by_date : TimeCompare(right);
+}
+
+
+int HtDateTime::GMDateTimeCompare (const HtDateTime & right) const
+{
+    // Compares the two objects on their whole time_t value.
+    // Returns 1 if *this is greater than right, -1 if it is lower and
+    // 0 if both hold the same instant.
+
+    if ( * this > right) return 1;    // 1st greater than 2nd
+    if ( * this < right) return -1;   // 1st lower than 2nd
+
+    return 0;
+}
+
+
+int HtDateTime::DateCompare (const HtDateTime & right) const
+{
+    // Compares only the calendar date (year, month, day of month) of
+    // the two objects "as they are": each object is broken down
+    // according to its own local_time flag, exactly as TimeCompare()
+    // does. GMDateCompare() is the variant that always uses GM values.
+    // Returns -1, 0 or 1.
+
+    struct tm tm1, tm2;
+
+    this->GetStructTM (tm1);
+    right.GetStructTM (tm2);
+
+    return DateCompare (&tm1, &tm2);
+}
+
+
+int HtDateTime::GMDateCompare (const HtDateTime & right) const
+{
+    // Date-only comparison with both objects broken down as GM time,
+    // regardless of their local_time flags. Returns -1, 0 or 1.
+    struct tm mine;
+    struct tm theirs;
+
+    GetGMStructTM (mine);
+    right.GetGMStructTM (theirs);
+
+    return DateCompare (&mine, &theirs);
+}
+
+
+int HtDateTime::TimeCompare (const HtDateTime & right) const
+{
+    // Time-of-day comparison; each object is broken down according to
+    // its own local_time flag. Returns -1, 0 or 1.
+    struct tm mine;
+    struct tm theirs;
+
+    GetStructTM (mine);
+    right.GetStructTM (theirs);
+
+    return TimeCompare (&mine, &theirs);
+}
+
+
+int HtDateTime::GMTimeCompare (const HtDateTime & right) const
+{
+    // Time-of-day comparison with both objects broken down as GM
+    // time, regardless of their local_time flags. Returns -1, 0 or 1.
+    struct tm mine;
+    struct tm theirs;
+
+    GetGMStructTM (mine);
+    right.GetGMStructTM (theirs);
+
+    return TimeCompare (&mine, &theirs);
+}
+
+
+
+///////
+ // Static methods of comparison between 2 struct tm pointers
+///////
+
+
+///////
+ // Compares only the date (ignoring the time)
+///////
+
+int HtDateTime::DateCompare(const struct tm *tm1, const struct tm *tm2)
+{
+    // Lexicographic comparison on (year, month, day of month).
+    // Returns -1 if tm1 is earlier, 1 if it is later, 0 if the dates
+    // are equal. Time-of-day fields are not looked at.
+
+    if (tm1->tm_year != tm2->tm_year)
+        return (tm1->tm_year < tm2->tm_year) ? -1 : 1;
+
+    if (tm1->tm_mon != tm2->tm_mon)
+        return (tm1->tm_mon < tm2->tm_mon) ? -1 : 1;
+
+    if (tm1->tm_mday != tm2->tm_mday)
+        return (tm1->tm_mday < tm2->tm_mday) ? -1 : 1;
+
+    return 0;
+}
+
+
+///////
+ // Compares only the time (ignoring the date)
+///////
+
+int HtDateTime::TimeCompare(const struct tm *tm1, const struct tm *tm2)
+{
+    // Lexicographic comparison on (hour, minute, second).
+    // Returns -1 if tm1 is earlier in the day, 1 if it is later, 0 if
+    // the times are equal. Date fields are not looked at.
+
+    if (tm1->tm_hour != tm2->tm_hour)
+        return (tm1->tm_hour < tm2->tm_hour) ? -1 : 1;
+
+    if (tm1->tm_min != tm2->tm_min)
+        return (tm1->tm_min < tm2->tm_min) ? -1 : 1;
+
+    if (tm1->tm_sec != tm2->tm_sec)
+        return (tm1->tm_sec < tm2->tm_sec) ? -1 : 1;
+
+    return 0;
+}
+
+
+///////
+ // Compares both date and time
+///////
+
+int HtDateTime::DateTimeCompare(const struct tm *tm1, const struct tm *tm2)
+{
+    // Date first; the time of day only breaks ties between equal
+    // dates. Returns -1, 0 or 1.
+    const int by_date = DateCompare(tm1, tm2);
+
+    if (by_date != 0)
+        return by_date;             // different days
+
+    return TimeCompare(tm1, tm2);   // same day: the time decides
+}
+
+time_t HtDateTime::HtTimeGM (struct tm *tm)
+{
+    // Converts a GM broken-down time to time_t: through the system
+    // timegm() when configure found one (HAVE_TIMEGM), otherwise
+    // through the Httimegm() replacement (timegm.c).
+    time_t converted;
+
+#if HAVE_TIMEGM
+    converted = timegm (tm);
+#else
+    converted = Httimegm (tm);
+#endif
+
+    return converted;
+}
+
+
+
+// Returns the difference in seconds between two HtDateTime Objects
+
+int HtDateTime::GetDiff(const HtDateTime &d1, const HtDateTime &d2)
+{
+    // Seconds from d2 to d1 (positive when d1 is the later one).
+    // The time_t difference is narrowed to int.
+    return static_cast<int>( d1.Ht_t - d2.Ht_t );
+}
+
+
+
+
+
+
+///////
+ // Only for test and debug
+///////
+
+#ifdef TEST_HTDATETIME
+
+
+///////
+ // View of struct tm fields
+///////
+
+void HtDateTime::ViewStructTM()
+{
+    // GetStructTM() refreshes Ht_tm from the stored time_t value and
+    // returns it; dump that structure.
+    ViewStructTM(&GetStructTM());
+}
+
+void HtDateTime::ViewStructTM(struct tm *ptm)
+{
+    // Debug dump: one line per struct tm field, written to cout.
+    cout << "Struct TM fields" << endl
+         << "================" << endl
+         << "tm_sec :\t" << ptm->tm_sec << endl
+         << "tm_min :\t" << ptm->tm_min << endl
+         << "tm_hour :\t" << ptm->tm_hour << endl
+         << "tm_mday :\t" << ptm->tm_mday << endl
+         << "tm_mon :\t" << ptm->tm_mon << endl
+         << "tm_year :\t" << ptm->tm_year << endl
+         << "tm_wday :\t" << ptm->tm_wday << endl
+         << "tm_yday :\t" << ptm->tm_yday << endl
+         << "tm_isdst :\t" << ptm->tm_isdst << endl;
+}
+
+
+
+
+int HtDateTime::Test(void)
+{
+    // Self test: runs Test(dates, format) over four date tables (a
+    // personal format, ISO 8601, RFC 1123, RFC 850) and reports each
+    // outcome on cout. Returns 1 when every table passes, 0 otherwise.
+
+    const char *test_dates[] =
+    {
+        "1970.01.01 00:00:00",
+        "1970.01.01 00:00:01",
+        "1972.02.05 23:59:59",
+        "1972.02.28 00:59:59",
+        "1972.02.28 23:59:59",
+        "1972.02.29 00:00:00",
+        "1972.03.01 13:00:04",
+        "1973.03.01 12:00:00",
+        "1980.01.01 00:00:05",
+        "1984.12.31 23:00:00",
+        "1997.06.05 17:55:35",
+        "1999.12.31 23:00:00",
+        "2000.01.01 00:00:05",
+        "2000.02.28 23:00:05",
+        "2000.02.29 23:00:05",
+        "2000.03.01 00:00:05",
+        "2007.06.05 17:55:35",
+        "2038.01.19 03:14:07",
+        0
+    };
+
+    const char *test_dates_ISO8601[] =
+    {
+        "1970-01-01 00:00:00 GMT",
+        "1970-01-01 00:00:00 CET",
+        "1990-02-27 23:30:20 GMT",
+        "1999-02-28 06:53:40 GMT",
+        "1975-04-27 06:53:40 CET",
+        0
+    };
+
+    const char *test_dates_RFC1123[] =
+    {
+        "Sun, 06 Nov 1994 08:49:37 GMT",
+        "Sun, 25 Apr 1999 17:49:37 GMT",
+        "Sun, 25 Apr 1999 17:49:37 CET",
+        0
+    };
+
+    const char *test_dates_RFC850[] =
+    {
+        "Sunday, 06-Nov-94 08:49:37 GMT",
+        "Sunday, 25-Apr-99 17:49:37 GMT",
+        "Sunday, 25-Apr-99 17:49:37 CET",
+        0
+    };
+
+    const char myformat[]="%Y.%m.%d %H:%M:%S";
+
+    // One entry per run: the banner (plus an optional suffix appended
+    // to it), the null-terminated date table and the format to parse
+    // it with.
+    struct TestRun
+    {
+        const char *banner;
+        const char *banner_suffix;
+        const char **dates;
+        const char *format;
+    };
+
+    const TestRun runs[] =
+    {
+        { "Beginning Test of a personal format such as ", myformat,
+          test_dates, myformat },
+        { "Beginning Test of ISO 8601 format", "",
+          test_dates_ISO8601, ISO8601_FORMAT },
+        { "Beginning Test of RFC 1123 format", "",
+          test_dates_RFC1123, RFC1123_FORMAT },
+        { "Beginning Test of RFC 850 format", "",
+          test_dates_RFC850, RFC850_FORMAT }
+    };
+
+    const int run_count = sizeof(runs) / sizeof(runs[0]);
+    int failed_runs = 0;
+
+    for (int r = 0; r < run_count; r++)
+    {
+        cout << endl << runs[r].banner << runs[r].banner_suffix
+             << endl << endl;
+
+        if (Test((char **)runs[r].dates, runs[r].format))
+            cout << "Test OK." << endl;
+        else
+        {
+            cout << "Test Failed." << endl;
+            failed_runs++;
+        }
+    }
+
+    return (failed_runs == 0) ? 1 : 0;
+}
+
+
+
+int HtDateTime::Test(char **test_dates, const char *format)
+{
+    // Parses every string of the null-terminated test_dates table with
+    // the given strptime-style format, prints a comparison against the
+    // previously parsed value and, on success, the value in the
+    // standard output formats (local and GM).
+    // Returns 1 if no entry failed, 0 otherwise.
+
+    int ok = 1;
+    HtDateTime orig, conv;
+
+    for (int i = 0; test_dates[i]; i++)
+    {
+        cout << "\t " << i+1 << "\tDate string parsing of:" << endl;
+        cout << "\t\t" << test_dates[i] << endl;
+        cout << "\t\tusing format: " << format << endl << endl;
+
+        orig.SetFTime(test_dates[i], format);
+
+        // conv still holds the previous entry here (or "now" on the
+        // first pass).
+        orig.ComparisonTest(conv);
+
+        conv=orig;
+
+        if (orig != conv)
+        {
+            cout << "HtDateTime test failed!" << endl;
+            cout << "\t Original : " << orig.GetRFC1123() << endl;
+            // Report the converted object, not the original again.
+            cout << "\t Converted: " << conv.GetRFC1123() << endl;
+            ok = 0;
+        }
+        else
+        {
+            orig.ToLocalTime();
+            cout << endl << "\t Localtime viewing" << endl;
+            orig.ViewFormats();
+            orig.ToGMTime();
+            cout << endl << "\t GMtime viewing" << endl;
+            orig.ViewFormats();
+        }
+
+        cout << endl;
+    }
+
+    return ok;
+}
+
+
+void HtDateTime::ComparisonTest (const HtDateTime &right) const
+{
+    // Debug helper: prints how *this relates to right under each of
+    // the member comparison methods.
+    //
+    // The Get...() formatters all return the same my_strtime buffer,
+    // so the text of *this and the text of right are always written by
+    // separate output statements.
+
+    // Wording indexed by sign(result) + 1.
+    static const char *const relation[] =
+    {
+        " is lower than ",
+        " is equal to ",
+        " is greater than "
+    };
+
+    int result;
+
+    cout << "Comparison between:" << endl;
+
+    cout << " 1. " << GetRFC1123() << endl;
+    cout << " 2. " << right.GetRFC1123() << endl;
+    cout << endl;
+
+    // Complete comparison
+    cout << "\tComplete comparison (date and time)" << endl;
+    result = DateTimeCompare (right);
+    cout << "\t\t " << GetDateTimeDefault();
+    cout << relation[(result > 0) - (result < 0) + 1];
+    cout << " " << right.GetDateTimeDefault() << endl;
+
+    // Date comparison
+    cout << "\tDate comparison (ignoring time)" << endl;
+    result = DateCompare (right);
+    cout << "\t\t " << GetDateDefault();
+    cout << relation[(result > 0) - (result < 0) + 1];
+    cout << " " << right.GetDateDefault() << endl;
+
+    // Date comparison (after GM time conversion)
+    cout << "\tDate comparison (ignoring time) - GM time conversion" << endl;
+    result = GMDateCompare (right);
+    cout << "\t\t " << GetDateDefault();
+    cout << relation[(result > 0) - (result < 0) + 1];
+    cout << " " << right.GetDateDefault() << endl;
+
+    // Time comparison
+    cout << "\tTime comparison (ignoring date)" << endl;
+    result = TimeCompare (right);
+    cout << "\t\t " << GetTimeDefault();
+    cout << relation[(result > 0) - (result < 0) + 1];
+    cout << " " << right.GetTimeDefault() << endl;
+
+    // Time comparison (after GM time conversion)
+    cout << "\tTime comparison (ignoring date) - GM time conversion" << endl;
+    result = GMTimeCompare (right);
+    cout << "\t\t " << GetTimeDefault();
+    cout << relation[(result > 0) - (result < 0) + 1];
+    cout << " " << right.GetTimeDefault() << endl;
+}
+
+
+
+void HtDateTime::ViewFormats()
+{
+    // Prints the stored time in four standard formats. Each format is
+    // fetched and printed in its own statement because every getter
+    // returns the same my_strtime buffer.
+    const char *text;
+
+    text = GetRFC1123();
+    cout << "\t\t RFC 1123 Format : " << text << endl;
+
+    text = GetRFC850();
+    cout << "\t\t RFC 850 Format : " << text << endl;
+
+    text = GetAscTime();
+    cout << "\t\t C Asctime Format: " << text << endl;
+
+    text = GetISO8601();
+    cout << "\t\t ISO 8601 Format : " << text << endl;
+}
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.h b/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.h
new file mode 100644
index 00000000..c212b49a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtDateTime.h
@@ -0,0 +1,533 @@
+//
+// HtDateTime.h
+//
+// HtDateTime: Parse, split, compare and format dates and times.
+// Uses locale.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtDateTime.h,v 1.19 2004/05/28 13:15:20 lha Exp $
+
+///////
+ // Class for Date and Time
+ // Gabriele Bartolini - Prato - Italia
+ // Started: 22.04.1999
+///////
+
+// Version: 1.0
+// Release date: 07.05.1999
+
+//
+// General purpose of HtDateTime
+// The general purpose of this class is to provide an interface for
+// date and time management, and to relieve the programmer of handling
+// time_t, struct tm, time system functions and other related procedures
+// locally ... Everything regarding time and date must be put here.
+// Do you agree with me? Well, here is a summary of the class capabilities.
+
+// Attributes of the class:
+//
+// HtDateTime class has only 2 member attributes
+// - time_t Ht_t
+// - bool local_time
+//
+// Obviously Ht_t contains the most important piece of information.
+// local_time assumes a true value if we wanna consider the date and
+// time information as local. False means that our object value is
+// referred to the Greenwich Meridian time.
+
+// Interface provided:
+//
+// Construction:
+// - Default: set the date time value to now
+// - By passing a time_t value or pointer: Set to it
+// - By passing a struct tm value or pointer: Set to it
+// The last one could be useful sometimes, but it is better not to
+// use it.
+//
+// Parsing interface:
+// Not yet implemented ... :-)
+//
+// Setting Interface:
+// - from time_t: copy the time_t value into the object
+// - from struct tm: set the object's time_t value by converting
+// the value from the struct tm. If local_time is set to true,
+// converts it with mktime, else uses HtTimeGM.
+// - set to now
+// - from a string, by passing the input format: the method uses
+// strptime syntax (and invokes Htstrptime). For now, timezone
+// is ignored, and so data are stored as a GM date time value.
+// - from an int series, by specifying all the information (year,
+// month, day, hour, minute and second). It's all stored as
+// GM value.
+// - from various standard formats, such as C asctime, RFC 1123,
+// RFC 850 (these 3 types are suggested by the HTTP/1.1 standard),
+// ISO 8601, date and time default representation for the locale.
+// This list could get longer ... It all depends on us.
+// - setting the date and time to be represented in a local value
+// or universal (GM) one.
+//
+// Getting Interface
+// - in a personalized output format, by passing a string with
+// strftime values.
+// - in various standard formats, like C asctime, RFC 1123,
+// RFC 850, ISO 8601 (short too), date and time default
+// representation for the locale.
+// - getting the time_t value
+// - queries the local time status
+// - getting specific pieces of information of both the date and
+// the time, like the year, the month, the day of the week, of
+// the year or of the month, ... In short, every kind of thing
+// a tm structure is able to store ...
+//
+// Operator overloading
+// - Copy
+// - Every kind of logical comparison between 2 objects
+//
+// Comparison interface
+// This is divided in 2 sections.
+// - Static section:
+// comparison are made on a 2 struct tm values basis.
+// It's possible to compare the whole date time value, or
+// simply the date or the time value.
+// - Member functions section:
+// comparison are made between 2 HtDateTime objects.
+// You can compare either the whole date time, or the date, or the
+// time, both as they are or referring their values to the GM value.
+//
+// System functions interface
+// They are all done with previous "configure" checks
+// - for strptime
+// - for timegm
+//
+// Static methods
+// - check for a leap year
+// - check for a valid year number (according with time_t capabilities)
+// - check for a valid month number
+// - check for a valid day
+// - converts a 2 digits year number into a 4 digits one: from 1970 to 2069.
+// - converts a 4 digits year number into a 2 digits one.
+// - retrieve the difference in seconds between 2 HtDateTime objs
+//
+// Test Interface (only by defining TEST_HTDATETIME directive).
+//
+
+
+#ifndef _HTDATETIME_H
+#define _HTDATETIME_H
+
+#ifdef HAVE_CONFIG_H
+# include "htconfig.h"
+#endif
+
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#include "htString.h"
+
+// If you wanna do some tests
+#define TEST_HTDATETIME
+
+
+// HtDateTime: a calendar time held as a time_t (Ht_t) plus a flag
+// (local_time) selecting whether it is presented and converted as
+// local time or as GM (UTC) time.
+class HtDateTime
+{
+public:
+
+///////
+   // Construction
+   // Every constructor leaves the object in local-time mode.
+///////
+
+    // Default: now and local
+    HtDateTime() {ToLocalTime(); SettoNow();}
+
+    // From an integer (seconds from the epoch)
+    // Left out: causes ambiguity on systems where time_t is an integer
+    // HtDateTime(const int i) {SetDateTime((time_t)i); ToLocalTime();}
+
+    // From a time_t value and pointer
+    HtDateTime(time_t &t) {ToLocalTime(); SetDateTime(t);}
+    HtDateTime(time_t *t) {ToLocalTime(); SetDateTime(t);}
+
+    // From a struct tm value and pointer.
+    // SetDateTime(struct tm *) reads local_time to choose between
+    // mktime() and HtTimeGM(), so the flag must be set BEFORE the
+    // conversion; the structure is therefore read as local time.
+    HtDateTime(struct tm &t) {ToLocalTime(); SetDateTime(t);}
+    HtDateTime(struct tm *t) {ToLocalTime(); SetDateTime(t);}
+
+    // Copy constructor
+    inline HtDateTime(const HtDateTime& rhs);
+
+///////
+   // Interface methods
+///////
+
+///////
+   // "Parsing" interface
+///////
+
+    int Parse(const char *);    // It looks for the similar format
+                                // then sets the date by invoking
+                                // right method
+
+
+///////
+   // "Setting" interface
+///////
+
+    // Setting from a time_t value
+    void SetDateTime(const time_t &t) { Ht_t = t; }     // by reference
+    void SetDateTime(const time_t *t) { Ht_t = *t; }    // by pointer
+
+    // Set object time_t value from a struct tm
+    // (mktime in local-time mode, HtTimeGM otherwise)
+    void SetDateTime(struct tm *);                              // by pointer
+    inline void SetDateTime(struct tm &t) { SetDateTime(&t);}   // by reference
+
+    // Set GM Time from single values input
+    // Returns true if everything went well, false otherwise
+    bool SetGMDateTime( int year, int mon, int mday,
+                        int hour=0, int min=0, int sec=0);
+
+    // Set to Now
+    void SettoNow();
+
+    // Parsing various input string format
+    // It ignores time Zone value - always stores as GM
+    char *SetFTime(const char *, const char *);     // as strptime
+
+    void SetAscTime(char *);            // Sun Nov 6 08:49:37 1994
+    void SetRFC1123(char *);            // Sun, 06 Nov 1994 08:49:37 GMT
+    void SetRFC850(char *);             // Sunday, 06-Nov-94 08:49:37 GMT
+    void SetISO8601(char *);            // 1994-11-06 08:49:37 GMT
+    void SetTimeStamp(char *);          // 19941106084937
+
+    void SetDateTimeDefault(char *);    // Default date and time representation
+                                        // for the locale
+
+    ///////
+       // Methods for setting Local and GM time formats (Switches)
+    ///////
+
+    void ToLocalTime() {local_time=true;}
+    void ToGMTime() {local_time=false;}
+
+
+///////
+   // "Getting" interface
+///////
+
+
+    ///////
+       // Output formats
+    ///////
+
+    // Personalized output
+    char *GetFTime(const char *format) const;               // as strftime
+    size_t GetFTime(char *, size_t, const char *) const;    // as strftime
+
+    char *GetAscTime() const;           // Sun Nov 6 08:49:37 1994
+    char *GetRFC1123() const;           // Sun, 06 Nov 1994 08:49:37 GMT
+    char *GetRFC850() const;            // Sunday, 06-Nov-94 08:49:37 GMT
+    char *GetISO8601() const;           // 1994-11-06 08:49:37 GMT
+    char *GetTimeStamp() const;         // 19941106084937
+
+    char *GetDateTimeDefault() const;   // Default date and time representation
+                                        // for the locale
+
+    // Partial (only date or only time)
+    char *GetShortISO8601() const;      // 1994-11-06
+    char *GetDateDefault() const;       // Default date form for the locale
+    char *GetTimeDefault() const;       // Default time form for the locale
+
+
+    ///////
+       // Gets the time_t value
+    ///////
+
+    time_t GetTime_t() const {return Ht_t;}
+
+
+    ///////
+       // Gets specific date and time values (from a struct tm)
+       // All of them go through GetStructTM(), so they follow the
+       // local_time flag.
+    ///////
+
+    // Gets the year (4 digits)
+    int GetYear() const { return ( GetStructTM().tm_year + 1900) ;}
+
+    // Gets the month (1 = January)
+    int GetMonth() const { return (GetStructTM().tm_mon + 1);}
+
+    // Gets the day of the week (1 = Sunday)
+    int GetWDay() const { return (GetStructTM().tm_wday + 1);}
+
+    // Gets the day of the month
+    int GetMDay() const { return GetStructTM().tm_mday;}
+
+    // Gets the day of the year (1 = January 1)
+    int GetYDay() const { return (GetStructTM().tm_yday + 1);}
+
+    // Gets the hour
+    int GetHour() const { return GetStructTM().tm_hour;}
+
+    // Gets the minute
+    int GetMinute() const { return GetStructTM().tm_min;}
+
+    // Gets the second
+    int GetSecond() const { return GetStructTM().tm_sec;}
+
+    // Daylight saving time is in effect at that time?
+    int GetIsDst() const { return GetStructTM().tm_isdst;}
+
+
+    ///////
+       // Methods for querying localtime status
+    ///////
+
+    bool isLocalTime() const {return local_time;}
+    bool isGMTime() const {return !local_time;}
+
+
+
+///////
+   // Methods for comparison
+///////
+
+    // Return 0 if equal, -1 if *this is lower than the argument,
+    // 1 if *this is greater than the argument
+
+    int DateTimeCompare (const HtDateTime &) const;     // Compares both date and time
+
+    int DateCompare (const HtDateTime &) const;         // Compares the date
+    int TimeCompare (const HtDateTime &) const;         // Compares the time
+
+    // Refers the date and the time to a GM value, then compares
+    int GMDateTimeCompare (const HtDateTime &) const;   // Compares both date and time
+    int GMDateCompare (const HtDateTime &) const;       // Compares the date
+    int GMTimeCompare (const HtDateTime &) const;       // Compares the time
+
+
+///////
+   // Operator overloading
+///////
+
+    // For comparisons - between objects of the same class
+    // (based on the time_t value only)
+
+    inline bool operator==(const HtDateTime &right) const;
+    inline bool operator<(const HtDateTime &right) const;
+
+    bool operator!=(const HtDateTime &right) const
+        {return !( *this == right );}
+
+    bool operator>=(const HtDateTime &right) const
+        {return !( *this < right);}
+
+    bool operator<=(const HtDateTime &right) const
+        {return !( right < *this);}
+
+    bool operator>(const HtDateTime &right) const
+        {return right < *this; }
+
+
+    // For comparisons - between HtDateTime objects and int
+    // (the int is taken as a time_t value)
+
+    bool operator==(const int right) const
+        {return ( Ht_t == (time_t) right );}
+
+    bool operator<(const int right) const
+        {return ( Ht_t < (time_t) right );}
+
+    bool operator!=(const int right) const
+        {return !( *this == right );}
+
+    bool operator>=(const int right) const
+        {return !( *this < right);}
+
+    bool operator<=(const int right) const
+        {return !( *this > right);}
+
+    bool operator>(const int right) const
+        {return (Ht_t > (time_t) right); }
+
+
+    // For Copy
+
+    inline HtDateTime &operator=(const HtDateTime &right);
+    inline HtDateTime &operator=(const int right);
+
+
+
+
+/////// // STATIC METHODS // ///////
+
+    // Here we can add static methods as we want more :-)
+    // Then invoke them with HtDateTime::MethodXXX ()
+
+    // These four are defined out of line in HtDateTime.cc, so they are
+    // NOT declared inline here: an inline function has to be defined in
+    // every translation unit that uses it.
+
+    static bool LeapYear(int);              // Is a leap year?
+
+    // These checks are made for time_t compatibility
+    static bool isAValidYear(int);          // Is a valid year number
+
+    static bool isAValidMonth(int);         // Is a valid month number
+    static bool isAValidDay(int, int, int); // Is a valid day (day, month, year)
+
+
+    // Converts a 2 digits year in a 4 one - with no checks
+    // (70..99 -> 1970..1999, anything lower -> 2000 + y)
+    static int Year_From2To4digits (int y)
+    {
+        if ( y >= 70 ) return y+1900;
+        else return y+2000;
+    }
+
+    // Converts a 4 digits year in a 2 one - with no checks
+    static int Year_From4To2digits (int y)
+    {
+        if ( y >= 2000 ) return y - 2000;
+        else return y - 1900;
+    }
+
+    // Difference in seconds (first minus second)
+    static int GetDiff(const HtDateTime &, const HtDateTime &);
+
+    // Check equality from 2 struct tm pointers
+    // Returns 0 if equal, -1 if tm1 is lower than tm2, 1 if tm1 is greater than tm2
+
+    // Compares the whole time information (both date and time)
+    static int DateTimeCompare(const struct tm *tm1,
+                               const struct tm *tm2);
+
+    // Compares only date
+    static int DateCompare(const struct tm *tm1,
+                           const struct tm *tm2);
+
+    // Compares only time
+    static int TimeCompare(const struct tm *tm1,
+                           const struct tm *tm2);
+
+
+
+/////// // HIDDEN ATTRIBUTES & METHODS // ///////
+
+protected: // to permit inheritance
+
+    time_t Ht_t;            // the stored instant
+    bool local_time;        // true: local time, false: GM time
+
+    static const int days[];
+
+///////
+   // Sets and gets the struct tm depending on local_time status
+///////
+
+    void RefreshStructTM() const;       // Refresh its content
+    struct tm &GetStructTM() const;     // gets it
+    void GetStructTM(struct tm & t) const { t=GetStructTM(); }  // Gets and copy
+
+
+///////
+   // Gets the struct tm ignoring local_time status
+///////
+
+    struct tm &GetGMStructTM() const;       // gets it
+    void GetGMStructTM(struct tm &) const;  // Gets and copy
+
+
+///////
+   // Interface for system functions
+///////
+
+    // Interface for timegm
+    static time_t HtTimeGM (struct tm*);
+
+
+#ifdef TEST_HTDATETIME
+
+///////
+   // Only for debug: view of struct tm fields
+///////
+
+public:
+
+    static void ViewStructTM(struct tm *);  // view of struct tm fields
+    virtual void ViewStructTM();            // view of struct tm fields
+    void ViewFormats();                     // View of various formats
+
+    void ComparisonTest (const HtDateTime &) const;     // comparison
+
+
+    // Test of the class
+    static int Test(void);
+    static int Test(char **test_dates, const char *format);
+
+#endif
+
+};
+
+
+///////
+ // Copy constructor
+///////
+
+inline
+HtDateTime::HtDateTime (const HtDateTime& rhs)
+    : Ht_t(rhs.Ht_t),               // same instant
+      local_time(rhs.local_time)    // same local/GM mode
+{
+}
+
+///////
+ // Operator overloading
+///////
+
+inline
+bool HtDateTime::operator==(const HtDateTime &right) const
+{
+    // Equality looks at the time_t value only; the local_time flag
+    // plays no part.
+    return Ht_t == right.Ht_t;
+}
+
+inline
+bool HtDateTime::operator<(const HtDateTime &right) const
+{
+    // Ordering looks at the time_t value only.
+    return Ht_t < right.Ht_t;
+}
+
+///////
+ // Copy
+///////
+
+inline
+HtDateTime &HtDateTime::operator=(const HtDateTime &right)
+{
+    // Self-assignment: nothing to copy.
+    if (this == &right)
+        return *this;
+
+    Ht_t = right.Ht_t;               // the time_t value
+    local_time = right.local_time;   // the local/GM flag
+
+    return *this;
+}
+
+inline
+HtDateTime &HtDateTime::operator=(const int right)
+{
+    // The int is taken as a time_t value; the object ends up in
+    // local-time mode.
+    ToLocalTime();
+    Ht_t = static_cast<time_t>(right);
+
+    return *this;
+}
+
+
+
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.cc
new file mode 100644
index 00000000..92267767
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.cc
@@ -0,0 +1,198 @@
+//
+// HtHeap.cc
+//
+// HtHeap: A Heap class which holds objects of type Object.
+// (A heap is a semi-ordered tree-like structure;
+// it ensures that the first item is *always* the smallest.
+// NOTE: To use a heap, you must implement the compare() function for
+// your Object classes. The assumption used here is that a negative
+// result means less-than, 0 means equal, and a positive result means
+// greater-than. Thus this is a "min heap" for that definition.)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtHeap.cc,v 1.12 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtHeap.h"
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+//*********************************************************************
+// void HtHeap::HtHeap()
+// Default constructor
+//
+HtHeap::HtHeap()
+    : data(new HtVector)   // start with an empty backing vector
+{
+}
+
+
+//*********************************************************************
+// void HtHeap::HtHeap(HtVector vector)
+// Constructor from vector
+// (has the side effect of not allocating double memory)
+//
+HtHeap::HtHeap(HtVector vector)
+{
+    const int count = vector.Count();
+    data = static_cast<HtVector *>(vector.Copy());
+
+    // Bottom-up heap construction: begin at the first candidate
+    // interior node and sift every node from there back to the root
+    // down into its own subtree.
+    int node = parentOf(count);
+    while (node >= 0)
+    {
+        pushDownRoot(node);
+        node--;
+    }
+}
+
+
+//*********************************************************************
+// void HtHeap::~HtHeap()
+// Destructor
+//
+HtHeap::~HtHeap()
+{
+    // All cleanup lives in Destroy().
+    this->Destroy();
+}
+
+
+//*********************************************************************
+// void HtHeap::Destroy()
+// Deletes all objects from the heap
+//
+void HtHeap::Destroy()
+{
+    // Destroy() is public and is also what the destructor calls.  The
+    // pointer is cleared after the delete so that an explicit Destroy()
+    // followed by the destructor (or a second Destroy()) no longer frees
+    // the backing vector twice.
+    //
+    // NOTE(review): after an explicit Destroy() the heap has no backing
+    // vector; the other members still dereference `data` unconditionally,
+    // so the object must not be used again except to be assigned to or
+    // destroyed.
+    if (data == 0)
+        return;
+
+    data->Destroy();
+    delete data;
+    data = 0;
+}
+
+
+//*********************************************************************
+// void HtHeap::Add(Object *object)
+// Add an object to the heap.
+//
+void HtHeap::Add(Object *object)
+{
+    // Append at the end of the vector, then let the new entry rise
+    // towards the root until the heap order holds again.
+    data->Add(object);
+    const int lastIndex = data->Count() - 1;
+    percolateUp(lastIndex);
+}
+
+
+
+//*********************************************************************
+// Object *HtHeap::Remove()
+// Remove an object from the top of the heap
+// This requires re-heapifying by placing the last element on the top
+// and pushing it down.
+//
+Object *HtHeap::Remove()
+{
+    Object *top = Peek();
+
+    // Move the last entry into the root slot and drop the tail slot.
+    Object *tail = data->Last();
+    data->Assign(tail, 0);
+    const int tailIndex = data->Count() - 1;
+    data->RemoveFrom(tailIndex);
+
+    // With two or more entries left, the new root may be out of place.
+    if (data->Count() >= 2)
+        pushDownRoot(0);
+
+    return top;
+}
+
+//*********************************************************************
+// HtHeap *HtHeap::Copy() const
+// Return a deep copy of the heap.
+//
+Object *HtHeap::Copy() const
+{
+    // Build a new heap from this heap's vector via the HtVector
+    // constructor, which copies the vector and heapifies the copy.
+    return new HtHeap(*data);
+}
+
+
+//*********************************************************************
+// HtHeap &HtHeap::operator=(HtHeap &heap)
+// Return a deep copy of the heap.
+//
+HtHeap &HtHeap::operator=(HtHeap &heap)
+{
+    // Self-assignment must be a no-op: destroying our own vector first
+    // would leave nothing to copy from.
+    if (this == &heap)
+        return *this;
+
+    // Take our own copy of the source vector, the same way the
+    // HtVector constructor does.  Merely copying the pointer made both
+    // heaps own one vector, which was then deleted twice, once by each
+    // destructor.  The source is already heap-ordered, so the copy
+    // needs no re-heapifying.
+    HtVector *copy = static_cast<HtVector *>(heap.data->Copy());
+
+    Destroy();      // release the contents we held before
+    data = copy;
+    return *this;
+}
+
+//*********************************************************************
+// void HtHeap::percolateUp(int leaf)
+// Pushes the node pointed to by leaf upwards
+// it will travel as far as possible upwards to ensure the data is a heap
+//
+void HtHeap::percolateUp(int leaf)
+{
+    // The entry being moved is held aside while its ancestors are
+    // shifted down one level at a time; it is written back exactly
+    // once, at the slot where it finally belongs.
+    Object *moving = data->Nth(leaf);
+
+    for (int above = parentOf(leaf); leaf > 0; above = parentOf(leaf))
+    {
+        Object *parentObj = data->Nth(above);
+
+        // Stop as soon as the moving entry is not smaller than its parent.
+        if (moving->compare(*parentObj) >= 0)
+            break;
+
+        data->Assign(parentObj, leaf);
+        leaf = above;
+    }
+
+    data->Assign(moving, leaf);
+}
+
+//*********************************************************************
+// void HtHeap::pushDownRoot(int root)
+// Pushes the node pointed to by root into the heap
+// it will go down as far as necessary to ensure the data is a heap
+//
+void HtHeap::pushDownRoot(int root)
+{
+    // Sift the entry at index `root` down until neither of its children
+    // compares smaller than it.
+    //
+    // `count` is the number of entries, so every index below it is a
+    // valid child position.  The previous code bounded the child tests
+    // with Count() - 1, which meant the last entry was never considered
+    // as a child: a two-entry heap such as [5, 3] was left unordered.
+    const int count = data->Count();
+
+    // Nothing to do for an empty heap or an out-of-range start index
+    // (the HtVector constructor calls us with index 0 even when the
+    // vector is empty).
+    if (root < 0 || root >= count)
+        return;
+
+    // As in percolateUp(), hold the moving entry aside and write it
+    // back once at its final position.
+    Object *value = data->Nth(root);
+
+    for (;;)
+    {
+        int childPos = leftChildOf(root);
+        if (childPos >= count)
+            break;                          // at a leaf, so we're done
+
+        // Pick the smaller of the two children when a right child exists.
+        const int rightPos = rightChildOf(root);
+        if (rightPos < count &&
+            data->Nth(rightPos)->compare(*(data->Nth(childPos))) < 0)
+        {
+            childPos = rightPos;
+        }
+
+        // Found the right position once the smaller child is not
+        // smaller than the moving entry.
+        if (data->Nth(childPos)->compare(*value) >= 0)
+            break;
+
+        // Otherwise pull the child up one level and keep descending.
+        data->Assign(data->Nth(childPos), root);
+        root = childPos;
+    }
+
+    data->Assign(value, root);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.h b/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.h
new file mode 100644
index 00000000..75e3c411
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtHeap.h
@@ -0,0 +1,92 @@
+//
+// HtHeap.h
+//
+// HtHeap: A Heap class which holds objects of type Object.
+// (A heap is a semi-ordered tree-like structure;
+// it ensures that the first item is *always* the smallest.
+// NOTE: To use a heap, you must implement the compare() function for
+// your Object classes. The assumption used here is that a negative
+// result means less-than, 0 means equal, and a positive result means
+// greater-than. Thus this is a "min heap" for that definition.)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtHeap.h,v 1.7 2004/05/28 13:15:20 lha Exp $
+//
+//
+#ifndef _HtHeap_h_
+#define _HtHeap_h_
+#include "Object.h"
+#include "HtVector.h"
+
+// Binary heap of Object pointers stored in an HtVector.  The element
+// at index 0 is the top of the heap; children of index i live at
+// 2*i + 1 and 2*i + 2.
+class HtHeap : public Object
+{
+public:
+ //
+ // Constructor/Destructor
+ //
+ // The HtVector constructor takes its argument by value and builds
+ // the heap from a copy of it.
+ // NOTE(review): no copy constructor is declared here, so copying an
+ // HtHeap by value would presumably share the `data` pointer between
+ // both objects -- confirm before passing heaps by value.
+ //
+ HtHeap();
+ HtHeap(HtVector vector);
+ ~HtHeap();
+
+ //
+ // Add() will add an Object to the heap in the appropriate location
+ //
+ void Add(Object *);
+
+ //
+ // Destroy() will delete all the objects in the heap and the backing
+ // vector itself. The destructor does nothing but call Destroy().
+ //
+ void Destroy();
+
+ //
+ // Peek() will return a pointer to the top object in the heap
+ // (element 0 of the backing vector) without removing it.
+ //
+ Object *Peek() {return data->Nth(0);}
+
+ //
+ // Remove() will return the same pointer as Peek() but will also
+ // remove the entry from the heap and re-heapify
+ //
+ Object *Remove();
+
+ //
+ // Access to the number of elements (both forward to the vector)
+ //
+ int Count() {return data->Count();}
+ int IsEmpty() {return data->IsEmpty();}
+
+ //
+ // Deep copy member function
+ //
+ Object *Copy() const;
+
+ //
+ // Assignment; the pointer overload dereferences its argument and
+ // forwards to the reference overload, so it must not be null.
+ //
+ HtHeap &operator= (HtHeap *heap) {return *this = *heap;}
+ HtHeap &operator= (HtHeap &heap);
+
+protected:
+ // The vector class should keep track of everything for us
+ HtVector *data;
+
+ // Functions for establishing the relations between elements.
+ // Integer division truncates toward zero, so parentOf(0) is 0.
+ int parentOf (int i)
+ { return (i - 1)/2; }
+ int leftChildOf (int i)
+ { return 2*i + 1; }
+ int rightChildOf (int i)
+ { return 2* (i+1); }
+
+ // Protected procedures for performing heap-making operations
+ void percolateUp (int leaf); // pushes the node up as far as possible
+ void pushDownRoot (int root); // pushes the node down as necessary
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.cc
new file mode 100644
index 00000000..59412b24
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.cc
@@ -0,0 +1,66 @@
+// HtMaxMin
+//
+// macros and tools for computing max and min of values
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtMaxMin.cc,v 1.5 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include"HtMaxMin.h"
+
+unsigned int
+HtMaxMin::max_v(unsigned int *vals, int n)
+{
+    // Largest of the first n values.  vals[0] is read unconditionally,
+    // so the array must hold at least one element.
+    unsigned int best = vals[0];
+    int idx = 1;
+    while (idx < n)
+    {
+        if (vals[idx] > best)
+            best = vals[idx];
+        idx++;
+    }
+    return best;
+}
+
+unsigned short
+HtMaxMin::max_v(unsigned short *vals, int n)
+{
+    // Largest of the first n values.  vals[0] is read unconditionally,
+    // so the array must hold at least one element.
+    unsigned short best = vals[0];
+    int idx = 1;
+    while (idx < n)
+    {
+        if (vals[idx] > best)
+            best = vals[idx];
+        idx++;
+    }
+    return best;
+}
+
+unsigned int
+HtMaxMin::min_v(unsigned int *vals, int n)
+{
+    // Smallest of the first n values.  vals[0] is read unconditionally,
+    // so the array must hold at least one element.
+    unsigned int best = vals[0];
+    int idx = 1;
+    while (idx < n)
+    {
+        if (vals[idx] < best)
+            best = vals[idx];
+        idx++;
+    }
+    return best;
+}
+
+unsigned short
+HtMaxMin::min_v(unsigned short *vals, int n)
+{
+    // Smallest of the first n values.  vals[0] is read unconditionally,
+    // so the array must hold at least one element.
+    unsigned short best = vals[0];
+    int idx = 1;
+    while (idx < n)
+    {
+        if (vals[idx] < best)
+            best = vals[idx];
+        idx++;
+    }
+    return best;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.h b/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.h
new file mode 100644
index 00000000..26d0e44e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtMaxMin.h
@@ -0,0 +1,34 @@
+// HtMaxMin
+//
+// macros and tools for computing max and min of values
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtMaxMin.h,v 1.5 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef _HtMaxMin_h_
+#define _HtMaxMin_h_
+
+#define HtMAX(a,b) (((a)>(b)) ? (a) : (b))
+#define HtMIN(a,b) (((a)<(b)) ? (a) : (b))
+
+
+// Max/Min value of an array
+// Static helpers returning the largest / smallest value of an array.
+// `vals` points at the first element and `n` is the element count;
+// the implementations read vals[0] unconditionally, so the array must
+// hold at least one element.
+class HtMaxMin
+{
+ public:
+ // compute max/min of an array of values
+ static unsigned int max_v(unsigned int *vals,int n);
+ static unsigned short max_v(unsigned short *vals,int n);
+ static unsigned int min_v(unsigned int *vals,int n);
+ static unsigned short min_v(unsigned short *vals,int n);
+};
+
+
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc
new file mode 100644
index 00000000..8026622d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc
@@ -0,0 +1,450 @@
+//
+// HtPack.cc
+//
+// HtPack: Compress and uncompress data in e.g. simple structures.
+// The structure must have the layout defined in the ABI;
+// the layout the compiler generates.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtPack.cc,v 1.8 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtPack.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+
+// For the moment, these formats are accepted:
+// "i" native int, with most compressed value 0
+// "u" unsigned int, with most compressed value 0
+// "c" unsigned int, with most compressed value 1.
+//
+// If someone adds other formats (and uses them), please note
+// that structure padding may give surprising effects on some
+// (most) platforms, for example if you try to unpack a
+// structure with the imagined signature "isi" (int, short, int).
+// You will want to solve that portably.
+//
+// Compression is done to 2 bits description (overhead) each,
+// plus variable-sized data.
+// Theoretically, different formats can use different number of
+// bits in the description with a few changes.
+// The description is located in a byte before every four
+// "fields".
+// Compress the raw struct at `data` according to `format`.
+//
+// `format` is a sequence of field letters ('c', 'i' or 'u'), each
+// optionally followed by a decimal repeat count.  Every field is read
+// from `data` as a native (unsigned) int and written as a 2-bit code
+// plus 0, 1, sizeof(short) or sizeof(int) bytes of payload.  The codes
+// of up to four consecutive fields share one description byte that
+// precedes their payload.
+//
+// Returns the compressed bytes.  When built with DEBUG (and without
+// NOSTREAM) an unknown format letter is reported on cerr and an empty
+// String is returned; otherwise unknown letters are skipped.
+String
+htPack(const char format[], const char *data)
+{
+    const char *s = format;
+
+    // We insert the encodings by number, rather than shifting and
+    // inserting at the "bottom". This should make it faster for
+    // decoding, which presumably is more important than the speed
+    // of encoding.
+    int code_no = 0;
+
+    // Make a wild guess that we will compress some ordinary sized
+    // struct. This guess only has speed effects.
+    String compressed(60);
+
+    // Accumulated codes.
+    unsigned int description = 0;
+
+    // Store the encoding here. We cannot use a char *, as the
+    // string may be reallocated and moved.
+    int code_index = 0;
+
+    // Make place for the first codes.
+    compressed << '\0';
+
+    // Format string loop.
+    while (*s)
+    {
+        int fchar = *s++;
+        int n;
+
+        // <ctype.h> functions require a value representable as
+        // unsigned char (or EOF); a plain char may be negative.
+        if (isdigit((unsigned char) *s))
+        {
+            char* t;
+            n = strtol(s, &t, 10);
+            s = t;
+        }
+        else
+            n = 1;
+
+        // Loop over N in e.g. "iN" (default 1).
+        while (n--)
+        {
+            // Format character handling.
+            switch (fchar)
+            {
+            case 'c':
+                {
+                    // We compress an unsigned int with the most common
+                    // value 1 as this:
+                    // 00 - value is 1.
+                    // 01 - value fits in unsigned char - appended.
+                    // 10 - value fits in unsigned short - appended.
+                    // 11 - just plain unsigned int - appended (you lose).
+                    unsigned int value;
+
+                    // Initialize, but allow disalignment.
+                    memcpy(&value, data, sizeof value);
+                    data += sizeof(unsigned int);
+
+                    int mycode;
+                    if (value == 1)
+                    {
+                        mycode = 0;
+                    }
+                    else
+                    {
+                        unsigned char charvalue = (unsigned char) value;
+                        unsigned short shortvalue = (unsigned short) value;
+                        if (value == charvalue)
+                        {
+                            mycode = 1;
+                            compressed << charvalue;
+                        }
+                        else if (value == shortvalue)
+                        {
+                            mycode = 2;
+                            compressed.append((char *) &shortvalue, sizeof shortvalue);
+                        }
+                        else
+                        {
+                            mycode = 3;
+                            compressed.append((char *) &value, sizeof value);
+                        }
+                    }
+
+                    description |= mycode << (2*code_no++);
+                }
+                break;
+
+            case 'i':
+                {
+                    // We compress a (signed) int as follows:
+                    // 00 - value is 0.
+                    // 01 - value fits in char - appended.
+                    // 10 - value fits in short - appended.
+                    // 11 - just plain int - appended (you lose).
+                    int value;
+
+                    // Initialize, but allow disalignment.
+                    memcpy(&value, data, sizeof value);
+                    data += sizeof(int);
+
+                    int mycode;
+                    if (value == 0)
+                    {
+                        mycode = 0;
+                    }
+                    else
+                    {
+                        char charvalue = char(value);
+                        short shortvalue = short(value);
+                        if (value == charvalue)
+                        {
+                            mycode = 1;
+                            compressed << charvalue;
+                        }
+                        else if (value == shortvalue)
+                        {
+                            mycode = 2;
+                            compressed.append((char *) &shortvalue, sizeof shortvalue);
+                        }
+                        else
+                        {
+                            mycode = 3;
+                            compressed.append((char *) &value, sizeof value);
+                        }
+                    }
+
+                    description |= mycode << (2*code_no++);
+                }
+                break;
+
+            case 'u':
+                {
+                    // We compress an unsigned int like an int:
+                    // 00 - value is 0.
+                    // 01 - value fits in unsigned char - appended.
+                    // 10 - value fits in unsigned short - appended.
+                    // 11 - just plain unsigned int - appended (you lose).
+                    unsigned int value;
+
+                    // Initialize, but allow disalignment.
+                    memcpy(&value, data, sizeof value);
+                    data += sizeof(unsigned int);
+
+                    int mycode;
+                    if (value == 0)
+                    {
+                        mycode = 0;
+                    }
+                    else
+                    {
+                        unsigned char charvalue = (unsigned char) value;
+                        unsigned short shortvalue = (unsigned short) value;
+                        if (value == charvalue)
+                        {
+                            mycode = 1;
+                            compressed << charvalue;
+                        }
+                        else if (value == shortvalue)
+                        {
+                            mycode = 2;
+                            compressed.append((char *) &shortvalue, sizeof shortvalue);
+                        }
+                        else
+                        {
+                            mycode = 3;
+                            compressed.append((char *) &value, sizeof value);
+                        }
+                    }
+
+                    description |= mycode << (2*code_no++);
+                }
+                break;
+
+            default:
+#ifndef NOSTREAM
+#ifdef DEBUG
+                if (1)
+                    cerr << "Invalid char \'" << char(fchar)
+                         << "\' in pack format \"" << format << "\""
+                         << endl;
+                return "";
+#endif
+#endif
+                ; // Must always have a statement after a label.
+            }
+
+            // Assuming 8-bit chars here. Flush encodings after 4 (2 bits
+            // each) or when the code-string is consumed.
+            if (code_no == 4 || (n == 0 && *s == 0))
+            {
+                char *codepos = compressed.get() + code_index;
+
+                *codepos = description;
+                description = 0;
+                code_no = 0;
+
+                if (n || *s)
+                {
+                    // If more data to be encoded, then we need a new place to
+                    // store the encodings.
+                    code_index = compressed.length();
+                    compressed << '\0';
+                }
+            }
+        }
+    }
+
+    return compressed;
+}
+
+
+// Reverse the effect of htPack.
+// Expand data produced by htPack() back into the raw struct layout.
+//
+// `format` must be the same format string that was given to htPack().
+// Each decoded field is appended to the result as a native (unsigned)
+// int.  `data` is a by-value pointer: the caller's pointer is not
+// advanced.
+//
+// Returns the decompressed bytes.  When built with DEBUG (and without
+// NOSTREAM) an unknown format letter is reported on cerr and an empty
+// String is returned; otherwise unknown letters are skipped.
+String
+htUnpack(const char format[], const char *data)
+{
+    const char *s = format;
+
+    // The description needs to be renewed immediately.
+    unsigned int description = 1;
+
+    // Make a wild guess about that we decompress to some ordinary
+    // sized struct and assume the cost of allocation some extra
+    // memory is much less than the cost of allocating more.
+    // This guess only has speed effects.
+    String decompressed(60);
+
+    // Format string loop.
+    while (*s)
+    {
+        int fchar = *s++;
+        int n;
+
+        // <ctype.h> functions require a value representable as
+        // unsigned char (or EOF); a plain char may be negative.
+        if (isdigit((unsigned char) *s))
+        {
+            char* t;
+            n = strtol(s, &t, 10);
+            s = t;
+        }
+        else
+            n = 1;
+
+        // Loop over N in e.g. "iN" (default 1).
+        while (n--)
+        {
+            // Time to renew description?
+            // Bit 8 is a sentinel: after four 2-bit shifts it is all
+            // that remains and the value reads exactly 1.  The byte
+            // must be read as unsigned: where char is signed, a
+            // description byte >= 0x80 would sign-extend, fill the
+            // upper bits, and the sentinel test would never fire again.
+            if (description == 1)
+                description = 256 | (unsigned char) *data++;
+
+            // Format character handling.
+            switch (fchar)
+            {
+            case 'c':
+                {
+                    // An unsigned int with the most common value 1 is
+                    // compressed as follows:
+                    // 00 - value is 1.
+                    // 01 - value fits in unsigned char - appended.
+                    // 10 - value fits in unsigned short - appended.
+                    // 11 - just plain unsigned int - appended (you lose).
+                    unsigned int value;
+
+                    switch (description & 3)
+                    {
+                    case 0:
+                        value = 1;
+                        break;
+
+                    case 1:
+                        {
+                            unsigned char charvalue;
+                            memcpy(&charvalue, data, sizeof charvalue);
+                            value = charvalue;
+                            data++;
+                        }
+                        break;
+
+                    case 2:
+                        {
+                            unsigned short int shortvalue;
+                            memcpy(&shortvalue, data, sizeof shortvalue);
+                            value = shortvalue;
+                            data += sizeof shortvalue;
+                        }
+                        break;
+
+                    case 3:
+                        {
+                            memcpy(&value, data, sizeof value);
+                            data += sizeof value;
+                        }
+                        break;
+                    }
+                    decompressed.append((char *) &value, sizeof value);
+                }
+                break;
+
+            case 'i':
+                {
+                    // A (signed) int is compressed as follows:
+                    // 00 - value is 0.
+                    // 01 - value fits in char - appended.
+                    // 10 - value fits in short - appended.
+                    // 11 - just plain int - appended (you lose).
+                    int value;
+
+                    switch (description & 3)
+                    {
+                    case 0:
+                        value = 0;
+                        break;
+
+                    case 1:
+                        {
+                            char charvalue;
+                            memcpy(&charvalue, data, sizeof charvalue);
+                            value = charvalue;
+                            data++;
+                        }
+                        break;
+
+                    case 2:
+                        {
+                            short int shortvalue;
+                            memcpy(&shortvalue, data, sizeof shortvalue);
+                            value = shortvalue;
+                            data += sizeof shortvalue;
+                        }
+                        break;
+
+                    case 3:
+                        {
+                            memcpy(&value, data, sizeof value);
+                            data += sizeof value;
+                        }
+                        break;
+                    }
+                    decompressed.append((char *) &value, sizeof value);
+                }
+                break;
+
+            case 'u':
+                {
+                    // An unsigned int is compressed as follows:
+                    // 00 - value is 0.
+                    // 01 - value fits in unsigned char - appended.
+                    // 10 - value fits in unsigned short - appended.
+                    // 11 - just plain unsigned int - appended (you lose).
+                    unsigned int value;
+
+                    switch (description & 3)
+                    {
+                    case 0:
+                        value = 0;
+                        break;
+
+                    case 1:
+                        {
+                            unsigned char charvalue;
+                            memcpy(&charvalue, data, sizeof charvalue);
+                            value = charvalue;
+                            data++;
+                        }
+                        break;
+
+                    case 2:
+                        {
+                            unsigned short int shortvalue;
+                            memcpy(&shortvalue, data, sizeof shortvalue);
+                            value = shortvalue;
+                            data += sizeof shortvalue;
+                        }
+                        break;
+
+                    case 3:
+                        {
+                            memcpy(&value, data, sizeof value);
+                            data += sizeof value;
+                        }
+                        break;
+                    }
+                    decompressed.append((char *) &value, sizeof value);
+                }
+                break;
+
+            default:
+#ifndef NOSTREAM
+#ifdef DEBUG
+                if (1)
+                    cerr << "Invalid char \'" << char(fchar)
+                         << "\' in unpack format \"" << format << "\""
+                         << endl;
+                return "";
+#endif
+#endif
+                ; // Must always have a statement after a label.
+            }
+
+            description >>= 2;
+        }
+    }
+
+    return decompressed;
+}
+
+// End of HtPack.cc
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtPack.h b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.h
new file mode 100644
index 00000000..3da6fa9a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.h
@@ -0,0 +1,39 @@
+//
+// HtPack.h
+//
+// HtPack: Compress and uncompress data in e.g. simple structures.
+// The structure must have the layout defined in the ABI;
+// the layout the compiler generates.
+//
+// Much like the pack()/unpack() function pair in perl, but
+// compressing, not "packing into a binary structure".
+//
+// Note that the contents of the returned "String" is not
+// necessarily aligned to allow using it as a struct.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtPack.h,v 1.7 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifndef __HtPack_h
+#define __HtPack_h
+
+#include "htString.h"
+
+// Pack.
+// Compresses the struct at "theStruct", whose fields are described by
+// "format", and returns the compressed bytes.
+extern String
+htPack(const char format[], const char *theStruct);
+
+// Unpack.
+// Reverses htPack(): expands "thePackedData" according to "format".
+// The pointer is passed by value, so the caller's pointer is NOT
+// advanced past the processed amount of data.
+extern String
+htUnpack(const char format[], const char *thePackedData);
+
+#endif // __HtPack_h
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRandom.h b/debian/htdig/htdig-3.2.0b6/htlib/HtRandom.h
new file mode 100644
index 00000000..46c183d4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRandom.h
@@ -0,0 +1,49 @@
+// HtRandom.h
+//
+// class HtRandom:
+// tools for random numbers
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRandom.h,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _HtRandom_h_
+#define _HtRandom_h_
+
+
+// Small static helpers built on rand().
+class HtRandom
+{
+ public:
+    // Produce a pseudo-random unsigned int in the half-open range
+    // [v0, v1), taken from rand().  An empty range (v1 == v0) yields
+    // v0; previously this case evaluated rand() % 0, which is
+    // undefined.
+    // NOTE(review): v1 < v0 is not rejected; the unsigned difference
+    // wraps around in that case.
+    static inline unsigned int rnd(unsigned int v0,unsigned int v1)
+    {
+        if (v1 == v0)
+            return v0;
+        return((rand()%(v1-v0)) + v0 );
+    }
+
+    // Randomly mix up an array of n ints by performing 2*n swaps of
+    // randomly chosen positions.  When vals is null, a new array
+    // holding 0..n-1 is allocated with new[] and mixed instead; the
+    // caller then owns it and must release it with delete[].
+    // Returns the (possibly newly allocated) array.
+    static int *randomize_v(int *vals,int n)
+    {
+        int i;
+        if(!vals)
+        {
+            vals=new int[n];
+            for(i=0;i<n;i++){vals[i]=i;}
+        }
+        for(i=0;i<2*n;i++)
+        {
+            int i0=HtRandom::rnd(0,n);
+            int i1=HtRandom::rnd(0,n);
+            int t=vals[i0];
+            vals[i0]=vals[i1];
+            vals[i1]=t;
+        }
+        return(vals);
+    }
+
+};
+#endif // _HtRandom_h_
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.cc
new file mode 100644
index 00000000..a0f74038
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.cc
@@ -0,0 +1,105 @@
+//
+// HtRegex.cc
+//
+// HtRegex: A simple C++ wrapper class for the system regex routines.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegex.cc,v 1.13 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtRegex.h"
+#include <locale.h>
+
+
+HtRegex::HtRegex()
+    : compiled(0)   // no pattern has been compiled yet
+{
+}
+
+HtRegex::HtRegex(const char *str, int case_sensitive)
+    : compiled(0)
+{
+    // Compile the pattern right away; the result of set() is not
+    // examined here.
+    this->set(str, case_sensitive);
+}
+
+HtRegex::~HtRegex()
+{
+    // Release the compiled pattern, if one is held.
+    if (compiled)
+    {
+        regfree(&re);
+    }
+    compiled = 0;
+}
+
+const String &HtRegex::lastError()
+{
+    // Message recorded by the most recent failed set().
+    return this->lastErrorMessage;
+}
+
+// Compile `str` as an extended regular expression, replacing any
+// pattern held so far.  Matching ignores case unless `case_sensitive`
+// is non-zero.
+//
+// Returns 1 when a pattern was compiled, 0 otherwise (null or empty
+// `str`, or a compile error).  On a compile error the regerror() text
+// is stored and can be read through lastError().
+int
+HtRegex::set(const char * str, int case_sensitive)
+{
+    // Drop the previous pattern first.
+    if (compiled != 0) regfree(&re);
+    compiled = 0;
+
+    if (str == NULL) return 0;
+    if (*str == '\0') return 0;
+
+    const int flags = case_sensitive ? REG_EXTENDED : (REG_EXTENDED|REG_ICASE);
+    const int err = regcomp(&re, str, flags);
+    if (err == 0)
+    {
+        compiled = 1;
+        return compiled;
+    }
+
+    // First call sizes the message, second call fetches it.
+    size_t len = regerror(err, &re, 0, 0);
+    char *buf = new char[len];
+    regerror(err, &re, buf, len);
+    lastErrorMessage = buf;
+    delete [] buf;      // allocated with new[], so must be released with delete[]
+
+    return compiled;
+}
+
+int
+HtRegex::setEscaped(StringList &list, int case_sensitive)
+{
+    // Builds one alternation ("a|b|c") out of the list entries and
+    // compiles it with set().  An entry wrapped in [ ... ] contributes
+    // its inner text untouched; in any other entry each character
+    // found in `specials` is prefixed with a backslash.
+    const char *const specials = "^.[$()|*+?{\\";
+    String pattern;
+    String *item;
+
+    list.Start_Get();
+    for (item = (String *) list.Get_Next();
+         item != 0;
+         item = (String *) list.Get_Next())
+    {
+        const bool bracketed =
+            item->indexOf('[') == 0 &&
+            item->lastIndexOf(']') == item->length()-1;
+
+        if (bracketed)
+        {
+            pattern << item->sub(1, item->length()-2).get();
+        }
+        else
+        {
+            int pos = 0;
+            while (pos < item->length())
+            {
+                if (strchr(specials, item->Nth(pos)))
+                    pattern << '\\';
+                pattern << item->Nth(pos);
+                pos++;
+            }
+        }
+        pattern << "|";
+    }
+
+    // Drop the trailing separator.
+    pattern.chop(1);
+
+    return set(pattern, case_sensitive);
+}
+
+int
+HtRegex::match(const char * str, int nullpattern, int nullstr)
+{
+    // No compiled pattern: the caller decides what that means.
+    if (!compiled)
+        return nullpattern;
+
+    // Null or empty subject: likewise caller-defined.
+    if (str == NULL || *str == '\0')
+        return nullstr;
+
+    // 1 on a match, 0 on anything else regexec() reports.
+    return regexec(&re, str, (size_t) 0, NULL, 0) == 0 ? 1 : 0;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.h b/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.h
new file mode 100644
index 00000000..bb5c60d8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegex.h
@@ -0,0 +1,86 @@
+//
+// HtRegex.h
+//
+// HtRegex: A simple C++ wrapper class for the system regex routines.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegex.h,v 1.13 2004/05/28 13:15:21 lha Exp $
+//
+//
+
+#ifndef _HtRegex_h_
+#define _HtRegex_h_
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Object.h"
+#include "StringList.h"
+
+// This is an attempt to get around compatibility problems
+// with the included regex
+
+#ifdef _MSC_VER /* _WIN32 */
+#include "regex_win32.h"
+#else
+# ifdef USE_RX
+# include <rxposix.h>
+# else // Use regex
+# ifdef HAVE_BROKEN_REGEX
+# include <regex.h>
+# else // include regex code and header
+# include "gregex.h"
+# endif
+# endif
+#endif /* _WIN32 */
+
+#include <sys/types.h>
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+// Thin wrapper around regcomp()/regexec() holding one compiled pattern.
+// NOTE(review): no copy constructor or assignment operator is declared
+// while the destructor calls regfree() on `re`; copying an HtRegex by
+// value looks unsafe -- confirm before doing so.
+class HtRegex : public Object
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ HtRegex();
+ HtRegex(const char *str, int case_sensitive = 0);
+ virtual ~HtRegex();
+
+ //
+ // Methods for setting the pattern
+ // set() returns 1 when a pattern was compiled and 0 otherwise;
+ // the String overload forwards to the const char * overload.
+ // setEscaped() joins the list entries into one alternation,
+ // escaping regex special characters in entries not wrapped in [ ].
+ //
+ int set(const String& str, int case_sensitive = 0) { return set(str.get(), case_sensitive); }
+ int set(const char *str, int case_sensitive = 0);
+ int setEscaped(StringList &list, int case_sensitive = 0);
+
+ virtual const String &lastError(); // returns the last error message
+
+ //
+ // Methods for checking a match
+ // Returns 1 on a match and 0 on no match; `nullmatch` is returned
+ // when no pattern is compiled and `nullstr` when str is null or empty.
+ //
+ int match(const String& str, int nullmatch, int nullstr) { return match(str.get(), nullmatch, nullstr); }
+ int match(const char *str, int nullmatch, int nullstr);
+
+protected:
+ int compiled; // non-zero while `re` holds a compiled pattern
+ regex_t re; // the compiled pattern; only valid when compiled != 0
+
+ String lastErrorMessage; // regerror() text of the last failed set()
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.cc
new file mode 100644
index 00000000..2e017627
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.cc
@@ -0,0 +1,137 @@
+//
+// HtRegexList.cc
+//
+// HtRegexList: A list of HtRegex objects for handling large regex patterns
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexList.cc,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtRegexList.h"
+#include <locale.h>
+
+// Singly linked node carrying one Object pointer.
+// NOTE(review): presumably this has to mirror the node layout used
+// inside List (match() below walks `head` and `cursor` through ->next)
+// -- confirm against List.h / List.cc.
+class listnode
+{
+public:
+ listnode *next; // following node
+ Object *object; // payload carried by this node
+};
+
+HtRegexList::HtRegexList()
+    : compiled(0)   // nothing has been set up yet
+{
+}
+
+HtRegexList::~HtRegexList()
+{
+    // Only the flag is reset here; nothing else is released by this
+    // destructor body.
+    this->compiled = 0;
+}
+
+const String &HtRegexList::lastError()
+{
+    // Message recorded by the most recent failed setEscaped().
+    return this->lastErrorMessage;
+}
+
+// Build the list of compiled regexes from `list`.
+//
+// Entries are escaped exactly as in HtRegex::setEscaped() and joined
+// with '|' into one growing pattern.  When the grown pattern no longer
+// compiles, the last pattern that did compile is added to the list and
+// a fresh HtRegex is started with the current entry alone.
+//
+// Returns true on success (including an empty `list`, which leaves the
+// object marked as not compiled) and false when an entry cannot be
+// compiled even by itself; lastError() then holds the message.
+int
+HtRegexList::setEscaped(StringList &list, int case_sensitive)
+{
+    if (list.Count() == 0)
+    {
+        compiled = false;
+        return true;
+    }
+
+    String *str;
+    String transformedLimits, currentPattern, prevPattern;
+    HtRegex *limit = new HtRegex;
+
+    list.Start_Get();
+    while ((str = (String *) list.Get_Next()))
+    {
+        if (str->indexOf('[') == 0 && str->lastIndexOf(']') == str->length()-1)
+        {
+            transformedLimits = str->sub(1,str->length()-2).get();
+        }
+        else // Backquote any regex special characters
+        {
+            transformedLimits = 0;
+            for (int pos = 0; pos < str->length(); pos++)
+            {
+                if (strchr("^.[$()|*+?{\\", str->Nth(pos)))
+                    transformedLimits << '\\';
+                transformedLimits << str->Nth(pos);
+            }
+        }
+        if (!currentPattern.empty())
+            currentPattern << "|";
+        currentPattern << transformedLimits;
+        if (!limit->set(currentPattern.get(), case_sensitive))
+        {
+            if (prevPattern.empty()) // we haven't set anything yet!
+            {
+                lastErrorMessage = limit->lastError();
+                compiled = 0;
+                delete limit;   // never added to the list, so we still own it
+                return false;
+            }
+            limit->set(prevPattern.get(), case_sensitive); // Go back a step
+            Add(limit);
+            limit = new HtRegex;
+            currentPattern = transformedLimits;
+            if (!limit->set(currentPattern.get(), case_sensitive))
+            {
+                lastErrorMessage = limit->lastError();
+                compiled = 0;
+                delete limit;   // the replacement was never added either
+                return false;
+            }
+        }
+        prevPattern = currentPattern;
+    }
+    Add(limit); // OK, we're done so just add the last compiled pattern
+
+    compiled = 1;
+    return true;
+}
+
+// Test `str` against every HtRegex in the list.
+// Returns `nullpattern` when nothing has been compiled, `nullstr` when
+// `str` is null or empty, 1 as soon as one regex matches, 0 otherwise.
+int
+HtRegexList::match(const char * str, int nullpattern, int nullstr)
+{
+ HtRegex *regx;
+
+ if (compiled == 0) return(nullpattern);
+ if (str == NULL) return(nullstr);
+ if (strlen(str) <= 0) return(nullstr);
+
+ // `number`, `head` and `cursor` are not declared in this file;
+ // presumably they are inherited from List -- TODO confirm.
+ if (number == 0) return(1); // An empty pattern matches everything
+
+ Start_Get();
+ while ((regx = (HtRegex *) Get_Next()))
+ {
+ if (regx->match(str, nullpattern, nullstr))
+ {
+ // Move this one to the front and update pointers
+ // NOTE(review): this unlinks cursor.current and relinks it as the
+ // new head. Whether cursor.current is the node that just matched
+ // or the one after it depends on how Get_Next() advances the
+ // cursor, which is not visible here -- verify against List.cc.
+ if (cursor.current_index != -1)
+ {
+ if (cursor.prev)
+ cursor.prev->next = cursor.current->next;
+ cursor.prev = 0;
+ cursor.current->next = head;
+ head = cursor.current;
+ cursor.current = head;
+ cursor.current_index = -1;
+ }
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.h b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.h
new file mode 100644
index 00000000..bbc0ddf8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexList.h
@@ -0,0 +1,57 @@
+//
+// HtRegexList.h
+//
+// HtRegexList: A list of HtRegex objects for handling large regex patterns
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexList.h,v 1.4 2004/05/28 13:15:21 lha Exp $
+//
+//
+
+#ifndef _HtRegexList_h_
+#define _HtRegexList_h_
+
+#include "Object.h"
+#include "List.h"
+#include "StringList.h"
+#include "HtRegex.h"
+
+// A List of HtRegex objects that together stand for one large
+// alternation of patterns.
+class HtRegexList : public List
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ HtRegexList();
+ virtual ~HtRegexList();
+
+ //
+ // Setting (construct from a list of patterns)
+ // Returns true on success and false when a pattern fails to compile.
+ //
+ int setEscaped(StringList &list, int case_sensitive = 0);
+
+ virtual const String &lastError(); // returns the last error message
+
+ //
+ // Methods for checking a match
+ // Returns 1 when any regex in the list matches and 0 when none does;
+ // `nullmatch` is returned when nothing has been compiled and
+ // `nullstr` when str is null or empty.
+ //
+ int match(const String& str, int nullmatch, int nullstr)
+ { return match(str.get(), nullmatch, nullstr); }
+ int match(const char *str, int nullmatch, int nullstr);
+
+protected:
+ int compiled; // non-zero once setEscaped() has built the list
+
+ String lastErrorMessage; // error text of the last failed setEscaped()
+
+private:
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.cc
new file mode 100644
index 00000000..eb409e80
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.cc
@@ -0,0 +1,141 @@
+//
+// HtRegexReplace.cc
+//
+// HtRegexReplace: A subclass of HtRegex that can perform replacements
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexReplace.cc,v 1.4 2004/05/28 13:15:21 lha Exp $
+//
+
+#include "HtRegexReplace.h"
+#include <locale.h>
+
+
+// Default constructor: no pattern and no replacement text yet.
+//
+// Every member is given a defined value here. The destructor calls empty(),
+// which releases repBuf and segMark, and replace() tests repBuf against 0,
+// so leaving these indeterminate would be undefined behaviour.
+HtRegexReplace::HtRegexReplace()
+{
+    memset(&regs, 0, sizeof(regs));
+    repBuf = 0;
+    segSize =
+    segUsed = 0;
+    segMark = 0;
+    repLen = 0;
+}
+
+// Construct from a search pattern and a replacement template.
+// |from| and |case_sensitive| go to the HtRegex base class; |to| is parsed
+// by setReplace() once all members hold a defined "empty" state.
+HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive)
+    : HtRegex(from, case_sensitive)
+{
+    repBuf  = 0;
+    segMark = 0;
+    segUsed = 0;
+    segSize = 0;
+    repLen  = 0;
+    memset(&regs, 0, sizeof(regs));
+
+    setReplace(to);
+}
+
+// Destructor: drops the replacement buffer and the mark table.
+HtRegexReplace::~HtRegexReplace()
+{
+    this->empty();
+}
+
+// Apply the compiled pattern to |str| and, on a match, overwrite |str| with
+// the expanded replacement template.
+//
+// Returns nullpattern when no pattern or no replacement has been set,
+// nullstr when |str| is empty, 1 when a match was found and |str| was
+// rewritten, and 0 when the pattern did not match.
+int HtRegexReplace::replace(String &str, int nullpattern, int nullstr)
+{
+    const int regCount = sizeof(regs) / sizeof(regs[0]);
+
+    if (compiled == 0 || repBuf == 0)
+        return nullpattern;
+    if (str.length() == 0)
+        return nullstr;
+
+    if (regexec(&re, str.get(), regCount, regs, 0) != 0)
+        return 0;
+
+    const char *subject = str.get();
+    const int markCount = (int) segUsed;
+
+    // First pass: compute the final length so the result buffer can be
+    // allocated once. Odd slots of segMark hold register numbers.
+    size_t total = repLen;
+    for (int m = 1; m < markCount; m += 2)
+    {
+        const int group = segMark[m];
+        if (group < regCount && regs[group].rm_so != -1)
+            total += regs[group].rm_eo - regs[group].rm_so;
+    }
+
+    // Second pass: alternate between a literal slice of repBuf (even slot =
+    // end offset of the slice) and the text captured by a register (odd slot).
+    String result(total);
+    int seg = 0;
+    int literalStart = 0;
+    for (;;)
+    {
+        result.append(repBuf + literalStart, segMark[seg] - literalStart);
+        literalStart = segMark[seg];
+        ++seg;
+        if (seg == markCount)
+            break;                      // that was the trailing literal
+
+        const int group = segMark[seg];
+        ++seg;
+        if (group < regCount && regs[group].rm_so != -1)
+            result.append((char *) subject + regs[group].rm_so, regs[group].rm_eo - regs[group].rm_so);
+    }
+
+    str = result;
+    return 1;
+}
+
+// Private: place a mark in the mark buffer, growing it if necessary.
+//
+// The buffer is allocated with new[], so it must be released with delete[].
+// On the very first call segMark is still null, so the copy is skipped
+// rather than handing a null source pointer to memcpy.
+void HtRegexReplace::putMark(int n)
+{
+    // assert(segUsed <= segSize);
+    if (segUsed == segSize)
+    {
+        size_t newSize = segSize * 2 + 5;   // grow in chunks
+        int *newMark = new int[newSize];    // do we assume that new can't fail?
+        if (segMark != 0)
+            memcpy(newMark, segMark, segSize * sizeof(int));
+        delete [] segMark;
+        segMark = newMark;
+        segSize = newSize;
+    }
+    segMark[segUsed++] = n;
+}
+
+// Discard any existing replacement pattern and return to the "nothing set"
+// state (repBuf == 0, no marks).
+//
+// repBuf and segMark are both allocated with new[] (in setReplace() and
+// putMark()), so they are released with delete[].
+void HtRegexReplace::empty()
+{
+    delete [] repBuf; repBuf = 0;
+    segSize = segUsed = 0;
+    delete [] segMark; segMark = 0;
+    repLen = 0;
+}
+
+// Parse the replacement template |to|.
+//
+// Literal text is copied into repBuf. A backslash followed by a digit
+// records two marks: the current output offset and the register number.
+// A backslash followed by any other character passes that character
+// through, so \\, \" and \' do the right thing. A trailing lone backslash
+// is dropped. A final mark records the end of the literal text.
+void HtRegexReplace::setReplace(const char *to)
+{
+    empty();
+
+    // The output can never be longer than the template itself.
+    repBuf = new char[strlen(to)];
+    int outPos = 0;                 // write position inside repBuf
+
+    for (const char *p = to; *p != '\0'; ++p)
+    {
+        if (*p != '\\')
+        {
+            repBuf[outPos++] = *p;
+            continue;
+        }
+
+        ++p;                        // look at the escaped character
+        if (*p == '\0')
+            break;
+
+        if (*p >= '0' && *p <= '9')
+        {
+            putMark(outPos);
+            putMark(*p - '0');
+        }
+        else
+        {
+            repBuf[outPos++] = *p;
+        }
+    }
+
+    putMark(outPos);
+    repLen = (size_t) outPos;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.h b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.h
new file mode 100644
index 00000000..6cf4d50f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.h
@@ -0,0 +1,58 @@
+//
+// HtRegexReplace.h
+//
+// HtRegexReplace: A subclass of HtRegex that can perform replacements
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexReplace.h,v 1.4 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _HtRegexReplace_h_
+#define _HtRegexReplace_h_
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtRegex.h"
+
+// HtRegexReplace: an HtRegex that also carries a replacement template and
+// can rewrite a matching string in place.
+class HtRegexReplace : public HtRegex
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ HtRegexReplace();
+ // |from| is the search pattern, |to| the replacement template.
+ HtRegexReplace(const char *from, const char *to, int case_sensitive = 0);
+ virtual ~HtRegexReplace();
+
+ //
+ // Methods for setting the replacement pattern
+ //
+ // In the template, a backslash followed by a digit 0-9 refers to the
+ // corresponding match register; a backslash followed by any other
+ // character inserts that character literally.
+ void setReplace(const String& str) { setReplace(str.get()); }
+ void setReplace(const char *str);
+
+ //
+ // Methods for replacing
+ //
+ // Returns nullpattern if no pattern/replacement is set, nullstr if str is
+ // empty, 1 if str matched and was rewritten, 0 if it did not match.
+ int replace(String &str, int nullpattern = 0, int nullstr = 0);
+
+protected:
+ char *repBuf; // Replace text.
+ // Capacity of and number of used entries in segMark.
+ size_t segSize, segUsed;
+ // Mark table: even entries are offsets into repBuf marking the end of a
+ // literal slice, odd entries are register numbers to splice in.
+ int *segMark;
+ // Number of literal characters stored in repBuf (it is not NUL-terminated).
+ size_t repLen;
+
+ // Match registers filled in by regexec() inside replace().
+ regmatch_t regs[10];
+
+ // Various private methods
+ void putMark(int n);
+ void empty();
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.cc
new file mode 100644
index 00000000..2ba7a4d2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.cc
@@ -0,0 +1,84 @@
+//
+// HtRegexReplaceList.cc
+//
+// HtRegexReplaceList: Perform RegexReplace on a list of from/to pairs.
+// Patterns are applied in order; pattern matching
+// doesn't stop when a match occurs.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexReplaceList.cc,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtRegexReplaceList.h"
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+// Build one HtRegexReplace for each from/to pair in |list|.
+//
+// |list| must hold an even number of strings. On an odd count, or as soon
+// as one replacer reports an error, lastErrorMessage is set and
+// construction stops; replacers built so far stay in the list.
+HtRegexReplaceList::HtRegexReplaceList(StringList &list, int case_sensitive )
+{
+    if (list.Count() & 1)
+    {
+        lastErrorMessage = "HtRegexReplaceList needs an even number of strings";
+        return;
+    }
+
+    for (int i = 0; i < list.Count(); i += 2)
+    {
+        String from = list[i];
+        String to = list[i+1];
+        HtRegexReplace *replacer = new HtRegexReplace(from.get(), to.get(), case_sensitive);
+        replacers.Add(replacer);    // Stash it even if there's an error so it will get destroyed later
+        const String &err = replacer->lastError();
+        if (err.length() != 0)
+        {
+            lastErrorMessage = err;
+            return;
+        }
+    }
+}
+
+// Destructor. Nothing to do explicitly: the replacers member is torn down
+// along with this object.
+HtRegexReplaceList::~HtRegexReplaceList()
+{
+}
+
+// Run every replacer over |str| in list order; a later replacer sees the
+// output of the earlier ones. Returns how many replacers reported a
+// result greater than zero.
+int HtRegexReplaceList::replace(String &str, int nullpattern , int nullstr )
+{
+    int applied = 0;
+    const int total = replacers.Count();
+
+    for (int idx = 0; idx < total; ++idx)
+    {
+        HtRegexReplace *current = (HtRegexReplace *) replacers[idx];
+        if (current->replace(str, nullpattern, nullstr) > 0)
+            ++applied;
+    }
+
+    return applied;
+}
+
+// Returns the message recorded by the constructor; empty if none was set.
+const String &HtRegexReplaceList::lastError()
+{
+    const String &message = lastErrorMessage;
+    return message;
+}
+
+// End of HtRegexReplaceList.cc
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.h b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.h
new file mode 100644
index 00000000..b51cf2d1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplaceList.h
@@ -0,0 +1,39 @@
+//
+// HtRegexReplaceList.h
+//
+// HtRegexReplaceList: Perform RegexReplace on a list of from/to pairs.
+// Patterns are applied in order; pattern matching
+// doesn't stop when a match occurs.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtRegexReplaceList.h,v 1.4 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef __HtRegexReplaceList_h
+#define __HtRegexReplaceList_h
+
+#include "HtRegexReplace.h"
+#include "List.h"
+#include "StringList.h"
+
+// HtRegexReplaceList: an ordered set of HtRegexReplace objects that are all
+// applied, one after the other, to the same string.
+class HtRegexReplaceList : public Object
+{
+public:
+ // Construct a HtRegexReplaceList. |list| should contain an even
+ // number of strings that constitute from/to pairs.
+ // On failure the object is still constructed; check lastError().
+ HtRegexReplaceList(StringList &list, int case_sensitive = 0);
+ virtual ~HtRegexReplaceList();
+ // Applies every replacer to |str| in order and returns the number of
+ // replacers whose replace() returned a value greater than zero.
+ int replace(String &str, int nullpattern = 0, int nullstr = 0);
+ // Message set by the constructor when the list was malformed or a
+ // pattern reported an error; empty otherwise.
+ virtual const String &lastError();
+
+private:
+ // The HtRegexReplace objects, in the order they are applied.
+ List replacers;
+ String lastErrorMessage;
+};
+
+#endif /* __HtRegexReplaceList_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtTime.h b/debian/htdig/htdig-3.2.0b6/htlib/HtTime.h
new file mode 100644
index 00000000..0f6e5ae7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtTime.h
@@ -0,0 +1,128 @@
+// HtTime.h
+//
+// class HtTime:
+// tools for timing
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtTime.h,v 1.8 2004/05/28 13:15:21 lha Exp $
+//
+#ifndef _HtTime_h_
+#define _HtTime_h_
+
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#ifdef _MSC_VER /* _WIN32 */
+#include <sys/timeb.h>
+#endif
+
+// HtTime: small helpers for wall-clock timing.
+class HtTime
+{
+ public:
+    // Current time in seconds (double format).
+    static inline double DTime()
+    {
+#ifdef _MSC_VER /* _WIN32 */
+        struct timeb tb;
+        ftime(&tb);
+        // millitm is an integer millisecond count; divide by 1000.0 so the
+        // sub-second part is not truncated to zero by integer division.
+        // NOTE(review): tb.timezone is added unchanged from the original
+        // code -- confirm its unit is really meant to be mixed with seconds.
+        return((double)tb.time + tb.millitm / 1000.0 + tb.timezone);
+#else
+        struct timeval tv;
+        gettimeofday(&tv, NULL);
+        return(tv.tv_usec / 1000000.0 + tv.tv_sec);
+#endif
+    }
+
+    // Time in seconds relative to T0 (double format).
+    static inline double DTime(double T0)
+    {
+#ifdef _MSC_VER /* _WIN32 */
+        struct timeb tb;
+        ftime(&tb);
+        // Same fractional-second fix as in DTime() above.
+        return(((double)tb.time + tb.millitm / 1000.0 + tb.timezone) - T0);
+#else
+        struct timeval tv;
+        gettimeofday(&tv, NULL);
+        return((tv.tv_usec / 1000000.0 + tv.tv_sec) - T0);
+#endif
+    }
+
+    // Do something every x seconds.
+    //
+    // operator() returns 1 when more than |period| seconds have passed since
+    // the last time it returned 1 (or since construction), else 0.
+    class Periodic
+    {
+        double t0;      // construction time, absolute
+        double last;    // time of the last trigger, relative to t0
+        double period;  // minimum spacing between triggers, in seconds
+    public:
+        // Seconds elapsed since construction.
+        double total() { return(HtTime::DTime(t0)); }
+        void change_period(double nperiod) { period = nperiod; }
+        // If prperiod is non-null it receives the time since the last trigger.
+        int operator()(double *prperiod = NULL)
+        {
+            double t = HtTime::DTime(t0);
+            if (prperiod) { *prperiod = t - last; }
+            if ((t - last) > period)
+            {
+                last = t;
+                return(1);
+            }
+            return(0);
+        }
+        Periodic(double nperiod = .1)
+        {
+            period = nperiod;
+            t0 = HtTime::DTime();
+            last = 0;
+        }
+    };
+
+
+
+#ifdef NOTDEF
+    // print progression message every x seconds
+    class Progression
+    {
+        double t0;
+        double last;
+        double period;
+        char *label;
+    public:
+        double total() { return(HtTime::DTime() - t0); }
+        int operator()(double x)
+        {
+            double t = HtTime::DTime() - t0;
+            if ((t - last) > period)
+            {
+                last = t;
+                printf("%s (%f): %f\n", label, t, x);
+                return(1);
+            }
+            return(0);
+        }
+        Progression(double nperiod = .1, char *nlabel = (char *)"progression")
+        {
+            label = nlabel;
+            period = nperiod;
+            t0 = HtTime::DTime();
+            last = 0;
+        }
+    };
+#endif
+};
+#endif // _HtTime_h_
+
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVector.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtVector.cc
new file mode 100644
index 00000000..b6addea3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVector.cc
@@ -0,0 +1,310 @@
+//
+// HtVector.cc
+//
+// HtVector: A Vector class which holds objects of type Object.
+// (A vector is an array that can expand as necessary)
+// This class is very similar in interface to the List class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtVector.cc,v 1.11 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtVector.h"
+
+//*********************************************************************
+// void HtVector::HtVector()
+// Default constructor
+//
+HtVector::HtVector()
+{
+    // Start with a small non-empty buffer rather than nothing at all.
+    allocated = 4;
+    data = new Object *[4];
+    current_index = -1;
+    element_count = 0;
+}
+
+
+//*********************************************************************
+// void HtVector::HtVector(int capacity)
+// Constructor with known capacity
+// (has the side effect of not allocating double memory)
+//
+// Construct an empty vector with room for |capacity| elements.
+//
+// A capacity below 1 is raised to 1: Allocate() grows the buffer by
+// doubling |allocated|, which can never get past zero, and a negative
+// array size is not a valid allocation request.
+HtVector::HtVector(int capacity)
+{
+    if (capacity < 1)
+        capacity = 1;
+
+    data = new Object *[capacity];
+    element_count = 0;
+    allocated = capacity;
+    current_index = -1;
+}
+
+
+//*********************************************************************
+// void HtVector::~HtVector()
+// Destructor
+//
+// The vector owns its elements: destroying it deletes them as well.
+HtVector::~HtVector()
+{
+    this->Destroy();
+}
+
+
+//*********************************************************************
+// void HtVector::Release()
+// Remove all objects from the vector, but do not delete them
+void HtVector::Release()
+{
+    // Forget every stored pointer; the objects themselves are left alone.
+    for (int slot = 0; slot < element_count; slot++)
+        data[slot] = NULL;
+
+    // Give back the slot array (delete[] of a null pointer does nothing).
+    delete [] data;
+    data = NULL;
+
+    element_count = 0;
+    allocated = 0;
+    current_index = -1;
+}
+
+//*********************************************************************
+// void HtVector::Destroy()
+// Deletes all objects from the vector
+//
+void HtVector::Destroy()
+{
+    // Delete each stored object, skipping empty slots.
+    for (int slot = 0; slot < element_count; slot++)
+    {
+        if (data[slot] == NULL)
+            continue;
+        delete data[slot];
+        data[slot] = NULL;
+    }
+
+    // Then give back the slot array itself.
+    delete [] data;
+    data = NULL;
+
+    element_count = 0;
+    allocated = 0;
+    current_index = -1;
+}
+
+
+//*********************************************************************
+// void HtVector::Add(Object *object)
+// Add an object to the list.
+//
+void HtVector::Add(Object *object)
+{
+    // Make room for one more, then store at the first free slot.
+    Allocate(element_count + 1);
+    data[element_count++] = object;
+}
+
+
+//*********************************************************************
+// void HtVector::Insert(Object *object, int position)
+// Add an object into the list.
+//
+void HtVector::Insert(Object *object, int position)
+{
+    // Negative positions are ignored.
+    if (position < 0)
+        return;
+
+    if (position < element_count)
+    {
+        // Shift the tail up by one slot to open a gap at |position|.
+        Allocate(element_count + 1);
+        int slot = element_count;
+        while (slot > position)
+        {
+            data[slot] = data[slot - 1];
+            --slot;
+        }
+        data[position] = object;
+        ++element_count;
+    }
+    else
+    {
+        // At or past the end: plain append.
+        Add(object);
+    }
+}
+
+
+//*********************************************************************
+// void HtVector::Assign(Object *object, int position)
+// Assign an object to the position
+//
+// Put |object| at |position|, replacing whatever pointer was stored there.
+// The replaced object is NOT deleted, exactly as before.
+//
+// A negative position is ignored. Previously the Insert() was skipped but
+// RemoveFrom(position + 1) still ran, so position == -1 silently dropped
+// element 0. A position at or past the end appends, as before.
+void HtVector:: Assign(Object *object, int position)
+{
+    if (position < 0)
+        return;
+
+    if (position >= element_count)
+    {
+        Add(object);
+        return;
+    }
+
+    data[position] = object;
+}
+
+
+//*********************************************************************
+// int HtVector::Remove(Object *object)
+// Remove an object from the list.
+//
+int HtVector::Remove(Object *object)
+{
+    // Index() yields -1 when the object is absent, which RemoveFrom()
+    // rejects with NOTOK.
+    const int where = Index(object);
+    return RemoveFrom(where);
+}
+
+
+//*********************************************************************
+// int HtVector::RemoveFrom(int position)
+// Remove an object from the list.
+//
+int HtVector::RemoveFrom(int position)
+{
+    if (position >= 0 && position < element_count)
+    {
+        // Close the gap by shifting the tail down one slot. The removed
+        // object itself is not deleted.
+        const int last = element_count - 1;
+        for (int slot = position; slot < last; ++slot)
+            data[slot] = data[slot + 1];
+        element_count = last;
+        return OK;
+    }
+    return NOTOK;
+}
+
+
+//*********************************************************************
+// Object *HtVector::Get_Next()
+// Return the next object in the list.
+//
+Object *HtVector::Get_Next()
+{
+    // Advance the cursor first; past the end there is nothing to return.
+    ++current_index;
+    if (current_index < element_count)
+        return data[current_index];
+    return 0;
+}
+
+
+//*********************************************************************
+// Object *HtVector::Get_First()
+// Return the first object in the list.
+//
+Object *HtVector::Get_First()
+{
+    // An empty vector has no first element; the cursor is left untouched.
+    if (IsEmpty())
+        return 0;
+
+    current_index = 0;
+    return data[0];
+}
+
+
+//*********************************************************************
+// int HtVector::Index(Object *obj)
+// Return the index of an object in the list.
+//
+int HtVector::Index(Object *obj)
+{
+    // Linear search by pointer identity; first hit wins.
+    for (int slot = 0; slot < element_count; ++slot)
+    {
+        if (data[slot] == obj)
+            return slot;
+    }
+    return -1;
+}
+
+
+//*********************************************************************
+// Object *HtVector::Next(Object *prev)
+// Return the next object in the list. Using this, the list will
+// appear as a circular list.
+//
+Object *HtVector::Next(Object *prev)
+{
+    const int found = Index(prev);
+    current_index = found;          // stays -1 when |prev| is not present
+    if (found == -1)
+        return 0;
+
+    // Step forward, wrapping from the last element back to the first.
+    current_index = (found + 1 >= element_count) ? 0 : found + 1;
+    return data[current_index];
+}
+
+
+//*********************************************************************
+// Object *HtVector::Previous(Object *next)
+// Return the previous object in the vector. Using this, the vector will
+// appear as a circular list.
+//
+Object *HtVector::Previous(Object *next)
+{
+    const int found = Index(next);
+    current_index = found;          // stays -1 when |next| is not present
+    if (found == -1)
+        return 0;
+
+    // Step backward, wrapping from the first element to the last.
+    current_index = (found - 1 < 0) ? element_count - 1 : found - 1;
+    return data[current_index];
+}
+
+
+//*********************************************************************
+// Object *HtVector::Copy() const
+// Return a deep copy of the vector.
+//
+// Return a newly allocated deep copy of this vector; the caller owns it.
+//
+// Empty (NULL) slots are copied as NULL instead of being dereferenced.
+// A vector that has been Release()d or Destroy()ed has allocated == 0;
+// the copy is then given a small non-zero capacity so it stays usable.
+Object *HtVector::Copy() const
+{
+    HtVector *vector = new HtVector(allocated > 0 ? allocated : 4);
+
+    for(int i = 0; i < Count(); i++)
+    {
+        Object *item = data[i];
+        vector->Add(item ? item->Copy() : (Object *) NULL);
+    }
+
+    return vector;
+}
+
+
+//*********************************************************************
+// HtVector &HtVector::operator=(HtVector &vector)
+// Replace the contents of this vector with a deep copy of the given one.
+//
+// Replace the contents of this vector with deep copies of |vector|'s
+// elements. The objects previously held here are deleted.
+//
+// - Self-assignment is a no-op; otherwise Destroy() would delete the very
+//   elements about to be copied.
+// - Destroy() leaves data == NULL and allocated == 0, so fresh storage is
+//   set up here before any Add().
+// - Empty (NULL) slots are copied as NULL instead of being dereferenced.
+HtVector &HtVector::operator=(HtVector &vector)
+{
+    if (this == &vector)
+        return *this;
+
+    Destroy();
+
+    const int count = vector.Count();
+    allocated = (count > 4) ? count : 4;
+    data = new Object *[allocated];
+
+    for(int i = 0; i < count; i++)
+    {
+        Object *item = vector.data[i];
+        Add(item ? item->Copy() : (Object *) NULL);
+    }
+
+    return *this;
+}
+
+
+//*********************************************************************
+// void HtVector::Allocate(int capacity)
+// Ensure there is at least capacity space in the vector
+//
+// Grow the slot array, if needed, so it can hold at least |capacity|
+// pointers. Existing elements keep their positions.
+//
+// Growth is by doubling to avoid quadratic behaviour on repeated Add().
+// Doubling starts from at least 4: after Release() or Destroy() the
+// vector has allocated == 0, and doubling zero would loop forever.
+void HtVector::Allocate(int capacity)
+{
+    if (capacity <= allocated)
+        return;                         // already big enough
+
+    Object **old_data = data;
+
+    int new_size = (allocated > 0) ? allocated : 4;
+    while (new_size < capacity)
+        new_size *= 2;
+
+    data = new Object *[new_size];
+    allocated = new_size;
+
+    for (int i = 0; i < element_count; i++)
+        data[i] = old_data[i];
+
+    delete [] old_data;                 // no-op when old_data is NULL
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVector.h b/debian/htdig/htdig-3.2.0b6/htlib/HtVector.h
new file mode 100644
index 00000000..af1271b6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVector.h
@@ -0,0 +1,137 @@
+//
+// HtVector.h
+//
+// HtVector: A Vector class which holds objects of type Object.
+// (A vector is an array that can expand as necessary)
+// This class is very similar in interface to the List class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtVector.h,v 1.10 2004/05/28 13:15:21 lha Exp $
+//
+//
+#ifndef _HtVector_h_
+#define _HtVector_h_
+#include "Object.h"
+
+// HtVector: a growable array of Object pointers with a built-in traversal
+// cursor. Destroying the vector deletes the objects it holds.
+class HtVector : public Object
+{
+public:
+ //
+ // Constructor/Destructor
+ //
+ HtVector();
+ HtVector(int capacity);
+ ~HtVector();
+
+ //
+ // Add() will append an Object to the end of the vector
+ //
+ void Add(Object *);
+
+ //
+ // Insert() will insert an object at the given position. If the
+ // position is larger than the number of objects in the vector, the
+ // object is appended; no new objects are created between the end
+ // of the vector and the given position.
+ // A negative position is ignored.
+ //
+ void Insert(Object *, int position);
+
+ //
+ // Assign() will assign the object to the given position, replacing
+ // the object currently there. The replaced object is removed from the
+ // vector but is NOT deleted.
+ //
+ void Assign(Object *, int position);
+
+ //
+ // Find the given object in the vector and remove it from the vector.
+ // The object will NOT be deleted. If the object is not found,
+ // NOTOK will be returned, else OK.
+ //
+ int Remove(Object *);
+
+ //
+ // Remove the object at the given position
+ // (in some sense, the inverse of Insert)
+ // Returns NOTOK if the position is out of range, else OK.
+ // The object is NOT deleted.
+ //
+ int RemoveFrom(int position);
+
+ //
+ // Release() will remove all the objects from the vector.
+ // This will NOT delete them
+ void Release();
+
+ //
+ // Destroy() will delete all the objects in the vector. This is
+ // equivalent to calling the destructor
+ //
+ void Destroy();
+
+ //
+ // Vector traversal (a bit redundant since you can use [])
+ // Get_Next() returns NULL once the cursor has moved past the last element.
+ // Next()/Previous() look the given object up and wrap around at the ends;
+ // they return NULL if the object is not in the vector.
+ //
+ void Start_Get() {current_index = -1;}
+ Object *Get_Next();
+ Object *Get_First();
+ Object *Next(Object *current);
+ Object *Previous(Object *current);
+ Object *Last() {return element_count<=0?(Object *)NULL:data[element_count-1];}
+
+ //
+ // Direct access to vector items. To assign new objects, use
+ // Insert() or Add() or Assign()
+ // Out-of-range indices yield NULL.
+ //
+ Object *operator[] (int n) {return (n<0||n>=element_count)?(Object *)NULL:data[n];}
+ Object *Nth(int n) {return (n<0||n>=element_count)?(Object *)NULL:data[n];}
+
+ //
+ // Access to the number of elements
+ //
+ int Count() const {return element_count;}
+ int IsEmpty() {return element_count==0;}
+
+ //
+ // Get the index number of an object. If the object is not found,
+ // returns -1
+ //
+ int Index(Object *);
+
+ //
+ // Deep copy member function
+ //
+ Object *Copy() const;
+
+ //
+ // Vector Assignment
+ // The pointer overload forwards to the reference overload.
+ //
+ HtVector &operator= (HtVector *vector) {return *this = *vector;}
+ HtVector &operator= (HtVector &vector);
+
+protected:
+ //
+ // The actual internal data array
+ Object **data;
+
+ //
+ // For traversal it is nice to know where we are...
+ //
+ int current_index;
+
+ //
+ // It's nice to keep track of how many things we contain...
+ // as well as how many slots we've declared
+ //
+ int element_count;
+ int allocated;
+
+ //
+ // Protected function to ensure capacity
+ //
+ void Allocate(int ensureCapacity);
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.cc
new file mode 100644
index 00000000..b3583e0c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.cc
@@ -0,0 +1,91 @@
+//
+// HtVectorGeneric.cc
+//
+// HtVectorGeneric: A Vector class which holds objects of type GType.
+// (A vector is an array that can expand as necessary)
+// This class is very similar in interface to the List class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtVectorGeneric.cc,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+// Each group below emits the member function definitions for one element
+// type by defining GType/HtVectorGType and including HtVectorGenericCode.h.
+// HtVectorGeneric.h #undefs both macros at its end, which is why they are
+// defined again after every include of that header.
+
+// int: the class declaration comes from HtVector_int.h.
+#include "HtVector_int.h"
+#define GType int
+#define HtVectorGType HtVector_int
+#include "HtVectorGenericCode.h"
+
+
+// char: declared here via HtVectorGeneric.h, then defined.
+#define GType char
+#define HtVectorGType HtVector_char
+#include "HtVectorGeneric.h"
+#define GType char
+#define HtVectorGType HtVector_char
+#include "HtVectorGenericCode.h"
+
+// String: the class declaration comes from HtVector_String.h.
+#include"HtVector_String.h"
+#define GType String
+#define HtVectorGType HtVector_String
+#include "HtVectorGenericCode.h"
+
+
+// double: declared here via HtVectorGeneric.h, then defined.
+#define GType double
+#define HtVectorGType HtVector_double
+#include "HtVectorGeneric.h"
+#define GType double
+#define HtVectorGType HtVector_double
+#include "HtVectorGenericCode.h"
+
+#include<stdio.h>
+
+// this is just to check if it compiles ok
+
+// Throwaway element type: three ints fixed at 1, 2, 3 and a method that
+// prints them.
+class ZOZO
+{
+    int a, b, c;
+public:
+    ZOZO() : a(1), b(2), c(3) {}
+
+    void show()
+    {
+        printf("ZOZO SHOW:%d %d %d\n", a, b, c);
+    }
+};
+
+
+// Declare and define HtVector_ZOZO. HTVECTORGENERIC_NOTCOMPARABLE leaves
+// out the Index/Next/Previous/Remove declarations in HtVectorGeneric.h,
+// which need to compare elements. HtVectorGeneric.h #undefs all three
+// macros at its end, so they are defined again before the code include.
+#define HTVECTORGENERIC_NOTCOMPARABLE
+#define GType ZOZO
+#define HtVectorGType HtVector_ZOZO
+#include "HtVectorGeneric.h"
+#define HTVECTORGENERIC_NOTCOMPARABLE
+#define GType ZOZO
+#define HtVectorGType HtVector_ZOZO
+#include "HtVectorGenericCode.h"
+
+// Smoke test: instantiate a few generic vectors, push three ZOZO values
+// and print each of them.
+void
+test_HtVectorGeneric()
+{
+    HtVector_int  intv;
+    HtVector_char charv;
+    HtVector_ZOZO vz;
+
+    ZOZO sample;
+    for (int n = 0; n < 3; n++)
+        vz.push_back(sample);
+
+    for (int idx = 0; idx < vz.size(); idx++)
+        vz[idx].show();
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.h b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.h
new file mode 100644
index 00000000..8299aa2c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGeneric.h
@@ -0,0 +1,247 @@
+//
+// HtVectorGeneric.h
+//
+// HtVector: A Vector class which holds objects of type GType.
+// (A vector is an array that can expand as necessary)
+// This class is very similar in interface to the List class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtVectorGeneric.h,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+//
+// #ifndef _HtVectorGeneric_h_
+//#define _HtVectorGeneric_h_
+
+
+// HOWTO use:
+// let's say you have a class called Zozo (that has to have an
+// empty constructor Zozo::Zozo() )
+// You want to use vectors of Zozo's (called HtVector_Zozo )
+//
+// In every file you use it you must include its declaration:
+//
+// #define GType Zozo
+// #define HtVectorGType HtVector_Zozo
+// #include "HtVectorGeneric.h"
+//
+// Then you can use it:
+// HtVector_Zozo vect;
+// Zozo a,b,c;
+// vect.push_back(a);
+// vect.push_back(b);
+// vect[0]=c;
+//
+// Somewhere in some .cc file you have to
+//
+// #define GType Zozo
+// #define HtVectorGType HtVector_Zozo
+// #include "HtVectorGenericCode.h"
+//
+//
+// Notes:
+// *If you include the declaration more
+// than once in the same scope you will get
+// errors (HtVector_Zozo : already declared)
+//
+// *Bounds checking is on by default; this should be
+// changed (it's performance critical). If you're
+// out of bounds an error is printed on cerr
+// but the program doesn't stop (maybe it should?)
+//
+// *vectors containing pointers DO NOT FREE
+// them, this is deliberate
+//
+
+#include "Object.h"
+
+// Generic growable array of GType values. HtVectorGType and GType are
+// macros supplied by the including file; both are #undef'd at the end of
+// this header. Elements are stored by value.
+class HtVectorGType : public Object
+{
+public:
+ //
+ // Constructor/Destructor
+ //
+ HtVectorGType();
+ HtVectorGType(int capacity);
+ ~HtVectorGType();
+
+protected:
+ //
+ // this error checking should be made optional!
+ // Only reports on stderr; it does not stop or alter the access.
+ //
+ inline void CheckBounds(const int n) const {if(n<0 || n>=element_count){ fprintf(stderr, "HtVectorGType::CheckBounds: out of bounds.\n");}}
+
+public:
+
+
+ //
+ // Insert() will insert an object at the given position. If the
+ // position is larger than the number of objects in the vector, the
+ // object is appended; no new objects are created between the end
+ // of the vector and the given position.
+ //
+ void Insert(const GType &, int position);
+
+ // *** this is obsolete in HtVectorGeneric ** use: vector[position]=value;
+// void Assign(GType , int position);
+
+
+ //
+ // Remove the object at the given position
+ // (in some sense, the inverse of Insert)
+ //
+ void RemoveFrom(int position);
+
+ // Release is obsolete since no deletions of pointers contained in
+ // the vector is done
+ // void Release();
+
+ //
+ // Destroy() will delete all the objects in the vector.
+ // Warning: no deletions of pointers contained in
+ // the vector are done (that's up to you)
+ //
+ void Destroy();
+
+ //
+ // Vector traversal (a bit redundant since you can use [])
+ // NOTE(review): Last() on an empty vector evaluates Nth(-1).
+ //
+ void Start_Get() {current_index = -1;}
+ GType & Get_Next();
+ GType & Get_First();
+ GType & Last() {return Nth(element_count-1);}
+
+ //
+ // Direct access to vector items. To assign new objects, use
+ // Insert() or Add() or Assign()
+ // The bounds check is compiled in only when the macro
+ // HtVectorGeneric_CheckBounds is defined; either way data[n] is returned.
+ //
+ inline GType & Nth(int n)
+ {
+#ifdef HtVectorGeneric_CheckBounds
+ CheckBounds(n);
+#endif
+ return data[n];
+ }
+ inline const GType & Nth(int n) const
+ {
+#ifdef HtVectorGeneric_CheckBounds
+ CheckBounds(n);
+#endif
+ return data[n];
+ }
+ inline GType & operator[] (int n) {return Nth(n);}
+ inline const GType & operator[] (int n) const {return Nth(n);}
+
+ //
+ // Access to the number of elements
+ //
+ inline int Count() const {return element_count;}
+ inline int IsEmpty() {return element_count==0;}
+
+
+ //
+ // Accesses which involve finding an element (only possible if
+ // we can compare two elements)
+ //
+#ifndef HTVECTORGENERIC_NOTCOMPARABLE
+ //
+ // Get the index number of an object. If the object is not found,
+ // returns -1
+ //
+ int Index(const GType &);
+ GType & Next(const GType &current);
+ GType & Previous(const GType &current);
+
+ //
+ // Find the given object in the vector and remove it from the vector.
+ // The object will NOT be deleted.
+ // NOTE(review): declared void, so no OK/NOTOK status is returned here.
+ //
+ void Remove(const GType &);
+#endif
+
+ //
+ // Deep copy member function
+ //
+ Object *Copy() const;
+
+ //
+ // Vector Assignment
+ // The pointer overload forwards to the reference overload.
+ //
+ HtVectorGType &operator= (HtVectorGType *vector) {return *this = *vector;}
+ HtVectorGType &operator= (const HtVectorGType &vector);
+
+
+protected:
+ //
+ // The actual internal data array
+ GType *data;
+
+ //
+ // For traversal it is nice to know where we are...
+ //
+ int current_index;
+
+ //
+ // It's nice to keep track of how many things we contain...
+ // as well as how many slots we've declared
+ //
+ int element_count;
+ int allocated;
+ protected:
+ //
+ // Protected function to ensure capacity
+ // The inline wrapper keeps the common "already big enough" case cheap;
+ // the real work is done out of line in ActuallyAllocate().
+ //
+ inline void Allocate(int capacity)
+ {
+ if (capacity > allocated){ActuallyAllocate(capacity);}
+ }
+ void ActuallyAllocate(int);
+
+ public:
+ //
+ // Add() will append an object to the end of the vector
+ // The object is copied into the vector by assignment.
+ //
+ inline void Add(const GType &object)
+ {
+ Allocate(element_count+1);
+ data[element_count] = object;
+ element_count += 1;
+ }
+
+
+ //
+ // STL like accessors
+ //
+ public:
+ inline int size() const {return Count();}
+ inline void push_back(const GType &v) {Add( v);}
+
+ inline GType * begin() {return(data);}
+ inline const GType * begin() const {return(data);}
+ inline GType * end() {return(data+element_count);}
+ inline const GType * end() const {return(data+element_count);}
+
+ inline GType & back() {return Nth(element_count-1);}
+ inline const GType & back() const {return Nth(element_count-1);}
+ inline void pop_back() {RemoveFrom(size()-1);}
+ // NOTE(review): clear() is currently an empty stub -- it does NOT remove
+ // any elements (see the TODO below).
+ inline void clear() {;}
+
+ void reserve (int n) {Allocate(n);}
+
+// TODO: erase clear resize insert(...) and many others
+
+};
+
+// #endif
+
+
+#ifdef HTVECTORGENERIC_NOTCOMPARABLE
+#undef HTVECTORGENERIC_NOTCOMPARABLE
+#endif
+#undef HtVectorGType
+#undef GType
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGenericCode.h b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGenericCode.h
new file mode 100644
index 00000000..ac809f3b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVectorGenericCode.h
@@ -0,0 +1,262 @@
+//
+// HtVectorGenericCode.h
+//
+// HtVectorGeneric: A Vector class which holds objects of type GType.
+// (A vector is an array that can expand as necessary)
+// This class is very similar in interface to the List class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtVectorGenericCode.h,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+
+//*********************************************************************
+// HtVectorGType::HtVectorGType()
+//   Default constructor: no elements, no traversal position, and room
+//   for four elements allocated up front.
+//
+HtVectorGType::HtVectorGType()
+{
+    current_index = -1;
+    element_count = 0;
+    allocated = 4;
+    data = new GType[4]; // After all, why would anyone want an empty vector?
+}
+
+
+//*********************************************************************
+// HtVectorGType::HtVectorGType(int capacity)
+//   Constructor for callers that already know how many elements they
+//   need: exactly `capacity` slots are allocated, so no regrowth (and
+//   no doubled memory) happens until that many elements were added.
+//
+HtVectorGType::HtVectorGType(int capacity)
+{
+    current_index = -1;
+    element_count = 0;
+    allocated = capacity;
+    data = new GType[capacity];
+}
+
+
+//*********************************************************************
+// HtVectorGType::~HtVectorGType()
+// Destructor: releases the element array through Destroy()
+//
+HtVectorGType::~HtVectorGType()
+{
+ Destroy();
+}
+
+
+
+//*********************************************************************
+// void HtVectorGType::Destroy()
+//   Frees the element array and puts the vector back into an empty,
+//   unallocated state with no traversal position.
+//
+void HtVectorGType::Destroy()
+{
+    delete [] data;         // deleting a null pointer is a no-op
+    data = NULL;
+
+    current_index = -1;
+    element_count = 0;
+    allocated = 0;
+}
+
+
+
+//*********************************************************************
+// void HtVectorGType::Insert(const GType &object, int position)
+//   Insert a copy of object so that it ends up at index `position`;
+//   later elements move up by one.
+//   * position >= Count(): the object is simply appended.
+//   * position < 0: the index is handed to CheckBounds() and nothing is
+//     inserted.
+//
+void HtVectorGType::Insert(const GType &object, int position)
+{
+    if (position < 0)
+    {
+        // Report the bad index, then stop: carrying on would shift
+        // elements through data[-1] and store at a negative index.
+        CheckBounds(position);
+        return;
+    }
+    if (position >= element_count)
+    {
+        Add(object);
+        return;
+    }
+
+    Allocate(element_count + 1);
+    // Open a gap at `position` by moving the tail up, last element first.
+    for (int i = element_count; i > position; i--)
+        data[i] = data[i-1];
+    data[position] = object;
+    element_count += 1;
+}
+
+
+//*********************************************************************
+// void HtVectorGType::RemoveFrom(int position)
+//   Remove the element at index `position`; later elements move down
+//   by one. An index outside [0, Count()) is handed to CheckBounds()
+//   and otherwise ignored.
+//
+void HtVectorGType::RemoveFrom(int position)
+{
+    CheckBounds(position);
+    // Without this guard a bad index would still shift elements
+    // (writing to data[-1] for negative values) and decrement the count,
+    // which can silently drop the last element or go below zero.
+    if (position < 0 || position >= element_count)
+        return;
+
+    for (int i = position; i < element_count - 1; i++)
+    {
+        data[i] = data[i+1];
+    }
+    element_count -= 1;
+}
+
+
+//*********************************************************************
+// GType &HtVectorGType::Get_Next()
+//   Advance the traversal position by one and return a reference to
+//   the element found there. The new position is passed to
+//   CheckBounds() before the element is accessed.
+//
+GType &HtVectorGType::Get_Next()
+{
+    CheckBounds(++current_index);
+    return data[current_index];
+}
+
+
+//*********************************************************************
+// GType &HtVectorGType::Get_First()
+//   Return a reference to the element at index 0, after passing that
+//   index to CheckBounds().
+//
+GType &HtVectorGType::Get_First()
+{
+    CheckBounds(0);
+    return *data;
+}
+
+#ifndef HTVECTORGENERIC_NOTCOMPARABLE
+
+//*********************************************************************
+// int HtVectorGType::Index(const GType &obj)
+//   Linear search: return the index of the first element that does not
+//   compare unequal (operator !=) to obj, or -1 when there is none.
+//
+int HtVectorGType::Index(const GType &obj)
+{
+    for (int i = 0; i < element_count; i++)
+    {
+        if (data[i] != obj)
+            continue;
+        return i;
+    }
+    return -1;
+}
+
+
+//*********************************************************************
+// GType &HtVectorGType::Next(const GType & prev)
+//   Return the element that follows prev. The vector is treated as a
+//   circular list: the successor of the last element is the first one.
+//   The traversal position is moved to the returned element.
+//   If prev is not in the vector, Index() yields -1, which is handed to
+//   CheckBounds(); the first element is then returned.
+//
+GType &HtVectorGType::Next(const GType & prev)
+{
+    current_index = Index(prev);
+    CheckBounds(current_index);
+
+    current_index++;
+    // Wrap around instead of indexing one past the last element.
+    if (element_count > 0 && current_index >= element_count)
+        current_index = 0;
+    return Nth(current_index);
+}
+
+//*********************************************************************
+// GType &HtVectorGType::Previous(const GType & next)
+//   Return the element that precedes next. The vector is treated as a
+//   circular list: the predecessor of the first element is the last
+//   one. The traversal position is moved to the returned element.
+//   If next is not in the vector, Index() yields -1, which is handed to
+//   CheckBounds(); the index is not wrapped in that case.
+//
+GType &HtVectorGType::Previous(const GType & next)
+{
+    current_index = Index(next);
+    CheckBounds(current_index);
+
+    current_index--;
+    // Stepping back from index 0 wraps to the last element instead of
+    // reaching index -1.
+    if (current_index == -1 && element_count > 0)
+        current_index = element_count - 1;
+    return Nth(current_index);
+}
+
+//*********************************************************************
+// void HtVectorGType::Remove(const GType &object)
+//   Remove the first element found by Index(object). When no element
+//   matches, the -1 from Index() is handed to CheckBounds() and the
+//   vector is left untouched.
+//
+void HtVectorGType::Remove(const GType &object)
+{
+    int pos = Index(object);
+    CheckBounds(pos);
+    if (pos < 0)
+        return;         // not found: never pass -1 on to RemoveFrom()
+    RemoveFrom(pos);
+}
+#endif
+
+//*********************************************************************
+// Object *HtVectorGType::Copy() const
+// Return a newly allocated copy of the vector (elements are cloned via Copy() only when HTVECTORGENERIC_OBJECTPTRTYPE is defined).
+//
+Object *HtVectorGType::Copy() const
+{
+ HtVectorGType *vector = new HtVectorGType(allocated); // same capacity as this vector
+
+ for(int i = 0; i < Count(); i++)
+{
+#ifdef HTVECTORGENERIC_OBJECTPTRTYPE
+ vector->Add(data[i]->Copy()); // pointer elements: store a copy of the pointee
+#else
+ vector->Add(data[i]); // value elements: plain element assignment inside Add()
+#endif
+}
+ return vector; // returned with new; nothing in this function deletes it
+}
+
+
+//*********************************************************************
+// HtVectorGType &HtVectorGType::operator=(const HtVectorGType &vector)
+//   Replace the contents of this vector with copies of the elements of
+//   `vector`. Elements are copied by plain assignment through Add();
+//   unlike Copy(), no per-element Copy() call is made.
+//   Assigning a vector to itself leaves it unchanged.
+//
+HtVectorGType &HtVectorGType::operator=(const HtVectorGType &vector)
+{
+    // Self-assignment: Destroy() below would throw away the very
+    // elements we are about to copy and leave the vector empty.
+    if (this == &vector)
+        return *this;
+
+    Destroy();
+
+    // Size the storage once; the doubling policy yields the same
+    // capacity as growing element by element.
+    Allocate(vector.Count());
+    for(int i = 0; i < vector.Count(); i++)
+    {
+        Add(vector.data[i]);
+    }
+    return *this;
+}
+
+
+//*********************************************************************
+// void HtVectorGType::ActuallyAllocate(int capacity)
+//   Out-of-line part of Allocate(): make room for at least `capacity`
+//   elements, keeping the elements already stored.
+//
+void HtVectorGType::ActuallyAllocate(int capacity)
+{
+    if (capacity <= allocated)
+        return;                 // Nothing to do, we are big enough
+
+    GType *previous = data;
+
+    // Grow by doubling so that a long run of Add() calls does not
+    // rebuild the vector every time (which leads to quadratic behavior)
+    if (allocated == 0)
+        allocated = 1;
+    while (allocated < capacity)
+        allocated *= 2;
+
+    data = new GType[allocated];
+
+    // Carry the existing elements over to the new array
+    int i = 0;
+    while (i < element_count)
+    {
+        data[i] = previous[i];
+        i++;
+    }
+
+    delete [] previous;         // no-op when there was no old array
+}
+
+
+#ifdef HTVECTORGENERIC_NOTCOMPARABLE
+#undef HTVECTORGENERIC_NOTCOMPARABLE
+#endif
+
+#undef HtVectorGType
+#undef GType
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVector_String.h b/debian/htdig/htdig-3.2.0b6/htlib/HtVector_String.h
new file mode 100644
index 00000000..5e72e117
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVector_String.h
@@ -0,0 +1,23 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+
+
+#ifndef _HtVector_String_h_
+#define _HtVector_String_h_
+
+
+#include"htString.h"
+
+#define GType String // element type used by the generic vector header
+#define HtVectorGType HtVector_String // name given to the generated class
+#include "HtVectorGeneric.h"
+
+#endif
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtVector_int.h b/debian/htdig/htdig-3.2.0b6/htlib/HtVector_int.h
new file mode 100644
index 00000000..f2bf8a9a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtVector_int.h
@@ -0,0 +1,21 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+
+
+#ifndef _HtVector_int_h_
+#define _HtVector_int_h_
+
+#define GType int // element type used by the generic vector header
+#define HtVectorGType HtVector_int // name given to the generated class
+#include "HtVectorGeneric.h"
+
+
+#endif
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.cc
new file mode 100644
index 00000000..5a4474d4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.cc
@@ -0,0 +1,437 @@
+//
+// HtWordCodec.cc
+//
+// HtWordCodec: Given two lists of pair of "words" 'from' and 'to';
+// simple one-to-one translations, use those lists to translate.
+// Only restriction are that no null (0) characters must be
+// used in "words", and that there is a character "joiner" that
+// does not appear in any word. One-to-one consistency may be
+// checked at construction.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtWordCodec.cc,v 1.9 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtWordCodec.h"
+
+// Do not use 0, so we can use "normal" string routines.
+// Values 1..4 are used to describe how many bytes are used to
+// keep the number. Do not use other than control-characters,
+// as the first character for internal encodings, so the user
+// can use "international" characters (128 .. 255) for cute
+// encodings to use across different configuration files and
+// databases.
+#define JOIN_CHAR 5 // separator used when joining word lists into a match pattern
+#define QUOTE_CHAR 6 // prefix put in front of a quoted "to" string
+#define FIRST_INTERNAL_SINGLECHAR 7 // lowest value used as a one-byte internal encoding
+#define LAST_INTERNAL_SINGLECHAR 31 // highest value used as a one-byte internal encoding
+
+
+// Default constructor: no word lists and no matchers are attached.
+HtWordCodec::HtWordCodec()
+    : myFrom(0),
+      myTo(0),
+      myFromMatch(0),
+      myToMatch(0)
+{
+}
+
+
+// Destructor: deletes both word lists and both matchers. Members that
+// were never set are null, and deleting a null pointer does nothing.
+HtWordCodec::~HtWordCodec()
+{
+    delete myFrom;
+    delete myTo;
+    delete myFromMatch;
+    delete myToMatch;
+}
+
+
+// Adopts the two caller-supplied lists as they are (no consistency
+// checking) and builds one matcher per list. The lists are deleted by
+// the destructor.
+HtWordCodec::HtWordCodec(StringList *from, StringList *to, char joiner)
+{
+    myFromMatch = new StringMatch;
+    myToMatch = new StringMatch;
+
+    myFrom = from;
+    myTo = to;
+
+    // After being initialized with Join, the strings are not
+    // null-terminated, but that is done through "operator char*".
+    String joined_to(myTo->Join(joiner));
+    myToMatch->Pattern(joined_to, joiner);
+
+    String joined_from(myFrom->Join(joiner));
+    myFromMatch->Pattern(joined_from, joiner);
+}
+
+
+// This constructor is the most complicated function in this class.
+// It handles consistency checking for the supplied code-lists.
+//
+//   requested_encodings  alternating "from", "to" strings, one pair per
+//                        translation.
+//   frequent_substrings  strings that get a generated internal encoding;
+//                        entries that collide with a requested encoding
+//                        are skipped.
+//   errmsg               receives a description of the first problem
+//                        found; it is assigned 0 when construction
+//                        completes.
+//
+// All four members are cleared before anything else, so every early
+// error return leaves pointers the destructor can safely delete. The
+// matchers are only created once all checks have passed, so
+// myFromMatch stays NULL after any failure; member functions use it as
+// a sanity check in case the programmer forgets to check errmsg.
+HtWordCodec::HtWordCodec(StringList &requested_encodings,
+                         StringList &frequent_substrings,
+                         String &errmsg)
+{
+    myFrom = 0;
+    myTo = 0;
+    myFromMatch = 0;
+    myToMatch = 0;
+
+    if ((requested_encodings.Count() % 2) != 0)
+    {
+        errmsg =
+            "Expected pairs, got odd number of strings";
+
+        return;
+    }
+
+    myFrom = new StringList;
+    myTo = new StringList;
+
+    // Go through requested_encodings and fill myTo and myFrom.
+    // Check that the "to" strings look remotely sane regarding
+    // reserved characters.
+
+    // Iteration temporaries.
+    String *from;
+    String *to;
+
+    int n_of_pairs = requested_encodings.Count() / 2;
+
+    requested_encodings.Start_Get();
+    while ((from = (String *) requested_encodings.Get_Next()) != NULL)
+    {
+        // Sanity check: Reserve empty strings as we cannot do
+        // anything sane with them.
+
+        int templen = from->length();
+        if (templen == 0)
+        {
+            errmsg = "Empty strings are not allowed";
+            return;
+        }
+
+        myFrom->Add(new String(*from));
+
+        // This must be non-null since we checked "oddness" above.
+        to = (String *) requested_encodings.Get_Next();
+
+        templen = to->length();
+        if (templen == 0)
+        {
+            errmsg = "Empty strings are not allowed";
+            return;
+        }
+
+        // We just have to check that there's no JOIN_CHAR in the
+        // string. Since no "to" is allowed to be part of any other
+        // "to", there will be no ambiguity, even if one would
+        // contain a QUOTE_CHAR (which is documented as invalid anyway).
+        // The "to" string is the one the message talks about; the
+        // "from" string is still checked as well, because it is joined
+        // with JOIN_CHAR into a match pattern further down.
+        if (strchr(to->get(), JOIN_CHAR) != NULL
+            || strchr(from->get(), JOIN_CHAR) != NULL)
+        {
+            errmsg =
+                form("(\"%s\" =>) \"%s\" contains a reserved character (number %d)",
+                     from->get(), to->get(), int(JOIN_CHAR));
+            return;
+        }
+
+        // Loop over the other "to"-strings and check that this
+        // string is not a substring of any other "to", or vice versa.
+        // Return in error if it is so.
+        int i;
+        int count = myTo->Count();
+        for (i = 0; i < count; i++)
+        {
+            String *ith = (String *) myTo->Nth(i);
+
+            // Just check if the shorter string is part of the
+            // longer string.
+            if (to->length() < ith->length()
+                ? ith->indexOf(to->get()) != -1
+                : to->indexOf(ith->get()) != -1)
+            {
+                // %s needs character pointers, not String pointers.
+                errmsg =
+                    form("\"%s\" => \"%s\" collides with (\"%s\" => \"%s\")",
+                         from->get(), to->get(), (*myFrom)[i], ith->get());
+
+                return;
+            }
+        }
+
+        // All ok, just add this one.
+        myTo->Add(new String(*to));
+    }
+
+    // Check that none of the "to"-strings is a substring of any
+    // of the "from" strings, since that's hard to support and
+    // most probably is a user mistake anyway.
+
+    StringMatch req_tos;
+    String req_to_pattern(myTo->Join(JOIN_CHAR));
+    int which, length;
+
+    // The StringMatch functions want the strings
+    // zero-terminated, which is done through "operator char*".
+    req_tos.Pattern(req_to_pattern, JOIN_CHAR);
+
+    // Check the requested encodings.
+    if (n_of_pairs != 0)
+    {
+        int i;
+        for (i = 0; i < n_of_pairs; i++)
+        {
+            from = (String *) myFrom->Nth(i);
+            if (req_tos.FindFirst(from->get(), which, length) != -1)
+            {
+                if (i != which)
+                {
+                    errmsg =
+                        form("(\"%s\" => \"%s\") overlaps (\"%s\" => \"%s\")",
+                             (*myFrom)[which], (*myTo)[which],
+                             from->get(), (*myTo)[i]);
+                }
+                else
+                {
+                    errmsg =
+                        form("Overlap in (\"%s\" => \"%s\")",
+                             from->get(), (*myTo)[i]);
+                }
+
+                return;
+            }
+        }
+    }
+
+    if (frequent_substrings.Count() != 0)
+    {
+        // Make a temporary search-pattern of the requested
+        // from-strings.
+
+        StringMatch req_froms;
+        String req_from_pattern(myFrom->Join(JOIN_CHAR));
+
+        req_froms.Pattern(req_from_pattern, JOIN_CHAR);
+
+        // Continue filling "to" and "from" from frequent_substrings and
+        // internal encodings. If a frequent_substring is found in the
+        // requested from-strings, it is ignored, but the internal
+        // encoding is still ticked up, so that changes in
+        // requested_encodings (e.g. url_part_aliases) do not change
+        // an existing database (e.g. containing common_url_parts).
+
+        int internal_encoding_no = 0;
+
+        String *common_part;
+        frequent_substrings.Start_Get();
+        String to;
+
+        for (;
+             (common_part = (String *) frequent_substrings.Get_Next()) != NULL;
+             internal_encoding_no++)
+        {
+            int templen = common_part->length();
+            if (templen == 0)
+            {
+                errmsg = "Empty strings are not allowed";
+                return;
+            }
+
+            // Is a "From" string in it, or is a "To" string in it?
+            // Note that checking if there are *any* requested
+            // encodings (n_of_pairs) is not just an "optimization";
+            // it is necessary since StringMatch will return 0 (not
+            // -1) if the pattern is empty (FIXME: changing that
+            // breaks something else in another part of ht://Dig).
+
+            if (n_of_pairs
+                && (req_froms.FindFirst(common_part->get()) != -1
+                    || req_tos.FindFirst(common_part->get()) != -1))
+                continue;
+
+            to = 0; // Clear previous run.
+
+            // Dream up an encoding without zeroes.
+            // Use FIRST_INTERNAL_SINGLECHAR .. LAST_INTERNAL_SINGLECHAR
+            // for the first encodings, as much as possible.
+
+            long int number_to_store =
+                internal_encoding_no + FIRST_INTERNAL_SINGLECHAR;
+
+            if (number_to_store <= LAST_INTERNAL_SINGLECHAR)
+            {
+                to << char(number_to_store);
+            }
+            else
+            {
+                // Use <number-of-bytes-in-length>
+                // <number-as-nonzero-bytes> to code the rest.
+                // Note that we assume eight-bit chars here, which
+                // should be ok for all systems you run htdig on.
+                // At least it helps clarity here.
+
+                number_to_store -= LAST_INTERNAL_SINGLECHAR;
+
+                // Make sure highest bit in every byte is "1" by
+                // inserting one there.
+                char to_store[sizeof(number_to_store)+1];
+                int j = 1;
+
+                while (number_to_store > 0x7f)
+                {
+                    number_to_store = ((number_to_store & ~0x7f) << 1)
+                        | 0x80 | (number_to_store & 0x7f);
+
+                    to_store[j++] = char(number_to_store);
+                    number_to_store >>= 8;
+                }
+
+                // Finally, store the highest byte. It too shall have
+                // the highest bit set. This is the easiest way to
+                // adjust it not to be QUOTE_CHAR.
+                to_store[0] = j;
+                to_store[j] = char(number_to_store | 0x80);
+
+                to.append(to_store, j+1);
+            }
+
+            // Add to replacement pairs.
+            myFrom->Add(new String(*common_part));
+            myTo->Add(new String(to));
+        }
+    }
+
+    // Now, add the quoted "to":s to the "to"-list, with the unquoted
+    // "to":s to the "from"-list. This way we do not have to
+    // check for quoting separately. Like this:
+    //   From    To
+    //   foo  :  !
+    //   bar  :  >
+    //   baz  :  $
+    //   !    :  \!
+    //   >    :  \>
+    //   $    :  \$
+    //
+    // Since we checked that none of the "To":s are in a "From" we
+    // can do this.
+
+    myTo->Start_Get();
+    int to_count = myTo->Count();
+    String *current;
+    String temp;
+
+    int i;
+    for (i = 0; i < to_count; i++)
+    {
+        // It works to append *and* iterate through a
+        // StringList, despite not having an iterator class.
+        current = (String *) myTo->Nth(i);
+
+        myFrom->Add(new String(*current));
+
+        temp = 0; // Reset any previous round.
+        temp.append(char(QUOTE_CHAR));
+        temp.append(*current);
+
+        myTo->Add(new String(temp));
+    }
+
+    String to_pattern(myTo->Join(JOIN_CHAR));
+    String from_pattern(myFrom->Join(JOIN_CHAR));
+
+    // StringMatch class has unchecked limits, better check them.
+    // The summed length of the strings in a pattern (the joined length
+    // minus the separators) is an upper limit of what it needs.
+    if (to_pattern.length() - (myTo->Count() - 1) > 0xffff
+        || from_pattern.length() - (myFrom->Count() - 1) > 0xffff)
+    {
+        errmsg = "Limit reached; use fewer encodings";
+        return;
+    }
+
+    // Every check has passed: only now create the matchers, so that a
+    // non-NULL myFromMatch really means "construction succeeded".
+    myFromMatch = new StringMatch;
+    myToMatch = new StringMatch;
+
+    myToMatch->Pattern(to_pattern, JOIN_CHAR);
+    myFromMatch->Pattern(from_pattern, JOIN_CHAR);
+
+    errmsg = 0;
+}
+
+
+// We only need one "coding" function, since quoting and unquoting is
+// handled through the to- and from-lists.
+//
+// Scans orig_string with `match`. Every hit is replaced by the entry of
+// `replacements` that has the index reported for the hit; the text
+// between hits is copied unchanged.
+// Returns an empty string when the codec was never set up (myFromMatch
+// is NULL), and orig_string itself when `replacements` is empty.
+String
+HtWordCodec::code(const String &orig_string, StringMatch &match,
+                  StringList &replacements) const
+{
+    String retval;
+    int offset, which, length;
+    const char *orig;
+
+    // Get a null-terminated string, usable for FindFirst to look at.
+    orig = orig_string.get();
+
+    // Sanity check. If bad use, just return empty strings.
+    if (myFromMatch == NULL)
+    {
+        return retval;
+    }
+
+    // Need to check if "replacements" is empty; that is, if no
+    // transformations should be done. FindFirst() does not return
+    // -1 in this case, it returns 0.
+    if (replacements.Count() == 0)
+        return orig_string;
+
+    // Find the encodings and replace them.
+    while ((offset = match.FindFirst(orig, which, length)) != -1)
+    {
+        // Append the previous part that was not part of a code.
+        retval.append(orig, offset);
+
+        // Replace with the original string.
+        retval.append(replacements[which]);
+
+        // Continue right behind the text that was just replaced.
+        orig += offset + length;
+    }
+
+    // Add the final non-matched part.
+    retval.append(orig);
+
+    return retval;
+}
+
+
+// The asymmetry is caused by swapping both the matching and
+// replacement lists: decoding looks for "to" strings and puts the
+// corresponding "from" strings back.
+// A codec without tables (for instance a default-constructed one)
+// yields an empty string, the same result code() gives for bad use.
+String
+HtWordCodec::decode(const String &orig) const
+{
+    // Never bind references to the targets of null pointers.
+    if (myToMatch == NULL || myFrom == NULL)
+        return String();
+
+    return code(orig, *myToMatch, *myFrom);
+}
+
+
+// Encoding looks for "from" strings and replaces them with the
+// corresponding "to" strings.
+// A codec without tables (for instance a default-constructed one)
+// yields an empty string, the same result code() gives for bad use.
+String
+HtWordCodec::encode(const String &orig) const
+{
+    // Never bind references to the targets of null pointers.
+    if (myFromMatch == NULL || myTo == NULL)
+        return String();
+
+    return code(orig, *myFromMatch, *myTo);
+}
+
+// End of HtWordCodec.cc
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.h b/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.h
new file mode 100644
index 00000000..96b752ff
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordCodec.h
@@ -0,0 +1,70 @@
+//
+// HtWordCodec.h
+//
+// HtWordCodec: Given two lists of pair of "words" 'from' and 'to';
+// simple one-to-one translations, use those lists to translate.
+// Only restriction are that no null (0) characters must be
+// used in "words", and that there is a character "joiner" that
+// does not appear in any word. One-to-one consistency may be
+// checked at construction.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtWordCodec.h,v 1.6 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef __HtWordCodec_h
+#define __HtWordCodec_h
+
+#include "HtCodec.h"
+#include "StringList.h"
+#include "StringMatch.h"
+
+class HtWordCodec : public HtCodec
+{
+public:
+ HtWordCodec();
+ virtual ~HtWordCodec();
+
+ // Set the lists of asymmetric pairs of "words" in "from" and
+ // "to", using:
+ // * one list of requested encodings with two consecutive
+ // items "from" and "to" (in that order) per translation
+ // * one list of just words which HtWordCodec will generate
+ // space-saving encodings for.
+ // Either may be empty.
+ // Items in frequent_substrings will be silently ignored if
+ // they collide with anything in requested_encodings.
+ // errmsg is empty on success, or has a failure message.
+ HtWordCodec(StringList &requested_encodings,
+ StringList &frequest_substrings, String &errmsg);
+
+ // *Or*, set the lists directly, without checking coding
+ // consistency. HtWordCodec will delete these lists when
+ // destroyed. Not really recommended, but this class would be
+ // incomplete without it.
+ HtWordCodec (StringList *from, StringList *to, char joiner = char(1));
+
+ // Same as those in the parent class. Each string to
+ // encode/decode may contain zero or more of words from the
+ // lists. Those words will be replaced.
+ virtual String encode(const String &uncoded) const;
+ virtual String decode(const String &coded) const;
+
+private:
+ HtWordCodec(const HtWordCodec &); // Not supposed to be implemented.
+ void operator= (const HtWordCodec &); // Not supposed to be implemented.
+ StringList *myFrom; // uncoded words; replacements used by decode()
+ StringList *myTo; // coded words; replacements used by encode()
+ StringMatch *myFromMatch; // matcher over myFrom, used by encode()
+ StringMatch *myToMatch; // matcher over myTo, used by decode()
+
+ // Do coding/decoding symmetrically using the provided lookup and lists.
+ String code(const String &, StringMatch& match, StringList& replacements) const;
+};
+
+#endif /* __HtWordCodec_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc
new file mode 100644
index 00000000..cacb9342
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.cc
@@ -0,0 +1,51 @@
+//
+// HtWordType.cc
+//
+// functions for determining valid words/characters
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtWordType.cc,v 1.11 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtWordType.h"
+#include "WordType.h"
+
+int HtIsWordChar(char c) { return WordType::Instance()->IsChar(c); } // each wrapper forwards to WordType::Instance()
+int HtIsStrictWordChar(char c) { return WordType::Instance()->IsStrictChar(c); }
+int HtWordNormalize(String &w) { return WordType::Instance()->Normalize(w); } // w is passed by non-const reference
+int HtStripPunctuation(String &w) { return WordType::Instance()->StripPunctuation(w); } // w is passed by non-const reference
+
+
+// Tokenizer in the spirit of strtok(): pass the buffer on the first
+// call and NULL on the following calls to continue where the previous
+// call stopped. Like strtok() it is destructive (the character after
+// each word is overwritten with '\0') and keeps its position in a
+// static variable, but words are delimited by our own rules.
+// Returns the next word, or 0 when there is none left.
+char *
+HtWordToken(char *str)
+{
+    static unsigned char *prev = 0;
+
+    unsigned char *scan = str ? (unsigned char *)str : prev;
+    char *word = 0;
+
+    // Skip everything that cannot start a word.
+    while (scan && *scan && !HtIsStrictWordChar(*scan))
+        scan++;
+
+    if (scan && *scan)
+    {
+        word = (char *)scan;
+
+        // Run to the end of the word ...
+        while (*scan && HtIsWordChar(*scan))
+            scan++;
+
+        // ... and cut it off, unless the buffer ends here anyway.
+        if (*scan)
+            *scan++ = '\0';
+    }
+
+    prev = scan;
+    return word;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.h b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.h
new file mode 100644
index 00000000..ef4f9bf5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/HtWordType.h
@@ -0,0 +1,27 @@
+//
+// HtWordType.h
+//
+// functions for determining valid words/characters
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtWordType.h,v 1.12 2004/05/28 13:15:21 lha Exp $
+//
+#ifndef _HtWordType_h
+#define _HtWordType_h
+
+#include "htString.h"
+
+extern int HtIsWordChar(char c);
+extern int HtIsStrictWordChar(char c);
+extern int HtWordNormalize(String &w);
+extern int HtStripPunctuation(String &w);
+
+// Like strtok(), but using our rules for word separation.
+extern char *HtWordToken(char *s);
+
+#endif /* _HtWordType_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/IntObject.cc b/debian/htdig/htdig-3.2.0b6/htlib/IntObject.cc
new file mode 100644
index 00000000..0b500edf
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/IntObject.cc
@@ -0,0 +1,37 @@
+//
+// IntObject.cc
+//
+// IntObject: int variable encapsulated in Object derived class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: IntObject.cc,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "IntObject.h"
+
+
+//*******************************************************************************
+// IntObject::IntObject()
+//   Default constructor. The wrapped value starts out as 0, so Value()
+//   never reads an indeterminate int.
+//
+IntObject::IntObject()
+    : value(0)
+{
+}
+
+
+//*******************************************************************************
+// IntObject::~IntObject()
+// Destructor: empty body, nothing is released here
+IntObject::~IntObject()
+{
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/IntObject.h b/debian/htdig/htdig-3.2.0b6/htlib/IntObject.h
new file mode 100644
index 00000000..461a1747
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/IntObject.h
@@ -0,0 +1,39 @@
+//
+// IntObject.h
+//
+// IntObject: int variable encapsulated in Object derived class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: IntObject.h,v 1.7 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _IntObject_h_
+#define _IntObject_h_
+
+#include "Object.h"
+
+class IntObject : public Object
+{
+public:
+    //
+    // Construction/Destruction
+    //
+    IntObject();
+    IntObject(int v) : value(v) {}
+    ~IntObject();
+
+    //
+    // Read, respectively replace, the wrapped integer
+    //
+    int Value() {return value;}
+    void Value(int v) {value = v;}
+
+private:
+    int value;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/List.cc b/debian/htdig/htdig-3.2.0b6/htlib/List.cc
new file mode 100644
index 00000000..dd020aab
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/List.cc
@@ -0,0 +1,511 @@
+//
+// List.cc
+//
+// List: A List class which holds objects of type Object.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: List.cc,v 1.9 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "List.h"
+
+class listnode
+{
+public:
+
+ listnode *next; // following node; 0 for the last node of a list
+ Object *object; // the object pointer stored in this node
+};
+
+
+//*********************************************************************
+// List::List()
+//   Constructor: the list starts out without nodes.
+//
+List::List()
+{
+    number = 0;
+    head = 0;
+    tail = 0;
+}
+
+
+//*********************************************************************
+// List::~List()
+// Destructor: Destroy() deletes every node and the object it holds
+//
+List::~List()
+{
+ Destroy();
+}
+
+
+//*********************************************************************
+// void List::Release()
+//   Empty the list without touching the stored objects: only the
+//   nodes are deleted. The cursor is cleared as well.
+//
+void List::Release()
+{
+    while (listnode *node = head)
+    {
+        head = node->next;
+        delete node;
+    }
+
+    tail = 0;
+    head = 0;
+    number = 0;
+    cursor.Clear();
+}
+
+
+//*********************************************************************
+// void List::Destroy()
+//   Empty the list and delete the stored objects along with their
+//   nodes. The cursor is cleared as well.
+//
+void List::Destroy()
+{
+    while (listnode *node = head)
+    {
+        // Unlink first, then delete the payload and the node itself.
+        head = node->next;
+        delete node->object;
+        delete node;
+    }
+
+    tail = 0;
+    head = 0;
+    number = 0;
+    cursor.Clear();
+}
+
+
+//*********************************************************************
+// void List::Add(Object *object)
+//   Append an object pointer to the end of the list.
+//
+void List::Add(Object *object)
+{
+    listnode *fresh = new listnode;
+    fresh->object = object;
+    fresh->next = 0;
+
+    if (!tail)
+        head = fresh;           // first node of an empty list
+    else
+        tail->next = fresh;
+    tail = fresh;
+
+    ++number;
+}
+
+
+//*********************************************************************
+// void List::Insert(Object *object, int position)
+//   Put an object pointer into the list so that it ends up in front of
+//   the node currently at `position`. When the list has no node at that
+//   position the object is appended. The cursor index is invalidated.
+//
+void List::Insert(Object *object, int position)
+{
+    listnode *fresh = new listnode;
+    fresh->next = 0;
+    fresh->object = object;
+
+    // Walk to the node currently at `position`, remembering the node
+    // in front of it.
+    listnode *before = 0;
+    listnode *at = head;
+    int steps = 0;
+    while (steps < position && at)
+    {
+        before = at;
+        at = at->next;
+        steps++;
+    }
+
+    if (at == 0)
+    {
+        // Ran off the end: append.
+        if (tail)
+            tail->next = fresh;
+        tail = fresh;
+
+        // The list was empty, so the new node is the head too.
+        if (!head)
+            head = fresh;
+    }
+    else if (at == head)
+    {
+        // New first node.
+        fresh->next = head;
+        head = fresh;
+    }
+    else
+    {
+        // Somewhere in the middle.
+        fresh->next = at;
+        before->next = fresh;
+    }
+
+    cursor.current_index = -1;
+    number++;
+}
+
+
+//*********************************************************************
+// void List::Assign(Object *object, int position)
+//   Store an object pointer at an index. The list is padded with null
+//   entries until the index exists; the object previously stored there
+//   is deleted. The cursor index is invalidated.
+//
+void List::Assign(Object *object, int position)
+{
+    //
+    // Pad the list until there is a node at `position`
+    //
+    while (number <= position)
+        Add(0);
+
+    //
+    // Step from the head to that node
+    //
+    listnode *slot = head;
+    int remaining = position;
+    while (slot && remaining > 0)
+    {
+        slot = slot->next;
+        remaining--;
+    }
+
+    cursor.current_index = -1;
+    delete slot->object;
+    slot->object = object;
+}
+
+
+//*********************************************************************
+// int List::Remove(Object *object)
+//   Unlink the first node that holds this object pointer. Only the node
+//   is deleted, the object itself is left alone.
+//   Returns 1 when a node was removed and 0 when the pointer is not in
+//   the list.
+//
+int List::Remove(Object *object)
+{
+    listnode *before = 0;
+
+    for (listnode *node = head; node; before = node, node = node->next)
+    {
+        if (node->object != object)
+            continue;
+
+        //
+        // A Get_Next() sequence may be parked on this very node; move
+        // it on to the following node before the node goes away.
+        //
+        if (cursor.current == node)
+            cursor.current = node->next;
+
+        if (head == tail)
+        {
+            // Only node in the list
+            head = tail = 0;
+        }
+        else if (head == node)
+        {
+            head = head->next;
+        }
+        else if (tail == node)
+        {
+            tail = before;
+            tail->next = 0;
+        }
+        else
+        {
+            before->next = node->next;
+        }
+
+        delete node;
+        number--;
+        cursor.current_index = -1;
+        return 1;
+    }
+
+    return 0;
+}
+
+//*********************************************************************
+// int List::Remove(int position, int action)
+//   Remove the entry whose object pointer operator[] returns for
+//   `position`. With action == LIST_REMOVE_DESTROY the object is
+//   deleted as well. Returns the result of Remove(Object *).
+//
+int List::Remove(int position, int action /* = LIST_REMOVE_DESTROY */)
+{
+    Object *o = List::operator[](position);
+
+    // Unlink the node while the pointer is still valid, and delete the
+    // object only afterwards: the lookup must not work with a pointer
+    // to an object that has already been destroyed.
+    int removed = List::Remove(o);
+    if(action == LIST_REMOVE_DESTROY) delete o;
+    return removed;
+}
+
+//*********************************************************************
+// Object *List::Get_Next(ListCursor& cursor) const
+//   Return the object the cursor is standing on and advance the cursor
+//   to the following node. Returns 0 once the cursor has no current
+//   node.
+//
+Object *List::Get_Next(ListCursor& cursor) const
+{
+    listnode *visited = cursor.current;
+    if (!visited)
+        return 0;
+
+    cursor.prev = visited;
+    cursor.current = visited->next;
+    // A negative index means "unknown" and is left that way.
+    if (cursor.current_index >= 0)
+        cursor.current_index++;
+
+    return visited->object;
+}
+
+
+//*********************************************************************
+// Object *List::Get_First()
+//   Object held by the head node, or 0 for an empty list.  The
+//   traversal cursor is not touched.
+//
+Object *List::Get_First()
+{
+    if (!head)
+        return 0;
+    return head->object;
+}
+
+
+//*********************************************************************
+// int List::Index(Object *obj)
+//   Zero-based position of the first node whose object pointer is `obj',
+//   or -1 if no node holds it (detected as the walk count reaching
+//   `number').
+//
+int List::Index(Object *obj)
+{
+    int pos = 0;
+
+    // Entries are matched by pointer; compare() is not consulted.
+    for (listnode *n = head; n && n->object != obj; n = n->next)
+    {
+        pos++;
+    }
+
+    // A walk that found nothing has counted every node in the list.
+    return (pos >= number) ? -1 : pos;
+}
+
+
+//*********************************************************************
+// Object *List::Next(Object *prev)
+//   Object that follows `prev' in the list.  The successor of the last
+//   element is the first one, so the list appears circular.  Returns 0
+//   if `prev' is not stored in the list.
+//
+Object *List::Next(Object *prev)
+{
+    // Locate the first node that holds `prev'.
+    listnode *node = head;
+    while (node && node->object != prev)
+    {
+        node = node->next;
+    }
+
+    // `prev' is not in the list.
+    if (!node)
+        return 0;
+
+    // Wrap around to the head when `prev' sits in the last node.
+    listnode *succ = node->next ? node->next : head;
+    return succ->object;
+}
+
+
+//*********************************************************************
+// Object *List::Previous(Object *next)
+//   Object stored immediately before `next'.  Unlike Next(), this does
+//   not wrap around: the predecessor of the first element is reported
+//   as 0, and so is an object that is not in the list at all.
+//
+Object *List::Previous(Object *next)
+{
+    listnode *before = 0;
+
+    // Walk the list with `before' trailing one node behind.
+    for (listnode *n = head; n; before = n, n = n->next)
+    {
+        if (n->object != next)
+            continue;
+
+        // Found it.  The head node has no predecessor, so it reports 0
+        // rather than wrapping to the tail.
+        return before ? before->object : 0;
+    }
+
+    // `next' is not stored in this list.
+    return 0;
+}
+
+
+//*********************************************************************
+// Return the nth object in the list, or 0 if n is out of range.  The
+// cursor is left on that node so repeated and sequential lookups are cheap.
+//
+const Object *List::Nth(ListCursor& cursor, int n) const
+{
+    if (n < 0 || n >= number)
+        return 0;
+
+    // Fast path: the cursor already sits on node n.  The index can outlive
+    // the node pointer (Get_Next() past the tail followed by AppendList()),
+    // so only trust it while there is a node to go with it.
+    if (cursor.current && cursor.current_index == n)
+        return cursor.current->object;
+
+    // Fast path: node n is the direct successor of the cursor position.
+    if (cursor.current && cursor.current_index >= 0 && n == cursor.current_index + 1)
+    {
+        cursor.prev = cursor.current;
+        cursor.current = cursor.current->next;
+        if (!cursor.current)
+        {
+            cursor.current_index = -1;
+            return 0;
+        }
+        cursor.current_index = n;
+        return cursor.current->object;
+    }
+
+    // Slow path: count n nodes forward from the head.
+    listnode *temp = head;
+    for (int i = 0; temp && i < n; i++)
+        temp = temp->next;
+    if (!temp)
+        return 0;
+
+    cursor.current_index = n;
+    cursor.current = temp;
+    return temp->object;
+}
+
+
+//*********************************************************************
+// Object *List::Last()
+//   Object held by the tail node, or 0 when the list is empty.
+//
+Object *List::Last()
+{
+    // No tail means no elements at all.
+    if (!tail)
+        return 0;
+
+    Object *last = tail->object;
+    return last;
+}
+
+//*********************************************************************
+// Remove the last node; returns its object (0 if destroyed or list empty).
+Object *List::Pop(int action /* = LIST_REMOVE_DESTROY */)
+{
+    if (!tail)
+        return 0;
+    listnode *last = tail;
+    Object *o = last->object;
+    if (action == LIST_REMOVE_DESTROY) {
+        delete o;
+        o = 0;
+    }
+    // Singly linked: walk from the head to find the node before the tail.
+    listnode *prev = 0;
+    for (listnode *ln = head; ln != last; ln = ln->next)
+        prev = ln;
+    tail = prev;
+    if (tail) tail->next = 0; else head = 0;
+
+    // Free the node, fix the count and keep the cursor off the freed node.
+    if (cursor.current == last) cursor.current = 0;
+    if (cursor.prev == last) cursor.prev = prev;
+    cursor.current_index = -1;
+    delete last;
+    number--;
+    return o;
+}
+
+
+//*********************************************************************
+// Object *List::Copy() const
+//   Return a newly allocated deep copy of the list: every stored object
+//   is duplicated through its own Copy().  Null entries (such as the
+//   padding Assign() creates) are carried over as null entries.
+//
+Object *List::Copy() const
+{
+    List *list = new List;
+
+    // Walk the nodes directly: Get_Next() cannot tell a null entry from
+    // the end of the list and would cut the copy short there.
+    for (listnode *node = head; node; node = node->next)
+        list->Add(node->object ? node->object->Copy() : 0);
+
+    return list;
+}
+
+
+//*********************************************************************
+// List &List::operator=(List &list)
+//   Replace our contents with deep copies of the objects in `list'.
+//   Assigning a list to itself is a no-op.
+List &List::operator=(List &list)
+{
+    if (&list == this)          // Destroy() would wipe the source too
+        return *this;
+
+    Destroy();
+    // Node walk: copies null entries too and leaves list's cursor alone.
+    for (listnode *node = list.head; node; node = node->next)
+        Add(node->object ? node->object->Copy() : 0);
+    return *this;
+}
+
+
+//*********************************************************************
+// void AppendList(List &list)
+//   Splice every node of `list' onto the end of this list.  No objects
+//   are copied and `list' is left empty.  Appending an empty list, or
+//   a list to itself, does nothing.
+//
+void List::AppendList(List &list)
+{
+    if (&list == this || list.number == 0)
+        return;
+
+    if (!tail)
+    {
+        // We hold nothing yet: adopt the other chain as it is.
+        head = list.head;
+        number = list.number;
+    }
+    else
+    {
+        // Hook the other chain behind our current tail.
+        tail->next = list.head;
+        number += list.number;
+    }
+
+    // Either way the other list's tail is now ours.
+    tail = list.tail;
+
+    // The nodes have moved; reset the donor to an empty list.
+    list.number = 0;
+    list.head = 0;
+    list.tail = 0;
+    list.cursor.current = 0;
+    list.cursor.current_index = -1;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/List.h b/debian/htdig/htdig-3.2.0b6/htlib/List.h
new file mode 100644
index 00000000..2a3fcd28
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/List.h
@@ -0,0 +1,190 @@
+//
+// List.h
+//
+// List: A List class which holds objects of type Object.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: List.h,v 1.9 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _List_h_
+#define _List_h_
+
+#include "Object.h"
+
+//
+// Behaviour of the Remove method. See comment before method
+// declaration for more information.
+//
+#define LIST_REMOVE_DESTROY 1
+#define LIST_REMOVE_RELEASE 2
+
+class List;
+class listnode;
+
+class ListCursor {
+ public:
+    // A new cursor starts out detached, exactly as after Clear().
+    ListCursor() : current(0), prev(0), current_index(-1) {}
+    // Detach from any node and forget the cached index.
+    void Clear() { current_index = -1; prev = 0; current = 0; }
+
+    // Traversal state used by Start_Get(), Get_Next() and Nth():
+    listnode *current;    // node whose object the next Get_Next() returns
+    listnode *prev;       // node most recently stepped over
+    int current_index;    // index of `current', or -1 when not tracked
+};
+
+class List : public Object
+{
+public:
+    //
+    // Constructor/Destructor
+    //
+    List();
+    virtual ~List();
+
+    //
+    // Insert at the beginning of the list.
+    //
+    virtual void Unshift(Object *o) { Insert(o, 0); }
+    //
+    // Remove the first element.  A released object is returned to the
+    // caller; a destroyed one yields 0, never a pointer to freed memory.
+    //
+    virtual Object* Shift(int action = LIST_REMOVE_DESTROY) {
+        Object* o = Nth(0);
+        if(Remove(0, action) == NOTOK) return 0;
+        return (action == LIST_REMOVE_DESTROY) ? 0 : o;
+    }
+    //
+    // Append an Object to the end of the list
+    //
+    virtual void Push(Object *o) { Add(o); }
+    //
+    // Remove the last object; it is returned unless it was destroyed (then 0).
+    //
+    virtual Object *Pop(int action = LIST_REMOVE_DESTROY);
+
+    //
+    // Add() will append an Object to the end of the list
+    //
+    virtual void Add(Object *);
+
+    //
+    // Insert() will insert an object at the given position.  If the
+    // position is larger than the number of objects in the list, the
+    // object is appended; no new objects are created between the end
+    // of the list and the given position.
+    //
+    virtual void Insert(Object *, int position);
+
+    //
+    // Assign() will replace the object already at the given position
+    // with the new object.  If there is no object at the position, the
+    // list is extended with nil objects until the position is reached
+    // and then the given object is put there.  (This really makes the
+    // List analogous to a dynamic array...)
+    //
+    virtual void Assign(Object *, int position);
+
+    //
+    // Find the given object in the list and remove it from the list.
+    // The object will NOT be deleted.  Returns 1 if the object was
+    // found and removed, 0 if it is not in the list.
+    //
+    virtual int Remove(Object *);
+
+    //
+    // Remove object at position from the list.  If action is
+    // LIST_REMOVE_DESTROY delete the object stored at position.
+    // If action is LIST_REMOVE_RELEASE the object is not deleted.
+    // Returns 1 if an element was removed, 0 if there was nothing
+    // to remove.
+    //
+    virtual int Remove(int position, int action = LIST_REMOVE_DESTROY);
+
+    //
+    // Release() will set the list to empty.  This call will NOT
+    // delete objects that were in the list before this call.
+    //
+    virtual void Release();
+
+    //
+    // Destroy() will delete all the objects in the list.  This is
+    // equivalent to calling the destructor
+    //
+    virtual void Destroy();
+
+    //
+    // List traversal
+    //
+    void Start_Get() { Start_Get(cursor); }
+    void Start_Get(ListCursor& cursor0) const { cursor0.current = head; cursor0.prev = 0; cursor0.current_index = -1;}
+    Object *Get_Next() { return Get_Next(cursor); }
+    Object *Get_Next(ListCursor& cursor) const;
+    Object *Get_First();
+    Object *Next(Object *current);
+    Object *Previous(Object *current);
+    Object *Last();
+
+    //
+    // Direct access to list items.  This can only be used to retrieve
+    // objects from the list.  To assign new objects, use Insert(),
+    // Add(), or Assign().
+    //
+    Object *operator[] (int n) { return Nth(n); }
+    const Object *operator[] (int n) const { return Nth(((List*)this)->cursor, n); }
+    const Object *Nth(ListCursor& cursor, int n) const;
+    const Object *Nth(int n) const { return Nth(((List*)this)->cursor, n); }
+    Object *Nth(int n) { return (Object*)((List*)this)->Nth(((List*)this)->cursor, n); }
+
+    //
+    // Access to the number of elements
+    //
+    int Count() const { return number; }
+
+    //
+    // Get the index number of an object.  If the object is not found,
+    // returns -1
+    //
+    int Index(Object *);
+
+    //
+    // Deep copy member function
+    //
+    Object *Copy() const;
+
+    //
+    // Assignment
+    //
+    List &operator= (List *list) {return *this = *list;}
+    List &operator= (List &list);
+
+    // Move one list to the end of another, emptying the other list.
+    void AppendList (List &list);
+
+protected:
+    //
+    // Pointers into the list
+    //
+    listnode *head;
+    listnode *tail;
+
+    //
+    // For list traversal it is nice to know where we are...
+    //
+    ListCursor cursor;
+
+    //
+    // It's nice to keep track of how many things we contain...
+    //
+    int number;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Makefile.am b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.am
new file mode 100644
index 00000000..15636583
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.am
@@ -0,0 +1,68 @@
+include $(top_srcdir)/Makefile.config
+
+pkglib_LTLIBRARIES = libht.la
+
+libht_la_SOURCES = Configuration.cc Database.cc Dictionary.cc \
+ DB2_db.cc IntObject.cc List.cc Object.cc \
+ ParsedString.cc Queue.cc QuotedStringList.cc Stack.cc \
+ String.cc StringList.cc StringMatch.cc String_fmt.cc \
+ good_strtok.cc strcasecmp.cc \
+ mktime.c strptime.cc timegm.c \
+ getcwd.c memcpy.c memmove.c raise.c strerror.c \
+ HtCodec.cc HtWordCodec.cc \
+ HtVector.cc HtHeap.cc \
+ HtPack.cc HtDateTime.cc HtRegex.cc HtRegexList.cc \
+ HtRegexReplace.cc HtRegexReplaceList.cc \
+ HtVectorGeneric.cc HtMaxMin.cc HtWordType.cc \
+ myqsort.c \
+ md5.cc mhash_md5.c
+
+libht_la_LIBADD=@LTLIBOBJS@
+# @LTLIBOBJS@ (above) supplies the replacement objects, including regex.c
+
+libht_la_LDFLAGS = -release $(HTDIG_MAJOR_VERSION).$(HTDIG_MINOR_VERSION).$(HTDIG_MICRO_VERSION) ${extra_ldflags}
+
+pkginclude_HEADERS = \
+ clib.h \
+ Configuration.h \
+ DB2_db.h \
+ Database.h \
+ Dictionary.h \
+ HtCodec.h \
+ HtDateTime.h \
+ HtHeap.h \
+ HtPack.h \
+ HtRegex.h \
+ HtRegexList.h \
+ HtRegexReplace.h \
+ HtRegexReplaceList.h \
+ HtVector.h \
+ HtWordCodec.h \
+ IntObject.h \
+ List.h \
+ Object.h \
+ ParsedString.h \
+ Queue.h \
+ QuotedStringList.h \
+ Stack.h \
+ StringList.h \
+ StringMatch.h \
+ good_strtok.h \
+ htString.h \
+ lib.h \
+ gregex.h \
+ HtVectorGeneric.h \
+ HtVectorGenericCode.h \
+ HtVector_int.h \
+ HtVector_String.h \
+ HtMaxMin.h \
+ HtTime.h \
+ HtWordType.h \
+ HtRandom.h \
+ ber.h \
+ libdefs.h \
+ myqsort.h \
+ mhash_md5.h \
+ md5.h
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Makefile.in b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.in
new file mode 100644
index 00000000..bbc58388
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.in
@@ -0,0 +1,579 @@
+# Makefile.in generated by automake 1.7.9 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# To compile with profiling do the following:
+#
+# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_triplet = @host@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+APACHE = @APACHE@
+APACHE_MODULES = @APACHE_MODULES@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CGIBIN_DIR = @CGIBIN_DIR@
+COMMON_DIR = @COMMON_DIR@
+CONFIG_DIR = @CONFIG_DIR@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATABASE_DIR = @DATABASE_DIR@
+DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+FIND = @FIND@
+GUNZIP = @GUNZIP@
+HAVE_SSL = @HAVE_SSL@
+HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@
+HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@
+HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@
+IMAGE_DIR = @IMAGE_DIR@
+IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
+MAKEINFO = @MAKEINFO@
+MV = @MV@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+RANLIB = @RANLIB@
+RRDTOOL = @RRDTOOL@
+SEARCH_DIR = @SEARCH_DIR@
+SEARCH_FORM = @SEARCH_FORM@
+SED = @SED@
+SENDMAIL = @SENDMAIL@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TAR = @TAR@
+TESTS_FALSE = @TESTS_FALSE@
+TESTS_TRUE = @TESTS_TRUE@
+TIME = @TIME@
+TIMEV = @TIMEV@
+USER = @USER@
+VERSION = @VERSION@
+YACC = @YACC@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+exec_prefix = @exec_prefix@
+extra_ldflags = @extra_ldflags@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \
+ -I$(top_srcdir)/include -I$(top_srcdir)/htlib \
+ -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \
+ -I$(top_srcdir)/htword \
+ -I$(top_srcdir)/db -I$(top_builddir)/db \
+ $(LOCAL_DEFINES) $(PROFILING)
+
+
+HTLIBS = $(top_builddir)/htnet/libhtnet.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/htlib/libht.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/db/libhtdb.la \
+ $(top_builddir)/htlib/libht.la
+
+
+pkglib_LTLIBRARIES = libht.la
+
+libht_la_SOURCES = Configuration.cc Database.cc Dictionary.cc \
+ DB2_db.cc IntObject.cc List.cc Object.cc \
+ ParsedString.cc Queue.cc QuotedStringList.cc Stack.cc \
+ String.cc StringList.cc StringMatch.cc String_fmt.cc \
+ good_strtok.cc strcasecmp.cc \
+ mktime.c strptime.cc timegm.c \
+ getcwd.c memcpy.c memmove.c raise.c strerror.c \
+ HtCodec.cc HtWordCodec.cc \
+ HtVector.cc HtHeap.cc \
+ HtPack.cc HtDateTime.cc HtRegex.cc HtRegexList.cc \
+ HtRegexReplace.cc HtRegexReplaceList.cc \
+ HtVectorGeneric.cc HtMaxMin.cc HtWordType.cc \
+ myqsort.c \
+ md5.cc mhash_md5.c
+
+
+libht_la_LIBADD = @LTLIBOBJS@
+
+# this includes regex.c
+libht_la_LDFLAGS = -release $(HTDIG_MAJOR_VERSION).$(HTDIG_MINOR_VERSION).$(HTDIG_MICRO_VERSION) ${extra_ldflags}
+
+pkginclude_HEADERS = \
+ clib.h \
+ Configuration.h \
+ DB2_db.h \
+ Database.h \
+ Dictionary.h \
+ HtCodec.h \
+ HtDateTime.h \
+ HtHeap.h \
+ HtPack.h \
+ HtRegex.h \
+ HtRegexList.h \
+ HtRegexReplace.h \
+ HtRegexReplaceList.h \
+ HtVector.h \
+ HtWordCodec.h \
+ IntObject.h \
+ List.h \
+ Object.h \
+ ParsedString.h \
+ Queue.h \
+ QuotedStringList.h \
+ Stack.h \
+ StringList.h \
+ StringMatch.h \
+ good_strtok.h \
+ htString.h \
+ lib.h \
+ gregex.h \
+ HtVectorGeneric.h \
+ HtVectorGenericCode.h \
+ HtVector_int.h \
+ HtVector_String.h \
+ HtMaxMin.h \
+ HtTime.h \
+ HtWordType.h \
+ HtRandom.h \
+ ber.h \
+ libdefs.h \
+ myqsort.h \
+ mhash_md5.h \
+ md5.h
+
+subdir = htlib
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(pkglib_LTLIBRARIES)
+
+libht_la_DEPENDENCIES = @LTLIBOBJS@
+am_libht_la_OBJECTS = Configuration.lo Database.lo Dictionary.lo \
+ DB2_db.lo IntObject.lo List.lo Object.lo ParsedString.lo \
+ Queue.lo QuotedStringList.lo Stack.lo String.lo StringList.lo \
+ StringMatch.lo String_fmt.lo good_strtok.lo strcasecmp.lo \
+ mktime.lo strptime.lo timegm.lo getcwd.lo memcpy.lo memmove.lo \
+ raise.lo strerror.lo HtCodec.lo HtWordCodec.lo HtVector.lo \
+ HtHeap.lo HtPack.lo HtDateTime.lo HtRegex.lo HtRegexList.lo \
+ HtRegexReplace.lo HtRegexReplaceList.lo HtVectorGeneric.lo \
+ HtMaxMin.lo HtWordType.lo myqsort.lo md5.lo mhash_md5.lo
+libht_la_OBJECTS = $(am_libht_la_OBJECTS)
+
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+ $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libht_la_SOURCES)
+HEADERS = $(pkginclude_HEADERS)
+
+DIST_COMMON = $(pkginclude_HEADERS) $(srcdir)/Makefile.in \
+ $(top_srcdir)/Makefile.config Makefile.am malloc.c memcmp.c \
+ realloc.c regex.c snprintf.c vsnprintf.c
+SOURCES = $(libht_la_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign htlib/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+pkglibLTLIBRARIES_INSTALL = $(INSTALL)
+install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(pkglibdir)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(pkglibdir)/$$f"; \
+ $(LIBTOOL) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(pkglibdir)/$$f; \
+ else :; fi; \
+ done
+
+uninstall-pkglibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ p="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(pkglibdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(pkglibdir)/$$p; \
+ done
+
+clean-pkglibLTLIBRARIES:
+ -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES)
+ @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" = "$$p" && dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libht.la: $(libht_la_OBJECTS) $(libht_la_DEPENDENCIES)
+ $(CXXLINK) -rpath $(pkglibdir) $(libht_la_LDFLAGS) $(libht_la_OBJECTS) $(libht_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+.c.o:
+ $(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+ $(COMPILE) -c `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+
+.c.lo:
+ $(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+.cc.o:
+ $(CXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+.cc.obj:
+ $(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+
+.cc.lo:
+ $(LTCXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+pkgincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+install-pkgincludeHEADERS: $(pkginclude_HEADERS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(pkgincludedir)
+ @list='$(pkginclude_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(pkgincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(pkgincludedir)/$$f"; \
+ $(pkgincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(pkgincludedir)/$$f; \
+ done
+
+uninstall-pkgincludeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkginclude_HEADERS)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(pkgincludedir)/$$f"; \
+ rm -f $(DESTDIR)$(pkgincludedir)/$$f; \
+ done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+CTAGS = ctags
+CTAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/..
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(pkglibdir) $(DESTDIR)$(pkgincludedir)
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-pkgincludeHEADERS
+
+install-exec-am: install-pkglibLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-info-am uninstall-pkgincludeHEADERS \
+ uninstall-pkglibLTLIBRARIES
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-pkglibLTLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am info info-am install \
+ install-am install-data install-data-am install-exec \
+ install-exec-am install-info install-info-am install-man \
+ install-pkgincludeHEADERS install-pkglibLTLIBRARIES \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool pdf \
+ pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am \
+ uninstall-pkgincludeHEADERS uninstall-pkglibLTLIBRARIES
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Makefile.win32 b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.win32
new file mode 100644
index 00000000..07ca32ba
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Makefile.win32
@@ -0,0 +1,34 @@
+
+TARGET = $(LIBDIR)/libht$(LIBSFX)
+
+# ----------------------------------------------------------------------------
+# add new library members to the CXXSRC / SRC lists below
+
+# ----------------------------------------------------------------------------
+
+include ../Makedefs.win32
+
+CXXSRC = Configuration.cc Database.cc Dictionary.cc DB2_db.cc \
+ IntObject.cc List.cc Object.cc ParsedString.cc Queue.cc \
+ QuotedStringList.cc Stack.cc String.cc StringList.cc \
+ StringMatch.cc String_fmt.cc good_strtok.cc strcasecmp.cc \
+ strptime.cc HtCodec.cc HtWordCodec.cc HtVector.cc HtHeap.cc \
+ HtPack.cc HtDateTime.cc HtRegex.cc HtRegexList.cc \
+ HtRegexReplace.cc HtRegexReplaceList.cc HtVectorGeneric.cc \
+ HtMaxMin.cc HtWordType.cc md5.cc
+
+SRC = mktime.c mhash_md5.c timegm.c getcwd.c memcmp.c memcpy.c \
+ memmove.c raise.c strerror.c myqsort.c
+ifdef WINDIR
+SRC += regex_win32.c getopt_local.c
+CXXSRC += filecopy.cc
+endif
+
+CPPFLAGS += -DHAVE_CONFIG_H -I../db -I. -I../htword -I../htcommon
+
+$(TARGET): $(OBJDIRDEP) $(LIBDIRDEP) $(OBJS)
+ $(AR) $(ARFLAGS) $(OBJS)
+
+include ../Makerules.win32
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Object.cc b/debian/htdig/htdig-3.2.0b6/htlib/Object.cc
new file mode 100644
index 00000000..039947a3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Object.cc
@@ -0,0 +1,76 @@
+//
+// Object.cc
+//
+// Object: This baseclass defines how an object should behave.
+// This includes the ability to be put into a list
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Object.cc,v 1.6 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Object.h"
+
+#include <stdio.h>
+
+
+//***************************************************************************
+// Object::Object()
+//   Everything from here to the #endif is compiled only if NOINLINE is defined.
+#ifdef NOINLINE
+Object::Object()
+{
+}
+
+
+//***************************************************************************
+// Object::~Object()
+//
+Object::~Object()
+{
+}
+
+
+//***************************************************************************
+// int Object::compare(Object *)
+//   NOTE(review): Object.h declares compare(const Object &) const; signatures differ.
+int Object::compare(Object *)
+{
+ return 0;
+}
+
+
+//***************************************************************************
+// Object *Object::Copy()
+//   NOTE(review): Object.h declares Copy() const and already defines it inline.
+Object *Object::Copy()
+{
+ return new Object;
+}
+
+
+//***************************************************************************
+// void Object::Serialize(String &)
+//   NOTE(review): class Object in Object.h declares no Serialize() -- confirm.
+void Object::Serialize(String &)
+{
+}
+
+
+//***************************************************************************
+// void Object::Deserialize(String &, int &)
+//   NOTE(review): class Object in Object.h declares no Deserialize() -- confirm.
+void Object::Deserialize(String &, int &)
+{
+}
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Object.h b/debian/htdig/htdig-3.2.0b6/htlib/Object.h
new file mode 100644
index 00000000..29b28b81
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Object.h
@@ -0,0 +1,47 @@
+//
+// Object.h
+//
+// Object: This baseclass defines how an object should behave.
+// This includes the ability to be put into a list
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Object.h,v 1.9 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _Object_h_
+#define _Object_h_
+
+#include "lib.h"
+#include <stdio.h>
+
+class String;
+
+class Object
+{
+public:
+    //
+    // Construction and destruction. The destructor is virtual so that
+    // derived objects can be destroyed through an Object pointer.
+    //
+    Object() {}
+    virtual ~Object() {}
+
+    //
+    // Common comparison interface shared by all kinds of objects.
+    // The base implementation reports every pair as equal (0).
+    //
+    virtual int compare(const Object &) const
+    {
+        return 0;
+    }
+
+    //
+    // Deep-copy interface: returns a freshly allocated copy of the object.
+    // The base implementation complains on stderr that the derived class
+    // supplied no Copy of its own and hands back a plain Object.
+    //
+    virtual Object *Copy() const
+    {
+        fprintf(stderr, "Object::Copy: derived class does not implement Copy\n");
+        return new Object();
+    }
+};
+
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.cc b/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.cc
new file mode 100644
index 00000000..3a774fb6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.cc
@@ -0,0 +1,202 @@
+//
+// ParsedString.cc
+//
+// ParsedString: Contains a string. The string may contain $var, ${var}, $(var)
+// `filename`. The get method will expand those using the
+// dictionary given in argument.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ParsedString.cc,v 1.9 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "ParsedString.h"
+
+#include <ctype.h>
+#include <stdio.h>
+
+
+//*****************************************************************************
+// ParsedString::ParsedString()
+//
+ParsedString::ParsedString()
+{
+}
+
+
+//*****************************************************************************
+// ParsedString::ParsedString(const String& s)
+//   Store s as the unexpanded text; expansion happens later, in get().
+//
+ParsedString::ParsedString(const String& s)
+{
+ value = s;
+}
+
+
+//*****************************************************************************
+// ParsedString::~ParsedString()
+//
+ParsedString::~ParsedString()
+{
+}
+
+
+//*****************************************************************************
+// void ParsedString::set(const String& str)
+//   Replace the stored, unexpanded text with str.
+//
+void
+ParsedString::set(const String& str)
+{
+ value = str;
+}
+
+
+//*****************************************************************************
+// const String ParsedString::get(const Dictionary &dict) const
+//   Return the stored string with every reference expanded.
+//
+// Allowed syntax:
+//   $var        name made of alphanumerics, '_' and '-'
+//   ${var}
+//   $(var)
+//   `filename`  replaced by the contents of the named file; the filename
+//               can also contain variables
+//   \c          the character c, taken literally
+//
+// Variables are looked up in dict. Each entry is treated as a ParsedString
+// and expanded recursively; a name with no entry expands to nothing.
+// A missing closing '}' or ')' is tolerated: the variable is assumed to end
+// where its name ends.
+//
+// NOTE(review): an entry that refers to itself recurses without bound.
+//
+const String
+ParsedString::get(const Dictionary &dict) const
+{
+    String variable;
+    String parsed;
+    ParsedString *temp;
+    const char *str = value.get();
+
+    while (*str)
+    {
+        if (*str == '$')
+        {
+            //
+            // A dollar sign starts a variable, optionally wrapped in
+            // {} or (). delim stays 0 for the bare $var form.
+            //
+            char delim = 0;
+
+            str++;
+            if (*str == '{')
+                delim = '}';
+            else if (*str == '(')
+                delim = ')';
+            if (delim)
+                str++;
+
+            //
+            // Collect the name. The cast keeps isalnum() defined for
+            // characters with the high bit set on signed-char platforms.
+            //
+            variable.trunc();
+            while (isalnum((unsigned char) *str) || *str == '_' || *str == '-')
+            {
+                variable << *str++;
+            }
+
+            //
+            // The name ends here whether we hit the closing delimiter,
+            // an illegal character or the end of the string; the lookup
+            // is the same in every case.
+            //
+            temp = (ParsedString *) dict[variable];
+            if (temp)
+                parsed << temp->get(dict);
+
+            //
+            // Only a matching closing delimiter is consumed.
+            //
+            if (delim && *str == delim)
+                str++;
+        }
+        else if (*str == '`')
+        {
+            //
+            // Back-quote delimits a filename which we need to insert
+            //
+            str++;
+            variable.trunc();
+            while (*str && *str != '`')
+            {
+                variable << *str++;
+            }
+            if (*str == '`')
+                str++;
+            ParsedString filename(variable);
+            variable.trunc();
+            getFileContents(variable, filename.get(dict));
+            parsed << variable;
+        }
+        else if (*str == '\\')
+        {
+            //
+            // Backslash escapes the next character
+            //
+            str++;
+            if (*str)
+                parsed << *str++;
+        }
+        else
+        {
+            //
+            // Normal character
+            //
+            parsed << *str++;
+        }
+    }
+    return parsed;
+}
+
+
+//
+// Append the contents of the named file to str. The file is read in
+// pieces of at most 999 characters; trailing whitespace is stripped from
+// each piece and the pieces are joined by single spaces. If the file
+// cannot be opened, str is left untouched.
+//
+void
+ParsedString::getFileContents(String &str, const String& filename) const
+{
+    char chunk[1000];
+    FILE *in = fopen(filename, "r");
+
+    if (in)
+    {
+        while (fgets(chunk, sizeof(chunk), in))
+        {
+            String piece(chunk);
+            piece.chop("\r\n\t ");
+            str << piece << ' ';
+        }
+        //
+        // Drop the last character: the separator appended after the final
+        // piece, when any piece was read.
+        //
+        str.chop(1);
+        fclose(in);
+    }
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.h b/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.h
new file mode 100644
index 00000000..cd69f933
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/ParsedString.h
@@ -0,0 +1,41 @@
+//
+// ParsedString.h
+//
+// ParsedString: Contains a string. The string may contain $var, ${var}, $(var)
+// `filename`. The get method will expand those using the
+// dictionary given in argument.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ParsedString.h,v 1.9 2004/05/28 13:15:21 lha Exp $
+
+#ifndef _ParsedString_h_
+#define _ParsedString_h_
+
+#include "Object.h"
+#include "htString.h"
+#include "Dictionary.h"
+
+class ParsedString : public Object
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ ParsedString();
+ ParsedString(const String& s);
+ ~ParsedString();
+
+ //
+ // set() stores the unexpanded text; get() returns it with the
+ // $var, ${var}, $(var) and `filename` references expanded against d.
+ //
+ void set(const String& s);
+ const String get(const Dictionary &d) const;
+private:
+ // The unexpanded text.
+ String value;
+
+ // Appends the contents of the named file to str (used for `filename`).
+ void getFileContents(String &str, const String& filename) const;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Queue.cc b/debian/htdig/htdig-3.2.0b6/htlib/Queue.cc
new file mode 100644
index 00000000..2156df0f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Queue.cc
@@ -0,0 +1,112 @@
+//
+// Queue.cc
+//
+// Queue: This class implements a linked list of objects. It itself is also an
+// object
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Queue.cc,v 1.6 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Queue.h"
+
+// One link of the queue's singly linked list.
+struct Queuenode
+{
+ Queuenode *next; // following node, 0 for the last one
+ Object *obj; // the queued object
+};
+
+//***************************************************************************
+// Queue::Queue()
+//
+Queue::Queue()
+    : head(0), tail(0), size(0)
+{
+    // Starts out empty.
+}
+
+
+//***************************************************************************
+// Queue::~Queue()
+//   Deletes every object still held by the queue.
+//
+Queue::~Queue()
+{
+    destroy();
+}
+
+
+//***************************************************************************
+// void Queue::destroy()
+//
+void Queue::destroy()
+{
+    // Pop and delete until the list is empty, then reset the bookkeeping.
+    while (head)
+        delete pop();
+
+    head = tail = 0;
+    size = 0;
+}
+
+
+//***************************************************************************
+// void Queue::push(Object *obj)
+// Push an object onto the Queue.
+//
+void Queue::push(Object *obj)
+{
+    Queuenode *fresh = new Queuenode;
+    fresh->next = 0;
+    fresh->obj = obj;
+
+    // Link the new node behind the current tail, if there is one.
+    Queuenode *last = (Queuenode *) tail;
+    if (last)
+        last->next = fresh;
+    tail = fresh;
+
+    // The first node pushed is also the head.
+    if (!head)
+        head = fresh;
+    size++;
+}
+
+
+//***************************************************************************
+// Object *Queue::pop()
+// Return the object at the head of the Queue and remove it
+//
+Object *Queue::pop()
+{
+    if (size == 0)
+        return 0;
+
+    // Unlink the front node and hand its object to the caller.
+    Queuenode *front = (Queuenode *) head;
+    Object *payload = front->obj;
+
+    head = (void *) front->next;
+    delete front;
+    --size;
+
+    // Removing the last node empties the queue.
+    if (head == 0)
+        tail = 0;
+    return payload;
+}
+
+
+//***************************************************************************
+// Object *Queue::peek()
+// Return the object at the top of the Queue.
+//
+Object *Queue::peek()
+{
+    // The head object stays in the queue; 0 is returned when it is empty.
+    if (size != 0)
+        return ((Queuenode *) head)->obj;
+    return 0;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Queue.h b/debian/htdig/htdig-3.2.0b6/htlib/Queue.h
new file mode 100644
index 00000000..da3066e4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Queue.h
@@ -0,0 +1,52 @@
+//
+// Queue.h
+//
+// Queue: This class implements a linked list of objects. It itself is also an
+// object
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Queue.h,v 1.6 2004/05/28 13:15:21 lha Exp $
+//
+#ifndef _Queue_h_
+#define _Queue_h_
+
+#include "Object.h"
+
+class Queue : public Object
+{
+public:
+ //
+ // Constructors/Destructor
+ //
+ Queue();
+ ~Queue();
+
+ //
+ // Queue access
+ // push() appends at the tail; peek() and pop() work on the head.
+ // peek() and pop() return 0 when the queue is empty.
+ //
+ void push(Object *obj);
+ Object *peek();
+ Object *pop();
+ int Size() {return size;}
+
+ //
+ // Queue destruction
+ // destroy() deletes every object still in the queue.
+ //
+ void destroy();
+
+protected:
+ //
+ // These variables are to keep track of the linked list
+ // (they point at the node type private to Queue.cc)
+ //
+ void *head;
+ void *tail;
+
+ int size;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.cc b/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.cc
new file mode 100644
index 00000000..2d6aa006
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.cc
@@ -0,0 +1,90 @@
+//
+// QuotedStringList.cc
+//
+// QuotedStringList: Fed with a string it will extract separator delimited
+// words and store them in a list. The words may be
+// delimited by " or ', hence the name.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QuotedStringList.cc,v 1.7 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "QuotedStringList.h"
+
+
+//*****************************************************************************
+QuotedStringList::QuotedStringList()
+{
+}
+
+//*****************************************************************************
+// int QuotedStringList::Create(const char *str, const char *sep, int single)
+//   Split str into words and add them to the list.
+//
+//   str     text to split; a null pointer adds nothing
+//   sep     set of separator characters
+//   single  non-zero: every separator ends a word, so consecutive
+//           separators yield empty words; zero: a run of separators
+//           counts as one
+//
+//   Separators inside "..." or '...' do not split; the quote characters
+//   themselves are dropped. A backslash takes the next character literally.
+//   Returns the number of entries in the list.
+//
+int
+QuotedStringList::Create(const char *str, const char *sep, int single)
+{
+    char quote = 0;
+    int quoted = 0;
+    String word;
+
+    while (str && *str)
+    {
+        if (*str == '\\')
+        {
+            if (!str[1])
+                break;
+            word << *++str;
+        }
+        else if (*str == quote)
+        {
+            quote = 0;
+        }
+        else if (!quote && (*str == '"' || *str == '\''))
+        {
+            quote = *str;
+            quoted++;
+        }
+        else if (quote == 0 && strchr(sep, *str))
+        {
+            Add(new String(word));
+            word = 0;
+            quoted = 0;
+            if (!single)
+            {
+                //
+                // Skip the rest of the separator run. strchr() also
+                // matches the terminating NUL, so stop explicitly at the
+                // end of the input instead of walking past it.
+                //
+                while (*str && strchr(sep, *str))
+                    str++;
+                str--;
+            }
+        }
+        else
+            word << *str;
+        str++;
+    }
+
+    //
+    // Add the last word to the list. An empty word is kept when it was
+    // quoted.
+    //
+    if (word.length() || quoted)
+        Add(new String(word));
+    return Count();
+}
+
+
+//*****************************************************************************
+//
+// Single-character separator variant: wraps sep in a one-character
+// separator set and forwards to the set-based Create().
+//
+int
+QuotedStringList::Create(const char *str, char sep, int single)
+{
+    char separators[2];
+
+    separators[0] = sep;
+    separators[1] = '\0';
+    return Create(str, separators, single);
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.h b/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.h
new file mode 100644
index 00000000..d869248d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/QuotedStringList.h
@@ -0,0 +1,47 @@
+//
+// QuotedStringList.h
+//
+// QuotedStringList: Fed with a string it will extract separator delimited
+// words and store them in a list. The words may be
+// delimited by " or ', hence the name.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QuotedStringList.h,v 1.7 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _QuotedStringList_h_
+#define _QuotedStringList_h_
+
+#include "StringList.h"
+
+class QuotedStringList : public StringList
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ QuotedStringList();
+
+ //
+ // Construction of a list from a string or String; each of these
+ // forwards to the matching Create() below.
+ //
+ QuotedStringList(const char *str, char sep = '\t', int single = 0) { Create(str, sep, single); }
+ QuotedStringList(const String &str, char sep = '\t', int single = 0) { Create(str, sep, single); }
+ QuotedStringList(const char *str, const char *sep, int single = 0) { Create(str, sep, single); }
+ QuotedStringList(const String &str, const char *sep, int single = 0) { Create(str, sep, single); }
+
+ //
+ // Split str at sep (one character or a set of characters) and add the
+ // words to the list. The String overloads forward str.get().
+ //
+ int Create(const char *str, char sep = '\t', int single = 0);
+ int Create(const String &str, char sep = '\t', int single = 0) { return Create(str.get(), sep, single); }
+ int Create(const char *str, const char *sep, int single = 0);
+ int Create(const String &str, const char *sep, int single = 0) { return Create(str.get(), sep, single); }
+private:
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Stack.cc b/debian/htdig/htdig-3.2.0b6/htlib/Stack.cc
new file mode 100644
index 00000000..5d74519b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Stack.cc
@@ -0,0 +1,111 @@
+//
+// Stack.cc
+//
+// Stack: This class implements a linked list of objects. It itself is also an
+// object
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Stack.cc,v 1.5 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Stack.h"
+
+// One link of the stack's singly linked list.
+struct stacknode
+{
+ stacknode *next; // node below this one, 0 at the bottom
+ Object *obj; // the stacked object
+};
+
+//***************************************************************************
+// Stack::Stack()
+//
+Stack::Stack()
+    : sp(0), size(0)
+{
+    // Starts out empty.
+}
+
+
+//***************************************************************************
+// Stack::~Stack()
+//   Deletes every object still on the stack.
+//
+Stack::~Stack()
+{
+    destroy();
+}
+
+
+//***************************************************************************
+// void Stack::destroy()
+//   Pop and delete objects until the stack is empty.
+//
+void Stack::destroy()
+{
+    while (sp)
+        delete pop();
+}
+
+
+//***************************************************************************
+// void Stack::push(Object *obj)
+// PURPOSE:
+// Push an object onto the stack.
+//
+void Stack::push(Object *obj)
+{
+    // The new node goes on top and points down at the previous top.
+    stacknode *fresh = new stacknode;
+
+    fresh->next = (stacknode *) sp;
+    fresh->obj = obj;
+    sp = fresh;
+    ++size;
+}
+
+
+//***************************************************************************
+// Object *Stack::pop()
+// PURPOSE:
+// Return the object at the top of the stack and remove it from the stack.
+//
+Object *Stack::pop()
+{
+    if (size == 0)
+        return 0;
+
+    // Unlink the top node and hand its object to the caller.
+    stacknode *top = (stacknode *) sp;
+    Object *payload = top->obj;
+
+    sp = (void *) top->next;
+    delete top;
+    --size;
+    return payload;
+}
+
+
+//***************************************************************************
+// Object *Stack::peek()
+// PURPOSE:
+// Return the object at the top of the stack.
+//
+Object *Stack::peek()
+{
+    // The top object stays on the stack; 0 is returned when it is empty.
+    if (size != 0)
+        return ((stacknode *) sp)->obj;
+    return 0;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/Stack.h b/debian/htdig/htdig-3.2.0b6/htlib/Stack.h
new file mode 100644
index 00000000..82bbc8d2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/Stack.h
@@ -0,0 +1,52 @@
+//
+// Stack.h
+//
+// Stack: This class implements a linked list of objects. It itself is also an
+// object
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Stack.h,v 1.6 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _Stack_h_
+#define _Stack_h_
+
+#include "Object.h"
+
+class Stack : public Object
+{
+public:
+ //
+ // Constructors/Destructor
+ //
+ Stack();
+ ~Stack();
+
+ //
+ // Stack access
+ // peek() and pop() return 0 when the stack is empty.
+ //
+ void push(Object *obj);
+ Object *peek();
+ Object *pop();
+ int Size() {return size;}
+
+ //
+ // Stack destruction
+ // destroy() deletes every object still on the stack.
+ //
+ void destroy();
+
+protected:
+ //
+ // These variables are to keep track of the linked list
+ // (sp points at the top node; the node type is private to Stack.cc)
+ //
+ void *sp;
+
+ int size;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/String.cc b/debian/htdig/htdig-3.2.0b6/htlib/String.cc
new file mode 100644
index 00000000..918a5881
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/String.cc
@@ -0,0 +1,726 @@
+//
+// String.cc
+//
+// String: (interface in htString.h) Just Another String class.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: String.cc,v 1.40 2004/05/28 13:15:21 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+
+#include "htString.h"
+#include "Object.h"
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+const int MinimumAllocationSize = 4; // Should be power of two.
+
+#ifdef NOINLINE
+//
+// Empty string: no buffer is allocated.
+//
+String::String()
+{
+    Data = 0;
+    Length = 0;
+    Allocated = 0;
+}
+#endif
+
+//
+// Empty string with room for init bytes (never fewer than
+// MinimumAllocationSize).
+//
+String::String(int init)
+{
+    Allocated = (init < MinimumAllocationSize) ? MinimumAllocationSize : init;
+    Data = new char[Allocated];
+    Length = 0;
+}
+
+//
+// Copy of a NUL-terminated C string; a null pointer gives an empty string.
+//
+String::String(const char *s)
+{
+    Data = 0;
+    Length = 0;
+    Allocated = 0;
+
+    if (!s)
+        return;
+
+    int len = strlen(s);
+    copy(s, len, len);
+}
+
+//
+// Copy of the first len bytes at s; empty when s is null or len <= 0.
+//
+String::String(const char *s, int len)
+{
+    Data = 0;
+    Length = 0;
+    Allocated = 0;
+
+    if (!s || len <= 0)
+        return;
+    copy(s, len, len);
+}
+
+//
+// Copy constructor.
+//
+String::String(const String &s)
+{
+    Data = 0;
+    Length = 0;
+    Allocated = 0;
+
+    const int n = s.length();
+    if (n > 0)
+        copy(s.Data, n, n);
+}
+
+//
+// This can be used for performance reasons if it is known the
+// String will need to grow: allocation_hint is the amount of space to
+// reserve, raised to the length of s when it is smaller.
+//
+String::String(const String &s, int allocation_hint)
+{
+    Data = 0;
+    Length = 0;
+    Allocated = 0;
+
+    const int n = s.length();
+    if (n == 0)
+        return;
+    copy(s.Data, n, allocation_hint < n ? n : allocation_hint);
+}
+
+//
+// Release the buffer, if one was ever allocated.
+//
+String::~String()
+{
+    if (!Allocated)
+        return;
+    delete [] Data;
+}
+
+//
+// Assign from another String. An empty source only resets the length;
+// the buffer is kept for reuse.
+//
+void String::operator = (const String &s)
+{
+    //
+    // Self-assignment would memcpy the buffer onto itself; nothing to do.
+    //
+    if (this == &s)
+        return;
+
+    const int len = s.length();
+    if (len > 0)
+    {
+        allocate_space(len);
+        Length = len;
+        copy_data_from(s.Data, Length);
+    }
+    else
+    {
+        Length = 0;
+    }
+}
+
+//
+// Assign from a NUL-terminated C string; a null pointer empties the string.
+//
+void String::operator = (const char *s)
+{
+    if (s)
+    {
+        int len = strlen(s);
+        allocate_fix_space(len);
+        Length = len;
+        copy_data_from(s, Length);
+    }
+    else
+        Length = 0;
+}
+
+//
+// Append another String. Appending a String to itself is supported:
+// s.Data is read only after any reallocation has taken place.
+//
+void String::append(const String &s)
+{
+    const int add_len = s.length();
+    if (add_len == 0)
+        return;
+    int new_len = Length + add_len;
+
+    //
+    // Grow only when the result (plus the NUL slot) does not fit, as the
+    // other append() overloads do.
+    //
+    if (new_len + 1 > Allocated)
+        reallocate_space(new_len);
+    copy_data_from(s.Data, add_len, Length);
+    Length = new_len;
+}
+
+//
+// Append a NUL-terminated C string; a null pointer is ignored.
+//
+void String::append(const char *s)
+{
+    if (!s)
+        return;
+
+    append(s,strlen(s));
+}
+
+//
+// Append slen bytes starting at s. A null pointer or a non-positive
+// length is ignored.
+//
+void String::append(const char *s, int slen)
+{
+    if (!s || slen <= 0)
+        return;
+
+    int new_len = Length + slen;
+
+    if (new_len + 1 > Allocated)
+        reallocate_space(new_len);
+    copy_data_from(s, slen, Length);
+    Length = new_len;
+}
+
+//
+// Append a single character.
+//
+void String::append(char ch)
+{
+    int new_len = Length +1;
+    if (new_len + 1 > Allocated)
+        reallocate_space(new_len);
+    Data[Length] = ch;
+    Length = new_len;
+}
+
+//
+// Byte-wise comparison. Returns -1, 0 or 1. When one string is a prefix
+// of the other, the shorter one sorts first.
+//
+int String::compare(const String& obj) const
+{
+    int common = Length;
+    int tie = 0;
+
+    if (Length > obj.Length)
+    {
+        tie = 1;
+        common = obj.Length;
+    }
+    else if (Length < obj.Length)
+        tie = -1;
+
+    for (int i = 0; i < common; i++)
+    {
+        if (Data[i] > obj.Data[i])
+            return 1;
+        if (Data[i] < obj.Data[i])
+            return -1;
+    }
+    //
+    // Equal over the common length: the length difference decides.
+    //
+    return tie;
+}
+
+//
+// Case-insensitive comparison of the NUL-terminated contents.
+//
+int String::nocase_compare(const String &s) const
+{
+    const char *mine = get();
+    const char *theirs = s.get();
+
+    return mystrcasecmp(mine, theirs);
+}
+
+//
+// Write the whole string to file descriptor fd, retrying after short
+// writes. Returns 0 on success or the negative result of the failing
+// write().
+//
+int String::Write(int fd) const
+{
+    char *cursor = Data;
+    int remaining = Length;
+
+    while (remaining != 0)
+    {
+        int written = write(fd, cursor, remaining);
+
+        if (written < 0)
+            return written;
+
+        cursor += written;
+        remaining -= written;
+    }
+    return remaining;
+}
+
+//
+// Read-only access to the contents as a NUL-terminated C string.
+// A string that never allocated a buffer yields "".
+//
+const char *String::get() const
+{
+    static const char *null = "";
+    if (!Allocated)
+        return null;
+    Data[Length] = '\0';	// We always leave room for this.
+    return Data;
+}
+
+//
+// Writable access to the contents as a NUL-terminated C string.
+// A string that never allocated a buffer yields a shared one-byte empty
+// buffer. A real array is used because a string literal cannot be bound
+// to a non-const char pointer; it is reset on every call in case a caller
+// wrote to it.
+//
+char *String::get()
+{
+    static char null[1];
+    if (!Allocated)
+    {
+        null[0] = '\0';
+        return null;
+    }
+    Data[Length] = '\0';	// We always leave room for this.
+    return Data;
+}
+
+//
+// Return a copy of the contents in a new char array that the caller must
+// release with delete []. The copy stops at the first NUL byte.
+//
+char *String::new_char() const
+{
+    char *r;
+    if (!Allocated)
+    {
+        r = new char[1];
+        *r = '\0';
+        return r;
+    }
+    Data[Length] = '\0';	// We always leave room for this.
+    r = new char[Length + 1];
+    strcpy(r, Data);
+    return r;
+}
+
+
+//
+// Contents converted with atoi(); def is returned for an empty string.
+//
+int String::as_integer(int def) const
+{
+    if (Length > 0)
+    {
+        Data[Length] = '\0';
+        return atoi(Data);
+    }
+    return def;
+}
+
+//
+// Contents converted with atof(); def is returned for an empty string.
+//
+double String::as_double(double def) const
+{
+    if (Length > 0)
+    {
+        Data[Length] = '\0';
+        return atof(Data);
+    }
+    return def;
+}
+
+//
+// Return the substring of at most len bytes beginning at start. A start
+// outside the string (negative or beyond the end) yields an empty String;
+// len is clipped to what is available.
+//
+String String::sub(int start, int len) const
+{
+    if (start < 0 || start > Length)
+        return String();
+
+    if (len > Length - start)
+        len = Length - start;
+
+    return String(Data + start, len);
+}
+
+//
+// Return everything from start to the end of the string.
+//
+String String::sub(int start) const
+{
+    return sub(start, Length - start);
+}
+
+//
+// Return the offset of the first occurrence of the C string str, or -1
+// when it does not occur or the string has no buffer. Uses strstr() when
+// HAVE_STRSTR is defined, otherwise a strncmp() scan.
+//
+int String::indexOf(const char *str) const
+{
+ char *c;
+ //
+ // Set the first char after string end to zero to prevent finding
+ // substrings including symbols after actual end of string
+ //
+ if (!Allocated)
+ return -1;
+ Data[Length] = '\0';
+
+ /* OLD CODE: for (i = 0; i < Length; i++) */
+#ifdef HAVE_STRSTR
+ if ((c = strstr(Data, str)) != NULL)
+ return(c -Data);
+#else
+ // Try every start offset at which str could still fit.
+ int len = strlen(str);
+ int i;
+ for (i = 0; i <= Length-len; i++)
+ {
+ if (strncmp(&Data[i], str, len) == 0)
+ return i;
+ }
+#endif
+ return -1;
+}
+
+//
+// Offset of the first occurrence of ch, or -1.
+//
+int String::indexOf(char ch) const
+{
+    int i;
+    for (i = 0; i < Length; i++)
+    {
+        if (Data[i] == ch)
+            return i;
+    }
+    return -1;
+}
+
+//
+// Offset of the first occurrence of ch at or after pos, or -1.
+// A negative pos searches from the beginning instead of indexing in
+// front of the buffer.
+//
+int String::indexOf(char ch, int pos) const
+{
+    if (pos >= Length)
+        return -1;
+    if (pos < 0)
+        pos = 0;
+    for (int i = pos; i < Length; i++)
+    {
+        if (Data[i] == ch)
+            return i;
+    }
+    return -1;
+}
+
+//
+// Offset of the last occurrence of ch at or before pos, or -1.
+// A pos at or beyond the end of the string yields -1.
+//
+int String::lastIndexOf(char ch, int pos) const
+{
+    if (pos >= Length)
+        return -1;
+    while (pos >= 0)
+    {
+        if (Data[pos] == ch)
+            return pos;
+        pos--;
+    }
+    return -1;
+}
+
+//
+// Offset of the last occurrence of ch, or -1.
+//
+int String::lastIndexOf(char ch) const
+{
+    return lastIndexOf(ch, Length - 1);
+}
+#ifdef NOINLINE
+//
+// Append a C string.
+//
+String &String::operator << (const char *str)
+{
+    append(str);
+    return *this;
+}
+
+//
+// Append a single character.
+//
+String &String::operator << (char ch)
+{
+    append(&ch, 1);
+    return *this;
+}
+#endif
+
+//
+// The numeric inserters append the decimal text of their argument.
+// Buffers are sized from the type: a byte never needs more than three
+// decimal digits, plus one character for the sign and one for the NUL.
+// (A fixed 20-byte buffer is one byte short for the most negative 64-bit
+// long.)
+//
+String &String::operator << (int i)
+{
+    char str[3 * sizeof(int) + 2];
+    sprintf(str, "%d", i);
+    append(str);
+    return *this;
+}
+
+String &String::operator << (unsigned int i)
+{
+    char str[3 * sizeof(unsigned int) + 2];
+    sprintf(str, "%u", i);
+    append(str);
+    return *this;
+}
+
+String &String::operator << (long l)
+{
+    char str[3 * sizeof(long) + 2];
+    sprintf(str, "%ld", l);
+    append(str);
+    return *this;
+}
+
+//
+// Append another String.
+//
+String &String::operator << (const String &s)
+{
+    append(s.get(), s.length());
+    return *this;
+}
+
+//
+// Remove the last character from the string and return it; '\0' is
+// returned when the string is empty. The argument is taken by value, so
+// the caller's variable is not modified.
+//
+char String::operator >> (char c)
+{
+    if (!Allocated || !Length)
+        return '\0';
+
+    Length--;
+    c = Data[Length];
+    Data[Length] = '\0';
+    return c;
+}
+
+//
+// Convert upper-case characters to lower case in place.
+// Returns the number of characters changed.
+//
+int String::lowercase()
+{
+    int changed = 0;
+
+    for (int pos = 0; pos < Length; ++pos)
+    {
+        const unsigned char ch = (unsigned char) Data[pos];
+        if (!isupper(ch))
+            continue;
+        Data[pos] = tolower(ch);
+        ++changed;
+    }
+    return changed;
+}
+
+
+//
+// Convert lower-case characters to upper case in place.
+// Returns the number of characters changed.
+//
+int String::uppercase()
+{
+    int changed = 0;
+
+    for (int pos = 0; pos < Length; ++pos)
+    {
+        const unsigned char ch = (unsigned char) Data[pos];
+        if (!islower(ch))
+            continue;
+        Data[pos] = toupper(ch);
+        ++changed;
+    }
+    return changed;
+}
+
+
+//
+// Replace every occurrence of c1 with c2.
+//
+void String::replace(char c1, char c2)
+{
+    for (int pos = 0; pos < Length; ++pos)
+    {
+        if (Data[pos] != c1)
+            continue;
+        Data[pos] = c2;
+    }
+}
+
+
+//
+// Delete every character that occurs in chars, compacting the string in
+// place. Returns the number of characters removed.
+//
+int String::remove(const char *chars)
+{
+    if (Length <= 0)
+        return 0;
+
+    int kept = 0;
+
+    for (int i = 0; i < Length; i++)
+    {
+        const char ch = Data[i];
+        if (!strchr(chars, ch))
+            Data[kept++] = ch;
+    }
+
+    const int skipped = Length - kept;
+    Length = kept;
+    return skipped;
+}
+
+//
+// Remove the last n characters (all of them when n exceeds the length).
+// A non-positive n is ignored; a negative count would otherwise lengthen
+// the string past its valid data.
+//
+String &String::chop(int n)
+{
+    if (n <= 0)
+        return *this;
+    Length -= n;
+    if (Length < 0)
+        Length = 0;
+    return *this;
+}
+
+
+//
+// Remove every trailing occurrence of ch.
+//
+String &String::chop(char ch)
+{
+    while (Length > 0 && Data[Length - 1] == ch)
+        Length--;
+    return *this;
+}
+
+
+//
+// Remove trailing characters for as long as they occur in str.
+//
+String &String::chop(const char *str)
+{
+    while (Length > 0 && strchr(str, Data[Length - 1]))
+        Length--;
+    return *this;
+}
+
+
+//
+// Append this string to dest as its length (the raw bytes of an int)
+// followed by its contents.
+//
+void String::Serialize(String &dest)
+{
+    dest.append((char *) &Length, sizeof(Length));
+    dest.append(get(), Length);
+}
+
+
+//
+// Read back a string written by Serialize() from source, starting at
+// index, and advance index past it. If the record does not fit inside
+// source (truncated data, negative index or negative stored length) the
+// string is emptied and index is left unchanged, instead of copying from
+// outside the source buffer.
+//
+void String::Deserialize(String &source, int &index)
+{
+    const int header = (int) sizeof(Length);
+    int stored = 0;
+
+    if (index < 0 || source.length() - index < header)
+    {
+        Length = 0;
+        return;
+    }
+    memcpy((char *) &stored, (char *) source.get() + index, sizeof(stored));
+    if (stored < 0 || stored > source.length() - index - header)
+    {
+        Length = 0;
+        return;
+    }
+
+    index += header;
+    allocate_fix_space(stored);
+    Length = stored;
+    copy_data_from(source.get() + index, Length);
+    index += Length;
+}
+
+
+//------------------------------------------------------------------------
+// Non member operators.
+//
+//
+// Concatenation. The result is created with room for both operands.
+//
+String operator + (const String &a, const String &b)
+{
+    String combined(a, a.length() + b.length());
+
+    combined.append(b);
+    return combined;
+}
+
+//
+// Relational operators, all expressed through String::compare(), which
+// returns -1, 0 or 1.
+//
+int operator == (const String &a, const String &b)
+{
+    // Different lengths can never be equal; skip the byte comparison.
+    return a.Length == b.Length && a.compare(b) == 0;
+}
+
+int operator != (const String &a, const String &b)
+{
+    return a.compare(b) != 0;
+}
+
+int operator < (const String &a, const String &b)
+{
+    return a.compare(b) < 0;
+}
+
+int operator > (const String &a, const String &b)
+{
+    return a.compare(b) > 0;
+}
+
+int operator <= (const String &a, const String &b)
+{
+    return a.compare(b) <= 0;
+}
+
+int operator >= (const String &a, const String &b)
+{
+    return a.compare(b) >= 0;
+}
+
+#ifndef NOSTREAM
+//
+// Stream output: writes exactly length() bytes.
+//
+ostream &operator << (ostream &o, const String &s)
+{
+    o.write(s.Data, s.length());
+    return o;
+}
+#endif /* NOSTREAM */
+
+//------------------------------------------------------------------------
+// Private Methods.
+//
+
+//
+// Copy len bytes from s into the buffer at dest_offset. The caller must
+// already have made the buffer large enough.
+//
+void String::copy_data_from(const char *s, int len, int dest_offset)
+{
+ memcpy(Data + dest_offset, s, len);
+}
+
+//
+// Make sure the buffer can hold len bytes plus a terminating NUL.
+// If it has to grow, the old buffer is deleted WITHOUT preserving its
+// contents and the new size is MinimumAllocationSize doubled until it fits.
+//
+void String::allocate_space(int len)
+{
+ len++; // In case we want to add a null.
+
+ if (len <= Allocated)
+ return;
+
+ if (Allocated)
+ delete [] Data;
+
+ Allocated = MinimumAllocationSize;
+ while (Allocated < len)
+ Allocated <<= 1;
+
+ Data = new char[Allocated];
+}
+
+//
+// Like allocate_space(), but the new buffer is sized exactly (len + 1,
+// never less than MinimumAllocationSize) instead of being rounded up by
+// doubling. The old contents are not preserved either.
+//
+void String::allocate_fix_space(int len)
+{
+ len++; // In case we want to add a null.
+
+ if (len <= Allocated)
+ return;
+
+ if (Allocated)
+ delete [] Data;
+
+ Allocated = len;
+ if (Allocated < MinimumAllocationSize)
+ Allocated = MinimumAllocationSize;
+ Data = new char[Allocated];
+}
+
+//
+// Move to a new buffer able to hold len bytes, keeping the current
+// contents (the first Length bytes). Allocated is zeroed first so that
+// allocate_space() neither deletes the old buffer nor returns early; as a
+// result a new buffer is always allocated when one already exists.
+//
+void String::reallocate_space(int len)
+{
+ char *old_data = 0;
+ int old_data_len = 0;
+
+ if (Allocated)
+ {
+ old_data = Data;
+ old_data_len = Length;
+ Allocated = 0;
+ }
+ allocate_space(len);
+ if (old_data)
+ {
+ copy_data_from(old_data, old_data_len);
+ delete [] old_data;
+ }
+}
+
+//
+// Replace the contents with len bytes from s, reserving room for
+// allocation_hint bytes. Does nothing when either value is zero.
+//
+void String::copy(const char *s, int len, int allocation_hint)
+{
+ if (len == 0 || allocation_hint == 0)
+ return; // We're not actually copying anything!
+ allocate_fix_space(allocation_hint);
+ Length = len;
+ copy_data_from(s, len);
+}
+
+#ifndef NOSTREAM
+//
+// Dump the length, the allocated size, the buffer address and the
+// contents to o.
+//
+void String::debug(ostream &o)
+{
+ o << "Length: " << Length << " Allocated: " << Allocated <<
+ " Data: " << ((void*) Data) << " '" << *this << "'\n";
+}
+#endif /* NOSTREAM */
+
+//
+// Read one line of any length from in, replacing the current contents.
+// The trailing newline is removed. Returns 1 when a complete line was
+// read; at end of input returns 1 only if some characters were read.
+//
+int String::readLine(FILE *in)
+{
+ Length = 0;
+ allocate_fix_space(2048);
+
+ // Each fgets() appends behind what has been read so far.
+ while (fgets(Data + Length, Allocated - Length, in))
+ {
+ Length += strlen(Data + Length);
+ if (Length == 0)
+ continue;
+ if (Data[Length - 1] == '\n')
+ {
+ //
+ // A full line has been read. Return it.
+ //
+ chop('\n');
+ return 1;
+ }
+ if (Allocated > Length + 1)
+ {
+ //
+ // Not all available space filled. Probably EOF?
+ //
+ continue;
+ }
+ //
+ // Only a partial line was read. Increase available space in
+ // string and read some more.
+ //
+ reallocate_space(Allocated << 1);
+ }
+ chop('\n');
+
+ return Length > 0;
+}
+
+#ifndef NOSTREAM
+//
+// Read one line of any length from the stream into line, replacing its
+// contents. The buffer is doubled and reading resumes for as long as
+// getline() fails with the buffer full.
+//
+istream &operator >> (istream &in, String &line)
+{
+ line.Length = 0;
+ line.allocate_fix_space(2048);
+
+ for (;;)
+ {
+ // Reset the stream state left by a previous partial read.
+ in.clear();
+ in.getline(line.Data + line.Length, line.Allocated - line.Length);
+ line.Length += strlen(line.Data + line.Length);
+ // if read whole line, or eof, or read fewer chars than the max...
+ if (!in.fail() || in.eof() || line.Length + 1 < line.Allocated)
+ break;
+ //
+ // Only a partial line was read. Increase available space in
+ // string and read some more.
+ //
+ line.reallocate_space(line.Allocated << 1);
+ }
+
+ return in;
+}
+#endif /* NOSTREAM */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/StringList.cc b/debian/htdig/htdig-3.2.0b6/htlib/StringList.cc
new file mode 100644
index 00000000..28e03a4c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/StringList.cc
@@ -0,0 +1,192 @@
+//
+// StringList.cc
+//
+// StringList: Specialized List containing String objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: StringList.cc,v 1.14 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "StringList.h"
+#include "htString.h"
+#include "List.h"
+
+#include <stdlib.h>
+
+
+//*****************************************************************************
+// StringList::StringList()
+//
+// Default constructor: the body is empty, so nothing is added to the list.
+StringList::StringList()
+{
+}
+
+//*****************************************************************************
+// int StringList::Create(const char *str, const char *sep)
+//
+// Split 'str' into words at every character that occurs in 'sep' and hand
+// each word to List::Add() as a newly allocated String.  Runs of
+// separators never produce empty entries.  A null or empty 'str' adds
+// nothing.  Returns Count() after the additions.
+//
+int StringList::Create(const char *str, const char *sep)
+{
+    String current;
+
+    if (str)
+    {
+        for (const char *p = str; *p; ++p)
+        {
+            if (!strchr(sep, *p))
+            {
+                // Ordinary character: extend the word being collected.
+                current << *p;
+                continue;
+            }
+
+            // Separator: flush the word collected so far, if any.
+            if (current.length())
+            {
+                List::Add(new String(current));
+                current = 0;
+            }
+        }
+    }
+
+    //
+    // The text after the last separator still has to be stored.
+    //
+    if (current.length())
+        List::Add(new String(current));
+
+    return Count();
+}
+
+
+//*****************************************************************************
+// int StringList::Create(const char *str, char sep)
+//
+// Split 'str' into words at every occurrence of the character 'sep' and
+// hand each word to List::Add() as a newly allocated String.  Runs of
+// separators never produce empty entries.  A null or empty 'str' adds
+// nothing.  Returns Count() after the additions.
+//
+int StringList::Create(const char *str, char sep)
+{
+    String current;
+
+    if (str)
+    {
+        for (const char *p = str; *p; ++p)
+        {
+            if (*p != sep)
+            {
+                // Ordinary character: extend the word being collected.
+                current << *p;
+                continue;
+            }
+
+            // Separator: flush the word collected so far, if any.
+            if (current.length())
+            {
+                List::Add(new String(current));
+                current = 0;
+            }
+        }
+    }
+
+    //
+    // The text after the last separator still has to be stored.
+    //
+    if (current.length())
+        List::Add(new String(current));
+
+    return Count();
+}
+
+
+//*****************************************************************************
+// char *StringList::operator [] (int n)
+//
+// Returns String::get() of the object Nth(n) yields, or 0 when Nth(n)
+// yields no object.
+//
+char *StringList::operator [] (int n)
+{
+    String *entry = (String *) Nth(n);
+
+    return entry ? entry->get() : (char *) 0;
+}
+
+
+//*****************************************************************************
+// void StringList::Add(const char *str)
+//
+// Allocates a new String from 'str' and passes it to List::Add().
+//
+void StringList::Add(const char *str)
+{
+    String *copy = new String(str);
+
+    List::Add(copy);
+}
+
+
+//*****************************************************************************
+// void StringList::Assign(const char *str, int pos)
+//
+// Allocates a new String from 'str' and passes it to List::Assign()
+// together with 'pos'.
+//
+void StringList::Assign(const char *str, int pos)
+{
+    String *copy = new String(str);
+
+    List::Assign(copy, pos);
+}
+
+//*****************************************************************************
+// void StringList::Insert(const char *str, int pos)
+//
+// Allocates a new String from 'str' and passes it to List::Insert()
+// together with 'pos'.
+//
+void StringList::Insert(const char *str, int pos)
+{
+    String *copy = new String(str);
+
+    List::Insert(copy, pos);
+}
+
+//*****************************************************************************
+// static int StringCompare(const void *a, const void *b)
+//
+// qsort() comparison callback used by StringList::Sort().  'a' and 'b'
+// each point at a String* array slot; the two strings are ordered by
+// strcmp() on their get() data.
+//
+static int StringCompare(const void *a, const void *b)
+{
+    String *lhs = *((String **) a);
+    String *rhs = *((String **) b);
+
+    return strcmp(lhs->get(), rhs->get());
+}
+
+
+//*****************************************************************************
+// void StringList::Sort(int direction)
+//
+// Sort the list in ascending strcmp() order.  The direction argument is
+// accepted for interface compatibility but is not used: the result is
+// always ascending.
+//
+// The String pointers are copied into a temporary array, sorted with
+// qsort(), and added back in sorted order after Release() has been
+// called on the list.
+// NOTE(review): this relies on Release() dropping the list entries
+// without deleting the String objects -- confirm against List.
+//
+void StringList::Sort(int)
+{
+    int n = Count();
+    String **array = new String*[n];
+    int filled = 0;
+
+    ListCursor cursor;
+    Start_Get(cursor);
+
+    // Count what the cursor really delivers, so that qsort() never looks
+    // at slots that were not filled in.
+    Object *obj;
+    while (filled < n && (obj = Get_Next(cursor)))
+        array[filled++] = (String *) obj;
+
+    qsort((char *) array, (size_t) filled, (size_t) sizeof(String *),
+          StringCompare);
+
+    Release();
+
+    for (int i = 0; i < filled; i++)
+        List::Add(array[i]);
+
+    // Allocated with new[], so it must be released with delete [].
+    delete [] array;
+}
+
+//*****************************************************************************
+// String StringList::Join(char sep) const
+//
+// Concatenate all entries into one String, placing 'sep' between them.
+// The separator is written only when the result built so far is not
+// empty, so empty entries at the front do not produce a leading 'sep'.
+//
+String StringList::Join(char sep) const
+{
+    String joined;
+
+    for (int idx = 0; idx < number; ++idx)
+    {
+        const String *item = (const String *) Nth(idx);
+
+        if (joined.length())
+            joined.append(sep);
+        joined.append(*item);
+    }
+
+    return joined;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/StringList.h b/debian/htdig/htdig-3.2.0b6/htlib/StringList.h
new file mode 100644
index 00000000..63a8dc54
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/StringList.h
@@ -0,0 +1,73 @@
+//
+// StringList.h
+//
+// StringList: Specialized List containing String objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: StringList.h,v 1.12 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _StringList_h_
+#define _StringList_h_
+
+#include "Object.h"
+#include "List.h"
+#include "htString.h"
+
+
+//
+// A List specialised for String objects: it can be built by splitting a
+// character string at separators, and offers sorting, joining and
+// indexed access to the character data of its entries.
+//
+class StringList : public List
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ StringList();
+
+ //
+ // Creation of a StringList by splitting a C string or a String, either
+ // at a single separator character or at any character of a separator
+ // set.  The constructors simply forward to Create().
+ //
+ StringList(const char *str, char sep = '\t') { Create(str, sep); }
+ StringList(const String &str, char sep = '\t') { Create(str, sep); }
+ StringList(const char *str, const char *sep) { Create(str, sep); }
+ StringList(const String &str, const char *sep) { Create(str, sep); }
+
+ // Each Create() returns an int; the String overloads forward str.get()
+ // to the const char * versions.
+ int Create(const char *str, char sep = '\t');
+ int Create(const String &str, char sep = '\t') { return Create(str.get(), sep); }
+ int Create(const char *str, const char *sep);
+ int Create(const String &str, const char *sep) { return Create(str.get(), sep); }
+
+ //
+ // Standard List operations...
+ // The String * overloads pass the given object straight on to List.
+ //
+ void Add(const char *);
+ void Add(String *obj) { List::Add(obj); }
+ void Insert(const char *, int pos);
+ void Insert(String *obj, int pos) { List::Insert(obj, pos); }
+ void Assign(const char *, int pos);
+ void Assign(String *obj, int pos) { List::Assign(obj, pos); }
+
+ //
+ // Since we know we only store strings, we can reliably sort them.
+ // The interface intends direction == 1 to mean descending order.
+ // NOTE(review): the definition in StringList.cc leaves the parameter
+ // unnamed and unused, so the order is currently always the same.
+ //
+ void Sort(int direction = 0);
+
+ //
+ // Join the elements of the StringList together, separated by the given
+ // character.
+ //
+ String Join(char) const;
+
+ //
+ // Getting at the parts of the StringList
+ //
+ char *operator [] (int n);
+
+private:
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.cc b/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.cc
new file mode 100644
index 00000000..b1512cc3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.cc
@@ -0,0 +1,601 @@
+//
+// StringMatch.cc
+//
+// StringMatch: This class provides an interface to a fairly specialized string
+// lookup facility. It is intended to be used as a replacement for any
+// regular expression matching when the pattern string is in the form:
+//
+// <string1>|<string2>|<string3>|...
+//
+// Just like regular expression routines, the pattern needs to be
+// compiled before it can be used. This is done using the Pattern()
+// member function. Once the pattern has been compiled, the member
+// function Find() can be used to search for the pattern in a string.
+// If a string has been found, the "which" and "length" parameters
+// will be set to the string index and string length respectively.
+// (The string index is counted starting from 0) The return value of
+// Find() is the position at which the string was found or -1 if no
+// strings could be found. If a case insensitive match needs to be
+// performed, call the IgnoreCase() member function before calling
+// Pattern(). This function will setup a character translation table
+// which will convert all uppercase characters to lowercase. If some
+// other translation is required, the TranslationTable() member
+// function can be called to provide a custom table. This table needs
+// to be 256 characters.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: StringMatch.cc,v 1.18 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "StringMatch.h"
+
+#include <string.h>
+#include <ctype.h>
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+//
+// Entries in the state table can either be normal or final.
+// Final states have a match index encoded in them. This number
+// is shifted left by INDEX_SHIFT bits.
+//
+#define MATCH_INDEX_MASK 0xffff0000
+#define STATE_MASK 0x0000ffff
+#define INDEX_SHIFT 16
+
+//*****************************************************************************
+// StringMatch::StringMatch()
+//
+// Start with no translation table and no compiled pattern: all 256 state
+// table rows are null until Pattern() allocates them.
+//
+StringMatch::StringMatch()
+{
+    trans = 0;
+    local_alloc = 0;
+
+    int row = 256;
+    while (row-- > 0)
+        table[row] = 0;
+}
+
+
+//*****************************************************************************
+// StringMatch::~StringMatch()
+//
+// Release the state table rows.  The translation table is released only
+// when local_alloc says this object allocated it.
+//
+StringMatch::~StringMatch()
+{
+    if (local_alloc)
+        delete [] trans;
+
+    int row = 256;
+    while (row-- > 0)
+        delete [] table[row];
+}
+
+
+//*****************************************************************************
+// void StringMatch::Pattern(char *pattern, char sep)
+// Compile the given pattern into a state transition table
+//
+//
+// 'pattern' holds one or more strings separated by 'sep'.  A null or
+// empty pattern is ignored and leaves the object untouched.
+//
+// Every pattern character is first mapped through the translation table
+// (an identity table is created and owned locally if none is installed);
+// characters that map to 0 are skipped.  The n-th string of the pattern
+// is recorded in its final table entry as match index n, counting from 1
+// and stored shifted left by INDEX_SHIFT.
+//
+// Tables left over from a previous call are released before the new ones
+// are allocated, so a StringMatch can be recompiled without leaking.
+//
+void
+StringMatch::Pattern(char *pattern, char sep)
+{
+    if (!pattern || !*pattern)
+    {
+        //
+        // No pattern to compile...
+        //
+        return;
+    }
+
+    //
+    // Allocate enough space in the state table to hold the worst case
+    // patterns...
+    //
+    int n = strlen(pattern);
+
+    // ...but since the state table does not need an extra state
+    // for each string in the pattern, we can subtract the number
+    // of separators.  Wins for small but numerous strings in
+    // the pattern.
+    char *tmpstr;
+    for (tmpstr = pattern;
+         (tmpstr = strchr(tmpstr, sep)) != NULL;
+         tmpstr++)                      // Pass the separator.
+        n--;
+
+    // A pattern consisting of separators only would give n == 0, yet the
+    // final-state stores below always write one entry.  Keep at least one
+    // state per row so that those stores stay inside the allocation.
+    if (n < 1)
+        n = 1;
+
+    int i;
+
+    for (i = 0; i < 256; i++)
+    {
+        // Rows are null after construction, so this is a no-op on first
+        // use and frees the previous tables on a recompile.
+        delete [] table[i];
+        table[i] = new int[n];
+        memset((unsigned char *) table[i], 0, n * sizeof(int));
+    }
+    for (i = 0; i < n; i++)
+        table[0][i] = i;    // "no-op" states for null char, to be ignored
+
+    //
+    // Set up a standard case translation table if needed.
+    //
+    if (!trans)
+    {
+        trans = new unsigned char[256];
+        for (i = 0; i < 256; i++)
+        {
+            trans[i] = (unsigned char)i;
+        }
+        local_alloc = 1;
+    }
+
+    //
+    // Go through each of the patterns and build entries in the table.
+    //
+    int state = 0;
+    int totalStates = 0;
+    unsigned char previous = 0;     // last translated character handled
+    int previousState = 0;          // state that character was looked up in
+    int previousValue = 0;          // table entry found there before any update
+    int index = 1;                  // 1-based number of the current string
+    unsigned char chr;
+
+    while ((unsigned char)*pattern)
+    {
+#if 0
+        if (totalStates > n)
+        {
+            cerr << "Fatal!  Miscalculation of number of states"
+                 << endl;
+            exit (2);
+        }
+#endif
+
+        chr = trans[(unsigned char)*pattern];
+        if (chr == 0)
+        {
+            // Character is translated away: not part of any pattern.
+            pattern++;
+            continue;
+        }
+        if (chr == sep)
+        {
+            //
+            // Next pattern: mark the entry reached by the last character
+            // of the string just finished as final.
+            //
+            table[previous][previousState] =
+                previousValue | (index << INDEX_SHIFT);
+            index++;
+            state = 0;
+            // totalStates--;
+        }
+        else
+        {
+            previousValue = table[chr][state];
+            previousState = state;
+            if (previousValue)
+            {
+                if (previousValue & MATCH_INDEX_MASK)
+                {
+                    if (previousValue & STATE_MASK)
+                    {
+                        // Final entry that already continues somewhere.
+                        state = previousValue & STATE_MASK;
+                    }
+                    else
+                    {
+                        // Final entry without continuation: add one.
+                        table[chr][state] |= ++totalStates;
+                        state = totalStates;
+                    }
+                }
+                else
+                {
+                    // Shared prefix: follow the existing transition.
+                    state = previousValue & STATE_MASK;
+                }
+            }
+            else
+            {
+                // No transition yet: allocate a fresh state.
+                table[chr][state] = ++totalStates;
+                state = totalStates;
+            }
+        }
+        previous = chr;
+        pattern++;
+    }
+
+    // Mark the end of the last string in the pattern as final.
+    table[previous][previousState] =
+        previousValue | (index << INDEX_SHIFT);
+}
+
+
+//*****************************************************************************
+// int StringMatch::FindFirst(const char *string, int &which, int &length)
+// Attempt to find the first occurrence of the previously compiled patterns.
+//
+//
+// Scans 'string' from the left.  On success the return value is the
+// offset at which the match starts, 'which' is the zero-based index of
+// the matching pattern string and 'length' the number of characters
+// matched.  When a match can be extended the scan keeps going, so the
+// longest continuation wins.  Returns -1 when nothing matches.
+// When no pattern has been compiled (table[0] is null) the return value
+// is 0 with 'which' and 'length' left at -1.
+//
+int StringMatch::FindFirst(const char *string, int &which, int &length)
+{
+ which = -1;
+ length = -1;
+
+ if (!table[0])
+ return 0;
+
+ int state = 0, new_state = 0;
+ int pos = 0;
+ int start_pos = 0;
+
+ while ((unsigned char)string[pos])
+ {
+ // Look up the transition for the translated character.
+ new_state = table[trans[(unsigned char)string[pos] & 0xff]][state];
+ if (new_state)
+ {
+ if (state == 0)
+ {
+ //
+ // Keep track of where we started comparing so that we can
+ // come back to this point later if we didn't match anything
+ //
+ start_pos = pos;
+ }
+ }
+ else
+ {
+ //
+ // We came back to 0 state. This means we didn't match anything.
+ //
+ if (state)
+ {
+ // But we may already have a match, and are just being greedy.
+ if (which != -1)
+ return start_pos;
+
+ // Partial match failed: retry one character after its start.
+ pos = start_pos + 1;
+ }
+ else
+ pos++;
+ state = 0;
+ continue;
+ }
+ state = new_state;
+ if (state & MATCH_INDEX_MASK)
+ {
+ //
+ // Matched one of the patterns.
+ // Determine which and return.
+ //
+ which = ((unsigned int) (state & MATCH_INDEX_MASK)
+ >> INDEX_SHIFT) - 1;
+ length = pos - start_pos + 1;
+ state &= STATE_MASK;
+
+ // Continue to find the longest, if there is one.
+ if (state == 0)
+ return start_pos;
+ }
+ pos++;
+ }
+
+ // Maybe we were too greedy.
+ if (which != -1)
+ return start_pos;
+
+ return -1;
+}
+
+
+//*****************************************************************************
+// int StringMatch::Compare(const char *string, int &which, int &length)
+//
+// Anchored match: tests whether one of the compiled pattern strings
+// matches at the very beginning of 'string'.  Returns 1 on a match and 0
+// otherwise (also 0 when no pattern has been compiled).  On a match,
+// 'which' is the zero-based index of the pattern string and 'length' the
+// number of characters matched; otherwise both are -1.  A match that can
+// be extended is followed further, so the longest continuation wins.
+//
+int StringMatch::Compare(const char *string, int &which, int &length)
+{
+    which = -1;
+    length = -1;
+
+    if (!table[0])
+        return 0;
+
+    int state = 0, new_state = 0;
+    int pos = 0;
+    int start_pos = 0;
+
+    while ((unsigned char)string[pos])
+    {
+        // Index the translation table with an unsigned value: a plain
+        // char is negative for bytes above 127 where char is signed.
+        new_state = table[trans[(unsigned char)string[pos]]][state];
+        if (new_state)
+        {
+            if (state == 0)
+            {
+                start_pos = pos;
+            }
+        }
+        else
+        {
+            // We may already have a match, and are just being greedy.
+            if (which != -1)
+                return 1;
+
+            return 0;
+        }
+        state = new_state;
+        if (state & MATCH_INDEX_MASK)
+        {
+            //
+            // Matched one of the patterns.
+            //
+            which = ((unsigned int) (state & MATCH_INDEX_MASK)
+                     >> INDEX_SHIFT) - 1;
+            length = pos - start_pos + 1;
+
+            // Continue to find the longest, if there is one.
+            state &= STATE_MASK;
+            if (state == 0)
+                return 1;
+        }
+        pos++;
+    }
+
+    // Maybe we were too greedy.
+    if (which != -1)
+        return 1;
+
+    return 0;
+}
+
+
+//*****************************************************************************
+// int StringMatch::FindFirstWord(char *string)
+//
+// Convenience form of FindFirstWord() for callers that only need the
+// returned position; the 'which' and 'length' results are discarded.
+//
+int StringMatch::FindFirstWord(const char *string)
+{
+    int ignored_which;
+    int ignored_length;
+
+    return FindFirstWord(string, ignored_which, ignored_length);
+}
+
+
+//*****************************************************************************
+// int StringMatch::CompareWord(const char *string)
+//
+// Convenience form of CompareWord() for callers that only need the
+// yes/no result; the 'which' and 'length' results are discarded.
+//
+int StringMatch::CompareWord(const char *string)
+{
+    int ignored_which;
+    int ignored_length;
+
+    return CompareWord(string, ignored_which, ignored_length);
+}
+
+
+//*****************************************************************************
+// int StringMatch::FindFirstWord(const char *string, int &which, int &length)
+// Attempt to find the first occurrence of the previously compiled patterns.
+//
+//
+// Like FindFirst(), but a match only counts when it stands alone as a
+// word: the character before it (if any) and the character after it must
+// not satisfy HtIsStrictWordChar().  The first qualifying match is
+// returned; no attempt is made to extend it.
+//
+// Returns the offset at which the match starts, with 'which' set to the
+// zero-based index of the pattern string and 'length' to the number of
+// characters matched.  Returns -1, with 'which' and 'length' at -1, when
+// there is no such match or when no pattern has been compiled.
+// (FindFirst() returns 0 rather than -1 for the no-pattern case.)
+//
+int StringMatch::FindFirstWord(const char *string, int &which, int &length)
+{
+    which = -1;
+    length = -1;
+
+    // Without a compiled pattern the table rows are null and must not be
+    // indexed.
+    if (!table[0])
+        return -1;
+
+    int state = 0, new_state = 0;
+    int pos = 0;
+    int start_pos = 0;
+    int is_word = 1;
+
+    while ((unsigned char)string[pos])
+    {
+        new_state = table[trans[(unsigned char)string[pos]]][state];
+        if (new_state)
+        {
+            if (state == 0)
+            {
+                // A new candidate match begins here.
+                start_pos = pos;
+            }
+        }
+        else
+        {
+            //
+            // We came back to 0 state.  This means we didn't match anything.
+            //
+            if (state)
+            {
+                // Partial match failed: retry one character after its start.
+                pos = start_pos + 1;
+            }
+            else
+                pos++;
+            state = 0;
+            continue;
+        }
+        state = new_state;
+
+        if (state & MATCH_INDEX_MASK)
+        {
+            //
+            // Matched one of the patterns.  Check the word boundaries.
+            //
+            is_word = 1;
+            if (start_pos != 0)
+            {
+                if (HtIsStrictWordChar((unsigned char)string[start_pos - 1]))
+                    is_word = 0;
+            }
+            // string[pos] is not the terminator here, so pos + 1 is at
+            // worst the terminating NUL.
+            if (HtIsStrictWordChar((unsigned char)string[pos + 1]))
+                is_word = 0;
+            if (is_word)
+            {
+                //
+                // Determine which and return.
+                //
+                which = ((unsigned int) (state & MATCH_INDEX_MASK)
+                         >> INDEX_SHIFT) - 1;
+                length = pos - start_pos + 1;
+                return start_pos;
+            }
+            else
+            {
+                //
+                // Not at the end of word.  Continue searching.
+                //
+                if (state & STATE_MASK)
+                {
+                    state &= STATE_MASK;
+                }
+                else
+                {
+                    // Restart right after the rejected match's start.
+                    // Skip the increment below: it would jump over one
+                    // character and, when the rejected match ends the
+                    // string, step past the terminating NUL.
+                    pos = start_pos + 1;
+                    state = 0;
+                    continue;
+                }
+            }
+        }
+        pos++;
+    }
+    return -1;
+}
+
+
+//*****************************************************************************
+// int StringMatch::CompareWord(const char *string, int &which, int &length)
+//
+// Anchored word match: tests whether one of the compiled pattern strings
+// matches at the very beginning of 'string' and is followed either by the
+// end of the string or by a character for which HtIsStrictWordChar() is
+// false.  Returns 1 on success, with 'which' set to the zero-based index
+// of the pattern string and 'length' to the number of characters matched.
+// Returns 0, with 'which' and 'length' at -1, otherwise -- including when
+// no pattern has been compiled.
+//
+int StringMatch::CompareWord(const char *string, int &which, int &length)
+{
+    which = -1;
+    length = -1;
+
+    if (!table[0])
+        return 0;
+
+    int state = 0;
+
+    for (int position = 0; (unsigned char)string[position]; position++)
+    {
+        state = table[trans[(unsigned char)string[position]]][state];
+        if (state == 0)
+            return 0;               // no transition: cannot match
+
+        if (!(state & MATCH_INDEX_MASK))
+            continue;               // still inside a pattern string
+
+        //
+        // Matched one of the patterns.  See if it is a word.
+        //
+        const unsigned char next = (unsigned char)string[position + 1];
+        if (next == 0 || !HtIsStrictWordChar(next))
+        {
+            which = ((unsigned int) (state & MATCH_INDEX_MASK)
+                     >> INDEX_SHIFT) - 1;
+            length = position + 1;
+            return 1;
+        }
+
+        //
+        // Not at the end of a word.  Go on only if a longer pattern
+        // string continues from here.
+        //
+        state &= STATE_MASK;
+        if (state == 0)
+            return 0;
+    }
+
+    return 0;
+}
+
+
+//*****************************************************************************
+// void StringMatch::TranslationTable(char *table)
+//
+// Install a caller-owned translation table.  A table this object had
+// allocated itself is released; the new one is never deleted by this
+// object (local_alloc is cleared).
+//
+void StringMatch::TranslationTable(char *table)
+{
+    unsigned char *previous = trans;
+    const int owned = local_alloc;
+
+    trans = (unsigned char *) table;
+    local_alloc = 0;
+
+    if (owned)
+        delete [] previous;
+}
+
+
+//*****************************************************************************
+// void StringMatch::IgnoreCase()
+// Set up the case translation table to convert uppercase to lowercase
+//
+// Unless this object already owns a translation table, a fresh identity
+// table is allocated first; a caller-supplied table is therefore replaced
+// rather than modified.  Afterwards every character for which isupper()
+// holds is mapped to its tolower() value.
+//
+void StringMatch::IgnoreCase()
+{
+    const int have_own_table = local_alloc && trans;
+
+    if (!have_own_table)
+    {
+        trans = new unsigned char[256];
+        local_alloc = 1;
+        for (int c = 0; c < 256; c++)
+            trans[c] = (unsigned char) c;
+    }
+
+    for (int c = 0; c < 256; c++)
+    {
+        const unsigned char ch = (unsigned char) c;
+
+        if (isupper(ch))
+            trans[c] = tolower(ch);
+    }
+}
+
+
+//*****************************************************************************
+// void StringMatch::IgnorePunct(char *punct)
+// Set up the character translation table to ignore punctuation
+//
+// Unless this object already owns a translation table, a fresh identity
+// table is allocated first.  Then the characters to ignore are mapped
+// to 0: every character of 'punct' when it is given, otherwise every
+// character for which HtIsWordChar() holds but HtIsStrictWordChar() does
+// not.
+//
+void StringMatch::IgnorePunct(char *punct)
+{
+    if (!(local_alloc && trans))
+    {
+        trans = new unsigned char[256];
+        local_alloc = 1;
+        for (int c = 0; c < 256; c++)
+            trans[c] = (unsigned char) c;
+    }
+
+    if (!punct)
+    {
+        // No explicit list: fall back to the word-type classification.
+        for (int c = 0; c < 256; c++)
+        {
+            if (!HtIsWordChar(c))
+                continue;
+            if (HtIsStrictWordChar(c))
+                continue;
+            trans[c] = 0;
+        }
+        return;
+    }
+
+    for (const char *p = punct; *p; p++)
+        trans[(unsigned char) *p] = 0;
+}
+
+
+//*****************************************************************************
+// int StringMatch::FindFirst(const char *source)
+//
+// Convenience form of FindFirst() for callers that only need the returned
+// position; the 'which' and 'length' results are discarded.
+//
+int StringMatch::FindFirst(const char *source)
+{
+    int ignored_which;
+    int ignored_length;
+
+    return FindFirst(source, ignored_which, ignored_length);
+}
+
+
+//*****************************************************************************
+// int StringMatch::Compare(const char *source)
+//
+// Convenience form of Compare() for callers that only need the yes/no
+// result; the 'which' and 'length' results are discarded.
+//
+int StringMatch::Compare(const char *source)
+{
+    int ignored_which;
+    int ignored_length;
+
+    return Compare(source, ignored_which, ignored_length);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.h b/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.h
new file mode 100644
index 00000000..d848fe5e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/StringMatch.h
@@ -0,0 +1,116 @@
+//
+// StringMatch.h
+//
+// StringMatch: This class provides an interface to a fairly specialized string
+// lookup facility. It is intended to be used as a replacement for any
+// regular expression matching when the pattern string is in the form:
+//
+// <string1>|<string2>|<string3>|...
+//
+// Just like regular expression routines, the pattern needs to be
+// compiled before it can be used. This is done using the Pattern()
+// member function. Once the pattern has been compiled, the member
+// function Find() can be used to search for the pattern in a string.
+// If a string has been found, the "which" and "length" parameters
+// will be set to the string index and string length respectively.
+// (The string index is counted starting from 0) The return value of
+// Find() is the position at which the string was found or -1 if no
+// strings could be found. If a case insensitive match needs to be
+// performed, call the IgnoreCase() member function before calling
+// Pattern(). This function will setup a character translation table
+// which will convert all uppercase characters to lowercase. If some
+// other translation is required, the TranslationTable() member
+// function can be called to provide a custom table. This table needs
+// to be 256 characters.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: StringMatch.h,v 1.13 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _StringMatch_h_
+#define _StringMatch_h_
+
+#include "Object.h"
+#include "HtWordType.h"
+
+//
+// Matcher for a fixed set of alternative strings, compiled by Pattern()
+// into per-character state tables and queried through the FindFirst*()
+// and Compare*() members.
+//
+class StringMatch : public Object
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ StringMatch();
+ ~StringMatch();
+
+ //
+ // Set the pattern to search for. The pattern is given as a string
+ // in the form <string1>|<string2>|... where the separator character
+ // can be changed through the sep argument.
+ //
+ void Pattern(char *pattern, char sep = '|');
+
+ //
+ // Search for any of the strings in the pattern in the given
+ // string. The return value is the offset in the source at which a
+ // pattern was found. In this case, the which variable will be set to
+ // the index of the pattern string and length will be set to the
+ // length of that pattern string. If none of the pattern strings
+ // could be found, the return value will be -1
+ //
+ int FindFirst(const char *string, int &which, int &length);
+ int FindFirst(const char *string);
+
+ int FindFirstWord(const char *string, int &which, int &length);
+ int FindFirstWord(const char *string);
+
+ //
+ // If you are interested in matching instead of searching, use
+ // the following. Same parameters except that the return value will
+ // be 1 if there was a match, 0 if there was not.
+ //
+ int Compare(const char *string, int &which, int &length);
+ int Compare(const char *string);
+
+ int CompareWord(const char *string, int &which, int &length);
+ int CompareWord(const char *string);
+
+ //
+ // Provide a character translation table which will be applied to
+ // both the pattern and the input string. This table should be an
+ // array of 256 characters. It is the caller's responsibility to
+ // manage this table's allocation. The table should remain valid
+ // until this object has been destroyed.
+ //
+ void TranslationTable(char *table);
+
+ //
+ // Build a local translation table which maps all uppercase
+ // characters to lowercase
+ //
+ void IgnoreCase();
+
+ //
+ // Build a local translation table which ignores all given punctuation
+ // characters
+ //
+ void IgnorePunct(char *punct = (char*)NULL);
+
+ //
+ // Determine if there is a pattern associated with this Match object.
+ //
+ int hasPattern() {return table[0] != 0;}
+
+protected:
+ // One row of state entries per (translated) character value; the rows
+ // are null until a pattern has been compiled.
+ int *table[256];
+ // Character translation table applied before every table lookup.
+ unsigned char *trans;
+ // Non-zero when trans was allocated by this object and must be
+ // deleted by it.
+ int local_alloc;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/String_fmt.cc b/debian/htdig/htdig-3.2.0b6/htlib/String_fmt.cc
new file mode 100644
index 00000000..753e23cb
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/String_fmt.cc
@@ -0,0 +1,54 @@
+//
+// String_fmt.cc
+//
+// String_fmt: Formatting functions for the String class. Those functions
+// are also used in other files, they are not purely internal
+// to the String class.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: String_fmt.cc,v 1.11 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "htString.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef _MSC_VER /* _WIN32 */
+#define vsnprintf _vsnprintf
+#endif
+
+// Result buffer shared by form() and vform(): both format into it and
+// return its address, so each call overwrites the previous result.
+static char buf[10000];
+
+//*****************************************************************************
+// char *form(const char *fmt, ...)
+//
+// printf-style formatting into the file's static buffer.  Returns a
+// pointer to that buffer, so the result is only valid until the next
+// call to form() or vform().  Output that does not fit is truncated.
+//
+char *form(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+    // Under _MSC_VER vsnprintf is mapped to _vsnprintf, which does not
+    // write a terminator when the output is truncated; make sure the
+    // result is always a valid C string.
+    buf[sizeof(buf) - 1] = '\0';
+    return buf;
+}
+
+
+//*****************************************************************************
+// char *vform(const char *fmt, va_list args)
+//
+// va_list variant of form(): formats into the file's static buffer and
+// returns a pointer to it, so the result is only valid until the next
+// call to form() or vform().  Output that does not fit is truncated.
+//
+char *vform(const char *fmt, va_list args)
+{
+    vsnprintf(buf, sizeof(buf), fmt, args);
+    // Under _MSC_VER vsnprintf is mapped to _vsnprintf, which does not
+    // write a terminator when the output is truncated; make sure the
+    // result is always a valid C string.
+    buf[sizeof(buf) - 1] = '\0';
+    return buf;
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/ber.h b/debian/htdig/htdig-3.2.0b6/htlib/ber.h
new file mode 100644
index 00000000..fc0af86d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/ber.h
@@ -0,0 +1,85 @@
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+
+#ifndef _ber_h
+#define _ber_h
+
+#include <stdio.h>
+#include <errno.h>
+
+typedef unsigned int ber_t;
+
+#define BER_MAX_BYTES (sizeof(ber_t) + 1)
+
+//
+// Decode one variable-length value from 'buf': each byte carries 7 data
+// bits, least significant group first, and bit 0x80 set means that
+// another byte follows.
+// Returns the number of bytes consumed, or EINVAL when 'buf_len' is not
+// positive, when the encoding runs past 'buf_len' bytes, or when it
+// carries more groups than fit into a ber_t.
+//
+inline int ber_buf2value(const unsigned char* buf, int buf_len, ber_t& result) {
+    const unsigned int max_bits = sizeof(ber_t) * 8;
+
+    result = 0;
+    if(buf_len <= 0) return EINVAL;
+
+    unsigned int bits = 0;
+    int length = 1;
+    while(*buf & 0x80) {
+        if(bits >= max_bits) return EINVAL;
+        // Shift as ber_t (unsigned): shifting the int operand could
+        // overflow a signed value.
+        result |= (ber_t)(*buf & 0x7f) << bits;
+        bits += 7;
+        buf++;
+        length++;
+        if(length > buf_len) return EINVAL;
+    }
+    // The last group needs the same range check as the others: a shift
+    // by the full width of the type or more is undefined.
+    if(bits >= max_bits) return EINVAL;
+    result |= (ber_t)(*buf & 0x7f) << bits;
+
+    return length;
+}
+
+//
+// Decode one variable-length value read byte by byte from 'fp': each
+// byte carries 7 data bits, least significant group first, and bit 0x80
+// set means that another byte follows.
+// Returns the number of bytes consumed, or EINVAL when the stream ends
+// before the value is complete or when the encoding carries more groups
+// than fit into a ber_t.
+//
+inline int ber_file2value(FILE* fp, ber_t& result) {
+    const unsigned int max_bits = sizeof(ber_t) * 8;
+
+    result = 0;
+    unsigned int bits = 0;
+    int c;
+    int length = 1;
+    while((c = fgetc(fp)) != EOF && (c & 0x80)) {
+        if(bits >= max_bits) return EINVAL;
+        // Shift as ber_t (unsigned): shifting the int operand could
+        // overflow a signed value.
+        result |= (ber_t)(c & 0x7f) << bits;
+        bits += 7;
+        length++;
+    }
+
+    if(c == EOF) return EINVAL;
+
+    // The last group needs the same range check as the others: a shift
+    // by the full width of the type or more is undefined.
+    if(bits >= max_bits) return EINVAL;
+    result |= (ber_t)(c & 0x7f) << bits;
+
+    return length;
+}
+
+//
+// Encode 'value' into 'buf' as a sequence of 7-bit groups, least
+// significant group first; every byte except the last has bit 0x80 set.
+// Returns the number of bytes written, or EINVAL when 'buf_len' is not
+// positive or too small for the encoding.
+//
+inline int ber_value2buf(unsigned char* buf, int buf_len, ber_t value)
+{
+    if(buf_len <= 0) return EINVAL;
+
+    int used = 0;
+    for(;;) {
+        buf[used++] = (value & 0x7f);
+        value >>= 7;
+        if(value == 0) break;
+        if(used >= buf_len) return EINVAL;
+        // More groups follow: flag the byte just written.
+        buf[used - 1] |= 0x80;
+    }
+    return used;
+}
+
+//
+// Write 'value' to 'fp' as a sequence of 7-bit groups, least significant
+// group first; every byte except the last has bit 0x80 set.
+// Returns the number of bytes written, or EINVAL as soon as fputc()
+// reports an error.
+//
+inline int ber_value2file(FILE* fp, ber_t value)
+{
+    int length = 0;
+
+    do {
+        unsigned char current = (value & 0x7f);
+        value >>= 7;
+        if(value) current |= 0x80;      // more groups follow
+        if(fputc(current, fp) == EOF) return EINVAL;
+        length++;
+    } while(value);
+
+    return length;
+}
+
+#endif /* _ber_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/clib.h b/debian/htdig/htdig-3.2.0b6/htlib/clib.h
new file mode 100644
index 00000000..04fc5403
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/clib.h
@@ -0,0 +1,49 @@
+/*
+ Part of the ht://Dig package <http://www.htdig.org/>
+ Copyright (c) 1999-2004 The ht://Dig Group
+ For copyright details, see the file COPYING in your distribution
+ or the GNU Library General Public License (LGPL) version 2 or later
+ <http://www.gnu.org/copyleft/lgpl.html>
+*/
+#ifndef _clib_h_
+#define _clib_h_
+
+#include <sys/types.h>
+
+// C-linkage declarations of standard library functions. Each one is
+// declared here only when the corresponding HAVE_* macro is not defined.
+extern "C"
+{
+
+#ifndef HAVE_GETCWD
+char *getcwd(char *, size_t);
+#endif
+
+#ifndef HAVE_MEMCMP
+int memcmp(const void *, const void *, size_t);
+#endif
+
+#ifndef HAVE_MEMCPY
+void *memcpy(void *, const void *, size_t);
+#endif
+
+#ifndef HAVE_MEMMOVE
+void *memmove(void *, const void *, size_t);
+#endif
+
+#ifndef HAVE_RAISE
+int raise (int);
+#endif
+
+#ifndef HAVE_SNPRINTF
+int snprintf(char *, size_t, const char *, ...);
+#endif
+
+#ifndef HAVE_STRERROR
+char *strerror(int);
+#endif
+
+#ifndef HAVE_VSNPRINTF
+// NOTE(review): declared with "..." here, whereas the standard
+// vsnprintf() takes a va_list as its last parameter -- confirm against
+// the replacement implementation before relying on this prototype.
+int vsnprintf(char *, size_t, const char *, ...);
+#endif
+}
+
+#endif /* _clib_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/filecopy.cc b/debian/htdig/htdig-3.2.0b6/htlib/filecopy.cc
new file mode 100644
index 00000000..4cd23e85
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/filecopy.cc
@@ -0,0 +1,126 @@
+//
+// filecopy.cc
+//
+// Copies files from one file to another.
+// Contains both Unix & Native Win32 Implementations
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2003 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// Copyright (c) 2002 RightNow Technologies, Inc.
+// Donated to The ht://Dig Group under LGPL License
+
+#include <stdio.h>
+
+#ifdef _WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#else /* UNIX */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#endif /* _WIN32 | UNIX */
+
+
+#include "filecopy.h"
+
+
+
+//----------------------------------------------------------------------
+// int file_copy (char * from, char * to, char flags)
+//----------------------------------------------------------------------
+//
+// copy file 'from' -> 'to'
+//
+// set flags to FILECOPY_OVERWRITE_ON to overwrite the 'to' file if
+// it exists
+//
+// set flags to FILECOPY_OVERWRITE_OFF to not overwrite the 'to' file
+// if it exists
+//
+// returns 0/FALSE if unsuccessful
+// returns 1/TRUE if successful
+//
+//
+#ifdef _WIN32
+
+// Win32 implementation, built on CopyFile().
+// Returns TRUE on success and FALSE when CopyFile() fails or 'flags' is
+// neither FILECOPY_OVERWRITE_ON nor FILECOPY_OVERWRITE_OFF.
+int file_copy (char * from, char * to, char flags)
+{
+    BOOL fail_if_exists;
+
+    switch (flags)
+    {
+    case FILECOPY_OVERWRITE_ON:
+        fail_if_exists = FALSE;     // overwrite
+        break;
+    case FILECOPY_OVERWRITE_OFF:
+        fail_if_exists = TRUE;      // don't overwrite
+        break;
+    default:                        // bad flag
+        return (FALSE);
+    }
+
+    if (TRUE != (CopyFile(from , to, fail_if_exists)))
+        return (FALSE);
+
+    return (TRUE);
+}
+
+#else //UNIX
+
+// Unix implementation, built on stdio.
+// With FILECOPY_OVERWRITE_OFF the copy is refused when 'to' already
+// exists, or when its existence cannot be determined.  Any other value of
+// 'flags' lets an existing 'to' be overwritten.
+// Returns TRUE only when the whole file was read and written and the
+// output stream was closed without error; FALSE otherwise.
+int file_copy (char * from, char * to, char flags)
+{
+    size_t nmemb;
+    FILE *ifp, *ofp;
+    char buf[BUFSIZ];
+    int ok = TRUE;
+
+    if (flags == FILECOPY_OVERWRITE_OFF) {
+        if (access(to, F_OK) == 0) {
+            // Destination already exists.
+            return(FALSE);
+        }
+        else if (errno != ENOENT) {
+            // access() failed for a reason other than "no such file".
+            return(FALSE);
+        }
+    }
+
+    if ((ifp=fopen(from, "r")) == NULL) {
+        // Source cannot be opened.
+        return(FALSE);
+    }
+
+    if ((ofp=fopen(to, "w+")) == NULL) {
+        // Destination cannot be created.
+        fclose(ifp);
+        return(FALSE);
+    }
+
+    while ((nmemb=fread(buf, 1, sizeof(buf), ifp)) > 0) {
+        if (fwrite(buf, 1, nmemb, ofp) != nmemb) {
+            ok = FALSE;
+            break;
+        }
+    }
+
+    // fread() returns 0 both at end of file and on error; only the
+    // former is a successful copy.
+    if (ferror(ifp))
+        ok = FALSE;
+
+    fclose(ifp);
+
+    // Buffered output is flushed by fclose(), so a write failure (for
+    // example a full disk) may only show up here.
+    if (fclose(ofp) != 0)
+        ok = FALSE;
+
+    return (ok);
+}
+
+#endif /* _WIN32 | UNIX */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/filecopy.h b/debian/htdig/htdig-3.2.0b6/htlib/filecopy.h
new file mode 100644
index 00000000..c888e720
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/filecopy.h
@@ -0,0 +1,40 @@
+//
+// filecopy.h
+//
+// Copies files from one file to another.
+// Contains both Unix & Native Win32 Implementations
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2003 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// Copyright (c) 2002 RightNow Technologies, Inc.
+// Donated to The ht://Dig Group under LGPL License
+
+#ifdef __cplusplus
+//extern "C" {
+#endif
+
+// Include guard.  The macro has to be defined here, otherwise the
+// #ifndef test below never becomes false on a repeated inclusion.
+#ifndef FILECOPY_H
+#define FILECOPY_H
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+// Values for the 'flags' argument of file_copy().
+#define FILECOPY_OVERWRITE_ON 1
+#define FILECOPY_OVERWRITE_OFF 2
+
+// Copy file 'from' to file 'to'.  Returns TRUE on success, FALSE on
+// failure.
+int file_copy (char * from, char * to, char flags);
+
+
+#ifdef __cplusplus
+//}
+#endif
+
+#endif /* FILECOPY_H */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/getcwd.c b/debian/htdig/htdig-3.2.0b6/htlib/getcwd.c
new file mode 100644
index 00000000..019c96be
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/getcwd.c
@@ -0,0 +1,278 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#ifndef HAVE_GETCWD
+
+/*
+ * ISDOT --
+ *    True when the directory entry pointed to by DP is named "." or "..".
+ *    DP is fully parenthesized so any pointer expression (e.g. &entry or
+ *    p + 1) can be passed safely.
+ */
+#define ISDOT(dp) \
+    ((dp)->d_name[0] == '.' && ((dp)->d_name[1] == '\0' || \
+    ((dp)->d_name[1] == '.' && (dp)->d_name[2] == '\0')))
+
+#ifndef dirfd
+#define dirfd(dirp) ((dirp)->dd_fd)
+#endif
+
+/*
+ * getcwd --
+ *    Get the current working directory.
+ *
+ *    Starting at "." the function climbs through "..", "../..", ... and at
+ *    each level searches the parent directory for the entry whose
+ *    device/inode pair matches the directory just left.  The names found
+ *    are written from the end of the result buffer backwards until the
+ *    root directory ("/") is reached.
+ *
+ *    pt    Caller-supplied result buffer, or NULL to have one allocated
+ *          with __os_malloc (and grown with __os_realloc as needed).
+ *    size  Size of pt in bytes; only consulted when pt is not NULL.
+ *
+ *    Returns the buffer holding the path, or NULL on failure with the
+ *    error stored through __os_set_errno: EINVAL for a zero size, ERANGE
+ *    when a caller-supplied buffer is too small, ENOENT when a directory
+ *    cannot be found in its parent, otherwise the error of the failing
+ *    call.
+ *
+ * PUBLIC: #ifndef HAVE_GETCWD
+ * PUBLIC: char *getcwd __P((char *, size_t));
+ * PUBLIC: #endif
+ */
+char *
+getcwd(char *pt, size_t size)
+{
+    struct dirent *dp;
+    DIR *dir = NULL;            /* non-NULL only while a parent is open */
+    dev_t dev, root_dev;
+    ino_t ino, root_ino;
+    int first, ret, save_errno = 0;
+    struct stat s;
+    size_t namlen, ptsize, upsize = 0;
+    char *bpt, *bup, *ept, *eup;
+    char *up = NULL;            /* NULL until the "../" buffer exists */
+
+    /*
+     * If no buffer specified by the user, allocate one as necessary.
+     * If a buffer is specified, the size has to be non-zero.  The path
+     * is built from the end of the buffer backwards.
+     */
+    if (pt) {
+        ptsize = 0;             /* 0 marks "buffer owned by the caller" */
+        if (!size) {
+            __os_set_errno(EINVAL);
+            return (NULL);
+        }
+        if (size == 1) {
+            __os_set_errno(ERANGE);
+            return (NULL);
+        }
+        ept = pt + size;
+    } else {
+        if ((ret = __os_malloc(ptsize = 1024 - 4, NULL, &pt)) != 0) {
+            __os_set_errno(ret);
+            return (NULL);
+        }
+        ept = pt + ptsize;
+    }
+    bpt = ept - 1;
+    *bpt = '\0';
+
+    /*
+     * Allocate bytes (1024 - malloc space) for the string of "../"'s.
+     * Should always be enough (it's 340 levels).  If it's not, allocate
+     * as necessary.  Special case the first stat, it's ".", not "..".
+     */
+    if ((ret = __os_malloc(upsize = 1024 - 4, NULL, &up)) != 0) {
+        up = NULL;              /* nothing to release at err */
+        __os_set_errno(ret);
+        goto err;
+    }
+    eup = up + upsize;          /* end of what was really allocated */
+    bup = up;
+    up[0] = '.';
+    up[1] = '\0';
+
+    /* Save root values, so know when to stop. */
+    if (stat("/", &s))
+        goto err;
+    root_dev = s.st_dev;
+    root_ino = s.st_ino;
+
+    __os_set_errno(0);          /* XXX readdir has no error return. */
+
+    for (first = 1;; first = 0) {
+        /* Stat the current level. */
+        if (lstat(up, &s))
+            goto err;
+
+        /* Save current node values. */
+        ino = s.st_ino;
+        dev = s.st_dev;
+
+        /* Check for reaching root. */
+        if (root_dev == dev && root_ino == ino) {
+            *--bpt = PATH_SEPARATOR[0];
+            /*
+             * It's unclear that it's a requirement to copy the
+             * path to the beginning of the buffer, but it's always
+             * been that way and stuff would probably break.
+             */
+            bcopy(bpt, pt, ept - bpt);
+            __os_free(up, upsize);
+            return (pt);
+        }
+
+        /*
+         * Build pointer to the parent directory, allocating memory
+         * as necessary.  Max length is 3 for "../", the largest
+         * possible component name, plus a trailing NUL.
+         */
+        if (bup + 3 + MAXNAMLEN + 1 >= eup) {
+            /* Remember the position: the buffer may move. */
+            size_t upoff = (size_t)(bup - up);
+
+            if (__os_realloc(upsize *= 2, NULL, &up) != 0)
+                goto err;
+            bup = up + upoff;
+            eup = up + upsize;
+        }
+        *bup++ = '.';
+        *bup++ = '.';
+        *bup = '\0';
+
+        /* Open and stat parent directory. */
+        if (!(dir = opendir(up)) || fstat(dirfd(dir), &s))
+            goto err;
+
+        /* Add trailing slash for next directory. */
+        *bup++ = PATH_SEPARATOR[0];
+
+        /*
+         * If it's a mount point, have to stat each element because
+         * the inode number in the directory is for the entry in the
+         * parent directory, not the inode number of the mounted file.
+         */
+        save_errno = 0;
+        if (s.st_dev == dev) {
+            /* Same device: the inode stored in the entry is reliable. */
+            for (;;) {
+                if (!(dp = readdir(dir)))
+                    goto notfound;
+                if (dp->d_fileno == ino)
+                    break;
+            }
+        } else
+            for (;;) {
+                if (!(dp = readdir(dir)))
+                    goto notfound;
+                if (ISDOT(dp))
+                    continue;
+                /* Append the candidate name (with its NUL) to "../". */
+                bcopy(dp->d_name, bup, NAMLEN(dp) + 1);
+
+                /* Save the first error for later. */
+                if (lstat(up, &s)) {
+                    if (save_errno == 0)
+                        save_errno = __os_get_errno();
+                    __os_set_errno(0);
+                    continue;
+                }
+                if (s.st_dev == dev && s.st_ino == ino)
+                    break;
+            }
+
+        /* NAMLEN() is the portable spelling of the entry's name length. */
+        namlen = NAMLEN(dp);
+
+        /*
+         * Check for length of the current name, preceding slash,
+         * leading slash.
+         */
+        if ((size_t)(bpt - pt) < namlen + (first ? 1 : 2)) {
+            size_t len, off;
+
+            if (!ptsize) {
+                /* Caller's buffer: it cannot be grown. */
+                __os_set_errno(ERANGE);
+                goto err;
+            }
+            off = bpt - pt;
+            len = ept - bpt;
+            if (__os_realloc(ptsize *= 2, NULL, &pt) != 0)
+                goto err;
+            bpt = pt + off;
+            ept = pt + ptsize;
+            /* Slide the partial path to the end of the larger buffer. */
+            bcopy(bpt, ept - len, len);
+            bpt = ept - len;
+        }
+        if (!first)
+            *--bpt = PATH_SEPARATOR[0];
+        bpt -= namlen;
+        bcopy(dp->d_name, bpt, namlen);
+        (void)closedir(dir);
+        dir = NULL;
+
+        /* Truncate any file name. */
+        *bup = '\0';
+    }
+
+notfound:
+    /*
+     * If readdir set errno, use it, not any saved error; otherwise,
+     * didn't find the current directory in its parent directory, set
+     * errno to ENOENT.
+     */
+    if (__os_get_errno() == 0)
+        __os_set_errno(save_errno == 0 ? ENOENT : save_errno);
+    /* FALLTHROUGH */
+err:
+    /* Close a still-open parent without disturbing the reported error. */
+    if (dir != NULL) {
+        save_errno = __os_get_errno();
+        (void)closedir(dir);
+        __os_set_errno(save_errno);
+    }
+    if (ptsize)
+        __os_free(pt, ptsize);
+    if (up != NULL)
+        __os_free(up, upsize);
+    return (NULL);
+}
+#endif /* HAVE_GETCWD */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.c b/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.c
new file mode 100644
index 00000000..3e73a009
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.c
@@ -0,0 +1,122 @@
+/* getopt_local.c */
+
+/* Public Domain getopt clone */
+
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 2003 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/* Added to HtDig code June 2003 by Neal Richter, RightNow Technologies */
+
+/*
+** This getopt behaves pretty much like you would expect.
+** It does handle arguments like '-ab-' a little differently
+** than normal; I think the -- 'stop option processing' should
+** be treated like just another option, so that's what mine does.
+** Other getopts seem to ignore the second '-' in '-ab-'.
+**
+** I hereby place this version of getopt in
+** the public domain. Do with this what you will.
+** I'm sure there is a nicer and faster version out there
+** somewhere but I don't care!
+**
+** Robert Osborne, May 1991.
+*/
+
+
+#include <stdio.h>
+
+#include "getopt_local.h"
+
+#ifdef GETOPT_LOCAL
+
+int optind = 1;
+int opterr = 1;
+char *optarg = (char *) 0;
+
+static char *next_arg = (char *) 0;
+
+#define NO_OPT 0
+#define OPT_PLAIN 1
+#define OPT_ARG 2
+
+
+/* ----- getopt -------------------------------------- Oct 23, 1999 21:48 ---
+ *
+ * Minimal replacement for the POSIX getopt() function.
+ *
+ *   argc, argv  The argument vector to scan; argv[0] is only used as the
+ *               program name in diagnostics.
+ *   optstring   The accepted option letters; a letter followed by ':'
+ *               takes an argument.  ':' itself is never an option letter.
+ *
+ * Returns the option letter found, '?' for an unknown option or a missing
+ * argument (a message is printed to stderr when opterr is non-zero), or -1
+ * when option processing is finished.  optind is left at the index of the
+ * next argv element to examine and optarg points at the argument of an
+ * option that takes one.
+ *
+ * Option processing stops at the end of argv, at the first word that does
+ * not start with '-', at a lone "-" (which is left in place as an
+ * operand), and at a '-' found where an option letter is expected, as in
+ * "--" or "-ab-" (that word is skipped).
+ */
+int
+getopt(int argc, char *argv[], char *optstring)
+{
+    const char *spec;
+    int ret;
+    int which = NO_OPT;
+
+    if (next_arg == (char *) 0)
+    {
+        /* Start a new word; a lone "-" is an operand and is not consumed. */
+        if (optind >= argc || argv[optind] == (char *) 0
+            || argv[optind][0] != '-' || argv[optind][1] == '\0')
+            return -1;
+        next_arg = &argv[optind][1];
+    }
+
+    if ((*next_arg == '\0') || (*next_arg == '-'))
+    {
+        /* Skip the terminating word and forget the scan position so a
+           later call starts cleanly at argv[optind]. */
+        optind++;
+        next_arg = (char *) 0;
+        return -1;
+    }
+
+    /* Look the letter up; the ':' markers are not option letters. */
+    for (spec = optstring; *spec != '\0'; spec++)
+    {
+        if (*spec == ':')
+            continue;
+        if (*spec == *next_arg)
+        {
+            which = (spec[1] == ':') ? OPT_ARG : OPT_PLAIN;
+            break;
+        }
+    }
+
+    switch (which)
+    {
+        case NO_OPT:
+        case OPT_PLAIN:
+            ret = *next_arg++;
+
+            /* End of this cluster: continue with the next word. */
+            if (*next_arg == '\0')
+            {
+                optind++;
+                next_arg = (char *) 0;
+            }
+
+            if (which == OPT_PLAIN)
+                return ret;
+
+            if (opterr)
+                fprintf(stderr, "%s: illegal option -- %c\n", argv[0], ret);
+
+            return '?';
+
+        case OPT_ARG:
+            ret = *next_arg++;
+            optind++;
+
+            /* Argument attached to the option, as in "-ovalue". */
+            if (*next_arg != '\0')
+            {
+                optarg = next_arg;
+                next_arg = (char *) 0;
+                return ret;
+            }
+
+            /* Argument is the following word, as in "-o value". */
+            if (optind < argc && argv[optind] != (char *) 0)
+            {
+                optarg = argv[optind];
+                optind++;
+                next_arg = (char *) 0;
+                return ret;
+            }
+
+            next_arg = (char *) 0;
+            if (opterr)
+                fprintf(stderr, "%s: option requires an argument -- %c\n",
+                        argv[0], ret);
+            return '?';
+
+        default:
+            break;
+    }
+
+    return(-1);
+}
+#elif defined(_MSC_VER) /* _WIN32 */
+#error _MSC_VER but !GETOPT_LOCAL
+#endif /* GETOPT_LOCAL */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.h b/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.h
new file mode 100644
index 00000000..731a21db
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/getopt_local.h
@@ -0,0 +1,50 @@
+/* getopt_local.h */
+
+/* Public Domain getopt clone */
+
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 2003 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/* Added to HtDig code June 2003 by Neal Richter, RightNow Technologies */
+
+/*
+** This getopt behaves pretty much like you would expect.
+** It does handle arguments like '-ab-' a little differently
+** than normal; I think the -- 'stop option processing' should
+** be treated like just another option, so that's what mine does.
+** Other getopts seem to ignore the second '-' in '-ab-'.
+**
+** I hereby place this version of getopt in
+** the public domain. Do with this what you will.
+** I'm sure there is a nicer and faster version out there
+** somewhere but I don't care!
+**
+** Robert Osborne, May 1991.
+*/
+
+
+#ifndef GETOPT_LOCAL_H
+#define GETOPT_LOCAL_H
+
+#define GETOPT_LOCAL
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* header for getopt_local.c */
+
+extern int optind;
+extern int opterr;
+extern char *optarg;
+
+int getopt(int, char *[], char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GETOPT_LOCAL_H */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.cc b/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.cc
new file mode 100644
index 00000000..ad537f56
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.cc
@@ -0,0 +1,46 @@
+//
+// good_strtok.cc
+//
+// good_strtok: The good_strtok() function is very similar to the
+// standard strtok() library function, except that good_strtok()
+// will only skip over 1 separator if it finds one. This is
+// needed when parsing strings with empty fields.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: good_strtok.cc,v 1.7 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "lib.h"
+
+//
+// Tokenizer in the spirit of strtok(), but every single occurrence of
+// `term' ends a token, so adjacent separators produce empty tokens.
+//
+// Pass the string to split in `str' on the first call and NULL on the
+// following calls to continue with the same string.  The string is
+// modified in place: each separator found is overwritten with '\0'.
+// Returns a pointer to the next token, or NULL when nothing is left
+// (or when no string has been supplied yet).
+//
+char *good_strtok(char *str, char term)
+{
+    // Scan position kept between calls; NULL until a string is supplied.
+    static char *cursor;
+    char *token;
+
+    if (str != NULL)
+        cursor = str;
+
+    if (cursor == NULL || *cursor == '\0')
+        return NULL;
+
+    token = cursor;
+    for (; *cursor != '\0'; cursor++)
+    {
+        if (*cursor == term)
+        {
+            // Terminate the token and resume just past the separator.
+            *cursor++ = '\0';
+            break;
+        }
+    }
+    return token;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.h b/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.h
new file mode 100644
index 00000000..89e05e0b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/good_strtok.h
@@ -0,0 +1,24 @@
+//
+// good_strtok.h
+//
+// good_strtok: The good_strtok() function is very similar to the
+// standard strtok() library function, except that good_strtok()
+// will only skip over 1 separator if it finds one. This is
+// needed when parsing strings with empty fields.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: good_strtok.h,v 1.7 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _good_strtok_h_
+#define _good_strtok_h_
+
+char *good_strtok(char *, char);
+
+#endif
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/gregex.h b/debian/htdig/htdig-3.2.0b6/htlib/gregex.h
new file mode 100644
index 00000000..38c4b68f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/gregex.h
@@ -0,0 +1,568 @@
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+ Copyright (C) 1985,1989-1993,1995-1998, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+# include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs; /* Number of registers held in `start' and `end' (cf. NUM_REGS of re_set_registers). */
+ regoff_t *start; /* Array of byte offsets, one per register; presumably where each match begins -- confirm in regex.texinfo. */
+ regoff_t *end; /* Array of byte offsets, one per register; presumably where each match ends -- confirm in regex.texinfo. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if defined(__STDC__)
+
+# define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+# define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* For now unconditionally define __restrict_arr to expand to nothing.
+ Ideally we would have a test for the compiler which allows defining
+ it to restrict. */
+#ifndef __restrict_arr
+# define __restrict_arr
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags));
+
+extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags));
+
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+ char *__errbuf, size_t __errbuf_size));
+
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/htString.h b/debian/htdig/htdig-3.2.0b6/htlib/htString.h
new file mode 100644
index 00000000..592a072f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/htString.h
@@ -0,0 +1,260 @@
+//
+// htString.h
+//
+// htString: (implementation in String.cc) Just Another String class.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: htString.h,v 1.24 2004/05/28 13:15:21 lha Exp $
+//
+#ifndef __String_h
+#define __String_h
+
+#include "Object.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+class String : public Object // Character string class; per the file header the implementation lives in String.cc
+{
+public:
+ String() { Length = 0; Allocated = 0; Data = 0; } // Create an empty string (no buffer is allocated)
+ String(int init); // initial allocated length
+ String(const char *s); // from null terminated s
+ String(const char *s, int len); // from s with length len
+ String(const String &s); // Copy constructor
+
+ //
+ // This can be used for performance reasons if it is known the
+ // String will need to grow.
+ //
+ String(const String &s, int allocation_hint);
+
+ ~String();
+
+ inline int length() const; // Defined inline after the class: returns Length
+ char *get();
+ const char *get() const;
+ operator char*() { return get(); } // Implicit conversion; forwards to get()
+ operator const char*() const { return get(); } // Implicit conversion; forwards to get() const
+
+ //
+ // Interpretation
+ // ('def' is presumably the value returned when no conversion is possible -- confirm in String.cc)
+ int as_integer(int def = 0) const;
+ double as_double(double def = 0) const;
+ int empty() const { return length() == 0; } // Non-zero when the string holds no characters
+
+ //
+ // If it is not possible to use the constructor with an initial
+ // allocation size, use the following member to set the size.
+ //
+ void allocate(int init) {reallocate_space(init);} // Forwards to reallocate_space()
+
+ //
+ // allocate space for a new char *, and copy the String in.
+ // (presumably the caller owns the returned buffer -- confirm in String.cc)
+ char *new_char() const;
+
+ //
+ // Assignment
+ // Both set() overloads empty the string with trunc() and then append the new contents.
+ inline String& set(const char *s, int l) { trunc(); append(s, l); return *this; }
+ inline String& set(char *s) { trunc(); append(s, strlen(s)); return *this; } // strlen() is applied to s, so s must be a non-null, null terminated string
+ void operator = (const String &s);
+ void operator = (const char *s);
+ inline void operator += (const String &s) { append(s); }
+ inline void operator += (const char *s) { append(s); }
+
+ //
+ // Appending
+ // Every overload returns String& so that insertions can be chained.
+ inline String &operator << (const char *);
+ inline String &operator << (char);
+ inline String &operator << (unsigned char c) {return *this<<(char)c;} // Cast to char, then handled by the char overload
+ String &operator << (int);
+ String &operator << (unsigned int);
+ String &operator << (long);
+ inline String &operator << (short i) {return *this<<(int)i;} // Cast to int, then handled by the int overload
+ String &operator << (const String &);
+ String &operator << (const String *s) {return *this << *s;} // Dereferences s without a null check
+
+ //
+ // Access to specific characters
+ // Per the inline definitions after the class: a negative n counts back from the end, an out-of-range n reads as '\0'.
+ inline char &operator [] (int n);
+ inline char operator [] (int n) const;
+ inline char Nth (int n) { return (*this)[n]; }
+ inline char last() const { return Length > 0 ? Data[Length - 1] : '\0'; } // '\0' when the string is empty
+
+ //
+ // Removing
+ //
+ char operator >> (char c);
+
+ //
+ // Comparison
+ // Return:
+ // 0 : 'this' is equal to 's'.
+ // -1 : 'this' is less than 's'.
+ // 1 : 'this' is greater than 's'.
+ //
+ int compare(const Object& s) const { return compare((const String&)s); } // Casts s to String without any type check
+ int compare(const String& s) const;
+ int nocase_compare(const String &s) const;
+
+ //
+ // Searching for parts
+ //
+ int lastIndexOf(char c) const;
+ int lastIndexOf(char c, int pos) const;
+ int indexOf(char c) const;
+ int indexOf(char c, int pos) const;
+ int indexOf(const char *) const;
+ int indexOf(const char *, int pos) const;
+
+ //
+ // Manipulation
+ //
+ void append(const String &s);
+ void append(const char *s);
+ void append(const char *s, int n);
+ void append(char ch);
+
+ inline String &trunc() { Length = 0; return *this; } // Only Length is reset; Allocated and Data are left untouched
+ String &chop(int n = 1);
+ String &chop(char ch = '\n');
+ String &chop(const char *str = (char *)"\r\n"); // NOTE(review): all three chop() overloads default their only argument, so an argument-less chop() call is ambiguous
+
+ //
+ // SubStrings
+ //
+ // The string starting at position 'start' and length 'len'.
+ //
+ String sub(int start, int len) const;
+ String sub(int start) const;
+
+ //
+ // IO
+ //
+ int Write(int fd) const;
+
+#ifndef NOSTREAM
+ void debug(ostream &o);
+#endif /* NOSTREAM */
+
+ //
+ // Non-member operators
+ //
+ friend String operator + (const String &a, const String &b);
+ friend int operator == (const String &a, const String &b);
+ friend int operator != (const String &a, const String &b);
+ friend int operator < (const String &a, const String &b);
+ friend int operator > (const String &a, const String &b);
+ friend int operator <= (const String &a, const String &b);
+ friend int operator >= (const String &a, const String &b);
+
+#ifndef NOSTREAM
+ friend ostream &operator << (ostream &o, const String &s);
+
+ friend istream &operator >> (istream &in, String &line);
+#endif /* NOSTREAM */
+
+ int readLine(FILE *in);
+
+ int lowercase();
+ int uppercase();
+
+ void replace(char c1, char c2);
+ int remove(const char *);
+
+ Object *Copy() const { return new String(*this); } // Copy-constructed with new; the returned object is heap allocated
+
+ //
+ // Persistent storage support
+ //
+ void Serialize(String &);
+ void Deserialize(String &, int &);
+
+private:
+ int Length; // Current Length
+ int Allocated; // Total space allocated
+ char *Data; // The actual contents
+
+ void copy_data_from(const char *s, int len, int dest_offset = 0);
+ void copy(const char *s, int len, int allocation_hint);
+
+ //
+ // Possibly make Data bigger.
+ //
+ void reallocate_space(int len);
+
+ //
+ // Allocate some space for the data. Delete Data if it
+ // has been allocated.
+ //
+ void allocate_space(int len);
+ // Allocate some space without rounding
+ void allocate_fix_space(int len);
+
+ friend class StringIndex;
+};
+
+extern char *form(const char *, ...);
+extern char *vform(const char *, va_list);
+
+//
+// Inline methods.
+// The two insertion operators forward to the matching append() overload and return *this.
+inline String &String::operator << (const char *str)
+{
+ append(str);
+ return *this;
+}
+
+inline String &String::operator << (char ch)
+{
+ append(ch);
+ return *this;
+}
+
+inline int String::length() const
+{
+ return Length; // The value tracked in the private Length member
+}
+
+inline char String::operator [] (int n) const
+{
+ // A negative index is taken relative to the end of the string.
+ const int pos = (n < 0) ? Length + n : n;
+
+ // Anything that still falls outside [0, Length) reads as NUL.
+ return (pos >= 0 && pos < Length) ? Data[pos] : '\0';
+}
+
+//
+// Sentinel handed out by the writable operator[] below when the index
+// is out of range. Being 'static' in a header, every translation unit
+// gets its own copy.
+//
+static char null = '\0';
+
+//
+// Writable access to the n-th character. A negative n counts back
+// from the end of the string. For an index outside [0, Length) a
+// reference to the 'null' sentinel is returned instead of a reference
+// into Data.
+//
+inline char &String::operator [] (int n)
+{
+ if(n < 0) n = Length + n;
+ if(n >= Length || n < 0)
+ {
+  // The caller may have written through a previously returned
+  // sentinel reference; re-arm it so that an out-of-range read
+  // always yields '\0' again.
+  null = '\0';
+  return null;
+ }
+
+ return Data[n];
+}
+
+//
+// Non friend, non member operators
+//
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/lib.h b/debian/htdig/htdig-3.2.0b6/htlib/lib.h
new file mode 100644
index 00000000..d17bd7c4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/lib.h
@@ -0,0 +1,90 @@
+//
+// lib.h
+//
+// lib: Contains typical declarations and header inclusions used by
+// most sources in this directory.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: lib.h,v 1.16 2004/05/28 13:15:21 lha Exp $
+//
+
+#ifndef _lib_h
+#define _lib_h
+
+#ifndef _MSC_VER /* _WIN32 */
+#include "clib.h"
+#endif
+
+#include <string.h>
+
+#ifdef _MSC_VER /* _WIN32 */
+#include "dirent_local.h"
+#define S_ISDIR(v) ((v)&_S_IFDIR)
+#define S_ISREG(v) ((v)&_S_IFREG)
+#else
+#include <dirent.h> // for scandir
+#endif
+
+//
+// Compatibility shims for the Microsoft compiler: names used by the
+// rest of the code are mapped onto their MSVC runtime spellings or
+// given fixed values.
+//
+#ifdef _MSC_VER /* _WIN32 */
+#include <io.h>
+#include <stdlib.h>
+#define S_IFIFO _S_IFIFO // pipe
+#define S_IFBLK 0060000 // block special
+#define S_IFLNK 0120000 // symbolic link
+#define S_IFSOCK 0140000 // socket
+#define S_IFWHT 0160000 // whiteout
+#define R_OK 02
+#define popen _popen
+#define pclose _pclose
+#define lstat stat
+// Every readlink() call reports failure. The replacement is a
+// parenthesized expression (not a braced initializer) so that it is
+// usable in conditions and assignments as well as in initializers.
+#define readlink(x,y,z) (-1)
+#define sleep(t) _sleep((t) * 1000)
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+//
+// Other defines used throughout the library
+//
+#define OK 0
+#define NOTOK (-1)
+
+//
+// To get rid of inconsistencies between different machines we will ALWAYS
+// use our own version of the following routines
+//
+int mystrcasecmp(const char *, const char *);
+int mystrncasecmp(const char *, const char *, int);
+
+//
+// The standard strstr() function is limited in that it does case-sensitive
+// searches. This version will ignore case.
+//
+const char *mystrcasestr(const char *s, const char *pattern);
+
+//
+// Too many problems with system strptime() functions... Just use our own
+// version of it.
+//
+char *mystrptime(const char *buf, const char *fmt, struct tm *tm);
+
+//
+// timegm() is quite rare, so provide our own.
+//
+extern "C" time_t Httimegm(struct tm *tm);
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/libdefs.h b/debian/htdig/htdig-3.2.0b6/htlib/libdefs.h
new file mode 100644
index 00000000..e4f1d11c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/libdefs.h
@@ -0,0 +1,109 @@
+#ifndef LIBDEFS_H
+#define LIBDEFS_H
+
+/*
+ {{{ includes
+ */
+
+
+#ifdef _MSC_VER /* _WIN32 */
+#include <windows.h>
+#endif
+
+
+#include "htconfig.h"
+
+
+#ifdef STDC_HEADERS
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#endif
+
+/*
+ }}}
+ */
+/*
+ {{{ typedefs
+ */
+
+#if SIZEOF_UNSIGNED_LONG_INT == 8
+typedef unsigned long word64;
+#define TIGER_64BIT
+#elif SIZEOF_UNSIGNED_LONG_LONG_INT == 8
+
+#ifndef _MSC_VER /* _WIN32 */
+typedef unsigned long long word64;
+#else //ifdef _MSC_VER /* _WIN32 */
+typedef DWORD64 word64;
+#endif
+#else
+#error "Cannot find a 64 bit integer in your system, sorry."
+#endif
+
+#if SIZEOF_UNSIGNED_LONG_INT == 4
+typedef unsigned long word32;
+#elif SIZEOF_UNSIGNED_INT == 4
+typedef unsigned int word32;
+#else
+#error "Cannot find a 32 bit integer in your system, sorry."
+#endif
+
+#if SIZEOF_UNSIGNED_INT == 2
+typedef unsigned int word16;
+#elif SIZEOF_UNSIGNED_SHORT_INT == 2
+typedef unsigned short word16;
+#else
+#error "Cannot find a 16 bit integer in your system, sorry."
+#endif
+
+#if SIZEOF_UNSIGNED_CHAR == 1
+typedef unsigned char word8;
+#else
+#error "Cannot find an 8 bit char in your system, sorry."
+#endif
+
+typedef word8 byte;
+typedef word32 dword;
+
+/*
+ }}}
+ */
+
+/*
+ {{{ macros and defines
+ */
+
+#define RAND32 (word32) ((word32)rand() << 17 ^ (word32)rand() << 9 ^ rand())
+
+#ifndef HAVE_MEMMOVE
+#ifdef HAVE_BCOPY
+#define memmove(d, s, n) bcopy ((s), (d), (n))
+#else
+#error "Neither memmove nor bcopy exists on your system."
+#endif
+#endif
+
+#define ENCRYPT 0
+#define DECRYPT 1
+
+/*
+ }}}
+ */
+
+/*
+ {{{ prototypes
+ */
+
+void Bzero(void *s, int n);
+
+word32 byteswap(word32 x);
+
+int BreakToThree(void *key, unsigned int keylen,
+ void *keyword1, void *keyword2, void *keyword3);
+
+/*
+ }}}
+ */
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/malloc.c b/debian/htdig/htdig-3.2.0b6/htlib/malloc.c
new file mode 100644
index 00000000..ec9a0d07
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/malloc.c
@@ -0,0 +1,39 @@
+
+/* Work around bug on some systems where malloc (0) fails.
+ Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* written by Jim Meyering */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+#undef malloc
+
+#include <stddef.h>
+
+char *malloc ();
+
+/* Replacement allocator: forward the request to the system malloc,
+ but never ask it for zero bytes -- a request of 0 becomes 1. */
+
+char *
+rpl_malloc (size_t n)
+{
+ const size_t request = (n != 0) ? n : 1;
+
+ return malloc (request);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/md5.cc b/debian/htdig/htdig-3.2.0b6/htlib/md5.cc
new file mode 100644
index 00000000..999a4758
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/md5.cc
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+extern "C" {
+#include "mhash_md5.h"
+}
+
+#define MD5_LENGTH 16
+
+//
+// md5: hash buf[0..len) -- followed, when 'date' is non-null, by the
+// raw bytes of *date -- and copy the MD5_LENGTH digest bytes to rhash.
+//
+//   rhash  destination; must have room for MD5_LENGTH bytes
+//   buf    data to hash
+//   len    number of bytes of buf to hash
+//   date   optional timestamp mixed into the hash (may be null)
+//   debug  when true, print the digest in hex on stdout
+//
+void md5(char *rhash, char *buf, int len, time_t *date, bool debug)
+{
+ // The context lives on the stack: the original obtained it with
+ // malloc() but released it with delete, which is undefined behaviour.
+ MD5_CTX context;
+ unsigned char *hash;
+ int i;
+
+ MD5Init(&context);
+ MD5Update(&context, (unsigned char *) buf, len);
+
+ if (date) {
+  MD5Update(&context, (unsigned char *)date, sizeof(*date));
+ }
+
+ // MD5Final() returns the digest in a buffer it obtained from malloc().
+ hash = (unsigned char *)MD5Final(&context);
+
+ memcpy(rhash, hash, MD5_LENGTH);
+
+ if (debug) {
+  printf(" ");
+  for (i = 0; i < MD5_LENGTH; i++) {
+   printf("%.2x", hash[i]);
+  }
+  printf(" ");
+ }
+
+ // Release the digest buffer; it was previously leaked on every call.
+ free(hash);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/md5.h b/debian/htdig/htdig-3.2.0b6/htlib/md5.h
new file mode 100644
index 00000000..8e4c029a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/md5.h
@@ -0,0 +1,3 @@
+#define MD5_LENGTH 16
+
+void md5(char *rhash, char *buf, int len, time_t *date, bool debug);
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/memcmp.c b/debian/htdig/htdig-3.2.0b6/htlib/memcmp.c
new file mode 100644
index 00000000..55c327ec
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/memcmp.c
@@ -0,0 +1,72 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+
+#ifndef HAVE_MEMCMP
+/*
+ * memcmp --
+ *	Compare the first n bytes of s1 and s2, each byte being read as
+ *	an unsigned char. Returns 0 when all n bytes are equal (always
+ *	the case for n == 0); otherwise the difference between the first
+ *	pair of bytes that differ (s1's byte minus s2's byte).
+ *
+ * PUBLIC: #ifndef HAVE_MEMCMP
+ * PUBLIC: int memcmp __P((const void *, const void *, size_t));
+ * PUBLIC: #endif
+ */
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+ const unsigned char *p1 = s1;
+ const unsigned char *p2 = s2;
+
+ for (; n != 0; --n, ++p1, ++p2) {
+  if (*p1 != *p2)
+   return (*p1 - *p2);
+ }
+ return (0);
+}
+#endif /* HAVE_MEMCMP */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/memcpy.c b/debian/htdig/htdig-3.2.0b6/htlib/memcpy.c
new file mode 100644
index 00000000..22e07c04
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/memcpy.c
@@ -0,0 +1,144 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#ifndef HAVE_MEMCPY
+
+/*
+ * sizeof(word) MUST BE A POWER OF TWO
+ * SO THAT wmask BELOW IS ALL ONES
+ */
+typedef int word; /* "word" used for optimal copy speed */
+
+#undef wsize
+#define wsize sizeof(word)
+#undef wmask
+#define wmask (wsize - 1)
+
+/*
+ * Copy a block of memory, handling overlap.
+ * This is the routine that actually implements
+ * (the portable versions of) bcopy, memcpy, and memmove.
+ *
+ * Copies length bytes from src0 to dst0 and returns dst0. The copy
+ * runs forwards when the destination lies below the source and
+ * backwards otherwise, so overlapping regions are handled. Whole
+ * words are moved when source and destination can be brought to the
+ * same word alignment; everything else is moved byte by byte.
+ *
+ * Pointer values are examined through size_t rather than int or
+ * unsigned long, so no address bits are dropped on platforms where
+ * those types are narrower than a pointer (assumes size_t is as wide
+ * as a data pointer, as on flat-address-space systems).
+ */
+/*
+ * PUBLIC: #ifndef HAVE_MEMCPY
+ * PUBLIC: void *memcpy __P((void *, const void *, size_t));
+ * PUBLIC: #endif
+ */
+void *
+memcpy(void *dst0, const void *src0, size_t length)
+{
+ char *dst = dst0;
+ const char *src = src0;
+ size_t t;
+
+ if (length == 0 || dst == src) /* nothing to do */
+  goto done;
+
+ /*
+  * Macros: loop-t-times; and loop-t-times, t>0
+  */
+#undef TLOOP
+#define TLOOP(s) if (t) TLOOP1(s)
+#undef TLOOP1
+#define TLOOP1(s) do { s; } while (--t)
+
+ if ((size_t)dst < (size_t)src) {
+  /*
+   * Copy forward.
+   */
+  t = (size_t)src; /* only need low bits */
+  if ((t | (size_t)dst) & wmask) {
+   /*
+    * Try to align operands. This cannot be done
+    * unless the low bits match.
+    */
+   if ((t ^ (size_t)dst) & wmask || length < wsize)
+    t = length;
+   else
+    t = wsize - (t & wmask);
+   length -= t;
+   TLOOP1(*dst++ = *src++);
+  }
+  /*
+   * Copy whole words, then mop up any trailing bytes.
+   */
+  t = length / wsize;
+  TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize);
+  t = length & wmask;
+  TLOOP(*dst++ = *src++);
+ } else {
+  /*
+   * Copy backwards. Otherwise essentially the same.
+   * Alignment works as before, except that it takes
+   * (t&wmask) bytes to align, not wsize-(t&wmask).
+   */
+  src += length;
+  dst += length;
+  t = (size_t)src;
+  if ((t | (size_t)dst) & wmask) {
+   if ((t ^ (size_t)dst) & wmask || length <= wsize)
+    t = length;
+   else
+    t &= wmask;
+   length -= t;
+   TLOOP1(*--dst = *--src);
+  }
+  t = length / wsize;
+  TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src);
+  t = length & wmask;
+  TLOOP(*--dst = *--src);
+ }
+done:
+ return (dst0);
+}
+
+#endif /* HAVE_MEMCPY */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/memmove.c b/debian/htdig/htdig-3.2.0b6/htlib/memmove.c
new file mode 100644
index 00000000..7615f63c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/memmove.c
@@ -0,0 +1,143 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#ifndef HAVE_MEMMOVE
+/*
+ * sizeof(word) MUST BE A POWER OF TWO
+ * SO THAT wmask BELOW IS ALL ONES
+ */
+typedef int word; /* "word" used for optimal copy speed */
+
+#undef wsize
+#define wsize sizeof(word)
+#undef wmask
+#define wmask (wsize - 1)
+
+/*
+ * Copy a block of memory, handling overlap.
+ * This is the routine that actually implements
+ * (the portable versions of) bcopy, memcpy, and memmove.
+ *
+ * Copies length bytes from src0 to dst0 and returns dst0. The copy
+ * runs forwards when the destination lies below the source and
+ * backwards otherwise, so overlapping regions are handled. Whole
+ * words are moved when source and destination can be brought to the
+ * same word alignment; everything else is moved byte by byte.
+ *
+ * Pointer values are examined through size_t rather than int or
+ * unsigned long, so no address bits are dropped on platforms where
+ * those types are narrower than a pointer (assumes size_t is as wide
+ * as a data pointer, as on flat-address-space systems).
+ */
+/*
+ * PUBLIC: #ifndef HAVE_MEMMOVE
+ * PUBLIC: void *memmove __P((void *, const void *, size_t));
+ * PUBLIC: #endif
+ */
+void *
+memmove(void *dst0, const void *src0, size_t length)
+{
+ char *dst = dst0;
+ const char *src = src0;
+ size_t t;
+
+ if (length == 0 || dst == src) /* nothing to do */
+  goto done;
+
+ /*
+  * Macros: loop-t-times; and loop-t-times, t>0
+  */
+#undef TLOOP
+#define TLOOP(s) if (t) TLOOP1(s)
+#undef TLOOP1
+#define TLOOP1(s) do { s; } while (--t)
+
+ if ((size_t)dst < (size_t)src) {
+  /*
+   * Copy forward.
+   */
+  t = (size_t)src; /* only need low bits */
+  if ((t | (size_t)dst) & wmask) {
+   /*
+    * Try to align operands. This cannot be done
+    * unless the low bits match.
+    */
+   if ((t ^ (size_t)dst) & wmask || length < wsize)
+    t = length;
+   else
+    t = wsize - (t & wmask);
+   length -= t;
+   TLOOP1(*dst++ = *src++);
+  }
+  /*
+   * Copy whole words, then mop up any trailing bytes.
+   */
+  t = length / wsize;
+  TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize);
+  t = length & wmask;
+  TLOOP(*dst++ = *src++);
+ } else {
+  /*
+   * Copy backwards. Otherwise essentially the same.
+   * Alignment works as before, except that it takes
+   * (t&wmask) bytes to align, not wsize-(t&wmask).
+   */
+  src += length;
+  dst += length;
+  t = (size_t)src;
+  if ((t | (size_t)dst) & wmask) {
+   if ((t ^ (size_t)dst) & wmask || length <= wsize)
+    t = length;
+   else
+    t &= wmask;
+   length -= t;
+   TLOOP1(*--dst = *--src);
+  }
+  t = length / wsize;
+  TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src);
+  t = length & wmask;
+  TLOOP(*--dst = *--src);
+ }
+done:
+ return (dst0);
+}
+#endif /* HAVE_MEMMOVE */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.c b/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.c
new file mode 100644
index 00000000..2e0441a1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.c
@@ -0,0 +1,534 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*
+ Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ rights reserved.
+
+ License to copy and use this software is granted provided that it
+ is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ Algorithm" in all material mentioning or referencing this software
+ or this function.
+
+ License is also granted to make and use derivative works provided
+ that such works are identified as "derived from the RSA Data
+ Security, Inc. MD5 Message-Digest Algorithm" in all material
+ mentioning or referencing the derived work.
+
+ RSA Data Security, Inc. makes no representations concerning either
+ the merchantability of this software or the suitability of this
+ software for any particular purpose. It is provided "as is"
+ without express or implied warranty of any kind.
+
+ These notices must be retained in any copies of any part of this
+ documentation and/or software.
+ */
+
+/*
+ This is a slightly modified version
+ */
+
+#include "mhash_md5.h"
+
+/*
+ Constants for MD5Transform routine.
+ */
+
+
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+static void MD5Transform PROTO_LIST((word32[4], unsigned char[64]));
+static void Encode PROTO_LIST
+ ((unsigned char *, word32 *, unsigned int));
+static void Decode PROTO_LIST
+ ((word32 *, unsigned char *, unsigned int));
+static void MD5_memcpy PROTO_LIST((POINTER, POINTER, unsigned int));
+static void MD5_memset PROTO_LIST((POINTER, int, unsigned int));
+
+static unsigned char PADDING[64] =
+{
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ F, G, H and I are basic MD5 functions.
+ Every argument is parenthesized wherever it is used -- including
+ under the ~ operator -- so that a compound expression passed as an
+ argument is complemented as a whole rather than by its first operand.
+ */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
+
+/*
+ ROTATE_LEFT rotates x left n bits.
+ The complementary right shift is by 32-(n), so the macro assumes x
+ is an unsigned 32-bit quantity and that n lies strictly between 0
+ and 32. Both x and n are expanded twice.
+ */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+/*
+ FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+ Rotation is separate from addition to prevent recomputation.
+
+ Each macro updates a in place: it adds the round function of
+ (b, c, d), the value x and the constant ac to a, rotates a left by
+ s bits, and finally adds b. The expansion is a brace-enclosed block
+ of three statements, not a single expression.
+ */
+#define FF(a, b, c, d, x, s, ac) { \
+ (a) += F ((b), (c), (d)) + (x) + (word32)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define GG(a, b, c, d, x, s, ac) { \
+ (a) += G ((b), (c), (d)) + (x) + (word32)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define HH(a, b, c, d, x, s, ac) { \
+ (a) += H ((b), (c), (d)) + (x) + (word32)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+#define II(a, b, c, d, x, s, ac) { \
+ (a) += I ((b), (c), (d)) + (x) + (word32)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+ }
+
+/*
+ Start a fresh MD5 computation: clear both words of the bit counter
+ and load the four chaining words with their initial constants.
+ */
+void
+MD5Init(context)
+ MD5_CTX *context; /* context to initialize */
+{
+ /* Initial values of the four state words, in order. */
+ static const word32 initial_state[4] = {
+  0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+ };
+ unsigned int k;
+
+ context->count[1] = 0;
+ context->count[0] = 0;
+
+ for (k = 0; k < 4; k++)
+  context->state[k] = initial_state[k];
+}
+
+/*
+ MD5 block update operation. Continues an MD5 message-digest
+ operation, processing another message block, and updating the
+ context.
+
+ Input is consumed in 64-byte blocks. Bytes that do not fill a
+ block are kept in context->buffer and completed by a later call.
+ context->count holds the running message length in bits, split
+ over two word32 values: count[0] is the low word and count[1]
+ receives the carry and the high bits.
+ */
+void
+MD5Update(context, input, inputLen)
+ MD5_CTX *context; /*
+ context; its count, buffer and state are updated in place
+ */
+ const unsigned char *input; /*
+ input block
+ */
+ unsigned int inputLen; /*
+ length of input block, in bytes
+ */
+{
+ unsigned int i, index, partLen;
+
+ /*
+ Compute number of bytes mod 64, i.e. how many bytes are already
+ waiting in context->buffer (count[0] is in bits, hence the >> 3).
+ */
+ index = (unsigned int) ((context->count[0] >> 3) & 0x3F);
+
+ /*
+ Update number of bits. inputLen << 3 is added to the low word;
+ if the sum wrapped around, it is smaller than the addend and one
+ is carried into the high word. The top three bits of inputLen,
+ which the << 3 shifted out, are then added to the high word.
+ */
+ if ((context->count[0] += ((word32) inputLen << 3)) < ((word32) inputLen << 3))
+ context->count[1]++;
+ context->count[1] += ((word32) inputLen >> 29);
+
+ partLen = 64 - index;
+
+ /*
+ Transform as many times as possible: first complete and process
+ the buffered block, then process full 64-byte blocks directly
+ from the input.
+ */
+ if (inputLen >= partLen) {
+ MD5_memcpy
+ ((POINTER) & context->buffer[index], (POINTER) input, partLen);
+ MD5Transform(context->state, context->buffer);
+
+ for (i = partLen; i + 63 < inputLen; i += 64)
+ MD5Transform(context->state, (unsigned char *) &input[i]);
+
+ index = 0;
+ }
+ else
+ i = 0;
+
+ /*
+ Buffer remaining input (the bytes from input[i] onwards) at
+ offset index of context->buffer.
+ */
+ MD5_memcpy
+ ((POINTER) & context->buffer[index], (POINTER) & input[i],
+ inputLen - i);
+}
+
+/*
+ MD5 finalization. Ends an MD5 message-digest operation, returning
+ the message digest and zeroizing the context.
+
+ The 16-byte digest is returned in a buffer obtained from malloc();
+ the caller owns that buffer and must free() it. If the allocation
+ fails, a null pointer is returned; the context is still padded,
+ finalized and zeroized in that case.
+ */
+void *
+MD5Final(context)
+ MD5_CTX *context; /*
+ context
+ */
+{
+ unsigned char bits[8];
+ unsigned int index, padLen;
+ unsigned char *digest = malloc(16);
+
+ /*
+ Save number of bits
+ */
+ Encode(bits, context->count, 8);
+
+ /*
+ Pad out to 56 mod 64.
+ */
+ index = (unsigned int) ((context->count[0] >> 3) & 0x3f);
+ padLen = (index < 56) ? (56 - index) : (120 - index);
+ MD5Update(context, PADDING, padLen);
+
+ /*
+ Append length (before padding)
+ */
+ MD5Update(context, bits, 8);
+
+ /*
+ Store state in digest, unless the digest buffer could not be
+ allocated.
+ */
+ if (digest)
+  Encode(digest, context->state, 16);
+
+ /*
+ Zeroize sensitive information.
+ */
+ MD5_memset((POINTER) context, 0, sizeof(*context));
+
+ return digest;
+}
+
+/*
+ MD5 basic transformation. Transforms state based on block.
+ Decodes the 64-byte block into sixteen words x[0..15], runs four
+ rounds of sixteen steps each (FF, GG, HH, II) on working copies
+ a, b, c, d of the four state words, then adds the results back
+ into state. The trailing number on each step is its ordinal (1-64).
+ */
+static void
+MD5Transform(state, block)
+ word32 state[4];
+ unsigned char block[64];
+{
+ word32 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+ Decode(x, block, 64);
+
+ /* Round 1 */
+ FF(a, b, c, d, x[0], S11, 0xd76aa478); /* 1 */
+ FF(d, a, b, c, x[1], S12, 0xe8c7b756); /* 2 */
+ FF(c, d, a, b, x[2], S13, 0x242070db); /* 3 */
+ FF(b, c, d, a, x[3], S14, 0xc1bdceee); /* 4 */
+ FF(a, b, c, d, x[4], S11, 0xf57c0faf); /* 5 */
+ FF(d, a, b, c, x[5], S12, 0x4787c62a); /* 6 */
+ FF(c, d, a, b, x[6], S13, 0xa8304613); /* 7 */
+ FF(b, c, d, a, x[7], S14, 0xfd469501); /* 8 */
+ FF(a, b, c, d, x[8], S11, 0x698098d8); /* 9 */
+ FF(d, a, b, c, x[9], S12, 0x8b44f7af); /* 10 */
+ FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+ FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+ FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+ FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+ FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+ FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+ /* Round 2 */
+ GG(a, b, c, d, x[1], S21, 0xf61e2562); /* 17 */
+ GG(d, a, b, c, x[6], S22, 0xc040b340); /* 18 */
+ GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+ GG(b, c, d, a, x[0], S24, 0xe9b6c7aa); /* 20 */
+ GG(a, b, c, d, x[5], S21, 0xd62f105d); /* 21 */
+ GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */
+ GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+ GG(b, c, d, a, x[4], S24, 0xe7d3fbc8); /* 24 */
+ GG(a, b, c, d, x[9], S21, 0x21e1cde6); /* 25 */
+ GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+ GG(c, d, a, b, x[3], S23, 0xf4d50d87); /* 27 */
+ GG(b, c, d, a, x[8], S24, 0x455a14ed); /* 28 */
+ GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+ GG(d, a, b, c, x[2], S22, 0xfcefa3f8); /* 30 */
+ GG(c, d, a, b, x[7], S23, 0x676f02d9); /* 31 */
+ GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+ /* Round 3 */
+ HH(a, b, c, d, x[5], S31, 0xfffa3942); /* 33 */
+ HH(d, a, b, c, x[8], S32, 0x8771f681); /* 34 */
+ HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+ HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+ HH(a, b, c, d, x[1], S31, 0xa4beea44); /* 37 */
+ HH(d, a, b, c, x[4], S32, 0x4bdecfa9); /* 38 */
+ HH(c, d, a, b, x[7], S33, 0xf6bb4b60); /* 39 */
+ HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+ HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+ HH(d, a, b, c, x[0], S32, 0xeaa127fa); /* 42 */
+ HH(c, d, a, b, x[3], S33, 0xd4ef3085); /* 43 */
+ HH(b, c, d, a, x[6], S34, 0x4881d05); /* 44 */
+ HH(a, b, c, d, x[9], S31, 0xd9d4d039); /* 45 */
+ HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+ HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+ HH(b, c, d, a, x[2], S34, 0xc4ac5665); /* 48 */
+
+ /* Round 4 */
+ II(a, b, c, d, x[0], S41, 0xf4292244); /* 49 */
+ II(d, a, b, c, x[7], S42, 0x432aff97); /* 50 */
+ II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+ II(b, c, d, a, x[5], S44, 0xfc93a039); /* 52 */
+ II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+ II(d, a, b, c, x[3], S42, 0x8f0ccc92); /* 54 */
+ II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+ II(b, c, d, a, x[1], S44, 0x85845dd1); /* 56 */
+ II(a, b, c, d, x[8], S41, 0x6fa87e4f); /* 57 */
+ II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+ II(c, d, a, b, x[6], S43, 0xa3014314); /* 59 */
+ II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+ II(a, b, c, d, x[4], S41, 0xf7537e82); /* 61 */
+ II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+ II(c, d, a, b, x[2], S43, 0x2ad7d2bb); /* 63 */
+ II(b, c, d, a, x[9], S44, 0xeb86d391); /* 64 */
+
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+
+ /*
+ Zeroize sensitive information: wipe the decoded copy of the block.
+ */
+ MD5_memset((POINTER) x, 0, sizeof(x));
+}
+
+/*
+ Serializes word32 values from input into output as bytes, least
+ significant byte first. len is the number of output bytes and is
+ assumed to be a multiple of 4.
+ */
+static void
+Encode(output, input, len)
+    unsigned char *output;
+    word32 *input;
+    unsigned int len;
+{
+    unsigned int word, pos, k;
+
+    for (word = 0, pos = 0; pos < len; word++, pos += 4) {
+        /* Emit the four bytes of this word, lowest byte first. */
+        for (k = 0; k < 4; k++)
+            output[pos + k] =
+                (unsigned char) ((input[word] >> (8 * k)) & 0xff);
+    }
+}
+
+/*
+ Assembles word32 values in output from the bytes of input, treating
+ the first byte of each group of four as the least significant. len is
+ the number of input bytes and is assumed to be a multiple of 4.
+ */
+static void
+Decode(output, input, len)
+    word32 *output;
+    unsigned char *input;
+    unsigned int len;
+{
+    unsigned int word, pos;
+
+    for (word = 0, pos = 0; pos < len; word++, pos += 4) {
+        word32 b0 = (word32) input[pos];
+        word32 b1 = (word32) input[pos + 1];
+        word32 b2 = (word32) input[pos + 2];
+        word32 b3 = (word32) input[pos + 3];
+
+        output[word] = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+    }
+}
+
+/*
+ Copies len bytes from input to output, one byte at a time in
+ ascending address order.
+ Note: Replace this loop with standard memcpy if possible.
+ */
+
+static void
+MD5_memcpy(output, input, len)
+    POINTER output;
+    POINTER input;
+    unsigned int len;
+{
+    while (len-- > 0)
+        *output++ = *input++;
+}
+
+/*
+ Fills the first len bytes at output with value (converted to char).
+ Note: Replace this loop with standard memset if possible.
+ */
+static void
+MD5_memset(output, value, len)
+    POINTER output;
+    int value;
+    unsigned int len;
+{
+    char *dst = (char *) output;
+    const char fill = (char) value;
+
+    while (len-- > 0)
+        *dst++ = fill;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.h b/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.h
new file mode 100644
index 00000000..a1cbd2fc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/mhash_md5.h
@@ -0,0 +1,86 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*
+ MD5.H - header file for MD5C.C
+ */
+
+/*
+ Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ rights reserved.
+
+ License to copy and use this software is granted provided that it
+ is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ Algorithm" in all material mentioning or referencing this software
+ or this function.
+
+ License is also granted to make and use derivative works provided
+ that such works are identified as "derived from the RSA Data
+ Security, Inc. MD5 Message-Digest Algorithm" in all material
+ mentioning or referencing the derived work.
+
+ RSA Data Security, Inc. makes no representations concerning either
+ the merchantability of this software or the suitability of this
+ software for any particular purpose. It is provided "as is"
+ without express or implied warranty of any kind.
+ These notices must be retained in any copies of any part of this
+ documentation and/or software.
+ */
+
+#ifndef MHASH_MD5_H
+#define MHASH_MD5_H
+
+#include <libdefs.h>
+
+/*
+ PROTOTYPES should be set to one if and only if the compiler supports
+ function argument prototyping.
+ The following makes PROTOTYPES default to 1 if it has not already
+ been defined with C compiler flags.
+ */
+#ifndef PROTOTYPES
+#define PROTOTYPES 1
+#endif
+
+/*
+ POINTER defines a generic pointer type
+ */
+typedef unsigned char *POINTER;
+
+/*
+ PROTO_LIST is defined depending on how PROTOTYPES is defined above.
+ If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
+ returns an empty list.
+ */
+#if PROTOTYPES
+#define PROTO_LIST(list) list
+#else
+#define PROTO_LIST(list) ()
+#endif
+
+
+
+/*
+ MD5 context: the running state of one MD5 computation.
+ MD5Final zeroizes the whole structure when it finishes.
+ */
+typedef struct {
+ word32 state[4]; /* state (ABCD) */
+ word32 count[2]; /* number of bits, modulo 2^64 (lsb first) */
+ word8 buffer[64]; /* input buffer: bytes not yet run through a transform */
+} MD5_CTX;
+
+void MD5Init PROTO_LIST((MD5_CTX *));
+void MD5Update PROTO_LIST((MD5_CTX *, const unsigned char *, unsigned int)); /* context, input bytes, input length in bytes */
+void *MD5Final PROTO_LIST((MD5_CTX *)); /* returns a malloc()ed 16-byte digest and zeroizes the context */
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/mktime.c b/debian/htdig/htdig-3.2.0b6/htlib/mktime.c
new file mode 100644
index 00000000..a59dab4c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/mktime.c
@@ -0,0 +1,535 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/* Convert a `struct tm' to a time_t value.
+ Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Paul Eggert (eggert@twinsun.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Define this to have a standalone program to test this implementation of
+ mktime. */
+/* #define DEBUG 1 */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#ifdef _LIBC
+# define HAVE_LIMITS_H 1
+# define STDC_HEADERS 1
+#endif
+
+/* Assume that leap seconds are possible, unless told otherwise.
+ If the host has a `zic' command with a `-L leapsecondfilename' option,
+ then it supports leap seconds; otherwise it probably doesn't. */
+#ifndef LEAP_SECONDS_POSSIBLE
+# define LEAP_SECONDS_POSSIBLE 1
+#endif
+
+#include <sys/types.h> /* Some systems define `time_t' here. */
+#include <time.h>
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#if DEBUG
+# include <stdio.h>
+# if STDC_HEADERS
+# include <stdlib.h>
+# endif
+/* Make it work even if the system's libc has its own mktime routine. */
+# define mktime my_mktime
+#endif /* DEBUG */
+
+#ifndef __P
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define __P(args) args
+# else
+# define __P(args) ()
+# endif /* GCC. */
+#endif /* Not __P. */
+
+#ifndef CHAR_BIT
+# define CHAR_BIT 8
+#endif
+
+/* The extra casts work around common compiler bugs. */
+#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
+/* The outer cast is needed to work around a bug in Cray C 5.0.3.0.
+ It is necessary at least when t == time_t. */
+#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
+ ? ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1) : (t) 0))
+#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))
+
+#ifndef INT_MIN
+# define INT_MIN TYPE_MINIMUM (int)
+#endif
+#ifndef INT_MAX
+# define INT_MAX TYPE_MAXIMUM (int)
+#endif
+
+#ifndef TIME_T_MIN
+# define TIME_T_MIN TYPE_MINIMUM (time_t)
+#endif
+#ifndef TIME_T_MAX
+# define TIME_T_MAX TYPE_MAXIMUM (time_t)
+#endif
+
+#define TM_YEAR_BASE 1900
+#define EPOCH_YEAR 1970
+
+#ifndef __isleap
+/* Nonzero if YEAR is a leap year (every 4 years,
+ except every 100th isn't, and every 400th is). */
+# define __isleap(year) \
+ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+#endif
+
+/* How many days come before each month (0-12).
+ Indexed as [is leap year][month]; entry 12 is the length of the
+ whole year (365 or 366). */
+const unsigned short int __mon_yday[2][13] =
+ {
+ /* Normal years. */
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ /* Leap years. */
+ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+
+
+#ifdef _LIBC
+# define my_mktime_localtime_r __localtime_r
+#else
+/* Outside of libc, emulate localtime_r on top of plain localtime,
+   since many localtime_r implementations are buggy.
+   Copies the broken-down time for *T into *TP and returns TP, or
+   returns a null pointer (leaving *TP untouched) if localtime fails.  */
+static struct tm *
+my_mktime_localtime_r (const time_t *t, struct tm *tp)
+{
+  const struct tm *converted = localtime (t);
+
+  if (converted == 0)
+    return 0;
+
+  *tp = *converted;
+  return tp;
+}
+#endif /* ! _LIBC */
+
+
+/* Return (YEAR-YDAY HOUR:MIN:SEC) minus (*TP), measured in seconds
+   and ignoring leap seconds.
+   YEAR uses the same numbering as TM->tm_year.
+   All values are in range, except possibly YEAR.
+   If TP is null, return a nonzero value (1).
+   If overflow occurs, yield the low order bits of the correct answer.  */
+static time_t
+ydhms_tm_diff (int year, int yday, int hour, int min, int sec,
+               const struct tm *tp)
+{
+  int a4, b4, a100, b100, a400, b400, intervening_leap_days;
+  time_t years, days, hours, minutes;
+
+  if (tp == 0)
+    return 1;
+
+  /* Compute intervening leap days correctly even if year is negative.
+     Take care to avoid int overflow.  time_t overflow is OK, since
+     only the low order bits of the correct time_t answer are needed.
+     Don't convert to time_t until after all divisions are done, since
+     time_t might be unsigned.  */
+  a4 = (year >> 2) + (TM_YEAR_BASE >> 2) - ! (year & 3);
+  b4 = (tp->tm_year >> 2) + (TM_YEAR_BASE >> 2) - ! (tp->tm_year & 3);
+  a100 = a4 / 25 - (a4 % 25 < 0);
+  b100 = b4 / 25 - (b4 % 25 < 0);
+  a400 = a100 >> 2;
+  b400 = b100 >> 2;
+  intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+
+  /* From here on everything is accumulated in time_t.  */
+  years = year - (time_t) tp->tm_year;
+  days = (365 * years + intervening_leap_days
+          + (yday - tp->tm_yday));
+  hours = 24 * days + (hour - tp->tm_hour);
+  minutes = 60 * hours + (min - tp->tm_min);
+  return 60 * minutes + (sec - tp->tm_sec);
+}
+
+/* Use CONVERT to convert *T to a broken down time in *TP.
+ If *T is out of range for conversion, adjust it so that
+ it is the nearest in-range value and then convert that.
+ Returns the pointer produced by the conversion that was finally
+ used, or a null pointer if *T is 0 and does not convert, or if the
+ search finds no convertible value other than the assumed 0. */
+static struct tm *
+ranged_convert (struct tm *(*convert) (const time_t *, struct tm *),
+ time_t *t, struct tm *tp)
+{
+ struct tm *r;
+
+ if (! (r = (*convert) (t, tp)) && *t)
+ {
+ time_t bad = *t;
+ time_t ok = 0; /* 0 is assumed convertible; it is never probed itself */
+ struct tm tm; /* copy of the most recent successful conversion */
+
+ /* BAD is a known unconvertible time_t, and OK is a known good one.
+ Use binary search to narrow the range between BAD and OK until
+ they differ by 1. Each probe also stores the midpoint in *T. */
+ while (bad != ok + (bad < 0 ? -1 : 1))
+ {
+ time_t mid = *t = (bad < 0
+ ? bad + ((ok - bad) >> 1)
+ : ok + ((bad - ok) >> 1));
+ if ((r = (*convert) (t, tp)))
+ {
+ tm = *r;
+ ok = mid;
+ }
+ else
+ bad = mid;
+ }
+
+ if (!r && ok) /* OK is nonzero only after a successful probe, so TM is set */
+ {
+ /* The last conversion attempt failed;
+ revert to the most recent successful attempt. */
+ *t = ok;
+ *tp = tm;
+ r = tp;
+ }
+ }
+
+ return r;
+}
+
+
+/* Convert *TP to a time_t value, inverting
+   the monotonic and mostly-unit-linear conversion function CONVERT.
+   Use *OFFSET to keep track of a guess at the offset of the result,
+   compared to what the result would be for UTC without leap seconds.
+   If *OFFSET's guess is correct, only one CONVERT call is needed.
+   On success, *TP is overwritten with the normalized broken-down time
+   and the time_t value is returned.  Returns -1 if the probes run out,
+   a conversion fails, or the result overflows; *TP is left unmodified
+   in that case.  */
+time_t
+__mktime_internal (struct tm *tp,
+                   struct tm *(*convert) (const time_t *, struct tm *),
+                   time_t *offset)
+{
+  time_t t, dt, t0, t1, t2;
+  struct tm tm;
+
+  /* The maximum number of probes (calls to CONVERT) should be enough
+     to handle any combinations of time zone rule changes, solar time,
+     leap seconds, and oscillations around a spring-forward gap.
+     POSIX.1 prohibits leap seconds, but some hosts have them anyway.  */
+  int remaining_probes = 6;
+
+  /* Time requested.  Copy it in case CONVERT modifies *TP; this can
+     occur if TP is localtime's returned value and CONVERT is localtime.  */
+  int sec = tp->tm_sec;
+  int min = tp->tm_min;
+  int hour = tp->tm_hour;
+  int mday = tp->tm_mday;
+  int mon = tp->tm_mon;
+  int year_requested = tp->tm_year;
+  int isdst = tp->tm_isdst;
+
+  /* Ensure that mon is in range, and set year accordingly.  */
+  int mon_remainder = mon % 12;
+  int negative_mon_remainder = mon_remainder < 0;
+  int mon_years = mon / 12 - negative_mon_remainder;
+  int year = year_requested + mon_years;
+
+  /* The other values need not be in range:
+     the remaining code handles minor overflows correctly,
+     assuming int and time_t arithmetic wraps around.
+     Major overflows are caught at the end.  */
+
+  /* Calculate day of year from year, month, and day of month.
+     The result need not be in range.  */
+  int yday = ((__mon_yday[__isleap (year + TM_YEAR_BASE)]
+               [mon_remainder + 12 * negative_mon_remainder])
+              + mday - 1);
+
+  int sec_requested = sec;
+#if LEAP_SECONDS_POSSIBLE
+  /* Handle out-of-range seconds specially,
+     since ydhms_tm_diff assumes every minute has 60 seconds.  */
+  if (sec < 0)
+    sec = 0;
+  if (59 < sec)
+    sec = 59;
+#endif
+
+  /* Invert CONVERT by probing.  First assume the same offset as last time.
+     Then repeatedly use the error to improve the guess.  */
+
+  tm.tm_year = EPOCH_YEAR - TM_YEAR_BASE;
+  tm.tm_yday = tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+  t0 = ydhms_tm_diff (year, yday, hour, min, sec, &tm);
+
+  for (t = t1 = t2 = t0 + *offset;
+       (dt = ydhms_tm_diff (year, yday, hour, min, sec,
+                            ranged_convert (convert, &t, &tm)));
+       t1 = t2, t2 = t, t += dt)
+    if (t == t1 && t != t2
+        && (isdst < 0 || tm.tm_isdst < 0
+            || (isdst != 0) != (tm.tm_isdst != 0)))
+      /* We can't possibly find a match, as we are oscillating
+         between two values.  The requested time probably falls
+         within a spring-forward gap of size DT.  Follow the common
+         practice in this case, which is to return a time that is DT
+         away from the requested time, preferring a time whose
+         tm_isdst differs from the requested value.  In practice,
+         this is more useful than returning -1.  */
+      break;
+    else if (--remaining_probes == 0)
+      return -1;
+
+  /* If we have a match, check whether tm.tm_isdst has the requested
+     value, if any.  */
+  if (dt == 0 && isdst != tm.tm_isdst && 0 <= isdst && 0 <= tm.tm_isdst)
+    {
+      /* tm.tm_isdst has the wrong value.  Look for a neighboring
+         time with the right value, and use its UTC offset.
+         Heuristic: probe the previous three calendar quarters (approximately),
+         looking for the desired isdst.  This isn't perfect,
+         but it's good enough in practice.  */
+      int quarter = 7889238; /* seconds per average 1/4 Gregorian year */
+      int i;
+
+      /* If we're too close to the time_t limit, look in future quarters.  */
+      if (t < TIME_T_MIN + 3 * quarter)
+        quarter = -quarter;
+
+      for (i = 1; i <= 3; i++)
+        {
+          time_t ot = t - i * quarter;
+          struct tm otm;
+
+          /* Skip this probe when OT cannot be converted: OTM would be
+             left uninitialized and must not be inspected.  */
+          if (ranged_convert (convert, &ot, &otm) && otm.tm_isdst == isdst)
+            {
+              /* We found the desired tm_isdst.
+                 Extrapolate back to the desired time.  */
+              t = ot + ydhms_tm_diff (year, yday, hour, min, sec, &otm);
+              ranged_convert (convert, &t, &tm);
+              break;
+            }
+        }
+    }
+
+  *offset = t - t0;
+
+#if LEAP_SECONDS_POSSIBLE
+  if (sec_requested != tm.tm_sec)
+    {
+      /* Adjust time to reflect the tm_sec requested, not the normalized value.
+         Also, repair any damage from a false match due to a leap second.  */
+      t += sec_requested - sec + (sec == 0 && tm.tm_sec == 60);
+      if (! (*convert) (&t, &tm))
+        return -1;
+    }
+#endif
+
+  if (TIME_T_MAX / INT_MAX / 366 / 24 / 60 / 60 < 3)
+    {
+      /* time_t isn't large enough to rule out overflows in ydhms_tm_diff,
+         so check for major overflows.  A gross check suffices,
+         since if t has overflowed, it is off by a multiple of
+         TIME_T_MAX - TIME_T_MIN + 1.  So ignore any component of
+         the difference that is bounded by a small value.  */
+
+      double dyear = (double) year_requested + mon_years - tm.tm_year;
+      double dday = 366 * dyear + mday;
+      double dsec = 60 * (60 * (24 * dday + hour) + min) + sec_requested;
+
+      /* On Irix4.0.5 cc, dividing TIME_T_MIN by 3 does not produce
+         correct results, ie., it erroneously gives a positive value
+         of 715827882.  Setting a variable first then doing math on it
+         seems to work.  (ghazi@caip.rutgers.edu) */
+
+      const time_t time_t_max = TIME_T_MAX;
+      const time_t time_t_min = TIME_T_MIN;
+
+      if (time_t_max / 3 - time_t_min / 3 < (dsec < 0 ? - dsec : dsec))
+        return -1;
+    }
+
+  *tp = tm;
+  return t;
+}
+
+
+/* Offset guess remembered from one mymktime call to the next; it is
+   handed to __mktime_internal, which reads and updates it.  */
+static time_t localtime_offset;
+
+/* Convert the broken-down time *TP to a time_t value by inverting
+   my_mktime_localtime_r.  */
+time_t
+mymktime (tp)
+     struct tm *tp;
+{
+  time_t result;
+
+#ifdef _LIBC
+  /* POSIX.1 8.1.1 requires that whenever mktime() is called, the
+     time zone names contained in the external variable `tzname' shall
+     be set as if the tzset() function had been called.  */
+  __tzset ();
+#endif
+
+  result = __mktime_internal (tp, my_mktime_localtime_r, &localtime_offset);
+  return result;
+}
+
+#ifdef weak_alias
+weak_alias (mktime, timelocal)
+#endif
+
+#if DEBUG
+
+/* Return nonzero if *A and *B differ in any of tm_sec, tm_min, tm_hour,
+   tm_mday, tm_mon, tm_year, tm_yday or tm_isdst; tm_wday is not compared.
+   The value is the bitwise OR of the per-field XORs.  */
+static int
+not_equal_tm (a, b)
+     struct tm *a;
+     struct tm *b;
+{
+  int diff = 0;
+
+  diff |= a->tm_sec ^ b->tm_sec;
+  diff |= a->tm_min ^ b->tm_min;
+  diff |= a->tm_hour ^ b->tm_hour;
+  diff |= a->tm_mday ^ b->tm_mday;
+  diff |= a->tm_mon ^ b->tm_mon;
+  diff |= a->tm_year ^ b->tm_year;
+  diff |= a->tm_yday ^ b->tm_yday;
+  diff |= a->tm_isdst ^ b->tm_isdst;
+
+  return diff;
+}
+
+/* Print *TP on stdout as "YYYY-MM-DD HH:MM:SS yday N wday N isdst N"
+   (no trailing newline), or print "0" when TP is null.  */
+static void
+print_tm (tp)
+     struct tm *tp;
+{
+  if (!tp)
+    {
+      printf ("0");
+      return;
+    }
+
+  printf ("%04d-%02d-%02d %02d:%02d:%02d yday %03d wday %d isdst %d",
+          tp->tm_year + TM_YEAR_BASE, tp->tm_mon + 1, tp->tm_mday,
+          tp->tm_hour, tp->tm_min, tp->tm_sec,
+          tp->tm_yday, tp->tm_wday, tp->tm_isdst);
+}
+
+/* Compare the mktime result TK / TMK against the reference TL / *LT.
+   Return 0 when they agree; otherwise print a report of the mismatch
+   and return 1.  A null LT counts as a mismatch.  */
+static int
+check_result (tk, tmk, tl, lt)
+     time_t tk;
+     struct tm tmk;
+     time_t tl;
+     struct tm *lt;
+{
+  /* Short-circuits so that LT is only dereferenced when non-null.  */
+  if (tk == tl && lt && !not_equal_tm (&tmk, lt))
+    return 0;
+
+  printf ("mktime (");
+  print_tm (&tmk);
+  printf (")\nyields (");
+  print_tm (lt);
+  printf (") == %ld, should be %ld\n", (long) tl, (long) tk);
+  return 1;
+}
+
+/* Test driver (DEBUG builds only).
+     prog YYYY-MM-DD HH:MM:SS [ISDST]   convert one time and verify it
+                                        against localtime
+     prog FROM BY TO                    round-trip every time_t value
+                                        FROM, FROM+BY, ..., TO
+     prog FROM BY TO -                  same loop without the conversion
+                                        (null benchmark)
+   Returns 0 if every check passed, 1 otherwise.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int status = 0;
+  struct tm tm, tmk, tml;
+  struct tm *lt;
+  time_t tk, tl;
+  char trailer;
+
+  if ((argc == 3 || argc == 4)
+      && (sscanf (argv[1], "%d-%d-%d%c",
+                  &tm.tm_year, &tm.tm_mon, &tm.tm_mday, &trailer)
+          == 3)
+      && (sscanf (argv[2], "%d:%d:%d%c",
+                  &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &trailer)
+          == 3))
+    {
+      tm.tm_year -= TM_YEAR_BASE;
+      tm.tm_mon--;
+      tm.tm_isdst = argc == 3 ? -1 : atoi (argv[3]);
+      tmk = tm;
+      /* Call the converter defined in this file by its real name; the
+         DEBUG `mktime' macro expands to a function that does not exist.  */
+      tl = mymktime (&tmk);
+      lt = localtime (&tl);
+      if (lt)
+        {
+          /* Copy out of localtime's static buffer before printing.  */
+          tml = *lt;
+          lt = &tml;
+        }
+      printf ("mktime returns %ld == ", (long) tl);
+      print_tm (&tmk);
+      printf ("\n");
+      status = check_result (tl, tmk, tl, lt);
+    }
+  else if (argc == 4 || (argc == 5 && strcmp (argv[4], "-") == 0))
+    {
+      time_t from = atol (argv[1]);
+      time_t by = atol (argv[2]);
+      time_t to = atol (argv[3]);
+
+      if (argc == 4)
+        for (tl = from; tl <= to; tl += by)
+          {
+            lt = localtime (&tl);
+            if (lt)
+              {
+                tmk = tml = *lt;
+                tk = mymktime (&tmk);
+                /* check_result takes a pointer to the reference time.  */
+                status |= check_result (tk, tmk, tl, &tml);
+              }
+            else
+              {
+                printf ("localtime (%ld) yields 0\n", (long) tl);
+                status = 1;
+              }
+          }
+      else
+        for (tl = from; tl <= to; tl += by)
+          {
+            /* Null benchmark.  */
+            lt = localtime (&tl);
+            if (lt)
+              {
+                tmk = tml = *lt;
+                tk = tl;
+                status |= check_result (tk, tmk, tl, &tml);
+              }
+            else
+              {
+                printf ("localtime (%ld) yields 0\n", (long) tl);
+                status = 1;
+              }
+          }
+    }
+  else
+    printf ("Usage:\
+\t%s YYYY-MM-DD HH:MM:SS [ISDST] # Test given time.\n\
+\t%s FROM BY TO # Test values FROM, FROM+BY, ..., TO.\n\
+\t%s FROM BY TO - # Do not test those values (for benchmark).\n",
+            argv[0], argv[0], argv[0]);
+
+  return status;
+}
+
+#endif /* DEBUG */
+
+/*
+Local Variables:
+compile-command: "gcc -DDEBUG -DHAVE_LIMITS_H -DSTDC_HEADERS -Wall -W -O -g mktime.c -o mktime"
+End:
+*/
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/myqsort.c b/debian/htdig/htdig-3.2.0b6/htlib/myqsort.c
new file mode 100644
index 00000000..1931cfdd
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/myqsort.c
@@ -0,0 +1,260 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/* Copyright (C) 1991, 1992, 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*
+ * Standard qsort function modified to add a user data argument to
+ * the comparison function.
+ */
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "myqsort.h"
+
+/* Byte-wise swap two items of size SIZE. */
+#define SWAP(a, b, size) \
+ do \
+ { \
+ register size_t __size = (size); \
+ register char *__a = (a), *__b = (b); \
+ do \
+ { \
+ char __tmp = *__a; \
+ *__a++ = *__b; \
+ *__b++ = __tmp; \
+ } while (--__size > 0); \
+ } while (0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+ This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations.
+ Each node records the bounds of one partition that still has to be
+ sorted. */
+typedef struct
+ {
+ char *lo; /* first element of the pending partition */
+ char *hi; /* last element of the pending partition (inclusive) */
+ } stack_node;
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+#define STACK_SIZE (8 * sizeof(unsigned long int))
+#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
+#define STACK_NOT_EMPTY (stack < top)
+
+
+/* Sort the TOTAL_ELEMS items of SIZE bytes each that start at PBASE,
+   in place, ordering them with CMP.  CMP is called as
+   (*cmp) (data, a, b) and a negative result means A sorts before B.
+   Nothing is done when TOTAL_ELEMS or SIZE is zero.
+
+   This implementation incorporates four optimizations discussed in
+   Sedgewick:
+
+   1. Non-recursive, using an explicit stack of pointers that store the
+      next array partition to sort.  The stack is a fixed array of
+      STACK_SIZE (8 * sizeof (unsigned long int)) nodes, one per bit of
+      an unsigned long, allocated on the C stack.
+
+   2. Choose the pivot element using a median-of-three decision tree.
+      This reduces the probability of selecting a bad pivot value and
+      eliminates certain extraneous comparisons.
+
+   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+      insertion sort to order the MAX_THRESH items within each partition.
+      This is a big win, since insertion sort is faster for small, mostly
+      sorted array segments.
+
+   4. The larger of the two sub-partitions is always pushed onto the
+      stack first, with the algorithm then concentrating on the
+      smaller partition.  This *guarantees* no more than log (n)
+      stack size is needed (actually O(1) in this case)!
+
+   The pivot is copied into a malloc()ed SIZE-byte buffer.  If that
+   allocation fails the quicksort phase is skipped and the whole array
+   is ordered by the insertion sort alone: slower, but still correct.  */
+
+void
+myqsort(void *const pbase, size_t total_elems, size_t size, myqsort_cmp cmp, void *data)
+{
+  register char *base_ptr = (char *) pbase;
+  const size_t max_thresh = MAX_THRESH * size;
+  char *pivot_buffer = NULL;
+  /* Nonzero once the quicksort phase has run, i.e. once every element
+     is within MAX_THRESH positions of its final place.  */
+  int partitioned = 0;
+
+  /* Avoid lossage with unsigned arithmetic below, and the division by
+     SIZE in the median computation.  */
+  if (total_elems == 0 || size == 0)
+    return;
+
+  /* Allocating SIZE bytes for a pivot buffer facilitates a better
+     algorithm below since we can do comparisons directly on the pivot.
+     It is only needed when the quicksort phase actually runs.  */
+  if (total_elems > MAX_THRESH)
+    pivot_buffer = (char *) malloc (size);
+
+  if (pivot_buffer != NULL)
+    {
+      char *lo = base_ptr;
+      char *hi = &lo[size * (total_elems - 1)];
+      stack_node stack[STACK_SIZE];
+      stack_node *top = stack;
+
+      /* Sentinel entry: popping it empties the stack and ends the loop.
+         Pushing it (rather than starting at stack + 1) keeps the final
+         POP from reading uninitialized pointers.  */
+      PUSH (NULL, NULL);
+
+      while (STACK_NOT_EMPTY)
+        {
+          char *left_ptr;
+          char *right_ptr;
+
+          char *pivot = pivot_buffer;
+
+          /* Select median value from among LO, MID, and HI.  Rearrange
+             LO and HI so the three values are sorted.  This lowers the
+             probability of picking a pathological pivot value and
+             skips a comparison for both the LEFT_PTR and RIGHT_PTR.  */
+
+          char *mid = lo + size * ((hi - lo) / size >> 1);
+
+          if ((*cmp) (data, (void *) mid, (void *) lo) < 0)
+            SWAP (mid, lo, size);
+          if ((*cmp) (data, (void *) hi, (void *) mid) < 0)
+            SWAP (mid, hi, size);
+          else
+            goto jump_over;
+          if ((*cmp) (data, (void *) mid, (void *) lo) < 0)
+            SWAP (mid, lo, size);
+        jump_over:;
+          memcpy (pivot, mid, size);
+
+          left_ptr = lo + size;
+          right_ptr = hi - size;
+
+          /* Here's the famous ``collapse the walls'' section of quicksort.
+             Gotta like those tight inner loops!  They are the main reason
+             that this algorithm runs much faster than others.  */
+          do
+            {
+              while ((*cmp) (data, (void *) left_ptr, (void *) pivot) < 0)
+                left_ptr += size;
+
+              while ((*cmp) (data, (void *) pivot, (void *) right_ptr) < 0)
+                right_ptr -= size;
+
+              if (left_ptr < right_ptr)
+                {
+                  SWAP (left_ptr, right_ptr, size);
+                  left_ptr += size;
+                  right_ptr -= size;
+                }
+              else if (left_ptr == right_ptr)
+                {
+                  left_ptr += size;
+                  right_ptr -= size;
+                  break;
+                }
+            }
+          while (left_ptr <= right_ptr);
+
+          /* Set up pointers for next iteration.  First determine whether
+             left and right partitions are below the threshold size.  If so,
+             ignore one or both.  Otherwise, push the larger partition's
+             bounds on the stack and continue sorting the smaller one.  */
+
+          if ((size_t) (right_ptr - lo) <= max_thresh)
+            {
+              if ((size_t) (hi - left_ptr) <= max_thresh)
+                /* Ignore both small partitions.  */
+                POP (lo, hi);
+              else
+                /* Ignore small left partition.  */
+                lo = left_ptr;
+            }
+          else if ((size_t) (hi - left_ptr) <= max_thresh)
+            /* Ignore small right partition.  */
+            hi = right_ptr;
+          else if ((right_ptr - lo) > (hi - left_ptr))
+            {
+              /* Push larger left partition indices.  */
+              PUSH (lo, right_ptr);
+              lo = left_ptr;
+            }
+          else
+            {
+              /* Push larger right partition indices.  */
+              PUSH (left_ptr, hi);
+              hi = right_ptr;
+            }
+        }
+
+      partitioned = 1;
+    }
+
+  /* Once the BASE_PTR array is partially sorted by quicksort the rest
+     is completely sorted using insertion sort, since this is efficient
+     for partitions below MAX_THRESH size.  BASE_PTR points to the beginning
+     of the array to sort, and END_PTR points at the very last element in
+     the array (*not* one beyond it!).  */
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+  {
+    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+    char *tmp_ptr = base_ptr;
+    /* After quicksort the smallest element lies within the first
+       threshold; without it the whole array has to be searched.  */
+    char *thresh = partitioned ? min(end_ptr, base_ptr + max_thresh) : end_ptr;
+    register char *run_ptr;
+
+    /* Find smallest element in first threshold and place it at the
+       array's beginning.  This is the smallest array element,
+       and the operation speeds up insertion sort's inner loop.  */
+
+    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+      if ((*cmp) (data, (void *) run_ptr, (void *) tmp_ptr) < 0)
+        tmp_ptr = run_ptr;
+
+    if (tmp_ptr != base_ptr)
+      SWAP (tmp_ptr, base_ptr, size);
+
+    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
+
+    run_ptr = base_ptr + size;
+    while ((run_ptr += size) <= end_ptr)
+      {
+        tmp_ptr = run_ptr - size;
+        while ((*cmp) (data, (void *) run_ptr, (void *) tmp_ptr) < 0)
+          tmp_ptr -= size;
+
+        tmp_ptr += size;
+        if (tmp_ptr != run_ptr)
+          {
+            char *trav;
+
+            /* Rotate the element at RUN_PTR down to TMP_PTR one byte
+               column at a time.  */
+            trav = run_ptr + size;
+            while (--trav >= run_ptr)
+              {
+                char c = *trav;
+                char *hi, *lo;
+
+                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+                  *hi = *lo;
+                *hi = c;
+              }
+          }
+      }
+  }
+
+  free(pivot_buffer);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/myqsort.h b/debian/htdig/htdig-3.2.0b6/htlib/myqsort.h
new file mode 100644
index 00000000..415324f6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/myqsort.h
@@ -0,0 +1,23 @@
+/*
+ * Part of the ht://Dig package <http://www.htdig.org/>
+ * Copyright (c) 1999-2004 The ht://Dig Group
+ * For copyright details, see the file COPYING in your distribution
+ * or the GNU Library General Public License (LGPL) version 2 or later
+ * <http://www.gnu.org/copyleft/lgpl.html>
+ */
+#ifndef _myqsort_h
+#define _myqsort_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*myqsort_cmp)(void *data, void *a, void *b); /* comparator: gets the user DATA pointer first;
+ myqsort treats a negative result as "a sorts before b" */
+
+void myqsort(void *const pbase, size_t total_elems, size_t size, myqsort_cmp cmp, void *data); /* sorts TOTAL_ELEMS items of SIZE bytes
+ at PBASE in place; DATA is passed through to every CMP call */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _myqsort_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/raise.c b/debian/htdig/htdig-3.2.0b6/htlib/raise.c
new file mode 100644
index 00000000..e686822b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/raise.c
@@ -0,0 +1,39 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_RAISE
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <signal.h>
+#include <unistd.h>
+#endif
+
+/*
+ * raise --
+ * Send signal S to the current process and return the result of kill().
+ *
+ * PUBLIC: #ifndef HAVE_RAISE
+ * PUBLIC: int raise __P((int));
+ * PUBLIC: #endif
+ */
+int
+raise(s)
+ int s; /* signal number to send */
+{
+ return (kill(getpid(), s)); /* signal our own pid; kill()'s result is passed straight back */
+}
+#endif /* HAVE_RAISE */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/realloc.c b/debian/htdig/htdig-3.2.0b6/htlib/realloc.c
new file mode 100644
index 00000000..2d31766a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/realloc.c
@@ -0,0 +1,146 @@
+/* Change the size of a block allocated by `malloc'.
+ Copyright 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ Written May 1989 by Mike Haertel.
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.
+
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+#ifndef _MALLOC_INTERNAL
+#define _MALLOC_INTERNAL
+#include <malloc.h>
+#endif
+
+#define min(A, B) ((A) < (B) ? (A) : (B))
+
+/* Debugging hook for realloc. */
+__ptr_t (*__realloc_hook) __P ((__ptr_t __ptr, size_t __size));
+
+/* Resize the given region to the new size, returning a pointer
+ to the (possibly moved) region. This is optimized for speed;
+ some benchmarks seem to indicate that greater compactness is
+ achieved by unconditionally allocating and copying to a
+ new region. This module has incestuous knowledge of the
+ internals of both free and malloc. */
+__ptr_t
+realloc (ptr, size) /* returns the resized region, or NULL when a needed allocation fails */
+ __ptr_t ptr; /* region to resize; NULL is handled as malloc (size) */
+ size_t size; /* requested new size; 0 frees PTR and returns malloc (0) */
+{
+ __ptr_t result;
+ int type; /* busy.type of PTR's block: 0 for whole blocks, else log2 of the fragment size */
+ size_t block, blocks, oldlimit;
+
+ if (size == 0)
+ {
+ free (ptr);
+ return malloc (0);
+ }
+ else if (ptr == NULL)
+ return malloc (size);
+
+ if (__realloc_hook != NULL) /* an installed hook takes over the whole request */
+ return (*__realloc_hook) (ptr, size);
+
+ block = BLOCK (ptr);
+
+ type = _heapinfo[block].busy.type;
+ switch (type)
+ {
+ case 0:
+ /* Maybe reallocate a large block to a small fragment. */
+ if (size <= BLOCKSIZE / 2)
+ {
+ result = malloc (size);
+ if (result != NULL) /* on failure fall through and try to shrink in place instead */
+ {
+ memcpy (result, ptr, size);
+ free (ptr);
+ return result;
+ }
+ }
+
+ /* The new size is a large allocation as well;
+ see if we can hold it in place. */
+ blocks = BLOCKIFY (size);
+ if (blocks < _heapinfo[block].busy.info.size)
+ {
+ /* The new size is smaller; return
+ excess memory to the free list. */
+ _heapinfo[block + blocks].busy.type = 0;
+ _heapinfo[block + blocks].busy.info.size
+ = _heapinfo[block].busy.info.size - blocks;
+ _heapinfo[block].busy.info.size = blocks;
+ free (ADDRESS (block + blocks)); /* the tail was just described as its own busy region, so free() can release it */
+ result = ptr;
+ }
+ else if (blocks == _heapinfo[block].busy.info.size)
+ /* No size change necessary. */
+ result = ptr;
+ else
+ {
+ /* Won't fit, so allocate a new region that will.
+ Free the old region first in case there is sufficient
+ adjacent free space to grow without moving. */
+ blocks = _heapinfo[block].busy.info.size; /* BLOCKS now holds the OLD length, used below for recovery and copying */
+ /* Prevent free from actually returning memory to the system. */
+ oldlimit = _heaplimit;
+ _heaplimit = 0;
+ free (ptr);
+ _heaplimit = oldlimit;
+ result = malloc (size);
+ if (result == NULL)
+ {
+ /* Now we're really in trouble. We have to unfree
+ the thing we just freed. Unfortunately it might
+ have been coalesced with its neighbors. */
+ if (_heapindex == block)
+ (void) malloc (blocks * BLOCKSIZE);
+ else
+ {
+ __ptr_t previous = malloc ((block - _heapindex) * BLOCKSIZE); /* temporary filler for the blocks between _heapindex and BLOCK */
+ (void) malloc (blocks * BLOCKSIZE);
+ free (previous);
+ }
+ return NULL;
+ }
+ if (ptr != result) /* equal when the region could grow without moving */
+ memmove (result, ptr, blocks * BLOCKSIZE);
+ }
+ break;
+
+ default:
+ /* Old size is a fragment; type is logarithm
+ to base two of the fragment size. */
+ if (size > (size_t) (1 << (type - 1)) && size <= (size_t) (1 << type))
+ /* The new size is the same kind of fragment. */
+ result = ptr;
+ else
+ {
+ /* The new size is different; allocate a new space,
+ and copy the lesser of the new size and the old. */
+ result = malloc (size);
+ if (result == NULL)
+ return NULL; /* PTR has not been freed on this path */
+ memcpy (result, ptr, min (size, (size_t) 1 << type));
+ free (ptr);
+ }
+ break;
+ }
+
+ return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/regex.c b/debian/htdig/htdig-3.2.0b6/htlib/regex.c
new file mode 100644
index 00000000..1a13901e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/regex.c
@@ -0,0 +1,7924 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
+ internationalization features.)
+ Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined _AIX && !defined REGEX_MALLOC
+ #pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#ifdef HAVE_CONFIG_H
+# include "htconfig.h"
+#endif
+
+#ifdef _MSC_VER /* _WIN32 */
+#define alloca _alloca
+#endif
+
+#ifndef PARAMS
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif /* GCC. */
+#endif /* Not PARAMS. */
+
+#if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
+#else
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+# include <sys/types.h>
+#endif
+
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
+/* For platforms which support the ISO C Amendment 1 functionality we
+ support user-defined character classes. */
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+/* This is for multi byte string support. */
+#ifdef MBS_SUPPORT
+# define CHAR_TYPE wchar_t
+# define US_CHAR_TYPE wchar_t/* unsigned character type */
+# define COMPILED_BUFFER_VAR wc_buffer
+# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
+# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1)
+# define PUT_CHAR(c) /* Print pattern element C: via putchar when MB_CUR_MAX is 1, else as a wide character. */ \
+ do { \
+ if (MB_CUR_MAX == 1) /* was misspelled MC_CUR_MAX; the charset printer in this file tests MB_CUR_MAX too */ \
+ putchar (c); \
+ else \
+ printf ("%C", (wint_t) c); /* Should we use wide stream?? */ \
+ } while (0)
+# define TRUE 1
+# define FALSE 0
+#else
+# define CHAR_TYPE char
+# define US_CHAR_TYPE unsigned char /* unsigned character type */
+# define COMPILED_BUFFER_VAR bufp->buffer
+# define OFFSET_ADDRESS_SIZE 2
+# define PUT_CHAR(c) putchar (c)
+#endif /* MBS_SUPPORT */
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# define btowc __btowc
+
+/* We are also using some library internals. */
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <langinfo.h>
+# include <locale/coll-lookup.h>
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
+
+#else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+# undef REL_ALLOC
+
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
+char *malloc ();
+char *realloc ();
+# endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+# ifndef Sword
+# define Sword 1
+# endif
+
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
+
+#endif /* not emacs */
+
+#if defined _LIBC || HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#ifndef MB_LEN_MAX
+# define MB_LEN_MAX 1
+#endif
+
+/* Get the interface, including the syntax bits. */
+#include <gregex.h>
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+#else
+# define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+#endif
+
+#undef ISPRINT
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
+#ifndef NULL
+# define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+#ifndef emacs
+/* How many characters in the character set. */
+# define CHAR_SET_SIZE 256
+
+# ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+# else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void init_syntax_once PARAMS ((void));
+
+static void /* Build re_syntax_table on the first call; later calls return at once. */
+init_syntax_once ()
+{
+ register int c;
+ static int done = 0; /* nonzero once the table has been filled */
+
+ if (done)
+ return;
+ bzero (re_syntax_table, sizeof re_syntax_table); /* every entry starts as 0, i.e. not Sword */
+
+ for (c = 0; c < CHAR_SET_SIZE; ++c)
+ if (ISALNUM (c))
+ re_syntax_table[c] = Sword; /* alphanumerics are word constituents */
+
+ re_syntax_table['_'] = Sword; /* underscore is marked as a word constituent as well */
+
+ done = 1;
+}
+
+# endif /* not SYNTAX_TABLE */
+
+# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
+
+#endif /* emacs */
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+# ifndef alloca
+
+/* Make alloca work the best possible way. */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
+
+# endif /* not alloca */
+
+# define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+# define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ memcpy (destination, source, osize))
+
+/* No need to do anything to free, after alloca. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined REL_ALLOC && defined REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+# define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+# ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
+
+# else /* not REGEX_MALLOC */
+
+# define REGEX_ALLOCATE_STACK alloca
+
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+# define REGEX_FREE_STACK(arg)
+
+# endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop));
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+#ifdef MBS_SUPPORT
+ /* Same as exactn, but contains binary data. */
+ exactn_bin,
+#endif
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ /* ifdef MBS_SUPPORT, following element is length of character
+ classes, length of collating symbols, length of equivalence
+ classes, length of character ranges, and length of characters.
+ Next, character class element, collating symbols elements,
+ equivalence class elements, range elements, and character
+ elements follow.
+ See regex_compile function. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+#ifdef MBS_SUPPORT
+# define STORE_NUMBER(destination, number) \
+ do { \
+ *(destination) = (US_CHAR_TYPE)(number); \
+ } while (0)
+#else
+# define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+#endif /* MBS_SUPPORT */
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += OFFSET_ADDRESS_SIZE; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+#ifdef MBS_SUPPORT
+# define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source); \
+ } while (0)
+#else
+# define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+#endif
+
+#ifdef DEBUG
+static void extract_number _RE_ARGS ((int *dest, US_CHAR_TYPE *source)); /* function form of EXTRACT_NUMBER, compiled only under DEBUG */
+static void
+extract_number (dest, source)
+ int *dest; /* receives the decoded number */
+ US_CHAR_TYPE *source; /* points at the stored number */
+{
+#ifdef MBS_SUPPORT
+ *dest = *source; /* with MBS_SUPPORT the number is held in a single element */
+#else
+ int temp = SIGN_EXTEND_CHAR (*(source + 1)); /* second byte, sign-extended: the high part */
+ *dest = *source & 0377; /* first byte: the low eight bits */
+ *dest += temp << 8; /* add the signed high part shifted into place */
+#endif
+}
+
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += OFFSET_ADDRESS_SIZE; \
+ } while (0)
+
+#ifdef DEBUG
+static void extract_number_and_incr _RE_ARGS ((int *destination, /* function form of EXTRACT_NUMBER_AND_INCR, compiled only under DEBUG */
+ US_CHAR_TYPE **source));
+static void
+extract_number_and_incr (destination, source)
+ int *destination; /* receives the decoded number */
+ US_CHAR_TYPE **source; /* in: address of the pointer to the stored number; out: that pointer advanced past it */
+{
+ extract_number (destination, *source);
+ *source += OFFSET_ADDRESS_SIZE; /* step the caller's pointer over the number just read */
+}
+
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+# include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+# include <assert.h>
+
+static int debug;
+
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form. */
+
+void /* Write to stdout every character whose fastmap entry is nonzero, printing runs as first-last, then a newline. */
+print_fastmap (fastmap)
+ char *fastmap; /* table read at indices 0 .. (1 << BYTEWIDTH) - 1 */
+{
+ unsigned was_a_range = 0;
+ unsigned i = 0;
+
+ while (i < (1 << BYTEWIDTH))
+ {
+ if (fastmap[i++])
+ {
+ was_a_range = 0;
+ putchar (i - 1); /* I was already advanced, so I - 1 is the character just tested */
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++; /* swallow the following consecutive set entries */
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ putchar (i - 1); /* last character of the run */
+ }
+ }
+ }
+ putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+ the START pointer into it and ending just before the pointer END. */
+
+void
+print_partial_compiled_pattern (start, end)
+ US_CHAR_TYPE *start;
+ US_CHAR_TYPE *end;
+{
+ int mcnt, mcnt2;
+ US_CHAR_TYPE *p1;
+ US_CHAR_TYPE *p = start;
+ US_CHAR_TYPE *pend = end;
+
+ if (start == NULL)
+ {
+ printf ("(null)\n");
+ return;
+ }
+
+ /* Loop over pattern commands. */
+ while (p < pend)
+ {
+#ifdef _LIBC
+ printf ("%td:\t", p - start);
+#else
+ printf ("%ld:\t", (long int) (p - start));
+#endif
+
+ switch ((re_opcode_t) *p++)
+ {
+ case no_op:
+ printf ("/no_op");
+ break;
+
+ case exactn:
+ mcnt = *p++;
+ printf ("/exactn/%d", mcnt);
+ do
+ {
+ putchar ('/');
+ PUT_CHAR (*p++);
+ }
+ while (--mcnt);
+ break;
+
+#ifdef MBS_SUPPORT
+ case exactn_bin:
+ mcnt = *p++;
+ printf ("/exactn_bin/%d", mcnt);
+ do
+ {
+ printf("/%lx", (long int) *p++);
+ }
+ while (--mcnt);
+ break;
+#endif /* MBS_SUPPORT */
+
+ case start_memory:
+ mcnt = *p++;
+ printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
+ break;
+
+ case stop_memory:
+ mcnt = *p++;
+ printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
+ break;
+
+ case duplicate:
+ printf ("/duplicate/%ld", (long int) *p++);
+ break;
+
+ case anychar:
+ printf ("/anychar");
+ break;
+
+ case charset:
+ case charset_not:
+ {
+#ifdef MBS_SUPPORT
+ int i, length;
+ wchar_t *workp = p;
+ printf ("/charset [%s",
+ (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
+ p += 5;
+ length = *workp++; /* the length of char_classes */
+ for (i=0 ; i<length ; i++)
+ printf("[:%lx:]", (long int) *p++);
+ length = *workp++; /* the length of collating_symbol */
+ for (i=0 ; i<length ;)
+ {
+ printf("[.");
+ while(*p != 0)
+ PUT_CHAR((i++,*p++));
+ i++,p++;
+ printf(".]");
+ }
+ length = *workp++; /* the length of equivalence_class */
+ for (i=0 ; i<length ;)
+ {
+ printf("[=");
+ while(*p != 0)
+ PUT_CHAR((i++,*p++));
+ i++,p++;
+ printf("=]");
+ }
+ length = *workp++; /* the length of char_range */
+ for (i=0 ; i<length ; i++)
+ {
+ wchar_t range_start = *p++;
+ wchar_t range_end = *p++;
+ if (MB_CUR_MAX == 1)
+ printf("%c-%c", (char) range_start, (char) range_end);
+ else
+ printf("%C-%C", (wint_t) range_start, (wint_t) range_end);
+ }
+ length = *workp++; /* the length of char */
+ for (i=0 ; i<length ; i++)
+ if (MB_CUR_MAX == 1)
+ putchar (*p++);
+ else
+ printf("%C", (wint_t) *p++);
+ putchar (']');
+#else
+ register int c, last = -100;
+ register int in_range = 0;
+
+ printf ("/charset [%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+ assert (p + *p < pend);
+
+ for (c = 0; c < 256; c++)
+ if (c / 8 < *p
+ && (p[1 + (c/8)] & (1 << (c % 8))))
+ {
+ /* Are we starting a range? */
+ if (last + 1 == c && ! in_range)
+ {
+ putchar ('-');
+ in_range = 1;
+ }
+ /* Have we broken a range? */
+ else if (last + 1 != c && in_range)
+ {
+ putchar (last);
+ in_range = 0;
+ }
+
+ if (! in_range)
+ putchar (c);
+
+ last = c;
+ }
+
+ if (in_range)
+ putchar (last);
+
+ putchar (']');
+
+ p += 1 + *p;
+#endif /* MBS_SUPPORT */
+ }
+ break;
+
+ case begline:
+ printf ("/begline");
+ break;
+
+ case endline:
+ printf ("/endline");
+ break;
+
+ case on_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/on_failure_jump to %td", p + mcnt - start);
+#else
+ printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case on_failure_keep_string_jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
+#else
+ printf ("/on_failure_keep_string_jump to %ld",
+ (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case dummy_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/dummy_failure_jump to %td", p + mcnt - start);
+#else
+ printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
+ case maybe_pop_jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/maybe_pop_jump to %td", p + mcnt - start);
+#else
+ printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case pop_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/pop_failure_jump to %td", p + mcnt - start);
+#else
+ printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case jump_past_alt:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/jump_past_alt to %td", p + mcnt - start);
+#else
+ printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case jump:
+ extract_number_and_incr (&mcnt, &p);
+#ifdef _LIBC
+ printf ("/jump to %td", p + mcnt - start);
+#else
+ printf ("/jump to %ld", (long int) (p + mcnt - start));
+#endif
+ break;
+
+ case succeed_n:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+#ifdef _LIBC
+ printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
+#else
+ printf ("/succeed_n to %ld, %d times",
+ (long int) (p1 - start), mcnt2);
+#endif
+ break;
+
+ case jump_n:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
+ break;
+
+ case set_number_at:
+ extract_number_and_incr (&mcnt, &p);
+ p1 = p + mcnt;
+ extract_number_and_incr (&mcnt2, &p);
+#ifdef _LIBC
+ printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
+#else
+ printf ("/set_number_at location %ld to %d",
+ (long int) (p1 - start), mcnt2);
+#endif
+ break;
+
+ case wordbound:
+ printf ("/wordbound");
+ break;
+
+ case notwordbound:
+ printf ("/notwordbound");
+ break;
+
+ case wordbeg:
+ printf ("/wordbeg");
+ break;
+
+ case wordend:
+ printf ("/wordend");
+ break;
+
+# ifdef emacs
+ case before_dot:
+ printf ("/before_dot");
+ break;
+
+ case at_dot:
+ printf ("/at_dot");
+ break;
+
+ case after_dot:
+ printf ("/after_dot");
+ break;
+
+ case syntaxspec:
+ printf ("/syntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+
+ case notsyntaxspec:
+ printf ("/notsyntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+# endif /* emacs */
+
+ case wordchar:
+ printf ("/wordchar");
+ break;
+
+ case notwordchar:
+ printf ("/notwordchar");
+ break;
+
+ case begbuf:
+ printf ("/begbuf");
+ break;
+
+ case endbuf:
+ printf ("/endbuf");
+ break;
+
+ default:
+ printf ("?%ld", (long int) *(p-1));
+ }
+
+ putchar ('\n');
+ }
+
+#ifdef _LIBC
+ printf ("%td:\tend of pattern.\n", p - start);
+#else
+ printf ("%ld:\tend of pattern.\n", (long int) (p - start));
+#endif
+}
+
+
+/* Debugging aid: print the compiled pattern stored in BUFP, followed by
+   the bookkeeping fields of the pattern buffer (sizes, fastmap when it
+   is present and accurate, and the various flags).  */
+void
+print_compiled_pattern (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  US_CHAR_TYPE *code_begin = (US_CHAR_TYPE*) bufp->buffer;
+  /* `used' is divided by the element size to get an element count.  */
+  US_CHAR_TYPE *code_end = code_begin + bufp->used / sizeof(US_CHAR_TYPE);
+
+  print_partial_compiled_pattern (code_begin, code_end);
+  printf ("%ld bytes used/%ld bytes allocated.\n",
+          bufp->used, bufp->allocated);
+
+  if (bufp->fastmap && bufp->fastmap_accurate)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+
+#ifndef _LIBC
+  printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
+#else
+  printf ("re_nsub: %Zd\t", bufp->re_nsub);
+#endif
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %lx\n", bufp->syntax);
+  /* The translate table is not printed.  */
+}
+
+
+/* Debugging aid: print the text starting at WHERE, which points into
+   either STRING1 (SIZE1 characters) or STRING2 (SIZE2 characters).
+   When WHERE lies in the first string, the rest of STRING1 is printed
+   followed by all of STRING2; otherwise only the rest of STRING2 is
+   printed.  A null WHERE prints "(null)".  */
+void
+print_double_string (where, string1, size1, string2, size2)
+     const CHAR_TYPE *where;
+     const CHAR_TYPE *string1;
+     const CHAR_TYPE *string2;
+     int size1;
+     int size2;
+{
+  int idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      idx = where - string1;
+      while (idx < size1)
+        {
+          PUT_CHAR (string1[idx]);
+          idx++;
+        }
+
+      /* Continue with the whole of the second string.  */
+      where = string2;
+    }
+
+  idx = where - string2;
+  while (idx < size2)
+    {
+      PUT_CHAR (string2[idx]);
+      idx++;
+    }
+}
+
+/* Write the single character C to stderr.  */
+void
+printchar (c)
+     int c;
+{
+  fputc (c, stderr);
+}
+
+#else /* not DEBUG */
+/* DEBUG is not defined: assert and all of the DEBUG_* helper macros
+   are given empty replacement lists, so every use of them, arguments
+   included, disappears during preprocessing.  */
+# undef assert
+# define assert(e)
+
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+#ifdef MBS_SUPPORT
+/* Convert the multibyte string SRC (LEN bytes long) to the wide
+   character string DEST and return the number of wide characters
+   stored.
+
+   OFFSET_BUFFER receives the correspondences between SRC (char string)
+   and DEST (wchar_t string), for optimization: entry i is the byte
+   offset in SRC at which the i-th wide character starts, and the entry
+   after the last character is the total number of bytes consumed.
+     e.g. src  = "xxxyzz"
+          dest = {'X', 'Y', 'Z'}
+          (each "xxx", "y" and "zz" represent one multibyte character
+          corresponding to 'X', 'Y' and 'Z'.)
+          offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
+                        = {0, 3, 4, 6}
+
+   IS_BINARY[i] is set to TRUE when the i-th element of DEST is a raw
+   byte copied from SRC because mbrtowc did not yield a character for
+   it (invalid or truncated multibyte sequences are assumed to be
+   binary data), and to FALSE otherwise.
+
+   We assume DEST, OFFSET_BUFFER and IS_BINARY are already allocated
+   with enough space.  Every stored character consumes at least one
+   byte of SRC, so LEN elements for DEST and IS_BINARY and LEN + 1
+   elements for OFFSET_BUFFER always suffice.  */
+
+static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src,
+                                  size_t len, int *offset_buffer,
+                                  char *is_binary);
+static size_t
+convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
+     CHAR_TYPE *dest;
+     const unsigned char* src;
+     size_t len; /* the length of multibyte string. */
+     int *offset_buffer;
+     char *is_binary;
+{
+  wchar_t *pdest = dest;
+  const unsigned char *psrc = src;
+  size_t wc_count = 0;
+
+  if (MB_CUR_MAX == 1)
+    { /* We don't need conversion. */
+      for ( ; wc_count < len ; ++wc_count)
+        {
+          *pdest++ = *psrc++;
+          is_binary[wc_count] = FALSE;
+          offset_buffer[wc_count] = wc_count;
+        }
+      offset_buffer[wc_count] = wc_count;
+    }
+  else
+    {
+      /* We need conversion. */
+      mbstate_t mbs;
+      /* Kept as size_t so that the (size_t) -1 and (size_t) -2 results
+         of mbrtowc are tested directly rather than through a narrowing
+         conversion to int.  */
+      size_t consumed;
+      size_t mb_remain = len;
+      size_t mb_count = 0;
+
+      /* Initialize the conversion state. */
+      memset (&mbs, 0, sizeof (mbstate_t));
+
+      offset_buffer[0] = 0;
+      for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
+             psrc += consumed)
+        {
+          consumed = mbrtowc (pdest, (const char *) psrc, mb_remain, &mbs);
+
+          if (consumed == 0
+              || consumed == (size_t) -1
+              || consumed == (size_t) -2)
+            /* failed to convert. maybe src contains binary data.
+               So we consume 1 byte manually. */
+            {
+              *pdest = *psrc;
+              consumed = 1;
+              is_binary[wc_count] = TRUE;
+              /* After an encoding error the conversion state is
+                 undefined, and after an incomplete sequence it has
+                 absorbed bytes we are about to rescan; start the next
+                 character from the initial state.  */
+              memset (&mbs, 0, sizeof (mbstate_t));
+            }
+          else
+            is_binary[wc_count] = FALSE;
+          /* In sjis encoding, we use yen sign as escape character in
+             place of reverse solidus. So we convert 0x5c(yen sign in
+             sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
+             solidus in UCS2). */
+          if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
+            *pdest = (wchar_t) *psrc;
+
+          offset_buffer[wc_count + 1] = mb_count += consumed;
+        }
+    }
+
+  return wc_count;
+}
+
+#endif /* MBS_SUPPORT */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to directly: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. Being a file-scope object it still
+ starts out as zero. */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the regexp syntax used by subsequent compilations.  Different
+   utilities have historically used different, incompatible syntaxes;
+   this is how a caller picks one.
+
+   SYNTAX is a bit mask built from the bits defined in regex.h.  The
+   previously selected syntax is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  const reg_syntax_t previous = re_syntax_options;
+
+#ifdef DEBUG
+  /* Keep the debugging switch in step with the RE_DEBUG bit.  */
+  debug = (syntax & RE_DEBUG) ? 1 : 0;
+#endif /* DEBUG */
+  re_syntax_options = syntax;
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice?
+
+ All messages live in one char array: the adjacent string literals are
+ concatenated, with an explicit "\0" between consecutive messages.
+ Each REG_..._IDX macro is the offset of its message in the array,
+ computed as the previous offset plus the sizeof of the previous
+ message literal (sizeof counts the terminating NUL, which accounts
+ for the "\0" separator). Any change to a message must therefore be
+ made in both the gettext_noop argument and the matching sizeof.
+ NOTE(review): gettext_noop presumably just marks the literal for
+ translation and expands to its argument -- confirm at its definition. */
+
+static const char re_error_msgid[] =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+/* Offsets into `re_error_msgid', one entry per message and in the same
+   order as the messages are laid out there.
+   NOTE(review): presumably indexed by the regex error code -- confirm
+   at the use sites.  */
+static const size_t re_error_msgid_idx[] =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine: by default the match routines are allowed
+ to allocate. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+# undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE /* only withdrawn for an Emacs build that uses C_ALLOCA or REGEX_MALLOC */
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. INIT_FAIL_STACK uses it as the
+ initial number of stack elements. May be predefined by the builder. */
+#ifndef INIT_FAILURE_ALLOC
+# define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves.
+ The two branches below are the same declarations with different
+ integer widths: long for INT_IS_16BIT, plain int otherwise. */
+
+#ifdef INT_IS_16BIT
+
+# if defined MATCH_MAY_ALLOCATE
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2 MB. */
+long int re_max_failures = 4000;
+# else
+long int re_max_failures = 2000;
+# endif
+
+union fail_stack_elt /* one slot of the failure stack */
+{
+ US_CHAR_TYPE *pointer; /* slot used by the ..._FAILURE_POINTER macros */
+ long int integer; /* slot used by the ..._FAILURE_INT macros */
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack; /* the element storage */
+ unsigned long int size; /* number of elements allocated in `stack' */
+ unsigned long int avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#else /* not INT_IS_16BIT */
+
+# if defined MATCH_MAY_ALLOCATE
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2 MB. */
+int re_max_failures = 4000;
+# else
+int re_max_failures = 2000;
+# endif
+
+union fail_stack_elt /* one slot of the failure stack */
+{
+ US_CHAR_TYPE *pointer; /* slot used by the ..._FAILURE_POINTER macros */
+ int integer; /* slot used by the ..._FAILURE_INT macros */
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack; /* the element storage */
+ unsigned size; /* number of elements allocated in `stack' */
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#endif /* INT_IS_16BIT */
+/* Predicates on the failure stack.  FAIL_STACK_EMPTY and
+   FAIL_STACK_FULL look at a variable named `fail_stack';
+   FAIL_STACK_PTR_EMPTY looks through a pointer named
+   `fail_stack_ptr'.  */
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails.
+ Both operate on a variable named `fail_stack'. When
+ MATCH_MAY_ALLOCATE is not defined nothing is allocated or freed here:
+ INIT_FAIL_STACK only resets `avail' and RESET_FAIL_STACK is empty. */
+
+#ifdef MATCH_MAY_ALLOCATE
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ The limit (re_max_failures * MAX_FAILURE_ITEMS elements) is tested
+ against the size before doubling, so the stack may end up larger
+ than the limit. The `size' field is only updated after a successful
+ reallocation.
+
+ NOTE(review): the result of REGEX_REALLOCATE_STACK is stored straight
+ into (fail_stack).stack, so on failure the old pointer is replaced by
+ NULL; whether that loses memory depends on how REGEX_REALLOCATE_STACK
+ is defined -- confirm at its definition.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+ Return 1 if was able to do so and 0 if the stack was full and
+ DOUBLE_FAIL_STACK could not grow it.
+ Note that the fullness test, FAIL_STACK_FULL, always inspects the
+ variable named `fail_stack', whatever is passed as FAIL_STACK. */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
+ 1))
+
+/* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'.
+ None of the PUSH_FAILURE_... macros checks for free space; the caller
+ must have made room beforehand. */
+#define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (US_CHAR_TYPE *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. Each decrements `avail'
+ and yields the element (or the named member of it) found there. */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+ With DEBUG, DEBUG_POP stores the popped integer through ITEM_ADDR;
+ without it both macros expand to nothing. */
+#ifdef DEBUG
+# define DEBUG_PUSH PUSH_FAILURE_INT
+# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+#else
+# define DEBUG_PUSH(item)
+# define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Items are pushed in this order: for every register from
+ lowest_active_reg to highest_active_reg its regstart, regend and
+ reg_info word; then lowest_active_reg, highest_active_reg,
+ PATTERN_PLACE and STRING_PLACE; and, when DEBUG is defined, the
+ failure id. POP_FAILURE_POINT pops them in the reverse order.
+
+ Requires variables fail_stack, regstart, regend, reg_info,
+ lowest_active_reg and highest_active_reg be declared (the debugging
+ statements also use failure_id, nfailure_points_pushed and
+ num_regs_pushed). DOUBLE_FAIL_STACK requires `destination'
+ be declared; this macro declares it itself.
+
+ Does `return FAILURE_CODE' if the stack cannot be grown enough. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ active_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) /* always true, so the registers are always saved */ \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register: its regstart, its regend and its reg_info word. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers: the lowest and highest
+ active register numbers, the pattern position and the string
+ position. */
+#ifdef DEBUG
+# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+# define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. */
+/* We used to use (num_regs - 1), which is the number of registers
+ this regexp will save; but that was changed to 5
+ to avoid stack overflow for a regexp with lots of parens. */
+#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. The `0 ? 0 :' conditional always
+ selects its second branch, i.e. the count of active registers. */
+#define NUM_FAILURE_ITEMS \
+ (((0 \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ STR is left untouched when the saved string position is NULL.
+ The macro also clears `set_regs_matched_done'. It expands to a bare
+ brace block, not a do-while statement.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
+ const US_CHAR_TYPE *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const CHAR_TYPE *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (US_CHAR_TYPE *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
+ \
+ regend[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();\
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ } \
+ else /* pairs with the `if (1)' above, so this branch never runs */ \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+typedef union
+{
+ fail_stack_elt_t word; /* the whole record as one failure-stack element; this is the view PUSH_FAILURE_POINT saves */
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1; /* set to 1 by SET_REGS_MATCHED */
+ unsigned ever_matched_something : 1; /* set to 1 by SET_REGS_MATCHED */
+ } bits;
+} register_info_type;
+
+/* Accessors for the bit fields of a register_info_type R; each expands
+ to the field itself and so can be assigned to. */
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+/* To be invoked once a real character has been matched.  For every
+   register from lowest_active_reg to highest_active_reg it raises both
+   the `matched something' and the `ever matched something' flags.  The
+   work is skipped while `set_regs_matched_done' is already nonzero,
+   and that variable is set to 1 when the flags are raised.  */
+#define SET_REGS_MATCHED()						\
+  do									\
+    {									\
+      if (set_regs_matched_done == 0)					\
+        {								\
+          active_reg_t reg_idx = lowest_active_reg;			\
+          set_regs_matched_done = 1;					\
+          while (reg_idx <= highest_active_reg)				\
+            {								\
+              EVER_MATCHED_SOMETHING (reg_info[reg_idx]) = 1;		\
+              MATCHED_SOMETHING (reg_info[reg_idx]) = 1;		\
+              reg_idx++;						\
+            }								\
+        }								\
+    }									\
+  while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched.
+ The sentinel is the address of the dummy object below; REG_UNSET (e)
+ is true when E equals that address. */
+static CHAR_TYPE reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile.
+ Every parameter list is wrapped in _RE_ARGS.
+ NOTE(review): presumably so that prototypes can be dropped for
+ pre-ANSI compilers -- confirm at the definition of _RE_ARGS. */
+
+static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+static void store_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, int arg));
+static void store_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
+ int arg1, int arg2));
+static void insert_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
+ int arg, US_CHAR_TYPE *end));
+static void insert_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
+ int arg1, int arg2, US_CHAR_TYPE *end));
+static boolean at_begline_loc_p _RE_ARGS ((const CHAR_TYPE *pattern,
+ const CHAR_TYPE *p,
+ reg_syntax_t syntax));
+static boolean at_endline_loc_p _RE_ARGS ((const CHAR_TYPE *p,
+ const CHAR_TYPE *pend,
+ reg_syntax_t syntax));
+#ifdef MBS_SUPPORT /* this variant of compile_range takes a CHAR_TYPE start and an extra `char_set' argument */
+static reg_errcode_t compile_range _RE_ARGS ((CHAR_TYPE range_start,
+ const CHAR_TYPE **p_ptr,
+ const CHAR_TYPE *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ US_CHAR_TYPE *b,
+ CHAR_TYPE *char_set));
+static void insert_space _RE_ARGS ((int num, CHAR_TYPE *loc, CHAR_TYPE *end));
+#else
+static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
+ const CHAR_TYPE **p_ptr,
+ const CHAR_TYPE *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ US_CHAR_TYPE *b));
+#endif /* MBS_SUPPORT */
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate').
+ Uses the variables `p', `pend' and `translate' of the expansion site
+ and does `return REG_EEND' when the pattern is exhausted. A PATFETCH
+ that is already defined at this point is left in place. */
+/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
+ because it is impossible to allocate 4GB array for some encodings
+ which have 4 byte character_set like UCS4. */
+#ifndef PATFETCH
+# ifdef MBS_SUPPORT
+# define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (US_CHAR_TYPE) *p++; \
+ if (translate && (c <= 0xff)) c = (US_CHAR_TYPE) translate[c]; \
+ } while (0)
+# else
+# define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+# endif /* MBS_SUPPORT */
+#endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. Like PATFETCH, does `return REG_EEND' at the end of
+ the pattern. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (US_CHAR_TYPE) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned.
+ Note that a translated value is cast to plain `char', whereas an
+ untranslated D is passed through with its own type. A TRANSLATE
+ that is already defined at this point is left in place. */
+/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
+ because it is impossible to allocate 4GB array for some encodings
+ which have 4 byte character_set like UCS4. */
+#ifndef TRANSLATE
+# ifdef MBS_SUPPORT
+# define TRANSLATE(d) \
+ ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \
+ ? (char) translate[(unsigned char) (d)] : (d))
+#else
+# define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+# endif /* MBS_SUPPORT */
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'.
+ They use the variables `b' (the current output position) and `bufp'
+ of the expansion site. */
+
+/* If the buffer isn't allocated when it comes in, use this.
+ The value is a byte count: room for 32 elements of US_CHAR_TYPE. */
+#define INIT_BUF_SIZE (32 * sizeof(US_CHAR_TYPE))
+
+/* Make sure we have room for at least N more elements in buffer,
+ calling EXTEND_BUFFER until there is. With MBS_SUPPORT an element
+ is sizeof (CHAR_TYPE) bytes and the comparison against
+ bufp->allocated is done in bytes. */
+#ifdef MBS_SUPPORT
+# define GET_BUFFER_SPACE(n) \
+ while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
+ + (n)*sizeof(CHAR_TYPE)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+#else
+# define GET_BUFFER_SPACE(n) \
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+#endif /* MBS_SUPPORT */
+
+/* Make sure we have room for one more element in the buffer and then
+ add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (US_CHAR_TYPE) (c); \
+ } while (0)
+
+
+/* Ensure we have room for two more elements and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (US_CHAR_TYPE) (c1); \
+ *b++ = (US_CHAR_TYPE) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three elements. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (US_CHAR_TYPE) (c1); \
+ *b++ = (US_CHAR_TYPE) (c2); \
+ *b++ = (US_CHAR_TYPE) (c3); \
+ } while (0)
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the size of the jump itself, which is
+ 1 + OFFSET_ADDRESS_SIZE elements (presumably the opcode plus its
+ address operand). */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
+ arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+/* Any other compiler which, like MSC, has an allocation limit below 2^16
+ bytes will have to use an approach similar to what was done below for
+ MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes, which is not going to work too well.
+ You have been warned!! */
+#if defined _MSC_VER && !defined WIN32
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
+#else
+# define MAX_BUF_SIZE (1L << 16) /* 65536 */
+# define REALLOC(p,s) realloc ((p), (s))
+#endif
+
+/* Double the size of the buffer via realloc (clamping the new size to
+ MAX_BUF_SIZE) and reset the pointers that pointed into the old block
+ to point to the correct places in the new one. If the buffer cannot
+ grow any further because it has already reached MAX_BUF_SIZE, return
+ REG_ESIZE from the enclosing function; if the reallocation fails,
+ return REG_ESPACE. */
+#if __BOUNDED_POINTERS__
+# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
+# define MOVE_BUFFER_POINTER(P) \
+ (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
+# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ else \
+ { \
+ SET_HIGH_BOUND (b); \
+ SET_HIGH_BOUND (begalt); \
+ if (fixup_alt_jump) \
+ SET_HIGH_BOUND (fixup_alt_jump); \
+ if (laststart) \
+ SET_HIGH_BOUND (laststart); \
+ if (pending_exact) \
+ SET_HIGH_BOUND (pending_exact); \
+ }
+#else
+# define MOVE_BUFFER_POINTER(P) (P) += incr
+# define ELSE_EXTEND_BUFFER_HIGH_BOUND
+#endif
+
+#ifdef MBS_SUPPORT
+# define EXTEND_BUFFER() \
+ do { \
+ US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR; \
+ int wchar_count; \
+ if (bufp->allocated + sizeof(US_CHAR_TYPE) > MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ /* How many characters the new buffer can have? */ \
+ wchar_count = bufp->allocated / sizeof(US_CHAR_TYPE); \
+ if (wchar_count == 0) wchar_count = 1; \
+ /* Truncate the buffer to CHAR_TYPE align. */ \
+ bufp->allocated = wchar_count * sizeof(US_CHAR_TYPE); \
+ RETALLOC (COMPILED_BUFFER_VAR, wchar_count, US_CHAR_TYPE); \
+ bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
+ if (COMPILED_BUFFER_VAR == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != COMPILED_BUFFER_VAR) \
+ { \
+ int incr = COMPILED_BUFFER_VAR - old_buffer; \
+ MOVE_BUFFER_POINTER (b); \
+ MOVE_BUFFER_POINTER (begalt); \
+ if (fixup_alt_jump) \
+ MOVE_BUFFER_POINTER (fixup_alt_jump); \
+ if (laststart) \
+ MOVE_BUFFER_POINTER (laststart); \
+ if (pending_exact) \
+ MOVE_BUFFER_POINTER (pending_exact); \
+ } \
+ ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ } while (0)
+#else
+# define EXTEND_BUFFER() \
+ do { \
+ US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (US_CHAR_TYPE *) REALLOC (COMPILED_BUFFER_VAR, \
+ bufp->allocated); \
+ if (COMPILED_BUFFER_VAR == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != COMPILED_BUFFER_VAR) \
+ { \
+ int incr = COMPILED_BUFFER_VAR - old_buffer; \
+ MOVE_BUFFER_POINTER (b); \
+ MOVE_BUFFER_POINTER (begalt); \
+ if (fixup_alt_jump) \
+ MOVE_BUFFER_POINTER (fixup_alt_jump); \
+ if (laststart) \
+ MOVE_BUFFER_POINTER (laststart); \
+ if (pending_exact) \
+ MOVE_BUFFER_POINTER (pending_exact); \
+ } \
+ ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ } while (0)
+#endif /* MBS_SUPPORT */
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
+
+/* One saved frame of the compile stack. The offset fields have type
+ pattern_offset_t; they presumably hold the positions of the
+ like-named regex_compile locals (begalt, fixup_alt_jump, laststart)
+ relative to the start of the compiled buffer, so that they stay
+ valid when the buffer is reallocated -- TODO confirm against the
+ code that pushes and pops these frames. */
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+/* The compile stack itself: an array of frames plus bookkeeping.
+ COMPILE_STACK_FULL compares AVAIL against SIZE, and
+ COMPILE_STACK_EMPTY tests AVAIL against zero. */
+typedef struct
+{
+ compile_stack_elt_t *stack; /* The array of frames. */
+ unsigned size; /* Number of frames STACK has room for. */
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. Expects `b' to point at the
+ start of the bitmap being filled in. C is converted to unsigned
+ char before use; both uses are fully parenthesized so that an
+ expression argument is converted as a whole rather than only its
+ first operand. Note that C is still evaluated twice, so it must
+ not have side effects. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.
+
+ Unless the pattern is already exhausted (p == pend), fetches
+ characters with PATFETCH into `c' for as long as they are decimal
+ digits, accumulating their value into NUM. A negative NUM on entry
+ is reset to zero as soon as the first digit is seen; if no digit is
+ seen NUM is left untouched. When anything was fetched, `c' holds
+ the last character fetched on exit, which is the first non-digit
+ unless the end of the pattern was reached first.
+
+ Expects `p', `pend' and `c' to be in scope. NUM is evaluated
+ several times and must be an lvalue. The accumulation is not
+ checked for overflow. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while ('0' <= c && c <= '9') \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendment 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
+#else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#endif
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller.
+
+ Every vector is resized through RETALLOC_IF and regs_allocated_size
+ is then raised to NUM_REGS; a request that is not larger than the
+ current size is a no-op. Nothing is returned, so the function is
+ declared `void' explicitly (implicit `int' was removed in C99).
+ NOTE(review): the RETALLOC_IF results are not inspected here, so an
+ allocation failure would go unnoticed -- confirm how RETALLOC_IF
+ behaves before relying on the vectors being non-null. */
+
+static void
+regex_grow_registers (num_regs)
+ int num_regs;
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (reg_info, num_regs, register_info_type);
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
+ compile_stack,
+ regnum_t regnum));
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+/* Return, freeing storage we allocated. */
+#ifdef MBS_SUPPORT
+# define FREE_STACK_RETURN(value) \
+ return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
+#else
+# define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+#endif /* MBS_SUPPORT */
+
+static reg_errcode_t
+#ifdef MBS_SUPPORT
+regex_compile (cpattern, csize, syntax, bufp)
+ const char *cpattern;
+ size_t csize;
+#else
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ size_t size;
+#endif /* MBS_SUPPORT */
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register US_CHAR_TYPE c, c1;
+
+#ifdef MBS_SUPPORT
+ /* A temporary space to keep wchar_t pattern and compiled pattern. */
+ CHAR_TYPE *pattern, *COMPILED_BUFFER_VAR;
+ size_t size;
+ /* Offset buffer for optimization. See convert_mbs_to_wcs. */
+ int *mbs_offset = NULL;
+ /* It holds whether each wchar_t is binary data or not. */
+ char *is_binary = NULL;
+ /* A flag whether exactn is handling binary data or not. */
+ char is_exactn_bin = FALSE;
+#endif /* MBS_SUPPORT */
+
+ /* A random temporary spot in PATTERN. */
+ const CHAR_TYPE *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register US_CHAR_TYPE *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+#ifdef MBS_SUPPORT
+ const CHAR_TYPE *p;
+ const CHAR_TYPE *pend;
+#else
+ const CHAR_TYPE *p = pattern;
+ const CHAR_TYPE *pend = pattern + size;
+#endif /* MBS_SUPPORT */
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ US_CHAR_TYPE *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ US_CHAR_TYPE *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ US_CHAR_TYPE *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+#ifdef MBS_SUPPORT
+ const US_CHAR_TYPE *beg_interval;
+#else
+ const char *beg_interval;
+#endif /* MBS_SUPPORT */
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ US_CHAR_TYPE *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef MBS_SUPPORT
+ /* Initialize the wchar_t PATTERN and offset_buffer. */
+ p = pend = pattern = TALLOC(csize, CHAR_TYPE);
+ mbs_offset = TALLOC(csize + 1, int);
+ is_binary = TALLOC(csize + 1, char);
+ if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
+ {
+ if (pattern) free(pattern);
+ if (mbs_offset) free(mbs_offset);
+ if (is_binary) free(is_binary);
+ return REG_ESPACE;
+ }
+ size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
+ pend = p + size;
+ if (size < 0)
+ {
+ if (pattern) free(pattern);
+ if (mbs_offset) free(mbs_offset);
+ if (is_binary) free(is_binary);
+ return REG_BADPAT;
+ }
+#endif
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ PUT_CHAR (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ {
+#ifdef MBS_SUPPORT
+ if (pattern) free(pattern);
+ if (mbs_offset) free(mbs_offset);
+ if (is_binary) free(is_binary);
+#endif
+ return REG_ESPACE;
+ }
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined emacs && !defined SYNTAX_TABLE
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+#ifdef MBS_SUPPORT
+ /* Free bufp->buffer and allocate an array for wchar_t pattern
+ buffer. */
+ free(bufp->buffer);
+ COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(US_CHAR_TYPE),
+ US_CHAR_TYPE);
+#else
+ RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, US_CHAR_TYPE);
+#endif /* MBS_SUPPORT */
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(US_CHAR_TYPE),
+ US_CHAR_TYPE);
+ }
+
+ if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
+#ifdef MBS_SUPPORT
+ bufp->buffer = (char*)COMPILED_BUFFER_VAR;
+#endif /* MBS_SUPPORT */
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+#ifdef MBS_SUPPORT
+ else
+ COMPILED_BUFFER_VAR = (US_CHAR_TYPE*) bufp->buffer;
+#endif
+
+ begalt = b = COMPILED_BUFFER_VAR;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart -
+ (1 + OFFSET_ADDRESS_SIZE));
+
+ /* We've added more stuff to the buffer. */
+ b += 1 + OFFSET_ADDRESS_SIZE;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ /* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
+ 'b + 3'. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 1 + OFFSET_ADDRESS_SIZE);
+ pending_exact = 0;
+ b += 1 + OFFSET_ADDRESS_SIZE;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart +
+ 2 + 2 * OFFSET_ADDRESS_SIZE);
+ b += 1 + OFFSET_ADDRESS_SIZE;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+#ifdef MBS_SUPPORT
+ CHAR_TYPE range_start = 0xffffffff;
+#else
+ unsigned int range_start = 0xffffffff;
+#endif
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+#ifdef MBS_SUPPORT
+ /* We assume a charset(_not) structure as a wchar_t array.
+ charset[0] = (re_opcode_t) charset(_not)
+ charset[1] = l (= length of char_classes)
+ charset[2] = m (= length of collating_symbols)
+ charset[3] = n (= length of equivalence_classes)
+ charset[4] = o (= length of char_ranges)
+ charset[5] = p (= length of chars)
+
+ charset[6] = char_class (wctype_t)
+ charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
+ ...
+ charset[l+5] = char_class (wctype_t)
+
+ charset[l+6] = collating_symbol (wchar_t)
+ ...
+ charset[l+m+5] = collating_symbol (wchar_t)
+ ifdef _LIBC we use the index if
+ _NL_COLLATE_SYMB_EXTRAMB instead of
+ wchar_t string.
+
+ charset[l+m+6] = equivalence_classes (wchar_t)
+ ...
+ charset[l+m+n+5] = equivalence_classes (wchar_t)
+ ifdef _LIBC we use the index in
+ _NL_COLLATE_WEIGHT instead of
+ wchar_t string.
+
+ charset[l+m+n+6] = range_start
+ charset[l+m+n+7] = range_end
+ ...
+ charset[l+m+n+2o+4] = range_start
+ charset[l+m+n+2o+5] = range_end
+ ifdef _LIBC we use the value looked up
+ in _NL_COLLATE_COLLSEQ instead of
+ wchar_t character.
+
+ charset[l+m+n+2o+6] = char
+ ...
+ charset[l+m+n+2o+p+5] = char
+
+ */
+
+ /* We need at least 6 spaces: the opcode, the length of
+ char_classes, the length of collating_symbols, the length of
+ equivalence_classes, the length of char_ranges, the length of
+ chars. */
+ GET_BUFFER_SPACE (6);
+
+ /* Save b as laststart. And We use laststart as the pointer
+ to the first element of the charset here.
+ In other words, laststart[i] indicates charset[i]. */
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Push the length of char_classes, the length of
+ collating_symbols, the length of equivalence_classes, the
+ length of char_ranges and the length of chars. */
+ BUF_PUSH_3 (0, 0, 0);
+ BUF_PUSH_2 (0, 0);
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-6] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ {
+ BUF_PUSH('\n');
+ laststart[5]++; /* Update the length of characters */
+ }
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ BUF_PUSH(c1);
+ laststart[5]++; /* Update the length of chars */
+ range_start = c1;
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret;
+ /* Allocate the space for range_start and range_end. */
+ GET_BUFFER_SPACE (2);
+ /* Update the pointer to indicate end of buffer. */
+ b += 2;
+ ret = compile_range (range_start, &p, pend, translate,
+ syntax, b, laststart);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+ /* Allocate the space for range_start and range_end. */
+ GET_BUFFER_SPACE (2);
+ /* Update the pointer to indicate end of buffer. */
+ b += 2;
+ ret = compile_range (c, &p, pend, translate, syntax, b,
+ laststart);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but store them as character). */
+ if (c == ':' && *p == ']')
+ {
+ wctype_t wt;
+ uintptr_t alignedp;
+
+ /* Query the character class as wctype_t. */
+ wt = IS_CHAR_CLASS (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Allocate the space for character class. */
+ GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
+ /* Update the pointer to indicate end of buffer. */
+ b += CHAR_CLASS_SIZE;
+ /* Move data which follow character classes
+ not to violate the data. */
+ insert_space(CHAR_CLASS_SIZE,
+ laststart + 6 + laststart[1],
+ b - 1);
+ alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
+ + __alignof__(wctype_t) - 1)
+ & ~(uintptr_t)(__alignof__(wctype_t) - 1);
+ /* Store the character class. */
+ *((wctype_t*)alignedp) = wt;
+ /* Update length of char_classes */
+ laststart[1] += CHAR_CLASS_SIZE;
+
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ BUF_PUSH ('[');
+ BUF_PUSH (':');
+ laststart[5] += 2; /* Update the length of characters */
+ range_start = ':';
+ had_char_class = false;
+ }
+ }
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
+ || *p == '.'))
+ {
+ CHAR_TYPE str[128]; /* Should be large enough. */
+ CHAR_TYPE delim = *p; /* '=' or '.' */
+# ifdef _LIBC
+ uint32_t nrules =
+ _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[=' or '[[.'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == delim && *p == ']') || p == pend)
+ break;
+ if (c1 < sizeof (str) - 1)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ if (c == delim && *p == ']' && str[0] != '\0')
+ {
+ unsigned int i, offset;
+ /* If we have no collation data we use the default
+ collation in which each character is in a class
+ by itself. It also means that ASCII is the
+ character set and therefore we cannot have character
+ with more than one byte in the multibyte
+ representation. */
+
+ /* If not defined _LIBC, we push the name and
+ `\0' for the sake of matching performance. */
+ int datasize = c1 + 1;
+
+# ifdef _LIBC
+ int32_t idx = 0;
+ if (nrules == 0)
+# endif
+ {
+ if (c1 != 1)
+ FREE_STACK_RETURN (REG_ECOLLATE);
+ }
+# ifdef _LIBC
+ else
+ {
+ const int32_t *table;
+ const int32_t *weights;
+ const int32_t *extra;
+ const int32_t *indirect;
+ wint_t *cp;
+
+ /* This #include defines a local function! */
+# include <locale/weightwc.h>
+
+ if(delim == '=')
+ {
+ /* We push the index for equivalence class. */
+ cp = (wint_t*)str;
+
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_TABLEWC);
+ weights = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTWC);
+ extra = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAWC);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTWC);
+
+ idx = findidx ((const wint_t**)&cp);
+ if (idx == 0 || cp < (wint_t*) str + c1)
+ /* This is no valid character. */
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ str[0] = (wchar_t)idx;
+ }
+ else /* delim == '.' */
+ {
+ /* We push collation sequence value
+ for collating symbol. */
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+ int32_t idx;
+ int32_t elem;
+ int32_t second;
+ int32_t hash;
+ char char_str[c1];
+
+ /* We have to convert the name to a single-byte
+ string. This is possible since the names
+ consist of ASCII characters and the internal
+ representation is UCS4. */
+ for (i = 0; i < c1; ++i)
+ char_str[i] = str[i];
+
+ table_size =
+ _NL_CURRENT_WORD (LC_COLLATE,
+ _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+
+ /* Locate the character in the hashing table. */
+ hash = elem_hash (char_str, c1);
+
+ idx = 0;
+ elem = hash % table_size;
+ second = hash % (table_size - 2);
+ while (symb_table[2 * elem] != 0)
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ && c1 == extra[symb_table[2 * elem + 1]]
+ && memcmp (str,
+ &extra[symb_table[2 * elem + 1]
+ + 1], c1) == 0)
+ {
+ /* Yep, this is the entry. */
+ idx = symb_table[2 * elem + 1];
+ idx += 1 + extra[idx];
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+
+ if (symb_table[2 * elem] != 0)
+ {
+ /* Compute the index of the byte sequence
+ in the table. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~4;
+
+ str[0] = (wchar_t) idx + 4;
+ }
+ else if (symb_table[2 * elem] == 0 && c1 == 1)
+ {
+ /* No valid character. Match it as a
+ single byte character. */
+ had_char_class = false;
+ BUF_PUSH(str[0]);
+ /* Update the length of characters */
+ laststart[5]++;
+ range_start = str[0];
+
+ /* Throw away the ] at the end of the
+ collating symbol. */
+ PATFETCH (c);
+ /* Go on to the next element of the bracket expression. */
+ continue;
+ }
+ else
+ FREE_STACK_RETURN (REG_ECOLLATE);
+ }
+ datasize = 1;
+ }
+# endif
+ /* Throw away the ] at the end of the equivalence
+ class (or collating symbol). */
+ PATFETCH (c);
+
+ /* Allocate the space for the equivalence class
+ (or collating symbol) (and '\0' if needed). */
+ GET_BUFFER_SPACE(datasize);
+ /* Update the pointer to indicate end of buffer. */
+ b += datasize;
+
+ if (delim == '=')
+ { /* equivalence class */
+ /* Calculate the offset of char_ranges,
+ which is next to equivalence_classes. */
+ offset = laststart[1] + laststart[2]
+ + laststart[3] +6;
+ /* Insert space. */
+ insert_space(datasize, laststart + offset, b - 1);
+
+ /* Write the equivalence_class and \0. */
+ for (i = 0 ; i < datasize ; i++)
+ laststart[offset + i] = str[i];
+
+ /* Update the length of equivalence_classes. */
+ laststart[3] += datasize;
+ had_char_class = true;
+ }
+ else /* delim == '.' */
+ { /* collating symbol */
+ /* Calculate the offset of the equivalence_classes,
+ which is next to collating_symbols. */
+ offset = laststart[1] + laststart[2] + 6;
+ /* Insert space and write the collating_symbol
+ and \0. */
+ insert_space(datasize, laststart + offset, b-1);
+ for (i = 0 ; i < datasize ; i++)
+ laststart[offset + i] = str[i];
+
+ /* In re_match_2_internal if range_start < -1, we
+ assume -range_start is the offset of the
+ collating symbol which is specified as
+ the character of the range start. So we assign
+ -(laststart[1] + laststart[2] + 6) to
+ range_start. */
+ range_start = -(laststart[1] + laststart[2] + 6);
+ /* Update the length of collating_symbol. */
+ laststart[2] += datasize;
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ BUF_PUSH ('[');
+ BUF_PUSH (delim);
+ laststart[5] += 2; /* Update the length of characters */
+ range_start = delim;
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ BUF_PUSH(c);
+ laststart[5]++; /* Update the length of characters */
+ range_start = c;
+ }
+ }
+
+#else /* not MBS_SUPPORT */
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ range_start = c1;
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (range_start, &p, pend, translate,
+ syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (c, &p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+# if defined _LIBC || WIDE_CHAR_SUPPORT
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_upper = STREQ (str, "upper");
+ wctype_t wt;
+ int ch;
+
+ wt = IS_CHAR_CLASS (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ {
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
+ if (iswctype (btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# endif
+
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+
+ had_char_class = true;
+# else
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ if ( translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+# endif /* libc || wctype.h */
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ range_start = ':';
+ had_char_class = false;
+ }
+ }
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
+ {
+ unsigned char str[MB_LEN_MAX + 1];
+# ifdef _LIBC
+ uint32_t nrules =
+ _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[='. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == '=' && *p == ']') || p == pend)
+ break;
+ if (c1 < MB_LEN_MAX)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ if (c == '=' && *p == ']' && str[0] != '\0')
+ {
+ /* If we have no collation data we use the default
+ collation in which each character is in a class
+ by itself. It also means that ASCII is the
+ character set and therefore we cannot have character
+ with more than one byte in the multibyte
+ representation. */
+# ifdef _LIBC
+ if (nrules == 0)
+# endif
+ {
+ if (c1 != 1)
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Set the bit for the character. */
+ SET_LIST_BIT (str[0]);
+ }
+# ifdef _LIBC
+ else
+ {
+ /* Try to match the byte sequence in `str' against
+ those known to the collate implementation.
+ First find out whether the bytes in `str' are
+ actually from exactly one character. */
+ const int32_t *table;
+ const unsigned char *weights;
+ const unsigned char *extra;
+ const int32_t *indirect;
+ int32_t idx;
+ const unsigned char *cp = str;
+ int ch;
+
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+
+ idx = findidx (&cp);
+ if (idx == 0 || cp < str + c1)
+ /* This is no valid character. */
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Now we have to go through the whole table
+ and find all characters which have the same
+ first level weight.
+
+ XXX Note that this is not entirely correct.
+ we would have to match multibyte sequences
+ but this is not possible with the current
+ implementation. */
+ for (ch = 1; ch < 256; ++ch)
+ /* XXX This test would have to be changed if we
+ would allow matching multibyte sequences. */
+ if (table[ch] > 0)
+ {
+ int32_t idx2 = table[ch];
+ size_t len = weights[idx2];
+
+ /* Test whether the lengths match. */
+ if (weights[idx] == len)
+ {
+ /* They do. Now compare the bytes of
+ the weight. */
+ size_t cnt = 0;
+
+ while (cnt < len
+ && (weights[idx + 1 + cnt]
+ == weights[idx2 + 1 + cnt]))
+ ++cnt;
+
+ if (cnt == len)
+ /* They match. Mark the character as
+ acceptable. */
+ SET_LIST_BIT (ch);
+ }
+ }
+ }
+# endif
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT ('=');
+ range_start = '=';
+ had_char_class = false;
+ }
+ }
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
+ {
+ unsigned char str[128]; /* Should be large enough. */
+# ifdef _LIBC
+ uint32_t nrules =
+ _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[.'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == '.' && *p == ']') || p == pend)
+ break;
+ if (c1 < sizeof (str))
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ if (c == '.' && *p == ']' && str[0] != '\0')
+ {
+ /* If we have no collation data we use the default
+ collation in which each character is the name
+ for its own class which contains only the one
+ character. It also means that ASCII is the
+ character set and therefore we cannot have character
+ with more than one byte in the multibyte
+ representation. */
+# ifdef _LIBC
+ if (nrules == 0)
+# endif
+ {
+ if (c1 != 1)
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Set the bit for the character. */
+ SET_LIST_BIT (str[0]);
+ range_start = ((const unsigned char *) str)[0];
+ }
+# ifdef _LIBC
+ else
+ {
+ /* Try to match the byte sequence in `str' against
+ those known to the collate implementation.
+ First find out whether the bytes in `str' are
+ actually from exactly one character. */
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+ int32_t idx;
+ int32_t elem;
+ int32_t second;
+ int32_t hash;
+
+ table_size =
+ _NL_CURRENT_WORD (LC_COLLATE,
+ _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+
+ /* Locate the character in the hashing table. */
+ hash = elem_hash (str, c1);
+
+ idx = 0;
+ elem = hash % table_size;
+ second = hash % (table_size - 2);
+ while (symb_table[2 * elem] != 0)
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ && c1 == extra[symb_table[2 * elem + 1]]
+ && memcmp (str,
+ &extra[symb_table[2 * elem + 1]
+ + 1],
+ c1) == 0)
+ {
+ /* Yep, this is the entry. */
+ idx = symb_table[2 * elem + 1];
+ idx += 1 + extra[idx];
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+
+ if (symb_table[2 * elem] == 0)
+ /* This is no valid character. */
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Now add the multibyte character(s) we found
+ to the accept list.
+
+ XXX Note that this is not entirely correct.
+ we would have to match multibyte sequences
+ but this is not possible with the current
+ implementation. Also, we have to match
+ collating symbols, which expand to more than
+ one file, as a whole and not allow the
+ individual bytes. */
+ c1 = extra[idx++];
+ if (c1 == 1)
+ range_start = extra[idx];
+ while (c1-- > 0)
+ {
+ SET_LIST_BIT (extra[idx]);
+ ++idx;
+ }
+ }
+# endif
+ had_char_class = false;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT ('.');
+ range_start = '.';
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ range_start = c;
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+#endif /* MBS_SUPPORT */
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b
+ - COMPILED_BUFFER_VAR + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ US_CHAR_TYPE *inner_group_loc
+ = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (on_failure_jump, begalt,
+ b + 2 + 2 * OFFSET_ADDRESS_SIZE);
+ pending_exact = 0;
+ b += 1 + OFFSET_ADDRESS_SIZE;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ b += 1 + OFFSET_ADDRESS_SIZE;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || (syntax & RE_NO_BK_BRACES))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_EBRACE);
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
+ || ((syntax & RE_NO_BK_BRACES) && c != '}'))
+ FREE_STACK_RETURN (REG_BADBR);
+
+ if (upper_bound < 0)
+ upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ /* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE'
+ instead of 'b + 3'. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (jump, laststart, b + 1
+ + OFFSET_ADDRESS_SIZE);
+ b += 1 + OFFSET_ADDRESS_SIZE;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
+ (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 1 + 2 * OFFSET_ADDRESS_SIZE
+ + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
+ , lower_bound);
+ b += 1 + 2 * OFFSET_ADDRESS_SIZE;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ /* ifdef MBS_SUPPORT, The '1+2*OFFSET_ADDRESS_SIZE'
+ is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
+ of the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 1
+ + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
+ b += 1 + 2 * OFFSET_ADDRESS_SIZE;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart
+ + 2 * OFFSET_ADDRESS_SIZE + 1,
+ upper_bound - 1);
+ b += 1 + 2 * OFFSET_ADDRESS_SIZE;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 1 + 2 * OFFSET_ADDRESS_SIZE;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ FREE_STACK_RETURN (REG_ESUBREG);
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, (regnum_t) c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+#ifdef MBS_SUPPORT
+ /* If last exactn handle binary(or character) and
+ new exactn handle character(or binary). */
+ || is_exactn_bin != is_binary[p - 1 - pattern]
+#endif /* MBS_SUPPORT */
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+#ifdef MBS_SUPPORT
+ /* Is this exactn binary data or character? */
+ is_exactn_bin = is_binary[p - 1 - pattern];
+ if (is_exactn_bin)
+ BUF_PUSH_2 (exactn_bin, 0);
+ else
+ BUF_PUSH_2 (exactn, 0);
+#else
+ BUF_PUSH_2 (exactn, 0);
+#endif /* MBS_SUPPORT */
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN (REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH (succeed);
+
+#ifdef MBS_SUPPORT
+ free (pattern);
+ free (mbs_offset);
+ free (is_binary);
+#endif
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+#ifdef MBS_SUPPORT
+ bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
+#else
+ bufp->used = b - bufp->buffer;
+#endif
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: \n");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+# ifdef emacs
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+# else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+# endif /* not emacs */
+ }
+
+ regex_grow_registers (num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write opcode OP into *LOC, then encode the integer ARG immediately
+   after it (starting at LOC + 1) with STORE_NUMBER.  ARG is a two-byte
+   parameter; when MBS_SUPPORT is defined it occupies one wchar_t
+   instead.  */
+
+static void
+store_op1 (op, loc, arg)
+     re_opcode_t op;
+     US_CHAR_TYPE *loc;
+     int arg;
+{
+  US_CHAR_TYPE *operand = loc + 1;
+
+  loc[0] = (US_CHAR_TYPE) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-operand variant of `store_op1': write opcode OP into *LOC, then
+   ARG1 at LOC + 1 and ARG2 at LOC + 1 + OFFSET_ADDRESS_SIZE, each
+   encoded with STORE_NUMBER.  Each operand is a two-byte parameter;
+   when MBS_SUPPORT is defined each one is one wchar_t instead.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     US_CHAR_TYPE *loc;
+     int arg1, arg2;
+{
+  US_CHAR_TYPE *first = loc + 1;
+  US_CHAR_TYPE *second = first + OFFSET_ADDRESS_SIZE;
+
+  loc[0] = (US_CHAR_TYPE) op;
+  STORE_NUMBER (first, arg1);
+  STORE_NUMBER (second, arg2);
+}
+
+
+/* Make room at LOC for OP and its single operand: every element in
+   [LOC, END) is moved up by 1 + OFFSET_ADDRESS_SIZE positions, and
+   then OP and ARG are written at LOC through `store_op1'.  With
+   two-byte operands that opens three bytes; when MBS_SUPPORT is
+   defined the operand is one wchar_t.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     US_CHAR_TYPE *loc;
+     int arg;
+     US_CHAR_TYPE *end;
+{
+  const int gap = 1 + OFFSET_ADDRESS_SIZE;
+  US_CHAR_TYPE *src;
+
+  /* Copy from the top down, because the source and destination
+     regions overlap.  */
+  for (src = end; src != loc; src--)
+    src[gap - 1] = src[-1];
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Two-operand variant of `insert_op1': every element in [LOC, END) is
+   moved up by 1 + 2 * OFFSET_ADDRESS_SIZE positions, and then OP, ARG1
+   and ARG2 are written at LOC through `store_op2'.  When MBS_SUPPORT
+   is defined each operand is one wchar_t.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     US_CHAR_TYPE *loc;
+     int arg1, arg2;
+     US_CHAR_TYPE *end;
+{
+  const int gap = 1 + 2 * OFFSET_ADDRESS_SIZE;
+  US_CHAR_TYPE *src;
+
+  /* Copy from the top down, because the source and destination
+     regions overlap.  */
+  for (src = end; src != loc; src--)
+    src[gap - 1] = src[-1];
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a ^ in PATTERN.  Return true when the character
+   in front of that ^ opens a subexpression or starts an alternative,
+   i.e. it is a `(' or `|' that acts as an operator: either SYNTAX makes
+   the unescaped form the operator (RE_NO_BK_PARENS / RE_NO_BK_VBAR), or
+   the character is preceded by a backslash.  The caller must guarantee
+   at least one character before the ^.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     const CHAR_TYPE *pattern, *p;
+     reg_syntax_t syntax;
+{
+  /* P[-1] is the ^ itself, so the character of interest is P[-2].  */
+  const CHAR_TYPE *before = p - 2;
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  /* After a subexpression?  */
+  if (*before == '(')
+    return (syntax & RE_NO_BK_PARENS) || escaped;
+
+  /* After an alternative?  */
+  if (*before == '|')
+    return (syntax & RE_NO_BK_VBAR) || escaped;
+
+  return false;
+}
+
+
+/* Counterpart of at_begline_loc_p, used for $.  P points at the
+   character following the $ and PEND at the end of the pattern; the
+   caller must guarantee `P < PEND'.  Return true when what follows
+   closes a subexpression or starts an alternative: a bare `)' / `|'
+   when SYNTAX has RE_NO_BK_PARENS / RE_NO_BK_VBAR, otherwise the
+   backslashed form.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     const CHAR_TYPE *p, *pend;
+     reg_syntax_t syntax;
+{
+  /* True when P starts a backslash escape whose second character lies
+     inside the pattern, so that P[1] may be examined.  */
+  boolean escape_follows = *p == '\\' && p + 1 < pend;
+  boolean closes_group;
+  boolean starts_alt;
+
+  /* Before a subexpression?  */
+  if (syntax & RE_NO_BK_PARENS)
+    closes_group = *p == ')';
+  else
+    closes_group = escape_follows && p[1] == ')';
+
+  /* Before an alternative?  */
+  if (syntax & RE_NO_BK_VBAR)
+    starts_alt = *p == '|';
+  else
+    starts_alt = escape_follows && p[1] == '|';
+
+  return closes_group || starts_alt;
+}
+
+
+/* Search the entries of COMPILE_STACK below its `avail' index, from the
+   top down, for one whose `regnum' equals REGNUM.  Return true if such
+   an entry exists and false otherwise.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+{
+  int idx = compile_stack.avail - 1;
+
+  while (idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+      idx--;
+    }
+
+  return false;
+}
+
+#ifdef MBS_SUPPORT
+/* Open a gap of NUM elements at LOC: each element from LOC through
+   END - NUM is copied NUM positions upward, working from the top down.
+   END is the highest element written, so it must point at the end of
+   the allocated buffer.  */
+static void
+insert_space (num, loc, end)
+     int num;
+     CHAR_TYPE *loc;
+     CHAR_TYPE *end;
+{
+  CHAR_TYPE *src;
+
+  for (src = end - num; src >= loc; src--)
+    src[num] = *src;
+}
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+/* MBS_SUPPORT flavour of compile_range.  The range starts at
+   RANGE_START_CHAR and ends at the next pattern character, **P_PTR
+   (the pattern ends at PEND).  The start/end pair is written at
+   B - CHAR_SET[5] - 2 after `insert_space' has moved what was there up
+   by two elements, and the range count CHAR_SET[4] is incremented.
+
+   Returns REG_ERANGE if the pattern is already exhausted (nothing is
+   stored and *P_PTR is left alone).  Otherwise the pair is stored,
+   *P_PTR is advanced past the ending character, and the result is
+   REG_ERANGE when the range is empty and SYNTAX has RE_NO_EMPTY_RANGES
+   set, REG_NOERROR in every other case.  */
+static reg_errcode_t
+compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
+               char_set)
+     CHAR_TYPE range_start_char;
+     const CHAR_TYPE **p_ptr, *pend;
+     CHAR_TYPE *char_set, *b;
+     RE_TRANSLATE_TYPE translate;
+     reg_syntax_t syntax;
+{
+  const CHAR_TYPE *p = *p_ptr;
+  CHAR_TYPE range_start, range_end;
+  reg_errcode_t status = REG_NOERROR;
+# ifdef _LIBC
+  uint32_t nrules;
+  uint32_t start_val, end_val;
+# endif
+
+  /* Nothing left in the pattern to serve as the range end.  */
+  if (p == pend)
+    return REG_ERANGE;
+
+# ifdef _LIBC
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
+                                                       _NL_COLLATE_COLLSEQWC);
+      const unsigned char *extra = (const unsigned char *)
+        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+
+      if (range_start_char < -1)
+        {
+          /* The range starts with a collating symbol: the negated
+             value indexes CHAR_SET.  Retrieve the index stored there
+             and fetch the collation sequence value from `extra'.  */
+          int32_t *wextra
+            = (int32_t*)(extra + char_set[-range_start_char]);
+
+          start_val = wextra[1 + *wextra];
+        }
+      else
+        start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
+
+      end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
+
+      /* An empty range is an error only if the syntax says so.  */
+      if ((syntax & RE_NO_EMPTY_RANGES) && start_val > end_val)
+        status = REG_ERANGE;
+
+      /* Append the pair at the end of the char_ranges.  */
+      insert_space(2, b - char_set[5] - 2, b - 1);
+      *(b - char_set[5] - 2) = (wchar_t)start_val;
+      *(b - char_set[5] - 1) = (wchar_t)end_val;
+      char_set[4]++; /* ranges_index */
+    }
+  else
+# endif
+    {
+      /* Only a non-negative start is run through the translate table;
+         a negative start is used as given.  */
+      if (range_start_char >= 0)
+        range_start = TRANSLATE (range_start_char);
+      else
+        range_start = range_start_char;
+      range_end = TRANSLATE (p[0]);
+
+      /* An empty range is an error only if the syntax says so.  */
+      if ((syntax & RE_NO_EMPTY_RANGES) && range_start > range_end)
+        status = REG_ERANGE;
+
+      /* Append the pair at the end of the char_ranges.  */
+      insert_space(2, b - char_set[5] - 2, b - 1);
+      *(b - char_set[5] - 2) = range_start;
+      *(b - char_set[5] - 1) = range_end;
+      char_set[4]++; /* ranges_index */
+    }
+
+  /* Step the caller's pattern pointer over the ending character so it
+     is not processed a second time.  */
+  ++*p_ptr;
+
+  return status;
+}
+#else
+/* Read the ending character of a range (in a bracket expression) from the
+   uncompiled pattern *P_PTR (which ends at PEND).  The starting character
+   is supplied by the caller in RANGE_START_CHAR.  Set the bit for every
+   character between the starting and ending characters (inclusive, after
+   translation) in the compiled pattern B, and advance *P_PTR past the
+   ending character.
+
+   Return REG_ERANGE if the pattern ends before the ending character, or
+   if no bit was set and SYNTAX has RE_NO_EMPTY_RANGES; otherwise return
+   REG_NOERROR.
+
+   We use these short variable names so we can use the same macros as
+   `regex_compile' itself.  */
+
+static reg_errcode_t
+compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
+     unsigned int range_start_char;
+     const char **p_ptr, *pend;
+     RE_TRANSLATE_TYPE translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+{
+  unsigned this_char;
+  const char *p = *p_ptr;
+  reg_errcode_t ret;
+# if _LIBC
+  const unsigned char *collseq;
+  unsigned int start_colseq;
+  unsigned int end_colseq;
+# else
+  unsigned end_char;
+# endif
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* Have to increment the pointer into the pattern string, so the
+     caller isn't still at the ending character.  P keeps pointing at
+     the ending character for the code below.  */
+  (*p_ptr)++;
+
+  /* Report an error if the range is empty and the syntax prohibits this.
+     RET is reset to REG_NOERROR as soon as one bit is set.  */
+  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+# if _LIBC
+  collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                 _NL_COLLATE_COLLSEQMB);
+
+  start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
+  end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
+  for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
+    {
+      unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
+
+      if (start_colseq <= this_colseq && this_colseq <= end_colseq)
+        {
+          SET_LIST_BIT (TRANSLATE (this_char));
+          ret = REG_NOERROR;
+        }
+    }
+# else
+  /* Here we see why `this_char' has to be larger than an `unsigned
+     char' -- we would otherwise go into an infinite loop, since all
+     characters <= 0xff.  */
+  range_start_char = TRANSLATE (range_start_char);
+  /* TRANSLATE casts a table entry to char (not unsigned char), so an
+     entry of 0x80 or above comes back sign-extended.  Left alone, such
+     a start would compare greater than every possible END_CHAR and the
+     range would silently be empty.  Truncate it to one byte, exactly as
+     is done for END_CHAR below.  Only do so when a table was applied,
+     so an untranslated RANGE_START_CHAR is passed through unchanged.  */
+  if (translate)
+    range_start_char &= (1 << BYTEWIDTH) - 1;
+  /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
+     and some compilers cast it to int implicitly, so following for_loop
+     may fall to (almost) infinite loop.
+     e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
+     To avoid this, we cast p[0] to unsigned int and truncate it.  */
+  end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
+
+  for (this_char = range_start_char; this_char <= end_char; ++this_char)
+    {
+      SET_LIST_BIT (TRANSLATE (this_char));
+      ret = REG_NOERROR;
+    }
+# endif
+
+  return ret;
+}
+#endif /* MBS_SUPPORT */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ Returns 0 if we succeed, -2 if an internal error. */
+
+#ifdef MBS_SUPPORT
+/* Local helper for re_compile_fastmap: reduce the wide character C to a
+   single byte usable as a fastmap index.  C is converted with wctomb;
+   when the conversion produces at least one byte, the first byte is
+   returned, otherwise C itself is truncated to `unsigned char'.  */
+static unsigned char truncate_wchar (CHAR_TYPE c);
+
+static unsigned char
+truncate_wchar (c)
+     CHAR_TYPE c;
+{
+  /* wctomb is declared to take `char *', so the scratch buffer has to
+     be plain `char'; convert to `unsigned char' only on the way out.  */
+  char buf[MB_LEN_MAX];
+  int retval = wctomb (buf, c);
+
+  return retval > 0 ? (unsigned char) buf[0] : (unsigned char) c;
+}
+#endif /* MBS_SUPPORT */
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ int j, k; /* J: loop index and decoded jump offset; K: decoded count (and, under emacs, a syntax code) */
+#ifdef MATCH_MAY_ALLOCATE
+ fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+ char *destination; /* not referenced directly in this function; presumably used by the stack macros -- TODO confirm */
+#endif
+
+ register char *fastmap = bufp->fastmap;
+
+#ifdef MBS_SUPPORT
+ /* We need to cast pattern to (wchar_t*), because we casted this compiled
+ pattern to (char*) in regex_compile. */
+ US_CHAR_TYPE *pattern = (US_CHAR_TYPE*)bufp->buffer;
+ register US_CHAR_TYPE *pend = (US_CHAR_TYPE*) (bufp->buffer + bufp->used);
+#else
+ US_CHAR_TYPE *pattern = bufp->buffer;
+ register US_CHAR_TYPE *pend = pattern + bufp->used;
+#endif /* MBS_SUPPORT */
+ US_CHAR_TYPE *p = pattern;
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (1)
+ {
+ if (p == pend || *p == succeed)
+ {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY ())
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail].pointer; /* resume at the most recently saved alternative */
+
+ continue;
+ }
+ else
+ break;
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ goto done;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+#ifdef MBS_SUPPORT
+ case exactn:
+ fastmap[truncate_wchar(p[1])] = 1; /* p[1] is taken as the first character to match; p[0] is presumably the count operand */
+ break;
+ case exactn_bin:
+ fastmap[p[1]] = 1;
+ break;
+#else
+ case exactn:
+ fastmap[p[1]] = 1; /* p[1] is taken as the first character to match; p[0] is presumably the count operand */
+ break;
+#endif /* MBS_SUPPORT */
+
+
+#ifdef MBS_SUPPORT
+ /* It is hard to distinguish fastmap from (multi byte) characters
+ which depends on current locale. */
+ case charset:
+ case charset_not:
+ case wordchar:
+ case notwordchar:
+ bufp->can_be_null = 1;
+ goto done;
+#else
+ case charset:
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) /* *p is the bitmap length in elements; walk every bit it covers */
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) /* inside the bitmap, a clear bit means the character is allowed */
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+#endif
+
+ case anychar:
+ {
+ int fastmap_newline = fastmap['\n']; /* remember the current newline entry so it can be restored below */
+
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (j > 0) /* forward jump: just keep scanning from the target */
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) *p != on_failure_jump
+ && (re_opcode_t) *p != succeed_n)
+ continue;
+
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend)
+ {
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
+ {
+ RESET_FAIL_STACK ();
+ return -2; /* the alternative could not be pushed on the failure stack */
+ }
+ }
+ else
+ bufp->can_be_null = 1;
+
+ if (succeed_n_p) /* reached through the `goto' in the succeed_n case below */
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += OFFSET_ADDRESS_SIZE;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 2 * OFFSET_ADDRESS_SIZE; /* presumably rewinds to the jump offset so the shared code can decode it -- TODO confirm */
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ p += 2 * OFFSET_ADDRESS_SIZE; /* step over this opcode's operands */
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ p += 2; /* step over this opcode's two operand elements */
+ continue;
+
+
+ default:
+ abort (); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+
+ done:
+ RESET_FAIL_STACK ();
+ return 0;
+} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Attach caller-provided register storage to REGS for use with BUFP.
+
+   When NUM_REGS is nonzero, REGS is pointed at the STARTS and ENDS
+   arrays, its count is set to NUM_REGS, and BUFP is marked
+   REGS_REALLOCATE.  STARTS and ENDS must have been allocated with the
+   malloc library routine and must each be at least
+   NUM_REGS * sizeof (regoff_t) bytes long.
+
+   When NUM_REGS is zero, REGS is emptied and BUFP is marked
+   REGS_UNALLOCATED, so that subsequent matches allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using BUFP
+   will allocate its own register data, without freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = (regoff_t *) 0;
+      return;
+    }
+
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Searching routines. */
+
+/* Single-string convenience wrapper around re_search_2.  The first
+   string is passed as empty, STRING/SIZE as the second string, and SIZE
+   as the stop position, so the caller cannot say where to stop
+   matching.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  int found_at;
+
+  found_at = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+                          regs, size);
+  return found_at;
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE scans backwards.  RANGE is clamped so that
+   no start position falls below 0 or above SIZE1 + SIZE2.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match (or if STARTPOS is out of range), or -2 if
+   error (such as failure stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register RE_TRANSLATE_TYPE translate = bufp->translate;
+  int total_size = size1 + size2;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.
+     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.
+
+     At this point 0 <= STARTPOS <= TOTAL_SIZE, so both bounds below are
+     representable.  The comparison is written without forming
+     STARTPOS + RANGE, which could overflow `int' for a large RANGE.  */
+  if (range < -startpos)
+    range = -startpos;
+  else if (range > total_size - startpos)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern that must be anchored.  */
+  if (bufp->used > 0 && range > 0
+      && ((re_opcode_t) bufp->buffer[0] == begbuf
+          /* `begline' is like `begbuf' if it cannot match at newlines.  */
+          || ((re_opcode_t) bufp->buffer[0] == begline
+              && !bufp->newline_anchor)))
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+#ifdef emacs
+  /* In a forward search for something that starts with \=.
+     don't keep searching past point.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+    {
+      range = PT - startpos;
+      if (range <= 0)
+        return -1;
+    }
+#endif /* emacs */
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    if (re_compile_fastmap (bufp) == -2)
+      return -2;
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)	/* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* When the scan would cross from STRING1 into STRING2,
+                 stop the skip loop at the end of STRING1.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              startpos += irange - range;
+            }
+          else				/* Searching backwards.  */
+            {
+              register CHAR_TYPE c = (size1 == 0 || startpos >= size1
+                                      ? string2[startpos - size1]
+                                      : string1[startpos]);
+
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2_internal (bufp, string1, size1, string2, size2,
+                                 startpos, regs, stop);
+#ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
+      alloca (0);
+# endif
+#endif
+
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      /* Step to the next candidate start, or stop once RANGE is used up.  */
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+#ifdef MBS_SUPPORT
+/* This converts PTR, a pointer into one of the search wchar_t strings
+ `string1' and `string2', into a multibyte string offset. The offset
+ is looked up in mbs_offset1 or mbs_offset2 (0 is used when that table
+ is NULL); for a pointer into string2, csize1 is added, so the result
+ counts from the start of the virtual concatenation rather than from
+ the start of string2. We use mbs_offset to optimize.
+ See convert_mbs_to_wcs. */
+# define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \
+ : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \
+ + csize1)))
+#else
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2', into an offset. For a pointer into string1 the offset
+ is relative to string1; for a pointer into string2, size1 is added to
+ the offset relative to string2. */
+# define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
+#endif /* MBS_SUPPORT */
+
+/* Macros for dealing with the split strings in re_match_2.
+ MATCHING_IN_FIRST_STRING is true while `dend' equals `end_match_1'. */
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary: while `d' has reached `dend', either jump to
+ `fail' (when `dend' is already `end_match_2') or continue at the start
+ of string2 with `dend' set to `end_match_2'. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'.
+ Note that AT_STRINGS_BEG is also true for any D when `size2' is zero. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. The MBS_SUPPORT
+ version classifies the character with iswalnum; the other version
+ compares its SYNTAX value against Sword. */
+#ifdef MBS_SUPPORT
+/* Use internationalized API instead of SYNTAX. */
+# define WORDCHAR_P(d) \
+ (iswalnum ((wint_t)((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0)
+#else
+# define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+#endif /* MBS_SUPPORT */
+
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. The ends of the virtual concatenation
+ always count as a boundary. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+
+/* Free everything we malloc.
+
+   FREE_VAR releases VAR if it is non-null and then resets it to NULL.
+   It is wrapped in `do { ... } while (0)' so that `FREE_VAR (x);' is
+   exactly one statement.  Without the wrapper, the trailing assignment
+   would escape an enclosing unbraced `if', and an `else' following it
+   would not parse.
+
+   FREE_VARIABLES releases every per-match allocation that exists in the
+   current configuration.  */
+#ifdef MATCH_MAY_ALLOCATE
+# define FREE_VAR(var) \
+  do { if (var) REGEX_FREE (var); (var) = NULL; } while (0)
+# ifdef MBS_SUPPORT
+#  define FREE_VARIABLES() \
+  do { \
+    REGEX_FREE_STACK (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+    FREE_VAR (string1); \
+    FREE_VAR (string2); \
+    FREE_VAR (mbs_offset1); \
+    FREE_VAR (mbs_offset2); \
+  } while (0)
+# else /* not MBS_SUPPORT */
+#  define FREE_VARIABLES() \
+  do { \
+    REGEX_FREE_STACK (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+  } while (0)
+# endif /* MBS_SUPPORT */
+#else
+# define FREE_VAR(var) \
+  do { if (var) free (var); (var) = NULL; } while (0)
+# ifdef MBS_SUPPORT
+#  define FREE_VARIABLES() \
+  do { \
+    FREE_VAR (string1); \
+    FREE_VAR (string2); \
+    FREE_VAR (mbs_offset1); \
+    FREE_VAR (mbs_offset2); \
+  } while (0)
+# else
+#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
+# endif /* MBS_SUPPORT */
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active.
+ As defined below, NO_HIGHEST_ACTIVE_REG is 1 << BYTEWIDTH and
+ NO_LOWEST_ACTIVE_REG is exactly one more than that. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* Single-string form of re_match_2: match the pattern in BUFP against
+   STRING (SIZE characters long) starting at POS, with no first string
+   and with the stop position set to SIZE.  The result of
+   re_match_2_internal is returned unchanged.  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  int outcome;
+
+  outcome = re_match_2_internal (bufp, NULL, 0, string, size,
+                                 pos, regs, size);
+# if !defined REGEX_MALLOC && defined C_ALLOCA
+  /* Force an alloca cleanup now that the matcher has returned.  */
+  alloca (0);
+# endif
+  return outcome;
+}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
+#endif /* not emacs */
+
+static boolean group_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
+ US_CHAR_TYPE *end,
+ register_info_type *reg_info)); /* forward declaration; the definition is not in this part of the file */
+static boolean alt_match_null_string_p _RE_ARGS ((US_CHAR_TYPE *p,
+ US_CHAR_TYPE *end,
+ register_info_type *reg_info)); /* forward declaration; note P is passed by value here, unlike in the other two predicates */
+static boolean common_op_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
+ US_CHAR_TYPE *end,
+ register_info_type *reg_info)); /* forward declaration; the definition is not in this part of the file */
+static int bcmp_translate _RE_ARGS ((const CHAR_TYPE *s1, const CHAR_TYPE *s2,
+ int len, char *translate)); /* forward declaration; TRANSLATE is declared `char *' here rather than RE_TRANSLATE_TYPE */
+
+/* Match the compiled pattern in BUFP against the (virtual)
+   concatenation of STRING1 and STRING2, whose lengths are SIZE1 and
+   SIZE2.  Matching starts at POS and stops at STOP.
+
+   All the work is done by re_match_2_internal, which also fills REGS.
+   The original note here says offsets for the substring each group
+   matched are stored when REGS is non-null and the `no_sub' field of
+   BUFP is nonzero; see the documentation for exactly how many groups
+   are filled.  NOTE(review): verify the polarity of `no_sub' against
+   re_match_2_internal.
+
+   The return value is -1 if there is no match, -2 on an internal error
+   (such as the failure stack overflowing), and otherwise the length of
+   the matched substring.  */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+{
+  int outcome;
+
+  outcome = re_match_2_internal (bufp, string1, size1, string2, size2,
+                                 pos, regs, stop);
+#if !defined REGEX_MALLOC && defined C_ALLOCA
+  /* Force an alloca cleanup now that the matcher has returned.  */
+  alloca (0);
+#endif
+  return outcome;
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+#ifdef MBS_SUPPORT
+
+static int count_mbs_length PARAMS ((int *, int));
+
+/* Translate LENGTH, a byte length measured from the start of a
+   multibyte string, into the index of the OFFSET_BUFFER entry that
+   holds exactly that value.  OFFSET_BUFFER is scanned from its first
+   entry up to a -1 sentinel.  See convert_mbs_to_wcs for how the
+   buffer is built.
+
+   Returns the index found; 0 when OFFSET_BUFFER is NULL; and -1 when
+   LENGTH is negative, when an entry greater than LENGTH is met first
+   (LENGTH falls inside a wide character), or when the sentinel is
+   reached without a match.  */
+
+static int
+count_mbs_length(offset_buffer, length)
+     int *offset_buffer;
+     int length;
+{
+  int *cursor;
+
+  /* Reject an invalid size before looking at the buffer.  */
+  if (length < 0)
+    return -1;
+
+  if (offset_buffer == NULL)
+    return 0;
+
+  for (cursor = offset_buffer; *cursor != -1; cursor++)
+    {
+      if (*cursor > length)
+        /* It is a fragment of a wide character.  */
+        return -1;
+      if (*cursor == length)
+        return (int) (cursor - offset_buffer);
+    }
+
+  /* We reached the sentinel.  */
+  return -1;
+}
+#endif /* MBS_SUPPORT */
+
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+#ifdef MBS_SUPPORT
+re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *cstring1, *cstring2;
+ int csize1, csize2;
+#else
+re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+#endif
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ US_CHAR_TYPE *p1;
+#ifdef MBS_SUPPORT
+ /* We need wchar_t* buffers correspond to string1, string2. */
+ CHAR_TYPE *string1 = NULL, *string2 = NULL;
+ /* We need the size of wchar_t buffers correspond to csize1, csize2. */
+ int size1 = 0, size2 = 0;
+ /* offset buffer for optimizatoin. See convert_mbs_to_wc. */
+ int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
+ /* They hold whether each wchar_t is binary data or not. */
+ char *is_binary = NULL;
+#endif /* MBS_SUPPORT */
+
+ /* Just past the end of the corresponding string. */
+ const CHAR_TYPE *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const CHAR_TYPE *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const CHAR_TYPE *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+#ifdef MBS_SUPPORT
+ US_CHAR_TYPE *pattern, *p;
+ register US_CHAR_TYPE *pend;
+#else
+ US_CHAR_TYPE *p = bufp->buffer;
+ register US_CHAR_TYPE *pend = p + bufp->used;
+#endif /* MBS_SUPPORT */
+
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ US_CHAR_TYPE *just_past_start_mem = 0;
+
+ /* We use this to map every character in the string. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+ static unsigned failure_id;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ size_t num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const CHAR_TYPE **regstart, **regend;
+#endif
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const CHAR_TYPE **old_regstart, **old_regend;
+#endif
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const CHAR_TYPE **best_regstart, **best_regend;
+#endif
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const CHAR_TYPE *match_end = NULL;
+
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
+ /* Used when we pop values we don't care about. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const CHAR_TYPE **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+#ifdef MATCH_MAY_ALLOCATE
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ old_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ old_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ best_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ best_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* MATCH_MAY_ALLOCATE */
+
+ /* The starting position is bogus. */
+#ifdef MBS_SUPPORT
+ if (pos < 0 || pos > csize1 + csize2)
+#else
+ if (pos < 0 || pos > size1 + size2)
+#endif
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+#ifdef MBS_SUPPORT
+ /* Allocate wchar_t array for string1 and string2 and
+ fill them with converted string. */
+ if (csize1 != 0)
+ {
+ string1 = REGEX_TALLOC (csize1 + 1, CHAR_TYPE);
+ mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
+ is_binary = REGEX_TALLOC (csize1 + 1, char);
+ if (!string1 || !mbs_offset1 || !is_binary)
+ {
+ FREE_VAR (string1);
+ FREE_VAR (mbs_offset1);
+ FREE_VAR (is_binary);
+ return -2;
+ }
+ size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
+ mbs_offset1, is_binary);
+ string1[size1] = L'\0'; /* for a sentinel */
+ FREE_VAR (is_binary);
+ }
+ if (csize2 != 0)
+ {
+ string2 = REGEX_TALLOC (csize2 + 1, CHAR_TYPE);
+ mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
+ is_binary = REGEX_TALLOC (csize2 + 1, char);
+ if (!string2 || !mbs_offset2 || !is_binary)
+ {
+ FREE_VAR (string1);
+ FREE_VAR (mbs_offset1);
+ FREE_VAR (string2);
+ FREE_VAR (mbs_offset2);
+ FREE_VAR (is_binary);
+ return -2;
+ }
+ size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
+ mbs_offset2, is_binary);
+ string2[size2] = L'\0'; /* for a sentinel */
+ FREE_VAR (is_binary);
+ }
+
+ /* We need to cast pattern to (wchar_t*), because this compiled
+ pattern was cast to (char*) in regex_compile. */
+ p = pattern = (CHAR_TYPE*)bufp->buffer;
+ pend = (CHAR_TYPE*)(bufp->buffer + bufp->used);
+
+#endif /* MBS_SUPPORT */
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+#ifdef MBS_SUPPORT
+ if (stop <= csize1)
+ {
+ mcnt = count_mbs_length(mbs_offset1, stop);
+ end_match_1 = string1 + mcnt;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ mcnt = count_mbs_length(mbs_offset2, stop-csize1);
+ end_match_2 = string2 + mcnt;
+ }
+ if (mcnt < 0)
+ { /* count_mbs_length return error. */
+ FREE_VARIABLES ();
+ return -1;
+ }
+#else
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+#endif /* MBS_SUPPORT */
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+#ifdef MBS_SUPPORT
+ if (size1 > 0 && pos <= csize1)
+ {
+ mcnt = count_mbs_length(mbs_offset1, pos);
+ d = string1 + mcnt;
+ dend = end_match_1;
+ }
+ else
+ {
+ mcnt = count_mbs_length(mbs_offset2, pos-csize1);
+ d = string2 + mcnt;
+ dend = end_match_2;
+ }
+
+ if (mcnt < 0)
+ { /* count_mbs_length return error. */
+ FREE_VARIABLES ();
+ return -1;
+ }
+#else
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+#endif /* MBS_SUPPORT */
+
+ DEBUG_PRINT1 ("The compiled pattern is:\n");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set || best_match_p)
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ succeed_label:
+ DEBUG_PRINT1 ("Accepting match.\n");
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ }
+ else
+ {
+ /* These braces fend off an "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert (bufp->regs_allocated == REGS_FIXED);
+ }
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+#ifdef MBS_SUPPORT
+ if (MATCHING_IN_FIRST_STRING)
+ regs->end[0] = mbs_offset1 != NULL ?
+ mbs_offset1[d-string1] : 0;
+ else
+ regs->end[0] = csize1 + (mbs_offset2 != NULL ?
+ mbs_offset2[d-string2] : 0);
+#else
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+#endif /* MBS_SUPPORT */
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
+ mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+#ifdef MBS_SUPPORT
+ if (MATCHING_IN_FIRST_STRING)
+ mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
+ else
+ mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
+ csize1;
+ mcnt -= pos;
+#else
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+#endif /* MBS_SUPPORT */
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ FREE_VARIABLES ();
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+ case succeed:
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");
+ goto succeed_label;
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+#ifdef MBS_SUPPORT
+ case exactn_bin:
+#endif
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+#ifdef MBS_SUPPORT
+ if (*d <= 0xff)
+ {
+ if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
+ != (US_CHAR_TYPE) *p++)
+ goto fail;
+ }
+ else
+ {
+ if (*d++ != (CHAR_TYPE) *p++)
+ goto fail;
+ }
+#else
+ if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
+ != (US_CHAR_TYPE) *p++)
+ goto fail;
+#endif /* MBS_SUPPORT */
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (CHAR_TYPE) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register US_CHAR_TYPE c;
+#ifdef MBS_SUPPORT
+ unsigned int i, char_class_length, coll_symbol_length,
+ equiv_class_length, ranges_length, chars_length, length;
+ CHAR_TYPE *workp, *workp2, *charset_top;
+#define WORK_BUFFER_SIZE 128
+ CHAR_TYPE str_buf[WORK_BUFFER_SIZE];
+# ifdef _LIBC
+ uint32_t nrules;
+# endif /* _LIBC */
+#endif /* MBS_SUPPORT */
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+#ifdef MBS_SUPPORT
+# ifdef _LIBC
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif /* _LIBC */
+ charset_top = p - 1;
+ char_class_length = *p++;
+ coll_symbol_length = *p++;
+ equiv_class_length = *p++;
+ ranges_length = *p++;
+ chars_length = *p++;
+ /* p points charset[6], so the address of the next instruction
+ (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
+ where l=length of char_classes, m=length of collating_symbol,
+ n=equivalence_class, o=length of char_range,
+ p'=length of character. */
+ workp = p;
+ /* Update p to indicate the next instruction. */
+ p += char_class_length + coll_symbol_length+ equiv_class_length +
+ 2*ranges_length + chars_length;
+
+ /* match with char_class? */
+ for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
+ {
+ wctype_t wctype;
+ uintptr_t alignedp = ((uintptr_t)workp
+ + __alignof__(wctype_t) - 1)
+ & ~(uintptr_t)(__alignof__(wctype_t) - 1);
+ wctype = *((wctype_t*)alignedp);
+ workp += CHAR_CLASS_SIZE;
+ if (iswctype((wint_t)c, wctype))
+ goto char_set_matched;
+ }
+
+ /* match with collating_symbol? */
+# ifdef _LIBC
+ if (nrules != 0)
+ {
+ const unsigned char *extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+
+ for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
+ workp++)
+ {
+ int32_t *wextra;
+ wextra = (int32_t*)(extra + *workp++);
+ for (i = 0; i < *wextra; ++i)
+ if (TRANSLATE(d[i]) != wextra[1 + i])
+ break;
+
+ if (i == *wextra)
+ {
+ /* Update d; because d is incremented again after
+ char_set_matched:, we leave it one short here. */
+ d += i - 1;
+ goto char_set_matched;
+ }
+ }
+ }
+ else /* (nrules == 0) */
+# endif
+ /* If we can't look up collation data, we use wcscoll
+ instead. */
+ {
+ for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
+ {
+ const CHAR_TYPE *backup_d = d, *backup_dend = dend;
+ length = wcslen(workp);
+
+ /* If wcscoll(the collating symbol, whole string) > 0,
+ no substring of the string can ever match the
+ collating symbol. */
+ if (wcscoll(workp, d) > 0)
+ {
+ workp += length + 1;
+ continue;
+ }
+
+ /* First, we compare the collating symbol with
+ the first character of the string.
+ If it doesn't match, we add the next character to
+ the compare buffer in turn. */
+ for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
+ {
+ int match;
+ if (d == dend)
+ {
+ if (dend == end_match_2)
+ break;
+ d = string2;
+ dend = end_match_2;
+ }
+
+ /* add next character to the compare buffer. */
+ str_buf[i] = TRANSLATE(*d);
+ str_buf[i+1] = '\0';
+
+ match = wcscoll(workp, str_buf);
+ if (match == 0)
+ goto char_set_matched;
+
+ if (match < 0)
+ /* (str_buf > workp) implies (str_buf + X > workp),
+ because for all X (str_buf + X > str_buf).
+ So we don't need to continue this loop. */
+ break;
+
+ /* Otherwise (str_buf < workp),
+ (str_buf+next_character) may equal (workp).
+ So we continue this loop. */
+ }
+ /* not matched */
+ d = backup_d;
+ dend = backup_dend;
+ workp += length + 1;
+ }
+ }
+ /* match with equivalence_class? */
+# ifdef _LIBC
+ if (nrules != 0)
+ {
+ const CHAR_TYPE *backup_d = d, *backup_dend = dend;
+ /* Try to match the equivalence class against
+ those known to the collate implementation. */
+ const int32_t *table;
+ const int32_t *weights;
+ const int32_t *extra;
+ const int32_t *indirect;
+ int32_t idx, idx2;
+ wint_t *cp;
+ size_t len;
+
+ /* This #include defines a local function! */
+# include <locale/weightwc.h>
+
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
+ weights = (const wint_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
+ extra = (const wint_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
+
+ /* Write 1 collating element to str_buf, and
+ get its index. */
+ idx2 = 0;
+
+ for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
+ {
+ cp = (wint_t*)str_buf;
+ if (d == dend)
+ {
+ if (dend == end_match_2)
+ break;
+ d = string2;
+ dend = end_match_2;
+ }
+ str_buf[i] = TRANSLATE(*(d+i));
+ str_buf[i+1] = '\0'; /* sentinel */
+ idx2 = findidx ((const wint_t**)&cp);
+ }
+
+ /* Update d; because d is incremented again after
+ char_set_matched:, we leave it one short here. */
+ d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
+ if (d >= dend)
+ {
+ if (dend == end_match_2)
+ d = dend;
+ else
+ {
+ d = string2;
+ dend = end_match_2;
+ }
+ }
+
+ len = weights[idx2];
+
+ for (workp2 = workp + equiv_class_length ; workp < workp2 ;
+ workp++)
+ {
+ idx = (int32_t)*workp;
+ /* We already checked idx != 0 in regex_compile. */
+
+ if (idx2 != 0 && len == weights[idx])
+ {
+ int cnt = 0;
+ while (cnt < len && (weights[idx + 1 + cnt]
+ == weights[idx2 + 1 + cnt]))
+ ++cnt;
+
+ if (cnt == len)
+ goto char_set_matched;
+ }
+ }
+ /* not matched */
+ d = backup_d;
+ dend = backup_dend;
+ }
+ else /* (nrules == 0) */
+# endif
+ /* If we can't look up collation data, we use wcscoll
+ instead. */
+ {
+ for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
+ {
+ const CHAR_TYPE *backup_d = d, *backup_dend = dend;
+ length = wcslen(workp);
+
+ /* If wcscoll(the equivalence class, whole string) > 0,
+ no substring of the string can ever match the
+ equivalence class. */
+ if (wcscoll(workp, d) > 0)
+ {
+ workp += length + 1;
+ break;
+ }
+
+ /* First, we compare the equivalence class with
+ the first character of the string.
+ If it doesn't match, we add the next character to
+ the compare buffer in turn. */
+ for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
+ {
+ int match;
+ if (d == dend)
+ {
+ if (dend == end_match_2)
+ break;
+ d = string2;
+ dend = end_match_2;
+ }
+
+ /* add next character to the compare buffer. */
+ str_buf[i] = TRANSLATE(*d);
+ str_buf[i+1] = '\0';
+
+ match = wcscoll(workp, str_buf);
+
+ if (match == 0)
+ goto char_set_matched;
+
+ if (match < 0)
+ /* (str_buf > workp) implies (str_buf + X > workp),
+ because for all X (str_buf + X > str_buf).
+ So we don't need to continue this loop. */
+ break;
+
+ /* Otherwise (str_buf < workp),
+ (str_buf+next_character) may equal (workp).
+ So we continue this loop. */
+ }
+ /* not matched */
+ d = backup_d;
+ dend = backup_dend;
+ workp += length + 1;
+ }
+ }
+
+ /* match with char_range? */
+#ifdef _LIBC
+ if (nrules != 0)
+ {
+ uint32_t collseqval;
+ const char *collseq = (const char *)
+ _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+
+ collseqval = collseq_table_lookup (collseq, c);
+
+ for (; workp < p - chars_length ;)
+ {
+ uint32_t start_val, end_val;
+
+ /* We already compute the collation sequence value
+ of the characters (or collating symbols). */
+ start_val = (uint32_t) *workp++; /* range_start */
+ end_val = (uint32_t) *workp++; /* range_end */
+
+ if (start_val <= collseqval && collseqval <= end_val)
+ goto char_set_matched;
+ }
+ }
+ else
+#endif
+ {
+ /* We set range_start_char at str_buf[0], range_end_char
+ at str_buf[4], and compared char at str_buf[2]. */
+ str_buf[1] = 0;
+ str_buf[2] = c;
+ str_buf[3] = 0;
+ str_buf[5] = 0;
+ for (; workp < p - chars_length ;)
+ {
+ wchar_t *range_start_char, *range_end_char;
+
+ /* match if (range_start_char <= c <= range_end_char). */
+
+ /* If range_start(or end) < 0, we assume -range_start(end)
+ is the offset of the collating symbol which is specified
+ as the character of the range start(end). */
+
+ /* range_start */
+ if (*workp < 0)
+ range_start_char = charset_top - (*workp++);
+ else
+ {
+ str_buf[0] = *workp++;
+ range_start_char = str_buf;
+ }
+
+ /* range_end */
+ if (*workp < 0)
+ range_end_char = charset_top - (*workp++);
+ else
+ {
+ str_buf[4] = *workp++;
+ range_end_char = str_buf + 4;
+ }
+
+ if (wcscoll(range_start_char, str_buf+2) <= 0 &&
+ wcscoll(str_buf+2, range_end_char) <= 0)
+
+ goto char_set_matched;
+ }
+ }
+
+ /* match with char? */
+ for (; workp < p ; workp++)
+ if (c == *workp)
+ goto char_set_matched;
+
+ not = !not;
+
+ char_set_matched:
+ if (not) goto fail;
+#else
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+#undef WORK_BUFFER_SIZE
+#endif /* MBS_SUPPORT */
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
+ (long int) *p, (long int) p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ just_past_start_mem = p;
+
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
+ (long int) *p, (long int) p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ US_CHAR_TYPE r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += OFFSET_ADDRESS_SIZE;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
+ && p1[2+OFFSET_ADDRESS_SIZE] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
+ r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if (old_regend[r] >= regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const CHAR_TYPE *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : memcmp (d, d2, mcnt*sizeof(US_CHAR_TYPE)))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+
+ /* Do this because we've matched some characters. */
+ SET_REGS_MATCHED ();
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+#endif
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+#endif
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register US_CHAR_TYPE *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_pop_jump' of this case. Examine what
+ follows. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
+ pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+#ifdef MBS_SUPPORT
+ || (re_opcode_t) *p2 == exactn_bin
+#endif
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register US_CHAR_TYPE c
+ = *p2 == (US_CHAR_TYPE) endline ? '\n' : p2[2];
+
+ if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
+#ifdef MBS_SUPPORT
+ || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
+#endif
+ ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
+ {
+ p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
+ pop_failure_jump;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX != 1)
+ DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
+ (wint_t) c,
+ (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
+ else
+#endif
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ (char) c,
+ (char) p1[3+OFFSET_ADDRESS_SIZE]);
+ }
+
+#ifndef MBS_SUPPORT
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+#endif /* not MBS_SUPPORT */
+ }
+#ifndef MBS_SUPPORT
+ else if ((re_opcode_t) *p2 == charset)
+ {
+ /* We win if the first character of the loop is not part
+ of the charset. */
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
+ && (p2[2 + p1[5] / BYTEWIDTH]
+ & (1 << (p1[5] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < (int) p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+#endif /* not MBS_SUPPORT */
+ }
+ p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (US_CHAR_TYPE) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ active_reg_t dummy_low_reg, dummy_high_reg;
+ US_CHAR_TYPE *pdummy = NULL;
+ const CHAR_TYPE *sdummy = NULL;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+ unconditional_jump:
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+ /* Note fall through. */
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+#ifdef _LIBC
+ DEBUG_PRINT2 ("(to %p).\n", p);
+#else
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+#endif
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += OFFSET_ADDRESS_SIZE;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
+ , mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
+ , mcnt);
+#endif
+ }
+ else if (mcnt == 0)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
+ p + OFFSET_ADDRESS_SIZE);
+#else
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
+ p + OFFSET_ADDRESS_SIZE);
+#endif /* _LIBC */
+
+#ifdef MBS_SUPPORT
+ p[1] = (US_CHAR_TYPE) no_op;
+#else
+ p[2] = (US_CHAR_TYPE) no_op;
+ p[3] = (US_CHAR_TYPE) no_op;
+#endif /* MBS_SUPPORT */
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
+
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
+ mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
+ mcnt);
+#endif /* _LIBC */
+ goto unconditional_jump;
+ }
+ /* If we don't have to jump any more, skip over the rest of the command. */
+ else
+ p += 2 * OFFSET_ADDRESS_SIZE;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+#endif
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+#if 0
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+ macro and introducing temporary variables works around the bug. */
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+#else
+ case wordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ break;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ break;
+ goto fail;
+ }
+
+ case notwordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ goto fail;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ goto fail;
+ break;
+ }
+#endif
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise. False is also returned
+ if END is reached before the matching stop_memory has been seen.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+ (As written, the only store to *P in this function is the one made at
+ the matching stop_memory.)
+
+ REG_INFO is not examined here; it is only handed on to
+ alt_match_null_string_p and common_op_match_null_string_p.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ US_CHAR_TYPE **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt; /* Number operand read from the jump opcodes below. */
+ /* Point to after the args to the start_memory. *P addresses the
+ register number, hence the + 2. */
+ US_CHAR_TYPE *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. (When MCNT is negative nothing more is done here:
+ P1 is simply left just past the jump's number.) */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
+ jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt -
+ (1 + OFFSET_ADDRESS_SIZE),
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
+ jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 1 + OFFSET_ADDRESS_SIZE;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ break;
+
+
+ case stop_memory:
+ assert (p1[1] == **p); /* Must close the register P was given. */
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return false; /* Reached END without finding the matching stop_memory. */
+} /* group_match_null_string_p */
+
+
+/* Decide whether a single alternative can match the empty string.
+
+   P addresses the first byte of the alternative and END is one byte
+   past its last.  Unlike group_match_null_string_p, no chain of
+   alternatives is walked here: an on_failure_jump met in this routine
+   is taken to be the head of a loop and is stepped over using its jump
+   offset.  The alternative may itself contain groups.
+
+   Returns false as soon as an opcode is found that cannot match
+   nothing, and true once the scan position reaches END.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+     US_CHAR_TYPE *p, *end;
+     register_info_type *reg_info;
+{
+  US_CHAR_TYPE *cursor;
+
+  for (cursor = p; cursor < end; )
+    {
+      if ((re_opcode_t) *cursor == on_failure_jump)
+        {
+          /* It's a loop: read the jump offset and move to its target.  */
+          int offset;
+
+          cursor++;
+          EXTRACT_NUMBER_AND_INCR (offset, cursor);
+          cursor += offset;
+          continue;
+        }
+
+      /* Every other opcode goes through the shared routine, which
+         advances CURSOR itself whenever it returns true.  */
+      if (!common_op_match_null_string_p (&cursor, end, reg_info))
+        return false;
+    }
+
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Returns true if the opcode at *P can match the empty string and
+ false otherwise.
+
+ Sets P to one after the op and its arguments, if any. *P is stored
+ only on the path that returns true; every `return false' below leaves
+ the caller's pointer as it was. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ US_CHAR_TYPE **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ US_CHAR_TYPE *p1 = *p; /* Local cursor; copied back to *P on success. */
+
+ switch ((re_opcode_t) *p1++)
+ {
+ case no_op: /* None of these opcodes is followed by an argument here. */
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. Only the first
+ result is recorded: a value that is already set is kept. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump.
+ A backward jump (negative offset) is reported as not matching
+ the empty string. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += OFFSET_ADDRESS_SIZE;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 2 * OFFSET_ADDRESS_SIZE; /* Back up to the jump offset. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate: /* NOTE(review): P1 is not advanced past the register
+ number read below, so on success *P is left pointing
+ at that byte -- confirm whether this is intended. */
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 2 * OFFSET_ADDRESS_SIZE; /* NOTE(review): no `break' follows, so
+ this falls into `default' and returns
+ false; the advance is never stored in
+ *P. Looks like a missing break --
+ confirm before changing. */
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Compare LEN characters of S1 and S2 after mapping each one through
+   TRANSLATE.  Returns zero if all LEN translated characters are equal
+   and 1 at the first position where they differ.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+     const CHAR_TYPE *s1, *s2;
+     register int len;
+     RE_TRANSLATE_TYPE translate;
+{
+  register const US_CHAR_TYPE *lhs = (const US_CHAR_TYPE *) s1;
+  register const US_CHAR_TYPE *rhs = (const US_CHAR_TYPE *) s2;
+
+  for (; len != 0; len--)
+    {
+#ifdef MBS_SUPPORT
+      /* Only characters up to 0xff are looked up in TRANSLATE; larger
+         ones are compared untranslated.  */
+      if (((*lhs <= 0xff) ? translate[*lhs++] : *lhs++)
+          != ((*rhs <= 0xff) ? translate[*rhs++] : *rhs++))
+        return 1;
+#else
+      if (translate[*lhs++] != translate[*rhs++])
+        return 1;
+#endif /* MBS_SUPPORT */
+    }
+
+  return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU entry point for compiling a regular
+   expression: PATTERN (LENGTH bytes long) is compiled into BUFP by
+   regex_compile, using the current value of re_syntax_options.
+
+   The `allocated' (and perhaps `buffer') and `translate' fields of
+   BUFP are assumed to be set by the caller on entry.
+
+   Returns a null pointer if the pattern was valid, otherwise an error
+   string.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     size_t length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Match anchors at newline.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU code decides whether to collect register information from the
+     REGS argument given to re_match and friends (null or not), never
+     from no_sub, so make sure no_sub is off.  */
+  bufp->no_sub = 0;
+
+  /* GNU code is written to assume at least RE_NREGS registers will be
+     set (and at least one extra will be -1).  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+  if (status)
+    return gettext (re_error_msgid + re_error_msgid_idx[(int) status]);
+
+  return NULL;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* BSD-style compile.  S is compiled into the single static pattern
+   buffer re_comp_buf.  When S is a null pointer nothing is compiled;
+   the call only checks that the buffer was allocated by an earlier
+   call.  Returns a null pointer on success, otherwise an error
+   message.  */
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec below without link errors.  */
+weak_function
+#endif
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t status;
+
+  if (s == NULL)
+    {
+      /* No new pattern: fine only if the buffer already exists.  */
+      if (re_comp_buf.buffer)
+        return 0;
+      return gettext ("No previous regular expression");
+    }
+
+  if (re_comp_buf.buffer == NULL)
+    {
+      /* First use: allocate the pattern buffer, then the fastmap.  */
+      re_comp_buf.buffer = (unsigned char *) malloc (200);
+      if (!re_comp_buf.buffer)
+        return (char *) gettext (re_error_msgid
+                                 + re_error_msgid_idx[(int) REG_ESPACE]);
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (!re_comp_buf.fastmap)
+        return (char *) gettext (re_error_msgid
+                                 + re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* `re_exec' always passes NULL for the `regs' argument, so the
+     pattern buffer fields that only affect register handling are left
+     uninitialized.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+  if (status)
+    /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+    return (char *) gettext (re_error_msgid
+                             + re_error_msgid_idx[(int) status]);
+
+  return NULL;
+}
+
+
+/* BSD-style match.  Searches S for the pattern held in re_comp_buf.
+   Returns 1 if re_search reports a non-negative position and 0
+   otherwise.  */
+int
+#ifdef _LIBC
+weak_function
+#endif
+re_exec (s)
+     const char *s;
+{
+  const int length = strlen (s);
+  int where;
+
+  /* No register information is requested, hence the null REGS.  */
+  where = re_search (&re_comp_buf, s, length, 0, length,
+                     (struct re_registers *) 0);
+  return where >= 0;
+}
+
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)
+
+   On any failure the fastmap and the translate table allocated by this
+   function are released and the `fastmap' and `translate' fields are
+   set to NULL.  Callers cannot portably call regfree on a PREG whose
+   compilation failed, so leaving them allocated would leak them.  */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *preg;
+     const char *pattern;
+     int cflags;
+{
+  reg_errcode_t ret;
+  reg_syntax_t syntax
+    = (cflags & REG_EXTENDED) ?
+      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+  /* regex_compile will allocate the space for the compiled pattern.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Try to allocate space for the fastmap.  A failure here is not an
+     error: the pattern is then compiled without a fastmap.  */
+  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
+
+  if (cflags & REG_ICASE)
+    {
+      unsigned i;
+
+      preg->translate
+        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
+      if (preg->translate == NULL)
+        {
+          /* Don't leak the fastmap obtained just above.  */
+          if (preg->fastmap != NULL)
+            free (preg->fastmap);
+          preg->fastmap = NULL;
+          return (int) REG_ESPACE;
+        }
+
+      /* Map uppercase characters to corresponding lowercase ones.  */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
+    }
+  else
+    preg->translate = NULL;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = !!(cflags & REG_NOSUB);
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern.  */
+  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+  if (ret == REG_NOERROR && preg->fastmap)
+    {
+      /* Compute the fastmap now, since regexec cannot modify the pattern
+         buffer.  */
+      if (re_compile_fastmap (preg) == -2)
+        {
+          /* Some error occurred while computing the fastmap, just forget
+             about it.  */
+          free (preg->fastmap);
+          preg->fastmap = NULL;
+        }
+    }
+  else if (ret != REG_NOERROR)
+    {
+      /* Compilation failed.  Release what this function allocated and
+         clear the pointers, so nothing leaks and a later regfree would
+         not free them a second time.  */
+      if (preg->fastmap != NULL)
+        free (preg->fastmap);
+      preg->fastmap = NULL;
+
+      if (preg->translate != NULL)
+        free (preg->translate);
+      preg->translate = NULL;
+    }
+
+  return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+
+/* regexec looks for the pattern compiled into PREG anywhere in STRING.
+
+   PMATCH is ignored when NMATCH is zero or when REG_NOSUB was given to
+   `regcomp'.  Otherwise PMATCH is assumed to have at least NMATCH
+   elements, which on a successful match receive the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS holds the `execution flags': with REG_NOTBOL, ^ does not
+   match at the beginning of the string; with REG_NOTEOL, $ does not
+   match at the end.
+
+   Returns 0 if a match is found and REG_NOMATCH if not.  REG_NOMATCH is
+   also what is returned when the temporary register storage cannot be
+   allocated.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *preg;
+     const char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  struct re_registers regs;
+  /* PREG is const, so the per-call flags go into a private copy.  */
+  regex_t work = *preg;
+  int length = strlen (string);
+  boolean fill_pmatch = !preg->no_sub && nmatch > 0;
+  int where;
+
+  work.not_bol = (eflags & REG_NOTBOL) != 0;
+  work.not_eol = (eflags & REG_NOTEOL) != 0;
+
+  /* The caller said, through `nmatch', exactly how many registers to
+     report; tell the matching routines the register count is fixed.  */
+  work.regs_allocated = REGS_FIXED;
+
+  if (fill_pmatch)
+    {
+      /* One allocation holds both arrays: starts first, then ends.  */
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch * 2, regoff_t);
+      if (regs.start == NULL)
+        return (int) REG_NOMATCH;
+      regs.end = regs.start + nmatch;
+    }
+
+  /* Search the whole string: start at 0 with a range of LENGTH.  */
+  where = re_search (&work, string, length, 0, length,
+                     fill_pmatch ? &regs : (struct re_registers *) 0);
+
+  if (fill_pmatch)
+    {
+      /* Hand the offsets over in the POSIX structure, on a match only.  */
+      if (where >= 0)
+        {
+          unsigned idx;
+
+          for (idx = 0; idx < nmatch; idx++)
+            {
+              pmatch[idx].rm_so = regs.start[idx];
+              pmatch[idx].rm_eo = regs.end[idx];
+            }
+        }
+
+      /* The temporary register storage is no longer needed.  */
+      free (regs.start);
+    }
+
+  /* Unlike `re_search', zero means success here.  */
+  if (where >= 0)
+    return (int) REG_NOERROR;
+  return (int) REG_NOMATCH;
+}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+ int errcode;
+ const regex_t *preg;
+ char *errbuf;
+ size_t errbuf_size;
+{
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (int) (sizeof (re_error_msgid_idx)
+ / sizeof (re_error_msgid_idx[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
+
+ if (errbuf_size != 0)
+ {
+ if (msg_size > errbuf_size)
+ {
+#if defined HAVE_MEMPCPY || defined _LIBC
+ *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+ memcpy (errbuf, msg, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = 0;
+#endif
+ }
+ else
+ memcpy (errbuf, msg, msg_size);
+ }
+
+ return msg_size;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+/* Release the storage PREG owns (compiled pattern, fastmap and
+   translate table) and reset the matching fields, so that the freed
+   pointers are not left dangling in PREG.  */
+
+void
+regfree (preg)
+     regex_t *preg;
+{
+  /* First give back whatever is allocated ...  */
+  if (preg->buffer)
+    free (preg->buffer);
+  if (preg->fastmap)
+    free (preg->fastmap);
+  if (preg->translate)
+    free (preg->translate);
+
+  /* ... then clear the pointers and the bookkeeping that went with them.  */
+  preg->buffer = NULL;
+  preg->fastmap = NULL;
+  preg->translate = NULL;
+
+  preg->allocated = 0;
+  preg->used = 0;
+  preg->fastmap_accurate = 0;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+#endif /* not emacs */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.c b/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.c
new file mode 100644
index 00000000..566b1fc0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.c
@@ -0,0 +1,5742 @@
+/* LGPLd GNU regex for Native WIN32 */
+
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 2003 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/* Added June 2003 Neal Richter, RightNow Technologies */
+
+/* note that this version is significantly different from the original */
+/* version 0.12 GNU source code. It compiles and works on Native WIN32. */
+
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
+ internationalization features.)
+
+ Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#if defined(_WIN32)
+#pragma warning(disable: 4018 4101)
+#endif
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+#pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#if defined(LINUX)
+#define STDC_HEADERS
+#endif
+
+#if defined(STDC_HEADERS) && !defined(emacs)
+#include <stddef.h>
+#else
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+#endif
+
+/* For platforms which support the ISO C amendment 1 functionality we
+ support user defined character classes. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+# include <wctype.h>
+# include <wchar.h>
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined (_LIBC)
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+#define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+#else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+#undef REL_ALLOC
+
+#if defined (STDC_HEADERS) || defined (_LIBC) || defined(_WIN32)
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+void free();
+#endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+#ifdef INHIBIT_STRING_HEADER
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
+#if !defined (bzero) && !defined (bcopy)
+#undef INHIBIT_STRING_HEADER
+#endif
+#endif
+#endif
+
+#include <string.h>
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+#ifndef INHIBIT_STRING_HEADER
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) || defined (_WIN32)
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+#endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SWITCH_ENUM_BUG
+#define SWITCH_ENUM_CAST(x) ((int)(x))
+#else
+#define SWITCH_ENUM_CAST(x) (x)
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Fill re_syntax_table on the first call: the entries for 'a'-'z',
+   'A'-'Z', '0'-'9' and '_' are set to Sword, every other entry is
+   zero.  Later calls return without touching the table.  */
+static void
+init_syntax_once ()
+{
+  static int initialized = 0;
+  int ch;
+
+  if (!initialized)
+    {
+      bzero (re_syntax_table, sizeof re_syntax_table);
+
+      for (ch = '0'; ch <= '9'; ++ch)
+        re_syntax_table[ch] = Sword;
+
+      for (ch = 'A'; ch <= 'Z'; ++ch)
+        re_syntax_table[ch] = Sword;
+
+      for (ch = 'a'; ch <= 'z'; ++ch)
+        re_syntax_table[ch] = Sword;
+
+      re_syntax_table['_'] = Sword;
+
+      initialized = 1;
+    }
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+/* #include "regex.h" */
+#include "regex_win32.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding." */
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#if defined(REGEX_MALLOC) || defined(_WIN32)
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE free
+#define REGEX_MALLOC
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+/* No need to do anything to free, after alloca. */
+#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+
+#define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+#define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE_STACK malloc
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE_STACK free
+
+#else /* not REGEX_MALLOC */
+
+#define REGEX_ALLOCATE_STACK alloca
+
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+#define REGEX_FREE_STACK(arg)
+
+#endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+static int
+re_match_2_internal(struct re_pattern_buffer *bufp,
+ const char *string1,
+ int size1,
+ const char *string2,
+ int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop);
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER for DEBUG builds: decode the two
+   bytes at SOURCE (low byte first, second byte sign-extended and
+   shifted up by eight bits) and store the result in *DEST.  */
+static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
+static void
+extract_number (dest, source)
+     int *dest;
+     unsigned char *source;
+{
+  int high = SIGN_EXTEND_CHAR (source[1]);
+  int low = source[0] & 0377;
+
+  *dest = low + (high << 8);
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR for DEBUG builds: decode
+   the number at *SOURCE into *DESTINATION, then advance *SOURCE past
+   the two bytes that held it.  */
+static void extract_number_and_incr _RE_ARGS ((int *destination,
+                                               unsigned char **source));
+static void
+extract_number_and_incr (destination, source)
+     int *destination;
+     unsigned char **source;
+{
+  unsigned char *cursor = *source;
+
+  extract_number (destination, cursor);
+  *source = cursor + 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form.  Each run of consecutive
+   nonzero entries is shown as its first character, followed by `-'
+   and its last character when the run covers more than one entry.
+   The output ends with a newline.  Only built when DEBUG is defined.  */
+
+void
+print_fastmap (fastmap)
+     char *fastmap;
+{
+  unsigned idx = 0;
+
+  while (idx < (1 << BYTEWIDTH))
+    {
+      unsigned run_start;
+
+      if (!fastmap[idx])
+        {
+          idx++;
+          continue;
+        }
+
+      /* IDX is the first set entry of a run: print it, then skip to
+         the first entry past the end of the run.  */
+      run_start = idx++;
+      putchar (run_start);
+      while (idx < (1 << BYTEWIDTH) && fastmap[idx])
+        idx++;
+
+      /* A run of two or more entries also gets its upper bound.  */
+      if (idx - run_start > 1)
+        {
+          printf ("-");
+          putchar (idx - 1);
+        }
+    }
+
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+   Each command is printed on its own line, prefixed with its byte
+   offset from START; a final line reports the offset at which
+   printing stopped.  A NULL START prints "(null)".  Only built when
+   DEBUG is defined.  */
+
+void
+print_partial_compiled_pattern (start, end)
+     unsigned char *start;
+     unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p1;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      /* Pointer differences have type ptrdiff_t; they are cast to int
+         here and below so the argument matches the %d conversion.  */
+      printf ("%d:\t", (int) (p - start));
+
+      switch ((re_opcode_t) *p++)
+        {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+        case exactn:
+          /* One count byte, then that many literal bytes.  The
+             do/while prints one literal before testing the count.  */
+          mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
+            {
+              putchar ('/');
+              putchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+        case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case stop_memory:
+          mcnt = *p++;
+          printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case duplicate:
+          printf ("/duplicate/%d", *p++);
+          break;
+
+        case anychar:
+          printf ("/anychar");
+          break;
+
+        case charset:
+        case charset_not:
+          {
+            register int c, last = -100;
+            register int in_range = 0;
+
+            printf ("/charset [%s",
+                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+            assert (p + *p < pend);
+
+            /* *P is the number of bitmap bytes; the bitmap itself
+               starts at P[1], low bit first within each byte.  */
+            for (c = 0; c < 256; c++)
+              if (c / 8 < *p
+                  && (p[1 + (c/8)] & (1 << (c % 8))))
+                {
+                  /* Are we starting a range?  */
+                  if (last + 1 == c && ! in_range)
+                    {
+                      putchar ('-');
+                      in_range = 1;
+                    }
+                  /* Have we broken a range?  */
+                  else if (last + 1 != c && in_range)
+                    {
+                      putchar (last);
+                      in_range = 0;
+                    }
+
+                  if (! in_range)
+                    putchar (c);
+
+                  last = c;
+                }
+
+            if (in_range)
+              putchar (last);
+
+            putchar (']');
+
+            /* Skip the count byte and the bitmap.  */
+            p += 1 + *p;
+          }
+          break;
+
+        case begline:
+          printf ("/begline");
+          break;
+
+        case endline:
+          printf ("/endline");
+          break;
+
+        case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_keep_string_jump to %d",
+                  (int) (p + mcnt - start));
+          break;
+
+        case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/dummy_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/maybe_pop_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case pop_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/pop_failure_jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case jump_past_alt:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump_past_alt to %d", (int) (p + mcnt - start));
+          break;
+
+        case jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump to %d", (int) (p + mcnt - start));
+          break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          p1 = p + mcnt;
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/succeed_n to %d, %d times", (int) (p1 - start), mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          p1 = p + mcnt;
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/jump_n to %d, %d times", (int) (p1 - start), mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          p1 = p + mcnt;
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/set_number_at location %d to %d",
+                  (int) (p1 - start), mcnt2);
+          break;
+
+        case wordbound:
+          printf ("/wordbound");
+          break;
+
+        case notwordbound:
+          printf ("/notwordbound");
+          break;
+
+        case wordbeg:
+          printf ("/wordbeg");
+          break;
+
+        case wordend:
+          printf ("/wordend");
+          /* Without this break control fell into the next case and a
+             second, unrelated opcode name was printed as well.  */
+          break;
+
+#ifdef emacs
+        case before_dot:
+          printf ("/before_dot");
+          break;
+
+        case at_dot:
+          printf ("/at_dot");
+          break;
+
+        case after_dot:
+          printf ("/after_dot");
+          break;
+
+        case syntaxspec:
+          printf ("/syntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+
+        case notsyntaxspec:
+          printf ("/notsyntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+#endif /* emacs */
+
+        case wordchar:
+          printf ("/wordchar");
+          break;
+
+        case notwordchar:
+          printf ("/notwordchar");
+          break;
+
+        case begbuf:
+          printf ("/begbuf");
+          break;
+
+        case endbuf:
+          printf ("/endbuf");
+          break;
+
+        default:
+          /* Opcode with no case above: print its numeric value.  */
+          printf ("?%d", *(p-1));
+          break;
+        }
+
+      putchar ('\n');
+    }
+
+  printf ("%d:\tend of pattern.\n", (int) (p - start));
+}
+
+
+/* Dump the pattern buffer BUFP for debugging: the compiled pattern
+ itself, the used/allocated byte counts, the fastmap (only when it
+ is present and marked accurate), and the remaining flag and syntax
+ fields. Only built when DEBUG is defined. */
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ /* NOTE(review): the conversions below (%ld, %d, %lx) must match the
+ field types declared in regex_win32.h, which is not visible
+ here -- confirm, in particular for `used', `allocated' and
+ `re_nsub'. */
+ printf ("%ld bytes used/%ld bytes allocated.\n",
+ bufp->used, bufp->allocated);
+
+ /* The fastmap is only meaningful once it has been computed. */
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %lx\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
+}
+
+
+/* Print the text from WHERE to the end of the two-part subject made
+   of STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).  When
+   FIRST_STRING_P says WHERE lies in STRING1, the rest of STRING1 is
+   printed and then all of STRING2; otherwise WHERE is treated as a
+   position inside STRING2.  A NULL WHERE prints "(null)".  Only built
+   when DEBUG is defined.  */
+void
+print_double_string (where, string1, size1, string2, size2)
+     const char *where;
+     const char *string1;
+     int size1;
+     const char *string2;
+     int size2;
+{
+  int idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      /* Finish the first string, then restart at the head of the
+         second one.  */
+      for (idx = where - string1; idx < size1; idx++)
+        putchar (string1[idx]);
+
+      where = string2;
+    }
+
+  for (idx = where - string2; idx < size2; idx++)
+    putchar (string2[idx]);
+}
+
+/* Write the single character C to stderr.  Only built when DEBUG is
+   defined.  */
+void
+printchar (c)
+     int c;
+{
+  fputc (c, stderr);
+}
+
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the regexp syntax used by subsequent compilations.  This
+   exists for compatibility with utilities which historically have
+   different, incompatible syntaxes.
+
+   SYNTAX is a bit mask built from the syntax bits defined in regex.h;
+   it is stored in re_syntax_options.  The value that was in effect
+   before the call is returned.  In DEBUG builds the RE_DEBUG bit of
+   SYNTAX also switches the `debug' tracing flag on or off.  */
+
+reg_syntax_t
+re_set_syntax(reg_syntax_t syntax)
+{
+  const reg_syntax_t previous = re_syntax_options;
+
+#ifdef DEBUG
+  /* Tracing simply follows the RE_DEBUG bit of the new syntax.  */
+  debug = (syntax & RE_DEBUG) ? 1 : 0;
+#endif /* DEBUG */
+
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+/* Error message table, indexed by the error codes listed in regex.h.
+ The entries must stay in the same order as those codes; the name of
+ the code each entry stands for is given in the trailing comment.
+ Each string is wrapped in gettext_noop so that xgettext can find it.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+static const char *re_error_msgid[] =
+{
+ gettext_noop ("Success"), /* REG_NOERROR */
+ gettext_noop ("No match"), /* REG_NOMATCH */
+ gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
+ gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
+ gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
+ gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
+ gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
+ gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
+ gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
+ gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
+ gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
+ gettext_noop ("Invalid range end"), /* REG_ERANGE */
+ gettext_noop ("Memory exhausted"), /* REG_ESPACE */
+ gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
+ gettext_noop ("Premature end of regular expression"), /* REG_EEND */
+ gettext_noop ("Regular expression too big"), /* REG_ESIZE */
+ gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
+};
+
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+#undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
+#undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+
+#ifdef INT_IS_16BIT
+
+#if defined (MATCH_MAY_ALLOCATE)
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+static long int re_max_failures = 4000;
+#else
+static long int re_max_failures = 2000;
+#endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ long int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned long int size;
+ unsigned long int avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#else /* not INT_IS_16BIT */
+
+#if defined (MATCH_MAY_ALLOCATE)
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+static int re_max_failures = 20000;
+#else
+static int re_max_failures = 2000;
+#endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#endif /* INT_IS_16BIT */
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails. */
+
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large. The size
+ field is only doubled after the reallocation has succeeded.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared.
+
+ NOTE(review): the result of REGEX_REALLOCATE_STACK is stored
+ straight into (fail_stack).stack. When REGEX_MALLOC is defined that
+ macro is realloc, so on failure the stack pointer becomes NULL and
+ the previous block is no longer reachable through it. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so.
+
+ The fullness test is made on the FAIL_STACK argument itself, so the
+ macro behaves the same whatever the stack variable is called.
+ FAIL_STACK is evaluated several times and must have no side effects;
+ POINTER is evaluated once, and only when the push succeeds.
+ DOUBLE_FAIL_STACK requires `destination' be declared. */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ (((FAIL_STACK).avail == (FAIL_STACK).size \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER), \
+ 1))
+
+/* Raw push and pop primitives for single failure-stack slots. None of
+ the PUSH macros below compares `avail' with `size', and none of the
+ POP macros checks for an empty stack. */
+
+/* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. Each one decrements `avail'
+ first and then yields the selected member of that slot. */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+ With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores
+ the popped integer into ITEM_ADDR->integer; otherwise both expand to
+ nothing. */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_INT
+#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
+ declared.
+
+ The body also reads lowest_active_reg and highest_active_reg, and it
+ declares its own local `destination' for DOUBLE_FAIL_STACK.
+
+ Push order: for each register from lowest_active_reg up to
+ highest_active_reg its start, its end and its info word; then
+ lowest_active_reg, highest_active_reg, PATTERN_PLACE and STRING_PLACE;
+ and finally, through DEBUG_PUSH, the failure id.
+
+ The stack is doubled until REMAINING_AVAIL_SLOTS is at least
+ NUM_FAILURE_ITEMS before anything is pushed.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ s_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register: its start, its end and its info word. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers. */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. */
+/* We used to use (num_regs - 1), which is the number of registers
+ this regexp will save; but that was changed to 5
+ to avoid stack overflow for a regexp with lots of parens. */
+#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. The `0 ? 0 :' test is a constant,
+ so the count always covers registers lowest_active_reg through
+ highest_active_reg, both of which must be in scope. */
+#define NUM_FAILURE_ITEMS \
+ (((0 \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it.
+ Reads the variable `fail_stack'. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'.
+
+ STR is left unchanged when the saved string position is NULL. The
+ macro also clears `set_regs_matched_done'. Its `else' arm follows
+ `if (1)' and so never runs. The expansion is a bare brace block, not
+ a `do ... while (0)' statement. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ s_reg_t this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+/* `word' views the register information as one failure-stack element,
+ which is how PUSH_FAILURE_POINT and POP_FAILURE_POINT save and restore
+ it; `bits' gives access to the individual flags. */
+typedef union
+{
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+/* Accessors for the flag bits of a register_info_type R. Each expands
+ to the bit-field itself, so it can be read or assigned to. */
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
+#define SET_REGS_MATCHED() \
+ do \
+ { \
+ if (!set_regs_matched_done) \
+ { \
+ active_reg_t r; \
+ set_regs_matched_done = 1; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ } \
+ while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched.
+ The sentinel is the address of the file-local object `reg_unset_dummy';
+ REG_UNSET tests a register value against that address. */
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile. */
+
+/* The pattern compiler itself; defined further down in this file. */
+static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+/* store_op1, store_op2, insert_op1 and insert_op2 are what the
+ STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros below
+ expand to, respectively. */
+static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
+static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2));
+static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg, unsigned char *end));
+static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2, unsigned char *end));
+/* Consulted by regex_compile when it meets `^' and `$' to decide whether
+ the character acts as an anchor. */
+static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
+ reg_syntax_t syntax));
+static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
+ reg_syntax_t syntax));
+/* Called by regex_compile for ranges inside a bracket expression. */
+static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
+ const char *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ unsigned char *b));
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate').
+
+ Uses the variables `p', `pend' and `translate' of the enclosing
+ function and does `return REG_EEND' from it when `p' has reached
+ `pend'. */
+#ifndef PATFETCH
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+#endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. Like PATFETCH, returns REG_EEND from the enclosing
+ function at the end of the pattern. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. No check is made against
+ the start of the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned.
+
+ The translated value is cast to `char'; the untranslated arm yields D
+ with its own type. Reads the variable `translate' of the enclosing
+ scope. */
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'.
+ They all use the variables `b' (the append position) and `bufp' of the
+ enclosing function. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer.
+ Expands to a bare `while' statement that calls EXTEND_BUFFER until the
+ space fits, so it inherits the `return' statements of EXTEND_BUFFER. */
+#define GET_BUFFER_SPACE(n) \
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies.
+ LOC is evaluated twice. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (int) ((to) - (loc) - 3))
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+/* Any other compiler which, like MSC, has an allocation limit below 2^16
+ bytes will have to use an approach similar to what is done below for
+ MSC and lower MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes, which is not going to work too well.
+ You have been warned!! */
+#if defined(_MSC_VER) && !defined(_WIN32)
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+#define MAX_BUF_SIZE 65500L
+#define REALLOC(p,s) realloc ((p), (size_t) (s))
+#else
+#define MAX_BUF_SIZE (1L << 16)
+#define REALLOC(p,s) realloc ((p), (s))
+#endif
+
+/* Extend the buffer by twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. If extending the buffer results in it
+ being larger than MAX_BUF_SIZE, then flag memory exhausted.
+
+ Uses the variables `bufp', `b', `begalt', `fixup_alt_jump',
+ `laststart' and `pending_exact' of the enclosing function. Does
+ `return REG_ESIZE' when the buffer is already MAX_BUF_SIZE bytes and
+ `return REG_ESPACE' when realloc fails.
+
+ The new size and the new block are kept in locals and only stored
+ into BUFP once realloc has succeeded, so on failure `bufp->buffer'
+ and `bufp->allocated' still describe the old, valid block.
+ NOTE(review): `new_allocated' assumes `bufp->allocated' is no wider
+ than `unsigned long', as the cast in GET_BUFFER_SPACE suggests. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ unsigned char *new_buffer; \
+ unsigned long new_allocated; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ new_allocated = bufp->allocated << 1; \
+ if (new_allocated > MAX_BUF_SIZE) \
+ new_allocated = MAX_BUF_SIZE; \
+ new_buffer = (unsigned char *) REALLOC (old_buffer, new_allocated); \
+ if (new_buffer == NULL) \
+ return REG_ESPACE; \
+ bufp->buffer = new_buffer; \
+ bufp->allocated = new_allocated; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. The counter type is therefore wider than one
+ byte. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
+
+/* One compile-stack entry: four offsets of type pattern_offset_t plus a
+ register number. NOTE(review): presumably the offsets are relative to
+ the start of the pattern buffer so they survive EXTEND_BUFFER moving
+ it -- confirm against the code that pushes entries. */
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+/* The compile stack: `stack' is the entry array, `size' its capacity
+ and `avail' the index of the next free entry. */
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+/* These read the variable `compile_stack' of the enclosing function. */
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list.
+ The bitmap is addressed through the variable `b' of the enclosing
+ function: byte C / BYTEWIDTH, bit C % BYTEWIDTH. C is converted to
+ `unsigned char' first and is evaluated twice, so it must have no side
+ effects. Both uses are parenthesized so that an expression argument
+ is cast as a whole. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.
+
+ Uses the variables `c', `p' and `pend' of the enclosing function.
+ NUM is left untouched when no digit is found; otherwise a negative
+ NUM is reset to zero before digits are accumulated in decimal. The
+ character that ends the number has already been fetched into `c'
+ when the macro finishes, unless the pattern ended first. There is no
+ overflow check on NUM. PATFETCH may `return REG_EEND' from the
+ enclosing function. The expansion is a bare brace block, not a
+ `do ... while (0)' statement. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* CHAR_CLASS_MAX_LENGTH is the longest character class name accepted
+ between `[:' and `:]'; IS_CHAR_CLASS tests whether a string names a
+ known class. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendment 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# define IS_CHAR_CLASS(string) wctype (string)
+#else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+/* Without wctype, only these twelve fixed names are recognized. */
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#endif
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+/* The one failure stack used when MATCH_MAY_ALLOCATE is not defined. */
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+/* The register vectors themselves; regex_grow_registers resizes all of
+ them together. */
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller.
+
+ When NUM_REGS exceeds regs_allocated_size, every register vector is
+ passed through RETALLOC_IF with the new element count and
+ regs_allocated_size is raised to NUM_REGS; otherwise nothing happens.
+ Returns nothing. The definition is prototyped and declared `void'
+ because implicit `int' is no longer valid C. */
+
+static void
+regex_grow_registers (int num_regs)
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (reg_info, num_regs, register_info_type);
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* Forward declaration. Note that the compile stack is passed by value.
+ NOTE(review): presumably reports whether REGNUM is a group currently on
+ COMPILE_STACK -- confirm against the definition. */
+static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
+ compile_stack,
+ regnum_t regnum));
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+/* Return, freeing storage we allocated.
+ Frees `compile_stack.stack' of the enclosing function and then returns
+ VALUE from it; the comma operator makes VALUE the returned result. */
+#define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
+static reg_errcode_t
+regex_compile (const char *pattern,
+ size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp)
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ putchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_upper = STREQ (str, "upper");
+ wctype_t wt;
+ int ch;
+
+ wt = wctype (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ {
+ if (iswctype (btowc (ch), wt))
+ SET_LIST_BIT (ch);
+
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+
+ had_char_class = true;
+#else
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ if ( translate && (is_upper || is_lower)
+ && (ISUPPER (ch) || ISLOWER (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+#endif /* libc || wctype.h */
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY) {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY) {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this close
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by the next alternative or
+ when we know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_EBRACE);
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ if (re_syntax_options & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ FREE_STACK_RETURN (REG_ESUBREG);
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, (regnum_t) c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN (REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH (succeed);
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: \n");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+#ifdef emacs
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#endif /* not emacs */
+ }
+
+ regex_grow_registers (num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write the opcode OP into the byte at LOC and encode ARG as a
+   two-byte integer parameter in the bytes that follow it.  */
+
+static void
+store_op1 (re_opcode_t op,
+           unsigned char *loc,
+           int arg)
+{
+  /* The parameter starts immediately after the opcode byte.  */
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-parameter variant of `store_op1': write OP at LOC, then ARG1
+   and ARG2 as consecutive two-byte integers.  */
+
+static void
+store_op2(re_opcode_t op,
+          unsigned char *loc,
+          int arg1,
+          int arg2)
+{
+  /* ARG1 sits right after the opcode; ARG2 two bytes further on.  */
+  unsigned char *first = loc + 1;
+  unsigned char *second = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first, arg1);
+  STORE_NUMBER (second, arg2);
+}
+
+
+/* Open a three-byte gap at LOC by moving every byte from LOC up to
+   (but not including) END three positions higher, then store OP and
+   its two-byte integer parameter ARG in that gap.  */
+
+static void
+insert_op1(re_opcode_t op,
+           unsigned char *loc,
+           int arg,
+           unsigned char *end)
+{
+  register unsigned char *src;
+  register unsigned char *dst = end + 3;
+
+  /* Copy from the top down: source and destination overlap, so the
+     highest bytes have to be moved first.  */
+  for (src = end; src != loc; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Five-byte variant of `insert_op1': shift the bytes from LOC up to
+   END five positions higher, then store OP with the two two-byte
+   parameters ARG1 and ARG2 at LOC.  */
+
+static void
+insert_op2(re_opcode_t op,
+           unsigned char *loc,
+           int arg1,
+           int arg2,
+           unsigned char *end)
+{
+  register unsigned char *src;
+  register unsigned char *dst = end + 5;
+
+  /* Copy from the top down: source and destination overlap, so the
+     highest bytes have to be moved first.  */
+  for (src = end; src != loc; )
+    {
+      --src;
+      --dst;
+      *dst = *src;
+    }
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a `^' in PATTERN.  Return nonzero when the
+   character in front of that `^' is `(' or `|' and either the matching
+   RE_NO_BK_PARENS / RE_NO_BK_VBAR bit is set in SYNTAX or that
+   character is itself preceded by a backslash.  The caller must
+   guarantee that at least one character precedes the `^'.  */
+
+static boolean
+at_begline_loc_p(const char *pattern,
+                 const char *p,
+                 reg_syntax_t syntax)
+{
+  /* P[-1] is the `^', so P[-2] is the character in front of it.  */
+  const char *before = p - 2;
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  switch (*before)
+    {
+    case '(':
+      /* After a subexpression opener?  */
+      return (syntax & RE_NO_BK_PARENS) != 0 || escaped;
+
+    case '|':
+      /* After an alternation operator?  */
+      return (syntax & RE_NO_BK_VBAR) != 0 || escaped;
+
+    default:
+      return 0;
+    }
+}
+
+
+/* Counterpart of at_begline_loc_p for `$'.  P points just past the
+   `$' and PEND marks the end of the pattern; the caller must guarantee
+   P < PEND.  Return nonzero when what follows is a close-group or an
+   alternation operator under SYNTAX.  */
+
+static boolean
+at_endline_loc_p(const char *p,
+                 const char *pend,
+                 reg_syntax_t syntax)
+{
+  /* True when P starts a backslash pair that lies entirely inside
+     the pattern, so that P[1] may be inspected.  */
+  boolean escaped_pair = *p == '\\' && p + 1 < pend;
+  boolean before_close;
+  boolean before_alt;
+
+  /* Before the end of a subexpression?  */
+  if (syntax & RE_NO_BK_PARENS)
+    before_close = *p == ')';
+  else
+    before_close = escaped_pair && p[1] == ')';
+
+  /* Before an alternative?  */
+  if (syntax & RE_NO_BK_VBAR)
+    before_alt = *p == '|';
+  else
+    before_alt = escaped_pair && p[1] == '|';
+
+  return before_close || before_alt;
+}
+
+
+/* Report whether any entry currently on COMPILE_STACK records the
+   group number REGNUM: true if one does, false otherwise.  */
+
+static boolean
+group_in_compile_stack(compile_stack_type compile_stack,
+                       regnum_t regnum)
+{
+  int idx = compile_stack.avail - 1;
+
+  /* Scan from the most recently pushed entry down to the bottom.  */
+  while (idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+      idx--;
+    }
+
+  return false;
+}
+
+
+/* Finish compiling a range inside a bracket expression.  *P_PTR
+   points at the range's ending character in the uncompiled pattern,
+   which stops at PEND; the starting character is expected at `P[-2]'
+   and the `-' at `P[-1]'.  Every character from start to end
+   (inclusive) is translated and its bit is set in the compiled
+   bitmap B.
+
+   Returns a reg_errcode_t: REG_ERANGE when the pattern ends before the
+   ending character, or when the range is empty and SYNTAX has
+   RE_NO_EMPTY_RANGES; REG_NOERROR otherwise.
+
+   The names `p', `b' and `translate' are kept short on purpose so the
+   macros shared with `regex_compile' can be used here.  */
+
+static reg_errcode_t
+compile_range(const char **p_ptr,
+              const char *pend,
+              RE_TRANSLATE_TYPE translate,
+              reg_syntax_t syntax,
+              unsigned char *b)
+{
+  const char *p = *p_ptr;
+  unsigned int range_start, range_end;
+  /* Wider than `unsigned char' so the inclusive loop below can
+     terminate even when `range_end' is the largest character.  */
+  unsigned code;
+
+  /* Nothing left to serve as the ending character.  */
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* Read both endpoints through `unsigned char' so a pattern byte
+     with the high bit set does not yield a negative endpoint.  They
+     are fetched untranslated; translation happens per character in
+     the loop below.  */
+  range_start = ((const unsigned char *) p)[-2];
+  range_end = ((const unsigned char *) p)[0];
+
+  /* Advance the caller's pattern pointer past the ending character.  */
+  (*p_ptr)++;
+
+  /* Start beyond end means an empty range; whether that is an error
+     depends on the syntax.  */
+  if (range_start > range_end)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  code = range_start;
+  while (code <= range_end)
+    {
+      SET_LIST_BIT (TRANSLATE (code));
+      code++;
+    }
+
+  return REG_NOERROR;
+}
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ The compiled pattern is walked one opcode at a time. Opcodes that
+ consume a character mark their possible first characters in the
+ fastmap and end the current path; opcodes that can match the empty
+ string are stepped over. Each alternative target reached through an
+ on_failure_jump is pushed on a local failure stack and examined once
+ the current path is finished.
+
+ Returns 0 if we succeed, -2 if an internal error (here: pushing an
+ alternative on the failure stack failed). */
+
+int
+re_compile_fastmap(struct re_pattern_buffer *bufp)
+{
+ int j, k;
+#ifdef MATCH_MAY_ALLOCATE
+ fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+ char *destination;
+#endif
+
+ register char *fastmap = bufp->fastmap;
+ unsigned char *pattern = bufp->buffer;
+ unsigned char *p = pattern;
+ register unsigned char *pend = pattern + bufp->used;
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (1)
+ {
+ if (p == pend || *p == succeed)
+ {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY ())
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail].pointer; /* Resume at the most recently saved alternative. */
+
+ continue;
+ }
+ else
+ break;
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ goto done;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+ case exactn:
+ fastmap[p[1]] = 1; /* *p is the count byte; p[1] is the first literal character. */
+ break;
+
+
+ case charset:
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) /* *p is the bitmap length in bytes. */
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) /* Within the map, allow every character whose bit is clear. */
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ {
+ int fastmap_newline = fastmap['\n']; /* Saved so the newline entry can be restored below. */
+
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j; /* Follow the jump: j is relative to the position after the operand. */
+ if (j > 0)
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) *p != on_failure_jump
+ && (re_opcode_t) *p != succeed_n)
+ continue;
+
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend)
+ {
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
+ {
+ RESET_FAIL_STACK ();
+ return -2;
+ }
+ }
+ else
+ bufp->can_be_null = 1;
+
+ if (succeed_n_p)
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += 2;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 4; /* Rewind to just after the opcode (assuming the extract above advanced p by 2). */
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ p += 4; /* Skip its two two-byte parameters. */
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ p += 2; /* Skip the two one-byte operands. */
+ continue;
+
+
+ default:
+ abort (); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+
+ done:
+ RESET_FAIL_STACK ();
+ return 0;
+} /* re_compile_fastmap */
+
+/* Attach caller-provided register storage to REGS: NUM_REGS
+   registers whose start and end offsets live in STARTS and ENDS.
+   Later matches that use BUFP together with REGS record register
+   information in that memory.  STARTS and ENDS must come from the
+   malloc library routine and each be at least
+   NUM_REGS * sizeof (regoff_t) bytes long.
+
+   When NUM_REGS is 0, REGS is emptied instead and subsequent matches
+   should allocate their own register data.
+
+   Unless this function is called, the first search or match using
+   BUFP will allocate its own register data, without freeing the old
+   data.  */
+
+void
+re_set_registers(struct re_pattern_buffer *bufp,
+                 struct re_registers *regs,
+                 unsigned num_regs,
+                 regoff_t *starts,
+                 regoff_t *ends)
+{
+  /* No registers supplied: mark the buffer as having none.  */
+  if (num_regs == 0)
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = (regoff_t *) 0;
+      return;
+    }
+
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+
+/* Searching routines. */
+
+/* Single-string front end to re_search_2: STRING (of length SIZE) is
+   passed as the second string with an empty first string, and the
+   stop position is fixed at SIZE, so the caller cannot choose where
+   matching stops.  The result of re_search_2 is returned unchanged.  */
+
+int
+re_search(struct re_pattern_buffer *bufp,
+          const char *string,
+          int size,
+          int startpos,
+          int range,
+          struct re_registers *regs)
+{
+  /* Matching may run to the very end of STRING.  */
+  const int stop = size;
+
+  return re_search_2 (bufp, NULL, 0, string, size,
+                      startpos, range, regs, stop);
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on (or at STARTPOS - 1 and
+   downwards when RANGE is negative).
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  RANGE is clamped so that no start position falls outside
+   [0, SIZE1 + SIZE2].  The clamp never forms STARTPOS + RANGE, so a
+   very large RANGE (e.g. INT_MAX, meaning "up to the end") cannot
+   overflow and flip the search direction.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match (or if STARTPOS is out of range), or -2 if
+   error (such as failure stack overflow).  */
+
+int
+re_search_2(struct re_pattern_buffer *bufp,
+            const char *string1,
+            int size1,
+            const char *string2,
+            int size2,
+            int startpos,
+            int range,
+            struct re_registers *regs,
+            int stop)
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register RE_TRANSLATE_TYPE translate = bufp->translate;
+  int total_size = size1 + size2;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.
+     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.
+     STARTPOS is known to lie in [0, TOTAL_SIZE] here, so both bounds
+     below are representable and no signed overflow can occur.  */
+  if (range < -startpos)
+    range = -startpos;
+  else if (range > total_size - startpos)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern that must be anchored.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+#ifdef emacs
+  /* In a forward search for something that starts with \=.
+     don't keep searching past point.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+    {
+      range = PT - startpos;
+      if (range <= 0)
+        return -1;
+    }
+#endif /* emacs */
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    {
+      if (re_compile_fastmap (bufp) == -2)
+        return -2;
+    }
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)        /* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* When the scan starts in string1 and would run into
+                 string2, stop the skip loop at the end of string1.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              /* Advance STARTPOS by however many characters we skipped.  */
+              startpos += irange - range;
+            }
+          else                  /* Searching backwards.  */
+            {
+              register char c = (size1 == 0 || startpos >= size1
+                                 ? string2[startpos - size1]
+                                 : string1[startpos]);
+
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2_internal (bufp, string1, size1, string2, size2,
+                                 startpos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+      alloca (0);
+#endif
+#endif
+
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2' into an offset from the beginning of that string. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
+
+/* Macros for dealing with the split strings in re_match_2.  They refer to
+   the matcher's locals (`d', `dend', `end_match_1', ...) by name.  */
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d.  While `d' sits at `dend',
+   this switches over to string2, or jumps to `fail' if already there.  */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+/* Test if at very beginning or at very end of the virtual concatenation
+   of `string1' and `string2'.  If only one string, it's `string2'.
+   Note: AT_STRINGS_BEG is also true for any D whenever `size2' is 0.  */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+
+/* Free everything we malloc.  */
+#ifdef MATCH_MAY_ALLOCATE
+/* Hand VAR to REGEX_FREE if it is non-null, then reset it to NULL.
+   VAR must be a plain modifiable pointer variable.  The cast to
+   `void *' is applied here, to the value handed to REGEX_FREE, and not
+   by the caller: a cast expression is not an lvalue in ISO C, so
+   `(void*) var = NULL' does not compile on conforming compilers.
+   The do/while wrapper makes the macro a single statement.  */
+#define FREE_VAR(var) \
+  do { \
+    if (var) \
+      REGEX_FREE ((void *) (var)); \
+    (var) = NULL; \
+  } while (0)
+/* Release the failure stack and every register vector used by the
+   matcher.  Expects `fail_stack' and the register variables named
+   below to be in scope at the point of use.  */
+#define FREE_VARIABLES() \
+  do { \
+    REGEX_FREE_STACK (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+  } while (0)
+#else
+#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* Single-string variant of re_match_2: STRING (of length SIZE) is the
+   only text, matching starts at POS and may run to the end of STRING.  */
+
+int
+re_match(struct re_pattern_buffer *bufp,
+         const char *string,
+         int size,
+         int pos,
+         struct re_registers *regs)
+{
+  /* No first string is supplied; STRING goes in the second slot and the
+     stop index is SIZE.  */
+  const int matched = re_match_2_internal (bufp, NULL, 0, string, size,
+                                           pos, regs, size);
+
+#if !defined REGEX_MALLOC && defined C_ALLOCA
+  alloca (0);
+#endif
+
+  return matched;
+}
+#endif /* not emacs */
+
+static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
+ int len, char *translate));
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2(struct re_pattern_buffer *bufp,
+           const char *string1,
+           int size1,
+           const char *string2,
+           int size2,
+           int pos,
+           struct re_registers *regs,
+           int stop)
+{
+  /* The real work happens in re_match_2_internal; this wrapper passes
+     every argument through unchanged and returns its result.  */
+  const int matched = re_match_2_internal (bufp, string1, size1,
+                                           string2, size2,
+                                           pos, regs, stop);
+
+#if !defined REGEX_MALLOC && defined C_ALLOCA
+  alloca (0);
+#endif
+
+  return matched;
+}
+
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+re_match_2_internal(struct re_pattern_buffer *bufp,
+ const char *string1,
+ int size1,
+ const char *string2,
+ int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop)
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ unsigned char *just_past_start_mem = 0;
+
+ /* We use this to map every character in the string. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ size_t num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **regstart, **regend;
+#endif
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **old_regstart, **old_regend;
+#endif
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **best_regstart, **best_regend;
+#endif
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
+ /* Used when we pop values we don't care about. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+#ifdef MATCH_MAY_ALLOCATE
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* MATCH_MAY_ALLOCATE */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is:\n");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set || best_match_p)
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ succeed_label:
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+ }
+ else
+ {
+ /* These braces fend off a "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert (bufp->regs_allocated == REGS_FIXED);
+ }
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
+ mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ FREE_VARIABLES ();
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+ case succeed:
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");
+ goto succeed_label;
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ just_past_start_mem = p;
+
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
+ r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if (old_regend[r] >= regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+
+ /* Do this because we've match some characters. */
+ SET_REGS_MATCHED ();
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+#endif
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+#endif
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_pop_jump' of this case. Examine what
+ follows. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ else if ((re_opcode_t) *p2 == charset)
+ {
+#ifdef DEBUG
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+#endif
+
+#if 0
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
+ && (p2[2 + p1[5] / BYTEWIDTH]
+ & (1 << (p1[5] % BYTEWIDTH)))))
+#else
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
+ && (p2[2 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+#endif
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < (int) p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ active_reg_t dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+ unconditional_jump:
+#ifdef _LIBC
+ DEBUG_PRINT2 ("\n%p: ", p);
+#else
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+#endif
+ /* Note fall through. */
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+#ifdef _LIBC
+ DEBUG_PRINT2 ("(to %p).\n", p);
+#else
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+#endif
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
+#endif
+ }
+ else if (mcnt == 0)
+ {
+#ifdef _LIBC
+ DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
+#else
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+#endif
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
+#endif
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
+#else
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+#endif
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+#if 0
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+ macro and introducing temporary variables works around the bug. */
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+#else
+ case wordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ break;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ break;
+ goto fail;
+ }
+
+ case notwordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ goto fail;
+
+ prevchar = WORDCHAR_P (d - 1);
+ thischar = WORDCHAR_P (d);
+ if (prevchar != thischar)
+ goto fail;
+ break;
+ }
+#endif
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p(unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info)
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ /* Walk the compiled pattern from just inside the group until we either
+ reach its stop_memory (the group can match empty), hit something that
+ cannot match empty, or run off END. */
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ /* Step over the opcode byte, then read its jump offset into mcnt
+ (the macro also advances p1 past the number). */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ /* A negative offset is not examined any further here: the opcode
+ and its number have been skipped and the outer loop continues. */
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ /* p1[mcnt-3] is the opcode byte three bytes before the jump
+ target, i.e. where a trailing jump_past_alt would sit. */
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ /* Check only the alternative's body: stop before its
+ trailing jump_past_alt. */
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ break;
+
+
+ case stop_memory:
+ /* The register number following stop_memory must be the one *P
+ pointed at on entry, i.e. this closes the group we were asked
+ about. Report success and leave *P one past that number. */
+ assert (p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ /* Everything else is delegated; the helper advances p1 itself
+ when the opcode can match the empty string. */
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ /* Reached END without seeing the matching stop_memory. */
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups. */
+
+static boolean
+alt_match_null_string_p(unsigned char *p,
+                        unsigned char *end,
+                        register_info_type *reg_info)
+{
+  unsigned char *cursor;
+  int offset;
+
+  /* Scan the single alternative [P, END).  Every opcode in it must be
+     able to match the empty string; the first one that cannot makes
+     the whole alternative fail the test.  */
+  for (cursor = p; cursor < end; )
+    {
+      if ((re_opcode_t) *cursor == on_failure_jump)
+        {
+          /* It's a loop: step over the opcode, read its jump offset
+             and continue scanning from the jump target.  */
+          cursor++;
+          EXTRACT_NUMBER_AND_INCR (offset, cursor);
+          cursor += offset;
+          continue;
+        }
+
+      /* Any other opcode is judged by the shared helper, which also
+         advances CURSOR past the opcode when it answers true.  */
+      if (!common_op_match_null_string_p (&cursor, end, reg_info))
+        return false;
+    }
+
+  /* Ran through the whole alternative without finding an opcode that
+     needs to consume text.  */
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Sets P to one after the op and its arguments, if any. */
+
+static boolean
+common_op_match_null_string_p(unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info)
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ /* Dispatch on the opcode at *P; p1 is left pointing just past the
+ opcode byte. *P is only updated (at the bottom) when the opcode can
+ match the empty string. */
+ switch ((re_opcode_t) *p1++)
+ {
+ /* These opcodes are accepted as-is: nothing beyond the opcode byte
+ is skipped and the answer is true. */
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ /* p1 now points at the group's register number, which is what
+ group_match_null_string_p expects to be handed. */
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ /* Only record the result the first time; an already-set value is
+ left alone. */
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ /* Only forward (non-negative) jumps are followed; a backward jump
+ is reported as unable to match the empty string. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ /* Zero required repetitions: step back to the first number
+ after the opcode (presumably the jump offset -- this relies on
+ each number occupying two bytes), read it and take the jump. */
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate:
+ /* A back reference can match empty only if the group it refers to
+ was recorded as able to match empty.
+ NOTE(review): unlike the other cases, p1 is not advanced past the
+ register-number byte here, so *P ends up pointing at that byte
+ rather than one past the op's argument -- confirm whether this
+ is intended. */
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4;
+ /* Falls through to the default case: false is returned and p1 is
+ never stored back into *P, so the increment above has no effect
+ visible to the caller. */
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ /* Success: tell the caller where scanning should resume. */
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+ bytes; nonzero otherwise. */
+
+static int
+bcmp_translate(const char *s1,
+               const char *s2,
+               register int len,
+               RE_TRANSLATE_TYPE translate)
+{
+  /* View both inputs as unsigned bytes so they can index TRANSLATE.  */
+  const unsigned char *lhs = (const unsigned char *) s1;
+  const unsigned char *rhs = (const unsigned char *) s2;
+
+  /* Compare byte by byte through the translation table; LEN counts the
+     bytes still to be compared.  */
+  for (; len != 0; len--, lhs++, rhs++)
+    {
+      if (translate[*lhs] != translate[*rhs])
+        return 1;
+    }
+
+  /* All LEN translated bytes were equal.  */
+  return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry.
+
+ We call regex_compile to do the actual compilation. */
+
+const char *
+re_compile_pattern(const char *pattern,
+                   size_t length,
+                   struct re_pattern_buffer *bufp)
+{
+  reg_errcode_t status;
+
+  /* Set up the GNU-interface defaults before compiling:
+     - registers are not yet allocated (GNU code assumes at least
+       RE_NREGS registers will be set, with at least one extra at -1);
+     - sub-expression reporting stays enabled, because GNU callers ask
+       for (or decline) register information through the REGS argument
+       of re_match etc., not through no_sub;
+     - anchors match at newlines.  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+  bufp->no_sub = 0;
+  bufp->newline_anchor = 1;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+
+  /* A zero status means the pattern compiled; anything else is turned
+     into its message string.  */
+  return status ? gettext (re_error_msgid[(int) status]) : NULL;
+}
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined (_REGEX_RE_COMP) || defined (_LIBC)
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* BSD-compatible compile entry point.
+
+   Compiles S into the single static pattern buffer `re_comp_buf'.
+   If S is null, nothing is compiled: the call only reports whether a
+   previously set-up buffer exists.
+
+   Returns a null pointer on success, otherwise a message string
+   describing the problem.  */
+
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec below without link errors.  */
+weak_function
+#endif
+re_comp (const char *s)
+{
+  reg_errcode_t ret;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+        return gettext ("No previous regular expression");
+      return 0;
+    }
+
+  /* First use: allocate the pattern buffer and the fastmap together.  */
+  if (!re_comp_buf.buffer)
+    {
+      re_comp_buf.buffer = (unsigned char *) malloc (200);
+      if (re_comp_buf.buffer == NULL)
+        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (re_comp_buf.fastmap == NULL)
+        {
+          /* Roll back the buffer allocation.  Otherwise a later
+             re_comp (NULL) would see a non-null buffer and claim that a
+             previous expression exists although nothing was compiled,
+             and the fastmap allocation would never be retried.  */
+          free (re_comp_buf.buffer);
+          re_comp_buf.buffer = NULL;
+          re_comp_buf.allocated = 0;
+          return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
+        }
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+  if (!ret)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return (char *) gettext (re_error_msgid[(int) ret]);
+}
+
+
+/* BSD-compatible search entry point.
+
+   Searches the null-terminated string S with the pattern held in the
+   static buffer `re_comp_buf', trying every start position from 0 to
+   the length of S, and without requesting register information.
+
+   Returns 1 if re_search reports a non-negative result, 0 otherwise.  */
+
+int
+#ifdef _LIBC
+weak_function
+#endif
+re_exec (const char *s)
+{
+  const int len = strlen (s);
+  return
+    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
+}
+
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+/* POSIX compile entry point.
+
+   PREG is filled in from scratch; PATTERN is a null-terminated
+   expression; CFLAGS is a combination of REG_EXTENDED, REG_ICASE,
+   REG_NEWLINE and REG_NOSUB.
+
+   Returns 0 on success, otherwise a nonzero reg_errcode_t value cast
+   to int.  On failure the case-folding table allocated here for
+   REG_ICASE is released and `translate' is reset to null, so a caller
+   that does not call regfree after a failed regcomp does not leak it
+   (and one that does call regfree remains safe).  */
+
+int
+regcomp(regex_t *preg,
+        const char *pattern,
+        int cflags)
+{
+  reg_errcode_t ret;
+  reg_syntax_t syntax
+    = (cflags & REG_EXTENDED) ?
+      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+  /* regex_compile will allocate the space for the compiled pattern.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Don't bother to use a fastmap when searching.  This simplifies the
+     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
+     characters after newlines into the fastmap.  This way, we just try
+     every character.  */
+  preg->fastmap = 0;
+  /* Zero this explicitly so that the field never holds an
+     indeterminate value in a caller-supplied, uninitialized PREG.  */
+  preg->fastmap_accurate = 0;
+
+  if (cflags & REG_ICASE)
+    {
+      unsigned i;
+
+      preg->translate
+        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      /* Map uppercase characters to corresponding lowercase ones.  */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+    }
+  else
+    preg->translate = NULL;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = !!(cflags & REG_NOSUB);
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern.  */
+  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+  /* Compilation failed: give back the translate table allocated above.
+     The pointer is cleared so that a subsequent regfree is harmless.  */
+  if (ret && preg->translate != NULL)
+    {
+      free (preg->translate);
+      preg->translate = NULL;
+    }
+
+  return (int) ret;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+/* POSIX search entry point.
+
+   Searches STRING (null-terminated) with the compiled pattern PREG.
+   When PREG was compiled without REG_NOSUB and NMATCH is nonzero, the
+   first NMATCH entries of PMATCH receive the sub-match offsets on a
+   successful match; otherwise PMATCH is not touched.  EFLAGS may
+   contain REG_NOTBOL and/or REG_NOTEOL.
+
+   Returns REG_NOERROR (as int) when a match is found and REG_NOMATCH
+   otherwise, including when the temporary register arrays cannot be
+   allocated.  */
+
+int
+regexec(const regex_t *preg,
+        const char *string,
+        size_t nmatch,
+        regmatch_t pmatch[],
+        int eflags)
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* Work on a private copy so the caller's (const) pattern buffer is
+     not modified by the per-call flags below.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* One of the two arrays may have been allocated; release
+             whichever was, so the early return does not leak it.
+             free (NULL) is a no-op.  */
+          free (regs.start);
+          free (regs.end);
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror(int errcode,
+         const regex_t *preg,
+         char *errbuf,
+         size_t errbuf_size)
+{
+  const int table_len
+    = (int) (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]));
+  const char *text;
+  size_t needed;
+
+  /* Only error codes returned by the rest of the code should be passed
+     to this routine.  Anything outside the message table means the
+     program has a bug, so dump core rather than guess.  */
+  if (!(errcode >= 0 && errcode < table_len))
+    abort ();
+
+  text = gettext (re_error_msgid[errcode]);
+
+  /* Size needed to hold the whole message, terminating null included.
+     This is what we return regardless of how much was copied.  */
+  needed = strlen (text) + 1;
+
+  /* No room at all: report the required size and leave ERRBUF alone.  */
+  if (errbuf_size == 0)
+    return needed;
+
+  if (needed <= errbuf_size)
+    strcpy (errbuf, text);
+  else
+    {
+      /* Message too long: copy what fits and terminate it ourselves.  */
+      strncpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree(regex_t *preg)
+{
+  /* Release the three heap blocks PREG may own.  free () accepts a
+     null pointer, so no explicit checks are needed.  */
+  free (preg->buffer);
+  free (preg->fastmap);
+  free (preg->translate);
+
+  /* Reset the fields so PREG no longer refers to freed storage.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  preg->fastmap = NULL;
+  preg->fastmap_accurate = 0;
+
+  preg->translate = NULL;
+}
+
+#endif /* not emacs */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.h b/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.h
new file mode 100644
index 00000000..8471cba0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/regex_win32.h
@@ -0,0 +1,548 @@
+/* LGPLd GNU regex for Native WIN32 */
+
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 2003 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+/* Added June 2003 Neal Richter, RightNow Technologies */
+
+/* note that this version is significantly different from the original */
+/* version 0.12 GNU source code. It compiles and works on Native WIN32. */
+
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+ Copyright (C) 1985,89,90,91,92,93,95,96,97 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+ /* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+ /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+ /* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+ typedef long int s_reg_t;
+ typedef unsigned long int active_reg_t;
+
+ /* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+ typedef unsigned long int reg_syntax_t;
+
+ /* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+ /* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+ /* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+ /* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+ /* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+ /* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+ /* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+ /* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+ /* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+ /* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+ /* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+ /* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+ /* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+ /* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+ /* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+ /* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+ /* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+ /* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+ /* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+ /* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+ /* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+ /* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+ extern reg_syntax_t re_syntax_options;
+
+ /* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+ /* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+ /* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+ /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+ /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ /* [[[end syntaxes]]] */
+
+ /* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+ /* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+ /* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+ /* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+ /* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+ /* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+ /* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+ /* POSIX `eflags' bits (i.e., information for regexec). */
+
+ /* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+ /* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+ /* If any error codes are removed, changed, or added, update the
+ `re_error_msgid' table in regex.c. */
+ typedef enum /* regerror indexes its message table with these values and aborts on any value outside that table. */
+ {
+ REG_NOERROR = 0, /* Success; regexec returns this (zero) when a match was found. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+ } reg_errcode_t;
+
+ /* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+#define RE_TRANSLATE_TYPE char *
+#endif
+
+ struct re_pattern_buffer
+ {
+ /* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes.
+ `regfree' passes this pointer to free and resets it to NULL. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. Reset to 0 by `regfree'. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. Reset to 0 by `regfree'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches.
+ `regfree' passes a non-null fastmap to free, so by the time
+ regfree is called it must be either null or malloc'ed. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched.
+ `regfree' passes a non-null table to free, so by the time
+ regfree is called it must be either null or malloc'ed. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap.
+ Also cleared by `regfree'. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+ /* [[[end pattern_buffer]]] */
+ };
+
+ typedef struct re_pattern_buffer regex_t;
+
+ /* Type for byte offsets within the string. POSIX mandates this. */
+ typedef int regoff_t;
+
+
+ /* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ It is a structure of arrays: start[i] and end[i] together describe
+ register i. regexec copies these pairs into the caller's
+ regmatch_t array (rm_so / rm_eo) after a successful search. */
+ struct re_registers
+ {
+ unsigned num_regs; /* Number of registers `start' and `end' can hold. */
+ regoff_t *start; /* start[i]: offset where register i's match begins. */
+ regoff_t *end; /* end[i]: offset where register i's match ends. */
+ };
+
+
+ /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+ /* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays.
+ regexec fills element r of the caller's pmatch array from the r-th
+ `start' and `end' entries of a temporary `re_registers'. */
+ typedef struct
+ {
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+ } regmatch_t;
+
+ /* Declarations for routines. */
+
+ /* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if defined(__STDC__) || defined(_WIN32)
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+ /* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+ extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+ /* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+ extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+ /* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if was an
+ internal error. */
+ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+ /* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+ extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+ /* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+ extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+ /* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+ extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+ /* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+ extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+ extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#ifdef _REGEX_RE_COMP
+#ifndef _CRAY
+ /* 4.2 bsd compatibility. */
+ extern char *re_comp _RE_ARGS ((const char *));
+ extern int re_exec _RE_ARGS ((const char *));
+#endif
+#endif
+
+ /* POSIX compatibility. */
+ int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+ int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+ size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+ void regfree _RE_ARGS ((regex_t *preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+ Local variables:
+ make-backup-files: t
+ version-control: t
+ trim-versions-without-asking: nil
+ End:
+*/
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/snprintf.c b/debian/htdig/htdig-3.2.0b6/htlib/snprintf.c
new file mode 100644
index 00000000..7986023d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/snprintf.c
@@ -0,0 +1,75 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_SNPRINTF
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+#endif
+
+/*
+ * snprintf --
+ * Bounded version of sprintf.
+ *
+ * WARNING: despite the name, this fallback does NOT bound its output.
+ * The size argument `n' is overwritten with 0 and never consulted, and
+ * the formatting is done by vsprintf, so the caller must guarantee that
+ * `str' is large enough for the fully formatted result.
+ *
+ * Returns the value reported by vsprintf, or strlen(str) when
+ * SPRINTF_RET_CHARPNT is defined.
+ *
+ * NOTE(review): the SPRINTF_RET_CHARPNT branch calls strlen, but no
+ * <string.h> include is visible in this file -- confirm that htconfig.h
+ * provides a declaration on the platforms that take that branch.
+ *
+ * PUBLIC: #ifndef HAVE_SNPRINTF
+ * PUBLIC: #ifdef __STDC__
+ * PUBLIC: int snprintf __P((char *, size_t, const char *, ...));
+ * PUBLIC: #else
+ * PUBLIC: int snprintf();
+ * PUBLIC: #endif
+ * PUBLIC: #endif
+ */
+int
+#ifdef __STDC__
+snprintf(char *str, size_t n, const char *fmt, ...)
+#else
+snprintf(str, n, fmt, va_alist)
+ char *str;
+ size_t n;
+ const char *fmt;
+ va_dcl
+#endif
+{
+ va_list ap;
+ int rval;
+
+ n = 0; /* the size limit is discarded here; nothing below reads n */
+#ifdef __STDC__
+ va_start(ap, fmt);
+#else
+ va_start(ap);
+#endif
+#ifdef SPRINTF_RET_CHARPNT
+ (void)vsprintf(str, fmt, ap); /* unbounded write into str; its return value is ignored in this branch */
+ va_end(ap);
+ return (strlen(str)); /* length is recomputed from the formatted string */
+#else
+ rval = vsprintf(str, fmt, ap); /* unbounded write into str */
+ va_end(ap);
+ return (rval);
+#endif
+}
+#endif /* HAVE_SNPRINTF */
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/strcasecmp.cc b/debian/htdig/htdig-3.2.0b6/htlib/strcasecmp.cc
new file mode 100644
index 00000000..1dfa74e2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/strcasecmp.cc
@@ -0,0 +1,101 @@
+//
+// strcasecmp.cc
+//
+// strcasecmp: replacement of the strcasecmp functions for architectures that do
+// not have it.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: strcasecmp.cc,v 1.10 2004/05/28 13:15:22 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "lib.h"
+#include <ctype.h>
+
+//*****************************************************************************
+// int mystrcasecmp(const char *str1, const char *str2)
+//   Case-insensitive comparison of two C strings.
+//   Returns 0 when the strings are equal ignoring case; otherwise the
+//   difference between the lower-cased bytes at the first position where
+//   they differ (negative when str1 sorts first, positive otherwise).
+//   NULL handling: two NULLs compare equal, a NULL str1 alone yields 1
+//   and a NULL str2 alone yields -1.
+//
+int mystrcasecmp(const char *str1, const char *str2)
+{
+    if (!str1)
+        return str2 ? 1 : 0;
+    if (!str2)
+        return -1;
+
+    for (;; str1++, str2++)
+    {
+        // Go through unsigned char so bytes >= 0x80 are valid for tolower.
+        int lhs = tolower((unsigned char)*str1);
+        int rhs = tolower((unsigned char)*str2);
+
+        // Stop at the first mismatch or as soon as either string ends.
+        if (lhs != rhs || *str1 == '\0' || *str2 == '\0')
+            return lhs - rhs;
+    }
+}
+
+
+//#define tolower(ch) (isupper(ch) ? (ch) + 'a' - 'A' : (ch))
+//*****************************************************************************
+// int mystrncasecmp(const char *str1, const char *str2, int n)
+//   Case-insensitive comparison of at most n leading characters.
+//   Returns 0 when the compared prefixes are equal ignoring case, and
+//   also whenever n is zero or negative; otherwise the difference between
+//   the lower-cased bytes at the first position where they differ.
+//   NULL handling is checked before n: two NULLs compare equal, a NULL
+//   str1 alone yields 1 and a NULL str2 alone yields -1.
+//
+int mystrncasecmp(const char *str1, const char *str2, int n)
+{
+    if (!str1)
+        return str2 ? 1 : 0;
+    if (!str2)
+        return -1;
+
+    for (int left = n; left > 0; left--, str1++, str2++)
+    {
+        // Go through unsigned char so bytes >= 0x80 are valid for tolower.
+        int lhs = tolower((unsigned char)*str1);
+        int rhs = tolower((unsigned char)*str2);
+
+        // Stop at the first mismatch or as soon as either string ends.
+        if (lhs != rhs || *str1 == '\0' || *str2 == '\0')
+            return lhs - rhs;
+    }
+
+    // Character budget exhausted (or never positive): prefixes are equal.
+    return 0;
+}
+
+
+//*****************************************************************************
+// char *strdup(char *str)
+//   Returns a freshly allocated copy of the NUL-terminated string str.
+//   The copy is obtained with new char[], not malloc, so releasing it
+//   with free() would mismatch the allocator.
+//   str must not be NULL: it is passed straight to strlen.
+//
+char *strdup(char *str)
+{
+    size_t bytes = strlen(str) + 1;     // room for the terminating NUL
+    char *copy = new char[bytes];
+
+    memcpy(copy, str, bytes);
+    return copy;
+}
+
+
+//*****************************************************************************
+// const char *mystrcasestr(const char *s, const char *pattern)
+//   Case-insensitive substring search.
+//   Returns a pointer to the first position in s at which pattern matches
+//   ignoring case, or 0 when there is no such position.
+//   A NULL s or NULL pattern yields 0 instead of being dereferenced; the
+//   comparison functions in this file tolerate NULL in the same spirit.
+//   An empty pattern matches at the start of any non-empty s; when s is
+//   itself empty the result is 0, because no position is ever examined.
+//
+const char *
+mystrcasestr(const char *s, const char *pattern)
+{
+    if (!s || !pattern)
+        return 0;
+
+    // mystrncasecmp takes an int count, so narrow explicitly once.
+    int length = (int) strlen(pattern);
+
+    for (; *s; s++)
+    {
+        if (mystrncasecmp(s, pattern, length) == 0)
+            return s;
+    }
+    return 0;
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/strerror.c b/debian/htdig/htdig-3.2.0b6/htlib/strerror.c
new file mode 100644
index 00000000..761dd7b0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/strerror.c
@@ -0,0 +1,86 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_STRERROR
+
+/*
+ * strerror --
+ *	Return the string associated with an errno.
+ *
+ *	For 0 <= num < sys_nerr the matching sys_errlist entry is returned.
+ *	Any other value, negative ones included, yields "Unknown error: N"
+ *	built in a static buffer, where N is num converted to unsigned.
+ *	That buffer is overwritten by the next such call.
+ *
+ * PUBLIC: #ifndef HAVE_STRERROR
+ * PUBLIC: char *strerror __P((int));
+ * PUBLIC: #endif
+ */
+char *
+strerror(num)
+	int num;
+{
+	extern int sys_nerr;
+	extern char *sys_errlist[];
+#undef	UPREFIX
+#define	UPREFIX	"Unknown error: "
+	static char ebuf[40] = UPREFIX;		/* 64-bit number + slop */
+	unsigned int errnum;
+	char *p, *t, tmp[40];
+
+	/*
+	 * Convert to unsigned so a negative num becomes a huge value that
+	 * fails the range test, instead of indexing before sys_errlist.
+	 */
+	errnum = num;
+	if (errnum < (unsigned int)sys_nerr)
+		return(sys_errlist[errnum]);
+
+	/* Do this by hand, so we don't include stdio(3). */
+	t = tmp;
+	do {				/* digits, least significant first */
+		*t++ = "0123456789"[errnum % 10];
+	} while (errnum /= 10);
+	for (p = ebuf + sizeof(UPREFIX) - 1;;) {	/* append them reversed */
+		*p++ = *--t;
+		if (t <= tmp)
+			break;
+	}
+	/*
+	 * ebuf is static: terminate explicitly so digits left over from an
+	 * earlier, longer number do not trail this one.
+	 */
+	*p = '\0';
+	return(ebuf);
+}
+
+#endif /* HAVE_STRERROR */
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/strptime.cc b/debian/htdig/htdig-3.2.0b6/htlib/strptime.cc
new file mode 100644
index 00000000..5cd28751
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/strptime.cc
@@ -0,0 +1,377 @@
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+
+/*
+ * Copyright (c) 1994 Powerdog Industries. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgement:
+ * This product includes software developed by Powerdog Industries.
+ * 4. The name of Powerdog Industries may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef lint
+/*
+static char copyright[] =
+"@(#) Copyright (c) 1994 Powerdog Industries. All rights reserved.";
+static char sccsid[] = "@(#)strptime.c 1.0 (Powerdog) 94/03/27";
+*/
+#endif /* not lint */
+
+#include <lib.h>
+
+#include <time.h>
+#include <ctype.h>
+#include <locale.h>
+#include <string.h>
+
+#define asizeof(a) ((int)(sizeof (a) / sizeof ((a)[0])))
+
+struct mydtconv {                      /* names and formats describing one date/time locale */
+    const char *abbrev_month_names[12];
+    const char *month_names[12];
+    const char *abbrev_weekday_names[7];
+    const char *weekday_names[7];
+    const char *time_format;           /* expansion of %X */
+    const char *sdate_format;          /* expansion of %x */
+    const char *dtime_format;          /* not read by mystrptime() */
+    const char *am_string;
+    const char *pm_string;
+    const char *ldate_format;          /* expansion of %C */
+};
+/* The table mystrptime() consults; read-only, so it and its string literals are const. */
+static const struct mydtconv En_US = {
+    { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" },
+    { "January", "February", "March", "April",
+      "May", "June", "July", "August",
+      "September", "October", "November", "December" },
+    { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" },
+    { "Sunday", "Monday", "Tuesday", "Wednesday",
+      "Thursday", "Friday", "Saturday" },
+    "%H:%M:%S",
+    "%m/%d/%y",
+    "%a %b %e %T %Z %Y",
+    "AM",
+    "PM",
+    "%A, %B, %e, %Y"
+};
+
+/*
+ * scan_number --
+ *    Read the run of decimal digits at *bufp and advance *bufp past it.
+ *    Returns the value, or -1 (leaving *bufp alone) when there is no digit
+ *    or the value would exceed 999999; refusing longer numbers keeps the
+ *    accumulator from overflowing on hostile input.
+ */
+static int
+scan_number(const char **bufp)
+{
+    const char *s = *bufp;
+    int value = 0;
+
+    if (!isdigit((unsigned char) *s))
+        return -1;
+
+    for (; isdigit((unsigned char) *s); s++)
+    {
+        if (value > 99999)
+            return -1;
+        value = value * 10 + (*s - '0');
+    }
+
+    *bufp = s;
+    return value;
+}
+
+/*
+ * mystrptime --
+ *    Parse buf according to fmt, a subset of the strptime conversions that
+ *    uses the built-in En_US names, and store what was recognised in *tm;
+ *    fields without a matching conversion are left untouched.  Parsing
+ *    stops quietly as soon as either string is exhausted.
+ *
+ *    Conversions handled: %% %C %c %D %R %r %T %X %x %j %M %S %H %I %k %l
+ *    %p %A %a %d %e %B %b %h %m %Y %y.  Any other character after '%' is
+ *    skipped without consuming input.
+ *
+ *    Returns a pointer to the first unparsed character of buf, or 0 when
+ *    the input does not match the format or a value is out of range.
+ */
+char *
+mystrptime(const char *buf, const char *fmt, struct tm *tm)
+{
+    char c;
+    const char *ptr;
+    int i, len = 0;
+
+    ptr = fmt;
+    while (*ptr != 0)
+    {
+        if (*buf == 0)
+            break;
+
+        c = *ptr++;
+
+        if (c != '%')
+        {
+            if (isspace((unsigned char) c))
+                while (*buf != 0 && isspace((unsigned char) *buf))
+                    buf++;
+            else if (c != *buf++)
+                return 0;
+            continue;
+        }
+
+        c = *ptr++;
+        switch (c)
+        {
+        case 0:
+            ptr--;      /* fmt ended in a lone '%': stay on its NUL so the loop stops */
+            /* FALLTHROUGH */
+        case '%':
+            if (*buf++ != '%')
+                return 0;
+            break;
+
+        /* Composite conversions: parse their expansion recursively. */
+        case 'C':
+            buf = mystrptime(buf, En_US.ldate_format, tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'c':
+            buf = mystrptime(buf, "%x %X", tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'D':
+            buf = mystrptime(buf, "%m/%d/%y", tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'R':
+            buf = mystrptime(buf, "%H:%M", tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'r':
+            buf = mystrptime(buf, "%I:%M:%S %p", tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'T':
+            buf = mystrptime(buf, "%H:%M:%S", tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'X':
+            buf = mystrptime(buf, En_US.time_format, tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'x':
+            buf = mystrptime(buf, En_US.sdate_format, tm);
+            if (buf == 0)
+                return 0;
+            break;
+
+        case 'j':
+            i = scan_number(&buf);
+            if (i < 0 || i > 365)
+                return 0;
+
+            tm->tm_yday = i;    /* NOTE(review): stored as read; POSIX %j is 1-based, tm_yday 0-based */
+            break;
+
+        case 'M':
+        case 'S':
+            if (*buf == 0 || isspace((unsigned char) *buf))
+                break;          /* field absent from the input: leave tm alone */
+
+            i = scan_number(&buf);
+            if (i < 0 || i > 59)
+                return 0;
+
+            if (c == 'M')
+                tm->tm_min = i;
+            else
+                tm->tm_sec = i;
+
+            /* Input reached whitespace: skip the rest of this format word too. */
+            if (*buf != 0 && isspace((unsigned char) *buf))
+                while (*ptr != 0 && !isspace((unsigned char) *ptr))
+                    ptr++;
+            break;
+
+        case 'H':
+        case 'I':
+        case 'k':
+        case 'l':
+            i = scan_number(&buf);
+            if (i < 0)
+                return 0;
+            if (c == 'H' || c == 'k') {
+                if (i > 23)
+                    return 0;
+            } else if (i > 12)  /* 12-hour clock: 12 is valid, %p folds it into 0..23 */
+                return 0;
+
+            tm->tm_hour = i;
+
+            if (*buf != 0 && isspace((unsigned char) *buf))
+                while (*ptr != 0 && !isspace((unsigned char) *ptr))
+                    ptr++;
+            break;
+
+        case 'p':
+            /* Works on the hour already in tm->tm_hour, so the hour must be parsed first. */
+            len = strlen(En_US.am_string);
+            if (mystrncasecmp(buf, En_US.am_string, len) == 0)
+            {
+                if (tm->tm_hour > 12)
+                    return 0;
+                if (tm->tm_hour == 12)
+                    tm->tm_hour = 0;
+                buf += len;
+                break;
+            }
+
+            len = strlen(En_US.pm_string);
+            if (mystrncasecmp(buf, En_US.pm_string, len) == 0)
+            {
+                if (tm->tm_hour > 12)
+                    return 0;
+                if (tm->tm_hour != 12)
+                    tm->tm_hour += 12;
+                buf += len;
+                break;
+            }
+
+            return 0;
+
+        case 'A':
+        case 'a':
+            for (i = 0; i < asizeof(En_US.weekday_names); i++)
+            {
+                len = strlen(En_US.weekday_names[i]);
+                if (mystrncasecmp(buf, En_US.weekday_names[i], len) == 0)
+                    break;
+
+                len = strlen(En_US.abbrev_weekday_names[i]);
+                if (mystrncasecmp(buf, En_US.abbrev_weekday_names[i], len) == 0)
+                    break;
+            }
+            if (i == asizeof(En_US.weekday_names))
+                return 0;
+
+            tm->tm_wday = i;
+            buf += len;         /* len belongs to whichever name matched */
+            break;
+
+        case 'd':
+        case 'e':
+            i = scan_number(&buf);
+            if (i < 0 || i > 31)
+                return 0;
+
+            tm->tm_mday = i;
+
+            if (*buf != 0 && isspace((unsigned char) *buf))
+                while (*ptr != 0 && !isspace((unsigned char) *ptr))
+                    ptr++;
+            break;
+
+        case 'B':
+        case 'b':
+        case 'h':
+            for (i = 0; i < asizeof(En_US.month_names); i++)
+            {
+                len = strlen(En_US.month_names[i]);
+                if (mystrncasecmp(buf, En_US.month_names[i], len) == 0)
+                    break;
+
+                len = strlen(En_US.abbrev_month_names[i]);
+                if (mystrncasecmp(buf, En_US.abbrev_month_names[i], len) == 0)
+                    break;
+            }
+            if (i == asizeof(En_US.month_names))
+                return 0;
+
+            tm->tm_mon = i;
+            buf += len;         /* len belongs to whichever name matched */
+            break;
+
+        case 'm':
+            i = scan_number(&buf);
+            if (i < 1 || i > 12)
+                return 0;
+
+            tm->tm_mon = i - 1;
+
+            if (*buf != 0 && isspace((unsigned char) *buf))
+                while (*ptr != 0 && !isspace((unsigned char) *ptr))
+                    ptr++;
+            break;
+
+        case 'Y':
+        case 'y':
+            if (*buf == 0 || isspace((unsigned char) *buf))
+                break;          /* field absent from the input: leave tm alone */
+
+            i = scan_number(&buf);
+            if (i < 0)
+                return 0;
+            if (c == 'y' && i < 69) /* Unix Epoch pivot year */
+                i += 100;
+            if (c == 'Y')
+                i -= 1900;
+            if (i < 0)
+                return 0;
+
+            tm->tm_year = i;
+
+            if (*buf != 0 && isspace((unsigned char) *buf))
+                while (*ptr != 0 && !isspace((unsigned char) *ptr))
+                    ptr++;
+            break;
+        }
+    }
+
+    return (char *) buf;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/timegm.c b/debian/htdig/htdig-3.2.0b6/htlib/timegm.c
new file mode 100644
index 00000000..67588d93
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/timegm.c
@@ -0,0 +1,142 @@
+/*
+ timegm.c
+
+ timegm: Portable version of timegm (mytimegm) for ht://Dig
+ Based on a version from the GNU C Library
+ and a previous implementation for ht://Dig
+
+ Part of the ht://Dig package <http://www.htdig.org/>
+ Copyright (c) 1999-2004 The ht://Dig Group
+ For copyright details, see the file COPYING in your distribution
+ or the GNU Library General Public License (LGPL) version 2 or later
+ <http://www.gnu.org/copyleft/lgpl.html>
+
+ $Id: timegm.c,v 1.18 2004/05/28 13:15:22 lha Exp $
+*/
+
+/* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Paul Eggert (eggert@twinsun.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+
+/* #define TEST_TIMEGM */
+
+#include <time.h>
+#ifdef TEST_TIMEGM
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+/* Copy the UTC breakdown of *t into *tp and return tp, or 0 when gmtime() fails. */
+static struct tm *my_mktime_gmtime_r (const time_t *t, struct tm *tp);
+
+static struct tm *my_mktime_gmtime_r (const time_t *t, struct tm *tp)
+{
+  const struct tm *utc = gmtime (t);
+  if (utc != 0)
+    *tp = *utc;
+  return utc != 0 ? tp : 0;
+}
+
+time_t __mktime_internal(struct tm *,
+                         struct tm *(*) (const time_t *, struct tm *),
+                         time_t *);
+
+/* Httimegm: convert broken-down UTC time *tmp to time_t via __mktime_internal(); clears tm_isdst. */
+time_t Httimegm(struct tm *tmp)
+{
+  static time_t gmtime_offset;   /* kept across calls; handed to __mktime_internal() each time */
+  tmp->tm_isdst = 0;
+  return __mktime_internal (tmp, my_mktime_gmtime_r, &gmtime_offset);
+}
+
+#ifdef TEST_TIMEGM
+
+void parse_time(char *s, struct tm *tm)   /* "YYYY.MM.DD hh:mm:ss" -> *tm in struct tm units; all zero if malformed */
+{
+  static const struct tm blank;           /* zero-initialised template */
+  if (sscanf(s, "%d.%d.%d %d:%d:%d", &tm->tm_year, &tm->tm_mon, &tm->tm_mday,
+             &tm->tm_hour, &tm->tm_min, &tm->tm_sec) != 6)
+    { *tm = blank; return; }              /* never adjust fields sscanf() did not fill */
+  tm->tm_year -= 1900;
+  tm->tm_mon--;
+}
+void print_time(struct tm *tm)   /* write *tm to stderr as YYYY.MM.DD hh:mm:ss, without a newline */
+{
+  const int year = tm->tm_year + 1900, month = tm->tm_mon + 1;
+  fprintf(stderr, "%04d.%02d.%02d %02d:%02d:%02d",
+          year, month, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec);
+}
+
+int time_equal(struct tm *tm1, struct tm *tm2)   /* 1 when year..second all match, else 0 */
+{
+  if (tm1->tm_year != tm2->tm_year || tm1->tm_mon != tm2->tm_mon ||
+      tm1->tm_mday != tm2->tm_mday)
+    return 0;                                    /* calendar dates differ */
+  return tm1->tm_hour == tm2->tm_hour &&
+         tm1->tm_min == tm2->tm_min &&
+         tm1->tm_sec == tm2->tm_sec;
+}
+
+int main(void)   /* round-trip each sample through Httimegm() and gmtime(); exit 0 only if all survive */
+{
+  char *test_dates[] =
+  {
+    "1970.01.01 00:00:00",
+    "1970.01.01 00:00:01",
+    "1972.02.05 23:59:59",
+    "1972.02.28 00:59:59",
+    "1972.02.28 23:59:59",
+    "1972.02.29 00:00:00",
+    "1972.03.01 13:00:04",
+    "1973.03.01 12:00:00",
+    "1980.01.01 00:00:05",
+    "1984.12.31 23:00:00",
+    "1997.06.05 17:55:35",
+    "1999.12.31 23:00:00",
+    "2000.01.01 00:00:05",
+    "2000.02.28 23:00:05",
+    "2000.02.29 23:00:05",
+    "2000.03.01 00:00:05",
+    "2007.06.05 17:55:35",
+    "2038.01.19 03:14:07",
+    0   /* end marker */
+  };
+  char **sample;
+  int failures = 0;
+
+  for (sample = test_dates; *sample != 0; sample++)
+    {
+      struct tm wanted, *got;
+      time_t stamp;
+      parse_time(*sample, &wanted);
+      stamp = Httimegm(&wanted);
+      got = gmtime(&stamp);
+      if (time_equal(&wanted, got))
+        continue;                 /* survived the round trip */
+      fprintf(stderr, "timegm() test failed!\n Original: ");
+      print_time(&wanted);
+      fprintf(stderr, "\n Converted: ");
+      print_time(got);
+      fprintf(stderr, "\n time_t: %ld\n", (long) stamp);
+      failures++;
+    }
+  exit(failures ? 1 : 0);
+}
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/vsnprintf.c b/debian/htdig/htdig-3.2.0b6/htlib/vsnprintf.c
new file mode 100644
index 00000000..dac21c23
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htlib/vsnprintf.c
@@ -0,0 +1,58 @@
+/* Part of the ht://Dig package <http://www.htdig.org/> */
+/* Copyright (c) 1999-2004 The ht://Dig Group */
+/* For copyright details, see the file COPYING in your distribution */
+/* or the GNU Library General Public License (LGPL) version 2 or later */
+/* <http://www.gnu.org/copyleft/lgpl.html> */
+
+
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999
+ * Sleepycat Software. All rights reserved.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_VSNPRINTF
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+#endif
+
+/*
+ * vsnprintf --
+ * Stand-in used when the C library has no vsnprintf(); formats via vsprintf().
+ * WARNING: the bound n is ignored, so str must be able to hold the whole result.
+ * PUBLIC: #ifndef HAVE_VSNPRINTF
+ * PUBLIC: int vsnprintf();
+ * PUBLIC: #endif
+ */
+int
+vsnprintf(str, n, fmt, ap)
+    char *str;
+    size_t n;
+    const char *fmt;
+    va_list ap;
+{
+    int written;
+    n = 0;                              /* the caller's bound is discarded, never consulted */
+#ifdef SPRINTF_RET_CHARPNT
+    (void)vsprintf(str, fmt, ap);
+    written = strlen(str);              /* this branch ignores vsprintf()'s return value */
+#else
+    written = vsprintf(str, fmt, ap);
+#endif
+    return (written);
+}
+#endif /* HAVE_VSNPRINTF */
+