summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc124
1 files changed, 124 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc b/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc
new file mode 100644
index 00000000..23518119
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc
@@ -0,0 +1,124 @@
+//
+// HtSGMLCodec.cc
+//
+// HtSGMLCodec: A Specialized HtWordCodec class to convert between SGML
+// ISO 8859-1 entities and high-bit characters.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtSGMLCodec.cc,v 1.6 2004/06/01 18:25:01 angusgb Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "HtSGMLCodec.h"
+#include "HtConfiguration.h"
+
+// Constructor: parses the appropriate parameters using the
+// encapsulated HtWordCodec class.
+// Only used in privacy.
+HtSGMLCodec::HtSGMLCodec()
+{
+ HtConfiguration* config= HtConfiguration::config();
+ int translate_latin1 = config->Boolean("translate_latin1", 1);
+ StringList *myTextFromList = new StringList(); // For &foo;
+ StringList *myNumFromList = new StringList(); // For &#nnn;
+ StringList *myToList = new StringList();
+ String myTextFromString(770); // Full text list
+
+ // Is this really the best way to do this?
+ if (!translate_latin1 )
+ {
+ myTextFromString = "&nbsp;";
+ }
+ else
+ {
+ // this set has been slightly modified in order to manage the &euro; entity
+ // the resulting charset is therefore a ISO-8859-1 partially moved to ISO-8859-15
+ myTextFromString = "&nbsp;|&iexcl;|&cent;|&pound;|&euro;|&yen;|&brvbar;|&sect;|";
+ myTextFromString << "&uml;|&copy;|&ordf;|&laquo;|&not;|&shy;|&reg;|&macr;|&deg;|";
+ myTextFromString << "&plusmn;|&sup2;|&sup3;|&acute;|&micro;|&para;|&middot;|&cedil;|";
+ myTextFromString << "&sup1;|&ordm;|&raquo;|&frac14;|&frac12;|&frac34;|&iquest;|&Agrave;|";
+ myTextFromString << "&Aacute;|&Acirc;|&Atilde;|&Auml;|&Aring;|&AElig;|&Ccedil;|&Egrave;|";
+ myTextFromString << "&Eacute;|&Ecirc;|&Euml;|&Igrave;|&Iacute;|&Icirc;|&Iuml;|&ETH;|";
+ myTextFromString << "&Ntilde;|&Ograve;|&Oacute;|&Ocirc;|&Otilde;|&Ouml;|&times;|&Oslash;|";
+ myTextFromString << "&Ugrave;|&Uacute;|&Ucirc;|&Uuml;|&Yacute;|&THORN;|&szlig;|&agrave;|";
+ myTextFromString << "&aacute;|&acirc;|&atilde;|&auml;|&aring;|&aelig;|&ccedil;|&egrave;|";
+ myTextFromString << "&eacute;|&ecirc;|&euml;|&igrave;|&iacute;|&icirc;|&iuml;|&eth;|";
+ myTextFromString << "&ntilde;|&ograve;|&oacute;|&ocirc;|&otilde;|&ouml;|&divide;|&oslash;|";
+ myTextFromString << "&ugrave;|&uacute;|&ucirc;|&uuml;|&yacute;|&thorn;|&yuml;";
+ }
+
+ myTextFromList->Create(myTextFromString, '|');
+
+ for (int i = 160; i <= 255; i++)
+ {
+ String temp = 0;
+ temp << (char) i;
+ myToList->Add(temp);
+
+ temp = 0;
+ temp << "&#" << i << ";";
+ myNumFromList->Add(temp);
+ if (!translate_latin1 )
+ break;
+ }
+
+ // Now let's take care of the low-bit characters with encodings.
+ myTextFromList->Add("&quot;");
+ myToList->Add("\"");
+ myNumFromList->Add("&#34;");
+
+ myTextFromList->Add("&amp;");
+ myToList->Add("&");
+ myNumFromList->Add("&#38;");
+
+ myTextFromList->Add("&lt;");
+ myToList->Add("<");
+ myNumFromList->Add("&#60;");
+
+ myTextFromList->Add("&gt;");
+ myToList->Add(">");
+ myNumFromList->Add("&#62;");
+
+ myTextWordCodec = new HtWordCodec(myTextFromList, myToList, '|');
+ myNumWordCodec = new HtWordCodec(myNumFromList, myToList, '|');
+}
+
+
+HtSGMLCodec::~HtSGMLCodec()
+{
+ delete myTextWordCodec;
+ delete myNumWordCodec;
+}
+
+
+// Supposedly used as HtSGMLCodec::instance()->ErrMsg()
+// to check if HtWordCodec liked what was fed.
+String& HtSGMLCodec::ErrMsg()
+{
+ return myErrMsg;
+}
+
+
+// Canonical singleton interface.
+HtSGMLCodec *
+HtSGMLCodec::instance()
+{
+ static HtSGMLCodec *_instance = 0;
+
+ if (_instance == 0)
+ {
+ _instance = new HtSGMLCodec();
+ }
+
+ return _instance;
+}
+
+// End of HtSGMLCodec.cc