diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc | 450 |
1 files changed, 450 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc new file mode 100644 index 00000000..8026622d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htlib/HtPack.cc @@ -0,0 +1,450 @@ +// +// HtPack.cc +// +// HtPack: Compress and uncompress data in e.g. simple structures. +// The structure must have the layout defined in the ABI; +// the layout the compiler generates. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: HtPack.cc,v 1.8 2004/05/28 13:15:20 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "HtPack.h" + +#include <ctype.h> +#include <stdlib.h> + +// For the moment, these formats are accepted: +// "i" native int, with most compressed value 0 +// "u" unsigned int, with most compressed value 0 +// "c" unsigned int, with most compressed value 1. +// +// If someone adds other formats (and uses them), please note +// that structure padding may give surprising effects on some +// (most) platforms, for example if you try to unpack a +// structure with the imagined signature "isi" (int, short, int). +// You will want to solve that portably. +// +// Compression is done to 2 bits description (overhead) each, +// plus variable-sized data. +// Theoretically, different formats can use different number of +// bits in the description with a few changes. +// The description is located in a byte before every four +// "fields". +String +htPack(const char format[], const char *data) +{ + const char *s = format; + + // We insert the encodings by number, rather than shifting and + // inserting at the "bottom". This should make it faster for + // decoding, which presumably is more important than the speed + // of encoding. + int code_no = 0; + + // Make a wild guess that we will compress some ordinary sized + // struct. This guess only has speed effects. + String compressed(60); + + // Accumulated codes. + unsigned int description = 0; + + // Store the encoding here. We cannot use a char *, as the + // string may be reallocated and moved. + int code_index = 0; + + // Make place for the first codes. + compressed << '\0'; + + // Format string loop. + while (*s) + { + int fchar = *s++; + int n; + + if (isdigit(*s)) + { + char* t; + n = strtol(s, &t, 10); + s = t; + } + else + n = 1; + + // Loop over N in e.g. "iN" (default 1). + while (n--) + { + // Format character handling. + switch (fchar) + { + case 'c': + { + // We compress an unsigned int with the most common + // value 1 as this: + // 00 - value is 1. + // 01 - value fits in unsigned char - appended. + // 10 - value fits in unsigned short - appended. + // 11 - just plain unsigned int - appended (you lose). + unsigned int value; + + // Initialize, but allow disalignment. + memcpy(&value, data, sizeof value); + data += sizeof(unsigned int); + + int mycode; + if (value == 1) + { + mycode = 0; + } + else + { + unsigned char charvalue = (unsigned char) value; + unsigned short shortvalue = (unsigned short) value; + if (value == charvalue) + { + mycode = 1; + compressed << charvalue; + } + else if (value == shortvalue) + { + mycode = 2; + compressed.append((char *) &shortvalue, sizeof shortvalue); + } + else + { + mycode = 3; + compressed.append((char *) &value, sizeof value); + } + } + + description |= mycode << (2*code_no++); + } + break; + + case 'i': + { + // We compress a (signed) int as follows: + // 00 - value is 0. + // 01 - value fits in char - appended. + // 10 - value fits in short - appended. + // 11 - just plain int - appended (you lose). + int value; + + // Initialize, but allow disalignment. + memcpy(&value, data, sizeof value); + data += sizeof(int); + + int mycode; + if (value == 0) + { + mycode = 0; + } + else + { + char charvalue = char(value); + short shortvalue = short(value); + if (value == charvalue) + { + mycode = 1; + compressed << charvalue; + } + else if (value == shortvalue) + { + mycode = 2; + compressed.append((char *) &shortvalue, sizeof shortvalue); + } + else + { + mycode = 3; + compressed.append((char *) &value, sizeof value); + } + } + + description |= mycode << (2*code_no++); + } + break; + + case 'u': + { + // We compress an unsigned int like an int: + // 00 - value is 0. + // 01 - value fits in unsigned char - appended. + // 10 - value fits in unsigned short - appended. + // 11 - just plain unsigned int - appended (you lose). + unsigned int value; + + // Initialize, but allow disalignment. + memcpy(&value, data, sizeof value); + data += sizeof(unsigned int); + + int mycode; + if (value == 0) + { + mycode = 0; + } + else + { + unsigned char charvalue = (unsigned char) value; + unsigned short shortvalue = (unsigned short) value; + if (value == charvalue) + { + mycode = 1; + compressed << charvalue; + } + else if (value == shortvalue) + { + mycode = 2; + compressed.append((char *) &shortvalue, sizeof shortvalue); + } + else + { + mycode = 3; + compressed.append((char *) &value, sizeof value); + } + } + + description |= mycode << (2*code_no++); + } + break; + + default: +#ifndef NOSTREAM +#ifdef DEBUG + if (1) + cerr << "Invalid char \'" << char(fchar) + << "\' in pack format \"" << format << "\"" + << endl; + return ""; +#endif +#endif + ; // Must always have a statement after a label. + } + + // Assuming 8-bit chars here. Flush encodings after 4 (2 bits + // each) or when the code-string is consumed. + if (code_no == 4 || (n == 0 && *s == 0)) + { + char *codepos = compressed.get() + code_index; + + *codepos = description; + description = 0; + code_no = 0; + + if (n || *s) + { + // If more data to be encoded, then we need a new place to + // store the encodings. + code_index = compressed.length(); + compressed << '\0'; + } + } + } + } + + return compressed; +} + + +// Reverse the effect of htPack. +String +htUnpack(const char format[], const char *data) +{ + const char *s = format; + + // The description needs to be renewed immediately. + unsigned int description = 1; + + // Make a wild guess about that we decompress to some ordinary + // sized struct and assume the cost of allocation some extra + // memory is much less than the cost of allocating more. + // This guess only has speed effects. + String decompressed(60); + + // Format string loop. + while (*s) + { + int fchar = *s++; + int n; + + if (isdigit(*s)) + { + char* t; + n = strtol(s, &t, 10); + s = t; + } + else + n = 1; + + // Loop over N in e.g. "iN" (default 1). + while (n--) + { + // Time to renew description? + if (description == 1) + description = 256 | *data++; + + // Format character handling. + switch (fchar) + { + case 'c': + { + // An unsigned int with the most common value 1 is + // compressed as follows: + // 00 - value is 1. + // 01 - value fits in unsigned char - appended. + // 10 - value fits in unsigned short - appended. + // 11 - just plain unsigned int - appended (you lose). + unsigned int value; + + switch (description & 3) + { + case 0: + value = 1; + break; + + case 1: + { + unsigned char charvalue; + memcpy(&charvalue, data, sizeof charvalue); + value = charvalue; + data++; + } + break; + + case 2: + { + unsigned short int shortvalue; + memcpy(&shortvalue, data, sizeof shortvalue); + value = shortvalue; + data += sizeof shortvalue; + } + break; + + case 3: + { + memcpy(&value, data, sizeof value); + data += sizeof value; + } + break; + } + decompressed.append((char *) &value, sizeof value); + } + break; + + case 'i': + { + // A (signed) int is compressed as follows: + // 00 - value is 0. + // 01 - value fits in char - appended. + // 10 - value fits in short - appended. + // 11 - just plain int - appended (you lose). + int value; + + switch (description & 3) + { + case 0: + value = 0; + break; + + case 1: + { + char charvalue; + memcpy(&charvalue, data, sizeof charvalue); + value = charvalue; + data++; + } + break; + + case 2: + { + short int shortvalue; + memcpy(&shortvalue, data, sizeof shortvalue); + value = shortvalue; + data += sizeof shortvalue; + } + break; + + case 3: + { + memcpy(&value, data, sizeof value); + data += sizeof value; + } + break; + } + decompressed.append((char *) &value, sizeof value); + } + break; + + case 'u': + { + // An unsigned int is compressed as follows: + // 00 - value is 0. + // 01 - value fits in unsigned char - appended. + // 10 - value fits in unsigned short - appended. + // 11 - just plain unsigned int - appended (you lose). + unsigned int value; + + switch (description & 3) + { + case 0: + value = 0; + break; + + case 1: + { + unsigned char charvalue; + memcpy(&charvalue, data, sizeof charvalue); + value = charvalue; + data++; + } + break; + + case 2: + { + unsigned short int shortvalue; + memcpy(&shortvalue, data, sizeof shortvalue); + value = shortvalue; + data += sizeof shortvalue; + } + break; + + case 3: + { + memcpy(&value, data, sizeof value); + data += sizeof value; + } + break; + } + decompressed.append((char *) &value, sizeof value); + } + break; + + default: +#ifndef NOSTREAM +#ifdef DEBUG + if (1) + cerr << "Invalid char \'" << char(fchar) + << "\' in unpack format \"" << format << "\"" + << endl; + return ""; +#endif +#endif + ; // Must always have a statement after a label. + } + + description >>= 2; + } + } + + return decompressed; +} + +// End of HtPack.cc |