summaryrefslogtreecommitdiffstats
path: root/kjs/ustring.h
blob: 0cdfa3f0fda88a8cee287c6c5e4f5187a928091e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
// -*- c-basic-offset: 2 -*-
/*
 *  This file is part of the KDE libraries
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2003 Apple Computer, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#ifndef _KJS_USTRING_H_
#define _KJS_USTRING_H_

#include <tqstring.h>
#include "global.h"

/**
 * @internal
 */
namespace DOM {
  class DOMString;
}
class KJScript;
class TQString;
class TQConstString;

namespace KJS {

  class UCharReference;
  class UString;

  /**
   * @short Unicode character.
   *
   * UChar represents a 16 bit Unicode character. It's internal data
   * representation is compatible to XChar2b and TQChar. It's therefore
   * possible to exchange data with X and Qt with shallow copies.
   */
  struct KJS_EXPORT UChar {
    /**
     * Construct a character with uninitialized value.
     */
    UChar();
    UChar(char u);
    UChar(unsigned char u);
    /**
     * Construct a character with the value denoted by the arguments.
     * @param h higher byte
     * @param l lower byte
     */
    UChar(unsigned char h , unsigned char l);
    /**
     * Construct a character with the given value.
     * @param u 16 bit Unicode value
     */
    UChar(unsigned short u);
    UChar(const UCharReference &c);
    /**
     * @return The higher byte of the character.
     */
    unsigned char high() const { return uc >> 8; }
    /**
     * @return The lower byte of the character.
     */
    unsigned char low() const { return uc; }
    /**
     * @return the 16 bit Unicode value of the character
     */
    unsigned short unicode() const { return uc; }
  public:
    /**
     * @return The character converted to lower case.
     */
    UChar toLower() const;
    /**
     * @return The character converted to upper case.
     */
    UChar toUpper() const;
    /**
     * A static instance of UChar(0).
     */
    static UChar null;

    unsigned short uc;
  } KJS_PACKED;

  inline UChar::UChar() { }
  inline UChar::UChar(unsigned char h , unsigned char l) : uc(h << 8 | l) { }
  inline UChar::UChar(char u) : uc((unsigned char)u) { }
  inline UChar::UChar(unsigned char u) : uc(u) { }
  inline UChar::UChar(unsigned short u) : uc(u) { }

  /**
   * @short Dynamic reference to a string character.
   *
   * UCharReference is the dynamic counterpart of UChar. It's used when
   * characters retrieved via index from a UString are used in an
   * assignment expression (and therefore can't be treated as being const):
   * \code
   * UString s("hello world");
   * s[0] = 'H';
   * \endcode
   *
   * If that sounds confusing your best bet is to simply forget about the
   * existence of this class and treat is as being identical to UChar.
   */
  class KJS_EXPORT UCharReference {
    friend class UString;
    UCharReference(UString *s, unsigned int off) : str(s), offset(off) { }
  public:
    /**
     * Set the referenced character to c.
     */
    UCharReference& operator=(UChar c);
    /**
     * Same operator as above except the argument that it takes.
     */
    UCharReference& operator=(char c) { return operator=(UChar(c)); }
    /**
     * @return Unicode value.
     */
    unsigned short unicode() const { return ref().uc; }
    /**
     * @return Lower byte.
     */
    unsigned char low() const { return ref().uc; }
    /**
     * @return Higher byte.
     */
    unsigned char high() const { return ref().uc >> 8; }
    /**
     * @return Character converted to lower case.
     */
    UChar toLower() const { return ref().toLower(); }
    /**
     * @return Character converted to upper case.
     */
    UChar toUpper() const  { return ref().toUpper(); }
  private:
    // not implemented, can only be constructed from UString
    UCharReference();

    UChar& ref() const;
    UString *str;
    int offset;
  };

  inline UChar::UChar(const UCharReference &c) : uc(c.unicode()) { }

  /**
   * @short 8 bit char based string class
   */
  class KJS_EXPORT CString {
  public:
    CString() : data(0L), length(0) { }
    CString(const char *c);
    CString(const char *c, int len);
    CString(const CString &);

    ~CString();

    CString &append(const CString &);
    CString &operator=(const char *c);
    CString &operator=(const CString &);
    CString &operator+=(const CString &c) { return append(c); }

    int size() const { return length; }
    const char *c_str() const { return data; }
  private:
    char *data;
    int length;
  };

  /**
   * @short Unicode string class
   */
  class KJS_EXPORT UString {
    friend bool operator==(const UString&, const UString&);
    friend class UCharReference;
    friend class Identifier;
    friend class PropertyMap;
    friend class PropertyMapHashTableEntry;
    /**
     * @internal
     */
    struct KJS_EXPORT Rep {
      friend class UString;
      friend bool operator==(const UString&, const UString&);

      static Rep *create(UChar *d, int l);
      void destroy();

      UChar *data() const { return dat; }
      int size() const { return len; }

      unsigned hash() const { if (_hash == 0) _hash = computeHash(dat, len); return _hash; }

      static unsigned computeHash(const UChar *, int length);
      static unsigned computeHash(const char *);

      void ref() { ++rc; }
      void deref() { if (--rc == 0) destroy(); }

      UChar *dat;
      int len;
      int capacity;
      int rc;
      mutable unsigned _hash;

      enum { capacityForIdentifier = 0x10000000 };

      static Rep null;
      static Rep empty;
    };

  public:
    /**
     * Constructs a null string.
     */
    UString();
    /**
     * Constructs a string from the single character c.
     */
    explicit UString(char c);
    /**
     * Constructs a string from a classical zero determined char string.
     */
    UString(const char *c);
    /**
     * Constructs a string from an array of Unicode characters of the specified
     * length.
     */
    UString(const UChar *c, int length);
    /**
     * If copy is false the string data will be adopted.
     * That means that the data will NOT be copied and the pointer will
     * be deleted when the UString object is modified or destroyed.
     * Behaviour defaults to a deep copy if copy is true.
     */
    UString(UChar *c, int length, bool copy);
    /**
     * Copy constructor. Makes a shallow copy only.
     */
    UString(const UString &s) { attach(s.rep); }
    /**
     * Convenience declaration only ! You'll be on your own to write the
     * implementation for a construction from TQString.
     *
     * Note: feel free to contact me if you want to see a dummy header for
     * your favorite FooString class here !
     */
    UString(const TQString &);
    /**
     * Convenience declaration only ! See UString(const TQString&).
     */
    UString(const DOM::DOMString &);
    /**
     * Concatenation constructor. Makes operator+ more efficient.
     */
    UString(const UString &, const UString &);
    /**
     * Destructor. If this handle was the only one holding a reference to the
     * string the data will be freed.
     */
    ~UString() { release(); }

    /**
     * Constructs a string from an int.
     */
    static UString from(int i);
    /**
     * Constructs a string from an unsigned int.
     */
    static UString from(unsigned int u);
    /**
     * Constructs a string from a long.
     */
    static UString from(long l);
    /**
     * Constructs a string from a double.
     */
    static UString from(double d);

    /**
     * Append another string.
     */
    UString &append(const UString &);

    /**
     * @return The string converted to the 8-bit string type CString().
     */
    CString cstring() const;
    /**
     * Convert the Unicode string to plain ASCII chars chopping of any higher
     * bytes. This method should only be used for *debugging* purposes as it
     * is neither Unicode safe nor free from side effects. In order not to
     * waste any memory the char buffer is static and *shared* by all UString
     * instances.
     */
    char *ascii() const;
    /**
     * @see UString(const TQString&).
     */
    DOM::DOMString string() const;
    /**
     * @see UString(const TQString&).
     */
    TQString qstring() const;
    /**
     * @see UString(const TQString&).
     */
    TQConstString qconststring() const;

    /**
     * Assignment operator.
     */
    UString &operator=(const char *c);
    UString &operator=(const UString &);
    /**
     * Appends the specified string.
     */
    UString &operator+=(const UString &s) { return append(s); }

    /**
     * @return A pointer to the internal Unicode data.
     */
    const UChar* data() const { return rep->data(); }
    /**
     * @return True if null.
     */
    bool isNull() const { return (rep == &Rep::null); }
    /**
     * @return True if null or zero length.
     */
    bool isEmpty() const { return (!rep->len); }
    /**
     * Use this if you want to make sure that this string is a plain ASCII
     * string. For example, if you don't want to lose any information when
     * using cstring() or ascii().
     *
     * @return True if the string doesn't contain any non-ASCII characters.
     */
    bool is8Bit() const;
    /**
     * @return The length of the string.
     */
    int size() const { return rep->size(); }
    /**
     * Const character at specified position.
     */
    UChar operator[](int pos) const;
    /**
     * Writable reference to character at specified position.
     */
    UCharReference operator[](int pos);

    /**
     * Attempts an conversion to a number. Apart from floating point numbers,
     * the algorithm will recognize hexadecimal representations (as
     * indicated by a 0x or 0X prefix) and +/- Infinity.
     * Returns NaN if the conversion failed.
     * @param tolerateTrailingJunk if true, toDouble can tolerate garbage after the number.
     * @param tolerateEmptyString if false, toDouble will turn an empty string into NaN rather than 0.
     */
    double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
    double toDouble(bool tolerateTrailingJunk) const;
    double toDouble() const;
    /**
     * Attempts an conversion to an unsigned long integer. ok will be set
     * according to the success.
     @ @param ok make this point to a bool in case you need to know whether the conversion succeeded.
     * @param tolerateEmptyString if false, toULong will return false for *ok for an empty string.
     */
    unsigned long toULong(bool *ok, bool tolerateEmptyString) const;
    unsigned long toULong(bool *ok = 0) const;

    unsigned int toUInt32(bool *ok = 0) const;
    unsigned int toStrictUInt32(bool *ok = 0) const;

    /**
     * Attempts an conversion to an array index. The "ok" boolean will be set
     * to true if it is a valid array index according to the rule from
     * ECMA 15.2 about what an array index is. It must exactly match the string
     * form of an unsigned integer, and be less than 2^32 - 1.
     */
    unsigned toArrayIndex(bool *ok = 0) const;

    /**
     * Returns this string converted to lower case characters
     */
    UString toLower() const;
    /**
     * Returns this string converted to upper case characters
     */
    UString toUpper() const;
    /**
     * @return Position of first occurrence of f starting at position pos.
     * -1 if the search was not successful.
     */
    int find(const UString &f, int pos = 0) const;
    int find(UChar, int pos = 0) const;
    /**
     * @return Position of first occurrence of f searching backwards from
     * position pos.
     * -1 if the search was not successful.
     */
    int rfind(const UString &f, int pos) const;
    int rfind(UChar, int pos) const;
    /**
     * @return The sub string starting at position pos and length len.
     */
    UString substr(int pos = 0, int len = -1) const;
    /**
     * Static instance of a null string.
     */
    static UString null;
#ifdef KJS_DEBUG_MEM
    /**
     * Clear statically allocated resources.
     */
    static void globalClear();
#endif
  private:
    UString(Rep *r) { attach(r); }
    void attach(Rep *r);
    void detach();
    void release();
    Rep *rep;
  };

  KJS_EXPORT inline bool operator==(const UChar &c1, const UChar &c2) {
    return (c1.uc == c2.uc);
  }
  KJS_EXPORT inline bool operator!=(const UChar& c1, const UChar& c2) {
    return !KJS::operator==(c1, c2);
  }
  KJS_EXPORT bool operator==(const UString& s1, const UString& s2);
  inline bool operator!=(const UString& s1, const UString& s2) {
    return !KJS::operator==(s1, s2);
  }
  KJS_EXPORT bool operator<(const UString& s1, const UString& s2);
  KJS_EXPORT bool operator==(const UString& s1, const char *s2);
  KJS_EXPORT inline bool operator!=(const UString& s1, const char *s2) {
    return !KJS::operator==(s1, s2);
  }
  KJS_EXPORT inline bool operator==(const char *s1, const UString& s2) {
    return operator==(s2, s1);
  }
  KJS_EXPORT inline bool operator!=(const char *s1, const UString& s2) {
    return !KJS::operator==(s1, s2);
  }
  KJS_EXPORT bool operator==(const CString& s1, const CString& s2);
  KJS_EXPORT inline bool operator!=(const CString& s1, const CString& s2) {
    return !KJS::operator==(s1, s2);
  }
  KJS_EXPORT inline UString operator+(const UString& s1, const UString& s2) {
    return UString(s1, s2);
  }

  KJS_EXPORT int compare(const UString &, const UString &);

} // namespace

#endif