Bibletime – a bible study tool
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

btstringmgr.cpp 3.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. //
  2. // C++ Implementation: btstringmgr
  3. //
  4. // Description:
  5. //
  6. //
  7. // Author: The BibleTime team <info@bibletime.info>, (C) 2004
  8. //
  9. // Copyright: See COPYING file that comes with this distribution
  10. //
  11. //
#include "btstringmgr.h"

//System includes
#include <ctype.h>
#include <string.h>
  15. char* BTStringMgr::upperUTF8(char* text, unsigned int maxlen) const {
  16. const int max = (maxlen>0) ? maxlen : strlen(text);
  17. if (isUtf8(text)) {
  18. strncpy(text, (const char*)TQString::fromUtf8(text).upper().utf8(), max);
  19. return text;
  20. }
  21. else {
  22. char* ret = text;
  23. while (*text) {
  24. *text = toupper(*text);
  25. text++;
  26. }
  27. return ret;
  28. }
  29. return text;
  30. }
  31. char* BTStringMgr::upperLatin1(char* text, unsigned int max) const {
  32. char* ret = text;
  33. while (*text) {
  34. *text = toupper(*text);
  35. text++;
  36. }
  37. return ret;
  38. }
  39. bool BTStringMgr::supportsUnicode() const {
  40. return true;
  41. }
  42. const bool BTStringMgr::isUtf8(const char *buf) const {
  43. int i, n;
  44. register unsigned char c;
  45. bool gotone = false;
  46. #define F 0 /* character never appears in text */
  47. #define T 1 /* character appears in plain ASCII text */
  48. #define I 2 /* character appears in ISO-8859 text */
  49. #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
  50. static const unsigned char text_chars[256] = {
  51. /* BEL BS HT LF FF CR */
  52. F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
  53. /* ESC */
  54. F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
  55. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
  56. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
  57. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
  58. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
  59. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
  60. T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
  61. /* NEL */
  62. X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
  63. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
  64. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
  65. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
  66. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
  67. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
  68. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
  69. I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
  70. };
  71. /* *ulen = 0; */
  72. for (i = 0; (c = buf[i]); i++) {
  73. if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
  74. /*
  75. * Even if the whole file is valid UTF-8 sequences,
  76. * still reject it if it uses weird control characters.
  77. */
  78. if (text_chars[c] != T)
  79. return false;
  80. }
  81. else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
  82. return false;
  83. }
  84. else { /* 11xxxxxx begins UTF-8 */
  85. int following;
  86. if ((c & 0x20) == 0) { /* 110xxxxx */
  87. following = 1;
  88. }
  89. else if ((c & 0x10) == 0) { /* 1110xxxx */
  90. following = 2;
  91. }
  92. else if ((c & 0x08) == 0) { /* 11110xxx */
  93. following = 3;
  94. }
  95. else if ((c & 0x04) == 0) { /* 111110xx */
  96. following = 4;
  97. }
  98. else if ((c & 0x02) == 0) { /* 1111110x */
  99. following = 5;
  100. }
  101. else
  102. return false;
  103. for (n = 0; n < following; n++) {
  104. i++;
  105. if (!(c = buf[i]))
  106. goto done;
  107. if ((c & 0x80) == 0 || (c & 0x40))
  108. return false;
  109. }
  110. gotone = true;
  111. }
  112. }
  113. done:
  114. return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
  115. }
  116. #undef F
  117. #undef T
  118. #undef I
  119. #undef X