Bibletime – a bible study tool
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bt_gbfhtml.cpp 9.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. /*********
  2. *
  3. * This file is part of BibleTime's source code, http://www.bibletime.info/.
  4. *
  5. * Copyright 1999-2006 by the BibleTime developers.
  6. * The BibleTime source code is licensed under the GNU General Public License version 2.0.
  7. *
  8. **********/
  9. //BibleTime includes
  10. #include "cswordmoduleinfo.h"
  11. #include "cswordbackend.h"
  12. #include "util/cpointers.h"
  13. #include "bt_gbfhtml.h"
  14. //system includes
  15. #include <stdlib.h>
  16. #include <stdio.h>
  17. //Sword includes
  18. #include <utilxml.h>
  19. //Qt includes
  20. #include <tqregexp.h>
  21. #include <tqstring.h>
  22. using namespace Filters;
  23. BT_GBFHTML::BT_GBFHTML() : sword::GBFHTML() {
  24. setEscapeStringCaseSensitive(true);
  25. setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
  26. removeTokenSubstitute("Rf");
  27. // addTokenSubstitute("RB", "<span>"); //start of a footnote with embedded text
  28. addTokenSubstitute("FI", "<span class=\"italic\">"); // italics begin
  29. addTokenSubstitute("Fi", "</span>");
  30. addTokenSubstitute("FB", "<span class=\"bold\">"); // bold begin
  31. addTokenSubstitute("Fb", "</span>");
  32. addTokenSubstitute("FR", "<span class=\"jesuswords\">");
  33. addTokenSubstitute("Fr", "</span>");
  34. addTokenSubstitute("FU", "<u>"); // underline begin
  35. addTokenSubstitute("Fu", "</u>");
  36. addTokenSubstitute("FO", "<span class=\"quotation\">"); // Old Testament quote begin
  37. addTokenSubstitute("Fo", "</span>");
  38. addTokenSubstitute("FS", "<span class=\"sup\">"); // Superscript begin// Subscript begin
  39. addTokenSubstitute("Fs", "</span>");
  40. addTokenSubstitute("FV", "<span class=\"sub\">"); // Subscript begin
  41. addTokenSubstitute("Fv", "</span>");
  42. addTokenSubstitute("TT", "<div class=\"booktitle\">");
  43. addTokenSubstitute("Tt", "</div>");
  44. addTokenSubstitute("TS", "<div class=\"sectiontitle\">");
  45. addTokenSubstitute("Ts", "</div>");
  46. //addTokenSubstitute("PP", "<span class=\"poetry\">"); // poetry begin
  47. //addTokenSubstitute("Pp", "</span>");
  48. addTokenSubstitute("Fn", "</font>"); // font end
  49. addTokenSubstitute("CL", "<br/>"); // new line
  50. addTokenSubstitute("CM", "<br/>"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired
  51. addTokenSubstitute("CG", "&gt;"); // literal greater-than sign
  52. addTokenSubstitute("CT", "&lt;"); // literal less-than sign
  53. addTokenSubstitute("JR", "<span class=\"right\">"); // right align begin
  54. addTokenSubstitute("JC", "<span class=\"center\">"); // center align begin
  55. addTokenSubstitute("JL", "</span>"); // align end
  56. }
  57. /** No descriptions */
  58. char BT_GBFHTML::processText(sword::SWBuf& buf, const sword::SWKey * key, const sword::SWModule * module) {
  59. GBFHTML::processText(buf, key, module);
  60. if (!module->isProcessEntryAttributes()) {
  61. return 1; //no processing should be done, may happen in a search
  62. }
  63. CSwordModuleInfo* m = CPointers::backend()->findModuleByName( module->Name() );
  64. if (m && !(m->has(CSwordModuleInfo::lemmas) || m->has(CSwordModuleInfo::morphTags) || m->has(CSwordModuleInfo::strongNumbers))) { //only parse if the module has strongs or lemmas
  65. return 1; //WARNING: Return alread here
  66. }
  67. //Am Anfang<WH07225> schuf<WH01254><WTH8804> Gott<WH0430> Himmel<WH08064> und<WT> Erde<WH0776>.
  68. //A simple word<WT> means: No entry for this word "word"
  69. TQString result;
  70. TQString t = TQString::fromUtf8(buf.c_str());
  71. TQRegExp tag("([.,;:]?<W[HGT][^>]*>\\s*)+");
  72. TQStringList list;
  73. int lastMatchEnd = 0;
  74. int pos = tag.search(t,0);
  75. if (pos == -1) { //no strong or morph code found in this text
  76. return 1; //WARNING: Return already here
  77. }
  78. //split the text into parts which end with the GBF tag marker for strongs/lemmas
  79. while (pos != -1) {
  80. list.append(t.mid(lastMatchEnd, pos+tag.matchedLength()-lastMatchEnd));
  81. lastMatchEnd = pos + tag.matchedLength();
  82. pos = tag.search(t, pos + tag.matchedLength());
  83. }
  84. //append the trailing text to the list.
  85. if (!t.right(t.length() - lastMatchEnd).isEmpty()) {
  86. list.append(t.right(t.length() - lastMatchEnd));
  87. }
  88. //list is now a list of words with 1-n Strongs at the end, which belong to this word.
  89. //now create the necessary HTML in list entries and concat them to the result
  90. tag = TQRegExp("<W([HGT])([^>]*)>");
  91. tag.setMinimal(true);
  92. for (TQStringList::iterator it = list.begin(); it != list.end(); ++it) {
  93. TQString e = (*it); //current entry to process
  94. //qWarning(e.latin1());
  95. //check if there is a word to which the strongs info belongs to.
  96. //If yes, wrap that word with the strongs info
  97. //If not, leave out the strongs info, because it can't be tight to a text
  98. //Comparing the first char with < is not enough, because the tokenReplace is done already
  99. //so there might be html tags already.
  100. const bool textPresent = (e.stripWhiteSpace().remove(TQRegExp("[.,;:]")).left(2) != "<W");
  101. if (!textPresent) {
  102. result += (*it);
  103. continue;
  104. }
  105. int pos = tag.search(e, 0); //try to find a strong number marker
  106. bool insertedTag = false;
  107. bool hasLemmaAttr = false;
  108. bool hasMorphAttr = false;
  109. TQString value = TQString::null;
  110. int tagAttributeStart = -1;
  111. while (pos != -1) { //work on all strong/lemma tags in this section, should be between 1-3 loops
  112. const bool isMorph = (tag.cap(1) == "T");
  113. value = isMorph ? tag.cap(2) : tag.cap(2).prepend( tag.cap(1) );
  114. if (value.isEmpty()) {
  115. break;
  116. }
  117. //insert the span
  118. if (!insertedTag) { //we have to insert a new tag end and beginning, i.e. our first loop
  119. e.replace(pos, tag.matchedLength(), "</span>");
  120. pos += 7;
  121. //skip blanks, commas, dots and stuff at the beginning, it doesn't belong to the morph code
  122. TQString rep("<span ");
  123. rep.append(isMorph ? "morph" : "lemma").append("=\"").append(value).append("\">");
  124. hasMorphAttr = isMorph;
  125. hasLemmaAttr = !isMorph;
  126. int startPos = 0;
  127. TQChar c = e[startPos];
  128. while ((startPos < pos) && (c.isSpace() || c.isPunct())) {
  129. ++startPos;
  130. c = e[startPos];
  131. }
  132. e.insert( startPos, rep );
  133. tagAttributeStart = startPos + 6; //to point to the start of the attributes
  134. pos += rep.length();
  135. }
  136. else { //add the attribute to the existing tag
  137. e.remove(pos, tag.matchedLength());
  138. if (tagAttributeStart == -1) {
  139. continue; //nothing valid found
  140. }
  141. if ((!isMorph && hasLemmaAttr) || (isMorph && hasMorphAttr)) { //we append another attribute value, e.g. 3000 gets 3000|5000
  142. //search the existing attribute start
  143. TQRegExp attrRegExp( isMorph ? "morph=\".+(?=\")" : "lemma=\".+(?=\")" );
  144. attrRegExp.setMinimal(true);
  145. const int foundPos = e.find(attrRegExp, tagAttributeStart);
  146. if (foundPos != -1) {
  147. e.insert(foundPos + attrRegExp.matchedLength(), TQString("|").append(value));
  148. pos += value.length() + 1;
  149. hasLemmaAttr = !isMorph;
  150. hasMorphAttr = isMorph;
  151. }
  152. }
  153. else { //attribute was not yet inserted
  154. TQString attr;
  155. attr.setLatin1(isMorph ? "morph" : "lemma").append("=\"").append(value).append("\" ");
  156. e.insert(tagAttributeStart, attr);
  157. pos += attr.length();
  158. hasMorphAttr = isMorph;
  159. hasLemmaAttr = !isMorph;
  160. }
  161. //tagAttributeStart remains the same
  162. }
  163. insertedTag = true;
  164. pos = tag.search(e, pos);
  165. }
  166. result += e;
  167. }
  168. if (list.count()) {
  169. buf = (const char*)result.utf8();
  170. }
  171. return 1;
  172. }
  173. bool BT_GBFHTML::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) {
  174. if (!substituteToken(buf, token)) { //more than a simple replace
  175. const unsigned int tokenLength = strlen(token);
  176. unsigned long i;
  177. sword::SWBuf value;
  178. BT_UserData* myUserData = dynamic_cast<BT_UserData*>(userData);
  179. sword::SWModule* myModule = const_cast<sword::SWModule*>(myUserData->module); //hack to be able to call stuff like Lang()
  180. if ( !strncmp(token, "WG", 2)
  181. || !strncmp(token, "WH", 2)
  182. || !strncmp(token, "WT", 2) ) {
  183. buf.append('<');
  184. buf.append(token);
  185. buf.append('>');
  186. }
  187. else if (!strncmp(token, "RB", 2)) {
  188. myUserData->hasFootnotePreTag = true;
  189. buf.append("<span class=\"footnotepre\">");
  190. }
  191. else if (!strncmp(token, "RF", 2)) {
  192. //we use several append calls because appendFormatted slows down filtering, which should be fast
  193. if (myUserData->hasFootnotePreTag) {
  194. // qWarning("inserted footnotepre end");
  195. buf.append("</span>");
  196. myUserData->hasFootnotePreTag = false;
  197. }
  198. buf.append(" <span class=\"footnote\" note=\"");
  199. buf.append(myModule->Name());
  200. buf.append('/');
  201. buf.append(myUserData->key->getShortText());
  202. buf.append('/');
  203. buf.append( TQString::number(myUserData->swordFootnote++).latin1() );
  204. buf.append("\">*</span> ");
  205. userData->suspendTextPassThru = true;
  206. }
  207. else if (!strncmp(token, "Rf", 2)) { //end of footnote
  208. userData->suspendTextPassThru = false;
  209. }
  210. else if (!strncmp(token, "FN", 2)) { //the end </font> tag is inserted in addTokenSubsitute
  211. buf.append("<font face=\"");
  212. for (i = 2; i < tokenLength; i++) {
  213. if(token[i] != '\"') {
  214. buf.append( token[i] );
  215. }
  216. }
  217. buf.append("\">");
  218. }
  219. else if (!strncmp(token, "CA", 2)) { // ASCII value
  220. buf.append( (char)atoi(&token[2]) );
  221. }
  222. else {
  223. return GBFHTML::handleToken(buf, token, userData);
  224. }
  225. }
  226. return true;
  227. }