Bibletime – a bible study tool
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

bt_osishtml.cpp 18KB


  1. /*********
  2. *
  3. * This file is part of BibleTime's source code, http://www.bibletime.info/.
  4. *
  5. * Copyright 1999-2006 by the BibleTime developers.
  6. * The BibleTime source code is licensed under the GNU General Public License version 2.0.
  7. *
  8. **********/
  9. //BibleTime includes
  10. #include <stdlib.h>
  11. #include "bt_osishtml.h"
  12. #include "versekey.h"
  13. #include "clanguagemgr.h"
  14. #include "creferencemanager.h"
  15. #include "cswordmoduleinfo.h"
  16. #include "frontend/cbtconfig.h"
  17. #include "util/cpointers.h"
  18. #include <iostream>
  19. //Sword includes
  20. #include <swmodule.h>
  21. #include <swbuf.h>
  22. #include <utilxml.h>
  23. //KDE includes
  24. #include <klocale.h>
  25. //Qt includes
  26. #include <tqstring.h>
  27. using sword::SWBuf;
  28. using sword::XMLTag;
  29. using namespace Filters;
  30. BT_OSISHTML::BT_OSISHTML() : sword::OSISHTMLHREF() {
  31. setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes
  32. addTokenSubstitute("inscription", "<span class=\"inscription\">");
  33. addTokenSubstitute("/inscription","</span>");
  34. addTokenSubstitute("mentioned", "<span class=\"mentioned\">");
  35. addTokenSubstitute("/mentioned", "</span>");
  36. // addTokenSubstitute("divineName", "<span class=\"name\"><span class=\"divine\">");
  37. // addTokenSubstitute("/divineName", "</span></span>");
  38. //TODO Move that down to the real tag handling, segs without the type morph would generate incorrect markup, as the end span is always inserted
  39. // addTokenSubstitute("seg type=\"morph\"", "<span class=\"morphSegmentation\">");
  40. // addTokenSubstitute("/seg", "</span>");
  41. // OSIS tables
  42. addTokenSubstitute("table", "<table>");
  43. addTokenSubstitute("/table", "</table>");
  44. addTokenSubstitute("row", "<tr>");
  45. addTokenSubstitute("/row", "</tr>");
  46. addTokenSubstitute("cell", "<td>");
  47. addTokenSubstitute("/cell", "</td>");
  48. }
  49. bool BT_OSISHTML::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) {
  50. // manually process if it wasn't a simple substitution
  51. if (!substituteToken(buf, token)) {
  52. BT_UserData* myUserData = dynamic_cast<BT_UserData*>(userData);
  53. sword::SWModule* myModule = const_cast<sword::SWModule*>(myUserData->module); //hack
  54. XMLTag tag(token);
  55. // qWarning("found %s", token);
  56. const bool osisQToTick = ((!userData->module->getConfigEntry("OSISqToTick")) || (strcmp(userData->module->getConfigEntry("OSISqToTick"), "false")));
  57. if (!tag.getName()) {
  58. return false;
  59. }
  60. // <div> tag
  61. if (!strcmp(tag.getName(), "div")) {
  62. //handle intro
  63. if ((!tag.isEmpty()) && (!tag.isEndTag())) { //start tag
  64. SWBuf type( tag.getAttribute("type") );
  65. if (type == "introduction") {
  66. buf.append("<div class=\"introduction\">");
  67. }
  68. else if (type == "chapter") {
  69. buf.append("<div class=\"chapter\" />"); //don't open a div here, that would lead to a broken XML structure
  70. }
  71. else {
  72. buf.append("<div>");
  73. }
  74. }
  75. else if (tag.isEndTag()) { //end tag
  76. buf.append("</div>");
  77. }
  78. }
  79. else if (!strcmp(tag.getName(), "w")) {
  80. if ((!tag.isEmpty()) && (!tag.isEndTag())) { //start tag
  81. const char *attrib;
  82. const char *val;
  83. XMLTag outTag("span");
  84. SWBuf attrValue;
  85. if ((attrib = tag.getAttribute("xlit"))) {
  86. val = strchr(attrib, ':');
  87. val = (val) ? (val + 1) : attrib;
  88. outTag.setAttribute("xlit", val);
  89. }
  90. if ((attrib = tag.getAttribute("gloss"))) {
  91. val = strchr(attrib, ':');
  92. val = (val) ? (val + 1) : attrib;
  93. outTag.setAttribute("gloss", val);
  94. }
  95. if ((attrib = tag.getAttribute("lemma"))) {
  96. char splitChar = '|';
  97. const int countSplit1 = tag.getAttributePartCount("lemma", '|');
  98. const int countSplit2 = tag.getAttributePartCount("lemma", ' '); //TODO: not allowed, remove soon
  99. int count = 0;
  100. if (countSplit1 > countSplit2) { //| split char
  101. splitChar = '|'; //TODO: not allowed, remove soon
  102. count = countSplit1;
  103. }
  104. else {
  105. splitChar = ' ';
  106. count = countSplit2;
  107. }
  108. int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
  109. attrValue = "";
  110. do {
  111. if (attrValue.length()) {
  112. attrValue.append( '|' );
  113. }
  114. attrib = tag.getAttribute("lemma", i, splitChar);
  115. if (i < 0) { // to handle our -1 condition
  116. i = 0;
  117. }
  118. val = strchr(attrib, ':');
  119. val = (val) ? (val + 1) : attrib;
  120. attrValue.append(val);
  121. }
  122. while (++i < count);
  123. if (attrValue.length()) {
  124. outTag.setAttribute("lemma", attrValue.c_str());
  125. }
  126. }
  127. if ((attrib = tag.getAttribute("morph"))) {
  128. char splitChar = '|';
  129. const int countSplit1 = tag.getAttributePartCount("morph", '|');
  130. const int countSplit2 = tag.getAttributePartCount("morph", ' '); //TODO: not allowed, remove soon
  131. int count = 0;
  132. if (countSplit1 > countSplit2) { //| split char
  133. splitChar = '|';
  134. count = countSplit1;
  135. }
  136. else {
  137. splitChar = ' ';
  138. count = countSplit2;
  139. }
  140. int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
  141. attrValue = "";
  142. do {
  143. if (attrValue.length()) {
  144. attrValue.append('|');
  145. }
  146. attrib = tag.getAttribute("morph", i, splitChar);
  147. if (i < 0) {
  148. i = 0; // to handle our -1 condition
  149. }
  150. val = strchr(attrib, ':');
  151. if (val) { //the prefix gives the modulename
  152. //check the prefix
  153. if (!strncmp("robinson:", attrib, 9)) { //robinson
  154. attrValue.append( "Robinson:" ); //work is not the same as Sword's module name
  155. attrValue.append( val+1 );
  156. }
  157. //strongs is handled by BibleTime
  158. /*else if (!strncmp("strongs", attrib, val-atrrib)) {
  159. attrValue.append( !strncmp(attrib, "x-", 2) ? attrib+2 : attrib );
  160. }*/
  161. else {
  162. attrValue.append( !strncmp(attrib, "x-", 2) ? attrib+2 : attrib );
  163. }
  164. }
  165. else { //no prefix given
  166. const bool skipFirst = ((val[0] == 'T') && ((val[1] == 'H') || (val[1] == 'H')));
  167. attrValue.append( skipFirst ? val+1 : val );
  168. }
  169. }
  170. while (++i < count);
  171. if (attrValue.length()) {
  172. outTag.setAttribute("morph", attrValue.c_str());
  173. }
  174. }
  175. if ((attrib = tag.getAttribute("POS"))) {
  176. val = strchr(attrib, ':');
  177. val = (val) ? (val + 1) : attrib;
  178. outTag.setAttribute("pos", val);
  179. }
  180. buf.append( outTag.toString() );
  181. }
  182. else if (tag.isEndTag()) { // end or empty <w> tag
  183. buf.append("</span>");
  184. }
  185. }
  186. // <note> tag
  187. else if (!strcmp(tag.getName(), "note")) {
  188. if (!tag.isEndTag()) { //start tag
  189. const SWBuf type( tag.getAttribute("type") );
  190. if (type == "crossReference") { //note containing cross references
  191. myUserData->inCrossrefNote = true;
  192. myUserData->noteType = BT_UserData::CrossReference;
  193. myUserData->swordFootnote++; // cross refs count as notes, too
  194. /* //get the refList value of the right entry attribute
  195. AttributeList notes = myModule->getEntryAttributes()["Footnote"];
  196. bool foundNote = false;
  197. SWBuf id( tag.getAttribute("osisID") );
  198. SWBuf refList;
  199. for (AttributeList::iterator list_it = notes.begin(); (list_it != notes.end()) && !foundNote; ++list_it ) {
  200. for (AttributeValue::iterator val_it = list_it->second.begin(); (val_it != list_it->second.end()) && !foundNote; ++val_it ) {
  201. if ((val_it->first == "osisID") && (val_it->second == id)) {
  202. foundNote = true; //this break the loop
  203. refList = list_it->second["refList"];
  204. }
  205. }
  206. }
  207. if (refList.length()) {
  208. buf.append(" <span class=\"crossreference\" crossrefs=\"");
  209. buf.append(refList.c_str());
  210. buf.append("\"> ");
  211. myUserData->noteType = BT_UserData::CrossReference;
  212. }
  213. else {
  214. myUserData->noteType = BT_UserData::Unknown;
  215. }*/
  216. buf.append("<span class=\"crossreference\">");
  217. }
  218. /* else if (type == "explanation") {
  219. }
  220. */
  221. else if ((type == "strongsMarkup") || (type == "x-strongsMarkup")) {
  222. /**
  223. * leave strong's markup notes out, in the future we'll probably have
  224. * different option filters to turn different note types on or off
  225. */
  226. myUserData->suspendTextPassThru = true;
  227. myUserData->noteType = BT_UserData::StrongsMarkup;
  228. }
  229. else if (type == "alternative") {
  230. // qWarning("found alternative");
  231. // buf.append(" <span class=\"alternative\">");
  232. myUserData->noteType = BT_UserData::Alternative;
  233. myUserData->suspendTextPassThru = true;
  234. myUserData->swordFootnote++; // count as notes, too
  235. }
  236. else {
  237. // qWarning("found note in %s", myUserData->key->getShortText());
  238. buf.append(" <span class=\"footnote\" note=\"");
  239. buf.append(myModule->Name());
  240. buf.append('/');
  241. buf.append(myUserData->key->getShortText());
  242. buf.append('/');
  243. buf.append( TQString::number(myUserData->swordFootnote++).latin1() ); //inefficient
  244. const SWBuf n = tag.getAttribute("n");
  245. buf.append("\">");
  246. buf.append( (n.length() > 0) ? n.c_str() : "*" );
  247. buf.append("</span> ");
  248. myUserData->noteType = BT_UserData::Footnote;
  249. myUserData->suspendTextPassThru = true;
  250. }
  251. }
  252. else { //if (tag.isEndTag()) {
  253. Q_ASSERT(myUserData->noteType != BT_UserData::Unknown);
  254. if (myUserData->noteType == BT_UserData::CrossReference) {
  255. buf.append("</span> ");
  256. // myUserData->suspendTextPassThru = false;
  257. myUserData->inCrossrefNote = false;
  258. }
  259. else if (myUserData->noteType == BT_UserData::Alternative) {
  260. buf.append(" <span class=\"alternative\" alternative=\"");
  261. buf.append(myUserData->lastTextNode);
  262. buf.append("\" title=\"");
  263. buf.append((const char*)i18n("Alternative text").utf8());
  264. buf.append("\" />");
  265. }
  266. myUserData->noteType = BT_UserData::Unknown;
  267. myUserData->suspendTextPassThru = false;
  268. }
  269. }
  270. // The <p> paragraph tag is handled by OSISHTMLHref
  271. else if (!strcmp(tag.getName(), "reference")) { // <reference> tag
  272. if (!tag.isEndTag() && !tag.isEmpty()) {
  273. TQString ref( tag.getAttribute("osisRef") );
  274. TQString hrefRef( ref );
  275. Q_ASSERT(!ref.isEmpty());
  276. if (!ref.isEmpty()) {
  277. //find out the mod, using the current module makes sense if it's a bible or commentary because the refs link into a bible by default.
  278. //If the osisRef is something like "ModuleID:key comes here" then the
  279. // modulename is given, so we'll use that one
  280. CSwordModuleInfo* mod = CPointers::backend()->findSwordModuleByPointer(myModule);
  281. Q_ASSERT(mod);
  282. if (!mod || (mod->type() != CSwordModuleInfo::Bible
  283. && mod->type() != CSwordModuleInfo::Commentary)) {
  284. mod = CBTConfig::get( CBTConfig::standardBible );
  285. }
  286. Q_ASSERT(mod);
  287. //if the osisRef like "GerLut:key" contains a module, use that
  288. int pos = ref.find(":");
  289. if ((pos >= 0) && ref.at(pos-1).isLetter() && ref.at(pos+1).isLetter()) {
  290. TQString newModuleName = ref.left(pos);
  291. hrefRef = ref.mid(pos+1);
  292. if (CPointers::backend()->findModuleByName(newModuleName)) {
  293. mod = CPointers::backend()->findModuleByName(newModuleName);
  294. }
  295. }
  296. CReferenceManager::ParseOptions options;
  297. options.refBase = TQString::fromUtf8(myUserData->key->getText());
  298. options.refDestinationModule = TQString(mod->name());
  299. options.sourceLanguage = TQString(myModule->Lang());
  300. options.destinationLanguage = TQString("en");
  301. buf.append("<a href=\"");
  302. buf.append( //create the hyperlink with key and mod
  303. CReferenceManager::encodeHyperlink(
  304. mod->name(),
  305. CReferenceManager::parseVerseReference(hrefRef, options),
  306. CReferenceManager::typeFromModule(mod->type())
  307. ).utf8()
  308. );
  309. buf.append("\" crossrefs=\"");
  310. buf.append((const char*)CReferenceManager::parseVerseReference(ref, options).utf8()); //ref must contain the osisRef module marker if there was any
  311. buf.append("\">");
  312. }
  313. }
  314. else if (tag.isEndTag()) {
  315. buf.append("</a>");
  316. }
  317. else { // empty reference marker
  318. // -- what should we do? nothing for now.
  319. }
  320. }
  321. // <l> is handled by OSISHTMLHref
  322. // <title>
  323. else if (!strcmp(tag.getName(), "title")) {
  324. if (!tag.isEndTag() && !tag.isEmpty()) {
  325. buf.append("<div class=\"sectiontitle\">");
  326. }
  327. else if (tag.isEndTag()) {
  328. buf.append("</div>");
  329. }
  330. else { // empty title marker
  331. // what to do? is this even valid?
  332. buf.append("<br/>");
  333. }
  334. }
  335. // <hi> highlighted text
  336. else if (!strcmp(tag.getName(), "hi")) {
  337. const SWBuf type = tag.getAttribute("type");
  338. if ((!tag.isEndTag()) && (!tag.isEmpty())) {
  339. if (type == "bold") {
  340. buf.append("<span class=\"bold\">");
  341. }
  342. else if (type == "illuminated") {
  343. buf.append("<span class=\"illuminated\">");
  344. }
  345. else if (type == "italic") {
  346. buf.append("<span class=\"italic\">");
  347. }
  348. else if (type == "line-through") {
  349. buf.append("<span class=\"line-through\">");
  350. }
  351. else if (type == "normal") {
  352. buf.append("<span class=\"normal\">");
  353. }
  354. else if (type == "small-caps") {
  355. buf.append("<span class=\"small-caps\">");
  356. }
  357. else if (type == "underline") {
  358. buf.append("<span class=\"underline\">");
  359. }
  360. else {
  361. buf.append("<span>"); //don't break markup, </span> is inserted later
  362. }
  363. }
  364. else if (tag.isEndTag()) { //all hi replacements are html spans
  365. buf.append("</span>");
  366. }
  367. }
  368. //name
  369. else if (!strcmp(tag.getName(), "name")) {
  370. const SWBuf type = tag.getAttribute("type");
  371. if ((!tag.isEndTag()) && (!tag.isEmpty())) {
  372. if (type == "geographic") {
  373. buf.append("<span class=\"name\"><span class=\"geographic\">");
  374. }
  375. else if (type == "holiday") {
  376. buf.append("<span class=\"name\"><span class=\"holiday\">");
  377. }
  378. else if (type == "nonhuman") {
  379. buf.append("<span class=\"name\"><span class=\"nonhuman\">");
  380. }
  381. else if (type == "person") {
  382. buf.append("<span class=\"name\"><span class=\"person\">");
  383. }
  384. else if (type == "ritual") {
  385. buf.append("<span class=\"name\"><span class=\"ritual\">");
  386. }
  387. else {
  388. buf.append("<span class=\"name\"><span>");
  389. }
  390. }
  391. else if (tag.isEndTag()) { //all hi replacements are html spans
  392. buf.append("</span></span> ");
  393. }
  394. }
  395. else if (!strcmp(tag.getName(), "transChange")) {
  396. SWBuf type( tag.getAttribute("type") );
  397. if ( !type.length() ) {
  398. type = tag.getAttribute("changeType");
  399. }
  400. if ((!tag.isEndTag()) && (!tag.isEmpty())) {
  401. if (type == "added") {
  402. buf.append("<span class=\"transchange\" title=\"");
  403. buf.append((const char*)i18n("Added text").utf8());
  404. buf.append("\"><span class=\"added\">");
  405. }
  406. else if (type == "amplified") {
  407. buf.append("<span class=\"transchange\"><span class=\"amplified\">");
  408. }
  409. else if (type == "changed") {
  410. buf.append("<span class=\"transchange\"><span class=\"changed\">");
  411. }
  412. else if (type == "deleted") {
  413. buf.append("<span class=\"transchange\"><span class=\"deleted\">");
  414. }
  415. else if (type == "moved") {
  416. buf.append("<span class=\"transchange\"><span class=\"moved\">");
  417. }
  418. else if (type == "tenseChange") {
  419. buf.append("<span class=\"transchange\"><span class=\"tenseChange\">");
  420. }
  421. else {
  422. buf.append("<span class=\"transchange\"><span>");
  423. }
  424. }
  425. else if (tag.isEndTag()) { //all hi replacements are html spans
  426. buf.append("</span></span>");
  427. }
  428. }
  429. else if (!strcmp(tag.getName(), "p")) {
  430. if (tag.isEmpty()) {
  431. buf.append("<p/>");
  432. }
  433. }
  434. // <q> quote
  435. else if (!strcmp(tag.getName(), "q")) {
  436. SWBuf type = tag.getAttribute("type");
  437. SWBuf who = tag.getAttribute("who");
  438. const char *lev = tag.getAttribute("level");
  439. int level = (lev) ? atoi(lev) : 1;
  440. if ((!tag.isEndTag()) && (!tag.isEmpty())) {
  441. myUserData->quote.who = who;
  442. if(osisQToTick) //alternate " and '
  443. buf.append((level % 2) ? '\"' : '\'');
  444. if (who == "Jesus") {
  445. buf.append("<span class=\"jesuswords\">");
  446. }
  447. }
  448. else if (tag.isEndTag()) {
  449. if (myUserData->quote.who == "Jesus") {
  450. buf.append("</span>");
  451. }
  452. if (osisQToTick) { //alternate " and '
  453. buf.append((level % 2) ? '\"' : '\'');
  454. }
  455. myUserData->quote.who = "";
  456. }
  457. }
  458. // abbr tag
  459. else if (!strcmp(tag.getName(), "abbr")) {
  460. if (!tag.isEndTag() && !tag.isEmpty()) {
  461. const SWBuf expansion = tag.getAttribute("expansion");
  462. buf.append("<span class=\"abbreviation\" expansion=\"");
  463. buf.append(expansion);
  464. buf.append("\">");
  465. }
  466. else if (tag.isEndTag()) {
  467. buf.append("</span>");
  468. }
  469. }
  470. // <milestone> tag
  471. else if (!strcmp(tag.getName(), "milestone")) {
  472. const SWBuf type = tag.getAttribute("type");
  473. if ((type == "screen") || (type == "line")) {//line break
  474. buf.append("<br/>");
  475. userData->supressAdjacentWhitespace = true;
  476. }
  477. else if (type == "x-p") { //e.g. occurs in the KJV2006 module
  478. //buf.append("<br/>");
  479. const SWBuf marker = tag.getAttribute("marker");
  480. if (marker.length() > 0) {
  481. buf.append(marker);
  482. }
  483. }
  484. }
  485. //seg tag
  486. else if (!strcmp(tag.getName(), "seg")) {
  487. if (!tag.isEndTag() && !tag.isEmpty()) {
  488. const SWBuf type = tag.getAttribute("type");
  489. if (type == "morph") {//line break
  490. //This code is for WLC and MORPH (WHI)
  491. XMLTag outTag("span");
  492. outTag.setAttribute("class", "morphSegmentation");
  493. const char* attrValue;
  494. //Transfer the values to the span
  495. //Problem: the data is in hebrew/aramaic, how to encode in HTML/BibleTime?
  496. if ((attrValue = tag.getAttribute("lemma"))) outTag.setAttribute("lemma", attrValue);
  497. if ((attrValue = tag.getAttribute("morph"))) outTag.setAttribute("morph", attrValue);
  498. if ((attrValue = tag.getAttribute("homonym"))) outTag.setAttribute("homonym", attrValue);
  499. buf.append(outTag.toString());
  500. //buf.append("<span class=\"morphSegmentation\">");
  501. }
  502. else{
  503. buf.append("<span>");
  504. }
  505. }
  506. else { // seg end tag
  507. buf.append("</span>");
  508. }
  509. //qWarning(TQString("handled <seg> token. result: %1").arg(buf.c_str()).latin1());
  510. }
  511. //divine name, don't use simple tag replacing because it may have attributes
  512. else if (!strcmp(tag.getName(), "divineName")) {
  513. if (!tag.isEndTag()) {
  514. buf.append("<span class=\"name\"><span class=\"divine\">");
  515. }
  516. else { //all hi replacements are html spans
  517. buf.append("</span></span>");
  518. }
  519. }
  520. else { //all tokens handled by OSISHTMLHref will run through the filter now
  521. return sword::OSISHTMLHREF::handleToken(buf, token, userData);
  522. }
  523. }
  524. return false;
  525. }