/********* * * This file is part of BibleTime's source code, http://www.bibletime.info/. * * Copyright 1999-2006 by the BibleTime developers. * The BibleTime source code is licensed under the GNU General Public License version 2.0. * **********/ //BibleTime includes #include "backend/bt_thmlhtml.h" #include "backend/clanguagemgr.h" #include "backend/cswordmoduleinfo.h" #include "backend/creferencemanager.h" #include "frontend/cbtconfig.h" #include "util/cpointers.h" #include "util/scoped_resource.h" #include //Sword includes #include #include #include "versekey.h" //Qt includes #include #include //System includes #include using namespace Filters; BT_ThMLHTML::BT_ThMLHTML() { setEscapeStringCaseSensitive(true); setPassThruUnknownEscapeString(true); //the HTML widget will render the HTML escape codes setTokenStart("<"); setTokenEnd(">"); setTokenCaseSensitive(true); addTokenSubstitute("/foreign", ""); removeTokenSubstitute("note"); removeTokenSubstitute("/note"); } char BT_ThMLHTML::processText(sword::SWBuf& buf, const sword::SWKey* key, const sword::SWModule* module) { ThMLHTML::processText(buf, key, module); CSwordModuleInfo* m = CPointers::backend()->findModuleByName( module->Name() ); if (m && !(m->has(CSwordModuleInfo::lemmas) || m->has(CSwordModuleInfo::strongNumbers))) { //only parse if the module has strongs or lemmas return 1; } TQString result; TQString t = TQString::fromUtf8(buf.c_str()); TQRegExp tag("([.,;]?]+(type|value)=\"([^\"]+)\"[^>]+(type|value)=\"([^\"]+)\"([^<]*)>)+"); TQStringList list; int lastMatchEnd = 0; int pos = tag.search(t,0); if (pos == -1) { //no strong or morph code found in this text return 1; //WARNING: Return alread here } while (pos != -1) { list.append(t.mid(lastMatchEnd, pos+tag.matchedLength()-lastMatchEnd)); lastMatchEnd = pos+tag.matchedLength(); pos = tag.search(t,pos+tag.matchedLength()); } if (!t.right(t.length() - lastMatchEnd).isEmpty()) { list.append(t.right(t.length() - lastMatchEnd)); } tag = TQRegExp("]+(type|value|class)=\"([^\"]+)\"[^>]+(type|value|class)=\"([^\"]+)\"[^>]+((type|value|class)=\"([^\"]+)\")*([^<]*)>"); for (TQStringList::iterator it = list.begin(); it != list.end(); ++it) { TQString e( *it ); const bool textPresent = (e.stripWhiteSpace().remove(TQRegExp("[.,;:]")).left(1) != "<"); if (!textPresent) { continue; } bool hasLemmaAttr = false; bool hasMorphAttr = false; int pos = tag.search(e, 0); bool insertedTag = false; TQString value; TQString valueClass; while (pos != -1) { bool isMorph = false; bool isStrongs = false; value = TQString::null; valueClass = TQString::null; // check 3 attribute/value pairs for (int i = 1; i < 6; i += 2) { if (i > 4) i++; if (tag.cap(i) == "type") { isMorph = (tag.cap(i+1) == "morph"); isStrongs = (tag.cap(i+1) == "Strongs"); } else if (tag.cap(i) == "value") { value = tag.cap(i+1); } else if (tag.cap(i) == "class") { valueClass = tag.cap(i+1); } } // prepend the class qualifier to the value if (!valueClass.isEmpty()) { value = valueClass + ":" + value; // value.append(":").append(value); } if (value.isEmpty()) { break; } //insert the span if (!insertedTag) { e.replace(pos, tag.matchedLength(), ""); pos += 7; TQString rep; rep.setLatin1(""); int startPos = 0; TQChar c = e[startPos]; while ((startPos < pos) && (c.isSpace() || c.isPunct())) { ++startPos; c = e[startPos]; } hasLemmaAttr = isStrongs; hasMorphAttr = isMorph; e.insert( startPos, rep ); pos += rep.length(); } else { //add the attribute to the existing tag e.remove(pos, tag.matchedLength()); if ((!isMorph && hasLemmaAttr) || (isMorph && hasMorphAttr)) { //we append another attribute value, e.g. 3000 gets 3000|5000 //search the existing attribute start TQRegExp attrRegExp( isMorph ? "morph=\".+(?=\")" : "lemma=\".+(?=\")" ); attrRegExp.setMinimal(true); const int foundAttrPos = e.find(attrRegExp, pos); if (foundAttrPos != -1) { e.insert(foundAttrPos + attrRegExp.matchedLength(), TQString("|").append(value)); pos += value.length() + 1; hasLemmaAttr = !isMorph; hasMorphAttr = isMorph; } } else { //attribute was not yet inserted const int attrPos = e.find(TQRegExp("morph=|lemma="), 0); if (attrPos >= 0) { TQString attr; attr.append(isMorph ? "morph" : "lemma").append("=\"").append(value).append("\" "); e.insert(attrPos, attr); hasMorphAttr = isMorph; hasLemmaAttr = !isMorph; pos += attr.length(); } } } insertedTag = true; pos = tag.search(e, pos); } result.append( e ); } if (list.count()) { buf = (const char*)result.utf8(); } return 1; } bool BT_ThMLHTML::handleToken(sword::SWBuf &buf, const char *token, sword::BasicFilterUserData *userData) { if (!substituteToken(buf, token) && !substituteEscapeString(buf, token)) { sword::XMLTag tag(token); BT_UserData* myUserData = dynamic_cast(userData); sword::SWModule* myModule = const_cast(myUserData->module); //hack to be able to call stuff like Lang() if ( tag.getName() && !strcasecmp(tag.getName(), "foreign") ) { // a text part in another language, we have to set the right font if (tag.getAttribute("lang")) { const char* abbrev = tag.getAttribute("lang"); //const CLanguageMgr::Language* const language = CPointers::languageMgr()->languageForAbbrev( TQString::fromLatin1(abbrev) ); buf.append(""); } } else if (tag.getName() && !strcasecmp(tag.getName(), "sync")) { //lemmas, morph codes or strongs if (tag.getAttribute("type") && (!strcasecmp(tag.getAttribute("type"), "morph") || !strcasecmp(tag.getAttribute("type"), "Strongs") || !strcasecmp(tag.getAttribute("type"), "lemma"))) { // Morph or Strong buf.append('<'); buf.append(token); buf.append('>'); } } else if (tag.getName() && !strcasecmp(tag.getName(), "note")) { // tag if (!tag.isEndTag() && !tag.isEmpty()) { //appending is faster than appendFormatted buf.append(" Name()); buf.append('/'); buf.append(myUserData->key->getShortText()); buf.append('/'); buf.append( TQString::number(myUserData->swordFootnote++).latin1() ); buf.append("\">* "); myUserData->suspendTextPassThru = true; myUserData->inFootnoteTag = true; } else if (tag.isEndTag() && !tag.isEmpty()) { //end tag //buf += ")"; myUserData->suspendTextPassThru = false; myUserData->inFootnoteTag = false; } } else if (tag.getName() && !strcasecmp(tag.getName(), "scripRef")) { // a scripRef //scrip refs which are embeded in footnotes may not be displayed! if (!myUserData->inFootnoteTag) { if (tag.isEndTag()) { if (myUserData->inscriptRef) { // like "See John 3:16" buf.append(""); myUserData->inscriptRef = false; myUserData->suspendTextPassThru = false; } else { // like "John 3:16" CSwordModuleInfo* mod = CBTConfig::get(CBTConfig::standardBible); Q_ASSERT(mod); if (mod) { CReferenceManager::ParseOptions options; options.refBase = TQString::fromUtf8(myUserData->key->getText()); //current module key options.refDestinationModule = TQString(mod->name()); options.sourceLanguage = TQString(myModule->Lang()); options.destinationLanguage = TQString("en"); //it's ok to split the reference, because to descriptive text is given bool insertSemicolon = false; buf.append(""); TQStringList refs = TQStringList::split(";", TQString::fromUtf8(myUserData->lastTextNode.c_str())); TQString oldRef; //the previous reference to use as a base for the next refs for (TQStringList::iterator it(refs.begin()); it != refs.end(); ++it) { if (! oldRef.isEmpty() ){ options.refBase = oldRef; //use the last ref as a base, e.g. Rom 1,2-3, when the next ref is only 3:3-10 } const TQString completeRef( CReferenceManager::parseVerseReference((*it), options) ); oldRef = completeRef; //use the parsed result as the base for the next ref. if (insertSemicolon) { //prepend a ref divider if we're after the first one buf.append("; "); } buf.append("name(), completeRef, CReferenceManager::typeFromModule(mod->type()) ).utf8() ); buf.append("\" crossrefs=\""); buf.append((const char*)completeRef.utf8()); buf.append("\">"); buf.append((const char*)(*it).utf8()); buf.append(""); insertSemicolon = true; } buf.append(""); //crossref end } myUserData->suspendTextPassThru = false; } } else if (tag.getAttribute("passage") ) { //the passage was given as a parameter value myUserData->inscriptRef = true; myUserData->suspendTextPassThru = false; const char* ref = tag.getAttribute("passage"); Q_ASSERT(ref); CSwordModuleInfo* mod = CBTConfig::get(CBTConfig::standardBible); Q_ASSERT(mod); CReferenceManager::ParseOptions options; options.refBase = TQString::fromUtf8(myUserData->key->getText()); options.refDestinationModule = TQString(mod->name()); options.sourceLanguage = myModule->Lang(); options.destinationLanguage = TQString("en"); const TQString completeRef = CReferenceManager::parseVerseReference(TQString::fromUtf8(ref), options); if (mod) { buf.append(""); buf.append("name(), completeRef, CReferenceManager::typeFromModule(mod->type()) ).utf8() ); buf.append("\" crossrefs=\""); buf.append((const char*)completeRef.utf8()); buf.append("\">"); } else { buf.append(""); } } else if ( !tag.getAttribute("passage") ) { // we're starting a scripRef like "John 3:16" myUserData->inscriptRef = false; // let's stop text from going to output, the text get's added in the -tag handler myUserData->suspendTextPassThru = true; } } } else if (tag.getName() && !strcasecmp(tag.getName(), "div")) { if (tag.isEndTag()) { buf.append(""); } else if ( tag.getAttribute("class") && !strcasecmp(tag.getAttribute("class"),"sechead") ) { buf.append("

"); } else if (tag.getAttribute("class") && !strcasecmp(tag.getAttribute("class"), "title")) { buf.append("

"); } } else if (tag.getName() && !strcasecmp(tag.getName(), "img") && tag.getAttribute("src")) { const char* value = tag.getAttribute("src"); if (value[0] == '/') { value++; //strip the first / } buf.append("

module->getConfigEntry("AbsoluteDataPath")); buf.append('/'); buf.append(value); buf.append("\" />"); } else { // let unknown token pass thru return sword::ThMLHTML::handleToken(buf, token, userData); } } return true; }