/*********
*
* This file is part of BibleTime ' s source code , http : //www.bibletime.info/.
*
* Copyright 1999 - 2006 by the BibleTime developers .
* The BibleTime source code is licensed under the GNU General Public License version 2.0 .
*
* * * * * * * * * */
//BibleTime includes
# include "backend/bt_thmlhtml.h"
# include "backend/clanguagemgr.h"
# include "backend/cswordmoduleinfo.h"
# include "backend/creferencemanager.h"
# include "frontend/cbtconfig.h"
# include "util/cpointers.h"
# include "util/scoped_resource.h"
# include <iostream>
//Sword includes
# include <swmodule.h>
# include <utilxml.h>
# include "versekey.h"
//Qt includes
# include <tqstring.h>
# include <tqregexp.h>
//System includes
# include <stdlib.h>
using namespace Filters ;
BT_ThMLHTML : : BT_ThMLHTML ( ) {
setEscapeStringCaseSensitive ( true ) ;
setPassThruUnknownEscapeString ( true ) ; //the HTML widget will render the HTML escape codes
setTokenStart ( " < " ) ;
setTokenEnd ( " > " ) ;
setTokenCaseSensitive ( true ) ;
addTokenSubstitute ( " /foreign " , " </span> " ) ;
removeTokenSubstitute ( " note " ) ;
removeTokenSubstitute ( " /note " ) ;
}
char BT_ThMLHTML : : processText ( sword : : SWBuf & buf , const sword : : SWKey * key , const sword : : SWModule * module ) {
ThMLHTML : : processText ( buf , key , module ) ;
CSwordModuleInfo * m = CPointers : : backend ( ) - > findModuleByName ( module - > Name ( ) ) ;
if ( m & & ! ( m - > has ( CSwordModuleInfo : : lemmas ) | | m - > has ( CSwordModuleInfo : : strongNumbers ) ) ) { //only parse if the module has strongs or lemmas
return 1 ;
}
TQString result ;
TQString t = TQString : : fromUtf8 ( buf . c_str ( ) ) ;
TQRegExp tag ( " ([.,;]?<sync[^>]+(type|value)= \" ([^ \" ]+) \" [^>]+(type|value)= \" ([^ \" ]+) \" ([^<]*)>)+ " ) ;
TQStringList list ;
int lastMatchEnd = 0 ;
int pos = tag . search ( t , 0 ) ;
if ( pos = = - 1 ) { //no strong or morph code found in this text
return 1 ; //WARNING: Return alread here
}
while ( pos ! = - 1 ) {
list . append ( t . mid ( lastMatchEnd , pos + tag . matchedLength ( ) - lastMatchEnd ) ) ;
lastMatchEnd = pos + tag . matchedLength ( ) ;
pos = tag . search ( t , pos + tag . matchedLength ( ) ) ;
}
if ( ! t . right ( t . length ( ) - lastMatchEnd ) . isEmpty ( ) ) {
list . append ( t . right ( t . length ( ) - lastMatchEnd ) ) ;
}
tag = TQRegExp ( " <sync[^>]+(type|value|class)= \" ([^ \" ]+) \" [^>]+(type|value|class)= \" ([^ \" ]+) \" [^>]+((type|value|class)= \" ([^ \" ]+) \" )*([^<]*)> " ) ;
for ( TQStringList : : iterator it = list . begin ( ) ; it ! = list . end ( ) ; + + it ) {
TQString e ( * it ) ;
const bool textPresent = ( e . stripWhiteSpace ( ) . remove ( TQRegExp ( " [.,;:] " ) ) . left ( 1 ) ! = " < " ) ;
if ( ! textPresent ) {
continue ;
}
bool hasLemmaAttr = false ;
bool hasMorphAttr = false ;
int pos = tag . search ( e , 0 ) ;
bool insertedTag = false ;
TQString value ;
TQString valueClass ;
while ( pos ! = - 1 ) {
bool isMorph = false ;
bool isStrongs = false ;
value = TQString : : null ;
valueClass = TQString : : null ;
// check 3 attribute/value pairs
for ( int i = 1 ; i < 6 ; i + = 2 ) {
if ( i > 4 )
i + + ;
if ( tag . cap ( i ) = = " type " ) {
isMorph = ( tag . cap ( i + 1 ) = = " morph " ) ;
isStrongs = ( tag . cap ( i + 1 ) = = " Strongs " ) ;
}
else if ( tag . cap ( i ) = = " value " ) {
value = tag . cap ( i + 1 ) ;
}
else if ( tag . cap ( i ) = = " class " ) {
valueClass = tag . cap ( i + 1 ) ;
}
}
// prepend the class qualifier to the value
if ( ! valueClass . isEmpty ( ) ) {
value = valueClass + " : " + value ;
// value.append(":").append(value);
}
if ( value . isEmpty ( ) ) {
break ;
}
//insert the span
if ( ! insertedTag ) {
e . replace ( pos , tag . matchedLength ( ) , " </span> " ) ;
pos + = 7 ;
TQString rep ;
rep . setLatin1 ( " <span lemma= \" " ) . append ( value ) . append ( " \" > " ) ;
int startPos = 0 ;
TQChar c = e [ startPos ] ;
while ( ( startPos < pos ) & & ( c . isSpace ( ) | | c . isPunct ( ) ) ) {
+ + startPos ;
c = e [ startPos ] ;
}
hasLemmaAttr = isStrongs ;
hasMorphAttr = isMorph ;
e . insert ( startPos , rep ) ;
pos + = rep . length ( ) ;
}
else { //add the attribute to the existing tag
e . remove ( pos , tag . matchedLength ( ) ) ;
if ( ( ! isMorph & & hasLemmaAttr ) | | ( isMorph & & hasMorphAttr ) ) { //we append another attribute value, e.g. 3000 gets 3000|5000
//search the existing attribute start
TQRegExp attrRegExp ( isMorph ? " morph= \" .+(?= \" ) " : " lemma = \ " .+(?= \" ) " ) ;
attrRegExp . setMinimal ( true ) ;
const int foundAttrPos = e . find ( attrRegExp , pos ) ;
if ( foundAttrPos ! = - 1 ) {
e . insert ( foundAttrPos + attrRegExp . matchedLength ( ) , TQString ( " | " ) . append ( value ) ) ;
pos + = value . length ( ) + 1 ;
hasLemmaAttr = ! isMorph ;
hasMorphAttr = isMorph ;
}
}
else { //attribute was not yet inserted
const int attrPos = e . find ( TQRegExp ( " morph=|lemma= " ) , 0 ) ;
if ( attrPos > = 0 ) {
TQString attr ;
attr . append ( isMorph ? " morph " : " lemma " ) . append ( " = \" " ) . append ( value ) . append ( " \" " ) ;
e . insert ( attrPos , attr ) ;
hasMorphAttr = isMorph ;
hasLemmaAttr = ! isMorph ;
pos + = attr . length ( ) ;
}
}
}
insertedTag = true ;
pos = tag . search ( e , pos ) ;
}
result . append ( e ) ;
}
if ( list . count ( ) ) {
buf = ( const char * ) result . utf8 ( ) ;
}
return 1 ;
}
bool BT_ThMLHTML : : handleToken ( sword : : SWBuf & buf , const char * token , sword : : BasicFilterUserData * userData ) {
if ( ! substituteToken ( buf , token ) & & ! substituteEscapeString ( buf , token ) ) {
sword : : XMLTag tag ( token ) ;
BT_UserData * myUserData = dynamic_cast < BT_UserData * > ( userData ) ;
sword : : SWModule * myModule = const_cast < sword : : SWModule * > ( myUserData - > module ) ; //hack to be able to call stuff like Lang()
if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " foreign " ) ) { // a text part in another language, we have to set the right font
if ( tag . getAttribute ( " lang " ) ) {
const char * abbrev = tag . getAttribute ( " lang " ) ;
//const CLanguageMgr::Language* const language = CPointers::languageMgr()->languageForAbbrev( TQString::fromLatin1(abbrev) );
buf . append ( " <span class= \" foreign \" lang= \" " ) ;
buf . append ( abbrev ) ;
buf . append ( " \" > " ) ;
}
}
else if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " sync " ) ) { //lemmas, morph codes or strongs
if ( tag . getAttribute ( " type " ) & & ( ! strcasecmp ( tag . getAttribute ( " type " ) , " morph " ) | | ! strcasecmp ( tag . getAttribute ( " type " ) , " Strongs " ) | | ! strcasecmp ( tag . getAttribute ( " type " ) , " lemma " ) ) ) { // Morph or Strong
buf . append ( ' < ' ) ;
buf . append ( token ) ;
buf . append ( ' > ' ) ;
}
}
else if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " note " ) ) { // <note> tag
if ( ! tag . isEndTag ( ) & & ! tag . isEmpty ( ) ) {
//appending is faster than appendFormatted
buf . append ( " <span class= \" footnote \" note= \" " ) ;
buf . append ( myModule - > Name ( ) ) ;
buf . append ( ' / ' ) ;
buf . append ( myUserData - > key - > getShortText ( ) ) ;
buf . append ( ' / ' ) ;
buf . append ( TQString : : number ( myUserData - > swordFootnote + + ) . latin1 ( ) ) ;
buf . append ( " \" >*</span> " ) ;
myUserData - > suspendTextPassThru = true ;
myUserData - > inFootnoteTag = true ;
}
else if ( tag . isEndTag ( ) & & ! tag . isEmpty ( ) ) { //end tag
//buf += ")</span>";
myUserData - > suspendTextPassThru = false ;
myUserData - > inFootnoteTag = false ;
}
}
else if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " scripRef " ) ) { // a scripRef
//scrip refs which are embeded in footnotes may not be displayed!
if ( ! myUserData - > inFootnoteTag ) {
if ( tag . isEndTag ( ) ) {
if ( myUserData - > inscriptRef ) { // like "<scripRef passage="John 3:16">See John 3:16</scripRef>"
buf . append ( " </a></span> " ) ;
myUserData - > inscriptRef = false ;
myUserData - > suspendTextPassThru = false ;
}
else { // like "<scripRef>John 3:16</scripRef>"
CSwordModuleInfo * mod = CBTConfig : : get ( CBTConfig : : standardBible ) ;
Q_ASSERT ( mod ) ;
if ( mod ) {
CReferenceManager : : ParseOptions options ;
options . refBase = TQString : : fromUtf8 ( myUserData - > key - > getText ( ) ) ; //current module key
options . refDestinationModule = TQString ( mod - > name ( ) ) ;
options . sourceLanguage = TQString ( myModule - > Lang ( ) ) ;
options . destinationLanguage = TQString ( " en " ) ;
//it's ok to split the reference, because to descriptive text is given
bool insertSemicolon = false ;
buf . append ( " <span class= \" crossreference \" > " ) ;
TQStringList refs = TQStringList : : split ( " ; " , TQString : : fromUtf8 ( myUserData - > lastTextNode . c_str ( ) ) ) ;
TQString oldRef ; //the previous reference to use as a base for the next refs
for ( TQStringList : : iterator it ( refs . begin ( ) ) ; it ! = refs . end ( ) ; + + it ) {
if ( ! oldRef . isEmpty ( ) ) {
options . refBase = oldRef ; //use the last ref as a base, e.g. Rom 1,2-3, when the next ref is only 3:3-10
}
const TQString completeRef ( CReferenceManager : : parseVerseReference ( ( * it ) , options ) ) ;
oldRef = completeRef ; //use the parsed result as the base for the next ref.
if ( insertSemicolon ) { //prepend a ref divider if we're after the first one
buf . append ( " ; " ) ;
}
buf . append ( " <a href= \" " ) ;
buf . append (
CReferenceManager : : encodeHyperlink (
mod - > name ( ) ,
completeRef ,
CReferenceManager : : typeFromModule ( mod - > type ( ) )
) . utf8 ( )
) ;
buf . append ( " \" crossrefs= \" " ) ;
buf . append ( ( const char * ) completeRef . utf8 ( ) ) ;
buf . append ( " \" > " ) ;
buf . append ( ( const char * ) ( * it ) . utf8 ( ) ) ;
buf . append ( " </a> " ) ;
insertSemicolon = true ;
}
buf . append ( " </span> " ) ; //crossref end
}
myUserData - > suspendTextPassThru = false ;
}
}
else if ( tag . getAttribute ( " passage " ) ) { //the passage was given as a parameter value
myUserData - > inscriptRef = true ;
myUserData - > suspendTextPassThru = false ;
const char * ref = tag . getAttribute ( " passage " ) ;
Q_ASSERT ( ref ) ;
CSwordModuleInfo * mod = CBTConfig : : get ( CBTConfig : : standardBible ) ;
Q_ASSERT ( mod ) ;
CReferenceManager : : ParseOptions options ;
options . refBase = TQString : : fromUtf8 ( myUserData - > key - > getText ( ) ) ;
options . refDestinationModule = TQString ( mod - > name ( ) ) ;
options . sourceLanguage = myModule - > Lang ( ) ;
options . destinationLanguage = TQString ( " en " ) ;
const TQString completeRef = CReferenceManager : : parseVerseReference ( TQString : : fromUtf8 ( ref ) , options ) ;
if ( mod ) {
buf . append ( " <span class= \" crossreference \" > " ) ;
buf . append ( " <a href= \" " ) ;
buf . append (
CReferenceManager : : encodeHyperlink (
mod - > name ( ) ,
completeRef ,
CReferenceManager : : typeFromModule ( mod - > type ( ) )
) . utf8 ( )
) ;
buf . append ( " \" crossrefs= \" " ) ;
buf . append ( ( const char * ) completeRef . utf8 ( ) ) ;
buf . append ( " \" > " ) ;
}
else {
buf . append ( " <span><a> " ) ;
}
}
else if ( ! tag . getAttribute ( " passage " ) ) { // we're starting a scripRef like "<scripRef>John 3:16</scripRef>"
myUserData - > inscriptRef = false ;
// let's stop text from going to output, the text get's added in the -tag handler
myUserData - > suspendTextPassThru = true ;
}
}
}
else if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " div " ) ) {
if ( tag . isEndTag ( ) ) {
buf . append ( " </div> " ) ;
}
else if ( tag . getAttribute ( " class " ) & & ! strcasecmp ( tag . getAttribute ( " class " ) , " sechead " ) ) {
buf . append ( " <div class= \" sectiontitle \" > " ) ;
}
else if ( tag . getAttribute ( " class " ) & & ! strcasecmp ( tag . getAttribute ( " class " ) , " title " ) ) {
buf . append ( " <div class= \" booktitle \" > " ) ;
}
}
else if ( tag . getName ( ) & & ! strcasecmp ( tag . getName ( ) , " img " ) & & tag . getAttribute ( " src " ) ) {
const char * value = tag . getAttribute ( " src " ) ;
if ( value [ 0 ] = = ' / ' ) {
value + + ; //strip the first /
}
buf . append ( " <img src= \" file: " ) ;
buf . append ( myUserData - > module - > getConfigEntry ( " AbsoluteDataPath " ) ) ;
buf . append ( ' / ' ) ;
buf . append ( value ) ;
buf . append ( " \" /> " ) ;
}
else { // let unknown token pass thru
return sword : : ThMLHTML : : handleToken ( buf , token , userData ) ;
}
}
return true ;
}