summaryrefslogtreecommitdiffstats
path: root/conduits/docconduit/DOC-converter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'conduits/docconduit/DOC-converter.cpp')
-rw-r--r--conduits/docconduit/DOC-converter.cpp631
1 files changed, 631 insertions, 0 deletions
diff --git a/conduits/docconduit/DOC-converter.cpp b/conduits/docconduit/DOC-converter.cpp
new file mode 100644
index 0000000..6502fe5
--- /dev/null
+++ b/conduits/docconduit/DOC-converter.cpp
@@ -0,0 +1,631 @@
+/* KPilot
+**
+** Copyright (C) 2002-2003 by Reinhold Kainhofer
+**
+** The doc converter synchronizes text files on the PC with DOC databases on the Palm
+*/
+
+/*
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program in a file called COPYING; if not, write to
+** the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+** MA 02110-1301, USA.
+*/
+
+/*
+** Bug reports and questions can be sent to kde-pim@kde.org
+*/
+
+
+#include "options.h"
+#include "DOC-converter.moc"
+
+#include <tqdir.h>
+#include <tqfileinfo.h>
+#include <tqregexp.h>
+#include <tqsortedlist.h>
+
+#include <pilotDatabase.h>
+#include <pilotLocalDatabase.h>
+#include <pilotSerialDatabase.h>
+
+#include "pilotDOCHead.h"
+#include "pilotDOCEntry.h"
+#include "pilotDOCBookmark.h"
+
+
+
+#define min(a,b) (a<b)?(a):(b)
+
+
+
+/****************************************************************************************************
+ * various bookmark classes. Most important is the bmkList findMatches(TQString) function,
+ * which needs to return a list of all bookmarks found for the given bookmark expression.
+ * A bookmark usually consists of a bookmark text and an offset into the text document.
+ ****************************************************************************************************/
+
+
+bool docBookmark::compare_pos=true;
+
+bool operator< ( const docBookmark &s1, const docBookmark &s2)
+{
+ if (docBookmark::compare_pos) { return s1.position<s2.position;}
+ else {return s2.bmkName<s2.bmkName;}
+}
+
+bool operator== ( const docBookmark &s1, const docBookmark &s2)
+{
+ return (s1.position==s2.position) && (s1.bmkName==s2.bmkName);
+}
+
+
+int docMatchBookmark::findMatches(TQString doctext, bmkList &fBookmarks) {
+ FUNCTIONSETUP;
+// bmkList res;
+ int pos = 0, nr=0, found=0;
+#ifdef DEBUG
+ DEBUGKPILOT<<"Finding matches of "<<pattern<<endl;
+#endif
+
+ while (pos >= 0 && found<to) {
+ pos = doctext.find(pattern, pos);
+#ifdef DEBUG
+ DEBUGKPILOT<<"Result of search: pos="<<pos<<endl;
+#endif
+ if (pos >= 0)
+ {
+ ++found;
+ if (found>=from && found<=to) {
+ fBookmarks.append(new docBookmark(pattern, pos));
+ ++nr;
+
+ }
+ ++pos;
+ }
+ }
+ return nr;
+}
+
+
+
+int docRegExpBookmark::findMatches(TQString doctext, bmkList &fBookmarks)
+{
+ FUNCTIONSETUP;
+// bmkList res;
+ TQRegExp rx(pattern);
+ int pos = 0, nr=0, found=0;
+
+ while (pos>=0 && found<=to) {
+#ifdef DEBUG
+ DEBUGKPILOT<<"Searching for bookmark "<<pattern<<endl;
+#endif
+ pos=rx.search(doctext, pos);
+ if (pos > -1) {
+ ++found;
+ if (found>=from && found<to) {
+ if (capSubexpression>=0) {
+ fBookmarks.append(new docBookmark(/*bmkName.left(16)*/rx.cap(capSubexpression), pos));
+ } else {
+ // TODO: use the subexpressions from the regexp for the bmk name ($1..$9) (given as separate regexp)
+ TQString bmkText(bmkName);
+ for (int i=0; i<=rx.numCaptures(); ++i) {
+ bmkText.replace(CSL1("$%1").arg(i), rx.cap(i));
+ bmkText.replace(CSL1("\\%1").arg(i), rx.cap(i));
+ }
+ fBookmarks.append(new docBookmark(bmkText.left(16), pos));
+ }
+ ++nr;
+ }
+ ++pos;
+ }
+ }
+ return nr;
+}
+
+
+
+
+
+
+
+
+/*********************************************************************
+ C O N S T R U C T O R
+ *********************************************************************/
+
+
+DOCConverter::DOCConverter(TQObject *parent, const char *name):TQObject(parent,name) {
+ FUNCTIONSETUP;
+ docdb=0L;
+ eSortBookmarks=eSortNone;
+ fBookmarks.setAutoDelete( TRUE );
+}
+
+
+
+DOCConverter::~DOCConverter() {
+ FUNCTIONSETUP;
+}
+
+
+
+
+
+/*********************************************************************
+ S Y N C S T R U C T U R E
+ *********************************************************************/
+
+
+
+void DOCConverter::setTXTpath(TQString path, TQString file) {
+ TQDir dr(path);
+ TQFileInfo pth(dr, file);
+ if (!file.isEmpty())
+ txtfilename = pth.absFilePath();
+}
+
+
+
+void DOCConverter::setTXTpath(TQString filename) {
+ if (!filename.isEmpty()) txtfilename = filename;
+}
+
+
+
+void DOCConverter::setPDB(PilotDatabase * dbi) {
+ if (dbi) docdb = dbi;
+}
+
+
+
+TQString DOCConverter::readText() {
+ FUNCTIONSETUP;
+ if (txtfilename.isEmpty()) return TQString();
+ TQFile docfile(txtfilename);
+ if (!docfile.open(IO_ReadOnly))
+ {
+ emit logError(i18n("Unable to open text file %1 for reading.").arg(txtfilename));
+ return TQString();
+ }
+
+ TQTextStream docstream(&docfile);
+
+ TQString doc = docstream.read();
+ docfile.close();
+ return doc;
+}
+
+
+
+int DOCConverter::findBmkEndtags(TQString &text, bmkList&fBmks) {
+ FUNCTIONSETUP;
+ // Start from the end of the text
+ int pos = text.length() - 1, nr=0;
+ bool doSearch=true;
+ while (pos >= 0/* && doSearch*/) {
+ DEBUGKPILOT<<"Current character is \'"<<text[pos].latin1()<<"\'"<<endl;
+ // skip whitespace until we reach a >
+ while (text[pos].isSpace() && pos >= 0) {
+ DEBUGKPILOT<<"Skipping whitespaces at the end of the file"<<endl;
+ pos--;
+ }
+ // every other character than a > is assumed to belong to the text, so there are no more bookmarks.
+ if (pos < 0 || text[pos] != '>') {
+ DEBUGKPILOT<<"Current character \'"<<text[pos].latin1()<<"\' at position "<<pos<<" is not and ending >. Finish searching for bookmarks."<<endl;
+
+ pos=-1;
+ break;
+ } else {
+ int endpos = pos;
+ doSearch=true;
+ DEBUGKPILOT<<"Found the ending >, now looking for the opening <"<<endl;
+
+ // Search for the opening <. There must not be a newline in the bookmark text.
+ while (doSearch && pos > 0) {
+// DEBUGKPILOT<<"pos="<<pos<<", char="<<text[pos].latin1()<<endl;
+ pos--;
+ if (text[pos] == '\n') {
+ DEBUGKPILOT<<"Found carriage return at position "<<pos<<" inside the bookmark text, assuming this is not a bookmark, and the text ends in a >"<<endl;
+ doSearch = false;
+ pos = -1;
+ break;
+ }
+ if (text[pos] == '<') {
+ fBmks.append(new docMatchBookmark(text.mid(pos + 1, endpos - pos - 1)));
+ ++nr;
+ DEBUGKPILOT<<"Found opening < at position "<<pos<<", bookmarktext ="<<text.mid(pos+1, endpos-pos-1)<<endl;
+ text.remove(pos, text.length());
+ pos--;
+ doSearch = false;
+ }
+ }
+ }
+ DEBUGKPILOT<<"Finished processing the next bookmark, current position: "<<pos<<endl;
+ }
+ return nr;
+}
+
+int DOCConverter::findBmkInline(TQString &text, bmkList &fBmks) {
+ FUNCTIONSETUP;
+// bmkList res;
+ int nr=0;
+ TQRegExp rx(CSL1("<\\*(.*)\\*>"));
+
+ rx.setMinimal(TRUE);
+ int pos = 0;
+ while (pos >= 0) {
+ pos = rx.search(text, pos);
+ if (pos >= 0) {
+ fBmks.append(new docBookmark(rx.cap(1), pos+1));
+ ++nr;
+ text = text.remove(pos, rx.matchedLength());
+ }
+ }
+ return nr;
+}
+
+int DOCConverter::findBmkFile(TQString &, bmkList &fBmks) {
+ FUNCTIONSETUP;
+ int nr=0;
+
+ TQString bmkfilename = txtfilename;
+ if (bmkfilename.endsWith(CSL1(".txt"))){
+ bmkfilename.remove(bmkfilename.length()-4, 4);
+ }
+ TQString oldbmkfilename=bmkfilename;
+ bmkfilename+=CSL1(BMK_SUFFIX);
+ TQFile bmkfile(bmkfilename);
+ if (!bmkfile.open(IO_ReadOnly)) {
+ bmkfilename=oldbmkfilename+CSL1(PDBBMK_SUFFIX);
+ bmkfile.setName(bmkfilename);
+ if (!bmkfile.open(IO_ReadOnly)) {
+ DEBUGKPILOT<<"Unable to open bookmarks file "<<bmkfilename<<" for reading the bookmarks of "<<docdb ->dbPathName()<<endl;
+ return 0;
+ }
+ }
+
+ DEBUGKPILOT<<"Bookmark file: "<<bmkfilename<<endl;
+
+ TQTextStream bmkstream(&bmkfile);
+ TQString line;
+ while ( !(line=bmkstream.readLine()).isEmpty() ) {
+ if (!line.isEmpty() && !line.startsWith(CSL1("#")) ) {
+ TQStringList bmkinfo=TQStringList::split(CSL1(","), line);
+ int fieldnr=bmkinfo.count();
+ // We use the same syntax for the entries as MakeDocJ bookmark files:
+ // <bookmark>,<string-to-search>,<bookmark-name-string>,<starting-bookmark>,<ending-bookmark>
+ // For an explanation see: http://home.kc.rr.com/krzysztow/PalmPilot/MakeDocJ/index.html
+ if (fieldnr>0){
+ DEBUGKPILOT<<"Working on bookmark \""<<line<<"\""<<endl;
+ docMatchBookmark*bmk=0L;
+ TQString bookmark=bmkinfo[0];
+ bool ok;
+ int pos=bookmark.toInt(&ok);
+ if (ok) {
+ if (fieldnr>1) {
+ TQString name(bmkinfo[1]);
+ DEBUGKPILOT<<"Bookmark \""<<name<<"\" set at position "<<pos<<endl;
+ fBmks.append(new docBookmark(name, pos));
+ }
+ } else if (bookmark==CSL1("-") || bookmark==CSL1("+")) {
+ if (fieldnr>1) {
+ TQString patt(bmkinfo[1]);
+ TQString name(patt);
+ if (fieldnr>2) {
+ int cap=bmkinfo[2].toInt(&ok);
+ if (ok) {
+ bmk=new docRegExpBookmark(patt, cap);
+ } else {
+ name=bmkinfo[2];
+ bmk=new docRegExpBookmark(patt, name);
+ }
+ } else{
+ bmk=new docRegExpBookmark(patt, name);
+ }
+ // The third entry in the line (optional) denotes the index of a capture subexpression (if an integer) or the bookmark text as regexp (if a string)
+ DEBUGKPILOT<<"RegExp Bookmark, pattern="<<patt<<", name="<<name<<endl;
+ if (bmk) {
+ if (bookmark==CSL1("-")) {
+ bmk->from=1;
+ bmk->to=1;
+ } else {
+ if (fieldnr>3) {
+ bool ok;
+ int tmp=bmkinfo[3].toInt(&ok);
+ if (ok) bmk->from=tmp;
+ if (fieldnr>4) {
+ tmp=bmkinfo[4].toInt(&ok);
+ if (ok) bmk->to=tmp;
+ }
+ }
+ }
+ fBmks.append(bmk);
+ bmk=0L;
+ } else {
+ DEBUGKPILOT<<"Could not allocate bookmark "<<name<<endl;
+ }
+ } else {
+ DEBUGKPILOT<<"RegExp bookmark found with no other information (no bookmark pattern nor name)"<<endl;
+ }
+ } else {
+ TQString pattern(bookmark);
+ if (fieldnr>1) pattern=bmkinfo[1];
+ if (fieldnr>2) bookmark=bmkinfo[2];
+ DEBUGKPILOT<<"RegExp Bookmark, pattern="<<pattern<<", name="<<bookmark<<endl;
+ bmk=new docRegExpBookmark(pattern, bookmark);
+ if (bmk) {
+ bmk->from=1;
+ bmk->to=1;
+ fBmks.append(bmk);
+ }
+ }
+ } // fieldnr>0
+ } // !line.isEmpty()
+ } // while
+ return nr;
+}
+
+bool DOCConverter::convertTXTtoPDB() {
+ FUNCTIONSETUP;
+
+ if (!docdb) {
+ emit logError(i18n("Unable to open Database for writing"));
+ return false;
+ }
+
+ TQString text = readText();
+
+ if (fBmkTypes & eBmkEndtags) {
+ findBmkEndtags(text, fBookmarks);
+ } // end: EndTag Bookmarks
+
+
+ // Search for all tags <* Bookmark text *> in the text. We have to delete them immediately, otherwise the later bookmarks will be off.
+ if (fBmkTypes & eBmkInline) {
+ findBmkInline(text, fBookmarks);
+ } // end: Inline Bookmarks
+
+
+ // Read in regular expressions and positions from an external file (doc-filename with extension .bmk)
+ if (fBmkTypes & eBmkFile)
+ {
+ findBmkFile(text, fBookmarks);
+ }
+
+ // Process the bookmarks: find the occurrences of the regexps, and sort them if requested:
+ bmkSortedList pdbBookmarks;
+ pdbBookmarks.setAutoDelete(TRUE);
+ docBookmark*bmk;
+ for (bmk = fBookmarks.first(); bmk; bmk = fBookmarks.next())
+ {
+ bmk->findMatches(text, pdbBookmarks);
+ }
+
+ switch (eSortBookmarks)
+ {
+ case eSortName:
+ docBookmark::compare_pos=false;
+// qHeapSort(pdbBookmarks);
+ pdbBookmarks.sort();
+ break;
+ case eSortPos:
+ docBookmark::compare_pos=true;
+ pdbBookmarks.sort();
+ break;
+ case eSortNone:
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ DEBUGKPILOT << "Bookmarks: "<<endl;
+ for (bmk = pdbBookmarks.first(); bmk; bmk = pdbBookmarks.next())
+ {
+ DEBUGKPILOT<<bmk->bmkName.left(20)<<" at position "<<bmk->position<<endl;
+ }
+#endif
+
+ if (!docdb->isOpen()) {
+ emit logError(i18n("Unable to open palm doc database %1").arg(docdb->dbPathName()) );
+ return false;
+ }
+
+ // Clean the whole database, otherwise the records would be just appended!
+ docdb->deleteRecord(0, true);
+
+ // Header record for the doc file format
+ PilotDOCHead docHead;
+ docHead.position=0;
+ docHead.recordSize=4096;
+ docHead.spare=0;
+ docHead.storyLen=text.length();
+ docHead.version=compress?DOC_COMPRESSED:DOC_UNCOMPRESSED;
+ docHead.numRecords=(int)( (text.length()-1)/docHead.recordSize)+1;
+ PilotRecord*rec=docHead.pack();
+ docdb->writeRecord(rec);
+ KPILOT_DELETE(rec);
+
+ DEBUGKPILOT << "Write header record: length="<<text.length()<<", compress="<<compress<<endl;
+
+ // First compress the text, then write out the bookmarks and - if existing - also the annotations
+ int len=text.length();
+ int start=0,reclen=0;
+ int recnum=0;
+ while (start<len)
+ {
+ reclen=min(len-start, PilotDOCEntry::TEXT_SIZE);
+ DEBUGKPILOT << "Record #"<<recnum<<", reclen="<<reclen<<", compress="<<compress<<endl;
+
+ PilotDOCEntry recText;
+// recText.setText(text.mid(start, reclen), reclen);
+ recText.setText(text.mid(start, reclen));
+// if (compress)
+ recText.setCompress(compress);
+ PilotRecord*textRec=recText.pack();
+ docdb->writeRecord(textRec);
+ ++recnum;
+ start+=reclen;
+ KPILOT_DELETE(textRec);
+ }
+
+ recnum=0;
+ // Finally, write out the bookmarks
+ for (bmk = pdbBookmarks.first(); bmk; bmk = pdbBookmarks.next())
+// for (bmkList::const_iterator it=pdbBookmarks.begin(); it!=pdbBookmarks.end(); ++it)
+ {
+ ++recnum;
+ DEBUGKPILOT << "Bookmark #"<<recnum<<", Name="<<bmk->bmkName.left(20)<<", Position="<<bmk->position<<endl;
+
+ PilotDOCBookmark bmkEntry;
+ bmkEntry.pos=bmk->position;
+ strncpy(&bmkEntry.bookmarkName[0], bmk->bmkName.latin1(), 16);
+ PilotRecord*bmkRecord=bmkEntry.pack();
+ docdb->writeRecord(bmkRecord);
+ KPILOT_DELETE(bmkRecord);
+ }
+
+ pdbBookmarks.clear();
+ fBookmarks.clear();
+
+ return true;
+}
+
+
+
+bool DOCConverter::convertPDBtoTXT()
+{
+ FUNCTIONSETUP;
+ if (txtfilename.isEmpty()) {
+ emit logError(i18n("No filename set for the conversion"));
+ return false;
+ }
+
+ if (!docdb) {
+ emit logError(i18n("Unable to open Database for reading"));
+ return false;
+ }
+
+ // The first record of the db is the document header containing information about the doc db
+ PilotRecord*headerRec = docdb->readRecordByIndex(0);
+ if (!headerRec)
+ {
+ emit logError(i18n("Unable to read database header for database %1.").arg(docdb->dbPathName()));
+ KPILOT_DELETE(docdb);
+ return false;
+ }
+ PilotDOCHead header(headerRec);
+ KPILOT_DELETE(headerRec);
+
+ DEBUGKPILOT<<"Database "<<docdb->dbPathName()<<" has "<<header.numRecords<<" text records, "<<endl
+ <<" total number of records: "<<docdb->recordCount()<<endl
+ <<" position="<<header.position<<endl
+ <<" recordSize="<<header.recordSize<<endl
+ <<" spare="<<header.spare<<endl
+ <<" storyLen="<<header.storyLen<<endl
+// <<" textRecordSize="<<header.textRecordSize<<endl
+ <<" version="<<header.version<<endl;
+
+ // next come the header.numRecords real document records (might be compressed, see the version flag in the header)
+ TQFile docfile(txtfilename);
+ if (!docfile.open(IO_WriteOnly))
+ {
+ emit logError(i18n("Unable to open output file %1.").arg(txtfilename));
+ KPILOT_DELETE(docdb);
+ return false;
+ }
+ TQString doctext;
+ for (int i=1; i<header.numRecords+1; ++i)
+ {
+ PilotRecord*rec=docdb->readRecordByIndex(i);
+ if (rec)
+ {
+ PilotDOCEntry recText(rec, header.version==DOC_COMPRESSED);
+ doctext.append(recText.getText());
+ DEBUGKPILOT<<"Record "<<i<<endl;
+ KPILOT_DELETE(rec);
+ } else {
+ emit logMessage(i18n("Could not read text record #%1 from Database %2").arg(i).arg(docdb->dbPathName()));
+ }
+ }
+
+ // After the document records possibly come a few bookmark records, so read them in and put them in a separate bookmark file.
+ // for the ztxt conduit there might be annotations after the bookmarks, so the upper bound needs to be adapted.
+ int upperBmkRec=docdb->recordCount();
+ bmkSortedList bmks;
+ bmks.setAutoDelete(TRUE);
+ for (int i=header.numRecords+1; i<upperBmkRec; ++i)
+ {
+ PilotRecord*rec=docdb->readRecordByIndex(i);
+ if (rec)
+ {
+ PilotDOCBookmark bookie(rec);
+ docBookmark*bmk=new docBookmark(TQString::fromLatin1(bookie.bookmarkName), bookie.pos);
+ bmks.append(bmk);
+ KPILOT_DELETE(rec);
+ } else {
+ emit logMessage(i18n("Could not read bookmark record #%1 from Database %2").arg(i).arg(docdb->dbPathName()));
+ }
+ }
+ // TODO: Sort the list of bookmarks according to their position
+ docBookmark::compare_pos=true;
+ bmks.sort();
+
+ if ((fBmkTypes & eBmkFile) && (bmks.count()>0))
+ {
+ TQString bmkfilename = docfile.name();
+ if (bmkfilename.endsWith(CSL1(".txt"))){
+ bmkfilename.remove(bmkfilename.length()-4, 4);
+ }
+ bmkfilename+=CSL1(PDBBMK_SUFFIX);
+ TQFile bmkfile(bmkfilename);
+ if (!bmkfile.open(IO_WriteOnly))
+ {
+ emit logError(i18n("Unable to open file %1 for the bookmarks of %2.")
+ .arg(bmkfilename).arg(docdb ->dbPathName()));
+ }
+ else
+ {
+ DEBUGKPILOT<<"Writing "<<upperBmkRec-header.numRecords<<
+ "("<<upperBmkRec<<") bookmarks to file "<<bmkfilename<<endl;
+ TQTextStream bmkstream(&bmkfile);
+ for (docBookmark*bmk=bmks.first(); bmk; bmk=bmks.next())
+ {
+ bmkstream<<bmk->position<<", "<<bmk->bmkName<<endl;
+ }
+ //bmkstream.close();
+ bmkfile.close();
+ }
+ }
+ if (fBmkTypes & eBmkInline)
+ {
+ for (docBookmark*bmk=bmks.last(); bmk; bmk=bmks.prev())
+ {
+ doctext.insert(bmk->position, TQString(CSL1("<*") +
+ bmk->bmkName +
+ CSL1("*>")));
+ }
+ }
+
+ // Finally, write the actual text out to the file.
+ TQTextStream docstream(&docfile);
+ docstream<<doctext;
+ //docstream.close();
+ docfile.close();
+ docdb->cleanup();
+ // reset all records to unchanged. I don't know if this is really such a wise idea?
+ docdb->resetSyncFlags();
+ return true;
+}
+
+