summaryrefslogtreecommitdiffstats
path: root/kbabel/common/diff.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kbabel/common/diff.cpp')
-rw-r--r--kbabel/common/diff.cpp465
1 files changed, 465 insertions, 0 deletions
diff --git a/kbabel/common/diff.cpp b/kbabel/common/diff.cpp
new file mode 100644
index 00000000..7a54e4d3
--- /dev/null
+++ b/kbabel/common/diff.cpp
@@ -0,0 +1,465 @@
+/* **************************************************************************
+ This file is part of KBabel
+
+ Copyright (C) 2001 by Matthias Kiefer <kiefer@kde.org>
+ charDiff algorithm by Wolfram Diestel <wolfram@steloj.de>
+ wordDiff algorithm by Nick Shaforostoff <shafff@ukr.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ In addition, as a special exception, the copyright holders give
+ permission to link the code of this program with any edition of
+ the Qt library by Trolltech AS, Norway (or with modified versions
+ of Qt that use the same license as Qt), and distribute linked
+ combinations including the two. You must obey the GNU General
+ Public License in all respects for all of the code used other than
+ Qt. If you modify this file, you may extend this exception to
+ your version of the file, but you are not obligated to do so. If
+ you do not wish to do so, delete this exception statement from
+ your version.
+
+************************************************************************** */
+
+#include "diff.h"
+
+#include <kdebug.h>
+
+
+LCSprinter::LCSprinter(const QStringList &s_1, const QStringList &s_2, QValueVector<LCSMarker> *b_, const uint nT_, uint index):s1(s_1),s2(s_2),b(b_),nT(nT_)
+{
+ it1=s1.begin();
+ it2=s2.begin();
+ printLCS(index);
+}
+
+void LCSprinter::printLCS(uint index)
+{
+ //fprintf(stderr,"%2d. %2d. %2d. %2d\n",(uint)(*b)[index],nT,index%nT, index);
+ if (index % nT == 0 || index < nT)
+ {
+ //original LCS algo doesnt have to deal with ins before first common
+ uint bound = index%nT;
+ for (index=0; index<bound; ++index)
+ {
+ resultString.append("<KBABELADD>");
+ resultString.append(*it2);
+ ++it2;
+ resultString.append("</KBABELADD>");
+ }
+
+ return;
+ }
+
+ if (ARROW_UP_LEFT == (*b)[index])
+ {
+ printLCS(index-nT-1);
+ resultString.append(*it1);
+ ++it1;
+ ++it2;
+ return;
+ }
+ else if (ARROW_UP == (*b)[index])
+ {
+ printLCS(index-nT);
+ resultString.append("<KBABELDEL>");
+ resultString.append(*it1);
+ ++it1;
+ resultString.append("</KBABELDEL>");
+ return;
+ }
+ else
+ {
+ printLCS(index-1);
+ resultString.append("<KBABELADD>");
+ resultString.append(*it2);
+ ++it2;
+ resultString.append("</KBABELADD>");
+ return;
+ }
+
+}
+
+
+
+QString wordDiff(const QString& str1, const QString& str2)
+{
+ //separate punctuation marks etc from words as _only_ they may have changed
+ QStringList s1, s2;
+
+ uint i=0;
+ uint j=0;
+ uint l1=str1.length();
+ uint l2=str2.length();
+ QString temp;
+ temp.reserve(16);
+ /*
+ while ( i<l1 )
+ {
+ if (str1[i]==QChar(0x003C) && (str1[i+1]==QChar(0x002F) || str1[i+1].isLetter()) ) // 0x003C="<", 0x002F="/"
+ {// skip tag
+ while(i<l1 && str1[i]!=QChar(0x003E)) temp += str1[i++];
+ if (i++ <l1) temp += QChar(0x003E);
+ s1.append(temp);
+ temp.truncate(0);
+ }
+
+ while ( i<l1 && str1[i].isLetter() )
+ {
+ temp += str1[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s1.append(temp);
+ temp.truncate(0);
+ };
+
+ if (str1[i]==QChar(0x003C) && (str1[i+1]==QChar(0x002F) || str1[i+1].isLetter()) ) // 0x003C="<", 0x002F="/"
+ {// skip tag
+ while(i<l1 && str1[i]!=QChar(0x003E)) temp += str1[i++];
+ if (i++ <l1) temp += QChar(0x003E);
+ s1.append(temp);
+ temp.truncate(0);
+ }
+
+ while ( i<l1 && !(str1[i].isLetter() || str1[i]==QChar(0x003C)) )
+ {
+ temp += str1[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s1.append(temp);
+ temp.truncate(0);
+ };
+ }
+
+ i=0;
+ while ( i<l2 )
+ {
+ if (str2[i]==QChar(0x003C) && (str2[i+1]==QChar(0x002F) || str2[i+1].isLetter()) ) // 0x003C="<", 0x002F="/"
+ {// skip tag
+ while(i<l2 && str2[i]!=QChar(0x003E)) temp += str2[i++];
+ if (i++ <l2) temp += QChar(0x003E);
+ s2.append(temp);
+ temp.truncate(0);
+ }
+
+ while ( i<l2 && str2[i].isLetter() )
+ {
+ temp += str2[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s2.append(temp);
+ temp.truncate(0);
+ };
+ //FIXME bounds
+ if (str2[i]==QChar(0x003C) && (str2[i+1]==QChar(0x002F) || str2[i+1].isLetter()) ) // 0x003C="<", 0x002F="/"
+ {// skip tag
+ while(i<l2 && str2[i]!=QChar(0x003E)) temp += str2[i++];
+ if (i++ <l2) temp += QChar(0x003E);
+ s2.append(temp);
+ temp.truncate(0);
+ }
+
+ while ( i<l2 && !(str2[i].isLetter() || str2[i]==QChar(0x003C)) )
+ {
+ temp += str2[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s2.append(temp);
+ temp.truncate(0);
+ };
+ }
+*/
+
+ while ( i<l1 )
+ {
+ while ( i<l1 && str1[i].isLetter() )
+ {
+ temp += str1[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s1.append(temp);
+ temp.truncate(0);
+ };
+
+ while ( i<l1 && !str1[i].isLetter() )
+ {
+ s1.append(QString(str1[i++]));
+ }
+ }
+
+ i=0;
+ while ( i<l2 )
+ {
+ while ( i<l2 && str2[i].isLetter() )
+ {
+ temp += str2[i++];
+ }
+ if (!temp.isEmpty())
+ {
+ s2.append(temp);
+ temp.truncate(0);
+ };
+
+ while ( i<l2 && !str2[i].isLetter() )
+ {
+ s2.append(QString(str2[i++]));
+ }
+ }
+
+ uint mX = s1.count();
+ uint nY = s2.count();
+
+ uint mT = mX+1;
+ uint nT = nY+1;
+
+ QValueVector<LCSMarker> b(mT*nT, NOTHING);
+ QValueVector<uint> c(mT*nT, 0);
+
+
+// calculate the LCS
+ b[0] = FINAL;
+ uint index_cache;
+ QStringList::iterator it1, it2;
+
+ for (i=1, it1 = s1.begin(); i<mT; ++i, ++it1)
+ {
+ for (j=1, it2 = s2.begin(); j<nT; ++j, ++it2)
+ {
+ index_cache = i*nT+j;
+ if ((*it1)==(*it2))
+ {
+ c[index_cache] = c[index_cache-nT-1] + 1;
+ b[index_cache] = ARROW_UP_LEFT;
+ }
+ else if (c[index_cache-nT] >= c[index_cache-1])
+ {
+ c[index_cache] = c[index_cache-nT];
+ b[index_cache] = ARROW_UP;
+ }
+ else
+ {
+ c[index_cache] = c[index_cache-1];
+ b[index_cache] = ARROW_LEFT;
+ }
+ }
+ }
+
+ c.clear();
+
+
+ LCSprinter printer(s1, s2, &b, nT, index_cache);
+
+ /*//debug
+ fprintf(stderr,"b:\n");
+ for (i=0; i<=mT; i++) {
+ for (j=0; j<nY; j++) {
+// fprintf(stderr,"[%3d]%1d.",i*nT+j,(uint) b[i*nT+j]);
+ fprintf(stderr,"%1d.",(uint) b[i*nT+j]);
+}
+ fprintf(stderr,"%1d\n",(uint) b[i*nT+j]);
+}
+ fprintf(stderr,"\n");
+
+ // print the c table
+ fprintf(stderr,"c:\n");
+ for (i=0; i<=mX; i++) {
+ for (j=0; j<nY; j++) {
+ fprintf(stderr,"%2d.",(uint) c[i*nY+j]);
+}
+ fprintf(stderr,"%2d\n",(uint) c[i*nY+j]);
+}
+ fprintf(stderr,"\n");
+ */
+
+ return printer.getString();
+}
+
+
+
+
+/*
+// old algorithm by Wolfram Diestel <wolfram@steloj.de>
+QString Diff::charDiff(QString s1, QString s2)
+{
+ int n = 3;
+ s1+="xxx";
+ s2+="xxx";
+
+ int pos1=0;
+ int pos2=0;
+ int len1 = s1.length();
+ int len2 = s2.length();
+ QString resultstr;
+ bool found = true;
+ bool swap = false;
+ while (found && pos1+n < len1 && pos2+n < len2)
+ {
+ // while strings are equal, increase positions
+ int len = 0;
+ while (pos1+len < len1 && pos2+len < len2 &&
+ (s1[pos1+len] == s2[pos2+len]))
+ {
+ len++;
+ }
+
+ // write first result
+ resultstr += s1.mid(pos1,len);
+ pos1 += len;
+ pos2 += len;
+
+ // If strings are not equal try to find equal substrings of
+ // length n. For this increase positions alternating like
+ // 0 1, 1 0, 0 2, 1 1, 2 0, 0 3, 1 2, 2 1, 3 0 etc.
+
+ int x = 1;
+ found = false;
+
+ // If one position is already at the end stop processing
+ if (pos1+n >= len1 || pos2+n >= len2)
+ {
+ break;
+ }
+
+
+ // evtl. exchange
+ if (len2 - pos2 < len1 - pos1)
+ {
+ swap=true;
+
+ int pos = pos1;
+ pos1 = pos2;
+ pos2 = pos;
+
+ QString s = s1;
+ s1 = s2;
+ s2 = s;
+
+ int len = len1;
+ len1 = len2;
+ len2 = len;
+ }
+
+ while( !found && (pos1+x < len1 || pos2+x <len2))
+ {
+ int p1;
+ int p2;
+ for(p1 = 0, p2 = x; p1 <= x; p1++, p2--)
+ {
+ if( pos1+p1+n >= len1 || pos2+p2+n >= len2)
+ break;
+
+ if(s1.mid(pos1+p1,n) == s2.mid(pos2+p2,n))
+ {
+ found = true;
+
+ // evtl. exchange back
+ if(swap)
+ {
+ int pos = pos1;
+ pos1 = pos2;
+ pos2 = pos;
+
+ QString s = s1;
+ s1 = s2;
+ s2 = s;
+
+ int len = len1;
+ len1 = len2;
+ len2 = len;
+
+ int p = p1;
+ p1 = p2;
+ p2 = p;
+
+ swap=false;
+ }
+
+
+ // write current result
+ if (p1 > 0)
+ {
+ resultstr += "<KBABELDEL>";
+ resultstr += s1.mid(pos1,p1);
+ resultstr += "</KBABELDEL>";
+ pos1 += p1;
+ }
+
+ if (p2 > 0 )
+ {
+ resultstr += "<KBABELADD>";
+ resultstr += s2.mid(pos2,p2);
+ resultstr += "</KBABELADD>";
+ pos2 += p2;
+ }
+
+ break;
+ }
+ }
+
+ x++;
+
+ }
+
+ }
+
+ bool removed=false;
+
+ // evtl. exchange back
+ if(swap)
+ {
+ int pos = pos1;
+ pos1 = pos2;
+ pos2 = pos;
+
+ QString s = s1;
+ s1 = s2;
+ s2 = s;
+
+ int len = len1;
+ len1 = len2;
+ len2 = len;
+
+ swap=false;
+ }
+
+ // if there is a rest, add it
+ if(pos1+n < len1)
+ {
+ resultstr += "<KBABELDEL>";
+ resultstr += s1.mid(pos1,len1-pos1-n);
+ resultstr += "</KBABELDEL>";
+ removed=true;
+ }
+
+ if(pos2+n < len2)
+ {
+ resultstr += "<KBABELADD>";
+ resultstr += s2.mid(pos2,len2-pos2-n);
+ resultstr += "</KBABELADD>";
+ removed=true;
+ }
+
+ if( !removed )
+ {
+ resultstr.truncate( resultstr.length()-3 );
+ }
+
+ return resultstr;
+}
+*/