summaryrefslogtreecommitdiffstats
path: root/klinkstatus/src/parser/url.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'klinkstatus/src/parser/url.cpp')
-rw-r--r--klinkstatus/src/parser/url.cpp350
1 files changed, 350 insertions, 0 deletions
diff --git a/klinkstatus/src/parser/url.cpp b/klinkstatus/src/parser/url.cpp
new file mode 100644
index 00000000..f7f1f6f8
--- /dev/null
+++ b/klinkstatus/src/parser/url.cpp
@@ -0,0 +1,350 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include <kresolver.h>
+
+#include "url.h"
+#include "mstring.h"
+#include "../utils/utils.h"
+
+#include <kcharsets.h>
+
+
+Node::LinkType Url::resolveLinkType(QString const& url)
+{
+ QString aux(url);
+ aux = KURL::decode_string(aux);
+
+ if(aux.isNull())
+ return Node::relative;
+
+ if(findWord(url, "FILE:") != -1)
+ return Node::file_href;
+ else if(findWord(KCharsets::resolveEntities(url), "MAILTO:") != -1)
+ return Node::mailto;
+ else if( (int)url.find(":/") != -1)
+ return Node::href;
+ else
+ return Node::relative;
+}
+
+KURL Url::normalizeUrl(QString const& string_url, LinkStatus const& link_parent, QString const& document_root)
+{
+ QString _string_url = string_url.stripWhiteSpace();
+
+ QString s_url;
+ KURL base_url;
+
+ // resolve base url
+ if(link_parent.hasBaseURI())
+ base_url = link_parent.baseURI();
+ else
+ base_url = link_parent.absoluteUrl();
+
+ // resolve relative url
+ if(_string_url.isEmpty())
+ return base_url;
+ else if(Url::hasProtocol(_string_url))
+ return KURL(_string_url);
+ else
+ {
+ s_url.prepend(base_url.protocol() + "://" + base_url.host());
+
+ if(_string_url[0] == '/') {
+ if(!base_url.protocol().startsWith("http")) {
+ s_url.append(document_root);
+ }
+ }
+ else {
+ s_url.append(base_url.directory(true, false) + "/");
+ }
+
+ if( (_string_url[0] == ';' || // parameters
+ _string_url[0] == '?' || // query
+ _string_url[0] == '#') ) // fragment or reference
+ {
+ s_url.append(base_url.fileName(false));
+ }
+
+ s_url.append(_string_url);
+ KURL url(s_url);
+ if(base_url.hasUser())
+ url.setUser(base_url.user());
+ if(base_url.hasPass())
+ url.setPass(base_url.pass());
+
+ url.setPort(base_url.port());
+
+ url.cleanPath();
+
+// kdDebug(23100) << "Normalized URL: "
+// << KCharsets::resolveEntities(KURL::decode_string(url.url())) << endl;
+
+ return KURL(KCharsets::resolveEntities(KURL::decode_string(url.url())));
+ }
+}
+
+KURL Url::normalizeUrl(QString const& string_url)
+{
+ QString qs_url(KCharsets::resolveEntities(string_url.stripWhiteSpace()));
+
+ if(qs_url[0] == '/')
+ {
+ KURL url;
+ url.setPath(qs_url);
+ url.cleanPath();
+ return url;
+ }
+
+ else
+ {
+ if(!Url::hasProtocol(qs_url))
+ qs_url.prepend("http://");
+
+ KURL url(qs_url);
+ url.cleanPath();
+ return url;
+ }
+}
+
+bool Url::existUrl(KURL const& url, vector<LinkStatus*> const& v)
+{
+ if(url.prettyURL().isEmpty())
+ return true;
+
+ for(uint i = 0; i != v.size(); ++i)
+ if(v[i]->absoluteUrl() == url)
+ return true;
+
+ return false;
+}
+
+/**
+ www.iscte.pt, iscte.pt => true;
+ iscte.pt, www.iscte.pt => true;
+ www.iscte.pt, alunos.iscte.pt => true; (if restrict = false)
+ www.iscte.pt, alunos.iscte.pt => false; (if restrict = true)
+ alunos.iscte.pt, www.iscte.pt => false;
+ alunos.iscte.pt, iscte.pt => false.
+*/
+// FIXME - Rename this function to sameDomain
+bool Url::equalHost(QString const& host1, QString const& host2, bool restrict)
+{
+ //Q_ASSERT(!host1.isEmpty());
+ //Q_ASSERT(!host2.isEmpty()); // this fails if href="javascript:......."
+ //if(host2.isEmpty())
+ //return false;
+
+ if(host1 == host2)
+ return true;
+
+ QString host1_(KNetwork::KResolver::normalizeDomain(host1));
+ QString host2_(KNetwork::KResolver::normalizeDomain(host2));
+ removeLastCharIfExists(host1_, '/');
+ removeLastCharIfExists(host2_, '/');
+
+ vector<QString> v1 = tokenizeWordsSeparatedByDots(host1_);
+ vector<QString> v2 = tokenizeWordsSeparatedByDots(host2_);
+ uint const size1 = v1.size();
+ uint const size2 = v2.size();
+
+ if( !(size1 >= 1 && size2 >= 1) && // localhost would have size = 1
+ !(host1_[0].isNumber() || host2_[0].isNumber()) ) // not (host == IP)
+ {
+ kdDebug(23100) << "Invalid host: " << host2 << endl;
+ return false;
+ }
+
+ vector<QString>::size_type aux = 0;
+ vector<QString>::size_type aux2 = 0;
+ if(v1[0] == "www")
+ aux = 1;
+ if(v2[0] == "www")
+ aux2 = 1;
+
+ if((size2 - aux2 < size1 - aux) && restrict) // e.g. paradigma.co.pt < linkstatus.paradigma.co.pt
+ return false;
+
+ if(restrict && (size2 - aux2 > size1 - aux)) // e.g. linkstatus.paradigma.co.pt > paradigma.co.pt
+ return false;
+
+ int i = 1;
+ while( ((int)(size1 - i) >= (int)aux) && ((int)(size2 - i) >= (int)aux) )
+ {
+ if( !(v1[size1 - i] == v2[size2 - i]) )
+ return false;
+
+ ++i;
+ }
+
+ return true;
+}
+
+/* This should be done by parsing but I wan't to know when some new scheme comes along :) */
+bool Url::hasProtocol(QString const& url)
+{
+ QString s_url(url);
+ s_url.stripWhiteSpace();
+
+ if(s_url[0] == '/')
+ return false;
+
+ else
+ {
+ KURL url = KURL::fromPathOrURL(s_url);
+ if(!url.protocol().isEmpty())
+ return true;
+ /*
+ if(s_url.startsWith("http:") ||
+ s_url.startsWith("https:") ||
+ s_url.startsWith("ftp:") ||
+ s_url.startsWith("sftp:") ||
+ s_url.startsWith("webdav:") ||
+ s_url.startsWith("webdavs:") ||
+ s_url.startsWith("finger:") ||
+ s_url.startsWith("fish:") ||
+ s_url.startsWith("imap:") ||
+ s_url.startsWith("imaps:") ||
+ s_url.startsWith("lan:") ||
+ s_url.startsWith("ldap:") ||
+ s_url.startsWith("pop3:") ||
+ s_url.startsWith("pop3s:") ||
+ s_url.startsWith("smtp:") ||
+ s_url.startsWith("smtps:") ||
+ s_url.startsWith("file:") ||
+ s_url.startsWith("news:") ||
+ s_url.startsWith("gopher:") ||
+ s_url.startsWith("mailto:") ||
+ s_url.startsWith("telnet:") ||
+ s_url.startsWith("prospero:") ||
+ s_url.startsWith("wais:") ||
+ s_url.startsWith("nntp:") )
+ {
+ return true;
+ }
+ */
+ else
+ return false;
+ }
+}
+
+/**
+ http://linkstatus.paradigma.co.pt/en/index.html&bix=bix -> /en/index.html&bix=bix
+*/
+QString Url::convertToLocal(LinkStatus const* ls)
+{
+ KURL url = ls->absoluteUrl();
+ KURL base_url = ls->rootUrl();
+
+ if(base_url == url)
+ return "./" + url.fileName();
+ else
+ return KURL::relativeURL(base_url, url);
+}
+
+/**
+ If url2 has the same domain has url1 returns true.
+ If restrict, sourceforge.net != quanta.sourceforge.net.
+ Else is equal.
+*/
+bool Url::localDomain(KURL const& url1, KURL const& url2, bool restrict)
+{
+ if(url1.protocol() != url2.protocol())
+ {
+ //kdDebug(23100) << "NOT localDomain" << endl;
+ return false;
+ }
+ else if(!url1.hasHost())
+ {
+ //kdDebug(23100) << "localDomain" << endl;
+ return true;
+ }
+ else
+ {
+ //return ::equalHost(url1.host(), url2.host(), restrict);
+ if(Url::equalHost(url1.host(), url2.host(), restrict))
+ {
+ //kdDebug(23100) << "localDomain" << endl;
+ return true;
+ }
+ else
+ {
+ //kdDebug(23100) << "NOT localDomain" << endl;
+ return false;
+ }
+
+ }
+}
+
+/**
+ Returns true if url2 is a parent of url1.
+*/
+bool Url::parentDir(KURL const& url1, KURL const& url2)
+{
+ if(url1.protocol() != url2.protocol())
+ return false;
+
+ else if(!url1.hasHost())
+ return url2.isParentOf(url1);
+
+ else
+ {
+ if(!equalHost(url1.host(), url2.host()))
+ return false;
+
+ vector<QString> tokens_1 = tokenizeWordsSeparatedBy(url1.directory(true, false), QChar('/'));
+ vector<QString> tokens_2 = tokenizeWordsSeparatedBy(url2.directory(true, false), QChar('/'));
+
+ if(tokens_1.size() == 0)
+ return false;
+
+ //if(tokens_2.size() > tokens_1.size() or tokens_2.size() == 0)
+ //return true;
+ vector<QString>::size_type size = 0;
+ if(tokens_1.size() < tokens_2.size())
+ size = tokens_1.size();
+ else
+ size = tokens_2.size();
+
+ for(vector<QString>::size_type i = 0; i != size; ++i)
+ {
+ if(tokens_2[i] != tokens_1[i])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool Url::externalLink(KURL const& url1, KURL const& url2, bool restrict)
+{
+ if(url1.protocol() != url2.protocol())
+ {
+ kdDebug(23100) << "externalLink" << endl;
+ return true;
+ }
+ else if(!url1.hasHost() && !url2.hasHost())
+ {
+ kdDebug(23100) << "NOT externalLink" << endl;
+ return false;
+ }
+ else
+ return !Url::equalHost(url1.host(), url2.host(), restrict);
+}