summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc576
1 files changed, 576 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc b/debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc
new file mode 100644
index 00000000..25922b27
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc
@@ -0,0 +1,576 @@
+
+// HtCookieMemJar.cc
+//
+// HtCookieMemJar: This class stores/retrieves cookies.
+//
+// by Robert La Ferla. Started 12/9/2000.
+// Reviewed by G.Bartolini - since 24 Feb 2001
+//
+////////////////////////////////////////////////////////////
+//
+// The HtCookieMemJar class stores/retrieves cookies
+// directly into memory. It is derived from HtCookieJar class.
+//
+// See "PERSISTENT CLIENT STATE HTTP COOKIES" Specification
+// at http://www.netscape.com/newsref/std/cookie_spec.html
+// Modified according to RFC2109 (max age and version attributes)
+//
+///////
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Part of the ht://Check package <http://htcheck.sourceforge.net/>
+// Copyright (c) 2001-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtCookieMemJar.cc,v 1.10 2004/05/28 13:15:23 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "HtCookieMemJar.h"
+#include "HtCookie.h"
+#include "List.h"
+#include "Dictionary.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+// Default constructor: creates an empty jar whose dictionary
+// iterator is positioned at the beginning.
+HtCookieMemJar::HtCookieMemJar()
+   : _key(0), _list(0), _idx(0)
+{
+   Dictionary* emptyJar = new Dictionary();
+   emptyJar->Start_Get();        // reset the iterator
+
+   cookieDict = emptyJar;
+}
+
+// Copy constructor: builds a deep copy of another jar.
+// Every domain of 'rhs' gets a List of its own in the new jar,
+// filled with copies of the cookies (never with the same objects).
+HtCookieMemJar::HtCookieMemJar(const HtCookieMemJar& rhs)
+   : _key(0), _list(0), _idx(0)
+{
+   // Whatever the state of 'rhs', this jar owns a dictionary of its own
+   cookieDict = new Dictionary();
+
+   if (rhs.cookieDict)
+   {
+      rhs.cookieDict->Start_Get();
+
+      // One pass for each domain stored in the source jar
+      char* domain;
+      while ((domain = rhs.cookieDict->Get_Next()))
+      {
+         List* copies = new List();
+         cookieDict->Add(domain, copies);    // register the domain
+
+         List* originals = (List*) rhs.cookieDict->Find(domain);
+         if (!originals)
+            continue;                        // nothing to copy for it
+
+         // Duplicate every cookie of that domain
+         originals->Start_Get();
+
+         HtCookie* source;
+         while ((source = (HtCookie *) originals->Get_Next()))
+            copies->Add((Object *) new HtCookie(*source));
+      }
+   }
+
+   cookieDict->Start_Get();                  // reset the iterator
+}
+
+// Destructor: dumps the content of the jar when the debug level
+// is higher than 4, then releases the dictionary.
+HtCookieMemJar::~HtCookieMemJar()
+{
+   if (debug > 4)
+      printDebug();
+
+   // 'delete' on a null pointer does nothing, no explicit test needed
+   delete cookieDict;
+}
+
+// Add a cookie to the Jar
+//
+// Builds a cookie object from the cookie string and the URL that
+// issued it, then hands it over to AddCookieForHost() using the host
+// of that URL. Always returns true.
+int HtCookieMemJar::AddCookie(const String &CookieString, const URL &url)
+{
+   HtCookie *parsed = new HtCookie(CookieString, url.get());
+
+   // A zero result means the jar did not keep the object:
+   // it is still ours and must be released here
+   const bool stored = AddCookieForHost(parsed, url.host());
+
+   if (!stored)
+      delete parsed;
+
+   return true;
+}
+
+
+// Add a cookie to a host
+//
+// Stores 'cookie' in the list kept for its domain. The domain is the
+// one carried by the cookie when it passes validation (minimum number
+// of periods, and the host tail-matches it); otherwise the cookie is
+// filed under HostName and flagged as having an invalid domain.
+//
+//   cookie    the cookie to be stored
+//   HostName  host that issued the cookie; an empty host name is
+//             accepted and reported as an "imported" cookie
+//
+// Returns non-zero when the cookie object has been added to the jar.
+// Returns zero when a cookie with the same name and path was already
+// stored: only the expiration time of the stored cookie is updated
+// and the jar does not keep the 'cookie' pointer.
+int HtCookieMemJar::AddCookieForHost(HtCookie *cookie, String HostName)
+{
+
+   List *list;            // pointer to the Cookie list of an exact host
+   HtCookie *theCookie;
+   bool inList = false;
+
+/////////////////////////////////////////////////////////////
+// That's an abstract from the Netscape Cookies specification
+/////////////////////////////////////////////////////////////
+//
+// When searching the cookie list for valid cookies,
+// a comparison of the domain attributes of the cookie
+// is made with the Internet domain name of the host from which the URL
+// will be fetched. If there is a tail match, then the cookie
+// will go through path matching to see if it should be sent.
+//
+// "Tail matching" means that domain attribute is matched against
+// the tail of the fully qualified domain name of the host.
+// A domain attribute of "acme.com" would match host names "anvil.acme.com"
+// as well as "shipping.crate.acme.com".
+//
+// Only hosts within the specified domain can set a cookie
+// for a domain and domains must have at least two (2)
+// or three (3) periods in them to prevent domains of
+// the form: ".com", ".edu", and "va.us".
+//
+// Any domain that falls within one of the seven special top level domains
+// listed below only require two periods.
+// Any other domain requires at least three.
+//
+// The seven special top level domains are:
+// "COM", "EDU", "NET", "ORG", "GOV", "MIL", and "INT".
+//
+// The default value of domain is the host name of the
+// server which generated the cookie response.
+//
+/////////////////////////////////////////////////////////////
+
+
+   // Let's get the domain of the cookie
+   String Domain(cookie->GetDomain());
+
+   // Lowercase the HostName
+   HostName.lowercase();
+
+   if (!Domain.length())
+      Domain = HostName;   // no domain attribute: default to the host
+   else
+   {
+      Domain.lowercase();  // lowercase the domain
+
+      // The cookie's domain must have a minimum number of periods
+      // inside, as stated by the abstract cited above
+      int minimum_periods = GetDomainMinNumberOfPeriods(Domain);
+
+      if (!minimum_periods)
+      {
+         if (debug > 2)
+            cout << "Cookie - Invalid domain "
+               << "(minimum number of periods): " << Domain << endl;
+
+         cookie->SetIsDomainValid(false);
+      }
+      else
+      {
+         // Let's see if the domain is now valid
+         const char* s = Domain.get();
+         const char* r = s + strlen(s) - 1;  // go to the last char
+         int num_periods = 1;                // at minimum is one
+
+         // Walk backwards down to (but excluding) the first character;
+         // a dot only counts when it is followed by a real label
+         while (r > s && *r)
+         {
+            if (*r == '.' && *(r+1) && *(r+1) != '.')
+               ++num_periods;  // when a 'dot' is found increment
+                               // the number of periods
+            --r;
+         }
+
+         if (num_periods >= minimum_periods) // here is a so-far valid domain
+         {
+            // Here 'r' is back at the beginning: skip the leading dots
+            while (*r && *r == '.')
+               ++r;  // goes beyond the first dot
+
+            if (r>s)
+               Domain.set((char*) r);  // Set the new 'shorter' domain
+
+            // Tail match of the host against the domain. A plain
+            // substring search is not enough: it would let hosts such as
+            // "acme.com.evil.org" or "badacme.com" set cookies for
+            // "acme.com". The domain must be the end of the host name
+            // and must begin on a label boundary.
+            const int host_len = HostName.length();
+            const int domain_len = Domain.length();
+            bool tail_match = false;
+
+            if (domain_len > 0 && host_len >= domain_len)
+            {
+               const char* host = HostName.get();
+               const char* tail = host + (host_len - domain_len);
+
+               tail_match = (strcmp(tail, Domain.get()) == 0)
+                  && (tail == host || *(tail - 1) == '.');
+            }
+
+            if (tail_match)
+            {
+               if (debug > 2)
+                  cout << "Cookie - valid domain: "
+                     << Domain << endl;
+            }
+            else if (HostName.length() == 0)
+            {
+               // No issuing host to compare with: accept the domain
+               if (debug > 2)
+                  cout << "Imported cookie - valid domain: "
+                     << Domain << endl;
+            }
+            else
+            {
+               cookie->SetIsDomainValid(false);
+               if (debug > 2)
+                  cout << "Cookie - Invalid domain "
+                     << "(host not within the specified domain): " << Domain << endl;
+            }
+         }
+         else
+         {
+            cookie->SetIsDomainValid(false);
+            if (debug > 2)
+               cout << "Cookie - Invalid domain "
+                  << "(minimum number of periods): " << Domain << endl;
+         }
+      }
+   }
+
+   if (! cookie->getIsDomainValid()) // Not a valid domain
+      Domain = HostName;  // Set the default
+
+   // Is the host in the dictionary?
+   if (cookieDict->Exists(Domain) == 0)
+   {
+      // No, add a list instance
+      list = new List();
+      cookieDict->Add(Domain, list);
+   }
+   else list = (List *)cookieDict->Find(Domain);
+
+   // Is cookie already in list?
+   list->Start_Get();
+
+   // Let's start looking for it
+   // The match is made on the name and the path
+
+   if (debug > 5)
+      cout << "- Let's go searching for the cookie '"
+         << cookie->GetName() << "' in the list" << endl;
+
+   while (!inList && (theCookie = (HtCookie *)list->Get_Next()))
+   {
+      if ( (theCookie->GetName().compare(cookie->GetName()) == 0 )
+         && ( theCookie->GetPath().compare(cookie->GetPath()) == 0 ))
+      {
+         // The cookie has been found
+         inList = true;
+
+         // Let's update the expiration datetime
+         // NOTE(review): only the expiration time is refreshed; the value
+         // of the stored cookie is left untouched - confirm it is intended
+         if (debug > 5)
+            cout << " - Found: Update cookie expire time." << endl;
+
+         theCookie->SetExpires(cookie->GetExpires());
+
+      }
+   }
+
+   // Well ... the cookie wasn't in the list. Until now! ;-)
+   // Let's go add it!
+   if (inList == false)
+   {
+      if (debug > 5)
+         cout << " - Not Found: let's go add it." << endl;
+
+      list->Add((Object *)cookie);
+   }
+
+   return !inList;
+}
+
+
+// Retrieve the list of cookies stored for a domain.
+// The result is whatever the dictionary lookup for DomainName yields.
+List * HtCookieMemJar::cookiesForDomain(const String &DomainName)
+{
+   return (List *)cookieDict->Find(DomainName);
+}
+
+
+
+// Writes on RequestString the cookies to be sent for a URL.
+//
+// The host of the URL is split into its parent domains, walking from
+// the end. For instance, for 'www.foo.bar.com':
+//    - bar.com
+//    - foo.bar.com
+//    - www.foo.bar.com
+// Every candidate stored in the jar is passed to
+// WriteDomainCookiesString(). Always returns true.
+int HtCookieMemJar::SetHTTPRequest_CookiesString(const URL &_url,
+   String &RequestString)
+{
+   String Host(_url.host());
+   Host.lowercase();
+
+   const int required_periods = GetDomainMinNumberOfPeriods(Host);
+
+   if (debug > 3)
+      cout << "Looking for cookies - Domain: "
+         << Host
+         << " (Minimum periods: " << required_periods << ")" << endl;
+
+   const char* const begin = Host.get();
+   int periods = 1;     // at minimum is one
+
+   // Scan backwards from the last character; the first one is never
+   // examined, so a leading dot does not count
+   for (const char* p = begin + strlen(begin) - 1; p > begin && *p; --p)
+   {
+      // Only a dot followed by a real label starts a parent domain
+      if (*p != '.' || !*(p+1) || *(p+1) == '.')
+         continue;
+
+      ++periods;
+
+      if (periods <= required_periods)
+         continue;      // not enough periods yet
+
+      const String SubDomain(p+1);
+
+      if (debug > 3)
+         cout << "Trying to find cookies for subdomain: "
+            << SubDomain << endl;
+
+      if (cookieDict->Exists(SubDomain))
+         WriteDomainCookiesString(_url, SubDomain, RequestString);
+   }
+
+   // Finally the complete host name itself
+   if (periods >= required_periods && cookieDict->Exists(Host))
+      WriteDomainCookiesString(_url, Host, RequestString);
+
+   return true;
+}
+
+
+
+/////////////////////////////////////////////////////////////
+// That's an abstract from the Netscape Cookies specification
+/////////////////////////////////////////////////////////////
+//
+//
+// When requesting a URL from an HTTP server, the browser will match
+// the URL against all cookies and if any of them match,
+// a line containing the name/value pairs of all matching cookies
+// will be included in the HTTP request.
+//
+// Here is the format of that line:
+// Cookie: NAME1=OPAQUE_STRING1; NAME2=OPAQUE_STRING2 ...
+//
+// This method writes on a string (RequestString) the headers
+// for cookies settings as defined by Netscape standard
+//
+//   _url           URL being requested (its path is matched against
+//                  the path of every cookie)
+//   Domain         key of the cookie list to be examined
+//   RequestString  string the cookies are appended to; a final CRLF
+//                  is added when at least one cookie has been written
+//
+// Cookies that are expired, or whose path is not a prefix of the
+// URL path, are skipped. Always returns true.
+//
+/////////////////////////////////////////////////////////////
+
+int HtCookieMemJar::WriteDomainCookiesString(const URL &_url,
+   const String &Domain, String &RequestString)
+{
+
+   // Cookie support. We need a list of cookies and a cookie object
+   List *cookieList;
+   HtCookie *cookie;
+   const HtDateTime now;   // Instant time, used for checking
+                           // cookies expiration time
+
+   // Let's find all the valid cookies depending on the specified domain
+   cookieList = cookiesForDomain(Domain);
+
+   if (cookieList)
+   {
+      // Let's store the number of cookies eventually sent
+      int NumCookies = 0;
+
+      // The path of the URL is the same for every cookie of the list
+      const String urlPath = _url.path();
+
+      if (debug > 5)
+         cout << "Found a cookie list for: '" << Domain << "'" << endl;
+
+      // Let's crawl the list for getting the 'path' matching ones
+      cookieList->Start_Get();
+
+      while ((cookie = (HtCookie *)cookieList->Get_Next()))
+      {
+         const String cookiePath = cookie->GetPath();
+
+         //
+         // A cookie is expired when:
+         //  - it has an Expires value and that datetime is before now, or
+         //  - it has a Max-Age and at least that many seconds have
+         //    elapsed since it was issued (RFC2109; a Max-Age of zero
+         //    means the cookie must be discarded immediately).
+         //
+         // The former test (age <= Max-Age) was inverted: it discarded
+         // the cookies that were still fresh and kept the stale ones.
+         //
+         // NOTE(review): a negative Max-Age is assumed to mean
+         // "attribute not set" - confirm against HtCookie's default.
+         //
+         const bool expiredByDate =
+            cookie->GetExpires() && (*(cookie->GetExpires()) < now);
+
+         const bool expiredByAge =
+            cookie->GetMaxAge() >= 0
+            && (HtDateTime::GetDiff(now, cookie->GetIssueTime())
+               >= cookie->GetMaxAge());
+
+         const bool expired = expiredByDate || expiredByAge;
+
+         if (debug > 5)
+            cout << "Trying to match paths and expiration time: "
+               << urlPath << " in " << cookiePath;
+
+         // Is the path matching (cookie path is a prefix of the URL path)?
+         if (!expired && !strncmp(cookiePath, urlPath, cookiePath.length()))
+         {
+
+            if (debug > 5)
+               cout << " (passed)" << endl;
+
+            ++NumCookies;
+
+            // Write the string by passing the cookie to the superclass' method
+            WriteCookieHTTPRequest(*cookie, RequestString, NumCookies);
+
+         }
+         else if (debug > 5) cout << " (discarded)" << endl;
+
+      }
+
+      // Have we sent one cookie at least?
+      if (NumCookies > 0)
+         RequestString <<"\r\n";
+
+   }
+
+   // That's the end of function
+   return true;
+}
+
+
+// Debug info: prints on the standard output every host of the jar,
+// followed by the debug dump of each of its cookies.
+void HtCookieMemJar::printDebug()
+{
+   cout << "Summary of the cookies stored so far" << endl;
+
+   cookieDict->Start_Get();
+
+   for (char* host = cookieDict->Get_Next(); host;
+      host = cookieDict->Get_Next())
+   {
+      cout << " - View cookies for: '" << host << "'" << endl;
+
+      List* cookies = (List *)cookieDict->Find(host);
+      cookies->Start_Get();
+
+      for (HtCookie* item = (HtCookie *)cookies->Get_Next(); item;
+         item = (HtCookie *)cookies->Get_Next())
+      {
+         item->printDebug();
+      }
+   }
+}
+
+
+///////
+   // Show the summary of the stored cookies: for every host its name
+   // and number of cookies, then the overall totals. The summary is
+   // written on 'out', which is also the return value; each cookie
+   // is additionally dumped through its own printDebug() method.
+///////
+
+ostream &HtCookieMemJar::ShowSummary(ostream &out)
+{
+   int total_cookies = 0;  // Global number of cookies
+   int total_hosts = 0;    // Number of servers with cookies
+
+   cookieDict->Start_Get();
+
+   out << endl << "Summary of the cookies" << endl;
+   out << "======================" << endl;
+
+   for (char* host = cookieDict->Get_Next(); host;
+      host = cookieDict->Get_Next())
+   {
+      ++total_hosts;
+
+      out << " Host: '" << host << "'" << endl;
+
+      List* cookies = (List *)cookieDict->Find(host);
+      cookies->Start_Get();
+
+      // Count (and dump) the cookies of this host
+      int host_cookies = 0;
+
+      for (HtCookie* item = (HtCookie *)cookies->Get_Next(); item;
+         item = (HtCookie *)cookies->Get_Next())
+      {
+         ++host_cookies;
+         item->printDebug();
+      }
+
+      out << " Number of cookies: " << host_cookies << endl << endl;
+
+      total_cookies += host_cookies;
+   }
+
+   out << "Total number of cookies: " << total_cookies << endl;
+   out << "Servers with cookies: " << total_hosts << endl << endl;
+
+   return out;
+
+}
+
+
+// Get the next cookie. It is a bit tricky, but for now it is good
+//
+// Walks the whole jar, host after host, one cookie per call. The
+// position is kept in _key (current host), _list (its cookies) and
+// _idx (number of calls since the last ResetIterator()).
+//
+// Returns the next cookie, or 0 when there is no dictionary, when a
+// host has no list attached, or when every host has been visited.
+const HtCookie* HtCookieMemJar::NextCookie()
+{
+   if (!cookieDict)
+      return 0;
+
+   // The first time we position at the beginning of the first host
+   if (!_idx && (_key = cookieDict->Get_Next())
+      && (_list = (List *)cookieDict->Find(_key)))
+         _list->Start_Get();
+
+   ++_idx;
+
+   if (!_key)
+      return 0; // ends
+
+   if (!_list)
+      return 0; // ends
+
+   const HtCookie* cookie = (const HtCookie*)_list->Get_Next();
+
+   // No (more) cookies for the current host: move on to the following
+   // ones. Hosts with an empty list are skipped instead of ending the
+   // iteration, so that the cookies of later hosts are not missed.
+   while (!cookie)
+   {
+      if (!(_key = cookieDict->Get_Next()))
+         return 0; // no more hosts
+
+      if (!(_list = (List *)cookieDict->Find(_key)))
+         return 0; // host without a list
+
+      _list->Start_Get();
+      cookie = (const HtCookie*)_list->Get_Next();
+   }
+
+   return cookie;
+}
+
+// Reset the iterator: the call counter goes back to zero and the
+// dictionary is rewound, so that NextCookie() starts over.
+void HtCookieMemJar::ResetIterator()
+{
+   _idx = 0;
+   cookieDict->Start_Get();
+}
+