summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc376
1 files changed, 376 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc b/debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc
new file mode 100644
index 00000000..c418e62c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdig/ExternalTransport.cc
@@ -0,0 +1,376 @@
+//
+// ExternalTransport.cc
+//
+// ExternalTransport: Allows external programs to retrieve given URLs with
+// unknown protocols.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ExternalTransport.cc,v 1.9 2004/05/28 13:15:14 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "ExternalTransport.h"
+#include "htdig.h"
+#include "QuotedStringList.h"
+#include "URL.h"
+#include "Dictionary.h"
+#include "good_strtok.h"
+
+#include <ctype.h>
+#include <stdio.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+#include <stdlib.h>
+#ifdef HAVE_WAIT_H
+#include <wait.h>
+#elif HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif
+
+#include "defaults.h"
+
+static Dictionary *handlers = 0;
+static Dictionary *toTypes = 0;
+extern String configFile;
+
+//*****************************************************************************
+// ExternalTransport::ExternalTransport(const String &protocol)
+//
+//   Sets up a transport object for `protocol'.  If canHandle() reports that
+//   a handler is registered for the protocol, the handler string stored in
+//   the file-level `handlers' dictionary is copied into _Handler; otherwise
+//   _Handler is not touched here.  The protocol name is remembered and a
+//   new response object is allocated (the destructor deletes it).
+//
+ExternalTransport::ExternalTransport(const String &protocol)
+{
+    const int known = canHandle(protocol);
+    if (known)
+    {
+        // canHandle() has populated `handlers', so the lookup below is
+        // performed on an existing dictionary.
+        String *command = (String *)handlers->Find(protocol);
+        _Handler = command->get();
+    }
+
+    _Protocol = protocol;
+    _Response = new ExternalTransport_Response;
+}
+
+
+//*****************************************************************************
+// ExternalTransport::~ExternalTransport()
+//
+//   Releases the response object allocated by the constructor.
+//
+ExternalTransport::~ExternalTransport()
+{
+    // Deleting a null pointer is a no-op, so no explicit guard is required.
+    delete _Response;
+}
+
+
+//*****************************************************************************
+// int ExternalTransport::canHandle(const String &protocol)
+//
+//   Returns the result of handlers->Exists(protocol), i.e. whether an
+//   external handler has been registered for `protocol'.
+//
+//   On the first call the file-level `handlers' and `toTypes' dictionaries
+//   are built from the "external_protocols" configuration attribute.  That
+//   attribute is read as a whitespace separated list of pairs:
+//       <protocol>[-><to>]   <handler command>
+//   Anything from the first ':' onwards in the protocol part is dropped, so
+//   "https://" and "https" name the same entry.
+//
+int
+ExternalTransport::canHandle(const String &protocol)
+{
+    HtConfiguration *config = HtConfiguration::config();
+
+    if (!handlers)
+    {
+        handlers = new Dictionary();
+        toTypes = new Dictionary();
+
+        QuotedStringList entries(config->Find("external_protocols"), " \t");
+        String scheme;
+        String target;
+
+        // Entries come in pairs: [idx] is the protocol, [idx + 1] the handler.
+        int idx = 0;
+        while (entries[idx])
+        {
+            scheme = entries[idx];
+            target = "";
+
+            // Optional "from->to" form: split off the part after the arrow.
+            int pos = scheme.indexOf("->");
+            if (pos != -1)
+            {
+                target = scheme.sub(pos + 2).get();
+                scheme = scheme.sub(0, pos).get();
+            }
+
+            // Recognise service specified as "https://" rather than "https"
+            pos = scheme.indexOf(":");
+            if (pos != -1)
+                scheme = scheme.sub(0, pos).get();
+
+            handlers->Add(scheme, new String(entries[idx + 1]));
+            toTypes->Add(scheme, new String(target));
+
+            idx += 2;
+        }
+    }
+    return handlers->Exists(protocol);
+}
+
+
+//*****************************************************************************
+// void ExternalTransport::SetConnection(URL *u)
+//
+//   Stores a copy of the URL that will later be handed to the external
+//   handler, then forwards the URL's host and port to the base class.
+//   `u' is dereferenced unconditionally and so must not be null.
+//
+void ExternalTransport::SetConnection (URL *u)
+{
+    URL &target = *u;
+
+    // Keep our own copy of the full URL for the handler's command line.
+    _URL = target;
+
+    // Let the parent class record the host/port part of the connection.
+    Transport::SetConnection (target.host(), target.port());
+}
+
+
+//*****************************************************************************
+// DocStatus ExternalTransport::Request()
+//
+//   Runs the external handler for the current URL and fills in _Response
+//   from what the handler writes to its standard output.
+//
+//   The handler is executed as
+//       <handler words...> <protocol> <url> <config file>
+//   in a forked child whose stdout is connected to a pipe.  The parent
+//   first reads tab separated header lines, one field per line, selected
+//   by the first character of the line:
+//       s  status code        r  reason phrase     m  modification time
+//       t  content type       l  content length    u  redirect location
+//   The header ends at the first line that yields no token (e.g. an empty
+//   line).  Everything after that is stored as document contents, limited
+//   to _max_document_size bytes.
+//
+//   Returns GetDocumentStatus(_Response).  On a pipe/fork/fdopen failure
+//   the response is left in its Reset() state and every descriptor or
+//   child process created so far is released before returning.
+//
+//   When built with _MSC_VER defined nothing is executed and the status
+//   of the untouched response is returned.
+//
+Transport::DocStatus ExternalTransport::Request()
+{
+// NEAL - ENABLE/REWRITE THIS ASAP FOR WIN32
+#ifndef _MSC_VER /* _WIN32 */
+    // Set up a clean response before anything can fail, so that an early
+    // error return does not report the status left over from a previous
+    // request.
+    // NOTE(review): assumes Reset() leaves a non-success status code - confirm.
+    _Response->Reset();
+
+    //
+    // Start the external handler, passing the protocol, URL and config file
+    // as command arguments
+    //
+    StringList hargs(_Handler);
+    char **handlargs = new char * [hargs.Count() + 5];
+    int argi;
+    for (argi = 0; argi < hargs.Count(); argi++)
+        handlargs[argi] = (char *)hargs[argi];
+    handlargs[argi++] = _Protocol.get();
+    handlargs[argi++] = (char *)_URL.get().get();
+    handlargs[argi++] = configFile.get();
+    handlargs[argi++] = 0;
+
+    int stdout_pipe[2];
+    int fork_result = -1;
+    int fork_try;
+
+    if (pipe(stdout_pipe) == -1)
+    {
+        if (debug)
+            cerr << "External transport error: Can't create pipe!" << endl;
+        delete [] handlargs;
+        return GetDocumentStatus(_Response);
+    }
+
+    // fork() can fail transiently; try up to four times, pausing in between.
+    for (fork_try = 4; --fork_try >= 0;)
+    {
+        fork_result = fork(); // Fork so we can execute in the child process
+        if (fork_result != -1)
+            break;
+        if (fork_try)
+            sleep(3);
+    }
+    if (fork_result == -1)
+    {
+        if (debug)
+            cerr << "Fork Failure in ExternalTransport" << endl;
+        // Neither end of the pipe will ever be used: do not leak them.
+        close(stdout_pipe[0]);
+        close(stdout_pipe[1]);
+        delete [] handlargs;
+        return GetDocumentStatus(_Response);
+    }
+
+    if (fork_result == 0) // Child process
+    {
+        // Make the write end of the pipe the child's STDOUT.  dup2() targets
+        // descriptor 1 explicitly, which close()+dup() does not guarantee
+        // when a lower descriptor such as stdin happens to be closed.
+        if (stdout_pipe[1] != STDOUT_FILENO)
+        {
+            if (dup2(stdout_pipe[1], STDOUT_FILENO) == -1)
+                _exit(EXIT_FAILURE);
+            close(stdout_pipe[1]);
+        }
+        if (stdout_pipe[0] != STDOUT_FILENO)
+            close(stdout_pipe[0]);
+        // not really necessary, and may pose Cygwin incompatibility...
+        //close(STDIN_FILENO); // Close STDIN to replace with null dev.
+        //open("/dev/null", O_RDONLY);
+
+        // Call External Transport Handler
+        execv(handlargs[0], handlargs);
+
+        // Only reached when execv() failed.  Use _exit() so the child does
+        // not flush stdio buffers or run exit handlers inherited from the
+        // parent process.
+        _exit(EXIT_FAILURE);
+    }
+
+    // Parent Process
+    delete [] handlargs;
+    close(stdout_pipe[1]); // Close STDOUT for writing
+    FILE *input = fdopen(stdout_pipe[0], "r");
+    if (input == NULL)
+    {
+        if (debug)
+            cerr << "Fdopen Failure in ExternalTransport" << endl;
+        // Release the read end ourselves (no FILE owns it) and reap the
+        // child so that it does not linger as a zombie.
+        close(stdout_pipe[0]);
+        int failed_status;
+        waitpid(fork_result, &failed_status, 0);
+        return GetDocumentStatus(_Response);
+    }
+
+    // We just accessed the document
+    _Response->_access_time = new HtDateTime();
+    _Response->_access_time->SettoNow();
+
+
+    // OK, now parse the stuff we got back from the handler...
+    String line;
+    char *token1;
+    int in_header = 1;
+
+    while (in_header && readLine(input, line))
+    {
+        line.chop('\r');
+        if (line.length() > 0 && debug > 2)
+            cout << "Header line: " << line << endl;
+        token1 = strtok(line, "\t");
+        if (token1 == NULL)
+        {
+            // A line without any token terminates the header section.
+            in_header = 0;
+            break;
+        }
+
+        // The first character of the first token selects the field; the
+        // value is the next tab separated token on the same line.
+        switch (*token1)
+        {
+            case 's': // status code
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_status_code = atoi(token1);
+                else
+                    cerr<< "External transport error: expected status code in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            case 'r': // status reason
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_reason_phrase = token1;
+                else
+                    cerr<< "External transport error: expected status reason in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            case 'm': // modification time
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_modification_time= NewDate(token1); // Hopefully we can grok it...
+                else
+                    cerr<< "External transport error: expected modification time in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            case 't': // Content-Type
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_content_type = token1;
+                else
+                    cerr<< "External transport error: expected content-type in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            case 'l': // Content-Length
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_content_length = atoi(token1);
+                else
+                    cerr<< "External transport error: expected content-length in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            case 'u': // redirect target
+                token1 = strtok(0, "\t");
+                if (token1 != NULL)
+                    _Response->_location = token1;
+                else
+                    cerr<< "External transport error: expected URL in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+
+            default:
+                cerr<< "External transport error: unknown field in line "<<line<<"\n" << " URL: " << _URL.get() << "\n";
+                break;
+        }
+    }
+
+    // OK, now we read in the rest of the document as contents...
+    _Response->_contents = 0;
+    char docBuffer[8192];
+    int bytesRead;
+
+    while ((bytesRead = fread(docBuffer, 1, sizeof(docBuffer), input)) > 0)
+    {
+        if (debug > 2)
+            cout << "Read " << bytesRead << " from document\n";
+        // Never store more than _max_document_size bytes: trim the last
+        // chunk and stop reading once the limit has been reached.
+        if (_Response->_contents.length() + bytesRead > _max_document_size)
+            bytesRead = _max_document_size - _Response->_contents.length();
+        _Response->_contents.append(docBuffer, bytesRead);
+        if (_Response->_contents.length() >= _max_document_size)
+            break;
+    }
+    _Response->_document_length = _Response->_contents.length();
+    fclose(input);
+    // close(stdout_pipe[0]); // This is closed for us by the fclose()
+
+    // Reap exactly the handler we started; wait() would also collect the
+    // exit status of unrelated children of this process.
+    int status;
+    waitpid(fork_result, &status, 0);
+
+#endif
+
+    return GetDocumentStatus(_Response);
+}
+
+
+//*****************************************************************************
+// private
+// DocStatus ExternalTransport::GetDocumentStatus(ExternalTransport_Response *r)
+//
+//   Translates the status code held by `r' into a DocStatus value:
+//       200..299        -> Document_ok
+//       304             -> Document_not_changed
+//       301..399 (rest) -> Document_redirect
+//       401             -> Document_not_authorized
+//       anything else   -> Document_not_found (this includes 300)
+//
+Transport::DocStatus ExternalTransport::GetDocumentStatus(ExternalTransport_Response *r)
+{
+    const int code = r->GetStatusCode();
+
+    // Successful 2xx (200 itself included)
+    if (code >= 200 && code < 300)
+        return Document_ok;
+
+    // Not modified; must be tested before the generic 3xx range
+    if (code == 304)
+        return Document_not_changed;
+
+    // Redirection 3xx
+    if (code > 300 && code < 400)
+        return Document_redirect;
+
+    // Unauthorized
+    if (code == 401)
+        return Document_not_authorized;
+
+    // The default is 'not found' if we can't figure it out...
+    return Document_not_found;
+}
+
+
+//*****************************************************************************
+// private
+// int ExternalTransport::readLine(FILE *in, String &line)
+//
+//   Reads one line of arbitrary length from `in' into `line'.  `line' is
+//   cleared first; the terminating newline is removed from the result.
+//
+//   Returns 1 when a complete, newline terminated line was read.  When the
+//   input ends before a newline is seen, returns non-zero only if some
+//   characters were collected, and 0 otherwise.
+//
+int
+ExternalTransport::readLine(FILE *in, String &line)
+{
+    char buffer[2048];
+
+    line = 0;
+    while (fgets(buffer, sizeof(buffer), in))
+    {
+        size_t length = strlen(buffer);
+
+        // Whether the chunk is a full line or only part of one, it belongs
+        // to the line being assembled.
+        line << buffer;
+
+        // A chunk that starts with a NUL byte has length 0; guard against
+        // indexing buffer[-1] in that case.
+        if (length > 0 && buffer[length - 1] == '\n')
+        {
+            //
+            // A full line has been read.  Return it.
+            //
+            line.chop('\n');
+            return 1;
+        }
+
+        //
+        // Only a partial line was read.  Go round again and read some more.
+        //
+    }
+    return line.length() > 0;
+}
+