diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_htdig_local')
-rwxr-xr-x | debian/htdig/htdig-3.2.0b6/test/t_htdig_local | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdig_local b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local new file mode 100755 index 00000000..8405ee07 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local @@ -0,0 +1,359 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $ +# + +# Tests the following config attributes: +# bad_local_extensions +# check_unique_md5 +# content_classifier +# database_dir +# exclude_urls +# limit_normalized +# limit_urls_to +# local_extensions +# local_urls +# local_urls_only +# local_user_urls +# max_hop_count +# md5_db +# mime_types +# remove_default_doc +# server_aliases +# start_url + +test_functions_action=--start-apache +. ./test_functions + +# set up config file with chosen non-default values +config=$testdir/conf/htdig.conf.tmp +cp $testdir/conf/htdig.conf2 $config + +################################################################################ +#test for local-file-system access to <http://...> URLs + +/bin/rm -f var/htdig2/* +set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html" +# ban ite3.htm from query, but not from main URL. +# Allow site3.html, but not title.html?site3.html +set_attr bad_querystr ite3.htm +expected='bad_local.htm' +got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "first htdig: expected +$expected +but got +$got" +fi + +expected='db.docdb +db.docs +db.docs.index +db.excerpts +db.worddump +db.words.db +db.words.db_weakcmpr' +got=`/bin/ls var/htdig2` +if [ "$expected" != "$got" ] +then + fail "created files: expected +$expected +but got +$got" +fi + +$htpurge -c $config + +# should http://localhost:7400/set1/sub%2520dir be purged? +expected='http://localhost:7400/set1/ +http://localhost:7400/set1/bad_local.htm +http://localhost:7400/set1/script.html +http://localhost:7400/set1/site%201.html +http://localhost:7400/set1/site2.html +http://localhost:7400/set1/site3.html +http://localhost:7400/set1/site4.html +http://localhost:7400/set1/sub%2520dir/ +http://localhost:7400/set1/sub%2520dir/empty%20file.html +http://localhost:7400/set1/title.html +http://localhost:7400/set1/title.html?site4.html' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "first document: expected +$expected +but got +$got" +fi + +set_attr bad_query_str + + +################################################################################ +# limit_urls_to applies before server alias expansion +set_attr start_url http://myhost/set1/index.html +set_attr limit_urls_to "http://myhost/set1/" +set_attr server_aliases myhost=localhost:7400 +$htdig "$@" -t -i -c $config || fail "couldn't dig second time" +$htpurge -c $config || fail "couldn't purge second time" +# only start_url uses alias, so only it passes the limit_urls_to test +expected='http://localhost:7400/set1/' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "second document: expected +$expected +but got +$got" +fi + + + +################################################################################ +# Check remote URLs not retrieved if local_urls_only specified +set_attr local_urls_only true +set_attr remove_default_doc site2.html +# Note: local_urls_only doesn't handle directories without a default doc +set_attr local_default_doc "site2.html empty%20file.html" +set_attr start_url http://myhost/set1/index.html +# don't care what the aliased URL is; only check the normalized one +set_attr limit_urls_to +set_attr limit_normalized "http://localhost:7400/set1/" +set_attr server_aliases myhost=localhost:7400 +$htdig "$@" -t -i -c $config || fail "couldn't dig third time" +$htpurge -c $config || fail "couldn't purge third time" +expected='http://localhost:7400/set1/ +http://localhost:7400/set1/index.html +http://localhost:7400/set1/script.html +http://localhost:7400/set1/site%201.html +http://localhost:7400/set1/site3.html +http://localhost:7400/set1/site4.html +http://localhost:7400/set1/sub%2520dir/ +http://localhost:7400/set1/title.html' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "third document: expected +$expected +but got +$got" +fi +set_attr remove_default_doc index.html +set_attr local_urls_only false +set_attr limit_normalized + + +################################################################################ +#test for <file:///...> URLs + +expected='' # no "bad local" extensions for file:/// +# Check only one "title.html" found... +set_attr check_unique_md5 true +set_attr start_url "http://localhost:7400/set1/title.html file://$PWD/htdocs/set1/" +set_attr limit_urls_to '${start_url}' +got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "fourth htdig: expected +$expected +but got +$got" +fi + +expected='db.docdb +db.docs +db.docs.index +db.excerpts +db.md5hash.db +db.worddump +db.words.db +db.words.db_weakcmpr' +got=`/bin/ls var/htdig2` +if [ "$expected" != "$got" ] +then + fail "fourth created files: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge fourth time" + +expected='file:///set1/bad_local.htm +file:///set1/index.html +file:///set1/script.html +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/site4.html +file:///set1/sub%2520dir/empty%20file.html +/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort | sed "s#.*/title.html#/title.html#"` + +if [ "$expected" != "$got" ] +then + fail "fourth document: expected +$expected +but got +$got" +fi + + +################################################################################ +#test mime types handling + +expected='' # no "bad local" extensions for file:/// +set_attr max_hop_count 1 # removes "empty%20file.html" +set_attr exclude_urls "site4.html script.html site[3].html" +set_attr bad_extensions .foo +set_attr local_urls_only false + +rm -f var/htdig2/db.md5hash.db +set_attr md5_db '${database_base}.md5.db' + +set_attr mime_types $PWD/mime-without-htm +set_attr content_classifier $PWD/say-text +echo 'text/html html' > mime-without-htm +echo '#!/bin/sh + echo text/plain' > say-text +chmod 700 say-text +got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "fifth htdig: expected +$expected +but got +$got" +fi + +expected='db.docdb +db.docs +db.docs.index +db.excerpts +db.md5.db +db.worddump +db.words.db +db.words.db_weakcmpr' +got=`/bin/ls var/htdig2` +if [ "$expected" != "$got" ] +then + fail "fifth created files: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge fifth time" + +expected='file:///set1/bad_local.htm +file:///set1/index.html +file:///set1/nph-location.cgi +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort` + +if [ "$expected" != "$got" ] +then + fail "fifth document: expected +$expected +but got +$got" +fi + +################################################################################ +expected='' # no "bad local" extensions for file:/// +set_attr max_hop_count # removes "empty%20file.html" +set_attr exclude_urls /CVS/ +set_attr valid_extensions ".foo .html" +set_attr bad_extensions + +set_attr mime_types $PWD/mime-without-htm +set_attr content_classifier $PWD/say-text +echo 'text/html html' > mime-without-htm +echo '#!/bin/sh + echo text/plain' > say-text +chmod 700 say-text +got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "sixth htdig: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge sixth time" + +expected='file:///set1/index.html +file:///set1/nph-location.foo +file:///set1/script.html +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/site4.html +file:///set1/sub%2520dir/empty%20file.html +file:///set1/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort` + +if [ "$expected" != "$got" ] +then + fail "sixth document: expected +$expected +but got +$got" +fi + + +################################################################################ +set_attr local_urls_only +set_attr local_urls "http://somewhere/=$PWD/htdocs/" +set_attr local_user_urls "http://somewhere/=$PWD/,/set1/" +set_attr start_url "http://somewhere/~htdocs/" + +set_attr valid_extensions +set_attr local_default_doc index.html +set_attr remove_default_doc index.html + +$htdig "$@" -t -i -c $config || fail "couldn't dig seventh time" +$htpurge -c $config || fail "couldn't purge seventh time" + +#local_urls_only can't handle .../~htdocs/sub%2520dir/empty%20file.html +expected='http://somewhere/~htdocs/ +http://somewhere/~htdocs/script.html +http://somewhere/~htdocs/site%201.html +http://somewhere/~htdocs/site2.html +http://somewhere/~htdocs/site3.html +http://somewhere/~htdocs/site4.html +http://somewhere/~htdocs/title.html' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "seventh document: expected +$expected +but got +$got" +fi + + +/bin/rm mime-without-htm say-text + +test_functions_action=--stop-apache +. ./test_functions |