summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/test/t_validwords
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_validwords')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/test/t_validwords196
1 files changed, 196 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_validwords b/debian/htdig/htdig-3.2.0b6/test/t_validwords
new file mode 100755
index 00000000..8dc0f9be
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_validwords
@@ -0,0 +1,196 @@
+#!/bin/sh
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_validwords,v 1.2 2004/05/28 13:15:30 lha Exp $
+#
+
+try() {
+ comment="$1"
+ shift
+ query="$1"
+ shift
+ $htsearch -c $config "$query" > $tmp 2> /dev/null
+ for pattern
+ do
+ if grep "$pattern" $tmp > /dev/null
+ then :
+ else
+ $htsearch -vv -c $config "$query" > /dev/null
+ echo "Output doesn't match \"$pattern\""
+ fail "$htsearch -c $config '$query' >> $tmp --
+ $comment"
+ fi
+ done
+}
+
+
+
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf.tmp
+tmp=/tmp/t_htsearch$$
+
+# set up config file with chosen non-default values
+cp $testdir/conf/htdig.conf $config
+
+set_attr allow_numbers "false"
+set_attr minimum_word_length "3"
+set_attr maximum_word_length "10"
+set_attr translate_latin1 "0"
+set_attr valid_punctuation "."
+set_attr extra_word_characters "çé"
+#set_attr locale fr
+
+$htdig "$@" -t -i -c $config || fail "Couldn't dig"
+
+set_attr remove_bad_urls "false"
+set_attr remove_unretrieved_urls "true"
+$htpurge -vv -c $config > tmp1 || fail "Couldn't purge"
+
+# How can I check that unretrieved urls have been removed, but bad ones haven't?
+
+
+
+try "Search for '2001' without allow_numbers" \
+ "words=2001" \
+ 'No matches'
+
+try "Search for '0b3' without allow_numbers" \
+ "words=0b3" \
+ '1 matches' 'bad_local.htm' '3.2.<strong>0b3</strong>'
+
+try "Search for '3.2.0b3' without allow_numbers" \
+ "words=3.2.0b3" \
+ '1 matches' 'bad_local.htm' '<strong>3.2.0b3</strong>'
+
+try "Search for '320b3' without allow_numbers" \
+ "words=320b3" \
+ '1 matches' 'bad_local.htm'
+
+try 'Search for "archive." without . in extra_word_characters' \
+ 'words=archive.' \
+ '1 matches' 'bad_local.htm' '<strong>archive</strong>.'
+
+try 'Search for "archive" without . in extra_word_characters' \
+ 'words=archive' \
+ '1 matches' 'bad_local.htm' '<strong>archive</strong>.'
+
+try "Search for 'graduateprofessional' which should not match a slash" \
+ "words=graduateprofessional" \
+ 'No matches'
+
+try "Search for 'now' with minimum_word_length=3" \
+ "words=now" \
+ '1 matches' 'bad_local.htm'
+
+try "Search for 'français' without translate_latin1" \
+ "words=français" \
+ '1 matches' 'site4.html' '<strong>français</strong>'
+
+try "Search for 'québec' without translate_latin1" \
+ "words=québec" \
+ 'No matches'
+
+try "Search for 'with' with default bad_word_list" \
+ "words=with" \
+ 'No matches'
+
+try "Search for 'technical' with default bad_word_list" \
+ "words=technical" \
+ '1 matches' 'site%201.html'
+
+
+
+
+
+set_attr allow_numbers "true"
+set_attr minimum_word_length "4"
+set_attr maximum_word_length "13"
+set_attr translate_latin1 "yes"
+set_attr valid_punctuation "/"
+set_attr extra_word_characters '.\\\$çé' # string is .\$çé, chars: .$çé
+set_attr bad_word_list "${testdir}/bad_word_list"
+#set_attr locale fr
+
+$htdig "$@" -t -i -c $config || fail "Couldn't dig"
+
+set_attr remove_bad_urls "true"
+set_attr remove_unretrieved_urls "false"
+$htpurge -vv -c $config > tmp || fail "Couldn't purge"
+
+# How can I check that bad urls have been removed, but unretrieved ones haven't?
+
+
+
+try "Search for '2001' " \
+ "words=2001" \
+ '1 matches' '1995-<strong>2001</strong>'
+
+try "Search for '9.00'" \
+ "words=9.00" \
+ '1 matches' 'site4.html' '<strong>9.00</strong>'
+
+try "Search for '9/00' -- checking . is not just valid_punctuation" \
+ "words=9/00" \
+ 'No matches'
+
+try 'Search for "archive." with . in extra_word_characters' \
+ 'words=archive.' \
+ '1 matches' 'bad_local.htm' '<strong>archive.</strong>'
+
+try 'Search for "archive" with . in extra_word_characters' \
+ 'words=archive' \
+ 'No matches'
+
+try 'Search for "$195"' \
+ 'words=$195' \
+ '1 matches' 'site4.html' '<strong>$195</strong>,000'
+
+try "Search for 'graduateprofessional' which should match a slash" \
+ "words=graduateprofessional" \
+ '1 matches' 'site4.html' '<strong>graduate/professional</strong>'
+
+#try "Search for 'graduateprofexyz' which should match a truncated word" \
+# "words=graduateprofexyz" \
+# '1 matches' 'site4.html' '<strong>graduate/professional</strong>'
+
+try "Search for 'graduateprofexyz' which should match a truncated word" \
+ "words=graduateprofexyz" \
+ '1 matches' 'site4.html'
+
+try "Search for 'graduateprofxyz' which should fail to match a truncated word" \
+ "words=graduateprofxyz" \
+ 'No matches'
+
+try "Search for 'part' with minimum_word_length=4" \
+ "words=part" \
+ '2 matches' 'bad_local.htm' 'script.html'
+
+try "Search for 'now' with minimum_word_length=4" \
+ "words=now" \
+ 'No matches'
+
+try "Search for 'français' with translate_latin1" \
+ "words=français" \
+ '1 matches' 'site4.html' '<strong>fran&ccedil;ais</strong>'
+
+try "Search for 'québec' with translate_latin1" \
+ "words=québec" \
+ '1 matches' 'site4.html' '<strong>Qu&eacute;bec</strong>'
+
+try "Search for 'with' with new bad_word_list" \
+ "words=with" \
+ '4 matches' 'bad_local.htm' 'script.html' 'site4.html' 'site%201.html'
+
+try "Search for 'technical' with new bad_word_list" \
+ "words=technical" \
+ 'No matches'
+
+test_functions_action=--stop-apache
+. ./test_functions