summary refs log tree commit diff stats
path: root/debian/htdig/htdig-3.2.0b6/test/t_factors
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_factors')
-rwxr-xr-x debian/htdig/htdig-3.2.0b6/test/t_factors 235
1 files changed, 235 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_factors b/debian/htdig/htdig-3.2.0b6/test/t_factors
new file mode 100755
index 00000000..c1127077
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_factors
@@ -0,0 +1,235 @@
+#!/bin/sh
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_factors,v 1.7 2004/06/05 06:26:22 lha Exp $
+#
+
+# Tests (or should eventually test) the following config attributes:
+# author_factor
+# backlink_factor
+# caps_factor
+# date_factor (TODO)
+# description_factor
+# heading_factor
+# keywords_factor
+# meta_description_factor
+# multimatch_factor
+# search_results_order
+# text_factor
+# title_factor
+# url_seed_score
+# url_text_factor
+
+# try_order comment query pattern1 pattern2 ...
+# comment - description of test, displayed if error occurs
+# query - search string passed to htsearch
+# pattern - awk regexps that must match output lines *in order*, each on a later line
+try_order() {
+ comment="$1"
+ shift
+ query="$1"
+ shift
+ $htsearch -c "$config" "$query" > "$tmp" 2> /dev/null # paths quoted: may contain spaces
+ array="" # accumulates awk statements that load the patterns into array[0..n-1]
+ for pattern
+ do
+ array="$array; array[i++] = "\"$pattern\"
+ done
+ miss=$($awk "BEGIN {$array; line = 0; } \
+ "'$0'" ~ \".*\"array[line] { line++ } \
+ END { print array[line] } " < "$tmp") # first pattern never matched, empty if all matched
+ if [ "$miss" != "" ]
+ then
+ $htsearch -vv -c "$config" "$query" > /dev/null # verbose rerun; stdout is discarded
+ echo "String \"$miss\" was not found where expected"
+ fail "$htsearch -c $config '$query' >> $tmp --
+ $comment"
+ fi
+}
+
+
+
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf.tmp
+tmp=/tmp/t_htsearch$$
+
+# set up config file with chosen non-default values
+cp $testdir/conf/htdig.conf $config
+
+$htdig "$@" -t -i -c $config || fail "Couldn't dig"
+$htpurge -c $config || fail "Couldn't purge"
+
+try_order "Search for 'also'" \
+ "words=also" \
+ '4 matches' 'site2.html' 'site4.html' 'bad_local.htm' 'script.html' # order before any set_attr
+
+set_attr url_seed_score "site4 *1000+1000"
+try_order "Seed score 1000 for site4.html" \
+ "words=also" \
+ '4 matches' 'site4.html' 'site2.html' 'bad_local.htm' 'script.html' # site4 moves to the top
+
+set_attr url_seed_score "site4 *1000+1000 script *1000+1000"
+try_order "Seed score 1000 for site4.html and script.html" \
+ "words=also" \
+ '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' # both seeded pages lead
+
+set_attr url_seed_score "site4|script *1000+1000"
+try_order "Seed score 1000 for site4|script" \
+ "words=also" \
+ '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' # same order as two rules
+
+set_attr search_results_order "bad_local"
+try_order "Search_results_order bad_local" \
+ "words=also" \
+ '4 matches' 'bad_local.htm' 'site4.html' 'script.html' 'site2.html' # seed score still set
+
+set_attr search_results_order "script * e2|e4"
+try_order "Search_results_order script * e2|e4" \
+ "words=also" \
+ '4 matches' 'script.html' 'bad_local.htm' 'site4.html' 'site2.html' # label now matches attr
+
+set_attr url_seed_score "" # clear the ordering overrides used by the tests above
+set_attr search_results_order ""
+set_attr author_factor 0
+set_attr backlink_factor 0
+set_attr caps_factor 0 # not implemented
+set_attr date_factor 0 # TODO
+set_attr description_factor 0
+set_attr heading_factor 0
+set_attr keywords_factor 0
+set_attr meta_description_factor 0
+set_attr multimatch_factor 0
+set_attr text_factor 0
+set_attr title_factor 0
+set_attr url_text_factor 0 # not implemented
+
+try_order "Search with factors 0" \
+ "words=also" \
+ 'No matches' # every factor was just set to 0
+
+try_order "Search for 'service' with title_factor 0" \
+ "words=service" \
+ 'No matches'
+set_attr title_factor 1
+try_order "Search for 'service' with title_factor 1" \
+ "words=service" \
+ '1 matches' 'script.html' # title_factor is the only non-zero factor here
+set_attr text_factor 0.3
+try_order "Greater weight to title factor" \
+ "words=service" \
+ '4 matches' 'script.html' 'site4.html' 'site%201.html' 'site3.html' # title hit stays first
+set_attr title_factor -3.2
+try_order "Checking negative title factor" \
+ "words=service" \
+ '4 matches' 'site4.html' 'site%201.html' 'site3.html' 'script.html' # title hit drops to last
+set_attr title_factor 0 # both back to 0 before the single-factor tests
+set_attr text_factor 0
+
+# test with all factors 0 except the one which matches
+
+set_attr description_factor 1
+try_order "Search for 'crossRef' with description_factor 1" \
+ "words=crossRef" \
+ '1 matches' 'site%201.html'
+set_attr description_factor 0 # back to 0 so the next test has one non-zero factor only
+
+set_attr author_factor 1
+try_order "Search for 'media' with author_factor 1" \
+ "words=media" \
+ '1 matches' 'script.html'
+set_attr author_factor 0 # back to 0, as above
+
+set_attr meta_description_factor 1
+try_order "Search for 'stars' with meta_description_factor 1" \
+ "words=stars" \
+ '1 matches' 'site2.html'
+set_attr meta_description_factor 0 # back to 0, as above
+
+set_attr heading_factor 1
+try_order "Search for 'obtain' with heading_factor 1" \
+ "words=obtain" \
+ '1 matches' 'bad_local.htm'
+set_attr heading_factor 0 # back to 0, as above
+
+set_attr keywords_factor 1
+try_order "Search for 'newWord' with keywords_factor 1" \
+ "words=newWord" \
+ '1 matches' 'title.html'
+set_attr keywords_factor 0 # back to 0, as above
+
+
+# test with all document-based factors non-zero except the one which matches
+set_attr author_factor 1
+#set_attr backlink_factor 1 # not document based
+set_attr caps_factor 1
+#set_attr date_factor 1 # not document based
+set_attr description_factor 1
+set_attr heading_factor 1
+set_attr keywords_factor 1
+set_attr meta_description_factor 1
+set_attr multimatch_factor 1
+set_attr text_factor 1
+set_attr title_factor 1
+set_attr url_text_factor 1
+set_attr description_factor 1 # redundant: already set to 1 a few lines above
+
+set_attr description_factor 0
+try_order "Search for 'crossRef' with description_factor 0" \
+ "words=crossRef" \
+ '1 matches' 'title.html' # unlike the other four cases, one page still matches
+set_attr description_factor 1 # restore before zeroing the next factor
+
+set_attr author_factor 0
+try_order "Search for 'media' with author_factor 0" \
+ "words=media" \
+ 'No matches'
+set_attr author_factor 1 # restore, as above
+
+set_attr meta_description_factor 0
+try_order "Search for 'stars' with meta_description_factor 0" \
+ "words=stars" \
+ 'No matches'
+set_attr meta_description_factor 1 # restore, as above
+
+set_attr heading_factor 0
+try_order "Search for 'obtain' with heading_factor 0" \
+ "words=obtain" \
+ 'No matches'
+set_attr heading_factor 1 # restore, as above
+
+set_attr keywords_factor 0
+try_order "Search for 'newWord' with keywords_factor 0" \
+ "words=newWord" \
+ 'No matches'
+set_attr keywords_factor 1 # restore, as above
+
+# multimatch_factor gives a "boost" to searches matching multiple terms
+set_attr title_factor 10 # "get" in title of bad_local
+set_attr multimatch_factor 10000
+try_order "Search for 'get or interest or repay' with multimatch_factor 10000" \
+ "words=get+interest+repay;method=or" \
+ '2 matches' 'site4.html' 'bad_local.htm' # with the boost site4.html is expected first
+set_attr multimatch_factor 0
+try_order "Search for 'get or interest or repay' with multimatch_factor 0" \
+ "words=get+interest+repay;method=or" \
+ '2 matches' 'bad_local.htm' 'site4.html' # without the boost the order flips
+
+# backlink counts the number of references (of any type) to this document
+set_attr backlink_factor 0
+try_order "site4.html has repay+interest, site 1.html only has suggestions" \
+ "words=suggestions+repay+interest;method=or" \
+ '2 matches' 'site4.html' 'site%201.html' # order with backlinks ignored
+set_attr backlink_factor 100
+try_order "site 1.html has a higher ratio of backlinks to outgoing links" \
+ "words=suggestions+repay+interest;method=or" \
+ '2 matches' 'site%201.html' 'site4.html' # backlink weight reverses the order
+
+test_functions_action=--stop-apache # counterpart of the --start-apache sourcing at the top
+. ./test_functions