summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htfuzzy/Speling.cc
blob: e9f365e3fdb7aae25fbada15fabc24027999fe9a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
//
// Speling.cc
//
// Speling: (sic) Performs elementary (one-off) spelling correction for ht://Dig
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later 
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Speling.cc,v 1.12 2004/05/28 13:15:20 lha Exp $
//

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#include <fcntl.h>

#include "Speling.h"
#include "htString.h"
#include "List.h"
#include "StringMatch.h"
#include "HtConfiguration.h"

#ifdef HAVE_STD
#include <fstream>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <fstream.h>
#endif /* HAVE_STD */

#include <stdio.h>

//*****************************************************************************
// Speling::Speling(const HtConfiguration& config_arg)
//
// Hands the configuration to the Fuzzy base-class constructor and sets
// `name` to "speling".
//
Speling::Speling(const HtConfiguration& config_arg)
    : Fuzzy(config_arg)
{
    name = "speling";
}


//*****************************************************************************
// Speling::~Speling()
//
// The destructor body is empty; no cleanup is performed here.
//
Speling::~Speling() {}


//*****************************************************************************
// void Speling::getWords(char *w, List &words)
//
// A fairly efficient one-off spelling checker.
// This generates the small list of possibilities that are one edit away
// from the word -- every swap of two adjacent characters and every removal
// of a single character -- and checks each one against the word database.
//
//   w      the word to correct.  It is copied, and the copy is passed
//          through HtStripPunctuation() before any candidate is built.
//   words  receives a newly allocated String for every candidate for
//          which wordDB.Exists() returns zero.
//
// Nothing is added when:
//   - `w` is shorter than the "minimum_speling_length" setting (default 5),
//   - the database named by the "word_db" setting cannot be opened, or
//   - fewer than two characters remain once punctuation is stripped.
//
// Candidates are not de-duplicated: a word with a doubled letter yields the
// same deletion twice, and swapping two equal letters yields the stripped
// word itself.
//
void
Speling::getWords(char *w, List &words)
{
    if ((int)strlen(w) < config.Value("minimum_speling_length",5))
        return;

    HtWordList	wordDB(config);
    // last arg=1 -> open to compare only the "word" part of word keys
    if (wordDB.Open(config["word_db"], O_RDONLY, 1) == NOTOK)
        return;

    String	initial = w;
    String	stripped = initial;
    HtStripPunctuation(stripped);

    // The length test above looked at the raw word, so stripping may have
    // left almost nothing.  With fewer than two characters there is no
    // pair to transpose and the only possible deletion is the empty string
    // (for an empty word the final sub() call would even be handed a
    // length of -1), so there is nothing worth looking up.
    if (stripped.length() < 2)
    {
        wordDB.Close();
        return;
    }

    String	tail;
    int		last = stripped.length() - 1;	// index of the final character

    for (int pos = 0; pos < last; pos++)
    {
        // First transposes: swap the characters at pos and pos+1
        // (these are really common)
        initial = stripped;
        char	temp = initial[pos];
        initial[pos] = initial[pos+1];
        initial[pos+1] = temp;

        // Seems weird, but this is correct: the candidate is kept when
        // Exists() returns zero (presumably the OK/"found" status, as
        // opposed to NOTOK -- confirm against HtWordList).
        if (!wordDB.Exists(initial))
            words.Add(new String(initial));

        // Now let's do deletions: drop the character at pos by joining
        // what precedes it with what follows it.
        initial = stripped;
        tail = initial.sub(pos+1);
        if (pos > 0)
        {
            initial = initial.sub(0, pos);
            initial += tail;
        }
        else
            initial = tail;

        if (!wordDB.Exists(initial))   // same inverted-looking test as above
            words.Add(new String(initial));
    }

    // One last deletion -- the loop stops before the last character,
    // so drop it here.
    initial = stripped.sub(0, last);

    if (!wordDB.Exists(initial))   // same inverted-looking test as above
        words.Add(new String(initial));

    wordDB.Close();
}


//*****************************************************************************
// int Speling::openIndex()
//
// Does no work and unconditionally returns 0.
// NOTE(review): presumably no index is needed because getWords() builds its
// candidates on the fly -- confirm against the Fuzzy base class.
//
int Speling::openIndex()
{
    return 0;
}


//*****************************************************************************
// void Speling::generateKey(char *, String &)
//
// Empty implementation: both arguments are ignored and nothing is written
// to the String.
//
void Speling::generateKey(char * /* unused */, String & /* unused */)
{
}


//*****************************************************************************
// void Speling::addWord(char *)
//
// Empty implementation: the argument is ignored and nothing is stored.
//
void Speling::addWord(char * /* unused */)
{
}