summary refs log tree commit diff stats
path: root/debian/htdig/htdig-3.2.0b6/htdoc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htdoc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/.cvsignore1
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/COPYING500
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/ChangeLog8763
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/FAQ.html2590
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/Makefile.am58
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/Makefile.in407
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/RELEASE.html1542
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/THANKS.html104
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/TODO.html141
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/all.html137
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/attrs.html.in14606
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/attrs_head.html22
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/attrs_tail.html3
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/author.html39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/bdot.gifbin0 -> 46 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/bugs.html55
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_blocks.html58
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byname.html269
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_head.html20
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_tail.html10
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog.html260
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_head.html20
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_tail.html9
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_general.html80
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/htdoc/cf_generate.pl289
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_types.html103
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/cf_variables.html69
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/config.html509
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/confindex.html12
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/confmenu.html34
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/contents.html59
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/dot.gifbin0 -> 46 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htdig.gifbin0 -> 1822 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htdig.html256
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htdig_big.gifbin0 -> 6662 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htdigconfig.8.in18
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htdump.html201
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htfuzzy.html239
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htload.html203
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htmerge.html160
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htnotify.html120
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htpurge.html127
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_form.html209
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_general.html72
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_menu.html30
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_method.html102
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_selectors.html324
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/hts_templates.html513
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htsearch.html12
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/htstat.html116
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/index.html12
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/install.html475
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/isp.html87
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/ma_menu.html65
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/mailarchive.html12
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/mailing.html60
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/main.html108
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/meta.html269
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/notification.html185
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/require.html392
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/rundig.html190
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/running.html137
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/triangle.gifbin0 -> 49 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/up.gifbin0 -> 50 bytes
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/upgrade.html73
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/uses.html652
-rw-r--r--debian/htdig/htdig-3.2.0b6/htdoc/where.html126
67 files changed, 36314 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/.cvsignore b/debian/htdig/htdig-3.2.0b6/htdoc/.cvsignore
new file mode 100644
index 00000000..f3c7a7c5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/.cvsignore
@@ -0,0 +1 @@
+Makefile
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/COPYING b/debian/htdig/htdig-3.2.0b6/htdoc/COPYING
new file mode 100644
index 00000000..efa9a67c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/COPYING
@@ -0,0 +1,500 @@
+As decided by the HtDig Board Members and ratified by the HtDig Membership
+in October of 2002, the HtDig codebase is now licensed under the LGPL.
+
+The primary reason for doing this is to promote wider usage of HtDig by
+allowing more liberal use of the code as a library while preserving
+the "all changes/improvements must be given back" philosophy of the GPL.
+
+Here's a quote from the FSF site
+[http://www.gnu.org/philosophy/why-not-lgpl.html]
+
+"Using the ordinary GPL is not advantageous for every library. There are
+reasons that can make it better to use the Library GPL in certain cases. The
+most common case is when a free library's features are readily available for
+proprietary software through other alternative libraries. In that case, the
+library cannot give free software any particular advantage, so it is better to
+use the Library GPL for that library."
+
+
+-----------------------------------------------------------------------
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the library GPL. It is
+ numbered 2 because it goes with version 2 of the ordinary GPL.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Library General Public License, applies to some
+specially designated Free Software Foundation software, and to any
+other libraries whose authors decide to use it. You can use it for
+your libraries, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if
+you distribute copies of the library, or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link a program with the library, you must provide
+complete object files to the recipients so that they can relink them
+with the library, after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ Our method of protecting your rights has two steps: (1) copyright
+the library, and (2) offer you this license which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ Also, for each distributor's protection, we want to make certain
+that everyone understands that there is no warranty for this free
+library. If the library is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original
+version, so that any problems introduced by others will not reflect on
+the original authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that companies distributing free
+software will individually obtain patent licenses, thus in effect
+transforming the program into proprietary software. To prevent this,
+we have made it clear that any patent must be licensed for everyone's
+free use or not licensed at all.
+
+ Most GNU software, including some libraries, is covered by the ordinary
+GNU General Public License, which was designed for utility programs. This
+license, the GNU Library General Public License, applies to certain
+designated libraries. This license is quite different from the ordinary
+one; be sure to read it in full, and don't assume that anything in it is
+the same as in the ordinary license.
+
+ The reason we have a separate public license for some libraries is that
+they blur the distinction we usually make between modifying or adding to a
+program and simply using it. Linking a program with a library, without
+changing the library, is in some sense simply using the library, and is
+analogous to running a utility program or application program. However, in
+a textual and legal sense, the linked executable is a combined work, a
+derivative of the original library, and the ordinary General Public License
+treats it as such.
+
+ Because of this blurred distinction, using the ordinary General
+Public License for libraries did not effectively promote software
+sharing, because most developers did not use the libraries. We
+concluded that weaker conditions might promote sharing better.
+
+ However, unrestricted linking of non-free programs would deprive the
+users of those programs of all benefit from the free status of the
+libraries themselves. This Library General Public License is intended to
+permit developers of non-free programs to use free libraries, while
+preserving your freedom as a user of such programs to change the free
+libraries that are incorporated in them. (We have not seen how to achieve
+this as regards changes in header files, but we have achieved it as regards
+changes in the actual functions of the Library.) The hope is that this
+will lead to faster development of free libraries.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, while the latter only
+works together with the library.
+
+ Note that it is possible for a library to be covered by the ordinary
+General Public License rather than by this special one.
+
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library which
+contains a notice placed by the copyright holder or other authorized
+party saying it may be distributed under the terms of this Library
+General Public License (also called "this License"). Each licensee is
+addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also compile or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ c) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ d) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the source code distributed need not include anything that is normally
+distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Library General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/ChangeLog b/debian/htdig/htdig-3.2.0b6/htdoc/ChangeLog
new file mode 100644
index 00000000..b7615dd4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/ChangeLog
@@ -0,0 +1,8763 @@
+Mon Jun 14 10:08:01 CEST 2004 Gabriele Bartolini <angusgb@users.sourceforge.net>
+
+ * Tagged release htdig-3-2-0b6
+
+Sun 13 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/os_abs.c, (db/os_abs.c.win32 removed):
+ Re-fix Cygwin bug (#814268, fixed 25 Apr) so that it won't be
+ clobbered by autotools.
+
+Sat 12 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdoc/RELEASE.html: Separated bug fixes from new features
+
+ * htdoc/{htdig,htfuzzy}.html, installdir/{htdig,htfuzzy}.1.in:
+ Added list of database files used
+
+ * htdoc/{htdump,htmerge,htnotify,htpurge,hts_general,htstat,rundig}.html:
+ Hyperlinked COMMON_DIR, BIN_DIR, DATABASE_DIR to attrs.html.
+
+ * htcommon/defaults.cc, htdoc/attrs.html.in:
+ Remove reference to deprecated '-l' option (generate URL log) of htdig.
+
+Fri Jun 11 11:48:40 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/parser.cc (phrase): Applied Lachlan's patch to prevent endless
+ loop when boolean keywords appear in a phrase in boolean match method.
+
+Fri Jun 11 11:26:56 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * db/hash.c (CDB___ham_open): Applied Red Hat's h_hash patch, to ensure
+ that hash function always set to something valid.
+
+Fri Jun 11 10:53:49 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/HtFileType: Added -f to rm command.
+
+ * htsearch/parser.cc (perform_or): Added missing & in if clause.
+
+ * contrib/htdig-3.2.0.spec: Updated for 3.2.0b6.
+
+ * installdir/Makefile.{am,in}: Don't stick $(DESTDIR) in HtFileType.
+
+Thu Jun 10 16:39:36 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/conf_(lexer.lxx,parser.yxx): applied Gilles' patch (April 22)
+ which features:
+ - improved error handling, gives file name and correct line number,
+ even if using include files
+ - allows space before comment, because otherwise it would just complain
+ about the "#" character and go on to parse the text after it as a
+ definition
+ - allows config file with an unterminated line at end of file, by
+ pushing an extra newline token to the parser at EOF
+ - parser correctly handles extra newline tokens, by moving this
+ handling out of simple_expression, and into simple_expression_list
+ and block, as simple_expression must return a new ConfigDefaults
+ object and a newline token doesn't cut it (caused segfaults when
+ dealing with fix above)
+ * htcommon/conf_lexer.cxx: Regenerate using flex 2.5.31.
+ * htcommon/conf_parser.cxx: Regenerate using bison 1.875a.
+
+Wed Jun 9 12:32:47 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Fixed meta date handling fix of June 3 to
+ ensure null byte gets put in by get() call.
+
+Wed 9 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * contrib/doc2html/doc2html.pl, installdir/mime.types:
+ Add support for OpenOffice.org documents (#957305)
+
+Sat 5 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/t_htdig, test/t_factors: fix tests for non-gnu/linux systems.
+
+Sat 5 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdoc/cf_generate.pl: Hyperlink to simplify finding the defaults of
+ attributes defined in terms of others (e.g.,
+ accents_db->database_base->database_dir).
+ * htdoc/attrs.html.in: regenerated using cf_generate.pl
+
+Sat 5 Jun 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Escaped new-line in "allow_spaces_in_url" entry.
+ Set no_next_page_text to ${next_page_text}; likewise no_prev_page_text.
+
+Fri Jun 4 10:23:53 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/URL.cc: added "allow_space_in_url" (from fileSpace.1 patch)
+ * htcommon/defaults.[cc,xml]: added documentation of allow_space_in_url
+ * htdoc/attrs.html.in: regenerated using cf_generate.pl
+ * htdoc/cf_byname.html: ditto
+ * htdoc/cf_byprog.html: ditto
+ * htdoc/RELEASE.html: updated with info regarding this attribute
+
+Thu Jun 3 16:04:23 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Fixed meta date handling to avoid inadvertently
+ matching names like DC.Date.Review.
+
+Thu Jun 3 10:01:50 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdoc/RELEASE.html: updated release notes and changes
+ * htdoc/THANKS.html: updated the 'thanks' section
+
+Thu Jun 3 09:32:52 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * global: updated with 'autoreconf -if' (autoconf 2.59, libtool 1.5.6
+ and automake 1.7.9)
+
+Wed Jun 2 19:03:14 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * contrib/rtf2html: added the rtf2html.c source as modified by David Lippi
+ and Gabriele Bartolini of the Comune di Prato. The source code is now
+ released under GNU GPL and included in the ht://Dig package.
+
+Tue Jun 1 20:23:40 CEST 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/HtSGMLCodec.cc: changed &curren; to &euro;
+
+Fri 28 May 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * Most files: Update copyright to 2004
+
+Sun 23 May 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdocs/FAQ.html: Sync with maindocs
+
+Sun 23 May 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * configure, configure.in:
+ Resolve variables (e.g., BINDIR) copied into attrs.html,
+ without introducing "NONE" prefix detected by Gabriele.
+
+Sun 23 May 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * .version, htdoc/RELEASE.html, htdoc/where.html,
+ htdoc/attrs.html.in, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Prepare docs for release of 3.2.0b6.
+
+Mon Apr 26 15:12:22 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Soundex.cc (generateKey): Applied Alex Kiesel's fix to prevent
+ segfaults when word has no letters.
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/HTML.cc: Handle empty noindex_start/noindex_end lists.
+ * htlib/StringList.{cc,h}: const-correctness of Add/Insert/Assign(char*)
+
+ * redo mistakenly backed out patch...
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/parser.cc: Address (but not fix) bug #934739
+ If collection->getDocumentRef() on line 889 returns NULL, don't crash.
+ I'm still trying to work out why it does return NULL -- I don't think
+ it ever should.
+
+ * mistakenly back out previous patch :(
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/Retriever.{h,cc}, htcommon/defaults.cc, htdoc/FAQ.html:
+ Add store_phrases attribute. If it is false, htdig only stores the
+ first occurrence of each word in a document. This reduces the database
+ size dramatically, and slightly increases digging speed.
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/{aclocal.m4,configure,os_abs.c.win32}, STATUS, htdoc/THANKS.html:
+ Correctly detect paths beginning C: as absolute paths in cygwin/Win32.
+ Fixes bug #814268.
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/Retriever.cc:
+ Gilles's patch to avoid regex compile for every URL encountered.
+
+Sun 25 Apr 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * contrib/htdig-3.2.0.spec:
+ Karl Eichwalder's patch to use mktemp to create safe temp file.
+
+Wed Apr 7 17:12:33 2004 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (IsValidURL): Fixed bug #931377 so bad_extensions
+ and valid_extensions not thrown off by periods in query strings.
+
+Mon Mar 15 11:56:04 CET 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htsearch/Display.cc: changed (and fixed) the date factor formula as
+ Lachlan and David Lippi suggested, in order not to give negative results.
+
+Fri Mar 12 09:13:28 CET 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: removed 'eval' expressions which caused the 'NONE' prefix
+ path to be instantiated and the make script to hang
+ * acinclude.in: fixed AC_DEFINEs for SSL and ZLIB check macros, which prevented
+ autoheader (and therefore autoreconf) from working correctly
+ * moved manual pages from htdoc to installdir
+ * htdoc/[manpages].in: removed
+ * installdir/*.[1,8]: removed man pages (htdig-pdfparser.1, htdig.1,
+ htdump.1, htfuzzy.1, htload.1, htmerge.1, htnotify.1, htpurge.1,
+ htsearch.1, htstat.1, rundig.1, htdigconfig.8)
+ * installdir/*.[1,8].in: added pre-configure man pages (htdig-pdfparser.1.in,
+ htdig.1.in, htdump.1.in, htfuzzy.1.in, htload.1.in, htmerge.1.in, htnotify.1.in,
+ htpurge.1.in, htsearch.1.in, htstat.1.in, rundig.1.in, htdigconfig.8.in)
+ * regenerated configure scripts with autoreconf
+ * fixes bug #909674
+
+Sat 21 Feb 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * installdir/HtFileType: Use mktemp to create safe temp file (bug #901555)
+
+Wed Feb 25 11:14:45 CET 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdocs/THANKS.html: added Robert Ribnitz to the 'thanks' page and fixed
+ Nenciarini's position (it was not in alphabetical order - sorry!).
+
+Wed Feb 25 11:02:37 CET 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * installdir/*.[1,8]: added man pages (htdig-pdfparser.1, htdig.1,
+ htdump.1, htfuzzy.1, htload.1, htmerge.1, htnotify.1, htpurge.1,
+ htsearch.1, htstat.1, rundig.1, htdigconfig.8) provided by
+ Robert Ribnitz <ribnitz at linuxbourg.ch> of the Debian Project
+ * installdir/Makefile.am: prepared the automake script for correctly
+ handling the man pages
+
+Sat 21 Feb 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/htsearch.cc:
+ Back out change of 21 December, as it causes problems with characters
+ which *should* be unencoded, like /
+
+Thu 19 Feb 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * aclocal.m4, acinclude.m4, configure.in:
+ Remove duplicate tests for zlib
+ Fix tests for SSL (Fixes bug #829081)
+ Fix configure --help formatting
+
+ * htdoc/*.[18].in, htdoc/Makefile.am, configure.in: Added man pages
+
+ * htdoc/attrs.html.in, htdoc/cf_generate.pl, htdoc/Makefile.am:
+ Fill in #define'd attribs (Fixes bug #692125)
+
+ * test/Makefile.am: Incorporate new tests in make check
+
+ * test/t_htdig, test/t_parsing: suppress unwanted diagnostics
+
+ * STATUS: list Cygwin bug (#814268)
+
+ * htcommon/default.cc:
+ added wordlist_cache_inserts, remove wordlist_cache_dirty_level
+
+ * configure, */Makefile.in, */Makefile, htdoc/cf_by{name,prog}.html:
+ regenerated
+
+Fri 13 Feb 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp_cmpr.c: Fix bug with --without-zlib
+
+Sun 8 Feb 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/URL.cc: Make server_alias case insensitive.
+
+ * htdig/Document.cc: Don't hex-decode twice. (Caused problems with names
+ like file%20name)
+
+ * htdig/Retriever.cc: Test validity of URL value *before* calling
+ signature(), as that implicitly normalises, and confuses
+ limit_normalised vs limit_urls_to
+
+ * htdig/htdig.cc: Remove stale md5_db if -i specified
+
+ * installdir/htdig.conf: Set common_url_parts to contain all strings
+ which *must* be in a valid URL. Probably contains whole domain name,
+ so more compression than using standard strings.
+
+ * htcommon/defaults.cc: Update docs. Remove default "bad_extensions"
+ from common_url_parts, and add .shtml
+
+ * test/t_htdig, test/t_htdig_local: Update self-tests
+
+Tue Feb 3 18:06:38 CET 2004 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/HtConfiguration.cc: changed the Find method in order not to
+ ignore empty string results for string attributes whenever they are
+ defined in the configuration file by the user
+ * htdig/Document.cc: fixed bugs in handling the http_proxy,
+ http_proxy_authorization, authorization attributes
+ * htlib/Configuration.[h,cc]: added the Exists method in order to query
+ whether an attribute's definition is present in the configuration
+ dictionary (before it was checked against its string's length which
+ prevented empty attributes to be correctly used)
+ * these changes fix bug #887552
+
+Sun 18 Jan 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/URL.cc, test/url.cc:
+ Rename "allow_dbl_slash" to "allow_double_slash", to match defaults.cc
+
+ * htcommon/default.cc, htdoc/{hts_templates,attrs}.html:
+ Explain that keywords_factor applies to meta keywords. Fix old typo.
+
+ * test/t_{factors,templates}, test/htdocs/set1/{title.html,bad_local.htm}
+ * test/conf/entry-template:
+ Expanded test suite.
+
+Sat 17 Jan 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/t_{parsing,htdig_local,factors,templates},
+ * test/htdocs/set1/title.html:
+ Expanded test suite.
+
+Sat 17 Jan 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/DocumentRef.cc:
+ Fix old-style use of HtConfiguration, so defaults are read correctly.
+ Causes max_descriptions to be treated correctly.
+
+ * htcommon/default.cc, htdoc/{hts_templates,attrs,cf_byname,cf_byprog}.html:
+ Explain that max_description{s,_length} don't affect indexing -- only
+ text used to fill in template variables.
+
+Mon 12 Jan 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * Very many files: Fix bug #873965
+ Replace C++ style comments with C style comments in all C files, and .h
+ files they include.
+ Also, change //_WIN32 to /* _WIN32 */ in .cc files for uniformity.
+
+Mon 12 Jan 2004 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/t_parsing, test/test_functions.in: Add new tests
+ * htcommon/default.cc, htdoc/hts_templates.html: Cross-ref documentation.
+
+Mon Dec 29 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/Retriever.cc:
+ Fix bug in which validity of first URL from each server was not checked.
+
+Mon Dec 29 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/htdig.cc, htdoc/htdig.html: Fix bug #845054
+ Fix behaviour of -m and additional list of urls at the end of a command.
+ In either case, "-" denotes stdin.
+
+Mon Dec 29 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * installdir/rundig, installdir/Makefile.{in,am}: Address bug #860708
+ Make bin/rundig -a handle multiple database directories
+
+Sun Dec 21 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/htsearch.cc:
+ Improve handling of restrict/exclude URLs with spaces or encoded chars
+
+Sun Dec 21 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/HtURLSeedScore.cc, htsearch/SplitMatches.cc: Fix bug #863860
+ Split patterns at "|".
+ For SplitMatches, make "*" only match if all other patterns fail.
+
+Sun Dec 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/Server.cc: Fix bug #851303.
+ Allow indexing if robots.txt has an empty "disallow".
+
+ * test/t_htdig, test/t_htsearch, test/htdocs/robots.txt:
+ Tests for the above.
+
+Sun Dec 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/htdig.cc, test/t_factors: Warn if config file has obsolete fields.
+
+Sun Dec 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/Display.cc: Apply Gilles's patch for ellipses bug #844828.
+
+Sun Dec 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/{t_validwords,t_templates,t_fuzzy,t_factors}
+ * test/{set_attr,synonym_dict,dummy.stems,dummy.affixes,bad_word_list}
+ * test/conf/main-template test/htdocs/set1/{site2.html,site4.html}:
+ Added four new tests to test suite. Not included in "make check",
+ but can be run explicitly by "make TESTS=t_... check".
+
+Sun Dec 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/conf_lexer.{lxx,cxx}:
+ Back out changes to try to accept files without EOL :(
+
+Sat Dec 13 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.{cc,xml}, htdoc/{attrs,cf_byprog}.html:
+ Fix "used by" for max_excerpts, and resulting hyperlinks.
+
+Sat Nov 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/conf_lexer.{lxx,cxx}, htcommon/conf_parser.{yxx,cxx}:
+ Partially address bug #823455.
+ Don't complain if config file doesn't end in EOL.
+ Should the grammar be fixed not to need EOL?
+ Report errors to stderr, not stdout, as they confuse the web server.
+
+Sun Nov 9 14:44:02 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * Tagged release htdig-3-2-0b5
+
+Sat Nov 8 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/default.cc, htsearch/parser.cc: Fix bug #825877
+ Reduce backlink_factor to comparable with other factors, and
+ interpret multimatch_factor as the *bonus* given for multiple matches.
+
+Sat Nov 1 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/parser.cc: Fix bug #806419. Ignore bad words at start of phrase.
+
+Tue Oct 28 11:58:06 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdig/htdig.cc: set the debug level when we are importing a cookie file.
+ Fix bug #831478.
+
+Mon Oct 27 17:13:02 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Server.cc: Fix bug #831407. Make sure time properly reset after
+ delay completed, so that it doesn't allow 2 connections per delay.
+
+Mon Oct 27 15:57:38 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/THANKS.html: Added Lachlan, Jim and Neal to the active developers
+ list.
+
+Sun Oct 26 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdoc/hts_templates.html: Clarify that PREV/NEXTPAGE template variables
+ are empty if there is only one page, ignoring no_{prev,next}_page_text.
+
+Sun Oct 26 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Fixed documentation to close bug #829767
+ Clarified that noindex_start/end do not get replaced by whitespace.
+ Also removed spurious '>' from start of boolean_syntax_errors, and
+ added missing '#' to many local <a href> tags.
+
+Sun Oct 26 12:42:27 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Fixed description of 'head_before_get' after
+ Lachlan fixes.
+ * htdoc/attrs.html: rerun cf_generate.pl
+
+Sat Oct 25 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/Display.cc: Fix #829761.
+ If last component of the URL is used as a title, URL-decode it.
+
+Sat Oct 25 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/Server.cc: Fix #829754. Avoid calculations with negative time
+
+Fri Oct 24 17:17:15 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/htdig.html, htdoc/meta.html, htdoc/require.html: Update URL for
+ the Standard for Robot Exclusion.
+
+ * htdoc/htmerge.html: Added two clarifications to -m option description.
+
+ * htdoc/cf_types.html: Make clear distinction between String List and
+ Quoted String List.
+
+Fri Oct 24 15:30:08 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc: Fix bug #829746. Applied Niel Kohl's fix for this,
+ to check if words input given before trying to use it, to avoid NULL
+ argument to syslog().
+
+Fri Oct 24 15:15:53 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc: Fix bug #578570. The enddate handling now works
+ correctly for a large, negative startday value.
+
+Fri Oct 24 12:47:51 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (ctor): Fix obvious typo in metadatetags.Pattern setting.
+
+Thu Oct 23 10:27:18 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/default.cc: Fix bug #828808. Default startyear to empty
+ Document "startyear defaults to 1970 if a start/end date set".
+
+Thu Oct 23 12:14:30 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdig/htdig.cc: restored the code before Oct 21 (fixes #828628)
+
+Thu Oct 23 11:41:15 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdig/Retriever.[h,cc]: removed 'head_before_get' overriding by
+ restoring the code before Oct 21.
+ * htdig/Document.[h,cc]: ditto, with the exception of detaching the HEAD
+ before GET mechanism from the persistent connections'.
+ * htcommon/defaults.cc: improved documentation (even though it needs
+ corrections by an english-speaking developer).
+ * These changes fix bug #828628
+
+Wed Oct 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/parser.cc: Applied Neal's patch to fix bug #823403
+ Documents only added to search list if they were successfully dug.
+ Lines 237-238 of htsearch/Display.cc
+ if (!ref || ref->DocState() != Reference_normal)
+ continue;
+ should now be redundant. (Left in to be defensive.)
+
+Tue Oct 21 11:04:56 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdig/Retriever.h: added the 'RetrieverType' enum and an object variable
+ for storing the type of dig we are performing (default initial);
+ * htdig/Retriever.cc: changed constructor in order to handle the type,
+ added some debugging explanation regarding the override of the
+ 'head_before_get' attribute, added checks regarding an empty
+ database of URLs to be updated (set the type to initial).
+ * htdig/Document.h: added the attribute 'is_initial' which stores the
+ information regarding the type of indexing (initial or incremental)
+ we are currently performing. Added access methods (get-and-set-like)
+ * htdig/Document.cc: modified the logic of the HeadBeforeGet settings during
+ the retrieval phase, in order to always override user's settings in
+ an incremental dig and automatically set the 'HEAD' call in this case.
+ * htcommon/defaults.cc: modified the default value of 'head_before_get' and a bit
+ of its explanation.
+ * htnet/HtHTTP.cc: detached the HEAD before GET mechanism from the persistent
+ connections one
+ * htdig/Server.cc: added one level of debugging to the display of the
+ server settings in the server constructor
+
+Fri Oct 17 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htword/WordType.cc, htcommon/defaults.cc: Patched to fix bug #823083
+ Don't assume IsStrictChar returns false for digits.
+ Clarify behaviour of allow_numbers in the documentation.
+
+Fri Oct 17 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Patched to fix bug #823455
+ Escaped "$" in valid_punctuation, and add warnings about $, \ and `.
+
+Wed Oct 15 11:12:52 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Server.cc (robotstxt): Patched to fix bug #765726.
+ Don't block paths with subpaths excluded by robots.txt, and make
+ sure any regex meta characters are properly escaped.
+
+Tue Oct 14 11:54:07 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: add an empty Accept-Encoding header - this informs the
+ server that htdig is only able to manage documents that are not encoded
+ (if no Accept-Encoding is sent, the server assumes that the client is
+ capable of handling every content encoding - i.e. zipped documents with
+ Apache's mod_gzip module). Partial fix of bug #594790 (which now becomes a
+ feature request)
+
+Mon Oct 13 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htfuzzy/Regex.cc: Search for regular expression. (Used to ignore it!)
+
+ * htfuzzy/Speling.cc, htword/{WordList.cc,WordList.h,WordKey.cc,WordKey.h}:
+ When looking in word database for misspelt words, don't ask to match
+ trailing numeric fields in database key.
+
+ * htcommon/defaults.cc, htdoc/htfuzzy.cc: Update docs.
+
+Sun Oct 12 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/htsearch.cc:
+ Fix bug if fuzzy algorithms produced no search words.
+ Send all debugging output to cerr not cout. More debugging output.
+
+Sun Oct 12 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/{Retriever,Server}.cc: Back out the previous.
+ Gilles pointed out inconsistency with Retriever::IsValidURL().
+
+Sun Oct 5 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/{Retriever,Server}.cc: Jim Cole's patch to bug #765726.
+ Don't block paths with subpaths excluded by robots.txt.
+
+Sun Oct 5 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/htsearch.cc: Highlight phrases containing stop words
+ * test/t_htsearch, test/conf/htdig.conf.in: Tests for the above
+
+Sat Sep 27 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/{test_functions.in,t_htdig,t_htdig_local,t_htnet}:
+ Don't assume shell "." command passes arguments. (Doesn't on FreeBSD.)
+
+Sat Sep 27 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htlib/HtDateTime.h, htnet/HtCookie.cc:
+ Avoid ambiguous function call on systems (HP-UX) where time_t=int
+
+Fri Aug 29 09:35:46 MDT 2003 Neal Richter <nealr at rightnow.com>
+
+ * removed references to CDB___mp_dirty_level, CDB_set_mp_dirty_level()
+ & CDB_get_mp_dirty_level()
+
+ * The config verb 'wordlist_cache_dirty_level' was left for possible use in
+ the future.
+
+Thu Aug 28 15:11:21 MDT 2003 Neal Richter <nealr at rightnow.com>
+
+ * Changed db/LICENSE file to new LGPL compatible license from Sleepycat
+ Software -- Thanks Sleepycat!
+
+ * Reverted to Revision 1.2 of db/mp_alloc.c. The recent change caused
+ large DB growth. Strangely the files contained no 'new' data, they were
+ just much larger. Looks like the pages were being flushed too often????
+
+Thu Aug 28 12:41:22 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * global: updated with 'autoreconf -if' (autoconf 2.57, libtool 1.5.0a and
+ automake 1.7.6)
+ * 'make check' successful on: AMD64 Linux 2.4, Alpha Linux 2.2,
+ RedHat Linux 7.3 (2.4), SPARC Ultra60 Linux 2.4,
+ Sparc R220 Sun Solaris (5.8).
+ * README.developer: added further info
+
+Thu Aug 28 12:00:10 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * db/[config.guess,config.sub,install-sh,ltmain.sh,missing]: added in the
+ database directory (this way 'make dist' goes on); I have not been able to
+ tell the db/configure script to get the 'top_srcdir' ones (which should be
+ the default behaviour). Maybe in the future we'll look for this.
+
+Thu Aug 28 11:53:48 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * db/configure.in: changed AC_PROG_INSTALL() to AC_PROG_INSTALL and removed
+ AC_CONFIG_AUX_DIR; this implies that autotools copies will be made for the
+ db directory as well.
+
+Thu Aug 28 11:36:42 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * [htcommon,htdb,htdig,htfuzzy,htlib,htnet,htsearch,httools,htword,test]/Makefile.am:
+ added the option above to every *_LDFLAGS
+
+Thu Aug 28 11:30:39 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * Makefile.am: removed acconfig.h from the EXTRA_DIST list
+
+Thu Aug 28 11:25:07 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: removed portability checks for error, stat and lstat that
+ caused compile errors on Solaris. Added the '-mimpure-text'
+ extra ld flag for GCC on solaris systems (a linkage error occurs
+ when libstdc++ is not shared)
+
+Thu Aug 28 11:22:57 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * include/Makefile.am: changed htconfig.h.in into config.h.in
+
+Thu Aug 28 11:16:19 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/error.[h,c]: removed for now, until replacement functions will be
+ correctly performed.
+
+Thu Aug 28 11:11:32 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdoc/cf_generate.pl: fixed an error when opening tail and head files
+ * Makefile.am: enabled rebuild from a different directory (it is used
+ by 'make dist')
+
+Thu Aug 28 10:46:35 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/malloc.c: modified according to autoconf specifications as far
+ as replacement functions are regarded
+ * htlib/[lstat, stat].c: removed for now
+
+Thu Aug 28 10:40:58 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdoc/cf_generate.pl: accept an optional parameter (top source directory)
+ * htcommon/defaults.cc: fixed some broken lines which prevented
+ cf_generate.pl from correctly working
+ * htdoc/Makefile.am: modified the automake file for passing the top
+ source directory to cf_generate.pl
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerated using cf_generate.pl.
+
+Tue Aug 26 12:25:40 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: removed AC_FUNC_MKTIME because it may not work properly
+ and added default replacement directory (htlib) for future uses
+ * htlib/Makefile.am: back-step with re-inclusion of mktime.c in the
+ list of files to be always compiled (caused linking errors
+ for the __mktime_internal function)
+ * global: updated with 'autoreconf -if'
+
+Sun Aug 24 12:44:29 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * updated with 'autoreconf -if': autoconf 2.57, automake 1.7.6 and
+ libtool 1.5.0a (autotools that come with Debian SID)
+
+Sun Aug 24 12:39:34 EST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: moved AC_PROG_LEX to AM_PROG_LEX
+ * db/configure.in: enabled AM_MAINTAINER_MODE which prevented users without
+ autotools to configure and compile the program (relatively to the db
+ directory)
+ * include/htconfig.h: previously excluded from the branch (severe error!)
+
+Mon Jul 21 20:54:47 CEST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/(malloc|error|lstat|stat|realloc).c: added for cross-compiling
+ reasons (as suggested by automake)
+ * htlib/error.h: ditto
+ * db/acconfig.h: removed as suggested by autotools' new versions
+ * configure.in: removed AC_PROG_RANLIB (overridden by AC_PROG_LIBTOOL)
+ * updated as of rerun 'autoreconf -if'
+
+Mon Jul 21 10:08:24 CEST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * Patch provided by Marco Nenciarini <mnencia at linux.it> has been
+ completely applied; the patch adds support for detection
+ of standard C++ library
+ * all sources using <iostream.h> <fstream.h> <iomanip.h>: modified
+ to use standard ISO C++ library, if present
+ * db/configure scripts: modified for autoconf 2.57
+
+Mon Jul 21 09:59:16 CEST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * [.,*]/Makefile.in: regenerated by new automake against new configure.in
+ * Makefile.config: now looking for the global configuration file
+ in the source directory
+
+Mon Jul 21 09:49:22 CEST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: completely rewritten, deprecated directives have
+ been removed and now version 2.57 is a prerequisite.
+ * acinclude.m4: moved all the macros here
+ * aclocal.m4, configure: regenerated by aclocal and autoconf
+ * acconfig.h: removed as now it is deprecated
+ * include/htconfig.h.in: removed, as 'config.h.in' is preferred
+ and auto-generated
+ * config.[guess,sub]: updated with newer versions
+
+Tue Jul 8 16:29:44 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/parser.cc (checkSyntax): Fixed boolean_syntax_errors
+ handling to work over multiple config files.
+
+Mon Jul 7 00:41:55 CEST 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * Updated to autoconf 2.57, libtool 1.5 and automake 1.7.5
+ * removed acconfig.h files
+ * autoconf include file is now include/config.h (for autoheader)
+ * include/htconfig.h.in renamed in include/htconfig.h: now includes
+ config.h and redefines the bool types
+ * htlib/HtRegexList.cc, htdig/(Document.cc|ExternalParser.cc): removed
+ TRUE and FALSE and converted to C++ standard values
+
+Sat Jul 5 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/test_functions.in: Fix bugs starting/killing apache
+
+Sat Jul 5 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Disable cache flushing to avoid "page leak".
+
+Tue Jun 24 2003 Neal Richter <nearl at rightnow.com>
+
+ * Update Copyright Notices in code & documentation to 2003
+
+ * Changed License Notice GPL -> LGPL License change (Decided by HtDig
+ Board & Membership October 2002)
+
+Mon Jun 23 2003 Neal Richter <nearl at rightnow.com>
+
+ * Raft of changes. Most to do with Native Win32 support
+
+ * TODO: ExternalTransport & ExternalParser are effectively disabled with
+ #ifdefs for Native WIN32
+
+ * remove global CDB___mp_dirty_level variable and substitute functions to set/get variable
+
+ * Added local copies of GNU LGPL regex, POSIX-like dirent routines, getopt
+ library and filecopy routines - mainly for Native WIN32 support
+
+ * improve IsValidURL with return codes (htdig/Retriever.cc)
+
+ * lots of improvements/new-features to libhtdig
+
+Sun Jun 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp_cmpr.c (CDB___memp_cmpr_open):
+ Make weak compression database standalone to avoid recursion
+ This *should* fix all of the recent problems with dirty cache etc.
+
+ * test/search.cc: Don't take sizeof zero sized array
+
+Fri Jun 20 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * configure,aclocal.m4,acinclude.m4: --with-ssl set CPPFLAGS, not CFLAGS
+
+Fri Jun 20 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/configure: Hack which should allow select to be detected on HP/UX
+
+ * db/db.c: Replace HAVE_ZLIB with HAVE_LIBZ (as set by configure)
+
+ * htword/wordKey.cc: More descriptive error message
+
+ (Changes to compile with Sun's C++)
+ * htnet/{HtCookie.cc,HtFTP.cc,Transport.cc}:
+ Assign substring of const string to const pointer.
+ * htsearch/ResultMatch.h:
+ Allow use of SortType in ResultMatch::setSortType()
+ * test/search.cc: Don't take sizeof(variable size array)
+ * htdb/htdb_stat.cc: avoid name clash for global var internal
+ * htcommon/URL.h, htlib/HtTime.h, htlib/htString.h, htnet/Connection.h,
+ htword/WordBitCompress.h:
+ Cast default args of type string literal to type (char*)
+
+ * htdocs/require.html: Remove email address.
+
+ * htlib/gregex.h: Avoid warning if __restrict_arr already defined
+
+Sun Jun 14 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc:
+ Set wordlist_cache_dirty_level to 1 (its most conservative value).
+ Miscellaneous reformatting.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerated using cf_generate.pl.
+
+ * htdoc/{require.html,meta.html,all.html,meta.html}:
+ Update disk usage for phrase searching.
+ Updated list of supported platforms. More hyperlinks.
+
+Fri Jun 13 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/Display.cc (setVariables), htdocs/hts_template.html:
+ Set MATCH_MESSAGE from method_names (for internationalisability).
+ Removed all trace of hack for config attribute...
+
+Thu Jun 12 14:16:05 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (main): Fixed boolean_keywords handling to
+ work over multiple config files (must destroy old list before
+ creating new one).
+
+ * htcommon/defaults.cc, htsearch/Display.cc (setVariables): Removed
+ incorrect default value for "config" attribute, and removed hack
+ that attempted to correct it.
+
+ * htdoc/attrs.html: Regenerated using cf_generate.pl.
+
+Thu Jun 12 13:28:01 2003 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc, htcommon/HtSGMLCodec.cc (ctor): Added
+ translate_latin1 option to allow disabling Latin 1 specific SGML
+ translations.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerated using cf_generate.pl.
+
+Mon Jun 9 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/htsearch.cc: Fixed setupWords loop for junk at end of query
+
+Mon Jun 9 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/Display.cc: Set CONFIG template variable to the base name
+ of the config file (no directory or .conf), as expected by htsearch
+
+Mon Jun 9 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * test/test_functions.in: avoid trying to kill apache multiple times
+
+ * configure,configure.in: Reformat --help output
+ * htdoc/FAQ.html: Brought up-to-date with main docs
+ * htdoc/hts_templates.html: added hyperlinks.
+ * installdir/search.html: Display version
+
+Sun Jun 8 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * configure: Hack to set --disable-bigfile for Solaris (with Sun cc)
+ and --disable-shared --enable-static for Mac OS X
+
+ * test/{test_functions.in,t_htdig,t_htdig_local,t_htnet}:
+ Only start Apache for tests which need it, and kill it after the test
+
+ * contrib/parse_doc.pl: Allow file names containing spaces (from .deb)
+
+Mon Jun 2 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp_cmpr.c: Add default zlib setting to default_cmpr_info
+ * htcommon/defaults.cc, htword/WordDBCompress.cc: Fix docs to say
+ default compression by 8 (not by 3, which I had "fixed" it to...)
+
+ * htcommon/conf_lexer.{cxx,lxx}: Avoid warnings, and document hack.
+
+Thu May 29 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp_cmpr.c: Fix comparison of -1 and unsigned which broke SunOS cc
+ * htdoc/install.html: Warn SunOS cc users to --disable-bigfile
+
+ * htcommon/conf_lexer.cxx: Suppress warnings of unused identifiers
+ * test/conf/htdig.conf2.in: Disable testing of content_classifier
+ attribute, as didn't work until after installation
+
+Tue May 27 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/configure, db/ac{local,include}.m4:
+ Stop test for zlib from adding -I/default/path (*this* time...)
+
+ * htword/DBPage.h: Fix bug introduced in previous patch
+
+ * test/Makefile.{in,am}: Replace non-portable make -C X by cd X; make
+
+Tue May 27 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * {,db/}configure, {,db/}ac{local,include}.m4:
+ Stop test for zlib from adding -I/default/path (broke SunOS cc)
+ Fix -Wall test if CCC is g++ but CC is not gcc
+
+ * test/dbbench.cc: #include <fcntl.h> later, to avoid #define open
+ causing problems
+
+ * includedir/synonyms: Remove trailing blank line which caused warning
+ * htnet/HtCookieInFileJar.cc,htfuzzy/Synonym.cc: .get() to stop warnings
+ * htlib/mhash_md5.c: char -> unsigned char to stop warnings
+ * test/search.cc, htword/WordDBPage.h:
+ Casts to (int) to stop printf warnings. ALLIGN -> ALIGN
+
+Sat May 24 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Keep more wordlist cache pages clean
+
+ * {,db/}configure{,.in}, {,db/}ac{local,include}.m4:
+ Patch by Richard Munroe to test if -Wno-deprecated needed.
+ Many bug fixes / extra search paths added.
+
+ * include/htconfig.h.in, db/db_config.h.in:
+ Only '#define const' if not C++ (htword/WordDB.cc uses db_config.h)
+ * test/dbbench.cc: check for alloca even if gcc
+ * test/t_url: used grep -C instead of grep -c (for portability)
+ * db/mp_{alloc,cmpr}.c: Removed/replaced C++ style comments
+
+ * htdoc/require.html: Revised list of supported platforms
+
+Thu May 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htnet/HtFile.cc: Fix previous .get() patch...
+
+Thu May 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htlib/DB_2.cc: Set wordlist_cache_dirty_level before opening
+ database, to avoid database memory allocation problem.
+
+ * db/db_err.c: Make 'fatal' errors actually exit.
+
+ * htdig/Document.cc, htsearch/parser.cc, htdig/htdig.cc,
+ * htnet/Ht{HTTP,File}.cc:
+ Add .get() to use of strings to avoid compiler warnings (FreeBSD).
+
+Thu May 22 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * ltmain.sh, test/Makefile.in: Hack to list library dependencies
+ multiple times in g++ command, to get MacOS X to 'make check'.
+
+ * test/{search,word}.cc: cast sizeof() to (int) to avoid warnings.
+
+ * htdoc/install.html: Documented MacOS X's shared libraries problem.
+
+Sun May 18 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp_alloc.c: Hopefully the *last* fix for this morning's patch...
+
+ * configure, aclocal.m4, acinclude.m4:
+ Look for httpd modules in .../libexec/httpd for OS X
+ * test/conf/httpd.conf: Disabled mod_auth_db, mod_log{agent,referer}.
+
+Sun May 18 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/db.h.in: Declare variable introduced in db/mp_cmpr.c patch
+
+Sun May 18 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * db/mp.h, db/mp_{alloc,bh,cmpr,region}.c,
+ * htword/WordDB.cc, htdig/htdig.cc:
+ Avoid infinite loop if memp_alloc has only dirty,
+ "weakly compressed" (i.e. overflow) pages.
+ * htcommon/defaults.cc: Document the above, plus misc updates.
+
+ * htword/WordDBPage.h:
+ Cast sizeof() to (int) in printf()s to avoid compiler warnings.
+
+Sun Apr 20 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/htdig.cc: delete db.words.db_weakcmpr if -i specified.
+
+Wed Feb 26 22:10:40 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: fixed colon (':') problem with HTTP header parsing,
+ as Frank Passek, Gilles and others suggested, as space is not
+ mandatory between the field declaration and the field value returned
+ by the server
+
+Sun Feb 23 10:20:58 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/defaults.[cc,xml]: added the 'cookies_input_file'
+ configuration attribute for pre-loading cookies in memory
+ * htdig/htdig.cc: added the feature above; the code automatically
+ loads the cookies from the input file into the 'jar' that will be
+ used during the crawl.
+
+Sun Feb 23 10:16:08 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.h: removed the NULL pointer check before assigning a
+ new jar to the HTTP code
+
+Tue Feb 11 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/defaults.cc: Set default compression_level to 6,
+ which enables Neal's wordlist_compression_zlib flag.
+
+Tue Feb 11 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htcommon/{DocumentRef.h, HtWordReference.h},
+ htsearch/WeightWord.{cc,h},
+ htsearch/parser.{cc,h}, htsearch/htsearch.cc:
+ Added field-restricted searching, by title:word or author:word
+
+ * htdig/ExternalParser.cc, htdig/HTML.{cc,h}, htdig/Parsable.{cc,h},
+ htdig/Retriever.{cc,h}:
+ Parse author from <meta ...> tags. Also moved some common
+ functionality from HTML/ExternalParser into Parsable.
+
+ * test/t_htsearch, htcommon/defaults.cc,
+ htdoc/{TODO.html,hts_general.html,hts_method.html}:
+ Test and document the above
+
+Sun Feb 9 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htdig/HTML.cc: fix bug in detection of deprecated noindex_start/end
+ * htsearch/Display.cc: try harder to find value for DBL_MAX #680836
+ * htcommon/defaults.cc: fixed typos.
+
+Sat Feb 1 13:57:17 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.[h,cc]: allowed printDebug to be passed an ostream object
+ * htnet/HtCookieMemJar.cc: removed a debug call
+
+Thu Jan 30 19:28:32 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * configure.in: used AC_LIBOBJ instead of deprecated LTLIBOBJS's workaround
+ * ltconfig: removed as not needed anymore since libtool 1.4
+ * db/configure.in: added AC_CONFIG_AUX_DIR(../) for letting automake know to use
+ the main ltmain.sh file
+ * configure, aclocal.m4, Makefile.in, */Makefile.in, config.guess, config.sub,
+ install-sh, ltmain.sh, missing, mkinstalldirs: re-generated by autotools:
+ aclocal, autoconf 2.57, automake 1.6.3 and libtool 1.4.3
+ * db/aclocal.m4, db/configure, db/mkinstalldirs: ditto
+
+Thu Jan 30 00:16:51 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htsearch/htsearch.cc: removed a warning due to a not-initialized pointer
+
+Wed Jan 29 22:53:25 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * acinclude.m4: included the function for checking against SSL, as
+ found in the ac-archive.
+
+Tue Jan 28 12:23:16 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/Makefile.am: added HtCookieInFileJar.[h,cc] files
+ * installdir/cookies.txt: example file for pre-loading HTTP cookies
+ * installdir/Makefile.am: added cookies.txt
+
+Tue Jan 28 12:16:28 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookieMemJar.[h,cc]: performed deep copy of the jar in the copy constructor
+
+Tue Jan 28 12:13:44 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.[h,cc]: added the constructor of a cookie object from a line
+ of a cookie input file (Netscape's way): if an expiration value of '0' is set
+ through the cookies input file, the cookie is managed as a session cookie.
+ Improved copy constructor, solving a bug related to the expires field.
+
+Tue Jan 28 12:11:27 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookieInFileJar.[h,cc]: class for importing cookies from a text file
+
+Tue Jan 28 12:08:20 CET 2003 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/HtDateTime.h: added the constructor HtDateTime(const int)
+
+Sat Jan 25 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htsearch/Display.cc: Convert "<br>\n" in $(DESCRIPTION) to "<br>"
+ so it can be used in Javascript (feature request #529926).
+
+Tue Jan 21 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * HTML.cc (HTML, parse): Handle noindex_start/end as string lists.
+
+ * test/{t_htsearch,htdocs/set1/script}: Test the above
+
+ * htcommon/defaults.cc:
+ Add "<SCRIPT" to default noindex_start/end (feature request #586359).
+
+
+ * htlib/String.cc (operator>> (istream&,String&) ):
+ Exit loop when getline fails for reasons other than a full buffer.
+
+ * htnet/HtFile.cc (File2Mime), installdir/HtFileType:
+ Allow file names containing spaces.
+
+Sat Jan 11 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htnet/HtFile.cc (Request), htdig/Document.cc (RetrieveLocal),
+ htcommon/URL.h htcommon/URLTrans.cc:
+ Decode URL paths before use as local filenames (file:/// & local_urls).
+
+ * test/{t_htdig,t_htdig_local,t_htsearch}, test/conf/htdig.conf2.in,
+ test/htdocs/set1/{index.html,site 1,sub%20dir/empty file.html}:
+ Tests for the above.
+
+ * htcommon/HtConfiguration.cc: brackets around assignment in 'if'.
+ * test/search.cc (LocationCompare): Only specify default arg once.
+
+Fri Jan 10 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htlib/String.cc (operator>> (istream&,String&) ):
+ Check status of stream, not return value of get().
+ Fixes bug (for some C++ libs) where reading stops at a blank line.
+
+Fri Jan 1 2003 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * htnet/HtFile.cc(Ext2Mime,Request), htdig/Document.cc(RetrieveLocal):
+ Determine local files' MIME types from mime.types, not hard-coded.
+ URLs matching attribute "bad_local_extensions" must use their true
+ transport protocol (HTTP for http://, filesystem for file:///).
+
+ * htnet/HtFile.cc (File2Mime, Request): For file:/// URLs only,
+ files without (or with unrecognised) extensions are checked by
+ the program specified by the "content_classifier" attribute.
+
+ * htnet/HtFile.cc (Request): Symbolic links are treated as
+ redirects, to avoid problems with relative references.
+
+ * htcommon/defaults.cc: Documented the above (and added crossrefs).
+
+ * test/t_ht{dig,dig_local,search}, test/htdocs/set1/*,
+ test/conf/htdig.conf2.in: Add tests for bad_local_extensions.
+
+Mon Dec 31 2002 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * configure.in,htfuzzy/EndingsDB.cc,htlib/{HtR,r}egex.h,Makefile.am:
+ Renamed regex.h to gregex.h and allow use of rx instead.
+
+ * htcommon/defaults.cc,htdocs/{attrs,cf_byprog,cf_byname}.html:
+ Fixed typo in cross-references to restrict and limit_urls_to.
+
+ * test/t_htmerge: Re-enabled htmerge command (discarding output).
+
+ * test/Makefile,test/conf/htdig.conf3.in: Added conf3 and fixed db path.
+
+Mon Dec 30 2002 Lachlan Andrew <lha at users.sourceforge.net>
+
+ * contrib/doc2html/*: Incorporated David Adams' latest version, 3.0.1.
+
+Mon Dec 30 2002 Lachlan Andrew <lha at users.sourceforge.net>
+
+ Forward-ported several patches from 3.1.6:
+
+ * htdig/ExternalParser.cc: Added "description_meta_tag_names" attrib.
+ Added "dc.date|dc.date.created|dc.date.modified" synonyms for "date".
+ Allow spaces between "url" and "=" in refresh.
+ Fixed bug in flag positions.
+ Added "use_doc_date" attribute.
+
+ * htdig/HTML.cc: Added "description_meta_tag_names" attribute.
+ Added "dc.date|..." synonyms.
+ Added "ignore_alt_text" attribute.
+
+ * htdig/Retriever.cc: Added "ignore_dead_servers" attribute.
+ Added call to "url.rewrite()" in got_href().
+
+ * htdig/FAQ.html: Latest version now 3.1.6. Mention old security hole.
+ Describe external converters for PostScript etc.
+ Mention pdf_parser not supported in 3.2.
+
+ * htdoc/{attrs,cf_byname,cf_byprog}.html: New attributes added
+ (automatically from defaults.cc).
+
+ * htdoc/htmerge.html: Update for multiple database support.
+
+ * htdoc/hts_form.html: Describe relative/incomplete dates.
+
+ * htdoc/require.html: Describe phrase searching, external parsers,
+ external transports.
+ Added some new supported systems. (Commented out as testing
+ incomplete.)
+
+ * htfuzzy/Synonym.cc: Protect against "synonym" entries with one word.
+
+ * htlib/String.cc: Protect against negative string lengths.
+
+ * htsearch/Display.{cc,h}: Added "search_result_contenttype" attribute,
+ and corresponding displayHTTPheaders() function.
+ Rewrite URLs.
+ Remove old "ANCHOR" variable.
+ Handle relative dates.
+ Added "max_excerpts" attribute and buildExcerpts() function.
+ Added "anchor_target" attribute.
+
+ * htsearch/DocMatch.h: Added "orMatches"
+
+ * htsearch/htsearch.cc: Added "boolean_keywords" attribute.
+ Rewrite URLs.
+
+ * htsearch/parser.cc: Added "boolean_syntax_errors" attribute.
+ Added wildcard search.
+ Fixed bug in perform_phrase() so it now handles "bad words" and
+ short words properly.
+ Added "multimatch_factor" to give greater weight to documents matching
+ multiple "OR" terms.
+
+ * htsearch/htparser.h: Added boolean_keywords support.
+
+ * htcommon/defaults.{cc,xml}: New attributes added, and enhanced
+ descriptions
+
+
+ Cleaned code to remove some compiler warnings/errors:
+
+ * htcommon/HtConfiguration.cc: Brackets around assignment 'path='
+ inside 'if'
+
+ * htdig/Server.cc, htsearch/Display.cc:
+ Added ".get()" when strings passed as arguments.
+
+ * htlib/StringMatch.h, htword/WordBitCompress.h:
+ Explicit cast of NULL to (char*)NULL for broken C++ compilers.
+
+
+ Also:
+
+ * STATUS: Removed "not all htsearch input parameters handled properly",
+ "Return all URLs", "Turn on URL parser test",
+ "htsearch phrase support tests".
+ Reduced list of things to do for "require.html".
+
+
+ * test/t_htsearch, test/conf/htdig.conf3.in:
+ Added testing of phrases and boolean_keywords / boolean_syntax_errors.
+
+Thu Nov 28 09:02:46 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/english.0: Removed S flag from birth, because it doesn't
+ do what we want (birthes, not births).
+
+Tue Nov 26 23:16:08 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/hts_form.html: Fixed typo in link & description for restrict.
+
+Tue Nov 26 22:30:06 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/english.0: Patched with Lachlan Andrew's changes, fixing
+ lots of dubious uses of suffixes to get more appropriate and correct
+ fuzzy endings expansions.
+
+ * installdir/synonyms: Updated with the version contributed by
+ David Adams, with minor changes. Kept old one as synonyms.original.
+
+Mon Nov 4 10:44:35 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/URL.[h,cc]: added the assignment operator
+
+Sun Oct 27 09:29:02 2002 Geoffrey Hutchison <ghutchis at localhost>
+
+ Merge in word DB zlib patch from Neal Richter.
+
+ * db/db.h.in, db/mp_cmpr.c, htword/WordList.cc,
+ htword/WordDBCompress.h, htword/WordDBCompress.cc: Add support for
+ using the zlib compression (and compression level) if specified by
+ the new wordlist_compress_zlib, which is "true" by default.
+
+ * htcommon/defaults.cc: Add attribute wordlist_compress_zlib as
+ above.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Update using cf_generate.pl.
+
+Sat Oct 26 21:59:01 2002 Geoffrey Hutchison <ghutchis at localhost>
+
+ Merge in fixes from Lachlan Andrew
+
+ * test/Makefile.am, test/Makefile.in, test/t_url, test/url.cc,
+ test/url.children, test/url.parents, test/url.output: Add URL
+ tests to the automatic test suite (rather than requiring them to
+ be run manually).
+
+ * */Makefile.in: Regenerate using automake-1.4p6.
+
+ * htcommon/URL.cc, htcommon/URL.h: Add new configuration attribute
+ allow_double_slash to only remove // marks when requested (since
+ some server-side code uses them), handle initial protocols
+ without double slashes, and only remove the default doc string
+ from appropriate protocol URLs (e.g. not file), treat ".//" as a
+ relative path, and collapse /../ *after* // and /./ handling.
+
+ * htcommon/defaults.cc: Add documentation for allow_double_slash,
+ as well as various documentation cleanups.
+
+ * htdig/ExternalTransport.cc: Fix minor bug--recognize service
+ specified as https:// rather than https.
+
+ * htdoc/hts_form.html, htdoc/hts_templates.html: Documentation fixes.
+
+ * htsearch/htsearch.cc: Create valid boolean query if "exact" not
+ specified in search_algorithms by adding the exact word with low
+ weight. Solves PR#405294.
+
+Fri Oct 4 17:05:06 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.xml: Added first-draft XML version of defaults
+ file. This will eventually be used to generate defaults.cc and
+ documentation automatically. (As pointed out by Brian White, this
+ will make the binaries smaller.)
+
+Wed Sep 25 13:56:31 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (parse): Fixed handling of JavaScript skipping so it
+ doesn't get confused by "<" in code.
+
+Thu Sep 19 09:04:50 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc : another check for cookie jar's null pointer
+
+Tue Sep 17 17:41:51 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (external_protocols): Fixed table formatting
+ as suggested by Lachlan Andrew.
+
+Thu Aug 29 21:21:34 CEST 2002 Soeren Vejrup Carlsen <svc at users.sourceforge.net>
+
+ * htdig/Document.[h,cc]: first steps in FTP handling. HtFTP.h included and
+ we now test for the 'ftp' protocol in the Document::Retrieve function.
+ Has not yet been tested!
+
+ * htnet/HtFTP.[h,cc]: added class to handle the FTP-protocol. Very
+ experimental (has not been tested yet).
+
+Fri Aug 9 13:01:05 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * httools/htnotify.cc (readPreAndPostamble): Check for empty strings
+ in file names, not just NULL, as suggested by Martin Kraemer.
+
+Wed Aug 7 12:11:31 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Fixed to impose max_doc_size
+ restriction on external converter output which it reads in.
+
+Tue Aug 6 18:21:11 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * these changes were suggested by David Reed <DReed1 at citgo.com> (thanks)
+
+ * htdig/Document.cc: manage cookies via SSL
+
+ * htnet/HtCookie.[h,cc]: features both RFC2109 and Netscape version
+
+ * htnet/HtCookieJar.cc: ditto
+
+Tue Aug 6 17:12:22 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/defaults.cc: added the 'http_proxy_authorization' attribute.
+ Needs revision due to my usual *spaghetti* english. :-)
+
+ * htdig/Document.[h,cc]: proxy authorization is now enabled
+
+Tue Aug 6 09:28:39 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/Connection.[h,cc]: IP address storing as string (sync with ht://Check)
+
+ * htnet/Transport.[h,cc]: HTTP Proxy and Basic credentials handling moved here (ditto)
+ through the use of a protected static method
+
+ * htnet/HtHTTP.h: SetCredentials declared to be virtual (unnecessary because inherited,
+ but gives better understanding); new method SetProxyCredentials for
+ proxy authorization.
+
+ * htnet/HtHTTP.cc: HTTP header Proxy-Authorization is now handled. The
+ SetCredentials and SetProxyCredentials methods now make use of the
+ Transport::SetHTTPBasicAccessAuthorizationString method, in order to
+ write the string for negotiating the access.
+
+Fri Aug 2 15:40:18 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (Retrieve): Allow redirects from HTTPSConnect.
+
+Tue Jul 30 12:46:56 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/md5.cc: Added missing include of stdlib.h, as Geoff suggested.
+
+Sat Jul 27 11:57:25 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/SSLConnection.cc: Add fix for segfault on SSL connections
+ noticed by several users. Fix contributed by Andy Bach
+ <afbach at users.sourceforge.net>.
+
+Tue Jun 18 10:22:01 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (got_word): Check that the word length meets
+ the minimum word length before doing any processing.
+
+Fri Jun 14 17:26:21 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (buildMatchList), htsearch/HtURLSeedScore.cc
+ (Match), htsearch/SplitMatches.cc (Match): Added Jim Cole's fix to
+ bugs in handling of search_results_order.
+
+Wed May 15 09:45:40 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/Retriever.cc: fixed the bug regarding the server_wait_time
+ feature after the maximum number of requests per connection has been
+ reached.
+
+Tue Apr 9 16:41:33 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie*.[h,cc]: RFC2109 compliant.
+ * htlib/HtDateTime.[h,cc]: Add const-ness to the DiffTime static method
+
+Tue Apr 9 12:52:30 CEST 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.cc: fixed a bug regarding expiry date recognition
+
+Fri Apr 5 14:08:39 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalTransport.cc (Request): Fixed to strip CR from
+ header lines, output header lines with -vvv.
+
+Tue Mar 19 08:40:54 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.cc: enhanced controls regarding the expires setting
+ when no expires is returned. Prevents NULL pointer exceptions from
+ arising.
+
+Mon Mar 18 11:28:02 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/HtDateTime.h: added the copy constructor
+ * htnet/HtCookie.cc: fixed a NULL pointer bug regarding 'datestring'
+ management and HtDateTime copy constructor is now used
+
+Tue Mar 12 18:19:49 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtDateTime.cc (Parse, SetFTime): Added Parse method for
+ more flexible parsing of LOOSE/SHORT formats, use it in SetFTime.
+ Also skip unexpected leading spaces in SetFTime, as these frequently
+ cause problems with some strptime() implementations.
+
+Mon Feb 11 23:28:37 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.h (got_redirect): Add referer to properly handle
+ broken links through a redirect as reported by Joe Jah.
+
+ * htdig/Retriever.cc: As above.
+
+ * htdig/Document.cc (Retrieve): Fix bug that prevented external
+ transport methods from reporting redirects as reported by Jamie
+ Anstice <Jamie.Anstice at sli-systems.com>.
+
+ * htlib/Dictionary.cc (hashCode): Trial of hash function suggested
+ by Jamie Anstice.
+
+Sat Feb 9 18:06:29 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/DocMatch.[h,cc]: Add scoring code for the new htsearch
+ framework.
+
+Thu Feb 7 11:32:14 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc (ReadChunkedBody): gets control of Read_Line
+ methods (return error when they fail).
+
+Fri Feb 1 17:12:31 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * Merged htdig-3-2-x branch back into CVS mainline.
+
+ * ChangeLog.0: Update with current 3.1.6 ChangeLog.
+
+Thu Jan 24 18:06:04 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, aclocal.m4: Use new CHECK_SSL macro from the
+ autoconf archive.
+
+ * configure: Generate via autoconf.
+
+Fri Jan 18 11:15:29 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Transport.h (class Transport): Add const to SetCredentials
+ method declaration as pointed out by Roman Maeder.
+
+Wed Jan 16 13:35:26 2002 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * db/db.h.in: Add #include <sys/stat.h> which seems to help
+ problems of stat64 conflicts on Solaris as suggested by Gilles.
+
+Sat Jan 12 16:19:55 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: A few changes to the wording and formatting
+ of the 'accept_language' attribute description.
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Fri Jan 11 21:18:00 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/defaults.cc: added the 'accept_language' attribute
+
+Fri Jan 11 20:53:36 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.[h,cc]: management of the accept-language directive added
+ * htcommon/URL.[h,cc]: const-ness in copy constructor and other cosmetic changes
+ * htlib/Server.[h,cc]: management of the 'accept_language' attribute as
+ a server block configuration directive.
+ * htlib/Document.cc: set of the attribute above for the HTTP layer
+
+Fri Jan 11 13:25:49 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalTransport.cc (Request): Fixed to allocate access_time
+ object before setting it.
+
+Fri Jan 4 12:31:34 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtCookie.cc, htword/WordKeyInfo.cc, htword/WordMonitor.cc,
+ test/search.cc: changed all uses of strcasecmp to mystrcasecmp for
+ consistency and portability.
+
+Fri Jan 4 12:17:10 2002 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtHTTP.cc (HTTPRequest): make the second comparison of the
+ transfer-encoding header the same as the first, i.e. case insensitive
+ and limited to 7 characters.
+
+Fri Jan 4 15:13:13 CET 2002 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: parse the transfer-encoding header as case insens.
+ [fix htdig-Bugs-499388 by Matthias Emmert <Matthias.Emmert2 at start.de>]
+
+Sun Dec 30 15:47:35 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * HtHTTP.[h,cc]: management of the Content-Language directive for the response
+
+Sat Dec 29 13:07:08 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.[h,cc]: new fields (srcURL and isDomainValid) and
+ a more robust class with initialization list and copy constructor
+
+ * htnet/HtCookieJar.[h,cc]: method for calculating the minimum number
+ of periods that a domain specification of a cookie must have. Depending
+ on what the Netscape cookies specification says.
+
+ * htnet/HtCookieMemJar.cc: Management of the domain field of the cookie
+
+Mon Dec 17 06:45:02 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htdig/htdig.cc: fixed bug about cookie jar creation. It is done in
+ here, because there is only one jar for the whole process. However
+ it can be moved anywhere else. :-)
+
+Mon Dec 17 06:40:25 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: check for null pointer of cookie jar
+
+Sun Dec 16 19:55:07 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/Connection.[h,cc]: default constructor is changed and accepts
+ a socket value (by default is -1)
+ * htnet/HtCookieJar.[h,cc]: added a simple iterator
+ * htnet/HtCookieMemJar.[h,cc]: ditto
+ * htnet/HtFile: removed the management of modification_time (constructor)
+ * htnet/HtHTTP.[h,cc]: constructor with initialization list and without
+ a default constructor (the construction is now forced to pass a valid
+ connection object). Removed any memory deletion from the destructor.
+ The class is now abstract (see the virtual pure destructor).
+ * htnet/HtHTTPBasic.cc: creates a Connection object in the initialization
+ and the destructor has no responsibility
+ * htnet/HtHTTPSecure.cc: creates an SSLConnection object in the initialization
+ and the destructor has no responsibility
+ * htnet/HtNNTP.cc: creates a Connection object in the initialization
+ and the destructor has no responsibility
+ * htnet/Transport.[h,cc]: default constructor accepts a pointer to a
+ Connection object and the destructor carries out the deletion of it
+
+Thu Dec 6 13:24:30 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/examples/rundig.sh: Fixed to make use of DBDIR variable,
+ and to test for and copy db.words.db.work_weakcmpr if it's there.
+
+Fri Oct 19 11:07:33 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (IsValidURL): Fixed discrepancies in debug
+ levels for messages giving cause of rejection, inadvertently
+ changed when regex support added.
+
+Wed Oct 17 15:48:23 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalTransport.h: Added missing class keyword on friend
+ declaration.
+
+Tue Oct 16 14:35:16 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (external_parsers): Documented external converter
+ chaining to same content-type, e.g. text/html->text/html-internal.
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Mon Oct 15 22:25:55 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc, htdig/htdig.cc, htdig/Retriever.cc: Make sure
+ setEscaped is called with the current value of
+ case_sensitive. Fixes bug pointed out by Phil Glatz.
+
+Fri Oct 12 17:14:08 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/htdump.html, htdoc/htload.html: Fixed 3 little typos.
+
+Fri Oct 12 15:11:45 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtHTTP.cc (ParseHeader): Show header lines in debugging
+ output at verbosity level 3, not 4, for consistency with 3.1.x.
+
+ * htcommon/URL.cc (removeIndex): Fixed to make sure the matched
+ file name is at the end of the URL.
+
+Fri Oct 12 10:39:54 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtRegexList.cc (setEscaped): Fixed to set compiled flag to
+ FALSE when there's no pattern, so match() can detect this condition.
+ Fixes handling of empty lists in bad_querystr, exclude_urls, etc.
+
+ * htdig/Retriever.cc (IsValidURL): Fixed bad_querystr matching to
+ look at right part of URL, not whole URL.
+
+Mon Sep 24 11:47:15 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtHTTP.cc (SetRequestCommand): Put If-Modified-Since header
+ out in GMT, not local time, and only put it out if existing document
+ time > 0.
+
+ * htsearch/parser.cc (perform_phrase): Optimized phrase search handling
+ to use linear algorithm with Dictionary lookups instead of n**2 alg.,
+ as suggested by Toivo Pedaste.
+
+Tue Sep 18 10:50:40 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/running.html: New documentation on how to run after configuring.
+ * htdoc/rundig.html: New manual page for rundig script.
+ * htdoc/install.html: Added link to running.html.
+ * htdoc/contents.html: Added link to running.html, rundig.html, related
+ projects. Updated links to contrib and developer site.
+
+Fri Sep 14 22:12:56 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/URL.h: Moved DefaultPort() from private to public for
+ use in HtHTTP.cc.
+
+Fri Sep 14 09:25:20 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtHTTP.cc (SetRequestCommand): Add port to Host: header when
+ port is not default, as per RFC2616(14.23). Fixes bug #459969.
+
+Sat Sep 8 22:15:33 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * acconfig.h, include/htconfig.h.in: Add undef for
+ ALLOW_INSECURE_CGI_CONFIG, which if defined does about what you'd
+ expect. (This is for any wrapper authors who don't want to rewrite
+ but are willing to run insecure.)
+
+ * htsearch/htsearch.cc: Only allow the -c flag to work when
+ REQUEST_METHOD is undefined. Fixes PR#458013.
+
+Tue Sep 4 18:58:31 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/DocMatch.cc: Add scoring for Quim's new parser
+ framework. Only the normal word scoring is currently done, not
+ backlink_factor or other "Document" methods.
+
+Fri Aug 31 15:34:28 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.h, htdig/HTML.cc (ctor, parse, do_tag): Fixed buggy
+ handling of nested tags that independently turn off indexing, so
+ </script> doesn't cancel <meta name=robots ...> tag. Add handling
+ of <noindex follow> tag. Added <> delim. to tag debugging output.
+ Fixed a few typos.
+
+Wed Aug 29 10:33:01 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (url_part_aliases): Added clarification
+ explaining how to use example.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Mon Aug 27 15:05:09 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/search.html: Add DTD tag for HTML 4 compliance.
+ * installdir/htdig.conf: Added .css to bad_extensions default,
+ added missing closing ">".
+ * htdoc/config.html: Updated with sample of latest htdig.conf and
+ installdir/*.html.
+
+Wed Jul 25 22:16:06 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Put new htnotify_* entries in alphabetical
+ order. Removed superfluous quotes from htnotify_webmaster example
+ (htnotify.cc adds in the quotes).
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Tue Jul 24 16:07:01 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Changed references in (no_)page_number_text
+ entries from maximum_pages to maximum_page_buttons.
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Tue Jul 24 14:38:22 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/hts_templates.html: Document Quim Sanmarti's URL decoding
+ feature for template variables.
+
+Thu Jul 12 14:12:02 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtFile.cc (Request): Fixed so it doesn't remove newlines
+ from documents, and so it only tries to open mime.types once even
+ if the open fails.
+
+Thu Jul 12 11:40:07 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/conv_doc.pl, contrib/parse_doc.pl: Fixed EOF handling in
+ dehyphenation, fixed to handle %xx codes in title made from URL.
+
+ * contrib/doc2html/doc2html.pl, contrib/doc2html/pdf2html.pl,
+ contrib/doc2html/swf2html.pl: Fixed to handle %xx codes in URL title.
+
+Wed Jul 11 15:05:47 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (readFile): Added missing fclose() call, and
+ debugging message for when file can't be opened.
+
+Wed Jul 11 14:26:28 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (displayParsedFile): Added debugging message
+ when file can't be opened.
+
+ * htsearch/Display.cc (buildMatchList): Fixed while loop to avoid
+ warning.
+
+ * htsearch/htsearch.cc (main): Fixed handling of syntax error message
+ to use String class instead of strdup().
+
+ * htsearch/parser.cc (setError): Added debugging message when error
+ is set.
+
+ * htsearch/parser.cc (parse): Fixed not to clear error message after
+ it's set.
+
+Sat Jul 7 22:19:18 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * */Makefile.in: Update using current production automake
+ (1.4-p4).
+
+ * htfuzzy/Regexp.[cc,h]: Change class name to Regexp to prevent
+ further namespace clashes.
+
+ * htfuzzy/Fuzzy.c: #include "Regexp.h" now and make sure we create
+ the right class when needed.
+
+ * htlib/mktime.c: Change included mktime declaration to mymktime
+ to avoid conflict on Mac OS X. (For some reason, autoconf's
+ AC_FUNC_MKTIME doesn't work for Mac OS X. So this is a hack in the
+ meantime.)
+
+ * htfuzzy/Makefile.am: Rename Regex files. Oops!
+
+Fri Jul 6 18:38:58 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Regexp.cc, htfuzzy/Regexp.h: Rename Regex class to
+ prevent problems on case-insensitive systems.
+
+ * htlib/HtRegexReplaceList.cc, htlib/String.cc, htdig/htdig.cc:
+ Change #include of <stream.h> to modern standard of iostream.h.
+
+ * htlib/Configuration.cc (Read): Make sure we never reference a
+ negative position when trimming off whitespace.
+
+ * config.guess, config.sub: Update with new versions from GNU to
+ recognize various flavors of Mac OS X/Rhapsody.
+
+ * htlib/strptime.cc: Make sure len is initialized.
+
+Fri Jul 6 12:04:52 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtRegexList.cc (setEscaped): Fixed a potential problem
+ with list building. When we go back a step, we still have to
+ compile the new pattern in case it's the last one.
+
+Wed Jul 4 23:39:19 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/URL.cc (parse, ServerAlias): Fixed two problems that
+ caused incorrect signatures to be generated.
+
+Wed Jul 4 13:52:54 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * test/document.cc (dodoc), test/url.cc (dourl),
+ test/testnet.cc (Retrieve): Fixed up handling of config to match
+ David Graff's changes of May 16, and handling of HtHTTPBasic class
+ to match Joshua Gerth's changes of Mar 17.
+
+Tue Jul 3 16:20:56 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (GetLocal): Fixed to use URL class on given
+ URL, so that default port numbers are stripped off. This was needed
+ to allow local fetching of robots.txt.
+
+ * htnet/Connection.cc (ctors, dtor, Assign_Server, Get_Peername),
+ htnet/Connection.h: Got rid of strdup stuff, used String class for
+ peer & server_name.
+
+ * htnet/Connection.cc (Get_PeerIP): Used unambiguous name for structure.
+
+ * htnet/HtHTTP.cc (ctor, dtor): Don't allocate a 2nd Connection, as
+ child classes already do this, and set pointer to null when connection
+ is deleted, so we don't try to delete it twice. This was messing up
+ the heap and causing segfaults. Call Transport::CloseConnection before
+ deleting connection.
+
+ * htnet/HtHTTPBasic.cc (dtor), htnet/HtHTTPSecure.cc (dtor),
+ htnet/HtNNTP.cc (dtor): Only delete connection if non-null, & set
+ to null after deleting. Call Transport::CloseConnection before
+ deleting connection.
+
+ * htnet/Transport.cc (CloseConnection): Don't exit if connection
+ pointer is null, as this may be normal when called from destructor.
+
+Fri Jun 29 11:14:36 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Endings.cc (getWords): Undid change introduced in 3.1.3,
+ in part. It now gets permutations of word whether or not it has
+ a root, but it also gets permutations of one or more roots that
+ the word has, based on a suggestion by Alexander Lebedev.
+ * htfuzzy/EndingsDB.cc (createRoot): Fixed to handle words that have
+ more than one root.
+ * installdir/english.0: Removed P flag from wit, like and high, so
+ they're not treated as roots of witness, likeness and highness, which
+ are already in the dictionary.
+
+Mon Jun 25 12:50:47 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (main): Got rid of last remnants of 'urllist'
+ and used the 'l' StringList as was used in the code before, to make
+ restrict and exclude handling work properly.
+
+Mon Jun 25 15:52:19 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htsearch/htsearch.cc: defined 'urllist' in order to remove the
+ compilation error (as Jesse suggested).
+
+Fri Jun 22 16:28:13 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (buildMatchList): Fix date_factor calculation
+ to avoid 32-bit int overflow after multiplication by 1000, and avoid
+ repetitive time(0) call, as contributed by Marc Pohl. Also move the
+ localtime() call up before gmtime() call, to avoid clobbering gmtime's
+ returned static structure (my thinko).
+
+ * htdig/htdig.cc (main): Use .work file for md5_db, if -a given,
+ as contributed by Marc Pohl.
+
+ * htcommon/URL.cc (constructURL): Ensure that the _host is set if we
+ are constructing non-file urls, as contributed by Marc Pohl.
+
+ * htdoc/THANKS.html: Credit Marc Pohl for patches.
+
+Tue Jun 19 17:14:05 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * README: Bump up to 3.2.0b4, fix note about bug report submissions.
+
+Tue Jun 19 17:01:16 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (setVariables): Fixed handling of
+ build_select_lists attribute, to deal with new restrict & exclude
+ attributes.
+
+Mon Jun 18 12:16:27 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * configure.in, configure: Fix "hdig" typo in help.
+
+Fri Jun 15 17:57:19 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Noted effect of locale setting on floating
+ point numbers in search_algorithm and locale descriptions.
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Fri Jun 15 15:36:51 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/cf_generate.pl: Fixed to handle new defaults.cc format
+ with trailing backslashes.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Fri Jun 15 14:57:21 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdb/htdb_dump.cc, htdb/htdb_load.cc, htdb/htdb_stat.cc: Added a
+ conditional include of <getopt.h> if HAVE_GETOPT_H is defined.
+
+Fri Jun 15 11:25:24 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (main), htcommon/defaults.cc,
+ htdoc/hts_form.html: two new attributes, used by htsearch, have
+ been added: restrict and exclude. They can now give more control
+ to template customisation through configuration files, allowing
+ to restrict or exclude URLs from search without passing
+ any CGI variables (although this specification overrides the
+ configuration one).
+
+Fri Jun 15 09:34:23 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (main): Changed ridiculously outdated question
+ "Did you run htmerge?" to "Did you run htdig?".
+
+Fri Jun 8 11:07:04 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc: Add <float.h> header, now needed for RH 7.1.
+
+Thu Jun 7 12:05:09 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/htdig-3.2.0.spec: Updated to 3.2.0b4.
+
+ * contrib/README: Mention acroconv.pl script.
+
+Thu Jun 7 10:46:19 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (expandVariables): Use isalnum() instead of
+ isalpha() to allow digits in variable names, allow '-' in variable
+ names too for consistency with attribute name handling.
+
+Wed Jun 6 16:14:06 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * httools/htpurge.cc (main): Added missing "u:" declaration in
+ getopt() call.
+
+Wed Jun 6 15:24:04 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/doc2html/DETAILS, contrib/doc2html/README,
+ contrib/doc2html/doc2html.pl, contrib/doc2html/pdf2html.pl,
+ contrib/doc2html/swf2html.pl: Update to version 3.0 of doc2html,
+ contributed by David Adams <D.J.Adams at soton.ac.uk>.
+
+Wed May 16 11:23:04 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ Added a pile of changes contributed by David Graff
+ <phlat at mindspring.com> fixing compilation problems with
+ non-gcc/g++ compilers (i.e. Sun's compiler).
+
+ * Makefile.config, db/Makefile.am: Added no-dependencies to
+ AUTOMAKE_OPTIONS for those not on GNU C/C++
+
+ * configure.in: Changed AM_PROG_YACC to AC_PROG_YACC as autoconf
+ and autoreconf both complain that AM_PROG_YACC is not in the
+ library.
+
+ * htcommon/DocumentDB.cc: Removed default parameters as they are
+ already declared in the header
+
+ * htcommon/HtConfiguration.cc: Changed some of the loop
+ declarations so that Sparc C 4.2 is happy. Removed default
+ parameters as they are already declared in the header. Moved inline
+ ParseString to header where it belongs. Added initialization for
+ HtConfiguration::_config static member variable. Added
+ implementation of HtConfiguration::config() static class member.
+
+ * htcommon/HtConfiguration.h: Added include for ParsedString.h.
+ Added declaration of static member function ::config().
+ Added private static member variable _config;.
+ Added inline ParseString from implementation.
+
+ * htcommon/HtURLCodec.cc, htcommon/HtURLRewriter.cc,
+ htcommon/HtZlibCodec.cc, htcommon/URL.cc, htcommon/conf_lexer.lxx,
+ htdig/Document.cc, htdig/ExternalParser.cc,
+ htdig/ExternalTransport.cc, htdig/HTML.cc, htdig/Parsable.cc,
+ htdig/Plaintext.cc, htdig/Retriever.cc:
+ Changed to use new global configuration semantics.
+
+ * htcommon/conf_parser.yxx: Added a return to yyerror to quiet
+ Sparc C 4.2. Should really return a value here. Is it normal to
+ return a YY_something or just -1, 0, ?
+
+ * htcommon/defaults.cc: Added line continuation characters at the
+ end of all the string lines that were not completed by a quote.
+
+ * htcommon/defaults.h, htdig/htdig.h: Removed extern
+ HtConfiguration config in favor of HtConfiguration::config().
+
+ * htdig/ExternalTransport.h: Changed return type of GetResponse to
+ match superclass.
+
+ * htdig/Server.cc, htdig/htdig.cc, htfuzzy/htfuzzy.cc, htnet/HtFile.cc,
+ htsearch/Display.cc, htsearch/QueryLexer.cc, htsearch/WordSearcher.cc,
+ htsearch/htsearch.cc, htsearch/parser.cc, htsearch/qtest.cc,
+ httools/htdump.cc, httools/htload.cc, httools/htmerge.cc,
+ httools/htnotify.cc, httools/htpurge.cc, httools/htstat.cc,
+ htlib/Configuration.cc, htlib/HtRegex.cc:
+ Changed constructor to use initializers
+
+ * htlib/HtDateTime.cc: Moved inlines to header
+
+ * htlib/HtDateTime.h: Added inlines from implementation
+
+ * htlib/HtHeap.cc, htlib/HtHeap.h, htlib/HtVector.cc, htlib/HtVector.h,
+ htlib/HtVectorGeneric.h, htlib/HtVectorGenericCode.h:
+ Changed Copy member to return same type as superclass
+
+ * htlib/HtRegexReplace.cc, htlib/HtRegexReplaceList.cc: Removed
+ default parameters as they are declared already in the header
+
+ * htlib/myqsort.h: Changed comment in header to use C-style
+ comments as it's compiled using a C compiler.
+
+ * htlib/regex.h: Changed #if __STDC__ to #if defined(__STDC__)
+
+ * htword/WordKey.h: Corrected const'ness
+
+Wed May 9 07:50:19 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookieJar.h: ShowSummary makes the class abstract
+
+Sat May 5 20:51:00 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/cf_blocks.html: Add colon in example and description of
+ blocks to match code for the moment. The parser can be changed
+ later if we like.
+
+Sat May 5 20:38:44 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/ParsedString.cc (get): Use isalnum() instead of isalpha()
+ for looking up--allows names that contain digits too.
+
+Sat May 5 20:36:29 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/htString.h (class String): Remove now-obsolete and
+ confusing int() casting operator. This was previously used to make
+ a string of a certain length. Use String(int) as a ctor instead.
+
+Sat May 5 20:30:18 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htword/WordContext.[h,cc]: Change Initialize to supply a config
+ that can be modified (i.e. if we don't have ZLIB_H).
+
+Sat May 5 23:30:55 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookieJar.h: ShowSummary, printing cookies (to be derived)
+ * htnet/HtCookieMemJar.[h,cc]: ShowSummary, printing cookies
+
+Thu May 3 23:14:14 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.[h,cc]: connection object is now created and destroyed.
+ NULL pointers converted to C++ standard (0).
+ * htnet/Transport.[h,cc]: NULL pointers converted to C++ standard (0).
+ * htnet/Connection.[h,cc]: ditto
+
+Thu May 3 23:09:33 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htlib/HtDateTime.[h,cc]: Timestamp format added (used by ht://Check
+ for MySQL interfacing) - keeping them equal helps me maintain
+ both of them!
+
+Thu May 3 10:28:56 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/parser.cc (perform_and): Add missing return statement,
+ as suggested by Quim Sanmarti.
+
+Fri Mar 30 15:50:42 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/ResultMatch.h, htsearch/ResultMatch.cc (setTitle): Changed
+ argument type to char * to fix problem with sort by title not working,
+ as reported by Adam Lewenberg.
+
+Fri Mar 30 14:08:51 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.h, htdig/Retriever.cc (parse_url): Define and use
+ Document::StoredLength() method to get actual length of data
+ retrieved and given to md5(), which may be less than original
+ length. Fixes bug reported by Michael Haggerty.
+
+Wed Mar 21 22:22:55 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc (generateStars): Add NSTARS variable for
+ template output as suggested by Caleb Crome
+ <ccrome at users.sourceforge.net> (except here precision is 0). Fixes
+ feature request #405787.
+
+ * htdoc/hts_templates.html: Add description of NSTARS variable
+ above.
+
+ * htlib/HtRegex.cc (set): Make sure we free memory if we've
+ already compiled a pattern.
+
+ * htdig/Retriever.cc (got_href): Fix bug pointed out by Gilles
+ with hopcounts and don't bother to update the DocURL unless we
+ have a new doc.
+
+Mon Mar 19 18:00:18 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/URL.cc (URL): Make sure even absolute relative URLs are
+ run through normalizePath() as pointed out by Gilles. Allows
+ backout of previous fix of #408586, which does extra re-parsing of
+ URL.
+
+ * htdig/Retriever.cc (Need2Get): Back out change of Mar. 17 for above.
+
+ * htcommon/conf_lexer.[cxx, lxx]: Apply change suggested by Jesse
+ to remove empty statements.
+
+Mon Mar 19 11:33:25 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegexList.cc (setEscaped): Fix assorted bugs, including
+ obvious segfault, incorrect creation of limits, and failure to set
+ "compiled" flag before return().
+
+ * htdig/Retriever.cc (IsValidURL): Make sure the tmpList is
+ cleared before attempting to parse the bad_querystr
+ config--otherwise we'll just Add to the end of the list.
+
+Sun Mar 18 14:01:56 CET 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/Transport.[h,cc], htnet/HtHTTP.cc: In order to modularize
+ the net code the default parser string for the content-type has
+ been added to the Transport class.
+ * htdig/Document.cc: modified for the changes above.
+
+Sat Mar 17 16:38:27 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, configure, include/htconfig.h.in: Add tests for
+ libssl, libcrypto, and ssl.h.
+
+ * htnet/SSLConnection.[cc,h], htnet/HtHTTPBasic.[cc,h],
+ htnet/HtHTTPSecure.[cc,h]: New files. Contributed by Joshua Gerth
+ <jgerth at hmsoaps.com>.
+
+ * htnet/Transport.[cc,h], htnet/HtNNTP.cc, htnet/HtHTTP.cc,
+ htnet/Connection.h: Changes needed to support SSLConnection class.
+
+ * htdig/Document.cc, htdig/Document.h: Ditto.
+
+ * htnet/Makefile.am, htnet/Makefile.in: Add above for compilation.
+
+ * htdoc/THANKS.html: Updated with new contributors.
+
+Sat Mar 17 15:28:20 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htword/WordContext.cc (Initialize): If HAVE_LIBZ or HAVE_ZLIB_H
+ are not defined, make sure wordlist_compress is set to false. This
+ semi-hack will not be necessary with new mifluz code which does
+ not necessarily need zlib. Fixes bug #405761.
+
+Sat Mar 17 14:39:17 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Fixed problems with META descriptions
+ containing newlines, returns or tabs. They are now replaced with
+ spaces. Fixes bug #405771.
+
+Sat Mar 17 14:26:55 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Improve handling of whitespace in META
+ refresh handling. Fixes bug #406244.
+
+ * htlib/HtRegexList.cc (setEscaped): Make this more efficient by
+ building up larger and larger patterns--when we fail, go back a
+ step and add the pattern in the next loop. This ensures we have a
+ list of the maximum allowable length regexp.
+
+ * htdig/Retriever.cc (Need2Get): Add change suggested by Yariv Tal
+ to run URLs through the URL parser for cleanup before comparing to
+ the visited list. Fixes bug #408586.
+
+Mon Mar 12 13:28:56 2001 Michael Haggerty <mhagger at alum.mit.edu>
+
+ * htdig/Retriever.cc, htdig/Retriever.h:
+ Fixed two off-by-one errors related to Retriever::factor table.
+
+Mon Mar 12 11:25:31 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Dictionary.cc (Add): Fix comments about add method--it
+ will replace existing keys. Fixes report #407940.
+
+Thu Mar 8 15:31:45 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: removed an unnecessary <else>
+
+Tue Mar 6 11:42:10 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/regex.[c,h]: Update with versions from glibc 2.2.2.
+
+Mon Mar 5 13:47:30 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * ltconfig (host_os): Add test to solve problems building C++
+ shared libraries on some platforms. Currently should only make
+ --enable-shared the default on Linux and *BSD* unless specified
+ explicitly by the user.
+
+Mon Mar 5 12:52:57 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/String.cc (operator =): Add fix contributed by Yariv Tal
+ <YarivT at webmap.com>, fixed bug #406075.
+
+Mon Mar 5 12:06:26 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegexList.cc (match): Ignore rearrangement code for the
+ moment--may or may not be the culprit for bug #405277, but is a
+ start to debugging the problem.
+
+ * htlib/List.[cc,h]: Remove *prev pointer from listnode
+ structure and add a *prev pointer to the cursor structure. Saves
+ one pointer per item in the list, plus overhead.
+
+Mon Mar 5 11:56:16 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc (bad_extensions): Add .css to ignore CSS docs.
+
+ * htdig/Document.cc (getParsable): Ignore CSS documents -- they
+ aren't very useful to parse. Solves bug report #405772.
+
+Sun Mar 04 11:32:43 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.cc: fixed a bug regarding <no header> with persistent
+ connections enabled, but head call before the get one disabled.
+ Sourceforge.net's bug reference: 405275 - fixed.
+
+Sat Mar 3 21:09:55 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * .version: Bump to 3.2.0b4 so snapshots have right versioning.
+
+Thu Mar 1 16:51:09 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in: Added test for alloca.h, which is needed for the
+ regex.c code.
+
+Wed Feb 28 12:54:43 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htcommon/defaults.cc: 'disable_cookies' option has been added, with
+ a 'server' scope. By default it is set to 'false'.
+ * htdig/Server.h, cc: management of the option above has been enhanced.
+ * htnet/HtHTTP.h, cc: now an HTTP connection can disable/enable cookies
+ through the configuration attribute 'disable_cookies'.
+ * htdig/Document.cc: management of cookies enabling/disabling is here.
+ * Cookies classes: now support the expiration time. Need only the
+ subdomain treatment.
+
+Mon Feb 26 16:37:30 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/conf_lexer.lxx: Don't directly call exit(1) on an error
+ condition! Seems a harsh problem for an unknown character.
+
+ * htcommon/conf_parser.yxx: Ditto. (Running out of memory is a
+ much more fatal condition, of course.)
+
+ * htcommon/conf_lexer.cxx: Regenerate using flex 2.5.4.
+
+ * htcommon/conf_parser.cxx: Regenerate using bison 1.28.
+
+Sun Feb 25 19:46:01 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtHTTP.h, cc: support for cookies enabled
+ * htnet/Makefile.am: files for cookies have been added to make.
+
+Sun Feb 25 19:27:18 CEST 2001 Gabriele Bartolini <angusgb at users.sourceforge.net>
+
+ * htnet/HtCookie.h,cc: class HTTP cookie
+ * htnet/HtCookieJar.h,cc: abstract class for managing the
+ 'jar' of cookies. In this way, we can use different methods
+ for the storage of them.
+ * htnet/HtCookieMemJar.h,cc: class for managing the 'jar' of
+ cookies in memory, without persistent storage (no db or file).
+ * Many thanks to Robert LaFerla for his coding on this! Yeah,
+ really really thanks Robert! <robertlaferla at mediaone.net>
+
+
+Thu Feb 22 16:43:18 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/ChangeLog, htdoc/RELEASE.html, README: Update to roll the
+ release of 3.2.0b3.
+
+Thu Feb 22 16:22:05 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (main), htsearch/Display.cc (setVariables,
+ createURL, buildMatchList), htdoc/hts_form.html,
+ htdoc/hts_templates.html: Add Mike Grommet's date range search
+ feature.
+
+Mon Feb 19 18:24:42 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Synonym.cc (createDB): Create database in a temporary
+ directory before we move it into place, much like the endings
+ code. This should prevent problems when we just append to the DB
+ instead of making a new one.
+
+ * htdig/htdig.cc (main): Fix bug discovered by Gilles--htword
+ should be initialized *after* we are finished modifying config
+ attributes based on flags and unlink with -i.
+
+ * installdir/rundig: Fix bug with calling htpurge with -s option.
+
+Thu Feb 15 11:03:42 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/*.html: Update with 2001 copyrights and various changes
+ with the website move for the pending 3.2.0b3 release.
+
+Thu Feb 15 10:41:47 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegexList.cc (match): Fix thinko with logic for matching
+ and add code to rearrange matching nodes for hopefully better
+ performance.
+
+Sun Feb 11 16:42:11 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegexList.h, htlib/HtRegexList.cc (class HtRegexList):
+ Simple List(HtRegex) object with similar calling conventions to
+ HtRegex class. This version is not as sophisticated as it could
+ be, but it's not likely to drop objects when reorganizing.
+
+ * htlib/Makefile.[in,am]: Add HtRegexList files to list for
+ compilation.
+
+ * htdig/htdig.h, htdig/htdig.cc, htdig/Retriever.cc: Use
+ HtRegexList instead of HtRegex for setting escaped values--should
+ never fail (since each String item is short).
+
+ * htlib/HtDateTime.cc: Put back timezone specs into the output
+ formats so we give everything even if we ignore it when reading
+ input.
+
+Mon Feb 5 11:47:07 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.cc: Remove the timezone specs in the date
+ formats--these are not required in the RFCs because many dates are
+ in GMT anyway.
+
+Wed Jan 17 08:48:30 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalTransport.cc (Request): Oops, fixed a holdover from
+ code borrowed from ExternalParser.cc's fork handling.
+
+Mon Jan 15 23:09:37 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Connection.cc: Back out previous change--this should not
+ in any way be needed since the configure script should set
+ FD_SET_T.
+
+ * configure.in, configure: Add more lenient prototyping for
+ select() test--now allows "const struct timeval" for compilation
+ on BSDI.
+
+ * htdoc/RELEASE.html: Update with Gilles's changes.
+
+ * htdoc/cf_blocks.html: New file describing <server ...></server>
+ and <url ...></url> blocks.
+
+ * htdoc/cf_general.html, htdoc/confmenu.html: Refer to the above.
+
+Mon Jan 15 17:46:07 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/TemplateList.cc (createFromString), htcommon/defaults.cc:
+ Treat template_map as a _quoted_ string list.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Mon Jan 15 17:40:45 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/hts_templates.html: Add METADESCRIPTION variable.
+
+ * htsearch/Display.cc (displayMatch): Add METADESCRIPTION variable.
+
+ * htdig/ExternalParser.cc (parse): Fix up handling of arguments.
+
+ * htdig/ExternalTransport.cc (Request): Fix up handling of fork/exec
+ and command arguments, add wait() call.
+
+Wed Jan 10 19:23:36 2001 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/rundig: Fix -a handling to move db.words.db.work_weakcmpr
+ into place if it exists
+
+Sat Jan 6 21:50:58 2001 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in: Add checks for <sys/wait.h> and <wait.h> for
+ ExternalParser.
+
+ * include/htconfig.h.in: Regenerate using autoheader.
+
+ * configure: Regenerate using autoconf.
+
+ * htnet/Connection.cc: Add definition for FD_SET_T to fix problems
+ compiling on BSDI mentioned by Joe.
+
+ * htdig/ExternalParser.cc: Use <sys/wait.h> or <wait.h> as
+ appropriate. Should fix problems with compilation mentioned by
+ Jesse on HP/UX.
+
+ * README, htdoc/RELEASE.html: Adjust dates for the new year.
+
+ * htdoc/upgrade.html: A few "remaining features" have been implemented.
+
+Sun Dec 06 19:46:15 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.cc: Fixed bug for Read_Line function call in
+ ReadChunkedBody method. Many thanks to Robert LaFerla. ;-)
+
+Tue Dec 12 13:24:49 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Fixed to properly handle binary
+ output from an external converter. Fixed some compilation errors.
+
+Tue Dec 12 12:52:14 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Handle parser command string
+ as a string list again to allow arguments, build up argv and
+ use execv instead of execl.
+
+Tue Dec 12 12:25:04 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Add call to wait for child process,
+ to avoid zombie buildup.
+
+Mon Dec 11 23:57:43 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Fix up handling of fds in child
+ process, more fault-tolerant handling of pipe or fork errors.
+
+Mon Dec 11 23:30:55 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Fix up handling of creation
+ of temporary file, check for proper return code, give error if
+ appropriate.
+
+Mon Dec 11 23:19:28 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Lowercase content-types and
+ strip off any trailing semicolons, at one last spot. This reinserts
+ code added Sep 11, which was dropped Oct 9, probably inadvertently
+ during mifluz back-out.
+
+Sun Dec 10 15:28:44 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/ExternalTransport.cc: Use fork/exec instead of calling
+ popen, which bypasses any shell escape problems.
+
+ * htdig/ExternalParser.cc: Ditto, plus use of mkstemp where
+ available to pick the filename.
+
+ * configure, configure.in: Check for mkstemp where available.
+
+ * include/htconfig.h.in: Define it as above.
+
+ * htlib/Makefile.am: Omit regex.c from SOURCES--this is included
+ when necessary by the configure script. Otherwise this produces
+ duplicate declarations, etc.
+
+ * htlib/Makefile.in: Regenerate using automake --foreign.
+
+ * htcommon/URL.cc: Fix bug with ports of 0 showing up in URLs like
+ mailto: or other less-common protocols.
+
+Fri Dec 1 14:45:33 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/htdig-3.2.0.spec: Updated to 3.2.0b3.
+
+Fri Dec 1 13:59:09 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/Makefile.am: Fix pkginclude_HEADERS to list missing headers
+ ber.h, libdefs.h, myqsort.h, mhash_md5.h, omit unneeded langinfo.h;
+ fix libht_la_SOURCES to list missing sources regex.c, myqsort.c.
+
+ * htlib/Makefile.in: Regenerate using automake --foreign
+
+ * htlib/langinfo.h, htlib/nl_types.h: Removed as they're now unused.
+
+Fri Dec 1 13:22:47 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/strptime.cc (mystrptime): make ptr const and use cast on
+ return value to avoid warnings.
+
+ * htlib/Makefile.am: Fix pkginclude_HEADERS to list HtRegexReplace*.h
+ rather than .cc.
+
+ * htlib/Makefile.in: Regenerate using automake --foreign
+
+Fri Dec 1 11:58:21 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * Makefile.in, [hit]*/Makefile.in: Regenerate using automake --foreign
+ after fixing bug with cp -pr in automake.
+
+Thu Nov 30 14:41:58 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/Makefile.am: Removed howitworks.html from EXTRA_DIST.
+
+ * Makefile.in (distdir): Added missing variable name 'd' to cp -pr.
+
+Thu Nov 30 14:01:48 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/strptime.cc, htlib/lib.h: make first 2 args to strptime
+ const to avoid warnings, use cast in asizeof to avoid warnings.
+
+ * htsearch/qtest.cc: Change include from iostream to iostream.h
+
+ * htsearch/DocMatch.cc: Change include from iostream to iostream.h
+
+ * htsearch/Display.cc (createURL, buildMatchList, excerpt, hilight):
+ Clean up code to get rid of warnings, especially resulting from
+ NULLs in ternary operators.
+
+Thu Nov 30 10:55:09 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/String_fmt.cc (form, vform): Use vsnprintf rather than
+ vsprintf, for buffer overflow prevention if vsnprintf available.
+
+ * htdig/Retriever.cc: Remove unused strptime declaration.
+
+ * htlib/HtDateTime.cc: Use mystrptime if HAVE_STRPTIME not set.
+
+Wed Nov 29 23:31:10 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdb/htdb_stat.cc, htdb_load.cc, htdb_dump.cc: Make sure we
+ include htconfig.h to include proper declarations.
+
+ * htlib/strptime.cc: Change to strptime.cc, from htdig-3.1 series
+ hopefully more portable until I can find a more suitable
+ replacement.
+
+ * htlib/Makefile.am, htlib/Makefile.in: As above.
+
+ * htlib/clib.h, htlib/lib.h: Ditto.
+
+ * htdoc/all.html: Add a first draft of program summaries.
+
+Wed Nov 29 18:00:15 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (parse_url): Remove undeclared "dup" variable,
+ add missing calls to words.Skip().
+
+Wed Nov 29 17:44:56 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/htdig.html: Add description of -v output.
+
+Mon Nov 27 12:03:34 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/md5.cc: Added missing include of time.h
+
+Fri Nov 24 00:56:01 2000 Toivo Pedaste <toivo at ucs.uwa.edu.au>
+
+ * htsearch/Display.cc: Some extra debugging for scoring
+
+Sun Nov 19 00:56:01 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/HtFile.cc (Request): Use opendir/readdir instead of
+ scandir for generating directory listings on-the-fly.
+
+ * htdoc/RELEASE.html: Write up release notes for 3.2.0b3.
+
+ * htdoc/THANKS.html: Update list of contributors for 3.2.0b3 as
+ current.
+
+Fri Nov 17 14:52:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/acroconv.pl: Added external converter script to convert
+ PDFs with acroread.
+
+Mon Nov 6 12:13:13 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (GetLocal, GetLocalUser): move String definition
+ out of while statement for AIX xlC compiler.
+
+Mon Oct 30 21:50:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Server.h, htdig/Server.cc (push): Add newDoc parameter that
+ will allow redirects (old docs) to be followed and not count
+ against the maxDoc restrictions.
+
+ * htdig/Retriever.cc (got_redirect): Use new parameter so we don't
+ count against a server's max documents since it's a redirect.
+
+ * htlib/nl_types.h: Add for systems missing this header file.
+
+Sun Oct 29 21:36:51 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Updated per-server and per-URL fields to
+ match code. I still have a "wish list" of additional attributes
+ that should work this way eventually.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Sun Oct 22 17:13:08 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/HtWordList.h: Add missing include for stdlib.h needed for
+ abort().
+
+ * htsearch/BooleanQueryParser.cc (ParseAnd): Fix problems with RH7
+ compiler -- shouldn't use "not" as a variable name!
+
+Thu Oct 19 22:19:16 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * ltmain.sh, ltconfig: Update with versions from libtool
+ 1.3.5, which may fix some problems building libraries.
+
+Mon Oct 9 21:59:11 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * */* [many, many files]: Backed out mifluz merge by going back on
+ modified files to 091000 snapshot.
+
+ * configure: Regenerated from configure.in.
+
+ * */Makefile.in: Regenerated using automake.
+
+Fri Oct 6 11:03:14 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Parse <object> tags properly, looking
+ for data= attribute rather than src=.
+
+ * htcommon/defaults.cc (server_aliases): Additional clarification
+ to server_aliases description of port numbers.
+
+Wed Oct 4 12:12:31 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (limit_normalized, server_aliases,
+ server_max_docs, server_wait_time): Added clarification
+ to server_aliases description. Changed word "directive" to
+ "attribute" where appropriate. Added cross-link to server_aliases
+ from limit_normalized.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Wed Sep 27 00:05:41 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdb/mifluz[dict, dump, load].cc, htdb/util_sig.h,
+ htdb/util_sig.cc: New files from mifluz merge. (Whoops, missed a
+ directory).
+
+ * htdb/*.cc: Change config.h references to htconfig.h.
+
+ * htlib/myqsort.c: Ditto.
+
+ * htcommon/HtWordReference.h, htcommon/HtWordReference.cc: Ensure
+ we keep the WordContext object around--unfortunately this also
+ requires that callers initialize us with a WordContext (e.g. from
+ the HtWordList class).
+
+ * htlib/StringMatch.h, htlib/StringMatch.cc: Changes to use
+ WordType directly instead of HtWordType.
+
+ * htfuzzy/*: Ditto. Additionally make sure HtWordReference objects
+ are instantiated properly.
+
+ * htcommon/DocumentRef.cc, htcommon/HtWordList.cc: As above.
+
+ * htdig/*: As above.
+
+ * htsearch/*: As above.
+
+ * httools/*: Don't bother initializing WordContext--this is done
+ in the HtWordList class now.
+
+ * htdig/htdig.cc: Ditto.
+
+ * htsearch/htsearch.cc, htsearch/qtest.cc: Ditto.
+
+ * htfuzzy/htfuzzy.cc: Ditto.
+
+ * db/Makefile.am, db/Makefile.in: Update to build libhtdb instead
+ of libdb to prevent conflicts.
+
+Sun Sep 24 22:50:22 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htword/HtWordList.h, htword/HtWordList.cc: Keep a WordContext
+ object private that is associated with this word database and
+ provide accessor.
+
+ * htword/WordType.h, htword/WordType.cc: Add WordToken function,
+ migrated from HtWordType class.
+
+ * htcommon/HtWordType.cc: WordType class no longer has Instance()
+ method, so just pass along the calls.
+
+ * htlib/DB2_db.cc (db_init): Remove unnecessary NULL parameter.
+
+ * htlib/Makefile.am, htlib/Makefile.in: Remove HtVectorGeneric and
+ derived files as well as HtWordType as these are deprecated.
+
+Wed Sep 20 22:47:01 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * aclocal.m4: Add in missing autoconf macros that somehow didn't
+ make the merge before. (No idea why I didn't catch this earlier.)
+
+ * acinclude.m4: Use newer CHECK_ZLIB macro.
+
+ * */Makefile.in: Updated with automake for new build changes.
+
+ * configure, include/htconfig.h.in: Updated using autoconf.
+
+ * test/dbbench.cc, test/word.cc, test/search.cc: Fix #include to
+ point to htconfig.h not non-existent config.h.
+
+ * htlib/Configuration.h: Fix copy ctor, removing code in header file.
+
+ * htword/*.cc: Ditto.
+
+ * htword/Makefile.am: Update from mifluz version.
+
+ * htlib/myqsort.h, htlib/myqsort.c: Additional system library
+ replacement code.
+
+Sat Sep 16 20:14:32 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, configure, acinclude.m4, aclocal.m4, acconfig.h,
+ include/htconfig.h.in: Merged with mifluz versions. Main
+ difference is that top-level configure script now also configures
+ db/ directory as well.
+
+ * Makefile.am, */Makefile.in: Updated with automake for new build
+ environment (with db/ run through top-level configure).
+
+ * db/*.c: Updated to use htconfig.h instead of config.h.
+
+Wed Sep 13 22:05:33 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * Merged in mifluz-0.19 branch. Everything will break
+ temporarily. Loic and I will clean up tomorrow.
+
+ * htdoc/RELEASE.html, htdoc/THANKS.html, htdoc/TODO.html: Get a
+ start on updating these files for the next release.
+
+ * htdoc/cf_generate.pl: Revert change of Sep. 9 to ignore links to
+ all.html in cf_byprog.html file.
+
+ * htdoc/all.html: New file, moved from howitworks.html and not
+ updated yet.
+
+ * htdoc/contents.html: Change link from howitworks.html to all.html
+
+Tue Sep 12 17:00:00 CEST 2000 Quim Sanmarti <qss at gtd.es>
+
+ * htsearch: added AndQuery.cc BooleanLexer.cc BooleanQueryParser.cc
+ ExactWordQuery.cc GParser.cc NearQuery.cc NotQuery.cc
+ OperatorQuery.cc OrFuzzyExpander.cc OrQuery.cc
+ PhraseQuery.cc Query.cc QueryLexer.cc QueryParser.cc
+ SimpleQueryParser.cc VolatileCache.cc WordSearcher.cc
+ qtest.cc WordSearcher.h AndQuery.h AndQueryParser.h
+ BooleanLexer.h BooleanQueryParser.h ExactWordQuery.h
+ FuzzyExpander.h GParser.h NearQuery.h NotQuery.h
+ OperatorQuery.h OrFuzzyExpander.h OrQuery.h OrQueryParser.h
+ PhraseQuery.h Query.h QueryCache.h QueryLexer.h
+ QueryParser.h SimpleLexer.h SimpleQueryParser.h VolatileCache.h.
+ This is the new query parsing/evaluation framework.
+
+ * Modified DocMatch.{cc,h} and ResultList.{cc,h} for compatibility.
+
+ * Removed the previous {And,Or,Exact,}ParseTree.{cc,h} files.
+
+ * Modified Makefile.{am,in} consequently.
+
+Mon Sep 11 11:56:44 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc (parse): Lowercase content-types and
+ strip off any trailing semicolons, at one last spot which Geoff missed.
+
+Sat Sep 9 21:28:29 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc (getParsable): Fix a bug with earlier
+ change--if no parser is found and the MIME type is not text/* then
+ return a NULL parser.
+
+ * htdig/Retriever.cc (RetrievedDocument): If a NULL parser is
+ returned, mark the document as noindex and move on.
+
+ * configure.in, configure (enable-tests): Fix bug that would run
+ the 'yes' program inside the configure script if --enable-tests
+ was set.
+
+Sat Sep 9 17:50:11 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Add "all" program listing for common
+ attributes--seems more logical esp. now with many httool programs.
+
+ * htdoc/cf_generate.pl (cf_byprog): Do not output a link when
+ 'prog' is 'all.'
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Sat Sep 9 11:44:47 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * aclocal.m4 (AM_CHECK_YACC): New macro to check for bison/yacc
+ and use "missing yacc" if not found.
+
+ * configure.in (enable_tests): Fix buglet where --enable-tests=no
+ or --disable-tests would not work and set the default to enabled
+ tests. Since the tests do not build unless the user does a "make
+ check" this should not be confusing and should help debugging.
+ Also use AM_CHECK_YACC instead of AC_CHECK_YACC.
+
+ * configure: Regenerate using autoconf.
+
+Sat Sep 9 11:01:03 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/ExternalParser.cc (canParse): Lowercase content-types and
+ strip off any trailing semicolons. Should prevent problems with
+ combined content-type; charset values.
+ (ctor): As above.
+
+ * htdig/Document.cc (getParsable): Only assume plain text if MIME
+ code starts with text/. Should prevent problems with retrieving
+ things like image/png or application/postscript as text.
+
+Fri Sep 8 22:59:10 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Add new attributes htnotify_replyto,
+ htnotify_webmaster, htnotify_prefix_file, htnotify_suffix_file.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+ * httools/htnotify.cc: Added in code from Richard Beton
+ <richard.beton at roke.co.uk> to collect multiple URLs per e-mail
+ address and allow customization of notification messages by
+ reading in header/footer text as designated by the new attributes
+ above.
+
+Fri Sep 8 15:15:00 2000 Quim Sanmarti <qss at gtd.es>
+
+ * htsearch/Display.cc: Fixed tiny date_format bug;
+ added url-decoding template variable expansion.
+
+Thu Sep 7 23:45:25 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (Retriever): Only open up md5 database if
+ check_unique_md5 attribute is set.
+
+Thu Sep 7 22:56:19 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/URL.cc (DefaultPort): Add file default port of 0.
+
+ * htnet/HtFile.cc (Request): Handle directory listings by using
+ scandir and generating minimal HTML file with appropriate noindex listing.
+
+Wed Sep 06 10:00:50 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htlib/URL.h, htlib/URL.cc: Restored corrected versions of URL.*
+ * htnet/HtNNTP.h: Removed the error in the NNTP class declaration
+
+Mon Sep 04 13:43:40 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.cc: Restored previous version of HtHTTP. I removed
+ an initialization in the constructor (_modification_time). Sorry.
+
+Sun Sep 3 16:51:24 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc, htdig/Server.cc: Fix compiler warnings about
+ String conversions.
+
+ * configure, configure.in, db/configure, db/configure.in,
+ db/acinclude.m4, db/aclocal.m4: Ensure --enable-bigfile is handled
+ correctly by the configure scripts as pointed out by Jesse.
+
+Fri Sep 01 23:28:43 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * URL.cc: added DefaultPort() method and changed NNTP default port
+ from 523 to 119.
+ * Document.cc: management of NNTP documents retrieval.
+
+Fri Sep 01 19:05:02 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtNNTP.* : just created them ...
+ * htnet/HtHTTP.cc : removed modification_time deletion in the
+ class destructor.
+
+Thu Sep 01 12:00:00 2000 Toivo Pedaste <toivo at ucs.uwa.edu.au>
+
+ * htdig/Retriever.cc: Allow for modify time being set to
+ current time if not available.
+
+Thu Aug 31 13:21:12 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (allow_in_form, build_select_lists):
+ Add clearer instructions to allow_in_form description, add
+ cross-links between these two sections.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Wed Aug 30 10:01:59 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * substitution of char * returned types to const String & in URL and
+ Server classes. This change made me do lots of changes in other files:
+ HtFile.cc, HtHTTP.cc, HtConfiguration.*, Document.*, ExternalParser.*,
+ Retriever.*.
+
+Tue Aug 30 12:00:00 2000 Toivo Pedaste <toivo at ucs.uwa.edu.au>
+
+ * htlibs/md5.cc, htlibs/md5.h: Generate md5 hash of
+ a page and also optionally the modify date.
+
+ * htlibs/mhash_md5.h, htlibs/mhash_md5.c, htlibs/libdefs.h:
+ Md5 hash code from libmhash
+
+ * htdig/Retriever.cc: Allow storing md5 hashes of pages
+ in order to reject aliases.
+
+ * htcommon/defaults.cc: Options "check_unique_md5" and
+ "check_unique_date"
+
+Tue Aug 29 08:51:39 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/upgrade.html: Add description of the difference between
+ htmerge and htpurge. Mention other httools.
+
+ * htsearch/parser.cc, htsearch/parser.h: Merge in patch by Quim
+ Sanmarti <qss at gtd.es> to fix problems with phrase searching and
+ AND searches and improve performance.
+
+Sun Aug 27 22:41:10 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/AndParseTree.cc, htsearch/OrParseTree.cc (Parse):
+ Rewrote using new WordToken inherited method. Fixes a bug where
+ user input two phrases next to each other.
+
+ * htsearch/ParseTree.cc (Parse): Fix bug where phrases would
+ "adsorb" prior query words. Also fix bug where operators were
+ incorrectly popped off the stack. Should (hopefully) solve all
+ parsing problems.
+
+ * htsearch/*ParseTree.cc (GetLogicalWords): Test for empty list of
+ children to prevent potential segfault.
+
+Sat Aug 26 18:40:50 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * installdir/{syntax, header, footer, wrapper, nomatch}.html:
+ Add DTD tags, ALT attributes and remove bogus </select> tags to
+ fix invalid HTML pointed out in PR#901.
+
+Wed Aug 23 23:39:18 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/ParseTree.cc (Parse): Get rid of compiler warnings, use
+ new private tokenizer to ensure parens and quote aren't
+ removed. Also, when popping an operator off the parens stack, make
+ sure it's adopted by a new ParseTree object so we get the parens
+ back in the tree hierarchy.
+
+Wed Aug 23 23:34:44 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/AndParseTree.cc (Parse): Fix nasty infinite loop when
+ phrases hit in AND searches.
+
+ * htsearch/OrParseTree.cc (Parse): Ditto.
+
+Wed Aug 23 13:24:31 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.*, htnet/Transport.h: all 'char *', when possible,
+ have been changed into 'const String &' types.
+
+Sun Aug 20 23:25:01 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htpurge.cc (purgeDocs): Add error message when document
+ database is completely empty. Should take care of PR#672 (and others).
+
+Sun Aug 20 20:37:53 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegex.h, htlib/HtRegex.cc: Made destructor virtual,
+ added lastError() and associated support. Changed return type of
+ set*() to int. They now return the value of |compiled|.
+
+ * htcommon/defaults.cc (url_rewrite_rules): Add new attribute to
+ support patch by Andy Armstrong <andy at tagish.com> for permanent
+ URL rewriting.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+ * htlib/HtRegexReplace.cc, htlib/HtRegexReplaceList.cc,
+ htlib/HtRegexReplace.h, htlib/HtRegexReplaceList.h,
+ htcommon/HtURLRewriter.cc, htcommon/HtURLRewriter.h: New classes.
+
+ * htcommon/Makefile.am, htcommon/Makefile.in: Add compilation for
+ HtURLRewriter.
+
+ * htlib/Makefile.am, htcommon/Makefile.in: Ditto for
+ HtRegexReplace*
+
+ * htcommon/URL.h, htcommon/URL.cc (rewrite): New method for
+ transforming URLs based on HtURLRewriter.
+
+ * htdig/Retriever.cc (got_href): Rewrite the URL before we do
+ anything with it.
+
+ * htdig/htdig.cc: Include HtURLRewriter headers and check rewrite
+ rules for errors.
+
+Sat Aug 19 17:01:36 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/conf_lexer.lxx: Patched to fix the bug with relative
+ filename includes. Keeps a separate stack with the filenames and
+ adjusts accordingly.
+
+ * htcommon/conf_lexer.cxx: Updated using flex 2.5.4.
+
+Thu Aug 17 23:59:26 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/conf_lexer.lxx: Patched to fix a bug reported by Abel
+ Deuring -- config filename stack was decremented too many times.
+
+ * htcommon/conf_lexer.cxx: Updated using flex 2.5.4.
+
+Thu Aug 17 23:40:08 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htword/WordType.h (WordToken): Add non-destructive version of
+ HtWordToken using a passed int as a pointer into the
+ string. Add virtual destructor so class can be sub-classed.
+
+ * htword/WordType.cc (WordToken): Implement it.
+
+ * httools/htmerge.cc (mergeDB): Back out change of Aug. 9th --
+ WordSearchDescription has disappeared from htword
+ interfaces. Should be restored when Loic comes back and can
+ suggest an alternative.
+
+Thu Aug 17 16:59:05 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (createURL): Get rid of extra "config="
+ parameter that was inserted before collections stuff.
+
+Thu Aug 17 15:47:58 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.cc: ask again for a document after a <NoHeader>
+ response is given by the HTTPRequest() method.
+
+Thu Aug 17 12:25:33 CEST 2000 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.*, htnet/Transport.* : fixed bug with HTTP/1.1 management.
+ Now the "Connection: close" directive is handled and forces the connection
+ to be closed. So the bug has now been fixed. Fixed other minor bugs and
+ strings initializations.
+
+Tue Aug 15 00:24:33 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * contrib/multidig/Makefile, gen-collect, db.conf, multidig.conf:
+ Add missing trailing newlines as pointed out by Doug Moran
+ <dmoran at dougmoran.com>.
+
+ * contrib/multidig/Makefile (install): Make sure scripts have a+x
+ permissions. Pointed out by Doug Moran.
+
+ * contrib/multidig/new-collect: Fix typo to ensure MULTIDIG_CONF
+ is set correctly.
+
+Sun Aug 13 23:17:30 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Server.h, htdig/Server.cc (Server): Add support for
+ per-server user_agent configuration.
+
+ * htdig/Document.cc (Retrieve): Ditto.
+
+ * httools/htpurge.cc (purgeDocs): Set remove_* attributes on a
+ per-server basis.
+
+ * htcommon/defaults.cc: Fix remove_bad_urls and
+ remove_unretrieved_urls to point to htpurge and not htmerge.
+
+Sat Aug 12 23:03:32 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/cf_generate.pl (html_escape): Fix mindless thinko with
+ perl stringwise-equal operator. Documentation is now generated
+ with block: portion appropriate to defaults.cc.
+
+ * htdoc/attrs.html, cf_by{name,prog}.html: Reran cf_generate.pl.
+
+Fri Aug 11 16:03:18 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (parse): fix problem with &amp; not being translated.
+
+Fri Aug 11 10:48:54 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (setVariables), htcommon/defaults.cc: Added
+ maximum_page_buttons attribute, to limit buttons to less than
+ maximum_pages. Fixes PR#731 & PR#781.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Wed Aug 9 23:04:39 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htmerge.cc (mergeDB): Add fix to prevent duplicate
+ documents when you merge a database with a copy of itself
+ contributed by Lorenzo.
+
+Wed Aug 9 22:58:39 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/parser.cc (score): Merged in patch contributed by
+ Lorenzo Campedelli <lorenzo.campedelli at libero.it> and Arthur
+ Prokosch <prokosch at aptima.com> to fix problems with AND operators
+ and phrase matches.
+
+Wed Aug 2 11:44:11 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (setVariables), htcommon/defaults.cc: Enhanced
+ build_select_lists attribute, to generate not only single-choice
+ select lists, but also select multiple lists, radio button lists
+ and checkbox lists. Added explanation and examples in documentation.
+ * htdoc/hts_selectors.html: Added detailed explanation of new feature.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Tue Aug 1 21:50:22 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/ParseTree.cc (Parse): Fix problems with token
+ comparisons and fix thinko with HtWordToken parsing--previously
+ didn't advance the parse step at all.
+
+ * htsearch/*ParseTree.cc (Parse): Fix thinko with HtWordToken as
+ above--here it acted as an infinite loop.
+
+ * htdig/ExternalParser.cc (parse): Add shell quoting around
+ content-type. Hard to exploit, but a server could potentially
+ return a strange value that could then be executed locally.
+
+Thu Jun 29 23:33:51 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/ParseTree.h, htsearch/ParseTree.cc: New parent class
+ for the new htsearch framework. Still needs work.
+
+ * htsearch/*ParseTree.*: Derived classes appropriate to the method
+ indicated.
+
+ * htsearch/parsetest.cc: New program to allow initial
+ command-line testing of ParseTree classes.
+
+ * htsearch/Makefile.am, htsearch/Makefile.in: Build parsetest in
+ addition to htsearch. Eventually, parsetest is probably best
+ modified slightly and moved into the tests directory.
+
+Tue Jun 20 22:29:57 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htmerge.cc (mergeDB): Merge in patch contributed by
+ Lorenzo Campedelli <lorenzo.campedelli at libero.it> to greatly
+ reduce memory usage.
+
+Sun Jun 18 13:15:43 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Object.h (class Object): Fix problems with retrieval order
+ by ensuring the compare() method is declared const.
+
+Tue Jun 13 22:57:10 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (GetLocal): Fix bug that would cause a
+ coredump when local_urls was used and local_default_docs was
+ needed. The list of default filenames was freed before it should
+ have been.
+
+Tue Jun 13 19:30:28 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/HtWordReference.h, htcommon/HtWordReference.cc (Load,
+ LoadHeaders): New methods to check the header of an ASCII
+ representation and read it in.
+
+ * htcommon/HtWordList.h, htcommon/HtWordList.cc (Load): Add load
+ method to read in data. Calls the new methods above.
+
+ * httools/htload.cc: Open word databases read-write and call
+ HtWordList::Load().
+
+Sun Jun 11 14:39:28 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc (generateStars): Fix problem when maxScore
+ == minScore as reported by Rajendra. Fixed problem PR#858.
+ (displayMatch): Ditto.
+
+ * htsearch/htsearch.cc: Fix memory corruption problem in reporting
+ syntax errors pointed out by Rajendra. Fixes PR#860.
+
+Thu Jun 8 09:31:15 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Accents.h, htfuzzy/Accents.cc: Apply Robert Marchand's
+ patch to his algorithm. Gets rid of writeDB function (falls back
+ on default one in Fuzzy.cc), changes addWord, and adds a new
+ getWords function to override default. These avoid overhead of
+ unaccented forms of words in accents database, but ensure that
+ unaccented form of search word is always searched.
+
+Thu Jun 8 09:00:02 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/DocumentRef.h(DocScore, docScore),
+ htsearch/ResultMatch.cc(ScoreMatch::compare),
+ htsearch/ResultMatch.h(setScore, getScore, score),
+ htsearch/Display.cc(displayMatch, generateStars, buildMatchList):
+ Apply Terry Luedtke's patch for score calculations, to calculate
+ min & max from log(score).
+
+Thu Jun 8 08:47:03 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/doc2html/doc2html.pl: Apply David Adams' fix for missing
+ quote.
+
+Wed Jun 07 10:53:53 2000 Loic Dachary <loic at senga.org>
+
+ * db/db.c (CDB___db_dbenv_setup): open mode is 0666 instead
+ of 0 otherwise the weakcmpr file is not open with the proper
+ mode.
+
+Tue Jun 6 23:48:48 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htpurge.cc: Fix coredump problems by passing
+ dictionaries as pointers rather than full objects (this is
+ preferred anyway).
+
+Sun Jun 4 22:17:14 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * test/t_htdig_local: Added test for local filesystem support.
+
+ * test/config/htdig.conf2.in: Change to be a config file for
+ local_urls testing.
+
+ * test/Makefile.am: Add t_htdig_local to list.
+
+Tue May 30 23:52:45 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htmerge.cc: Move to httools directory, remove "cleanup"
+ functionality now in htpurge and merge in htmerge.h and db.cc files.
+
+ * httools/Makefile.am: Add htmerge now moved to this directory.
+
+ * */Makefile.in: Update with automake.
+
+ * Makefile.am (SUBDIRS): Remove htmerge, now found in httools.
+
+ * configure.in: Ditto.
+
+ * configure: Update with autoconf.
+
+ * test/test_functions.in: Add paths for htpurge, htstat, htload,
+ htdump and update path for htmerge.
+
+ * test/t_htdig: Change htmerge to htpurge to clean out incorrect URLs.
+
+ * installdir/rundig: Change htmerge to htpurge. This needs serious
+ additional cleanup for use in 3.2 since many conventions have changed!
+
+Tue May 23 22:21:14 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * README: Fix for 3.2.0b3 and clean up organization a bit for new
+ directory structure.
+
+Wed May 17 23:22:31 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Add support for TITLE attributes in
+ anchor and related tags.
+
+Fri May 12 17:54:09 2000 Loic Dachary <loic at senga.org>
+
+ * db/acinclude.m4: bigfile support is disabled by default.
+
+ * db/mp_region.c (CDB___memp_close): clear weakcmpr pointer
+ when closing region so that memory pool files are not
+ released twice.
+
+Wed May 10 22:26:21 2000 Loic Dachary <loic at senga.org>
+
+ * */*.cc: all include htconfig.h
+
+ * htlib/HtTime.h: remove htconfig.h inclusion (never in headers)
+
+ * htlib/*.h,*.cc: Fix copyright GNU Public -> Gnu General Public
+ and 1999, 2000 instead of 1999.
+
+Tue May 09 16:38:07 2000 Loic Dachary <loic at senga.org>
+
+ * htsearch/Collection.cc (Collection): set searchWords and
+ searchWordsPattern to null in constructor. Delete in destructor.
+ Also delete matches in destructor.
+
+ * test/word.cc (doskip_harness): free cursor after use.
+
+ * test/word.cc (doskip_overflow): free cursor after use.
+
+ * test/dbbench.cc (find): free cursor after use.
+
+ * htsearch/htsearch.cc (main): free searchWords and searchWordsPattern
+ after usage.
+
+ * htdb/htdb_{load,dump,stat}.cc (main): call WordContext::Finish
+ to free global context for inverted index.
+
+ * htdb/htdb_stat.cc (btree_stats): free stat structure.
+
+ * htlib/List.h (class List): Add Shift/Unshift/Push/Pop methods.
+
+ * htlib/List.h (class List): Add Remove(int position) method.
+
+Tue May 09 00:22:33 2000 Loic Dachary <loic at senga.org>
+
+ * htsearch/htsearch.cc (main): kill useless call to
+ StringList::Release
+
+ * htsearch/HtURLSeedScore.cc (ScoreAdjustItem): remove useless
+ call to StringList::Destroy.
+
+ * htlib/HtWordCodec.cc (HtWordCodec): Fix usage of StringList
+ that was inserting pointers to volatile strings instead of
+ permanent copies. I suspect that the tweak on StringList was
+ primarily done to satisfy this piece of code. After reviewing
+ all the usage of StringList, it's the only one to use it in this
+ fashion.
+
+ * htlib/QuotedStringList.h (class QuotedStringList): remove
+ noop destructor to enable Destroy of the underlying StringList
+ when deleted.
+
+Mon May 08 18:17:02 2000 Loic Dachary <loic at senga.org>
+
+ * htlib/StringList.h (class StringList): change methods
+ Add/Insert/Assign that were copying the String* given in argument.
+ This behaviour is confusing since it has a different semantic
+ than the base class List.
+
+Mon May 08 17:16:00 2000 Loic Dachary <loic at senga.org>
+
+ * htdig/Retriever.cc (GetLocal): fix leaked defaultdocs
+
+Mon May 08 04:27:47 2000 Loic Dachary <loic at senga.org>
+
+ * htlib/StringList.cc (Create): remove SRelease. Deleting
+ the strings is taken care of by the destructor thru
+ Destroy. If destruction of the Strings is not desirable
+ Release should be used. SRelease was added apparently after
+ a virtual constructor doing nothing was added to hide the
+ default call to Destroy therefore leaking memory.
+
+Mon May 08 01:28:25 2000 Loic Dachary <loic at senga.org>
+
+ * test/txt2mifluz.cc,word.cc,search.cc: fix minor memory leaks.
+
+Sun May 07 19:24:12 2000 Loic Dachary <loic at senga.org>
+
+ * Makefile.config (HTLIBS): add libht at end because htdb
+ now depends on htlib.
+
+ * configure.in,htlib/Makefile.am: use LTLIBOBJS as suggested
+ by the libtool documentation.
+
+Sun May 07 17:09:22 2000 Loic Dachary <loic at senga.org>
+
+ * test/Makefile.am (clean-local): clean conf to prevent
+ inconsistencies when re-configuring in a directory that
+ is not the source directory.
+
+Sun May 07 05:07:23 2000 Loic Dachary <loic at senga.org>
+
+ * db/mkinstalldir,test/benchmark: Add for installation purpose
+
+Sun May 07 02:17:03 2000 Loic Dachary <loic at senga.org>
+
+ * Makefile.am (distclean-local): Xtest instead of test
+ that confuses some shells.
+
+Sun May 07 02:02:46 2000 Loic Dachary <loic at senga.org>
+
+ * htword/WordDB.cc: Move Open to WordDB.cc.
+
+Sun May 07 01:32:47 2000 Loic Dachary <loic at senga.org>
+
+ * test/t_*: check/fix scripts. All regression tests pass
+ on RedHat-6.2.
+
+Sun May 07 00:54:30 2000 Loic Dachary <loic at senga.org>
+
+ * */*.cc: fix warnings and large file support inclusion
+ files on Solaris.
+
+Sat May 06 21:55:58 2000 Loic Dachary <loic at senga.org>
+
+ * test/: import regression tests from mifluz
+
+ * htlib/DB2_db.cc (db_init): fix flags used when creating the
+ environment to include a memory pool.
+
+ * htcommon/defaults.cc: change wordkey_description format.
+ update all wordlist_* attributes
+
+Sat May 06 04:46:03 2000 Loic Dachary <loic at senga.org>
+
+ * htmerge/words.cc (mergeWords): WordSearchDescription becomes
+ WordCursor.
+
+ * httools/htpurge.cc (purgeWords): WordSearchDescription becomes
+ WordCursor.
+
+Sat May 06 02:01:40 2000 Loic Dachary <loic at senga.org>
+
+ * htdb/*: upgrade to Berkeley DB 3.0.55. Very different.
+
+ * htlib/getcwd.c,memcmp.c,memcpy.c,memmove.c,raise.c,snprintf.c,
+ strerror.c,vsnprintf.c,clib.h: Add compatibility support
+
+ * htcommon/DocumentDB.cc (LoadDB): remove unused variable
+
+ * htlib/DB2_db.cc: adapt to Berkeley DB 3.0.55 syntax.
+
+ * htlib/Database.h (class Database): remove DB_INFO, does
+ not exist in Berkeley DB 3.0.55
+
+ * htlib/*: run ../db/prefix-symbols.sh
+
+ * Makefile.config (INCLUDES): fix db include dirs
+
+ * acconfig.h: Big file support + replacement functions
+
+ * acinclude.m4,configure.in : db instead of db/dist + bug fixes
+
+Fri May 5 08:33:59 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * db/*: Merge in changes from Loic's mifluz tree. This will break
+ everything, but Loic promises he'll fix it ASAP after I make this
+ change.
+
+Mon Apr 24 21:58:22 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/htdig.cc (main): Make the -l stop & restart mode the
+ default. This will catch signals and quit gracefully. The
+ command-line parser will still accept -l, it will just ignore it.
+ (usage): Remove -l portion.
+ (main): Fix -m option to read in a file as it's
+ supposed to do! Also set max_hops correctly so really only indexes
+ the URLs in that file.
+
+ * htdoc/htdig.html: Remove -l from documentation since it's now
+ the default.
+
+Mon Apr 24 21:22:53 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Server.cc (push): Fix bug where changes in the robots.txt
+ would be ignored. If a URL was indexed and later the robots.txt
+ changed to forbid it, the URL would still be updated.
+
+Wed Apr 19 22:13:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * Merging in changes from mifluz 0.14 from Loic.
+
+ * htlib/Configuration.cc (Read): Removed dependency on fstream.h,
+ use fopen, fprintf, fgets, fclose instead of iostream.
+
+ * htlib/HtPack.cc, htlib/HtVectorGeneric.h, htlib/Object.h,
+ htlib/ParsedString.cc, htlib/String.cc: Remove use of cerr,
+ instead use fprintf(stderr ...).
+
+ * htlib/Dictionary.cc, htlib/HtVectorGeneric.cc, htlib/List.cc,
+ htlib/Object.cc, htlib/StringList.cc, htlib/htString.h,
+ htlib/strcasecmp.cc: Add #ifdef blocks for htconfig.h
+
+Wed Apr 12 19:09:40 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * .version: Bump to 3.2.0b3.
+
+ * htdoc/htload.html, htdoc/htpurge.html, htdoc/htstat.html: Fix
+ typos in headers.
+
+ * htdoc/main.html: Fix link to download to actually point to 3.2.0b2.
+
+Tue Apr 11 00:21:48 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc (setupWords): Does not apply fuzzy
+ algorithms to phrase queries. This helps prevent the infinite
+ loops described on the mailing list.
+
+ * htcommon/conf_parser.yxx (list): Add conditions for lists
+ starting with string-number, number-string, and number-number.
+
+ * htcommon/conf_parser.cxx: Regenerate using bison.
+
+ * htdoc/RELEASE.html: Update release notes for recent bug fixes
+ and likely release date for 3.2.0b2.
+
+ * htdoc/main.html: Add a blurb about the 3.2.0b2 release.
+
+ * htdoc/*.html: Remove author notes in the footer as requested by
+ Andrew. To balance it out, the copyright notice at the top links
+ to THANKS.html.
+
+Sun Apr 9 15:21:12 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/conf_parser.yxx (list): Fix problem with
+ build_select_lists--parser didn't support lists including numbers.
+
+ * htcommon/conf_parser.cxx: Regenerate using bison.
+
+Sun Apr 9 12:53:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/RELEASE.html: Add a first draft of 3.2.0b2 release notes.
+
+Sun Apr 9 12:31:13 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/Makefile.am, httools/Makefile.in: Add htload to
+ compilation list.
+
+ * htcommon/DocumentDB.h: Add optional verbose options to DumpDB
+ and LoadDB.
+
+ * htcommon/DocumentDB.cc (LoadDB): Implement loading and parsing
+ an ASCII version of the document database. Records on disk will
+ replace any matching records in the db.
+ (DumpDB): Add all fields in the DocumentRef to ensure the entire
+ database is written out.
+
+ * htcommon/DocumentRef.h: Add new method for setting DocStatus
+ from an int type.
+
+ * htcommon/DocumentRef.cc (DocStatus): Set it using a switch
+ statement. (It's not pretty, but it works.)
+
+ * httools/htload.cc: New file. Loads in ASCII versions of the
+ databases, replacing existing records if found.
+
+ * httools/htdump.cc: Pass verbose flags to DumpDB method. Make
+ sure to close the document DB before quitting.
+
+ * httools/htpurge.cc: Add -u option to specify a URL to purge from
+ the command-line.
+
+ * httools/htstat.cc: Add -u option to output the list of URLs in
+ the document DB as well.
+
+Sat Apr 8 16:35:55 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Change all <b>, <i>, and <tt> tags to the
+ HTML-4.0 compliant <strong>, <em>, and <code> tags.
+
+ * installdir/long.html, installdir/header.html,
+ installdir/nomatch.html, installdir/syntax.html,
+ installdir/wrapper.html: Ditto.
+
+ * htdoc/*.html: Ditto. (Don't you just love sed?)
+
+ * htsearch/TemplateList.cc (createFromString): Ditto.
+
+ * htdoc/htpurge.html, htdoc/htdump.html, htdoc/htload.html,
+ htdoc/htstat.html: New files documenting usage of httools
+ programs.
+
+ * htdoc/contents.html: Add links to above.
+
+ * htdoc/htdig.html: Update table with -t format to match htdump.
+
+Fri Apr 7 00:30:01 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * README: Update to mention 3.2.0b2 and use correct copyright. (It
+ is 2000 after all!)
+
+ * htdoc/FAQ.html, htdoc/where.html, htdoc/uses.html,
+ htdoc/isp.html: Update with most recent versions from maindocs.
+
+ * htdoc/RELEASE.html: Add release notes for 3.1.5 to the
+ top. (It's out of version ordering, but it is in correct
+ chronological order.)
+
+Fri Apr 7 00:11:29 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htpurge.cc (main): Read in URLs from STDIN for purging,
+ one per line. Pass them along to purgeDocs for removal. Also, make
+ discard_list into a local variable and pass it from purgeDocs to
+ purgeWords.
+ (purgeDocs): Accept a hash of URLs to delete (user input) and
+ return the list of doc IDs deleted.
+ (usage): Note the - option to read in URLs to be deleted from STDIN.
+
+Thu Apr 6 00:10:23 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (got_redirect): Allow the redirect to accept
+ relative redirects instead of just full URLs.
+
+Wed Apr 5 15:07:52 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc: Added #if test to make sure DBL_MAX is
+ defined on Solaris, as reported by Terry Luedtke.
+
+Tue Apr 4 12:46:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/doc2html/*: Added parser submitted by D.J.Adams at soton.ac.uk
+
+Mon Apr 3 13:48:59 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Fix error in description of new attribute
+ plural_suffix.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Fri Mar 31 21:48:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, configure: Add test using AC_TRY_RUN to compile
+ against the htlib/regex.c and attempt to compile a regexp. This
+ should allow us to find out if the included regex code causes
+ problems.
+
+ * acconfig.h: Add HAVE_BROKEN_REGEX as a result of the configure
+ script to conditionally include the appropriate regex.h file.
+
+ * include/htconfig.h.in: Regenerate using autoheader.
+
+ * htlib/regex.c: Move #include "htconfig.h" inside HAVE_CONFIG_H
+ tests. This file is only created when this is true anyway. This
+ prevents problems with the configure test.
+
+ * htlib/HtRegex.h, htfuzzy/EndingsDB.cc: Use HAVE_BROKEN_REGEX
+ switch to use the system include instead of the local include
+ where appropriate.
+
+ * htlib/Makefile.am, htlib/Makefile.in: Only compile regex.lo if
+ the configure script added it to LIBOBJS.
+
+Thu Mar 30 22:41:38 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/URL.cc (normalizePath): Remove Gilles's loop to add
+ back ../ components to a path that would go above the top
+ level. Now we simply discard them. Both are allowed under the RFC,
+ but this should have fewer "surprises."
+
+Tue Mar 28 21:57:49 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Connection.cc (Read_Partial): Fix bug reported by Valdas
+ where a zero value returned by select would result in an infinite
+ loop.
+
+ * htcommon/defaults.cc: Add new attribute plural_suffix to set the
+ language-dependent suffix for PLURAL_MATCHES contributed by Jesse.
+
+ * htsearch/Display.cc (setVariables): Use it.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Mon Mar 27 22:28:20 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentRef.cc (Deserialize): Add back stub for
+ DOC_IMAGESIZE to prevent decoding errors. This just throws away
+ that field.
+
+ * htcommon/HtSGMLCodec.h (class HtSGMLCodec): Differentiate
+ between codec used for &foo; and numeric form &#nnn; Make sure
+ encoding goes through both but decoding only goes through the
+ preferred text form.
+
+ * htcommon/HtSGMLCodec.cc (HtSGMLCodec): When constructing the
+ private HtWordCodec objects, create separate lists for the number
+ and text codecs.
+
+Mon Mar 27 21:25:27 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/HtURLSeedScore.cc (ScoreAdjustItem): Change to use
+ HtRegex for flexibility and to get around const char * -> char *
+ problems.
+
+ * htsearch/SplitMatches.cc (MatchArea): Ditto.
+
+ * htsearch/Makefile.am, htsearch/Makefile.in: Add SplitMatches.cc
+ and HtURLSeedScore.cc to compilation list!
+
+Mon Mar 27 21:03:12 2000 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htcommon/defaults.cc (defaults): Add default for
+ search_results_order, url_seed_score.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerated using cf_generate.pl.
+
+ * htlib/List.h (List): New method AppendList.
+ * htlib/List.cc (List::AppendList): Implement it.
+
+ * htsearch/SplitMatches.h, htsearch/SplitMatches.cc: New.
+
+ * htsearch/HtURLSeedScore.cc, HtURLSeedScore.h: New.
+
+ * htsearch/Display.h (class Display): Add member minScore.
+ Change maxScore type to double.
+
+ * htsearch/Display.cc: Include SplitMatches.h and HtURLSeedScore.h
+ (ctor): Initialize minScore, change init value for
+ maxScore to -DBL_MAX.
+ (buildMatchList): Use a SplitMatches to hold search results and
+ iterate over its parts when sorting scores.
+ Ignore Count() of matches when setting minScore and maxScore.
+ Use an URLSeedScore to adjust the score after other calculations.
+ Calculate minScore.
+ Correct maxScore adjustment for change to double.
+ (displayMatch): Use minScore in calculation of score to adjust for
+ negative scores.
+ (sort): Calculation of maxScore moved to buildMatchList.
+
+Mon Mar 27 20:22:24 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc: Remove
+ DocImageSize field since it is not used anywhere and is never updated.
+
+ * htdig/Retriever.h (class Retriever): Remove references to Images class.
+
+ * htcommon/DocumentDB.cc (DumpDB): Ignore DocImageSize field.
+
+ * htdig/Makefile.am, htdig/Makefile.in: Remove Images.cc since
+ this is no longer used.
+
+ * htdig/Plaintext.cc: Do not insert SGML equivalents into the
+ excerpt, these are decoded by HtSGMLCodec automatically.
+
+Sat Mar 25 21:58:36 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/cf_generate.pl (html_escape): Changed <b></b> and <i></i>
+ tags to HTML 4.0 <strong> and <em> tags.
+
+Sat Mar 25 17:23:46 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdb/Makefile.am, htdb/Makefile.in: Change the names of the htdb
+ utility programs to escape name conflicts with httool programs.
+
+ * htdb/htdb_load.cc: Rename htload.cc to escape name conflict and
+ more closely match original db_load program name.
+
+ * htdb/htdb_dump.cc, htdb/htdb_stat.cc: Ditto.
+
+ * htfuzzy/Prefix.cc (getWords): Add code to "weed out" duplicates
+ returned from WordList::Prefix. We only want to add unique words
+ to the search list.
+
+Fri Mar 24 22:33:20 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc (Document): Fix bug reported by Mentos
+ Hoffman, contributed by Atlee Gordy <agordy at moonlight.net>.
+
+Mon Mar 20 23:14:26 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentDB.cc (Delete): Fix bug reported by Valdas
+ where duplicate document records could "sneak in" because the
+ doc_index entry was removed incorrectly.
+
+Mon Mar 20 19:08:14 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Added block field and added appropriate blocks.
+
+ * htlib/Configuration.h (struct ConfigDefaults): Add block field.
+
+ * htdoc/cf_generate.pl: Parse the new block field.
+
+ * htdoc/cf_byname.html, htdoc/cf_byprog.html, htdoc/attrs.html:
+ Regenerate using above.
+
+ * htcommon/DocumentDB.cc (DumpDB): Make sure we decompress the
+ DocHead field before we write it to disk!
+
+ * httools/htdump.cc, httools/htstat.cc: Call
+ WordContext::Initialize() before doing any htword calls.
+
+Mon Mar 20 14:10:30 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/htpurge.cc: Whoops! Left some references to htmerge in
+ the error messages and usage message.
+
+ * httools/htstat.cc: New program. Simply spits up the total number
+ of documents, words and unique words in the databases.
+
+ * httools/htdump.cc: New program. Simply dumps the contents of the
+ document DB and the word DB to doc_list and word_dump files
+ respectively. Also has flags -w and -d to pick one or the other.
+
+ * httools/Makefile.am, httools/Makefile.in: Add htdump and htstat
+ programs to compilation list.
+
+ * htcommon/DocumentDB.cc (DumpDB): Change name of CreateSearchDB
+ and add fields for DocBackLinks, DocSig, DocHopCount, DocEmail,
+ DocNotification, and DocSubject. This should now export every
+ portion of the document DB.
+
+ * htcommon/DocumentDB.h: Change name of CreateSearchDB and add
+ stub for LoadDB, to be written shortly.
+
+ * htdig/htdig.cc: Call DumpDB instead of CreateSearchDB when
+ creating an ASCII version of the DB.
+
+Sat Mar 18 22:57:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * httools/Makefile.am, httools/Makefile.in: New directory for
+ useful database utilities.
+
+ * httools/htnotify.cc: Moved htnotify to httools directory.
+
+ * httools/htpurge.cc: New program--currently just purges documents
+ (and corresponding words) in the databases. Will shortly also
+ allow deletion of specified URLs.
+
+ * Makefile.am, configure.in: Remove htnotify directory in favor of
+ httools directory.
+
+ * configure: Regenerate using autoconf.
+
+ * Makefile.in: Regenerate using automake --foreign.
+
+Fri Mar 17 16:47:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (excerpt, hilight): Correctly handle case
+ where there is no pattern to highlight.
+ * htsearch/htsearch.cc (addRequiredWords), htcommon/defaults.cc:
+ Add any_keywords attribute, to OR keywords rather than ANDing,
+ fix addRequiredWords not to mess up expression when there are
+ no search words, but required words are given.
+ * htdoc/hts_form.html: Mention new attribute, add links to all
+ mentioned attributes.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Fri Mar 17 15:48:12 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Accents.cc (generateKey): Truncate words to
+ maximum_word_length, for consistency with what's found in word DB.
+
+Fri Mar 17 10:56:17 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Use case insensitive parsing of META
+ robots tag content.
+ * htlib/String.cc (uppercase): Fix misplaced cast for islower().
+
+Mon Mar 6 17:31:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc (setupWords): Don't allow comma as string
+ list separator, as it can be a decimal point in some locales.
+
+Mon Mar 06 00:58:00 2000 Loic Dachary <loic at ceic.com>
+
+ * db/mp/mp_bh.c (__memp_bhfree): always free the chain, if
+ any. The bh is reset to null after free and we lose the
+ pointer anyway, finally filling the pool with it.
+
+ * db/mp/mp_cmpr.c (__memp_cmpr_write): i < CMPR_MAX - 1 instead of
+ i < CMPR_MAX otherwise go beyond array limits. This fixes a
+ major problem when handling large files.
+
+Sat Mar 04 19:41:49 2000 Loic Dachary <loic at ceic.com>
+
+ * db/mp/mp_cmpr.c (__memp_cmpr_free_chain): clear BH_CMPR
+ flag. Was causing core dumps, thanks to
+ Peter Marelas maral at phase-one.com.au for providing
+ a simple case to reproduce the error.
+
+Fri Mar 3 11:32:34 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * Fixed bugs regarding yesterday's changes. Even Leonardo da Vinci
+ used to commit errors, so ...
+
+Fri Mar 3 11:25:42 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * testnet.cc: added the -r and -w options in order to set how many
+ times it retries to re-connect after a timeout occurs, and how long
+ it should wait after it.
+
+Thu Mar 2 18:45:15 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/Connection.*: management of wait_time and number of retries
+ after a timeout occurs.
+
+ * htnet/Transport.*: Management of connection attributes above.
+
+ * htdig/Server.*: Set members for managing timeout retries taken from
+ the configuration file ("timeout", "tcp_max_retries", "tcp_wait_time").
+
+ * htdig/Document.cc: Added the chance to configure on a server basis
+ "persistent_connections", "head_before_get", "timeout",
+ "tcp_max_retries", "tcp_wait_time". Changed Retrieve method accepting
+ now a server object pointer: Retrieve (server*, HtDateTime).
+
+ * htdig/Retriever.cc: Added the chance to configure on a server basis
+ "max_connection_requests" attribute.
+
+ * htcommon/defaults.cc: Added "tcp_max_retries", "tcp_wait_time" -- Need
+ to be gone over by someone who speaks English better than me. Not a hard
+ work !!! ;-)
+
+Wed Mar 1 17:01:09 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc (excerpt, hilight): move SGML encoding into
+ hilight() function, because when it's done earlier it breaks
+ highlighting of accented characters.
+
+Wed Mar 1 16:02:49 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/htfuzzy.cc (main): Correctly test return value on Open()
+ of word database, include db name in error message if Open() fails,
+ do a WordContext::Initialize() before we need htword functions.
+ (Obviously I'm the first to test htfuzzy in 3.2!)
+ * htfuzzy/Accents.cc (generateKey): cast characters to unsigned char
+ before using as array subscripts.
+
+Wed Mar 1 13:27:26 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Added accents_db attribute, mentioned accents
+ algorithm in search_algorithms section.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+ * installdir/htdig.conf: Added mentions of accents, speling & substring,
+ fixed a couple typos in comments.
+ * htdoc/htfuzzy.html: Added blurb on accents algorithm.
+ * htdoc/require.html: Added mentions of accents, speling, substring,
+ prefix & regex.
+ * htdoc/config.html: Updated with sample of latest htdig.conf and
+ installdir/*.html, added blurb on wrapper.html.
+
+Wed Mar 1 00:30:19 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, configure: Add test for FD_SET_T, the second (also
+ third and fourth) argument in calls to select(). Should solve PR#739.
+
+ * acconfig.h, include/htconfig.h.in: Add declaration for FD_SET_T.
+
+ * htnet/Connection.cc (ReadPartial): Change declaration of fds to
+ use FD_SET_T define set by the configure script.
+
+Tue Feb 29 23:11:49 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/DB2_db.cc (Error): Simply fprint the error message on
+ stderr. This is not a method since the db.h interface expects a C
+ function.
+ (db_init): Don't set db_errfile, instead set errcall to point to
+ the new Error function.
+
+Tue Feb 29 15:09:41 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Accents.h, htfuzzy/Accents.cc: Adapted writeDB() for 3.2.
+
+Tue Feb 29 14:29:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Accents.h, htfuzzy/Accents.cc: Added these, as contributed
+ by Robert Marchand, to implement accents fuzzy match. Adapted to 3.2.
+ * htfuzzy/Fuzzy.cc, htfuzzy/htfuzzy.cc, htfuzzy/Makefile.am,
+ htfuzzy/Makefile.in: Added in accents algorithm, as for soundex.
+
+Tue Feb 29 11:31:53 2000 Loic Dachary <loic at ceic.com>
+
+ * test/testnet.cc (Listen): Add -b port to listen to a specific
+ port. This is to test connect timeout conditions.
+
+ * htnet/Connection.cc (Connect): Added SIGALRM signal handler,
+ Connect() always allow EINTR to occur.
+
+Mon Feb 28 15:32:46 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h (class WordKey): explicitly add inline keyword
+ for all inline functions.
+
+Mon Feb 28 13:10:34 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h (class WordKey): nfields data member caches
+ result of NFields() method.
+
+ * htword/WordDBPage.h (class WordDBPage): nfields data member caches
+ result of WordKey::NFields() method.
+
+ * acinclude.m4 (APACHE): check in lib/apache for modules
+
+Sat Feb 26 22:05:03 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Collection.h, htsearch/Collection.cc: New files
+ contributed by Rajendra Inamdar <inamdar at beasys.com>.
+
+ * htsearch/Makefile.am, htsearch/Makefile.in: Compile them.
+
+ * htcommon/defaults.cc: Add new collection_names attribute as
+ described by Rajendra.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+ * htsearch/Display.h, htsearch/Display.cc: Loop through
+ collections as we are assembling results.
+ (buildMatchList): Use 1.0 as minimum score and take log(score) as
+ the final score. This requires an increase in magnitude in weight
+ to correspond to a factor of increase in score.
+
+ * htsearch/DocMatch.h, htsearch/DocMatch.cc: Keep track of the
+ collection we're in.
+
+ * htsearch/ResultMatch.h: Ditto.
+
+ * htsearch/htsearch.h, htsearch/htsearch.cc: Wrap results in
+ collections.
+
+ * htsearch/parser.h, htsearch/parser.cc: Set the collection for
+ the results--we use this to get to the appropriate word DB.
+ (score): Divide word weights by word frequency to calibrate for
+ expected Zipf's law. Rare words should count more.
+
+Fri Feb 25 11:19:47 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (maximum_pages): Describe new behaviour (as of
+ 3.1.4), where this limits total matches shown.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Thu Feb 24 14:43:06 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtFile.cc (Request): Fix silly typo.
+
+ * htlib/DB2_db.cc: Remove include of malloc.h, as it causes problems
+ on some systems (e.g. Mac OS X), and all we need should be in stdlib.h.
+
+Thu Feb 24 13:11:15 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnet/HtFile.cc (Request): Don't append more than _max_document_size
+ bytes to _contents string, set _content_length to size returned by
+ stat().
+ * htnet/HtHTTP.cc (HTTPRequest): Extra tests in case Content-Length
+ not given for non-chunked input, and not to close persistent
+ connection when chunked input exceeds _max_document_size.
+ (ReadChunkedBody): Don't append more than _max_document_size bytes
+ to _contents string.
+
+Thu Feb 24 11:40:24 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Fix handling of img alt text to be consistent
+ with body text, rather than keywords.
+ * htdig/Retriever.cc (ctor): Treat alt text as plain text, until it has
+ its own FLAG and factor.
+
+Thu Feb 24 11:16:37 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (version): Moved example over to correct field.
+ (defaults[] terminator): Padded zeros to new number of fields.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Thu Feb 24 19:08:41 2000 Loic Dachary <loic at ceic.com>
+
+ * htmerge/words.cc: only display Word in verbose message instead
+ of complete key if verbosity < 3.
+
+Thu Feb 24 10:43:12 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (external_protocols, external_parser):
+ Swapped these two entries to put them in alphabetical order.
+ (star_blank): Fixed old typo (incorrect reference to image_star).
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Wed Feb 23 16:53:40 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc (backlink_factor, external_parser,
+ local_default_doc, local_urls, local_urls_only, local_user_urls):
+ Add some updates from 3.1.5's attrs.html.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Wed Feb 23 15:11:51 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ [ Improve htsearch's HTML 4.0 compliance ]
+ * htsearch/TemplateList.cc (createFromString): Use file name rather
+ than internal name to select builtin-* templates, use $&(TITLE) and
+ $&(URL) in templates and quote HTML tag parameters.
+ * installdir/long.html, installdir/short.html: Use $&(TITLE) and
+ $&(URL) in templates and quote HTML tag parameters.
+ * htsearch/Display.cc (setVariables): quote all HTML tag parameters
+ in generated select lists.
+ * installdir/footer.html, installdir/header.html,
+ installdir/nomatch.html, installdir/search.html,
+ installdir/syntax.html, installdir/wrapper.html:
+ Use $&(var) where appropriate, and quote HTML tag parameters.
+ * installdir/htdig.conf: quote all HTML tag parameters.
+
+Wed Feb 23 13:40:27 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/URL.h (encodeURL): Change list of valid characters to
+ include only unreserved ones.
+ * htcommon/cgi.cc (init): Allow "&" and ";" as input param. separators.
+ * htsearch/Display.cc (createURL): Encode each parameter separately,
+ using new unreserved list, before piecing together query string, to
+ allow characters like "?=&" within parameters to be encoded.
+
+Wed Feb 23 13:22:29 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/URL.cc (ServerAlias): Fix server_aliases processing to prevent
+ infinite loop (as for local_urls in PR#688).
+
+Wed Feb 23 12:49:52 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtDateTime.h, htlib/HtDateTime.cc: change Httimegm() method
+ to HtTimeGM(), to avoid conflict with Httimegm() C function, so we
+ don't need "::" override, for Mac OS X.
+ * htlib/htString.h, htlib/String.cc: change write() method to
+ Write(), to avoid conflict with write() function, so we don't need
+ "::" override, for Mac OS X.
+
+Wed Feb 23 12:17:46 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/Configuration.cc(Read): Fixed to allow final line without
+ terminating newline character, rather than ignoring it.
+
+Wed Feb 23 12:01:01 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (GetLocal, GetLocalUser): Add URL-decoding
+ enhancements to local_urls, local_default_urls & local_default_doc,
+ to allow hex encoding of special characters.
+
+Wed Feb 23 19:14:29 2000 Loic Dachary <loic at ceic.com>
+
+ * htcommon/conf_parser.cxx: regenerated from conf_parser.yxx
+
+Wed Feb 23 19:04:16 2000 Loic Dachary <loic at ceic.com>
+
+ * test/test_functions.in: unconditionally remove existing test/var
+ directory before running tests to prevent accidents.
+
+ * htcommon/URL.cc (URL): fixed String->char warning
+
+ * htcommon/defaults.cc (wordlist_compress): defaults to true
+
+Tue Feb 22 17:09:10 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc(parse, do_tag): Fix handling of <img alt=...> text
+ and parsing of words in meta tags, to do proper word separation.
+ * htlib/HtWordType.h, htlib/HtWordType.cc: Add HtWordToken() function,
+ to replace strtok() in HTML parser.
+
+Tue Feb 22 16:21:25 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/URL.cc (ctor, normalizePath): Fix PR#779, to handle relative
+ URLs correctly when there's a trailing ".." or leading "//".
+
+Tue Feb 22 14:09:26 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (RetrieveLocal): Handle common extensions for
+ text/plain, application/pdf & application/postscript.
+
+Mon Feb 21 17:25:21 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/htdig-3.2.0.spec: Fixed %post script to add more
+ descriptive entries in htdig.conf, made cron script a config file,
+ updated to 3.2.0b2.
+
+ * contrib/conv_doc.pl, contrib/parse_doc.pl: Added comments to show
+ Warren Jones's updates in change history.
+
+Mon Feb 21 17:09:13 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/HtConfiguration.h, htcommon/conf_parser.yxx,
+ htlib/Configuration.h, htlib/Configuration.cc: split Add() method
+ into Add() and AddParsed(), so that only config attributes get parsed.
+ Use AddParsed() only in Read() and Defaults().
+
+Fri Feb 18 22:50:54 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Connection.h, htnet/Connection.cc: Renamed methods with
+ capitals to remove the need to use ::-escaped library calls.
+
+ * htnet/Transport.h, htnet/Transport.cc, htnet/HtHTTP.cc,
+ htdig/Images.cc: Fix code using Connection to use the newly
+ capitalized methods.
+
+Fri Feb 18 14:40:50 2000 Loic Dachary <loic at ceic.com>
+
+ * test/conf/access.conf.in: removed cookies. Not used and some
+ httpd are not compiled with usertrack.
+
+Wed Feb 16 12:15:08 2000 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/Makefile.am replaced conf.tab.cc.h by conf_parser.h in
+ noinst_HEADERS
+
+ * htcommon/conf_parser.yxx,conf_parser.lxx,HtConfiguration.cc,
+ HtConfiguration.h: added copyright and Id:
+
+ * htcommon/cgi.cc(init): fixed bug: array must be freed by
+ delete [] buf, not just delete buf;
+
+Tue Feb 15 23:16:14 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/HtHTTP.cc (isParsable): Remove application/pdf as a
+ default type--it is now handled through the ExternalParser
+ interface if at all.
+
+ * htcommon/defaults.cc: Remove pdf_parser attribute.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+ * htdig/Document.cc (getParsable): Remove PDF once and for all
+ (hopefully).
+
+ * htdig/ExternalParser.cc (parse): Ditto.
+
+ * configure.in: Remove check for PDF_PARSER.
+
+ * configure: Regenerate using autoconf
+
+ * htdig/Makefile.am: Remove PDF.cc and PDF.h.
+
+ * Makefile.in, */Makefile.in: Regenerate using automake --foreign
+
+Tue Feb 15 12:02:39 EET 2000 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/HtConfiguration.cc,HtConfiguration.h: fixed bug discovered
+ by Gilles. HtConfiguration was able to get info only from "url" and
+ "server" block.
+
+ * htcommon/conf_parser.yxx: deleted 1st parameter for new char[],
+ left over when realloc was replaced by new char[]. Removed a few unused
+ variable declarations.
+
+ * htcommon/Makefile.am: added -d flag to bison to generate
+ conf_parser.h template from conf_parser.yxx;
+ conf_lexer.lxx uses #include conf_parser.h;
+ conf.tab.cc.h removed.
+
+Sun Feb 13 21:19:04 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Get rid of uncoded_db_compatible since
+ the current DB format has clearly broken backwards compatibility.
+
+ * htsearch/Display.cc (Display), htnotify/htnotify.cc (main),
+ htmerge/docs.cc (convertDocs), htmerge/db.cc (mergeDB),
+ htdig/htdig.cc (main): Remove call to DocumentDB::setCompatibility().
+
+ * htcommon/DocumentDB.h (class DocumentDB): Remove
+ setCompatibility and related private variable.
+
+ * htcommon/DocumentDB.cc ([], Delete): Don't bother checking for
+ an unencoded URL, at this point all URLs will be encoded using
+ HtURLCodec.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Sat Feb 12 21:29:20 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/HtSGMLCodec.cc (HtSGMLCodec): Always translate &quot;
+ &amp; &lt; and &gt;
+
+ * htcommon/defaults.cc: Remove translate_* and word_list
+ attributes since they're now no longer used.
+
+ * htdig/PDF.cc (parseNonTextLine): Fix bogus escape sequences
+ around Title parsing. Fixes PR#740.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Fri Feb 11 11:41:36 2000 Loic Dachary <loic at ceic.com>
+
+ * htlib/Makefile.am: removed CFLAGS=-g (use make CXXFLAGS=-g all
+ instead).
+
+ * htdoc/install.html: specify header/lib install directory now
+ is prefix/include/htdig and prefix/lib/htdig.
+
+ * Makefile.am (distclean-local): use TESTDIR instead of deprecated
+ HTDIGDIRS.
+
+ * */Makefile.am: install libraries in prefix/lib/htdig and
+ includes in prefix/include/htdig. Just prepend pkg in front of
+ automake targets.
+
+ * include/Makefile.am: install htconfig.h
+
+Thu Feb 10 23:18:37 2000 Loic Dachary <loic at ceic.com>
+
+ * Connection.cc (Connection): set retry_value to 1 instead of
+ 0 as suggested by Geoff.
+
+Thu Feb 10 17:36:09 2000 Loic Dachary <loic at ceic.com>
+
+ * htdig/Document.cc: fix (String)->(char*) conversion warnings.
+
+ * htword/WordList.cc: kill Collect(WordSearchDescription) which
+ was useless and error prone.
+
+ * htword/WordDB.h (WordDBCursor::Get): small performance improvement
+ by copying values only if key found.
+
+ * htword/WordDB.h,WordList.cc: fix reference counting bug when
+ using Override (+1 even if entry existed). Turn WordDB.h return
+ values to be std Berkeley DB fashion instead of the mixture with
+ OK/NOTOK that was a stupid idea. This allows to detect Put errors
+ and handle them properly to fix the Override bug without performance
+ loss.
+
+ * test/conf/httpd.conf.in: comment out loading of mod_rewrite
+ since not everyone has it.
+
+Thu Feb 10 00:26:02 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Add new attribute "nph" to send out
+ non-parsed headers for servers that do not supply HTTP headers on
+ CGI output (e.g. IIS).
+
+ * htsearch/Display.cc (display): If nph is set, send out HTTP OK
+ header as suggested by Matthew Daniel <mdaniel at scdi.com>
+ (displaySyntaxError): Ditto.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate from current defaults.cc file.
+
+Thu Feb 10 00:21:58 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Treat <script></script> tags as noindex
+ tags, much like <style></style> as suggested by Torsten.
+
+Thu Feb 10 00:02:41 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * .version: Bump for 3.2.0b2.
+
+ * htcommon/defaults.cc: Add category fields for each
+ attribute. Though these are currently unused, they could allow the
+ documentation to be split into multiple files based on logical
+ categories and subcategories.
+
+Wed Feb 9 23:52:55 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Connection.cc (connect): Add alarm(timeout) ... alarm(0)
+ around ::connect() call to ensure this does timeout as appropriate
+ as suggested by Russ Lentini <rlentini at atl.lmco.com> to resolve
+ PR#762 (and probably others as well).
+ (connect): Add a retry loop as suggested by Wilhelm Schnell
+ <Wilhelm.Schnell at mn.man.de> to resolve PR#754.
+
+ * htnet/HtHTTP.cc (HTTPRequest): Add CloseConnection() when the
+ connection fails on open before returning from the method. Should
+ take care of PR#670 for htdig-3-2-x.
+
+Wed Feb 09 17:20:50 2000 Loic Dachary <loic at ceic.com>
+
+ * db/dist/Makefile.in (libhtdb.so): move dependent libraries
+ *after* the list of objects, otherwise it's useless.
+
+ * htword/WordKey.h (class WordKey): move #if SWIG around to
+ please swig (www.swig.org).
+
+ * htword/WordList.h (class WordList): allow SWIG to see Walk*
+ functions (#if SWIG).
+
+Wed Feb 9 09:21:00 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Server.cc (robotstxt): apply more rigorous parsing of
+ multiple user-agent fields, and use only the first one.
+
+ * htlib/HtRegex.cc (set): apply the fix from Valdas Andrulis, to
+ properly compile case_sensitive expressions.
+
+Mon Feb 09 09:43:59 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.cc: changed "<<" to append() for content_length
+ assignment in ReadChunkedBody() function (as Gilles suggested)
+
+Tue Feb 08 10:54:08 2000 Loic Dachary <loic at ceic.com>
+
+ * db/dist/configure.in: Added AC_PREFIX_DEFAULT(/opt/www)
+ so that headers and libraries are installed in the proper
+ directory when no --prefix is given.
+
+Tue Feb 08 10:32:48 2000 Loic Dachary <loic at ceic.com>
+
+ * test/t_wordskip: copy $srcdir/skiptest_db.txt to allow running
+ outside the source tree.
+
+ * configure.in: use '${prefix}/...' instead of "$ac_default_prefix/..."
+ that did not carry the --prefix value.
+
+ * configure.in: run CHECK_USER and AC_PROG_APACHE if --enable-tests
+
+Mon Feb 07 17:40:47 2000 Loic Dachary <loic at ceic.com>
+
+ * htlib/htString.h (last): turn to const
+
+Mon Feb 07 14:05:37 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/HtHTTP.cc: fixed a bug in ReadChunkedBody() function
+ regarding document size assignment (raised by Valdas Andrulis)
+
+Sun Feb 06 19:11:05 2000 Loic Dachary <loic at ceic.com>
+
+ * configure.in: Fix inconsistencies between default values
+ shown by ./configure and actual defaults.
+
+ * htdoc/install.html: change example version 3.1 to 3.2
+ Commented out warning about libguile.
+ Replace CONFIG variables by configure.in options.
+ Specify default value for each of them.
+ Replace (and move) make depend by automake (distributed
+ Makefiles do not include dependency generation)
+ Added section for running tests.
+ Added section on shared libraries.
+
+ * configure.in: use AM_CONDITIONAL for --enable-tests
+
+ * Makefile.am: use automake conditionals for subdir so
+ that make dist knows what to distribute whether --enable-tests
+ is specified or not.
+
+ * db/Makefile.in: allow make dist to work outside the source
+ tree.
+
+Sat Feb 05 18:31:04 2000 Loic Dachary <loic at ceic.com>
+
+ * test/word.cc (SkipTestEntries): The fix of
+ WordList::SkipUselessSequentialWalking actually saves us
+ a few hops when walking lists of words.
+
+Fri Feb 04 17:28:32 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.cc,WordReference.cc,WordRecord.cc (Print): use
+ cerr instead of cout for immediate printing under debugger.
+
+Thu Feb 3 16:06:45 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (RetrieveLocal): fix bug that prevented local
+ filesystem digging, because max_doc_size was initialized to 0.
+ Now sets it to max_doc_size for current url.
+
+Thu Feb 3 12:36:56 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/Makefile.{am,in}: install mime.types as mime.types,
+ not as htdig.conf.
+
+ * htfuzzy/EndingsDB.cc (createDB): fix code to use MV macro in
+ system() command, not hard-coded "MV" string literal, and use
+ get() on config objects to avoid passing String objects to form().
+
+Wed Feb 2 19:44:33 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.cc (SetRFC1123): Strip off weekday, if present
+ and use LOOSE format.
+ (SetRFC850): Ditto.
+
+ * configure.in, configure: Add configure check for "mv."
+
+ * htfuzzy/Makefile.am: Use it.
+
+ * */Makefile.in: Regenerate using automake.
+
+ * htfuzzy/EndingsDB.cc (createDB): Use the detected mv, or
+ whatever is in the path to move the endings DB when they're
+ finished.
+
+Wed Feb 2 15:49:14 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (RetrieveLocal), htdig/Retriever.cc (GetLocal):
+ Fix compilation errors. Oops!
+
+Wed Feb 2 13:53:27 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc (IsValidURL): fix problem with valid_extensions
+ matching failure when URL parameters follow extension.
+
+Wed Feb 2 13:29:48 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/QuotedStringList.cc (Create): fix PR#743, where quoted string
+ lists didn't allow embedded quotes of opposite sort in strings
+ (e.g. "'" or '"'), and fix to avoid overrunning end of string
+ if it ends with backslash.
+
+Wed Feb 2 13:23:16 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (ctor, parse, do_tag), htcommon/defaults.cc:
+ Add max_keywords attribute to limit meta keyword spamming.
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Wed Feb 2 12:57:40 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (RetrieveLocal), htdig/Document.h,
+ htdig/Retriever.cc (Initial, parse_url, GetLocal, GetLocalUser,
+ IsLocalURL, got_href, got_redirect), htdig/Retriever.h,
+ htdig/Server.cc (ctor), htdig/Server.h: Add in Paul Henson's
+ enhancements to local_urls, local_default_urls & local_default_doc.
+ * htcommon/defaults.cc: Document these.
+
+Wed Feb 02 10:14:57 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKeyInfo.h,WordKey.{cc,h}: fix overflow bug when 32
+ bits. For that purpose implement Outbound/Overflow/Underflow
+ methods in WordKey, MaxValue in WordKey/WordKeyInfo.
+ (WordKey::SetToFollowing) was FUBAR : overflow of field1 tested
+ with number of bits in next field, do not handle overflow,
+ Re-implemented.
+ (WordKey::Set) Change atoi to strtoul.
+ (WordList::SkipUselessSequentialWalking) was much to fucked up
+ to explain. Re-implement
+ (WordKey::Diff) Added as a support function of
+ SkipUselessSequentialWalking.
+ implement consistent verbosity.
+
+ * htword/WordList.cc (operator >>): explicit error message when
+ insert failed, with line number.
+
+Wed Feb 2 00:11:03 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdoc/RELEASE.html: Finish up with notes on all significant
+ new attributes.
+
+ * htdoc/FAQ.html, htdoc/where.html: Mention new 3.2.0b1 release
+ as a beta.
+
+ * contrib/README: Update to mention new scripts.
+
+ * installdir/mime.types: Add default Apache mime.types file for
+ systems that do not already have one.
+
+ * installdir/Makefile.am: Make sure it is installed by default.
+
+ * installdir/Makefile.in: Regenerate using automake.
+
+ * htcommon/defaults.cc: Add documentation for mime_types
+ attribute, remove currently unused image_alt_factor, and add
+ documentation for external_protocols.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Regenerate using cf_generate.pl.
+
+Tue Feb 1 10:24:19 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/parser.cc (score): fix up score calculations for
+ correctness and efficiency.
+
+Mon Jan 31 16:29:20 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordBitCompress.cc: fixed endian bug in compression
+
+Sat Jan 29 21:14:03 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/parser.cc (score): Change config.Value (which returns
+ int) to config.Double to preserve accuracy of attributes.
+
+ * htcommon/defaults.cc: Updated documentation for attributes now
+ allowing regex, search_algorithms (for new fuzzy) and added
+ documentation for the overlooked remove_unretrieved_urls.
+
+ * htdoc/*.html: Updated copyright notice for 2000, changed footer
+ to use CVS's magic Date keyword. Regenerated documentation from
+ defaults changes.
+
+Sat Jan 29 16:32:08 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * contrib/htdig-3.1.4.spec, contrib/htdig-3.1.4-conf.patch: Remove
+ these since they don't apply to the 3.2.x releases.
+
+ * htfuzzy/Synonym.cc (openIndex): Change database format from
+ DB_BTREE to DB_HASH--no reason for the synonym database to be a
+ btree. This was probably overlooked when I switched the rest of
+ the fuzzy databases over to DB_HASH.
+
+Sat Jan 29 05:34:26 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h (UnpackNumber): Very nasty bug. Optimization
+ dated Dec 29 broke endianness on Solaris. Restore previous version.
+
+Fri Jan 28 18:17:08 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Configuration.h (struct ConfigDefaults): Add version and
+ category fields for more accurate documentation.
+
+ * htcommon/defaults.cc: Add blank category fields and start
+ filling in version field. Killed modification_time_is_now_attribute.
+
+ * htdig/Document.cc (Document): Kill attribute
+ modification_time_is_now since it can cause more harm than good.
+
+ * htnet/HtHTTP.cc (ParseHeader): Ditto.
+
+ * htdoc/cf_generate.pl: Added support for new version and category
+ fields. Currently category does nothing, but it could split the
+ documentation into categories.
+
+Sat Jan 29 01:37:45 2000 Loic Dachary <loic at ceic.com>
+
+ * .version: remove the trailing -dev
+
+Thu Jan 27 12:22:57 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.cc: cdebug replaced by cerr. replace lverbose
+ by verbose > 2. Remove shutup.
+ (WordList): monitor = 0
+ (Open): create monitor only if wordlist_monitor = true
+ (Close): delete monitor if set, delete compressor if set
+
+ * htword/WordDBCompress.cc,WordList.cc: only activate monitoring code
+ if monitor is set. No interaction with the monitor is therefore possible
+ if wordlist_monitor is false.
+
+ * htword/WordMonitor.cc: remove useless test of wordlist_monitor (done by
+ WordList now).
+
+ * htword/WordDBCompress.cc (TestCompress): remove redundant debuglevel argument.
+
+ * htword/WordDBCompress.cc (WordDBCompress): init cmprInfo to 0
+
+ * db/include/db_cxx.h: Add get_mp_cmpr_info method
+
+ * htword/WordDBCompress.cc (WordDBCompress): set default debug level to 0
+
+ * htword/WordDB.h: CmprInfo returns current CmprInfo and non static,
+ overload to set CmprInfo if argument given.
+
+ * htword/WordDBCompress.h: new CmprInfo() method returns DB_CMPR_INFO object
+ for Berkeley DB database.
+
+ * htword/WordList.h: add compressor member, kill cmprInfo member.
+
+ * htword/WordList.cc:
+
+Wed Jan 26 20:05:33 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.cc,htword/WordList.h: get rid of obsolete WordBenchmarking
+
+Wed Jan 26 9:14:32 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htcommon/defaults.cc: added "max_connection_requests".
+
+ * htdig/Retriever.cc: now manages the attribute above.
+
+Tue Jan 25 12:59:01 2000 Loic Dachary <loic at ceic.com>
+
+ * htsearch/Display.cc (setVariables): fixed
+ Display.cc:505: warning: multiline `//' comment
+
+Tue Jan 25 8:37:15 2000 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Document.h: Added the "HtHTTP *GetHTTPHandler()" method, in
+ order to be able to control an HTTP object outside the Document class.
+ This is useful for the Server class, after the request for robots.txt.
+ We can control the response of a server and check if it supports
+ persistent connections.
+
+ * htdig/Server.cc: inside the constructor, persistent_connections var is
+ initialized to the configuration parameter value, instead of <true>.
+ Besides, after the request of the robots.txt, it controls and set
+ the attribute for persistent connections, depending on whether the
+ server supports them or not.
+
+ * htdig/Retriever.cc: modified the Start() method. Now the loop manages
+ HTTP persistent connections "on a server" basis. Indeed, it's a
+ Server object that decides if persistent connections are allowed on
+ that server or not (depending on configuration or capabilities of
+ the remote http server).
+
+Mon Jan 24 12:57:45 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setVariables): Added double quotes around
+ default selection value in build_select_lists handling.
+
+Mon Jan 24 12:37:22 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setVariables), htcommon/defaults.cc: Added
+ build_select_lists attribute, to generate selector menus in forms.
+ Added relevant explanations and links to selectors documentation.
+ * htdoc/hts_selectors.html: Added this page to explain this new
+ feature, plus other details on select lists in general.
+ * htdoc/hts_templates.html: Added relevant links to related attributes
+ and selectors documentation.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Fri Jan 21 18:57:58 EET 2000 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/HtConfiguration.cc: added HtConfiguration::ParseString(char*)
+ method to allow lexer handle "include: ${var}/file.inc" construction
+
+ * htcommon/conf_lexer.lxx: fixed handling "include: ${var}file.inc"
+ bug.
+
+Fri Jan 21 17:04:28 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.cc (WalkFinish,WalkInit,WalkNextStep): fix typos in error messages
+ and misleading comment.
+
+ * htword/WordList.h,WordList.cc: move part of WalkInit in WalkRewind so that
+ we have a function to go back to the beginning of possible matches.
+
+Wed Jan 19 21:49:57 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Only add words for META descriptions,
+ keywords, and IMG ALT attributes if doindex is set.
+
+ * htcommon/DocumentRef.h: Added Reference_obsolete for documents
+ that should be removed (but haven't).
+
+ * htdig/Retriever.cc (parse_url): Flag documents that have been
+ modified as Reference_obsolete and update the database. Flag all
+ documents with various errors as something other than
+ Reference_normal, as appropriate--these probably should be pruned.
+
+ * htdig/Retriever.h: Get rid of GetRef() method--it's only used once!
+
+ * htsearch/Display.cc (display): Don't show DocumentRefs with
+ states other than Reference_normal--these documents have various
+ errors.
+
+ * htmerge/docs.cc: If a document has a state of Reference_obsolete, ignore it.
+
+ * htcommon/HtWordList.h, htcommon/HtWordList.cc (Skip): Change
+ MarkGone() to Skip() to emphasize that this document should be ignored.
+
+Wed Jan 19 14:11:51 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.cc (SkipUselessSequentialWalking): return OK if skipping,
+ NOTOK if not skipping.
+
+ * htword/WordReference.h: remove useless Clear in WordReference(key, record)
+ constructor.
+
+ * htword/WordList.h,WordList.cc: Split Walk in three separate functions
+ WalkInit, WalkNext and WalkFinish. Much clearer. Fill the status field
+ of WordSearchDescription to have more information about the error condition.
+ Add found field to WordSearchDescription for WalkNext result. Add cursor_get_flags
+ and searchKeyIsSamePrefix fields to WordSearchDescription as internal state
+ information.
+
+ * htword/WordList.h,WordList.cc: WalkInit to create and prepare cursor,
+ WalkNext to move to next match
+ WalkNextStep to move to next index entry, be it a match or not
+ WalkFinish to release cursor.
+
+ * htword/WordList.h: WordSearchDescription::ModifyKey add to jump
+ while walking.
+
+ * htword/WordList.cc (WalkNext) : it is now legal to step without
+ collection or callback because search contains the last match (found
+ field) and it's therefore not useless.
+
+Mon Jan 17 12:15:45 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/htdig-3.2.0.spec: added sample RPM spec file for 3.2
+
+Sat Jan 15 11:53:35 2000 Loic Dachary <loic at ceic.com>
+
+ * htdb/htstat.cc,htdb/htdump.cc: remove useless -S option since
+ the page size is found in the header of the file.
+
+ * htdb/htstat.cc,htdump.cc,htload.cc: only call WordContext::Initialize
+ if -W flag specified.
+
+Fri Jan 14 18:39:12 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordBitCompress.cc: speedup, VlengthCoder::code()
+ finds appropriate coding interval much faster
+
+Fri Jan 14 11:30:41 2000 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc(IsValidURL): Fix problem with valid_extensions,
+ which got lost in the shuffle yesterday.
+
+Fri Jan 14 15:56:49 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordType.cc,WordRecord.cc,WordKeyInfo.cc (Initialize): change
+ inverted test on instance (== instead of !=).
+
+ * htword/WordRecord.cc (WordRecordInfo): change inverted test on compare
+
+Fri Jan 14 14:24:39 2000 Loic Dachary <loic at ceic.com>
+
+ * htdig/htdig.cc,htmerge/htmerge.cc,htsearch/htsearch.cc: Use Initialize(defaults)
+ to load configuration file if provided.
+
+ * htword/WordDBCompress.cc (Compress): initialize monitor to null in
+ constructor and check if null before usage. Core dumped in htdb/htload.
+
+ * htword/WordContext.h (class WordContext): Add
+ Initialize(const ConfigDefaults* config_defaults = 0)
+ that probes configuration files. Useful when htword is used as a standalone library.
+
+Thu Jan 13 19:52:27 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc: Fix problem with valid_extensions when an
+ "extension" would include part of a directory path or server
+ name, as contributed by Warren Jones.
+
+Thu Jan 13 19:22:25 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Makefile.am, htnet/Makefile.in: Add HtFile to the build process.
+
+Thu Jan 13 18:58:03 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/HtFile.h, htnet/HtFile.cc: New Transport classes
+ contributed by Alexis Mikhailov to allow file:// access.
+
+ * htdig/Document.h, htdig/Document.cc: Add logic to call HtFile
+ objects for URLs.
+
+ * htcommon/URL.cc: Don't remove a trailing index.html (removeIndex)
+ if the URL is a file://URL.
+
+Thu Jan 13 18:49:41 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * contrib/conv_doc.pl, contrib/parse_doc.pl: Replace "break" by
+ "last" for correct Perl syntax and additional cleanups and
+ simplifications as contributed by Warren Jones.
+
+Thu Jan 13 18:42:29 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htword/WordType.h, htword/WordType.cc: Implementation of new
+ methods IsDigit() and IsCntrl() as contributed by Marc Pohl
+ <marc.pohl at wdr.de>. Fixes some problems with 8-bit characters.
+
+Thu Jan 13 17:17:47 2000 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * ChangeLog.0, configure, configure.in, htfuzzy/Endings.cc,
+ htlib/String.cc, htlib/Configuration.cc,
+ htlib/QuotedStringList.cc, htlib/regex.c, htcommon/defaults.cc,
+ htdig/ExternalParser.cc, htdig/Retriever.h, htsearch/Display.cc,
+ include/htconfig.h.in installdir/htdig.conf: Merge in changes from
+ 3.1.x releases.
+
+ * htdoc/: Merge in documentation changes from 3.1.x releases.
+
+Thu Jan 13 20:12:42 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.cc (Walk): close the cursor before returning. If
+ not doing that the cursor might be closed after the database is
+ closed, leading to double free of the cursor. Bad bug.
+
+Thu Jan 13 13:23:17 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordContext.h (class WordContext): simplifies a lot. WordContext is
+ no longer a repository for pointers of class instances. Only a place to call
+ Initialize for classes that have a single instance.
+
+ * htlib/HtWordType.cc: added to include definition of functions shortcuts for
+ WordType.
+
+ * htword/WordRecord.h,WordType.h,WordKeyInfo.h: implement homogeneous scheme to
+ handle unique instance of the class.
+ - constructor takes const Configuration& argument and init object with config
+ values
+ - static member instance
+ - static method Initialize the static member instance
+ - static method Instance returns the pointer in instance data member
+
+ * htword/WordRecord.cc: add constructor for WordRecordInfo, and Instance static
+ function. Add WORD_RECORD_INVALID to depict uninitialized WordRecordInfo object.
+
+ * htword/WordKeyInfo.h: rename SetKeyDescriptionFromFile and SetKeyDescriptionFromString
+ to InitializeFromFile and InitializeFromString and implement them by calling Initialize.
+ rename SetKeyDescriptionRandom to InitializeRandom
+ rename Initialize(String& line) to GetNFields(String& line)
+ rename Initialize(int nfields) to Alloc(int nfields)
+
+ * htdig/htdig.cc,htmerge/htmerge.cc,htsearch/htsearch.cc,test/word.cc: replace
+ WordList::Initialize with WordContext::Initialize and run immediately after
+ config is read. Otherwise WordType fails to work and configuration value
+ extraction will fail.
+
+ * htmerge/htmerge.cc: move initialization
+
+ * test/conf/htdig.conf2.in: reorder so that it looks as much as possible as conf.in
+
+Thu Jan 13 12:33:46 2000 Loic Dachary <loic at ceic.com>
+
+ * htdb/htstat.cc,htdump.cc,htload.cc: set proper progname
+
+Wed Jan 12 20:02:26 2000 Loic Dachary <loic at ceic.com>
+
+ * htcommon/HtWordList.cc (Dump): Use Walk instead of Collect otherwise does not work.
+
+Wed Jan 12 19:38:33 2000 Loic Dachary <loic at ceic.com>
+
+ * htlib/HtDateTime.h (class HtDateTime): killed void SetDateTime(const int t)
+ because they cause problems when time_t is an int and were useless anyway.
+
+Wed Jan 12 13:31:45 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordBitCompress.h: remove inline qualifier on check_tag1: it's not inline
+
+ * htword/WordKey.h: #define WORD_KEY_UNKNOWN_POSITION to -1. Remove default
+ argument to SetToFollowing so that it's more explicit when used with
+ WORD_KEY_UNKNOWN_POSITION.
+
+ * htword/WordKey.cc: change name of variable info0 to info
+
+ * htword/WordList.cc: use WordKey::Info instead of WordKeyInfo::Get as done
+ in WordKey.cc for consistency.
+
+ * htword/WordList.{cc,h},htword/WordDB.h: rename WordCursor to WordDBCursor
+ for consistency.
+
+ * htword/WordList.h: Kill the WordSearchDescription::Setup useless function
+
+ * htword/WordList.h: WordSearchDescription constructor now has straightforward
+ semantics.
+
+ * htword/WordList.h: Rename Search into Collect since it already existed, just
+ with a different prototype.
+
+Wed Jan 12 12:36:46 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.h (class WordSearchDescription): add cursor member
+
+Tue Jan 11 19:33:44 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htlib/HtVectorGeneric,htword: Fixed some warnings found
+ when compiling under FreeBSD
+
+Tue Jan 11 18:22:58 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htlib/HtVectorGeneric.h: inlined functions Add and Allocate which
+ are critical to performance
+
+Tue Jan 11 12:18:47 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordKey.h: fixed uninitialized memory read
+
+ * htword/WordBitCompress.cc: Fixed big number BUG
+ Fixed memory leak
+
+Tue Jan 11 09:37:36 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.h: move operator << and operator >> to end of
+ functions declarations instead of data members.
+
+ * htword/WordList.h: added more comments on functions behaviour.
+
+ * htword/WordList.h: added #if SWIG for Perl interface
+
+Mon Jan 10 17:55:05 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordDBPage: enhanced compression debugging output
+
+Mon Jan 10 09:07:19 2000 Loic Dachary <loic at ceic.com>
+
+ * WordContext.h,WordKey.h,WordList.h: Added #if SWIG for perl
+ interfaces. Remove InSortOrder, useless now that everything
+ is manipulated in sort order as far as the interface is concerned.
+
+ * WordKey.cc,WordList.cc: remove InSortOrder
+
+ * WordKey.h,WordRecord.h,WordReference.h: commented out Set/Get for
+ ascii Set/Get for SWIG.
+
+ * WordKey.h: turn CopyFrom to public for those who don't want to
+ use operator =.
+
+ * WordKey.h: rename info -> Info and nfields NFields
+
+ * WordKey.h: remove int IsFullyDefined() const redundant with Filled
+
+Thu Jan 06 14:41:15 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword,all: Changed interface to overloaded Walk function that was
+ ambiguous on some compilers...
+
+Thu Jan 06 14:00:01 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordList.h (class WordSearchDescription): rename setup to Setup
+
+ * htword/WordList.h (class WordBenchmarking): rename show to Show
+
+ * htword/WordRecord.{h,cc}, htword/WordReference.h, htword/WordList.h:
+ add comments, reorganize member functions for clarity.
+
+Thu Jan 06 12:01:47 2000 Marcel Bosc <bosc at ceic.com>
+
+ * htword/compression: Split WordDBCompress.* to WordDBCompress +
+ WordDBPage.*
+
+ * htword/WordBitCompress: renamed put/get to put_uint/get_uint. added get/put_uint_vl
+
+ * htword/compression: modified slightly the compression: this makes old databases
+ OBSOLETE: headers compress better. Changed Flags compress better and faster.
+
+ * htword/WordKey: added operator [] and Get/Set accessors
+
+ * htword: removed the obsolete --with_key configure option (KEYDESC)
+
+ * htword/WordMonitor: added monitor input
+
+Wed Jan 05 14:32:31 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKeyInfo.h (class WordKeyInfo ): if(encode) was if(sort)
+
+ * htword/WordKeyInfo.h: rename show to Show and nprint to Nprint
+
+ * htword/WordKeyInfo.h: move WORD_ISA from WordKey.h to WordKeyInfo.h,
+ rename WORD_ISA_String to WORD_ISA_STRING.
+
+ * htword/WordKey.h: rename FATAL_ABORT to WORD_FATAL_ABORT and errr to word_errr
+
+ * htword/WordKey.h: move private functions at bottom of class above data members
+ rename show_packed to ShowPacked
+
+ * htword/WordKey.cc: move WordKeyInfo::SetKeyDescriptionRandom from WordKey.cc
+ to WordKeyInfo.cc
+
+ * htword/WordKeyInfo.cc: add include htconfig.h
+
+Wed Jan 05 13:26:16 2000 Loic Dachary <loic at ceic.com>
+
+ * htdig/ExternalParser.cc (parse): use nocase_compare instead of mystrcasecmp to
+ suppress warnings. (char*)String for mystrncasecmp that has no equivalent in
+ the String class.
+
+ * htdig/Retriever.cc (IsValidURL): remove warning by (char*)url
+
+Wed Jan 05 11:54:19 2000 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h: kill obsolete comment and add suffix explanation at
+ the beginning of the file.
+
+ * htword/WordKey.h (class WordKey): rename copy_from and initialize to CopyFrom
+ and Initialize to fit naming conventions. Reorganize the methods to group them
+ in logical sets. Fix indenting. Comment each method.
+
+ * htword/WordKey.h (Clear): add kword.trunc()
+
+ * htword/WordKey.h: protect SetWord(const char *str,int len) because it opens
+ the door to all kinds of specific derivations. Should be
+ SetWord(String(foo, foo_length)) if not performance critical.
+
+Wed Dec 29 18:41:14 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htlib/HtMaxMin: added max/min of arrays, added comments to
+ HtMaxMin. Added HtMaxMin.cc all these are used in htword
+
+ * htlib/HtTime.h: added comments. included portable time.h
+
+ * htlib/HtVectorGeneric.cc: added HtVector_double, HtVector_String
+
+ * htlib/HtVectorGeneric.h: inlined several methods, deactivated CheckBounds
+
+ * htlib/StringMatch.cc: removed #include"WordType.h", this made htlib dependent
+ on htword, which is not acceptable for a library
+
+ * htlib/HtWordType.h: this replaces the macros used in StringMatch.cc
+
+ * htlib/HtRandom.h: added tools for using random number
+ (this is used currently in tests)
+
+ * htword/WordBitCompress.cc: transferred max_v/min_v to htlib
+
+ * htword/WordBitCompress.cc: optimized put/get for better performance
+
+ * htword/WordMonitor: system for detailed monitoring of operation
+ and performance within htword
+
+ * htword/WordDBCompress: fixed compression for case of empty WordRecord
+
+ * htword/WordDBCompress: cleaned up some code added some comments
+
+ * htword/WordKeyInfo: split WordKey files into WordKey and WordKeyInfo files
+
+ * htword/WordContext: centralized global configuration into one class
+
+ * htword/WordKey: inserted randomized key/keydescription into WordKey classes
+ (this was previously used in several tests)
+
+ * htword/WordKey: optimized Compare, UnpackNumber for speed (these are
+ really speed critical)
+
+ * htword/WordRecord: is now configurable, type can be configured to "DATA" (htdig)
+ or "NONE" (for other uses)
+
+ * htword/WordType: changed macros to global functions to make it compatible
+ with cleanup in StringMatch. Integrated WordType to WordContext
+ configuration/Initialization
+
+ * htword/WordKeyInfo: fixed initialization from key description file
+
+Tue Dec 28 18:58:21 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htlib/String.cc: String::lowercase(), String::uppercase()
+ support for national character added.
+
+ * htfuzzy/Prefix.cc: method "prefix" works now.
+
+Mon Dec 27 22:17:48 1999 Loic Dachary <loic at ceic.com>
+
+ * htdig/htdig.cc (main): change '\r\n' to "\r\n"
+
+ * Makefile.config,db/dist/Makefile.in: rename libdb to libhtdb to
+ prevent conflicts with installed libdb.
+
+ * db/dist/Makefile.in: do not install documentation nor binary
+ utilities (db_dump & al) since they are replaced by htdb binaries
+ (htdump & al).
+
+ * db/dist/Makefile.in (prefix): prepend $(DESTDIR) to prefix
+ to support make DESTDIR=/staging install for binary distribution
+ packages generation.
+
+ * configure.in: use AC_FUNC_ALLOCA to check for alloca. Used
+ in regex and test/dbbench.cc only but definitely a useful
+ feature to have.
+
+Thu Dec 23 11:10:24 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htcommon/defaults.cc: set wordlist_cache_size default to 10Meg
+
+ * db/mp: removed some debugging messages
+
+ * htword/WordList.cc: added warning if no cache
+
+ * test/word.cc: added cache
+
+ * htlib/HtTime.h: added ifdefs for portable time.h sys/time.h
+
+Tue Dec 21 23:33:06 1999 Loic Dachary <loic at ceic.com>
+
+ * htdoc/attrs.html,cf_by*.html: regenerate to include
+ wordlist_wordkey_description attribute
+
+ * htcommon/Makefile.am: Add AM_LFLAGS = -L and AM_YFLAGS = -l to
+ prevent #line generation because it confuses the dependencies
+ generator of GCC if configure run out of source tree.
+
+ * configure.in: remove --with-key option. Not needed since
+ word description now dynamic. Destroyed WordKey.h if
+ specified.
+
+ * htword/Makefile.am: remove commented lines for WordKey.h
+ generation.
+
+Tue Dec 21 18:18:01 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword: added code for benchmarking
+
+Mon Dec 20 17:59:15 1999 Marcel Bosc <bosc at ceic.com>
+
+ * WordKey: Made the key structure dynamic: Changing the
+ key structure used to imply recompiling the htword library.
+ This should not change anything in htdig.
+
+ * WordKey: numerical key fields are stored in an array of unsigned
+ ints instead of compile-time defined pools.
+
+ * WordKey.h: WordKey now needs copy operators. Setbits are stored
+ in sort order (used to be in encoding order)
+
+ * htword: word_key_info is now a pointer, had to change all references
+
+ * word.cc: Rewrote wordkey test for new dynamically
+ set key structure. The test randomly creates key structures
+ and tests them.
+
+ * test: adapted test files (simplifies things a lot)
+
+1999-12-21 Toivo Pedaste <toivo at ucs.uwa.edu.au>
+
+ * htlib/Dictionary.cc: Fix memory leak when destroying dictionary
+
+ * htlib/StringList.cc, htdig/Retriever.cc: Fix memory leak, not
+ the most elegant way but I'm not sure about the exact semantics
+ of StringList
+
+Mon Dec 20 21:59:03 1999 Loic Dachary <loic at ceic.com>
+
+ * htdb/{Makefile.am,err.c,getlong.c}: Fix mistake: err.c and
+ getlong.c contain C functions (declared in clib_ext) and
+ must be C compiled otherwise the prototype won't fit. Checking
+ db Makefiles, getlong.c and err.c are added to the list of objects
+ for each utility program. This guarantees that they won't conflict
+ with objects included in libdb.a.
+
+Sun Dec 19 20:04:42 1999 Loic Dachary <loic at ceic.com>
+
+ * htdb/{Makefile.am, err.cc}: add err.cc for portability
+ purposes.
+
+Fri Dec 17 18:04:09 1999 Loic Dachary <loic at ceic.com>
+
+ * Makefile.config: add PROFILING variable and document it. Designed
+ to enable profiling of htdig easily.
+
+ * */Makefile.am: add *_LDFLAGS = $(PROFILING) for every binary to
+ enable profiling, if specified.
+
+Thu Dec 16 17:16:33 1999 Loic Dachary <loic at ceic.com>
+
+ * htdb/*.cc: add -W option to activate htword specific compression.
+ Keep compatibility with zlib compression (-z only).
+
+Thu Dec 16 11:56:02 1999 Loic Dachary <loic at ceic.com>
+
+ * test/dbbench.cc: change wrong strcpy with memcpy
+
+Wed Dec 15 15:04:39 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/htdig.cc(main): Handle list of URLs given on stdin, if
+ optional "-" argument given. (Uses >> operator below.)
+
+ * htlib/htString.h, htlib/String.cc: Added Alexis Mikhailov's String
+ input methods, readLine() and >> operator.
+
+Wed Dec 15 13:59:34 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc: remove include of sys/stat.h, which is no
+ longer needed after hack removed from Need2Get(), and could pose
+ a problem on systems that need sys/types.h included first.
+
+Wed Dec 15 17:00:04 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordDB.h: add inline keyword for portability
+
+ * htword/WordDB.h: add CmprInfo method to get object describing
+ compression scheme for Berkeley DB
+
+ * htdb: Add htdump, htload, htstat equivalent of db_dump
+ db_load and db_stat that know about htword specific compression
+ strategy.
+
+ * htword/WordDBCompress: add static to locally defined functions and
+ variables, remove unnecessary #define and #include from header.
+
+Tue Dec 14 21:56:57 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/conf_parser.lxx, htcommon/conf_lexer.cxx:
+ bcopy on Solaris is in strings.h, not in string.h. Added
+ check for #ifdef HAVE_STRINGS_H
+
+Tue Dec 14 19:18:22 1999 Marcel Bosc <bosc at ceic.com>
+
+ * WordBitCompress: code cleaned up and commented
+
+Tue Dec 14 18:32:21 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/Word{Record,Reference,Key}: added a Get method to
+ convert the structure into its ascii string representation.
+ operator << now uses Get.
+
+Tue Dec 14 17:46:33 1999 Loic Dachary <loic at ceic.com>
+
+ * db/dist/Makefile.in (install): fix buggy test for libshared
+
+Tue Dec 14 14:10:28 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/{WordKey,WordReference,WordRecord}: rework
+ the input methods (operator >>). Each class now has a Set function
+ to initialize itself from an ascii description and a Get function
+ to retrieve an ascii description of the object.
+
+ * htword/WordList: operator >> has a better and cleaner input loop
+ using StringList and String instead of char*.
+
+Tue Dec 14 12:06:24 1999 Marcel Bosc <bosc at ceic.com>
+
+ * WordDBCompress.cc : Added compression version checking
+
+Mon Dec 13 21:09:31 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/conf_parser.lxx, htcommon/conf_lexer.cxx:
+ Added #include <string.h> Without it failed to compile
+ on Solaris.
+
+Mon Dec 13 16:31:27 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordBitCompress.cc : fixed bug that made compression
+ fail on big documents or big number of url's ...
+
+Mon Dec 13 13:49:35 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h.tmpl: Added *_POSITION macro generation
+
+Mon Dec 13 11:51:50 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htcommon/conf_parser.yxx: fixed several delete that should be delete []
+
+Sun Dec 12 17:14:00 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/conf_lexer.lxx, htcommon/conf_lexer.cxx:
+ national symbols are allowed in right part of expressions
+ (noted by Marcel Bosc).
+ Changed default behavior of flex from print unknown chars
+ on stdout to exit with error message.
+
+Sat Dec 11 17:34:03 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htdig/Retriever.cc,htdig/htdig.cc: "exclude_urls","bad_querystr"
+ "bad_extensions","valid_extensions","local_default_doc"
+ changed for new config.
+
+ * htdig/Server.cc: "server_max_docs","server_wait_time" changed for
+ new config.
+
+ * check for "limit_normalized" moved from Retriever::got_href and
+ Retriever::got_redirect to more appropriate Retriever::IsValidUrl
+
+Fri Dec 10 18:05:48 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword: checked for failed memory allocations in compression code
+
+Fri Dec 10 18:03:42 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordList,htcommon/HtWordList.cc,htmerge/words.cc: cleaned up WordList::Walk()
+ function, change two occurrences of WordList::Walk in htdig files
+
+Fri Dec 10 17:40:22 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordKey.cc (Compare): Fixed bug: compare used to compare chars and not
+ unsigned chars, this failed when non-ascii characters were used
+
+Fri Dec 10 11:54:36 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htcommon/defaults.cc : doc for wordlist_cache_size
+
+Thu Dec 09 17:07:47 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htcommon/defaults.cc: added defaults for compression and DB configuration
+ parameters
+
+Thu Dec 09 16:47:54 1999 Loic Dachary <loic at ceic.com>
+
+ * db/dist/configure.in,Makefile.in: Added shared lib support
+ for linux only. Not enabled if not on linux.
+
+Thu Dec 09 15:07:11 1999 Loic Dachary <loic at ceic.com>
+
+ * acinclude.m4,db/dist/acinclude.mr: CHECK_ZLIB now fails if either
+ zlib.h or libz is not found.
+
+ * configure.in: do not test zlib.h
+
+ * db/db/db.c,db/mp/mp_fopen.c: added #ifdef HAVE_ZLIB so that
+ compilation works if zlib is not found
+
+ * htlib/.cvsignore: remove wrong *.cxx
+
+ * test/dbbench.cc: added #ifdef HAVE_ZLIB so that
+ compilation works if zlib is not found
+
+Thu Dec 09 13:25:45 1999 Marcel Bosc <bosc at ceic.com>
+
+ * test/Word.cc,t_wordlist,Makefile.am: upgraded tests
+ * htcommon/HtWordList.h: fixed Configuration/HtConfiguration problem
+
+Thu Dec 09 12:10:32 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword: Added the compression code:
+ * WordDBCompress: Classes for page specific compression code
+ * WordBitCompress: Classes for bitstreams and non-specific compression
+
+Thu Dec 9 12:09:51 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htcommon/HtConfiguration.cc: bug fix: sometimes
+ htConfiguration::Find(url,char*) returned empty values
+ even if there was something to return.
+
+Thu Dec 09 11:15:30 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htlib/Configuration.cc (Read): Read is now a virtual function: the old one
+ for Configuration the new one (Vadim's ... with the parser) in HtConfiguration
+
+Thu Dec 09 11:01:22 1999 Loic Dachary <loic at ceic.com>
+
+ * acinclude.m4: upgrade AC_PROG_APACHE macro for
+ modules detection.
+
+ * test/conf/httpd.conf,test/test_functions.in,test/conf/Makefile:
+ use @APACHE_MODULES@ to accommodate various apache modules directory
+ flavors.
+
+Tue Dec 07 20:32:34 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htdig: Split the Configuration class into Configuration
+ and HtConfiguration. All the HtConfiguration and the
+ configuration parsing (lex..) was moved to htcommon.
+ Configuration was replaced by HtConfiguration as needed
+
+Tue Dec 07 16:21:13 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: added AM_PROG_LEX and AC_PROG_YACC
+
+ * htlib/Makefile.am: simply set conf_lexer.lxx and conf_parser.yxx,
+ automake knows how to handle these. The renaming is needed to avoid
+ conflicts in automake generated rules.
+
+Mon Dec 6 16:23:39 CST 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/cf_generate.pl: added a bit of error checking for when it
+ can't fetch the config info, and made it more flexible for what it
+ allows as terminator.
+ * htcommon/defaults.cc: add default and description for authorization
+ attribute, and clean up external_protocols entry for cf_generate.pl.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+ * htdig/htdig.cc(main): set authorization parameter before Retriever
+ constructor is called, as it may initialize a Server. (Should complete
+ fix of PR#490.)
+
+Mon Dec 6 21:34:29 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htdig/Document.cc htdig/htdig.cc: "authorization" parameter
+ in config is added and is new config compatible.
+ New code hasn't got PR#490 bug (doesn't authenticate robots.txt)
+
+Mon Dec 06 11:58:56 1999 Marcel Bosc <bosc at ceic.com>
+
+ * HtVectorGeneric.h: generic vectors, stl-free: this was originally a copy of
+ HtVector.h with Object * replaced by GType and some small changes.
+ It has been modified and checked to see if it all works ok.
+ You can build vectors of any type that has an empty constructor.
+ * HtVectorGenericCode.h: generic vectors, stl-free: implementation
+ (modified "copy" of HtVector.cc)
+ * HtVectorGeneric.cc: generic vectors: implementation for common types
+ * HtVector_int.h: generic vectors: declaration for the most common type
+ (and example of howto use)
+
+Sat Dec 4 23:49:18 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Synonym.cc (createDB): Change declaration to match
+ Fuzzy::createDB(config), allowing the method to be called by
+ htfuzzy.
+
+ * htfuzzy/htfuzzy.cc (main): Add an error message if
+ fuzzy->createDB() comes back with an error.
+
+Sat Dec 4 15:38:34 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * htnet/HtHTTP.cc, htnet/HtHTTP.h, htdig/Document.cc
+ fixed proxy bug. GET command in HtHTTP included only
+ path of url instead of full url when using proxy.
+ HtHTTP::UseProxy(int) added.
+
+ * htdig/Document.cc: make "http_proxy" parameter
+ url-dependent for new configuration.
+
+Fri Dec 03 14:57:13 1999 Marcel Bosc <bosc at ceic.com>
+
+ * BerkeleyDB: Compression code: added possibility to use
+ user-defined compression routines (the goal is to enable
+ the mifluz-specific DB page compression that obtains
+ higher compression ratios than generic zlib compression)
+ this involves the following changes in BerkeleyDB:
+ * BerkeleyDB/CompressionEnvironment: Adding a structure db_cmpr_info
+ in db_env that permits db user to specify the external compression
+ routines and other information related to compression
+ * BerkeleyDB/CompressionEnvironment: Adding a cmpr_context structure
+ to DB_MPOOLFILE that stores information that compression needs
+ (the _weakcmpr DB and the db_cmpr_info)
+ * BerkeleyDB/Compression: Needed to modify the compression
+ system (that is implemented in the BerkeleyDB memory pool) to permit
+ higher compression ratios and to use the compression environment
+
+Thu Dec 2 16:47:30 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc(parse_url): Use a static int to avoid
+ re-fetching local_urls_only from the config object.
+ (Initial, got_href, got_redirect): Try to get the local filename
+ for a server's robots.txt file and pass it along to the newly
+ generated server.
+
+ * htdig/Server.cc(ctor): Retrieve the robots.txt file from the
+ filesystem when possible and respect the local_urls_only option.
+
+ * htdig/Server.h: Change type of local_robots_file to String* to
+ better match Retriever::GetLocal().
+
+Thu Dec 02 16:24:27 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordReference.cc,WordKey.cc,WordRecord.cc (Print): Add function
+ to ease printing from Perl.
+
+Thu Dec 02 16:06:29 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordReference.h (WORD_FILLED): remove
+ unused WORD_FILLED and WORD_PARTIAL macros
+
+Wed Dec 01 19:18:42 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordKey.h.tmpl,WordRecord.h,WordReference.h,
+ WordList.h: Added #ifndef SWIG for
+ www.swig.org sake.
+
+Wed Dec 1 19:47:20 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegex.cc, htlib/HtRegex.h (set*): Add a case_sensitive
+ flag which defaults to insensitive. This better mirrors the
+ StringMatch class.
+
+ * htcommon/URL.cc(signature): Make the signature a proper URL to
+ the base of the server.
+
+ * htdig/Server.h: Add IsDead() methods to query the status of the
+ server, as well as an IsDisallowed() method to query whether a URL
+ is forbidden by the robots.txt rules. Change _disallow to HtRegex.
+
+ * htdig/Server.cc(ctor): Only retrieve the robots.txt file if this
+ is an http or https server.
+ (robotstxt): Use the proper HtRegex method for setting the pattern.
+ (push): Remove logic checking the _disallow patterns. This is now
+ done by the Retriever object.
+
+ * htcommon/defaults.cc: Add new attribute "local_urls_only" which
+ defaults to false, which dictates whether retrieval should revert
+ to another method if RetrieveLocal() fails.
+
+ * htdig/Retriever.cc(parse_url): Check to see if the server is
+ dead before calling the Retrieve() method. Notify the server
+ object if a connection fails. Also respects the new
+ local_urls_only attribute as described above.
+ (IsValidURL): Check the server's IsDisallowed() method to see if
+ the robots.txt forbids this URL.
+
+ * htdoc/THANKS.html: Updated to reflect current contributions, etc.
+
+ * README: Update to mention version 3.2.0b1.
+
+Wed Dec 1 17:05:48 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc(GetLocal): Fix error in GetLocalUser() return
+ value check, as suggested by Vadim.
+
+Wed Dec 1 15:57:09 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/conv_doc.pl: Added a sample external converter script.
+
+Mon Nov 29 23:19:35 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc, htdig/Retriever.h, htdig/Server.cc,
+ htdig/Server.h: forward-ported patch provided by Alexis Mikhailov
+ <alexis at medinf.chuvashia.su> and Gilles's for cleaning up
+ IsLocal/GetLocal. Makes local digging persistent, even when HTTP
+ server is down.
+
+Mon Nov 29 22:35:06 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * test/url.cc: New test for URL class.
+
+ * test/url.parents: Base URLs for parsing.
+
+ * test/url.children: Derived relative URLs for testing.
+
+ * test/Makefile.am, test/Makefile.in: Add the above for building.
+
+ * htcommon/URL.cc: A variety of bug fixes (some hacks), especially
+ for file:// and user@host URLs.
+
+Sun Nov 28 00:35:59 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * .version: Bump to 3.2.0b1-dev.
+
+Sat Nov 27 20:23:14 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/ExternalTransport.h, htdig/ExternalTransport.cc: New class
+ to allow external scripts to handle transport methods.
+
+ * contrib/handler.pl: Example handler using the program 'curl' to
+ handle HTTP or HTTPS transactions.
+
+ * htcommon/defaults.cc: Add new configuration option
+ 'external_protocols' as a list of protocols and scripts to handle
+ them. Documentation currently needs to be written.
+
+ * htdig/Document.h, htdig/Document.cc(Retrieve): Call
+ ExternalTransport::canHandle to establish which protocols are
+ supported by handler scripts and then create an appropriate
+ transport object.
+
+ * Makefile.in, htdig/Makefile.am, htdig/Makefile.in: Add
+ dependencies for ExternalTransport class.
+
+ * htnet/HtHTTP.h, htnet/HtHTTP.cc, htnet/Transport.h,
+ htnet/Transport.cc: Move _location field from HtHTTP_Response to
+ Transport_Response to allow other subclasses to use it. Similarly,
+ move NewDate and RecognizeDateFormat to Transport.
+
+Fri Nov 26 17:07:52 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc(HTML & do_tag): add code to turn off indexing between
+ <style> and </style> tags.
+
+Fri Nov 26 15:56:47 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setVariables): added Alexis Mikhailov's fix
+ to check the number of pages against maximum_pages at the right time.
+ * htlib/String.cc(write): added Alexis Mikhailov's fix to bump up
+ pointer after writing a block.
+
+Wed Nov 24 15:10:05 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * installdir/htdig.conf: Add bad_extensions to make it more obvious to
+ users how to exclude certain document types.
+
+Tue Nov 23 19:29:37 CST 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htnotify/htnotify.cc(send_notification): apply Jason Haar's fix
+ to quote the sender name "ht://Dig Notification Service".
+
+Tue Nov 23 19:46:00 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * conf.tab.cc.h conf.l.cc conf.tab.cc
+ Added files pre-generated from conf.y, conf.l
+
+Sun Nov 21 18:26:21 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ *htdig/Document.cc: "max_doc_size" supports new
+ configuration and is url-dependent now.
+
+Sun Nov 21 17:06:50 EET 1999 Vadim Chekan <vadim at etc.lviv.ua>
+
+ * New config parser committed. htlib/(Makefile.am,Makefile.in),
+ htlib/Configuration.cc, htlib/Configuration.h
+ htlib/(conf.y, conf.l) added.
+
+Fri Nov 12 14:17:37 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/cgi.cc(init): Fix bug in reading long queries via POST
+ method (PR#668).
+
+Wed Nov 10 15:34:04 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setVariables & createURL),
+ htsearch/htsearch.cc(main), htdoc/hts_templates.html: handle keywords
+ input parameter like others, and make it propagate to followups.
+
+Wed Nov 10 15:16:57 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc: Fix PR#688, where htdig goes into an infinite
+ loop if an entry in local_urls (or local_user_urls) is missing a '='
+ (or a ',').
+
+ * htcommon/defaults.cc: removed vestigial references to MAX_MATCHES
+ template variables in search_results_{header,footer}.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+ * htdoc/hts_form.html: add disclaimer about keywords parameter not
+ being limited to meta keywords.
+
+ * htdoc/meta.html: add description of "keywords" meta tag property.
+ add links to keywords_factor & meta_description_factor attributes.
+
+1999-11-10 Toivo Pedaste <toivo at ucs.uwa.edu.au>
+
+ * htdig/Retriever.cc : Ignore SIGPIPEs with persistent connections
+
+ * htnet/HtHTTP.cc : Fix buffer overrun reading chunks
+
+ * htdig/Document.cc : Make redirects work
+
+ * htdig/Retriever.cc : Make valid URL checks apply to initial URL's
+ particularly those from a previous run
+
+ * htlib/Dictionary.cc : Fix memory deallocation error
+
+
+Tue Nov 02 13:44:57 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htsearch/Display.cc (setVariables): parentheses missing around ternary
+ operator : confusion in priority with <<.
+
+Tue Nov 02 13:33:50 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htsearch/Display.cc (hilight): changed static char * (!!) to const string,
+ static char evaluated before configuration is loaded so config had no
+ effect + unnecessary conversion
+
+Tue Nov 02 11:45:49 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/WordKey.cc : Cleaned up obsolete code now using *InSortOrder fcts
+ and WordKeyInfo.sort[]
+ * htword/WordKey : Added FirstSkipField :
+ find first field that must be checked for skip
+ * htword/WordKey (PrefixOnly): now returns OK/NOTOK, fixed bug which
+ made Walk loop over the whole db if the searchkey just had
+ the "word" field defined
+ * htword/WordKey.cc (Unpack): had forgotten to: SetDefinedWordSuffix
+ * htword/WordKey.cc (operator >>): added check for very very long words
+ (even if this should never happen)
+ * htword/WordKey.cc (operators << >>): added <UNDEF> word suffix handling
+ * htword/WordKey.h : Filled() did not check for WordSuffix
+ * htword/WordKey.h : added WordKey::ExactEqual
+ * htword/WordKey.h (IsDefinedWordSuffix): fixed bad flag check
+ * htword/WordList : Removed all obsolete HTDIG_WORDLIST flags: only
+ two remain : COLLECTOR and WALKER the rest is now specified by the searchKey
+ removed action arg to WordList::Collect()
+ * htcommon/HtWordList.cc,htmerge/words.cc : changed flags in calls to WordList::Walk
+ * htword/WordList.cc : skip now deals with the SuffixUndefined case
+
+Fri Oct 29 17:13:21 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/cf_generate.pl: now updates last modified date in attrs.html
+ * htdoc/attrs.html: reran cf_generate.pl
+
+Fri Oct 29 15:28:22 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setVariables & hilight): added Sergey's idea
+ for start_highlight, end_highlight & page_number_separator attributes.
+ * htcommon/defaults.cc: added & documented these.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Thu Oct 28 13:06:23 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/ExternalParser.cc: added support for external converters
+ as extension to external_parsers attribute.
+ * htcommon/defaults.cc: Updated external_parsers with new description
+ and examples of external converters.
+
+Thu Oct 28 12:52:28 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: Updated programs lists for *_factor, so they
+ all refer to htsearch and not htdig. Added htsearch to programs lists
+ for translate_*. img_alt_factor & url_factor not defined yet because
+ they're still not used in htdig/htsearch.
+
+Wed Oct 27 15:53:36 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: added descriptions & examples for
+ doc_excerpt, heading_factor, max_descriptions, minimum_speling_length,
+ regex_max_words, use_doc_date, valid_extensions. Added references
+ to these elsewhere in document as appropriate. Removed -pairs option
+ from pdf_parser default (again). Minor changes to noindex_start & end,
+ and changed example for modification_time_is_now. Corrected references
+ to heading_factor_[1-6].
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Wed Oct 27 13:32:50 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/cf_generate.pl: changed formatting of output to more closely
+ match format of old attrs.html (to make diff'ing easier),
+ and fixed handling of pdf_parser default to strip quotes.
+ * htcommon/defaults.cc: oops, fixed typo in url_part_aliases example.
+ * htdoc/attrs.html, cf_by{name,prog}.html: reran cf_generate.pl
+
+Wed Oct 27 18:24:36 1999 Loic Dachary <loic at ceic.com>
+
+ * htdoc/cf_generate.pl: fixed wrong target for cf_byprog, escape
+ HTML chars <>&'" for default values.
+
+Wed Oct 27 10:21:18 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: restored 2nd example for url_part_aliases
+
+Tue Oct 26 16:28:29 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc: corrected descriptions for allow_in_form,
+ search_results_header, noindex_start, noindex_end. Also fixed a
+ few small typos & formatting errors here & there in descriptions
+ and examples.
+
+Tue Oct 26 16:01:22 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/Makefile.am: rm Wordkey.h instead of chmod to cope with
+ non existent WordKey.h
+
+Tue Oct 26 10:54:52 1999 Loic Dachary <loic at ceic.com>
+
+ * htcommon/default.cc: fixed all inconsistencies reported by Gilles.
+
+Mon Oct 25 11:42:13 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/ word.cc,t_wordskip,skip_db.txt: Added test for *Skip Speedup*
+ * htword/ WordList: Added tracing of Walk() for debugging purposes
+
+Fri Oct 22 18:22:00 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/ WordList.cc,WordKey: Added a defined/undefined flag for saying
+ if a search key's word is a prefix or not: WORD_KEY_WORDSUFFIX_DEFINED
+ reduces code size and makes it much easier to understand
+ * htword/ WordList,WordReference,WordKey: Added input output streams for
+ WordList,WordReference,WordKey
+
+Wed Oct 20 16:47:52 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/ WordKey,Makefile.am,WordCaseIsAStatements.h: for readability
+ replaced the switch ... #ifdef ..STATEMENT().... sequence that appeared many times
+ with an include file :WordCaseIsAStatements.h
+
+ * htword/ WordKey: WordKeyInfo: duplicated all of the fields structure into
+ sort structure, for fast access without cross referencing and for simplifying code
+ (required change of perl in template WordKey.h.tmpl)
+
+ * htword/ WordList: *Skip Speedup* added a speedup to avoid wasting time
+ by sequentially walking through useless entries. see function:
+ SkipUselessSequentialWalking() for an example and more info
+
+ * htword/ WordKey.h,WordKey.cc: Changed Set,Unset,IsSet Wordkey accessors' names to:
+ SetDefined,Undefined,IsDefined. (easier to read and avoids naming conflicts)
+
+ * htword/ WordKey: added generic numerical accessors for accessing
+ numerical fields in WordKey (in sorted order):GetInSortOrder,SetInSortOrder
+
+ * htword/ WordKey,word_builder.pl: added a MAX_NFIELDS constant, that specifies
+ a maximum number of fields that a WordKey can have. Sanity check in word_builder.pl.
+
+ * htword/ word_builder.pl: enforced word sort order to ascending
+
+ * htword/ WordList: added a verbose flag using config."wordlist_verbose"
+
+Tue Oct 19 18:36:42 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordType.h: const accessors to wtype and config
+
+Tue Oct 19 13:10:47 1999 Loic Dachary <loic at ceic.com>
+
+ * acconfig.h: remove unnecessary VERSION (redundant)
+
+Tue Oct 19 11:32:38 1999 Loic Dachary <loic at ceic.com>
+
+ * db/Makefile.in,db/dist/Makefile.in: install db library so
+ that external applications can be linked.
+
+Tue Oct 19 10:57:27 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: add --with-key to specify alternate to htword/word.desc
+
+ * configure.in: htword is done before htcommon to prevent unnecessary
+ recompilation because WordKey.h changes.
+
+ * htword/Makefile.am: use @KEYDESC@
+
+Tue Oct 19 10:38:41 1999 Loic Dachary <loic at ceic.com>
+
+ * test/word.cc use TypeA instead of DocID and the like
+
+Mon Oct 18 17:21:34 1999 Loic Dachary <loic at ceic.com>
+
+ * Makefile.config: AUTOMAKE_OPTIONS = foreign
+
+Mon Oct 18 11:40:17 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htword/ WordList.cc (Walk): fixed bug in Walk: if flag HTDIG_WORDLIST was set
+ then data was uninitialized in loop
+
+Fri Oct 15 18:52:03 1999 Marcel Bosc <bosc at ceic.com>
+
+ * htdig/Document.h (class Document): added const to:
+ Transport::DocStatus RetrieveLocal(HtDateTime date, const String filename);
+
+Fri Oct 15 17:46:23 1999 Loic Dachary <loic at ceic.com>
+
+ * acinclude.m4,configure.in: modified AC_APACHE_PROG to detect
+ version number and control it.
+
+ * test/conf/*.in: patch to fit module loading or not, accommodate
+ various installation configurations.
+
+ * test/test_functions.in: More portable call to apache.
+
+Fri Oct 15 12:55:47 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Document: added the management of 'persistent_connections',
+ 'head_before_get', 'max_retries' configuration attributes.
+
+Fri Oct 15 12:54:11 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * test/testnet.cc: added the option '-m' for setting the max size
+ of the document.
+
+Fri Oct 15 12:48:49 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Server: added a flag for persistent connections.
+ It's set to true if the Server allows persistent connections.
+ It should be used when retrieving a document.
+
+Fri Oct 15 12:45:42 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * defaults.cc: added the configuration attributes 'persistent_connections',
+ 'max_retries' and 'head_before_get'. Their default values are
+ respectively true, 3, false.
+
+Fri Oct 15 12:35:51 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc: managing of uncompleted stream reading with persistent
+ connections (it occurs when max_doc_size is lower than the real
+ content length of the document, or when a document is not parsable
+ and we asked for it with a GET call).
+
+ * Transport: _host variable is treated as a String, as Loic suggested.
+
+Fri Oct 15 12:11:23 1999 Marcel Bosc <bosc at ceic.com>
+
+ * Added README to htword
+
+Thu Oct 14 11:29:35 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/mktime.c, htlib/regex.c, htlib/regex.h, htlib/strptime.c:
+ Updated with latest glibc versions. Merging from glibc sources may
+ have introduced bugs, so this is the last merge before htdig-3.2.0b1.
+
+Thu Oct 14 13:09:32 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/Transport: added statistics for open and close of connections
+ and changes of servers.
+ Fixed a bug in the SetConnection method, regarding the host comparison.
+ Added a method for showing the statistics on a given channel.
+
+ * htnet/HtHTTP: More debug info available.
+ Added a method for showing the statistics on a given channel.
+
+ * test/testnet.cc: now receives changes above.
+
+Wed Oct 13 13:35:42 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Document.h: added an HtHTTP pointer to the class.
+
+ * htdig/Document.cc: Transport and HtHTTP initialization methods
+ inside the Document constructor. The class destructor now calls
+ only the HtHTTP destructor (not the Transport destructor).
+ Modified the Retrieve method.
+
+ * htdig/Server.h: _last_connection is now an HtDateTime object.
+
+ * htdig/Server.cc: modified the constructor and the delay method.
+
+ * htdig/Retriever.cc: modified the parse_url function in order to manage
+ all the Document status messages coming from the Transport class.
+ Also modified the method for not found URLs for managing the no_port
+ status.
+
+Tue Oct 12 10:12:10 1999 Loic Dachary <loic at ceic.com>
+
+ * install headers and libraries so that htdig libraries may be used by external programs
+
+ * htword/WordList.cc,WordType.cc: add comments about config parameters used.
+
+Fri Oct 8 09:35:30 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.cc (SetFTime): Change buffer argument to const
+ char* to prevent problems passing in const buffers.
+
+ * htnet/HtHTTP.h: Change SetUserAgent to take a const char* to
+ prevent problems passing in const parameters.
+
+ * htdig/Document.h, htdig/Document.cc(): Use Transport class for
+ obtaining documents. Remove duplication of declarations
+ (e.g. DocStatus).
+
+ * htdig/Retriever.cc: Adapt switch statements from
+ Document::DocStatus to Transport::DocStatus.
+
+ * htdig/Server.cc: Use Document::Retrieve instead of RetrieveHTTP.
+
+Fri Oct 08 16:35:16 1999 Loic Dachary <loic at ceic.com>
+
+ * test/t_htnet: succeed if timeout occurs. It was the opposite.
+
+ * configure.in: AC_MSG_CHECKING(how to call getpeername?) add missing
+ comma at end for header spec block.
+
+Fri Oct 08 14:42:47 1999 Loic Dachary <loic at ceic.com>
+
+ * Fix all warnings reported by gcc-2.95.1 related to string
+ cast to char*.
+
+Fri Oct 08 14:04:21 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * htlib/Configuration,ParsedString,Dictionary: change char* to String
+ where possible.
+
+ * Fix a lot of warnings reported by gcc-2.95.1 related to string
+ cast to char*.
+
+ * Completely disable exception code from db.
+
+Fri Oct 08 13:44:32 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc: fixed a little bug in setting the modification time
+ if not returned by the server.
+
+Fri Oct 08 11:30:53 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc: better management of connection failures return values.
+ * Transport.h: added Document_no_connection and
+ Document_connection_no_port enum values.
+ * testnet.cc: management of above changes.
+
+Fri Oct 08 11:27:31 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * configure.in: modified getpeername() test.
+
+Fri Oct 08 10:28:15 1999 Loic Dachary <loic at ceic.com>
+
+ * htdig/Retriever.cc (IsValidURL): test return value of
+ ext = strrchr(url, '.');
+
+ * htword/WordRecord.h: initialize info member to 0 in constructor and
+ Clear.
+
+ * htlib/Configuration: char* -> String to all functions. Resolve
+ warnings.
+
+Thu Oct 07 16:19:46 1999 Loic Dachary <loic at ceic.com>
+
+ * htnet/HtHTTP.cc (ReadChunkedBody): use append instead of
+ << because buffer is *not* null terminated.
+
+ * htnet/Transport.cc (Transport): initialize _port and _max_document_size
+ otherwise comparison with undefined value occurs.
+
+Thu Oct 07 16:34:21 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc: call FinishRequest everytime in HTTPRequest() a value is
+ returned.
+ * testnet.cc: improved with more statistics and connections timeouts
+ control.
+
+Thu Oct 07 12:53:12 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * configure.in: modified getpeername() test function with
+ AC_LANG_CPLUSPLUS instead of AC_LANG_C.
+
+Thu Oct 07 11:56:52 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc : fixed bug of double deleting _access_time
+ and _modification_time objects in ~HtHTTP().
+
+Thu Oct 07 10:17:22 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordRecord.h: change (const char*) cast to (char*)
+
+ * htword/WordKey.h.tmp: fix constness of accessors, const accessor
+ returns const ref. Prevents unnecessary copies.
+
+Wed Oct 6 23:31:50 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnet/Connection.h, htnet/Connection.cc: Merge in io
+ class. Connection class was the only subclass of io.
+
+ * Makefile.in, htlib/Makefile.am, htlib/Makefile.in: Update for
+ removed io class.
+
+ * htdig/ExternalParser.cc: Add more verbose flags for errors.
+
+Wed Oct 06 14:56:34 1999 Loic Dachary <loic at ceic.com>
+
+ * htnet/Connection.cc (assign_server): use free, not delete
+ on strdup allocated memory.
+
+ * htcommon/URL.cc (URL): set _port to 0 in constructors.
+
+Wed Oct 06 12:08:38 1999 Loic Dachary <loic at ceic.com>
+
+ * Move htlib/HtSGMLCodec.* to htcommon to prevent
+ crossed interdependencies between htlib and htcommon
+
+Wed Oct 06 12:07:32 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP.cc: patch from Michal Hirohama regarding
+ the SetBodyReadingController() method
+
+Wed Oct 06 11:49:15 1999 Loic Dachary <loic at ceic.com>
+
+ * Move htlib/HtZlibCodec.* htlib/cgi.* to htcommon to prevent
+ crossed interdependencies between htlib and htcommon
+
+Wed Oct 06 11:40:48 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * HtHTTP: stores the server info correctly and removed some debug info
+ in chunk managing
+
+Wed Oct 06 11:39:12 1999 Loic Dachary <loic at ceic.com>
+
+ * Move htlib/*URL* to htcommon
+
+Wed Oct 06 10:09:19 1999 Loic Dachary <loic at ceic.com>
+
+ * README: add htword
+
+ * test/t_htnet: fix variable set problem & return code problem
+
+Wed Oct 06 08:53:52 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * Written t_htnet test
+
+Tue Oct 5 12:24:43 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * db/*: Import of Sleepycat's Berkeley DB 2.7.7.
+
+ * db/db/db.c, db/include/db.h, db/include/db_cxx.h, db/mp/mp_bh.c:
+ Resolve conflicts created in merge.
+
+Tue Oct 05 18:53:13 1999 Loic Dachary <loic at ceic.com>
+
+ * htdig/Display.cc, htword/*.cc: add inclusion of htconfig.h
+
+Tue Oct 05 14:54:17 1999 Loic Dachary <loic at ceic.com>
+
+ * htlib/htString.h (class String): add set(char*)
+
+ * htword/WordKey.cc: define typedefs for key components. Leads to more
+ regular code and no dependency on a predefined set of known types.
+ All types must still be castable to unsigned int.
+ Assume Word of type String always exists.
+ Generic Get/Set/Unset methods made simpler. Added const and ref
+ for Get in both forms.
+
+ * htword/WordList.cc: enable word reference counting only if wordlist_extend
+ configuration parameter is set. This parameter is hidden because
+ no code uses per word statistics at present. It is only activated
+ in the test directory.
+
+ * htword/word_list.pl: add mapping to symbolic type names,
+ force and check to have exactly one String field named Word.
+
+Mon Oct 04 20:05:35 1999 Loic Dachary <loic at ceic.com>
+
+ * test: add thingies to make test work when doing ./configure
+ outside the source directory.
+
+ * htword/WordList: Add Ref and Unref to update statistics.
+ Fix walking to start from the end of statistics. All statistics
+ words start with \001, therefore at the beginning of the file and
+ all clustered together.
+
+ * htword/WordStat: derived from WordReference to implement
+ uniq word statistics.
+
+ * test/word.cc: test statistics updating.
+
+ * htword/WordKey.cc: fix buggy compare (returned length diff
+ if key of different length).
+
+Mon Oct 04 18:43:56 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * test/testnet.cc: added the option for HEAD before GET control
+
+Mon Oct 04 17:33:24 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/Transport.h .cc: added the FlushConnection() method
+
+ * htnet/HtHTTP.h .cc: now the Request() method can make a HEAD
+ request precede a GET request. This is made by default, and
+ can be changed by using the methods Enable/DisableHeadBeforeGet().
+ A configuration option can be raised to manage it.
+
+Mon Oct 04 12:43:41 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htlib/io.h .cc: added a flush() method.
+
+ * htnet/HtHTTP.cc: manage the chunk correctly, by calling the flush()
+ method after reading it.
+
+Mon Oct 04 12:02:24 1999 Loic Dachary <loic at ceic.com>
+
+ * htlib/htString.h: move null outside inline operator [] functions.
+
+Fri Oct 01 14:55:56 1999 Loic Dachary <loic at ceic.com>
+
+ * htword/WordRecord: mutable, can also contain uniq word statistics.
+
+ * htword/WordReference: remove all dependencies related to the actual
+ structure of the key.
+
+ * htcommon/HtWordReference: derived from WordReference, explicit
+ accessors.
+
+ * htcommon/HtWordList: derived from WordList, only handles the
+ word cache (Flush, MarkGone).
+
+ * htdig/HTML.cc (do_tag): add wordindex to have location set in
+ tags
+
+ * htcommon/DocumentRef.cc (AddDescription): add Location calculation
+
+ * htword/WordList.cc: add dberror to map Berkeley DB error codes
+
+ * htsearch/Display.cc (display): initialize good_sort to get rid
+ of strange warning.
+
+Fri Oct 01 09:02:11 1999 Loic Dachary <loic at ceic.com>
+
+ * Makefile.config: duplicate library lines to resolve
+ interdependencies.
+
+Thu Sep 30 17:56:55 1999 Loic Dachary <loic at ceic.com>
+
+ * htmerge/words.cc (delete_word): Upgrade to use WordCursor.
+
+ * htword/WordList: Walk now uses a local WordCursor. Many concurrent
+ Walk can happen at the same time.
+
+ * htword/WordList: Walk callback now take the current WordCursor.
+ Added a Delete method that takes the WordCursor. Allows to delete
+ the current record while walking.
+
+ * db/include/db_cxx.h (DB_ENV): add int return type to operator =
+
+ * db/dist/configure.in (CXXFLAGS): disable adding obsolete
+ g++ option.
+
+ * configure.in: enable C++ support when configuring Berkeley DB
+
+ * htword: create. move Word* from htcommon. move HtWordType
+ from htlib and rename WordType.
+
+ * htword/WordList: use db_cxx interface instead of Database.
+ Less interface overhead. Get access to full capabilities of
+ Berkeley DB. Much more error checking done.
+ Create WordCursor private class to use String instead of Dbt.
+
+Wed Sep 29 20:03:31 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * htlib/lib.h: AIX xlC is confused by overloaded mystrcasestr
+ that only differ in constness. Only keep const form and use cast
+ where appropriate. *sigh*
+
+ * htlib/htString.h: accommodate new form of Object::compare and
+ Copy. Explicitly convert compare arg to String&, prevent hiding
+ and therefore missing the underlying compare function.
+
+ * htlib/HtVector.cc (Copy): make it const
+
+ * htlib/HtHeap.cc: accommodate new form of Object::compare
+
+ * htcommon/List.h,cc: Add ListCursor to allow many pointers that
+ walk the list to exist in the same program.
+
+ * htlib/Object.h (class Object): kill unused Serialize + Deserialize.
+ Change unused Copy to const and bark on stderr if called because it
+ is clearly not what is wanted. If Copy is called and the derived class
+ does not implement Copy we are in trouble. Alternatives are to make
+ it pure virtual but it will break things all over the code or to abort
+ but this will be considered too violent. Change compare to take a
+ const reference and be a const.
+
+Wed Sep 29 16:51:58 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * acinclude.m4,configure.in,Makefile.config: remove -Wall from
+ Makefile.conf, add the AC_COMPILE_WARNINGS macro in acinclude.m4
+ and use it in configure.in.
+
+ * htdoc/default_check.pl: remove, unused
+
+Wed Sep 29 13:07:58 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/Transport: fixed some bugs on construction and destruction
+
+ * htnet/HtHTTP: the most important add is the decoding of chunked
+ encoded responses, as reported on RFC2616 (HTTP/1.1). It needs
+ to be developed, because it timeouts at the end of the request.
+ Added a function pointer in order to dynamically handle the function
+ that reads the body of a response (for now, normal and chunked, but
+ other encoding ways exist, so ...). Fixed some bugs on construction
+ and added some features like Server and Transfer-encoding headers.
+
+Wed Sep 29 13:54:59 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * fix all inline method declarations so that they are always declared
+ inline in the class declaration if an inline definition follows.
+
+ * acinclude.m4: also search apache in /usr/local/apache/bin by default.
+
+ * fix various warnings of gcc-2.95, now compiles ok without warnings
+ and with -Wall.
+
+ * htlib/htString.h: removed commented out inline get
+
+ * test/testnet.cc: add includes for optarg
+
+Tue Sep 28 18:56:36 1999 Loic Dachary <loic at ceic.com>
+
+ * Makefile.config (HTLIBS): libhtnet at the beginning of the list. It
+ matters on Solaris-2.6 for instance.
+
+ * test/testnet.cc: change times to timesvar to avoid conflict with
+ function (was warning only on Solaris-2.6).
+
+ * htdig,htsearch,htmerge,test/word are purify clean when running
+ make check.
+
+Tue Sep 28 18:23:49 1999 Loic Dachary <loic at ceic.com>
+
+ * htmerge/words.cc (mergeWords): use WordList::Walk to avoid loading ALL
+ the words into memory.
+
+ * htlib/DB2_db.cc (Open): we don't want duplicates. Big mistake. If DUP is
+ on, every put for update will insert a new entry.
+
+ * htcommon/WordList.cc (Delete): separate Delete (straight Delete and WalkDelete)
+ to avoid accessing dbf from outside WordList.
+
+ * htcommon/WordList.cc (Walk): now promoted to public.
+
+Tue Sep 28 16:34:56 1999 Loic Dachary <loic at ceic.com>
+
+ * test/word.cc (dolist): Add regression tests for Delete.
+
+ * htcommon/WordList.cc (Delete): Reimplement from scratch. Use Walk
+ to find records to delete. This allows to say delete all occurrences
+ of this word, delete all words in this document (slow), delete
+ all occurrences of this word in this document etc.
+
+ * htcommon/WordList.cc (Walk): extend so that it handles walk for
+ partially specified keys, remains fully backward compatible. It allows
+ to extract all the words in a specific document (slow) or all occurrences
+ of a word in a specific document etc.
+
+Tue Sep 28 12:56:12 1999 Loic Dachary <loic at ceic.com>
+
+ * htcommon/DocumentDB.cc (Open): report errors on stderr
+
+ * htmerge/docs.cc (convertDocs): rely on error reporting from DocumentDB
+ instead of implementing a custom one.
+
+Tue Sep 28 11:36:28 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htnet/Transport.h: added the status code and the reason phrase
+
+ * htnet/HtHTTP.cc .h: removed the attributes above.
+ Read the body of a response if the code is 2xx. Issues the
+ GetLocation() method.
+
+Tue Sep 28 10:32:47 1999 Loic Dachary <loic at ceic.com>
+
+ * test/htdocs/set3: create and populate with cgi scripts that have
+ bad behaviour (time out and slow connection).
+
+Tue Sep 28 10:20:23 1999 Loic Dachary <loic at ceic.com>
+
+ * test/htdocs: move html files in set1/set2 subdirectories to allow
+ tests that use different set of files. Change htdig.conf accordingly.
+
+Tue Sep 28 09:31:12 1999 Loic Dachary <loic at ceic.com>
+
+ * test/Makefile.am: comment test options, add LONG_TEST='y' for lengthy
+ tests, by default run quick tests.
+
+ * installdir/bad_words: removed "it", "an", "of": since the minimum word
+ length is by default 3, these words are ignored anyway.
+
+Mon Sep 27 20:37:38 1999 Loic Dachary <loic at ceic.com>
+
+ * htlib/HtWordType.h,cc: concentrate knowledge about word definition in this
+ class. Rename the class WordType (think WordReference etc...). Change
+ Initialize to use an external default object. A WordType object may be
+ allocated on its own. Drag functionalities from BadWordFile, Replace and
+ IsValid of WordList, and concentrate them in the WordType::Normalize
+ function.
+
+ * htcommon/WordList: use the new WordList semantic. WordType is now a member
+ of WordList, opening the possibility to have many WordList object with different
+ configurations within the same program since the constructor takes
+
+ * htsearch/htsearch.cc (setupWords): Use HtNormalize to find out if word should
+ be ignored in query. Formerly using IsValid.
+
+ * htlib/String.cc (operator []): fix big mistake, operator [] was indeed last() !
+
+ * htlib/String.cc(uppercase, lowercase): return the number of converted chars.
+
+ * htlib/String.cc(remove): return the number of chars removed.
+
+Mon Sep 27 17:43:23 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * Created testnet.cc under test dir for trying the htnet library
+ It's a simple program that retrieves an URL.
+
+ * htnet/HtHTTP.cc, .h: added a 'int (*) (char *)' function pointer.
+ This attribute is static and it is used under the isParsable method
+ in order to determine if a document is parsable. It must be set
+ outside this class by using the SetParsingController static method.
+ The classic use is to set it to 'ExternalParser::canParse' .
+
+Mon Sep 27 10:52:51 1999 Loic Dachary <loic at ceic.com>
+
+ * htmerge/db.cc (mergeDB): delete words instead of words->Destroy()
+ because the words object itself was not freed.
+
+Mon Sep 27 10:38:37 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * Created 'htnet' library
+
+Mon Sep 27 12:39:24 1999 Loic Dachary <loic at ceic.com>
+
+ * test/word.cc (dolist): don't deal with upper case at present and prevent warning.
+
+Mon Sep 27 10:38:37 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htlib/String.cc: removed compiler warnings
+
+ * htdig/HtHTTP.h: corrected cvs Id property
+
+Mon Sep 27 10:29:58 1999 Loic Dachary <loic at ceic.com>
+
+ * htlib/String.cc (String): make sure *all* constructors set the Data
+ member to 0.
+
+ * htsearch/parser.cc (score): add missing dm->id = wr->DocID();
+ strange it did not make search fail horribly.
+
+Mon Sep 27 09:46:34 1999 Loic Dachary <loic at ceic.com>
+
+ * test/conf/htdig.conf.in (common_dir): add common_dir so that
+ templates are found in compile directory.
+
+ * htsearch/parser.cc (phrase): free wordList at end and only allocate if
+ needed.
+
+Fri Sep 24 16:35:47 1999 Loic Dachary <loic at ceic.com>
+
+ * htcommon/DocumentDB.cc (Open): change mode to 666 instead of 664,
+ it's the business of umask to remove permission bits.
+
+ * htlib/URL.cc (removeIndex): Memory leak. do not use l.Release
+ since standard Destroy called by destructor is ok.
+
+ * htdig/htdig.cc (main): Memory leak. Use l.Destroy instead of
+ l.Release.
+
+ * htlib/StringList.cc (Join): Memory leak (new String str +
+ return *str). Also change to const fct.
+
+ * htlib/List.cc (Nth): add const version to help StringList::Join save
+ memory.
+
+ * htdig/HTML.cc (parse): delete [] text (was missing [])
+
+ * htlib/HtVector.cc: Most of the boundary tests with element_count
+ (but not all of them) were wrong (> instead of >= for instance).
+
+ * htlib/HtVector.cc (Previous): limit test cut and pasted from Next
+ and obviously completely wrong. Fix.
+
+ * htlib/HtVector.cc (Remove): use RemoveFrom, avoid code duplication.
+
+ * htcommon/DocumentRef.cc (Clear): set all numerical fields to 0,
+ and truncate strings to 0. Some were missing.
+
+ * htlib/Connection.cc (Connection): free(server_name) because allocated
+ by strdup not new.
+
+Fri Sep 24 14:30:21 1999 Loic Dachary <loic at ceic.com>
+
+ * */.cvsignore: update to include .pure, *.la, *.lo, .purify
+
+ * htlib/String.cc (String): add Data = 0
+
+ * htlib/htString.h (class String): add Data = 0
+
+ * htlib/String.cc (String): init set to MinimumAllocationSize at least
+ prevents leaking if init = 0.
+
+ * htlib/String.cc (nocase_compare): use get() instead of direct
+ pointer to Data so that the trailing null will be added.
+
+ * htlib/Dictionary.cc (DictionaryEntry): free(key) instead of
+ delete [] key because obtained with strdup.
+
+ * htlib/DB2_db.cc (Close): free(dbenv) because db_appexit does not
+ free this although it frees everything else.
+
+Thu Sep 23 18:18:40 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: add PERL detection & use in Makefile.am
+
+Thu Sep 23 14:29:29 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: removed unused alloca.h
+
+ * htcommon/DocumentDB.cc: test isopen in Close instead of before calling Close.
+ Add some const in functions arguments.
+ (Read): change char* args to const String&, changed tests for null pointers to
+ empty().
+ (Add): Delete the temp class member, use function local temp.
+ (operator []): change char* args to const String&
+ (CreateSearchDB): change char* args to const String&
+
+ * htcommon/DocumentRef.cc:(AddDescription): Add some const in functions arguments.
+ Use a WordReference as insertion context instead of merely the docid: it contains
+ the insertion context.
+ (AddAnchor): Add some const in functions arguments.
+
+ * htcommon/DocumentRef.h: Add some const in inline functions arguments.
+
+ * htcommon/Makefile.am: add WordKey + WordKey.h generation
+
+ * htcommon/word_builder.pl, word.desc, WordKey.h.tmpl: generate WordKey.h from WordKey.h.tmpl and
+ word.desc
+
+ * htcommon/WordList.cc: In general remove code that belongs to WordReference rather
+ than WordList and cleanup const + String.
+ (WordList) the constructor takes a Configuration object in argument.
+ (Word -> Replace): Word method replaced by Replace method because more explicit. Now
+ takes a WordReference in argument instead of the list of fields values.
+ (valid_word deleted, IsValid only): Add some const in functions arguments.
+ (BadWordFile): change char* args to const String&
+ (Open + Read -> Open): Open and Read merge into Open with mode argument. change char* args
+ to const String&.
+ (Add): use WordReference::Pack and simply do Put.
+ (operator[], Prefix ...) now take WordReference instead of Word. Automatic Conversion from
+ Word for compatibility thru WordReference(const Word& w).
+ (Dump): change char* args to const String&
+ (Walk): use WordReference member functions instead of hard coded packing
+
+ * htcommon/WordRecord.h: move flag definitions to WordReference.h
+ only keep anchor, the rest moved to key.
+
+ * htdig/Document.cc: change all config[""] manipulations from char* to String
+ or const String
+ (setUsernamePassword): Add some const in functions arguments.
+
+ * htdig/HTML.cc: change all config[""] manipulations from char* to String
+ or const String. Change null pointer tests to empty().
+ (transSGML): change char* args to const String&
+
+ * htdig/HtHTTP.cc: Add error messages for default cases in every switch.
+
+ * htdig/PDF.cc: (parse) change char* to const String& for config[""]
+
+ * htdig/Plaintext.cc: (parse) remove unused variable
+
+ * htdig/Retriever.cc: use WordReference word_context instead of simple docid
+ to hold the insertion context.
+ (Retriever) pass config to WordList initializer.
+ (setUsernamePassword): Add some const in functions arguments.
+ (Initial): change char* args to const String&
+ (parse_url): use WordReference word_context, add debug information.
+ (RetrievedDocument): set anchor in word_context.
+ (got_word): use Replace instead of Word
+ (got_*): Add some const in functions arguments.
+
+ * htdig/htdig.cc: change all config[""] manipulations from char* to String
+
+ * htdoc/cf_generate.pl: compute attrs.html, cf_byprog.html and cf_byname.html from
+ ../htlib/default.cc and attrs_head.html attrs_tail.html cf_byname_head.html cf_byname_tail.html
+ cf_byprog_head.html cf_byprog_tail.html
+ Add rules in Makefile.am
+
+ * htfuzzy: In every program I changed the constructor to take a
+ Configuration argument. The openIndex and writeDB had this
+ argument sometime used it, sometimes used the global
+ config. Having it in the constructor is cleaner and safer, there
+ is no more reference to the global config. I also changed some
+ char* to String and const. Most of the programs look the same, I
+ won't go into details here :-}
+
+ * htlib/Configuration.cc: changed separators from String* to String. Simpler.
+ (~Configuration): removed because not needed.
+ (Add): change to String, remove new String + delete for local var.
+ (Find, operator[]): make it const fct, add some const in functions arguments.
+ (Value + Double): killed, replaced by as_integer + as_double from String
+ (Boolean): use String methods + string objects
+ (Defaults): Add some const in functions arguments.
+
+ * htlib/Configuration.h: add
+ char *type; // Type of the value (string, integer, boolean)
+ char *programs; // White separated list of programs/modules using this attribute
+ char *example; // Example usage of the attribute (HTML)
+ char *description; // Long description of the attribute (HTML)
+ to the ConfigDefaults type.
+
+ * htlib/Connection.cc: (assign_server) change char* args to const String&
+
+ * htlib/DB2_db.cc: Merge with DB2_hash.
+ Add compare and prefix functions pointers.
+ Merge OpenRead & OpenReadWrite into Open, keep for compatibility.
+ skey and data are now strings instead of DBT.
+ Remove Get_Next_Seq.
+ Get_Next now returns key and value in arguments.
+ Remove all other Get_Next interfaces.
+
+ * htlib/Database.h:
+ Compatibility functions for Get_Next
+ Put, Get, Exists, Delete take String args and are inline
+ Add SetPrefix and SetCompare
+
+ * htlib/Dictionary.cc:
+ Add copy constructor.
+ Add DictionaryCursor that holds the traversal context.
+ Use DictionaryCursor object for traversal without explicit
+ cursor specified.
+ Add constness where meaningful.
+
+ * htlib/HtPack.cc:
+ (htPack) format is const, change strtol call
+ to use temporary variable to cope with constness.
+ (htUnpack) dataref argument is not a reference anymore. Not used
+ anywhere and kind of hidden argument nobody wants.
+
+ * htlib/HtRegex.cc: set, match, HtRegex have const args.
+
+ * htlib/HtWordCodec.cc: (code) orig is const
+
+ * htlib/HtWordType.cc,h: statics is made of String instead of char*. Remove
+ static String punct_and_extra from Initialize.
+
+ * htlib/HtZlibCodec.cc: len is unsigned int
+
+ * htlib/ParsedString.cc: add constness to function args
+ (get) use String instead of char
+
+ * htlib/QuotedStringList.cc: inline functions argument variations and
+ add constness.
+
+ * htlib/String.cc: add constness wherever possible.
+
+ * htlib/htString.h: Add const get, char* cast, operator [].
+ Add as_double conversion.
+
+ * htlib/StringList.cc: inline functions argument variations and
+ add constness.
+
+ * htlib/StringMatch.cc: add constness to function args.
+
+ * htlib/URL.cc: add constness to function args.
+ (URL): fct arg was used as temp. Change, clearer.
+
+ * htlib/lib.h: add const declaration of string manipulation functions.
+ Two forms for mystrcasestr: const and not const.
+
+ * htlib/strcasecmp.cc: add constness to function args.
+
+ * htlib/timegm.c: add declaration for __mktime_internal
+
+ * htmerge/db.cc: change *doc* vars from char* to const String, use
+ new WordList + WordReference interface.
+
+ * htmerge/docs.cc: change *doc* vars from char* to const String.
+
+ * htmerge/words.cc: use new WordList + WordReference interface.
+
+ * htsearch/Display.cc: use empty method on String where appropriate.
+ use String instead of char* where config[""] used.
+ (includeURL): change char* args to const String&
+
+ * htsearch/ResultMatch.cc: (setTitle, setSortType) change char* args to const String&
+
+ * htsearch/Template.cc: (createFromFile) change char* args to const String&
+
+ * htsearch/Template.h: accessors return const String& or take const char*
+
+ * htsearch/TemplateList.cc: (get) use const String for internalNames.
+
+ * htsearch/htsearch.cc: use String instead of char* where config[""] used.
+
+ * htsearch/parser.cc: Initialize WordList member with config global.
+ (perform_push): free the result list after calling score.
+ (score, phrase): use new WordList + WordReference interface.
+
+Thu Sep 23 14:29:29 1999 Loic Dachary <loic at ceic.com>
+
+ * htcommon/WordKey.h.tmpl, WordKey.cc: new, describe the key of the word
+ database.
+
+ * htcommon/word.desc: new, abstract description of the key structure of the word
+ database.
+
+ * htcommon/word_builder.pl: new, generate WordKey.h from WordKey.h.tmpl
+
+ * htcommon/WordReference.cc: move key manipulation to WordKey.cc
+ Add Unpack/Pack functions. Add accessors for fields and move fields to private.
+ Add constness where possible.
+
+Mon Sep 20 14:50:47 1999 Loic Dachary <loic at ceic.com>
+
+ * Everywhere config["string"] is used, check that it's *not* converted to
+ char* for later use. Keep String object so that there is no chance to
+ use a char* that has been deallocated. Using a String as return for config["string"]
+ is also *much* safer for the great number of calls that did not check for a possible
+ 0 pointer return.
+
+ * htfuzzy/*.{cc,h}: const Configuration& config member. Constructor sets it.
+ Remove config argument from openIndex & writeDB. The idea (as it was initially,
+ I guess) is to be able to have a standalone fuzzy library using a specific
+ configuration file. It is now possible and consistent.
+
+ * htlib/htString.cc: more constness where appropriate. Changed compare
+ to have const String& arg instead of const Object* because useless and
+ potential source of buggy code.
+
+ * htfuzzy/Regex.cc (getWords): fix buggy setting of extra_word_chars
+ configuration value. It is set to change the behaviour of HtStripPunctuation
+ but this function gets the extra_word_chars from a static array initialized
+ at program start by static void Initialize(Configuration & config). Use straight
+ s.remove() instead. Besides, the string was anchored by prepending a ^ that
+ was removed because part of the reserved chars.
+
+Mon Sep 20 11:47:05 1999 Loic Dachary <loic at ceic.com>
+
+ * htlib/Configuration.cc (operator []): changed return type to String
+ to solve memory leak. When char* the string was malloced from ParsedString
+ after substitution and never freed. In fact it was even worse : it was
+ freed before use in some cases.
+
+Sun Sep 19 19:12:44 1999 Loic Dachary <loic at ceic.com>
+
+ * htdoc/cf_generate.pl, htcommon/defaults.cc, htlib/Configuration.h:
+ Change the structure of the configuration defaults. Move
+ description, examples, types, used_by information from attrs.html.
+ Write cf_generate.pl to build attrs.html, cf_byname, cf_byprog
+ from defaults.cc. Makes it easier to maintain an up to date
+ description of existing attributes. About 10 attributes existed
+ in defaults.cc and were not described in the HTML pages.
+ Add rules in htdoc/Makefile.am to generate the pages if a source
+ changes.
+
+Fri Sep 17 19:34:48 1999 Loic Dachary <loic at ceic.com>
+
+ * Makefile.config: add -Wall to all compilation and fix
+ all resulting warnings.
+
+ * htlib/Connection.cc (assign_server): remove redundant test
+ and cast literal value to unsigned
+
+ * htlib/String.cc: add const qualifier where possible. Helps
+ dealing with const objects at an upper level.
+
+Fri Sep 17 18:27:57 1999 Alexander Bergolth <leo at leo.wu-wien.ac.at>
+
+ A few changes so that it compiles with xlC on AIX:
+
+ * configure.in, include/htconfig.h.in: Add check for sys/select.h.
+ Add "long unsigned int" to the possible getpeername_length types.
+
+ * htdig/htdig.cc: Moved variable declaration out of case block.
+
+ * htlib/Connection.cc: Include sys/select.h.
+
+ * htcommon/WordList.cc: just a type cast
+
+ * htlib/regex.c: define true and false only if they aren't already
+
+ * htdig/Transport.{h,cc}: removed inline keywords (inline functions
+ have to be defined and declared simultaneously)
+
+ * htlib/{mktime.c,regex.h,strptime.c,timegm.c}: change // comments
+ to /* ... */
+
+Tue Sep 14 01:15:48 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/db.cc: Rewrite to use the WordList functions to merge
+ the two word databases. Also make sure to load the document
+ excerpt when adding in DocumentRefs.
+
+ * htmerge/docs.cc: Fix bug where ids were not added to the discard
+ list correctly.
+
+ * htmerge/words.cc: Fix bug where ids were not checked for
+ existence in the discard list correctly.
+
+Sun Sep 12 12:27:16 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Remove word_list since that file is no
+ longer used.
+
+ * htdig/htdig.cc: Ensure -a and -i are followed for the word_db
+ file. Fixes PR #638.
+
+Sat Sep 11 00:11:28 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/StringMatch.h: Add back mistakenly deleted #ifndef/#define.
+
+Fri Sep 10 23:07:43 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/*, htcommon/*, htdig/*, htlib/*: Add copyright information.
+
+Fri Sep 10 11:33:50 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htnotify/htnotify.cc: Add copyright information.
+
+ * htsearch/* htfuzzy/*: Ditto.
+
+Fri Sep 10 15:24:44 1999 Loic Dachary <loic at ceic.com>
+
+ * htdig/Retriever.cc: change static WordList words to
+ object member. words.Close() at end of Start function
+ to make sure data is flushed by database.
+
+ * htcommon/WordList.cc (Close): test isopen to prevent
+ ugly crash. Remove isopen test in calling functions.
+
+Fri Sep 10 13:45:53 1999 Loic Dachary <loic at ceic.com>
+
+ * htcommon/WordList.h htcommon/WordList.cc: methods Collect
+ and Walk that factorise the behaviour of operator [], Prefix
+ and WordRefs.
+
+ * htcommon/WordList.h htcommon/WordList.cc: method Dump to
+ dump an ascii version of the word database.
+
+ * htcommon/WordReference.h,htcommon/WordReference.cc: method Dump
+ to write an ascii version of a word.
+
+ * htdig/htdig.cc: -t now also dump word database in ascii as
+ well.
+
+ * htdoc/attrs.html,cf_byprog.html,cf_byname.html: added doc
+ for word_dump
+
+Thu Sep 9 20:30:18 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Fuzzy.h, htfuzzy/Fuzzy.cc, htfuzzy/Prefix.cc,
+ htfuzzy/Regex.cc, htfuzzy/Speling.cc, htfuzzy/Substring.cc,
+ htfuzzy/htfuzzy.cc, htfuzzy.h: Change to use WordList code instead
+ of direct access to the database.
+
+Thu Sep 9 14:55:59 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/parse_doc.pl: fix bug in pdf title extraction.
+
+Tue Sep 7 23:49:41 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/ExternalParser.h, htdig/ExternalParser.cc (parse): Change
+ parsing of location to allow phrase searching -- location is *not*
+ just 0-1000.
+
+ * htdig/Plaintext.h, htdig/Plaintext.cc, htdig/PDF.cc: Ditto.
+
+ * htdig/Retriever.h, htdig/Retriever.cc: Don't call
+ HtStripPunctuation. This is now done in the WordList::Word method.
+
+ * htcommon/WordList.h htcommon/WordList.cc (Prefix): New method to
+ do prefix retrievals. Essentially the same as [], except the loop
+ is broken only in the unlikely event that we retrieve something
+ beyond the range set.
+ (Exists): New method for checking the existence of a
+ string--attempt to retrieve it and determine if anything's
+ actually there.
+ (Word): Call HtStripPunctuation as part of the cleanup.
+
+Tue Sep 7 21:37:44 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Add new configuration option
+ remove_unretrieved_urls to remove docs that have not been accessed.
+
+ * htmerge/docs.cc (convertDocs): Use it.
+
+ * htcommon/defaults.h, htcommon/WordRecord.h,
+ htcommon/WordReference.h: Add copyright notice to head of file.
+
+Mon Sep 6 10:32:59 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtZlibCodec.h, htlib/HtZlibCodec.cc(instance): New method
+ as used in other codecs.
+ (encode, decode): Fix compilation errors.
+
+ * htlib/Makefile.am: Added HtZlibCodec.cc to the compilation list.
+
+ * htcommon/DocumentDB.cc (ReadExcerpt): Call HtZlibCodec to decompress
+ the excerpt.
+ (Add): Call HtZlibCodec to compress the excerpt before storing.
+ (Open, Read): If the databases are
+ already open, close them first in case we're opening under a
+ different filename.
+ (CreateSearchDB): Remove call to external
+ sort program. Database is already sorted by DocID.
+
+ * configure.in, configure: Remove check for external sort
+ program. No longer necessary.
+
+ * */Makefile.in: Regenerate using automake.
+
+Sun Sep 5 13:50:34 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/docs.cc: Ensure a document with empty excerpt has
+ actually been retrieved. Otherwise document stubs are always
+ removed.
+
+ * htlib/String.cc: Implement the nocase_compare method.
+
+ * htcommon/WordReference.cc: Implement a compare method for
+ WordRefs to use in sorting. Uses the above.
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc: Update the
+ headers.
+
+ * htcommon/DocumentDB.h: Ditto.
+
+Sun Sep 5 01:37:27 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/WordList.cc(Flush): Call Add() instead of storing the
+ data ourselves. Additionally, don't open the database ourself (and
+ then close it), instead call Open() if it's not open already.
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc(AddDescription):
+ Pass in a WordList to use when adding link text words. Ensures
+ that the word db is never opened twice for writing.
+
+ * htdig/Retriever.cc: Call AddDescription as above.
+
+ * htdig/Server.cc(ctor): If debugging, write out an entry for the
+ robots.txt file.
+
+ * htlib/HtHeap.cc(percolateUp): Fix a bug where the parent was not
+ updated when moving up more than once.
+ (pushDownRoot): Fix a bug where the root was improperly pushed
+ down when it required looping.
+
+Fri Sep 3 16:23:23 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtHeap.cc(Remove): Correct bug where after a removal, the
+ structure was not "re-heapified" correctly. The last item should
+ be moved to the top and pushed down.
+ (pushDownRoot): Don't move items past the size of the underlying
+ array.
+
+ * htdig/Server.h, htdig/Server.cc: Change _paths to work on a
+ heap, based on the hopcount. Ensures on a given server that the
+ indexing will be done in level-order by hopcount.
+
+Wed Sep 01 15:40:37 1999 Loic Dachary <loic at ceic.com>
+
+ * test: implement minimal tests for htsearch and htdig
+
+Tue Aug 31 02:17:04 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/WordRecord.h: Change back to struct to ensure integrity
+ when compressed and stored in the word database.
+
+ * htcommon/WordList.cc (Flush): Use HtPack to compress the
+ WordRecord before storage.
+ ([], WordRefs): Use HtUnpack to decompress the WordRecord after
+ storage.
+
+Sun Aug 29 00:42:07 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc (convertToBoolean): Remove debugging
+ strings.
+
+ * htsearch/parser.h: Add new method score(List) to merge scoring
+ for both standard and phrase searching.
+
+ * htsearch/parser.cc(phrase): Keep the current list of successful
+ matched words around to pass to score and perform_phrase.
+ (perform_phrase): Naively (and slowly, but correctly) loop through
+ past words to make sure they match DocID as well as successive locations.
+ Move scoring to score().
+ (perform_push): Move scoring to score().
+ (score): Loop through a list of WordReferences and create a list
+ of scored DocMatches.
+
+Sun Aug 29 00:33:17 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc(createLogicalWords): Hack to produce
+ correct output with phrase searching (e.g. anything in quotes is
+ essentially left alone). Ensure the StringMatch pattern includes
+ the phrase with correct spacing as well.
+ (setupWords): Add a " token whenever it occurs in the query.
+ (convertToBoolean): Make sure booleans are not inserted into
+ phrases.
+
+ * htsearch/parser.h: Add new methods phrase and perform_phrase to
+ take care of parsing phrases and performing the actual matching.
+
+ * htsearch/parser.cc(lexan): Return a '"' when present for phrase
+ searching.
+ (factor): Call phrase() before parsing a factor--phrases are the
+ highest priority, so ("RedHat Linux" & Debian) ! Windows makes
+ sense.
+ (phrase): New method--slurps up the rest of a phrase and calls
+ perform_phrase to do the matching.
+ (perform_phrase): New method--currently just calls perform_and to
+ give the simulation of a phrase match.
+
+Sat Aug 28 15:57:53 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Server.h, htdig/Server.cc: Undo yesterday's change -- still
+ very buggy and shouldn't be used yet.
+
+ * htdig/Retriever.cc (parse_url): Change default index to 1 to
+ more closely match DocIDs shown with verbose output.
+
+ * htsearch/DocMatch.h: Change score to double and clean up
+ headers.
+
+ * htcommon/WordRecord.h: Change unnecessary long ints (id and
+ flags) to plain ints.
+
+ * htdig/HTML.cc (parse): Call got_word with actual word sequence
+ (i.e. 1, 2, 3...) rather than scaling to 1-1000 by character
+ offset.
+
+ * htlib/Database.h, htlib/DB2_db.h, htlib/DB2_hash.h: Change
+ Get_Item to Get_Next(String item) to return the data as a
+ reference. This makes it easier to use in a loop and cuts the
+ database calls in half.
+
+ * htlib/DB2_db.cc, htlib/DB2_hash.cc: Implement it, making sure we
+ keep the possibly useful data around, rather than tossing it!
+
+ * htsearch/htsearch.cc(htsearch): Don't attempt to open the word db
+ ourselves. Instead, pass the filename off to the parser, which
+ will do it through WordList.
+
+ * htsearch/parser.h: Use a WordList instead of a generic Database.
+
+ * htsearch/parser.cc(perform_push): Use the WordList[] operator to
+ return a list of all matching WordRefs and loop through, summing
+ the score.
+
+ * htcommon/WordList.cc (Flush): Don't use HtPack on the
+ data--somehow when unpacking, there's a mismatch of sizes.
+ (Read): Fix thinko where we attempted to open the database as a
+ DB_HASH.
+ ([]): Don't use HtUnpack since we get mismatches. Use the new
+ Get_Next(data) call instead of calling Get_Item separately.
+ (WordRefs): Same as above.
+
+Fri Aug 27 09:44:09 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (Need2Get): Remove duplicate detection code for
+ local_urls. The code is somewhat buggy and should be replaced by
+ more general code shortly.
+
+ * htdig/Server.h, htdig/Server.cc (push, pop): Change _paths to a
+ HtHeap sorted on hopcount first (and order placed on heap
+ second). Ensures that on each server, the order indexed is
+ guaranteed to be level-order by hopcount.
+
+ * htdig/URLRef.h, htdig/URLRef.cc (compare): Add comparison method
+ to enable sorting by hopcount.
+
+Fri Aug 27 09:36:35 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/WordList.h, htcommon/WordList.cc (WordList): Change
+ words to a list instead of a dictionary for minor speed improvement.
+
+Thu Aug 26 11:18:20 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc, htdoc/attrs.html: increase default
+ maximum_word_length to 32.
+
+Wed Aug 25 16:50:16 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Retriever.cc(got_word): add code to check for compound words
+ and add their component parts to the word database.
+ * htdig/PDF.cc(parseString), htdig/Plaintext.cc(parse): Don't strip
+ punctuation or lowercase the word before calling got_word. That
+ should be left up to got_word & Word methods.
+
+ * htlib/StringMatch.h, htlib/StringMatch.cc(Pattern, IgnoreCase):
+ Add an IgnorePunct() method, which allows matches to skip over valid
+ punctuation, change Pattern() and IgnoreCase() to accommodate this.
+ * htsearch/htsearch.cc(main, createLogicalWords): use IgnorePunct()
+ to highlight matching words in excerpts regardless of punctuation,
+ toss out old origPattern, and don't add short or bad words to
+ logicalPattern.
+
+ * htlib/HtWordType.h, htlib/HtWordType.cc(Initialize): set up and
+ use a lookup table to speed up HtIsWordChar() and HtIsStrictWordChar().
+
+Mon Aug 23 10:13:05 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc(parse): fix problems with null pointer when attempting
+ SGML entity decoding on bare &, as reported by Vadim Chekan.
+
+Thu Aug 19 11:52:06 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc(main): Fix to allow multiple keywords
+ input parameter definitions.
+
+ * contrib/parse_doc.pl: make spaces optional in LANGUAGE = POSTSCRIPT
+ PJL test.
+
+Wed Aug 18 11:27:46 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/PDF.cc(parse): Fixed wrong variable name in new code.
+ Double-Oops! (It was Friday the 13th, after all...)
+
+Tue Aug 17 16:26:46 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtHeap.cc(Remove): apply Geoff's patch to fix Remove.
+
+ * htlib/HtVector.h, htlib/HtVector.cc(Index): various bounds overrun
+ bug fixes and checking in Last(), Nth() & Index().
+
+Mon Aug 16 13:55:10 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(expandVariables): fix up test for &amp;
+
+Mon Aug 16 12:08:57 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * Makefile.am, Makefile.in, installdir/Makefile.am,
+ installdir/Makefile.in: change all remaining INSTALL_ROOT to DESTDIR.
+
+Fri Aug 13 15:44:31 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/PDF.cc(parse): added missing ')' in new code. Oops!
+
+ * htlib/strptime.c, htlib/mktime.c: added #include "htconfig.h"
+ to pick up definitions from configure program. Let's try to
+ remember that config.h != htconfig.h!
+
+Fri Aug 13 14:49:07 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: removed unused HTDIG_TOP, changed AM_WITH_ZLIB
+ by CHECK_ZLIB
+
+Fri Aug 13 14:00:16 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/PDF.cc(parse), htcommon/defaults.cc, htdoc/attrs.html
+ (pdf_parser): Removed -pairs option from default arguments, added
+ special test for acroread to decide whether to use output file or
+ directory as last argument (also adds -toPostScript if missing).
+ Program now tries to test for existence of parser before trying
+ to call it.
+
+Fri Aug 13 10:10:16 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/attrs.html(pdf_parser): updated xpdf version number.
+
+Thu Aug 12 17:09:37 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/parse_doc.pl: updated for xpdf 0.90, plus other fixes.
+
+Thu Aug 12 11:12:07 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/attrs.html(logging): added Geoff's description of log lines.
+
+Thu Aug 12 11:21:12 1999 Loic Dachary <loic at ceic.com>
+
+ * strptime fixes : AC_FUNC_STRPTIME defined in acinclude.m4 and used in configure.in,
+ conditional compilation of strptime.c (only if HAVE_STRPTIME not defined),
+ removed Htstrptime (strptime.c now defines strptime), changed all calls to Htstrptime
+ to calls to strptime.
+
+Wed Aug 11 16:59:41 1999 Loic Dachary <loic at ceic.com>
+
+ * */Makefile.am: use -release instead of -version-info because nobody
+ wants to bother with published shared lib interfaces version numbers
+ at present.
+
+ * htlib/Makefile.am: added langinfo.h
+
+Wed Aug 11 15:00:07 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * acconfig.h: removed MAX_WORD_LENGTH
+
+ * re-run auto* to make sure chain is consistent
+
+ * Makefile.am: improve distclean for tests
+
+Wed Aug 11 13:46:22 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * configure.in: change --enable-test to --enable-tests so
+ that Berkeley DB tests are not activated. Since they depend
+ on tcl this can be a pain.
+
+ * acinclude.m4: AM_PROG_TIME locate time command + find out
+ if verbose output is -l (freebsd) or -v (linux)
+
+Wed Aug 11 13:13:39 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * acinclude.m4 : AM_WITH_ZLIB autoconf macro for zlib detection that
+ allows --with-zlib=DIR to specify the install root of zlib,
+ --without-zlib to prevent inclusion of zlib. If nothing
+ specified zlib is searched in /usr and /usr/local.
+ --disable-zlib is replaced with --without-zlib.
+
+ * configure.in,configure,aclocal.m4,db/dist/acinclude.m4,
+ db/dist/aclocal.m4,db/dist/configure,db/dist/configure.in:
+ changed to use AM_WITH_ZLIB
+
+Tue Aug 10 21:14:34 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc (outputVariable): Fix compilation error with
+ assignment between char * and char *.
+
+ * htsearch/htsearch.cc (main): Use cleaner trick to sidestep
+ discarding const char * as suggested by Gilles.
+
+Tue Aug 10 17:24:12 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(expandVariables): clean up, simplify and
+ label lexical analyzer states.
+
+Tue Aug 10 17:04:54 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(expandVariables, outputVariable): add handling
+ for $%(var) and $&(var) in templates. Still to be documented.
+
+Tue Aug 10 20:13:52 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * db/mp/mp_bh.c: fixed HAVE_ZLIB -> HAVE_LIBZ
+
+Tue Aug 10 17:58:01 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * configure,configure.in,db/dist/configure.in,db/dist/configure:
+ added --with-zlib configure flag for htdig to specify zlib
+ installation path. Motivated to have compatible tests between
+ htdig and db as far as zlib is concerned. Otherwise configuration
+ is confused and misses an existing libz.
+
+Tue Aug 10 17:44:49 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * db/mp/mp_fopen.c: fixed cmpr_open called even if libz not here
+
+Tue Aug 10 17:40:53 1999 Loic Dachary <loic at yoda.ceic.com>
+
+ * htlib/langinfo.h: header missing on FreeBSD-3.2, needed
+ by strptime.c
+
+Tue Aug 10 11:43:14 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.h, htdig/HTML.cc(parse, do_tag): fix problems with
+ SGML entity decoding, add decoding of entities within tag attributes.
+
+Mon Aug 9 21:13:50 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HtHTTP.h(SetRequestMethod): Fix declaration to be void.
+
+ * htdig/Transport.h(GetRequestMaxDocumentSize): Fix declaration to
+ return int.
+
+ * htdig/Retriever.cc(got_href): Fix mistake in hopcount
+ calculations. Now returns the correct hopcount even for pages
+ when a faster path is found. (Still need to change indexing to
+ sort on hopcount).
+
+ * htsearch/htsearch.cc(main): Fix compiler error in gcc-2.95 when
+ discarding const by using strcpy. It's a hack, hopefully there's a
+ better way.
+
+Mon Aug 9 17:23:15 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/URL.cc(ServerAlias): fix small memory leak in new default
+ path code (don't need to allocate new from string each time).
+
+ * htlib/cgi.cc(init): Fix PR#572, where htsearch crashed if
+ CONTENT_LENGTH was not set but REQUEST_METHOD was.
+
+ * htfuzzy/Fuzzy.cc(getWords), htfuzzy/Metaphone.cc(vscode):
+ Fix Geoff's change of May 15 to Fuzzy.cc, add test to vscode macro
+ to stay in array bounds, so non-ASCII letters don't cause a segfault.
+ Should fix PR#514.
+
+Mon Aug 9 17:03:45 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * include/htconfig.h.in, htcommon/WordList.cc(Word,Flush&BadWordFile),
+ htcommon/DocumentRef.cc(AddDescription), htcommon/defaults.cc,
+ htsearch/parser.cc(perform_push), htdoc/attrs.html,
+ htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Convert the MAX_WORD_LENGTH compile-time option into the run-time
+ configuration attribute maximum_word_length. This required reinserting
+ word truncation code that had been taken out of WordList.cc.
+
+Mon Aug 9 16:34:14 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HtHTTP.cc (isParsable): allow application/pdf as parsable,
+ to use builtin PDF code.
+
+ * htdig/HtHTTP.cc (ParseHeader),
+ htdig/Document.cc (readHeader): clean up header parsing.
+
+ * htdig/Document.cc (getdate): make tm static, so it's initialized
+ to zeros. Should fix PR#81 & PR#472, where strftime() would crash
+ on some systems. Idea submitted by benoit.sibaud at cnet.francetelecom.fr
+
+ * htlib/URL.cc (parse): fix PR#348, to make sure a missing or invalid
+ port number will get set correctly.
+
+Mon Aug 9 15:42:41 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Added descriptions for attributes that were missing, added a few
+ clarifications, and corrected a few defaults and typos.
+ Covers PR#558, PR#626, and then some.
+
+ * configure.in, configure, include/htconfig.h.in, htlib/regex.c:
+ PR#545 fixed - configure tests for presence of alloca.h for regex.c
+
+Sat Aug 07 13:40:17 1999 Loic Dachary <loic at ceic.com>
+
+ * configure.in: remove test for strptime. Run autoconf + autoheader.
+
+ * htlib/HtDateTime.cc: always use htdig strptime, do not try to use
+ existing function in libc.
+
+ * htlib/HtDateTime.h: move inclusion of htconfig.h on top of file,
+ change #ifdef HAVE_CONFIG to HAVE_CONFIG_H
+
+Fri Aug 6 16:37:33 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc (UseProxy): fix call to match() and test of
+ return value to work as documented for http_proxy_exclude (PR#603).
+
+Fri Aug 06 15:06:23 1999 <loic at yoda.ceic.com>
+
+ * db/dist/config.hin, db/mp/mp_cmpr.c db/db/db.c, db/mp/mp_fopen.c:
+ disable compression if zlib not found by configure.
+
+Thu Aug 05 12:27:15 1999 <loic at yoda.ceic.com>
+
+ * test/dbbench.cc: invert -z and -Z for consistency
+
+ * test/Makefile.am: add dbbench call examples
+
+Thu Aug 05 11:38:58 1999 Loic Dachary <loic at ceic.com>
+
+ * test/Makefile.am: all .html go in distribution, compile dbbench
+ that tests Berkeley DB performances.
+
+ * configure.in/Makefile.am: conditional inclusion of the test
+ directory in the list of subdirs (--enable-test). The list
+ of subdirs is now @HTDIGDIRS@ in configure.in & Makefile.am
+
+ * db/*: Transparent I/O compression implementation. Defines the DB_COMPRESS flag.
+ For instance DB_CREATE | DB_COMPRESS.
+
+ * db/db_dump/load: add -C option to specify cache size to db_dump/db_load
+
+Wed Aug 4 22:57:27 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * db/*: Import of Sleepycat's Berkeley DB 2.7.5.
+
+Wed Aug 4 22:40:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * contrib/htparsedoc/htparsedoc: Add in contributed bug fixes from
+ Andrew Bishop to work on SunOS 4.x machines.
+
+Wed Aug 4 01:58:52 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * COPYING, htdoc/COPYING, configure.in, Makefile.am, Makefile.in:
+ Update information to use canonical version of the GPL from the
+ FSF. In particular, this version has the correct mailing address
+ of the FSF.
+
+Mon Aug 02 11:28:00 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htlib/htString.h, htlib/String.cc : added the possibility to
+ insert an unsigned int into a string.
+ * htdig.cc : with verbose mode shows start and end time.
+
+Thu Jul 22 18:10:00 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Transport.cc, htdig/HtHTTP.cc : modified the destructors.
+
+Thu Jul 22 13:10:00 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Transport.cc, htdig/Transport.h, htdig/HtHTTP.cc,
+ htdig/HtHTTP.h: Re-analyzed inheritance methods and attributes of
+ the 2 classes. This is a first step, not definitive ... cos it
+ still doesn't work as I hope.
+
+Tue Jul 20 11:21:52 1999 <loic at ceic.com>
+
+ * configure.in : added AM_MAINTAINER_MODE to prevent unwanted
+ dependencies check by default.
+
+ * db/Makefile.in : remove Makefile when distclean
+
+Mon Jul 19 13:23:53 1999 <loic at ceic.com>
+
+ * Makefile.config (INCLUDES): added -I$(top_srcdir)/include because
+ automatically -I../include is not good, added -I$(top_builddir)/db/dist
+ because some db headers are configure generated (if building in a
+ directory that is not the source directory).
+
+ * rename db/Makefile db/Makefile.in: otherwise it does not show
+ up if building in a directory that is not the source directory.
+
+Mon Jul 19 13:02:22 1999 <loic at ceic.com>
+
+ * .cvsignore: do not ignore Makefile.config
+
+Sun Jul 18 22:47:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/parser.cc: Eliminated compiler errors. Currently
+ returns no matches until bugs in the WordList code are fixed.
+
+Sun Jul 18 22:42:04 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/htmerge.h: Cleanup, including WordRecord and
+ WordReference as needed.
+
+ * htmerge/htmerge.cc: Update for files necessary for merge
+ calls.
+ Call convertDocs before mergeWords so that the discardList gets
+ the list of documents deleted.
+
+ * htmerge/docs.cc: Update for difference in calling order.
+
+ * htmerge/words.cc: Update (and significant cleanup) since
+ WordList writes directly to db.words.db. Iterate over the stored
+ words, deleting those from deleted documents.
+
+ * htmerge/db.cc: Update to eliminate compiler errors. Currently
+ disabled until bugs in the words code are fixed.
+
+Sun Jul 18 22:33:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Collapse the multiple heading_factors into
+ one. (It's prohibitive to define a flag for each h* tag).
+ Add a new url_factor for the text of URLs (presently unused).
+
+ * htcommon/DocumentRef.cc(AddDescription): Use FLAG_LINK_TEXT as
+ defined in htcommon/WordRecord.h.
+
+ * htdig/Retriever.h: Change factor to accommodate flags instead of
+ weighting factors.
+
+ * htdig/Retriever.cc: Update to use flags, and define the indexed
+ flags in factor as appropriate.
+
+ * htdig/HTML.cc: Update calls to got_word with appropriate new
+ offsets into factor[].
+
+Sun Jul 18 22:18:16 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/WordReference.h, htcommon/WordRecord.h: Update to use
+ flags instead of weight.
+
+ * htcommon/WordList.h, htcommon/WordList.cc: Add database access
+ routines to match DocumentDB.cc.
+ (Word): Recognize flags instead of weight, simply add the
+ word. (Duplicates expected!)
+ (mark*): Simply delete the list of words.
+ (flush): Rather than dump to a text file, dump directly to the db.
+
+Sun Jul 18 21:50:04 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Database.h, htlib/DB2_db.h, htlib/DB2_hash.h: Add new
+ method Get_Item to access the data of the current item when using
+ Get_Next() or Get_Next_Seq().
+
+ * htlib/DB2_db.h, htlib/DB2_hash.cc: Implement Get_Item() using
+ cursor access.
+
+Sat Jul 17 12:59:01 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * test/*.html: Added various HTML files as the beginnings of a
+ testing suite.
+
+Fri Jul 16 16:06:27 1999 Loic Dachary <loic at ceic.com>
+
+ * All libraries (except db) use libtools. Shared libraries are
+ generated by default. --disable-shared to get old behaviour.
+ Libraries are installed in all cases.
+
+ * Change structure of default installation directory (match
+ standard).
+ database : var/htdig
+ programs : bin
+ libraries : lib
+
+ Like default apache:
+ conf : conf
+ htdocs : htdocs/htdig
+ cgi-bin : cgi-bin
+
+ * Switch all Makefile.in into Makefile.am
+
+ * CONFIG.in CONFIG : removed. Replaced with --with- arguments in
+ configure.in
+
+ * Makefile.config.in removed, only keep Makefile.config : automake
+ automatically defines variables for each AC_SUBST variables.
+ Makefile.config has HTLIBS + DEFINES
+
+ * db/Makefile : added to forward (clean all distclean) targets to
+ db/dist and implement distdir target.
+
+ * acconfig.h : created to allow autoheader to work (contains GETPEERNAME_LENGTH_T
+ HAVE_BOOL, HAVE_TRUE, HAVE_FALSE, NEED_PROTO_GETHOSTNAME). Extra definitions
+ added before @TOP@ (TRUE, FALSE, VERSION, MAX_WORD_LENGTH, LOG_LEVEL, LOG_FACILITY).
+
+ * installdir/Makefile.am : installation rules moved from Makefile.am to installdir/Makefile.am
+
+ * include/Makefile.am : distribute htconfig.h.in and stamp-h.in
+
+ * Makefile.am : do not pre-create the directories, creation is done during the installation
+
+ * configure.in: CF_MAKE_INCLUDE not needed anymore : automake handles
+ the include itself.
+
+Fri Jul 16 13:04:27 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc(parse): fix to prevent closing ">" from being passed
+ to do_tag().
+
+Thu Jul 15 21:25:12 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc (readHeader, getParsable): Add back
+ application/pdf to use builtin PDF code.
+
+ * htdig/Makefile.in: Remove broken Postscript parser as it never
+ worked.
+
+ * htlib/URL.cc (normalizePath, path): Use config.Boolean as
+ pointed out by Gilles.
+
+Thu Jul 15 15:54:30 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdoc/attrs.html(pdf_parser & external_parsers): add corrections &
+ clarifications, links to relevant FAQ entries.
+
+Thu Jul 15 18:00:00 1999 CEST Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htlib/HtDateTime.cc, htlib/HtDateTime.h : added the possibility
+ to initialize and compare HtDateTime with integers. Added the
+ constructor HtDateTime (int) and various operator overloading methods.
+
+Wed Jul 14 22:57:14 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/URL.cc (normalizePath, path): If not case_sensitive,
+ lowercase the URL. Should ensure that all URLs are appropriately
+ lowercased, regardless of where they're generated.
+
+Wed Jul 14 22:37:47 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/DB2_db.cc (OpenReadWrite, OpenRead): Add flag DB_DUP to
+ database to allow storage of duplicate keys (in this case,
+ words).
+
+Tue Jul 13 15:36:40 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc (do_tag): Fix handling of <link> and <area>,
+ to use href= instead of src=.
+
+Mon Jul 12 22:31:48 1999 Hanno Mueller <kontakt at hanno.de>
+
+ * contrib/scriptname/results.shtml: Remove unintentional $(VERSION).
+
+Mon Jul 12 22:20:40 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Cleanups suggested by Gilles, combining
+ <link> and <area>, <embed> <object> and <frame> and moving <img>
+ to a separate case.
+
+Sun Jul 11 19:32:38 1999 Hanno Mueller <kontakt at hanno.de>
+
+ * contrib/README: Add scriptname directory.
+
+ * contrib/scriptname/*: An example of using htsearch within
+ dynamic SSI pages
+
+ * htcommon/defaults.cc: Add script_name attribute to override
+ SCRIPT_NAME CGI environment variable.
+
+ * htdoc/FAQ.html: Update question 4.7 based on including htsearch
+ as a CGI in SSI markup.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html,
+ htdoc/hts_templates.html: Update based on behavior of script_name
+ attribute.
+
+ * htsearch/Display.cc: Set SCRIPT_NAME variable to attribute
+ script_name if set and CGI environment variable if undefined.
+
+Sat Jul 10 00:22:34 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Regex.cc (getWords): Anchor the match to the beginning
+ of string, add regex-interpreted characters to extra_word_chars
+ temporarily, and strip remaining punctuation before making a match.
+
+Fri Jul 9 22:35:57 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc: Back out change of June 24.
+
+ * htsearch/htsearch.cc: Ditto.
+
+ * htsearch/htsearch.cc (setupWords): Remove HtStripPunctuation in
+ favor of requiring Fuzzy classes to strip whatever punctuation is
+ necessary.
+
+ * htfuzzy/Fuzzy.h: Add HtWordType.h to #includes and update comments.
+
+ * htfuzzy/Synonym.cc, htfuzzy/Substring.cc, htfuzzy/Speling.cc,
+ htfuzzy/Prefix.cc, htfuzzy/Exact.cc, htfuzzy/Endings.cc,
+ htfuzzy/Fuzzy.cc (getWords): Call HtStripPunctuation on input before
+ performing fuzzy matching.
+
+Thu Jul 8 21:28:44 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Add support for parsing <LINK> tags.
+
+Mon Jul 5 16:53:23 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/htdig.cc (main): Insert '*' instead of username/password
+ combination to hide credentials in process accounting.
+
+Sat Jul 3 17:35:52 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Transport.h(ConnectionWrite): Return value from
+ Connection::write call.
+
+ * htdig/URLRef.h, htdig/URLRef.cc: Cleanup and made hopcount
+ default consistent with 7/3 change to DocumentRef.cc
+
+ * htdig/Server.h, htdig/Server.cc, htdig/Retriever.cc: Cleanup and
+ fixes to match URLRef calling interface.
+
+Sat Jul 3 16:37:29 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.cc (do_tag): Fix <meta> robots parsing to allow
+ multiple directives to work correctly. Fixes PR#578, as provided
+ by Chris Liddiard <c.h.liddiard at qmw.ac.uk>.
+
+Sat Jul 3 00:47:51 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Makefile.in: Remove old SGMLEntities code.
+
+Sat Jul 3 00:26:55 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentRef.cc (Clear): Change default value of
+ docHopCount to 0 to fix several hopcount bugs.
+
+ * htdig/Transport.h, htdig/Transport.cc: Changes to support URL
+ referers as well as authentication credentials.
+
+ * htdig/HtHTTP.h, htdig/HtHTTP.cc(SetCredentials): Implement HTTP
+ Basic Authentication credentials.
+ (SetRequestCommand): Use Referer and Authentication headers if
+ supplied.
+
+Sun Jun 30 11:26:00 1999 Gabriele Bartolini <g.bartol at comune.prato.it>
+
+ * htdig/Transport.h: Inserted the methods declarations regarding
+ the connection management. The code has been moved out from the
+ HtHTTP.h code. Also moved here the static variable 'debug'.
+
+ * htdig/Transport.cc: Definition of the connection management code.
+ The code has been moved out from the HtHTTP.cc code.
+
+ * htdig/HtHTTP.h: Eliminated the connection management code and the
+ static variable 'debug'. Inserted the 'modification_time_is_now' as
+ a static variable, in order to respect the encapsulation principle.
+
+ * htdig/HtHTTP.cc: Eliminated the connection management code and the
+ static variable 'debug' initialization. Inserted the
+ 'modification_time_is_now' initialization.
+
+Sun Jun 27 16:29:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HTML.h: Cleanup.
+
+ * htcommon/defaults.cc: Added default for img_alt_factor for text
+ weighting on <IMG ALT="..." tags.
+
+ * htdig/Retriever.cc: Add slot for img_alt_factor.
+
+ * htdig/HTML.cc (do_tag): Rewrite using Configuration class to
+ separate tag attributes.
+ (parse): Ignore final '>' in string passed to do_tag.
+ (do_tag): Index IMG ALT text.
+
+Fri Jun 25 17:58:44 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Transport.h: Fix virtual methods for Transport_Response to
+ have defaults.
+
+ * htdig/HtHTTP.h: Fix class declaration of HtHTTP class to prevent
+ syntax error. Pointed out by Gabriele.
+
+ * htdig/Transport.cc: Add (empty) ctor and dtor functions for
+ Transport_Response.
+
+Thu Jun 24 22:28:44 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc (main): Add support for form inputs
+ configdir and commondir as contributed by Herbert Martin Dietze
+ <herbert at fh-wedel.de>.
+
+ * htsearch/Display.cc (createURL): If configdir and commondir are
+ defined, add them to URLs sent for other pages.
+
+Wed Jun 23 23:00:18 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HtHTTP.h, htdig/HtHTTP.cc: Make a subclass of Transport.
+
+Wed Jun 23 22:08:20 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Configuration.cc (Add): Handle single-quoted values for
+ attributes.
+
+Tue Jun 22 23:35:39 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Transport.h, htdig/Transport.cc: Virtual classes to handle
+ transport protocols such as HTTP, FTP, WAIS, gopher, etc.
+
+ * htdig/Makefile.in: Make sure they're compiled (not that there's
+ much!)
+
+ * htdig/HtHTTP.h: Add htdig.h to ensure config is defined.
+
+Mon Jun 21 14:33:10 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc(readHeader), htdig/HtHTTP.cc(ParseHeader): fix
+ handling of modification_time_is_now in readHeader, add similar code
+ to ParseHeader.
+
+Sun Jun 20 21:25:15 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.h: Add hop parameter to got_href
+ method. Defaults to 1.
+
+ * htdig/Retriever.cc(got_href): Use it instead of constant 1.
+
+ * htdig/HTML.cc (do_tag): Use new hop parameter to keep the same
+ hopcount for frame, embed and object tags.
+
+ * htdig/Makefile.in: Make sure HtHTTP.cc is compiled.
+
+ * htdig/HtHTTP.cc (ctor): Add default value for _server to
+ prevent strange segmentation faults.
+
+Fri Jun 18 09:53:30 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc(Clear, Deserialize):
+ add docHeadIsSet field, code for setting and getting it.
+ * htcommon/DocumentDB.cc(Add): only put out excerpt record if DocHead
+ is really set.
+ * htmerge/docs.cc(convertDocs): add missing else after code to delete
+ documents with no excerpts.
+ (All these changes fix the disappearing excerpts problem in 3.2.)
+
+Wed Jun 16 23:04:38 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc (UseProxy): Change http_proxy_exclude to an
+ escaped regex string. Allows for much more complicated rules.
+
+Wed Jun 16 16:04:07 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * Makefile.config.in: fix typo in name IMAGE_URL_PREFIX.
+
+ * htdig/Retriever.cc(IsValidURL): change handling of valids to only
+ reject if list is not empty, give different error message.
+
+Wed Jun 16 14:40:56 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc(main): pass StringList args to setEscaped()
+ instead of unprocessed input[] char *'s.
+
+ * htsearch/Display.cc(buildMatchList): cast score to (int) in maxScore
+ calculation, to avoid compiler warnings.
+
+ * htdig/htdig.cc(main): change comparison on minimalFile to avoid
+ compiler warnings.
+
+Wed Jun 16 11:30:23 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/HtRegex.cc(setEscaped): Fix appending of substring to avoid
+ compiler warnings.
+
+ * htlib/HtDateTime.cc(SettoNow): Strip out all the nonsense that
+ doesn't work, set Ht_t directly instead.
+
+Wed Jun 16 09:58:12 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * configure.in, configure, Makefile.config.in: Correct handling of
+ SEARCH_FORM variable, as Gabriele recommended.
+
+Wed Jun 16 09:32:06 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/cgi.h, htlib/cgi.cc(cgi & init), htsearch/htsearch.cc
+ (main & usage): allow a query string to be passed as an argument.
+
+Wed Jun 16 08:43:09 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/Makefile.in, htdig/Makefile.in, htfuzzy/Makefile.in,
+ htmerge/Makefile.in, htnotify/Makefile.in: Use standard $(bindir)
+ variable instead of $(BIN_DIR). Allows for standard configure flags
+ to set this. (Completes Geoff's change on May 15.)
+
+Tue Jun 15 14:31:50 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/PDF.cc(parseNonTextLine): move line that clears _parsedString,
+ so title cleared even if rejected.
+
+ * htsearch/Display.cc(buildMatchList & sort): move maxScore calculation
+ from sort to buildMatchList, so it's done even if there's only 1 match.
+
+Mon Jun 14 15:01:07 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc(RetrieveHTTP): Show "Unknown host" message if
+ Connection::assign_server() fails (due to gethostbyname() failure).
+
+Mon Jun 14 13:52:34 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htcommon/defaults.cc, htsearch/Display.h, htsearch/Display.cc,
+ htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html,
+ htdoc/hts_templates.html: add template_patterns attribute, to select
+ result templates based on URL patterns.
+
+Sun Jun 13 16:29:19 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (IsValidURL): Add valid_extension list, as
+ requested numerous times.
+
+ * htcommon/defaults.cc: Add config attribute valid_extensions,
+ with default as empty.
+
+Sat Jun 12 23:10:39 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentRef.h: Fix thinkos introduced in change earlier
+ today. Actually compiles correctly now.
+
+Sat Jun 12 22:37:22 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HtHTTP.cc (ParseHeader): Fix parsing to take empty headers
+ into account. Fixes PR#557.
+
+ * htsearch/Display.h, htsearch/Display.cc (excerpt): Fix
+ declaration to refer to first as reference--ensures ANCHOR is
+ properly set. Fixes PR#541 as suggested by <pmb1 at york.ac.uk>.
+
+ * htfuzzy/Endings.cc (getWords): Fixed PR#560 as suggested by
+ Steve Arlow <yorick at ClarkHill.com>. Solves problems with fuzzy
+ matching on words like -ness: witness, highness, likeness... Tries
+ to interpret words as root words before attempting stemming.
+
+ * installdir/search.html (Match): Add Boolean to default search
+ form, as suggested by PR#561.
+
+ * htlib/URL.cc (URL): Fix PR#566 by setting the correct length of
+ the string being matched. 'http://' is 7 characters...
+
+Sat Jun 12 19:06:36 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtZlibCodec.h, htlib/HtZlibCodec.cc: New files. Provide
+ general access to zlib compression routines when available.
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc: Remove
+ compression access and restore DocHead access through default
+ methods. Compression of excerpts will occur through the
+ HtZlibCodec classes and through the DocumentDB excerpt access.
+
+Sat Jun 12 15:25:08 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/docs.cc (convertDocs): Load excerpt from external
+ database before considering it empty.
+
+Sat Jun 12 14:41:54 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc (displayMatch): Added patch from Torsten
+ Neuer <tneuer at inwise.de> to fix PR# 554.
+
+ * htdig/HTML.cc (do_tag): Add parsing for <embed> and <object>,
+ including suggestions from Gilles as to condensing cases with
+ <img> parsing.
+
+Sat Jun 12 14:00:39 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/ExternalParser.cc (parse): Quote the filename before
+ passing it to the command-line to prevent shell escapes. Fixes PR#542.
+
+Fri Jun 11 15:59:10 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/URL.cc(removeIndex): use CompareWord instead of FindFirstWord,
+ to avoid substring matches.
+
+Wed Jun 2 15:51:00 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/URLTrans.cc(encodeURL): Fix to ensure that non-ASCII letters
+ get URL-encoded.
+
+Mon May 31 22:40:29 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentDB.cc(ReadExcerpt): Fix silly typos with methods,
+ thinko with docID.
+ (Add): Add the excerpt *before* the URL index is written.
+
+ * htdig/Retriever.cc(isValidURL): Remove code restricting URLs to
+ relative and http://.
+
+ * htdig/htdig.cc(main): Unlink the doc_excerpt file when doing an
+ initial dig.
+ (main): Fix silly typo with minimumFile.
+
+ * htmerge/db.cc(mergeDB): Call DocumentDB::Open() with doc_excerpt for
+ consistency--doesn't actually do anything with it.
+
+ * htmerge/docs.cc(convertDocs): Ditto. Also don't delete a
+ document simply because it has an empty DocHead. Excerpts are now
+ stored in a separate database!
+
+ * htmerge/htmerge.h: Call mergeDB and convertDocs with
+ doc_excerpt parameter.
+
+ * htmerge/htmerge.cc(main): Ditto.
+
+ * htsearch/Display.h: Call ctor with all three doc db filenames.
+
+ * htsearch/Display.cc(Display): Call DocumentDB::Open with above.
+ (excerpt): Retrieve the excerpt from the excerpt database.
+
+ * htsearch/htsearch.cc: Call Display::Display with all three doc
+ db filenames.
+
+Mon May 31 15:08:30 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentDB.h: Add new method ReadExcerpt to read the
+ excerpt from the separate (new) excerpt database. Change Open()
+ and Read() methods to account for this new database.
+
+ * htcommon/DocumentDB.cc (Open): Open the excerpt database too.
+ (Read): Ditto.
+ (Close): Close it if it exists.
+ (ReadExcerpt): Explicitly read the DocHead of this DocumentRef.
+ (Add): Make sure DocHeads go into the excerpt database.
+ (Delete): Make sure we delete the associated excerpt too.
+ (CreateSearchDB): Make sure we grab the excerpt from the database.
+
+ * htcommon/DocumentRef.cc(Serialize): Don't serialize the DocHead
+ field, this is done in the DocumentDB code.
+
+ * htcommon/defaults.cc(modification_time_is_now): Set to true to
+ avoid problems with not setting dates when no Last-Modified:
+ header appears.
+ (doc_excerpt): Add new attribute for the filename of the excerpt
+ database.
+
+ * htdig/HtHTTP.h: Remove incorrect virtual declarations from
+ Request and EstablishConnection methods. Assign void return value
+ to ResetStatistics since it doesn't return a value.
+
+ * htdig/htdig.cc (main): Add new "minimal" flag '-m' to only index
+ the URLs in the supplied file. Sets hopcount to ignore links.
+
+Sun May 30 19:36:15 1999 Alexander Bergolth <leo at leo.wu-wien.ac.at>
+
+ * htlib/URL.cc (normalizePath): Fix bug that caused endless loops
+ and core dumps when normalizing URLs with more than one of
+ ( "/../" | "/./" | "//" | "%7E" )
+
+ * htlib/HtDateTime.cc (Httimegm): Call Httimegm in timegm.c unless
+ HAVE_TIMEGM.
+
+Wed May 26 23:15:46 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htmerge/db.cc (mergeDB): Add patch contributed by Roman Dimov
+ <roman at twist.mark-itt.ru> to fix problems with confusing docIDs,
+ resulting in documents in main db removed when the corresponding
+ DocID was supposed to be removed from the merged db.
+
+Wed May 26 11:30:22 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.h, htsearch/Display.cc, htsearch/htsearch.cc:
+ Switch restrict and excludes to use HtRegex instead of StringMatch.
+
+ * htdig/htdig.cc (main): Fix typo clobbering setting of
+ excludes. Obviously fixes problems with badquerystr and excludes!
+
+ * htdig/HtHTTP.cc (ParseHeader): Change parsing to skip extra
+ whitespace, as in 5/19 Document.cc(readHeader) change.
+
+Wed May 19 22:17:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/HtHTTP.cc, htdig/HtHTTP.h: Add new files, contributed by
+ Gabriele. A start at an HTTP/1.1 implementation.
+
+ * htdig/Document.cc (readHeader): Fix change of 5/16 to actually
+ work! :-)
+
+ * htsearch/Display.cc (expandVariables): Change end-of-expansion
+ test to include states 2 and 5 to ensure templates ending in } are
+ still properly expanded, as suggested by Gilles.
+
+Mon May 17 14:31:31 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegex.cc (setEscaped): Use full list of characters to
+ escape as suggested by Gilles.
+
+Sun May 16 17:27:51 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Document.cc (readHeader): Since multiple whitespace
+ characters are allowed after headers, don't use strtok.
+ (readHeader): We no longer pretend to parse Word, PostScript, or
+ PDF files internally.
+ (getParsable): Don't generate PostScript or PDF objects since we
+ no longer recommend using them.
+
+Sun May 16 17:07:19 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegex.cc (setEscaped): Ensure escaping does not loop
+ beyond the end of a string.
+
+ * htdig/Retriever.cc (IsValidURL): Fix badquerystr parsing to use
+ HtRegex as expected. (Oops!)
+
+ * htdig/HTML.cc (parse): Use HtSGMLCodec during parsing, rather
+ than encoding the whole document at the beginning. More consistent
+ with previous use of SGMLEntities.
+
+Sat May 15 12:57:40 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/URL.cc (normalizePath): Remove extra (useless) variable
+ declarations.
+
+ * htlib/htString.h, htlib/String.cc: Add new method Nth to solve
+ problems with (String *)->[].
+
+ * htlib/HtRegex.h, htlib/HtRegex.cc: Added new method
+ setEscaped(StringList) to produce a pattern connected with '|' of
+ possibly escaped strings. Strings are not escaped if enclosed in
+ [] and the brackets are removed from unescaped regex.
+
+ * htdig/htdig.h: Use HtRegex instead of StringMatch for limiting
+ by default.
+
+ * htdig/Retriever.cc: As above.
+
+ * htdig/htdig.cc(main): As above. Use setEscaped to set limits
+ correctly (i.e. in a backwards-compatible way).
+
+Sat May 15 11:24:26 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Speling.h, htfuzzy/Speling.cc: New files for simple
+ spelling correction. Currently limited to transposition and added
+ character errors. Missing character errors to be added soon.
+
+ * htfuzzy/Makefile.in: Compile it.
+
+ * htfuzzy/Fuzzy.cc (getFuzzyByName): Use it.
+
+ * htcommon/defaults.cc: Add new option minimum_speling_length for
+ the shortest query word to receive speling fuzzy
+ modifications. Should prevent problems with valid words generating
+ unrelated "corrections" of words. Default is 5 chars.
+
+Sat May 15 11:18:27 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Fuzzy.cc (getWords): Ensure word is not an empty or null
+ string.
+
+ * htfuzzy/Metaphone.cc (generateKey): Ditto. Should solve PR#514.
+
+ * htdig/Document.cc (Reset): Do not use modification_time_is_now
+ attribute. Simply reset modtime to 0, time is set elsewhere.
+
+ * Makefile.config.in: Add options from separate CONFIG files.
+
+ * configure.in, configure: Add configure-level switches for
+ --with-image-url-prefix= and --with-search-form=. Do not generate
+ CONFIG file (hopefully to be phased out soon).
+
+ * */Makefile.in: Make linking CONFIG-dependent files depend on
+ Makefile.config, not CONFIG.
+
+ * Makefile.in: Use standard $(bindir) variable instead of
+ $(BIN_DIR). Allows for standard configure flags to set this.
+
+Tue May 11 11:15:08 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.h, htlib/HtDateTime.cc: Updates from Gabriele,
+ fixing SetToNow() and adding GetDiff to return the difference in
+ time_t between two objects.
+
+ * htdig/Retriever.cc (Need2Get): Add patch from Warren Jones
+ <wjones at tc.fluke.com> to keep track of inodes on local files to
+ eliminate duplicates. Hopefully this will serve for a first-try at
+ a signature method for HTTP as well.
+
+Tue May 4 20:20:40 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/Regex.h, htfuzzy/Regex.cc: Add new regex fuzzy
+ algorithm, based on Substring and Prefix.
+
+ * htfuzzy/Fuzzy.cc (getFuzzyByName): Add it.
+
+ * htfuzzy/Makefile.in: Compile it.
+
+ * htcommon/defaults.cc: Add new attribute regex_max_words, same
+ concept as substring_max_words.
+
+ * htfuzzy/Exact.cc, htfuzzy/Substring.cc, htfuzzy/Prefix.cc:
+ Define names attribute for debugging purposes.
+
+ * installdir/htdig.conf: Fix the comments for search_algorithm to
+ refer to all the current possibilities.
+
+ * htlib/HtRegex.cc (match): Slight cleanup of how to return.
+
+Tue May 4 15:28:38 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc (reportError): Add e-mail of maintainer to
+ error message. Should help direct people to the correct place.
+
+ * htdig/Retriever.cc (IsValidURL): Lowercase all extensions from
+ bad_extensions as well as all extensions used in
+ comparisons. Ensures we're using case-insensitive matching.
+
+Mon May 3 23:20:22 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/Retriever.cc (IsValidURL): Fix typo with #else statement
+ for REGEX.
+
+ * htdig/htdig.cc: Add conditionals for REGEX to use HtRegex
+ instead of StringMatch methods when defined.
+
+ * htlib/HtDateTime.h: Update to remove definitions of true and
+ false, established by May 2 change in
+ include/htconfig.h.in as contributed by Gabriele.
+
+ * htlib/HtDateTime.cc: Replace call to mktime internal function to
+ Httimegm in timegm.c, contributed by Leo.
+
+ * htlib/timegm.c: Declare my_mktime_gmtime_r to prevent compiler
+ errors with incompatible gmtime structures, contributed by Leo.
+
+ * configure.in: Rearrange date/time checks for clarity.
+
+ * configure: Regenerate using autoconf.
+
+ * include/htconfig.in: Add HAVE_STRFTIME flag.
+
+Sun May 2 18:49:04 1999 Alexander Bergolth <leo at leo.wu-wien.ac.at>
+
+ * configure.in, include/htconfig.h.in: Added a configure test for
+ the availability of the bool type.
+
+Fri Apr 30 20:00:09 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.h, htlib/HtDateTime.cc: Update with new
+ versions sent by Gabriele.
+
+Fri Apr 30 19:30:42 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtRegex.h, htlib/HtRegex.cc: New class, contributed by
+ Peter D. Gray <pdg at draci.its.uow.edu.au> as a small wrapper for
+ system regex calls.
+
+ * htlib/Makefile.in: Build it.
+
+ * htdig/htdig.h: Use it if REGEX is defined.
+
+ * htdig/htdig.cc: Ditto.
+
+ * htdig/Retriever.cc: Ditto.
+
+ * htsearch/Display.cc(generateStars): Remove extra newline after
+ STARSRIGHT and STARSLEFT variables, noted by Torsten Neuer
+ <tneuer at inwise.de>.
+
+Fri Apr 30 18:52:56 1999 Alexander Bergolth <leo at leo.wu-wien.ac.at>
+
+ * htlib/URL.cc(ServerAlias): port for server_aliases entries now
+ defaults to 80 if omitted.
+
+Wed Apr 28 19:57:38 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtDateTime.h, htlib/HtDateTime.cc: New class, contributed
+ by Gabriele.
+
+ * htlib/Makefile.in: Compile it.
+
+ * README: Update message from 3.1.0 (oops!) to 3.2.0, remove rx
+ directory.
+
+ * installdir/htdig.conf: Add example of no_excerpt_show_top
+ attribute in line with most users' expectations.
+
+ * contrib/README: Mention contributed section of the website.
+
+ * Makefile.in: Ignore mailarchive directory--now removed from CVS.
+
+Wed Apr 28 10:46:31 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htmerge/db.cc(mergeDB): fix a few errors in how the merge index
+ name is obtained.
+
+Tue Apr 27 23:00:39 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * Makefile.config.in: Remove now-useless LIBDIRS variable.
+
+ * mailarchive/Split.java, mailarchive/htdig: Remove ancient
+ mailarchive stuff.
+
+Tue Apr 27 18:01:52 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(setupImages): Remove code setting URLimage to
+ a bogus pattern (remnant left over after merge).
+
+Tue Apr 27 16:43:08 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc(RetrieveHTTP): Show "Unable to build connection"
+ message at lower debug level.
+
+Tue Apr 27 11:24:19 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.h: Remove sort, compare functions re-introduced
+ in merge. Moved to ResultMatch by Hans-Peter's April 19th changes.
+
+ * htsearch/Display.cc: Remove bogus call to ResultMatch:setRef,
+ removed by Hans-Peter's April 19th changes.
+
+Sat Apr 24 21:08:35 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * Merge in changes from 3.1.2 (see below).
+
+ * htcommon/WordList.cc: Change valid_word to use iscntrl().
+
+ * htdig/Plaintext.cc: Remove CVS Log.
+
+ * htdig/Retriever.cc: Fix ancient bug with empty excludes list.
+
+ * htlib/List.cc: Remove CVS Log, use more succinct test for
+ out-of-bounds.
+
+ * htsearch/Display.cc: Fix logic with starPatterns, only show top
+ of META description.
+
+ * htsearch/Display.h: Introduce headers needed for sort functionality.
+
+ * installdir/htdig.conf: Add example max_doc_size attribute as
+ well as example for including start_url from a file.
+
+ * htdoc/ChangeLog, htdoc/RELEASE.html, htdoc/FAQ.html,
+ htdoc/where.html, htdoc/cf_byname.html, htdoc/cf_byprog.html,
+ htdoc/uses.html, htdoc/contents.html, htdoc/mailarchive.html:
+ Merge in documentation updates from 3.1.2.
+
+Sat Apr 24 15:18:45 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htsearch/Display.cc (sort): Return immediately if <= 1 items to
+ sort.
+
+Mon Apr 19 00:53:06 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htsearch/ResultMatch.h (create): New. All (the only) ctor
+ caller changed to use this.
+ (setRef, getRef): Removed. Callers changed to use nearby data.
+ (incomplete): Removed.
+ (setIncompleteScore): Renamed to...
+ (setScore): ...this. All callers changed.
+ (setSortType): New.
+ (getTitle, getTime, setTitle, setTime, getSortFun): New virtual
+ functions.
+ (enum SortType): Moved from Display, private.
+ (mySortType): New static member.
+
+ * htsearch/ResultMatch.cc (mySortType): Define static member
+ variable.
+ (getScore): Remove handling of "incomplete". Moved to ResultMatch.h
+ (getTitle, getTime, setTitle, setTime): New dummy functions.
+ (class ScoreMatch, class TimeMatch, class IDMatch, class
+ TitleMatch): Derived classes with compare functions (from Display)
+ and extra sort-method-related members, as needed.
+ (setSortType): New, mostly moved from Display.
+ (create): New.
+
+ * htsearch/Display.h: Changed first argument from ResultMatch * to
+ DocumentRef *.
+ (compare, compareTime, compareID, compareTitle, enum SortType,
+ sortType): Removed.
+
+ * htsearch/Display.cc (display): Call ResultMatch::setSortType and
+ output syntax error page for invalid sort methods.
+ (displayMatch): Change first argument from ResultMatch * to
+ DocumentRef *ref. All callers changed.
+ (buildMatchList): Remove call to sortType and typ variable.
+ Always call (ResultMatch::)setTime and setTitle. Remove extra
+ call to setID.
+ (sort): Call (ResultMatch::)getSortFun for qsort compare function.
+ (compare, compareTime, compareID, compareTitle, sortType): Removed.
+
+Wed Apr 14 21:21:35 1999 Alexander Bergolth <leo at leo.wu-wien.ac.at>
+
+ * htlib/regex.c: fixed compile problem with AIX xlc compiler
+
+ * htlib/HtHeap.h: fixed compile problem with AIX xlc compiler (bool)
+
+ * htlib/HtVector.h: ditto
+
+ * htsearch/Display.cc: fixed typo
+
+Wed Apr 14 00:17:06 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.h: Add compareID for sorting results by DocID.
+
+ * htsearch/Display.cc: As above.
+
+Tue Apr 13 23:50:28 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/defaults.cc: Add new config option use_doc_date to use
+ document meta information for the DocTime() field.
+
+ * htdig/HTML.cc(do_tag): Call Retriever::got_time if use_doc_date
+ is set and we run across a META date tag.
+
+ * htdig/Retriever.h, htdig/Retriever.cc: Add new got_date
+ function. When called, sets the DocTime field of the DocumentRef
+ after parsing is completed. Currently assumes ISO 8601 format for
+ the date tag.
+
+Sun Apr 11 12:51:39 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htsearch/Display.cc (buildMatchList): Delete thisRef if excluded
+ by URL. Call setRef(NULL), not setRef(thisRef).
+
+Wed Apr 7 19:35:42 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc(usage): Remove bogus -w flag.
+
+Thu Apr 1 12:05:11 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/htsearch.cc(main): Apply Gabriele's patch to avoid using an
+ invalid matchesperpage CGI input variable.
+
+ * htsearch/Display.cc(display) & (setVariables): Correct any invalid
+ values for matches_per_page attribute to avoid div. by 0 error.
+
+Wed Mar 31 15:19:25 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htfuzzy/Synonym.cc: Fix previous fix of minor memory leak.
+ (db pointer wasn't properly set)
+
+Mon Mar 29 10:31:09 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/Display.cc(excerpt): Added patch from Gabriele to
+ improve display of excerpts--show top of description always,
+ otherwise try to find the excerpt.
+
+Sun Mar 28 19:45:02 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/HtWordType.h (HtIsWordChar): Avoid matching 0 when using
+ strchr.
+ (HtIsStrictWordChar): Ditto.
+
+ * htdig/ExternalParser.cc (parse): Before got_href call, set
+ hopcount of URL to that of base plus 1.
+ Add URL to external parser error output.
+
+ * htlib/URL.cc (URL(char *ref, URL &parent) ): Move call to
+ constructURL call inside previous else-clause.
+ (parse): Reset _normal, _signature, _user initially.
+ Commence parsing, even if no "//" is found. Do not set _normal
+ here.
+ (normalizePath): Call removeIndex finally.
+
+ * htcommon/WordRecord.h (WORD_RECORD_COMPRESSED_FORMAT)
+ [!NO_WORD_COUNT]: Change to "cu4".
+
+ * htlib/HtPack.cc (htPack): Correct handling at end of code-string
+ and end of encoding-byte. Add code 'c' for often-1 unsigned ints.
+ (htUnpack): Add handling of code 'c'.
+
+Thu Mar 25 12:18:05 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * installdir/long.html, installdir/short.html: Remove backslashes
+ before quotes in HTML versions of the builtin templates.
+
+ * Makefile.in: Add long.html & short.html to COMMONHTML list, so
+ they get installed in common_dir.
+
+Thu Mar 25 11:56:50 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(displayMatch), htcommon/defaults.cc,
+ htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Add date_format attribute suggested by Marc Pohl.
+
+Thu Mar 25 09:46:07 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(displayMatch): Avoid segfault when DocAnchors
+ list has too few entries for current anchor number.
+
+Tue Mar 23 15:08:40 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(displayMatch): Fix problem when documents
+ did not have descriptions.
+
+Tue Mar 23 14:17:14 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/PDF.cc(parseString): Use minimum_word_length instead of
+ hardcoded constant.
+
+Tue Mar 23 14:02:40 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc: Fix bug where noindex_start was empty, allow case
+ insensitive matching of noindex_start & noindex_end.
+
+ * htdoc/attrs.html, htdoc/cf_byname.html, htdoc/cf_byprog.html:
+ Fix inconsistencies in documentation for noindex_start & noindex_end.
+
+Tue Mar 23 14:01:16 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc: Add check for <a href=...> tag that is missing a
+ closing </a> tag, terminating it at next href.
+
+Tue Mar 23 13:57:35 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Document.cc: Fix check of Content-type header in readHeader(),
+ correcting bug introduced Jan 10 (for PR#91), and check against
+ allowed external parsers.
+
+Tue Mar 23 13:54:35 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc: More lenient comment parsing, allows extra dashes.
+
+Tue Mar 23 12:22:53 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htlib/Configuration.cc(Add): Fix function to avoid infinite loop
+ on some systems, which don't allow all the letters in isalnum() that
+ isalpha() does, e.g. accented ones.
+
+ * htdig/HTML.cc: Fix three reported bugs about inconsistent
+ handling of space and punctuation in title, href description & head.
+ Now makes distinction between tags that cause word breaks and those
+ that don't, and which of the latter add space.
+
+Tue Mar 23 12:15:48 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/Plaintext.cc(parse): Use minimum_word_length instead of
+ hardcoded constant.
+
+Tue Mar 23 12:11:04 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htmerge/words.cc(mergeWords): Fix to prevent description text
+ words from clobbering anchor number of merged anchor text words.
+
+Tue Mar 23 12:02:00 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/Display.cc(generateStars): Add in support for use_star_image
+ which was lost when template support was put in way back when.
+
+Tue Mar 23 11:47:52 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * Makefile.in: add missing ';' in for loops, between fi & done
+
+Mon Mar 22 16:06:15 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htdig/HTML.cc: Check for presence of more than one <title> tag.
+
+Mon Mar 22 15:32:15 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/parse_doc.pl: Fix handling of minimum word length.
+
+Sun Mar 21 15:19:00 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/HtPack.cc (htPack): New.
+ * htlib/HtPack.h: New.
+ * htsearch/parser.cc (perform_push): Unpack WordRecords using
+ htUnpack.
+ * htsearch/htsearch.h: Add "debug" declaration.
+ * htmerge/words.cc (mergeWords): Pack WordRecords using htPack.
+ * htlib/Makefile.in (OBJS): Add HtPack.o
+ * htcommon/WordRecord.h: Add WORD_RECORD_COMPRESSED_FORMAT
+
+ * htdig/HTML.cc (parse): Keep contents in String variable
+ textified_contents while using its "char *".
+
+ * htsearch/Display.cc (excerpt): Similar for head_string.
+
+Thu Mar 18 20:01:24 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * installdir/long.html, installdir/short.html: Write out HTML
+ versions of the builtin templates.
+
+ * installdir/htdig.conf: Add commented-out template_map and
+ template_name attributes to use the on-disk versions.
+
+Tue Mar 16 03:06:06 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htcommon/DocumentDB.cc (Delete): Fix bad parameter to Get: use
+ key, not DocID.
+
+Tue Mar 16 01:50:16 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/HtWordType.h (class HtWordType): New.
+ * htlib/HtWordType.cc: New.
+ * htlib/Makefile.in (OBJS): Add HtWordType.o
+
+ * htdoc/attrs.html: Document attribute extra_word_characters.
+ * htdoc/cf_byprog.html: Ditto.
+ * htdoc/cf_byname.html: Ditto.
+
+ * htcommon/defaults.cc (defaults): Add extra_word_characters.
+
+ * htsearch/htsearch.h: Lose spurious extern declaration of unused
+ variable valid_punctuation.
+ * htsearch/htsearch.cc (main): Call HtWordType::Initialize.
+ (setupWords): Use HtIsWordChar, HtIsStrictWordChar and
+ HtStripPunctuation. Do not read valid_punctuation.
+
+ * htsearch/Display.cc (excerpt): Use HtIsStrictWordChar.
+
+ * htlib/StringMatch.cc (FindFirstWord): Ditto.
+ (CompareWord): Ditto.
+
+ * htdig/htdig.cc (main): Call HtWordType::Initialize.
+
+ * htdig/Retriever.h (class Retriever): Lose member
+ valid_punctuation.
+ * htdig/Retriever.cc (Retriever): Lose its initialization.
+
+ * htdig/Postscript.h (class Postscript): Lose member
+ valid_punctuation.
+ * htdig/Postscript.cc (Postscript): Lose its initialization.
+ (flush_word): Use HtStripPunctuation.
+ (parse_string): Use HtIsWordChar,
+ HtIsStrictWordChar and HtStripPunctuation.
+
+ * htdig/Parsable.h (class Parsable): Lose member
+ valid_punctuation.
+ * htdig/Parsable.cc (Parsable): Lose its initialization.
+
+ * htcommon/WordList.cc (valid_word): Use HtIsStrictWordChar.
+ (BadWordFile): Use HtStripPunctuation. Do not read
+ valid_punctuation.
+
+ * htcommon/DocumentRef.cc (AddDescription): Use HtIsWordChar,
+ HtIsStrictWordChar and HtStripPunctuation. Do not read
+ valid_punctuation.
+
+ * htdig/PDF.cc (parseString): Similar.
+
+ * htdig/HTML.cc (parse): Similar.
+
+ * htdig/Plaintext.cc (parse): Similar.
+
+Sun Mar 14 14:04:31 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Makefile.in: Add HtSGMLEntites.o to OBJS.
+
+Sat Mar 13 21:29:38 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htcommon/DocumentDB.cc(Open, Read): Switch to DB_HASH for faster
+ access. Most important for very quick URL lookups!
+
+ * htcommon/DocumentRef.cc(AddDescription): Check to see that
+ description isn't a null string or contains only whitespace before
+ doing anything.
+
+ * htlib/HtSGMLCodec.h, htlib/HtSGMLCodec.cc: Add new class to
+ convert between SGML entities and high-bit characters.
+
+ * htdig/HTML.cc(parse): Use it instead of SGMLEntities.
+
+ * htsearch/Display.cc(excerpt): Use HtSGMLCodec to convert *back*
+ to SGML entities before displaying.
+
+ * htlib/HtHeap.cc: Cleaned up comments, use more efficient
+ procedure to build from a vector.
+
+ * htlib/HtWordCodec.cc(HtWordCodec): Fix bug with constructing from
+ uninitialized variables!
+
+ * htlib/URL.h, htlib/URL.cc: Initial support for multiple schemes and
+ user@host URLs.
+
+ * htlib/List.cc(Nth): Check for out-of-bounds requests before
+ doing anything.
+
+Fri Mar 12 00:31:03 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/mktime.c (__mon_yday): Correct size to number of
+ initializers (2).
+
+ * htsearch/htsearch.cc (main): Remove doc_index handling.
+
+ * htsearch/ResultMatch.h (setURL): Change to setID, use int.
+ All callers changed.
+ (getURL): Change to getID.
+ All callers changed.
+ (String url): Change to "int id".
+
+ * htsearch/Display.h: (Display): Second parameter removed.
+ (docIndex) removed.
+
+ * htsearch/Display.cc (Display, ~Display): Do not handle
+ docIndex.
+ (display): Use DocumentDB::operator [](int), not
+ DocumentDB::operator [] (char *).
+ (buildMatchList): Changed to handle ResultMatch as DocID int,
+ instead of URL string: use DocumentDB::operator [](int), not
+ DocumentDB::operator [] (char *). Get DocumentRef directly, then
+ filter the URL by includeURL().
+
+ * htnotify/htnotify.cc (main): Use DocIDs(), not DocURLs().
+ Handle the change from String * to IntObject *.
+
+ * htmerge/htmerge.cc (main): Do not delete doc_index.
+
+ * htmerge/docs.cc (convertDocs): Test doc_index access as
+ read-only. Pass as parameter for docdb, do not handle separately.
+
+ * htmerge/docs.cc (convertDocs): Add debug messages about cause
+ when deleting documents. If verbose > 1, write id/URL for every URL.
+
+ * htmerge/db.cc (mergeDB): Handle doc_index, test accessibility.
+
+ * htlib/IntObject.h (class IntObject): Add int-constructor.
+
+ * htdoc/attrs.html (doc_index): Say that mapping is from document
+ URLs to numbers.
+ (doc_db): Say that indexing is on document number.
+
+ * htdoc/cf_byprog.html (doc_index): Move from htsearch to htdig
+ entry.
+
+ * htdig/htdig.cc (main): Add .work suffix to doc_index too.
+ Unlink doc_index if initial.
+
+ * htcommon/DocumentDB.h (Open): New second argument.
+ (Read): New second argument, default to 0.
+ (operator [](int)): New.
+ (Exists(char *), Delete(char *)): Change to int parameter.
+ (DocIDs, i_dbf): New.
+
+ * htcommon/DocumentDB.cc (operator [](int)): New.
+ (Exists(char *), Delete(char *)): Changed to DocID int parameter.
+ All callers changed.
+ (URLs): Assume keys are ok without probing for documents
+ with each key.
+ (DocIDs): New.
+ (Open): Take an index database file name as second argument.
+ All callers changed.
+ (Read): Similar, accept 0.
+ (all): Change to index on DocID.
+
+Wed Mar 10 02:25:24 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htdoc/attrs.html (template_name): Typo; used by htsearch, not
+ htdig.
+
+Mon Mar 8 13:30:44 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htdig/Retriever.cc (got_href): Check if the ref is for the
+ current document before adding it to the db.
+
+Mon Mar 8 01:36:38 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/DB2_db.cc: Remove errno.
+ * htlib/DB2_hash.cc: Ditto.
+
+Sun Mar 7 20:50:37 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htfuzzy/EndingsDB.cc(createDB): Use link and unlink to move,
+ rather than a non-portable system call.
+
+ * htcommon/DocumentRef.h, htcommon/DocumentRef.cc: Fix #ifdef
+ problems with zlib.
+
+Sun Mar 7 09:39:37 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/timegm.c: Fix problems compiling on libc5 systems noted by
+ Hans-Peter.
+
+ * htlib/Makefile.in, Makefile.in, Makefile.config.in: Use regex.c
+ instead of rx.
+
+ * htfuzzy/EndingsDB.cc: Ditto.
+
+ * configure.in, configure: Don't bother to config rx directory.
+
+Fri Mar 5 08:09:20 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * contrib/parse_doc.pl: uses pdftotext to handle PDF files,
+ generates a head record with punctuation intact, extra checks
+ for file "wrappers" & check for MS Word signature (no longer
+ defaults to catdoc), strip extra punct. from start & end of words,
+ rehyphenate text from PDFs.
+
+Tue Mar 2 23:18:20 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htdig/htdig.cc: Renamed main.cc for consistency with other programs.
+
+ * htlib/DB2_hash.h, htlib/DB2_hash.cc: Added interface to Berkeley
+ hash database format.
+
+ * htlib/Makefile.in: Use them!
+
+ * htlib/Database.h: Define database types, allowing a choice
+ between different formats.
+
+ * htlib/Database.cc(getDatabaseInstance): Use passed type to pick
+ between subclasses. Currently only uses Hash and B-Tree formats of
+ Berkeley DB.
+
+ * htcommon/DocumentDB.cc, htfuzzy/Endings.cc,
+ htfuzzy/EndingsDB.cc, htfuzzy/Fuzzy.cc, htfuzzy/Prefix.cc,
+ htfuzzy/Substring.cc, htfuzzy/Synonym.cc, htfuzzy/htfuzzy.cc,
+ htmerge/docs.cc, htmerge/words.cc, htsearch/Display.cc,
+ htsearch/htsearch.cc: Use new form of getDatabaseInstance(),
+ currently with DB_BTREE option (for compatibility).
+
+Mon Mar 1 22:53:37 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/regex.c, htlib/strptime.c: Import new versions from
+ glibc.
+
+ * htlib/Makefile.in, htlib/mktime.c, htlib/timegm.c, htlib/lib.h:
+ Changes to use glibc timegm() function instead of buggy mytimegm().
+
+ * htdig/Document.cc(getdate): Use it.
+
+Tue Mar 2 02:35:50 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * attrs.html: Rephrase and clarify entry for url_part_aliases.
+
+Sun Feb 28 23:25:40 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htlib/HtURLCodec.cc (~HtURLCodec): Add missing deletion of
+ myWordCodec.
+
+Fri Feb 26 19:03:58 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure, configure.in: Fix typo on timegm test.
+
+ * htlib/mytimegm.cc: Fix Y2K problems.
+
+Wed Feb 24 21:09:19 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc(main): Remember to delete the parser!
+
+ * htlib/String.cc(String(char *s, int len)): Remove redundant copy.
+
+ * htsearch/Display.cc(display): Free DocumentRef memory after
+ displaying them.
+ (displayMatch): Fix memory leak when documents did not have anchors.
+
+Wed Feb 24 15:18:26 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Configuration.cc(Add): Fix small leak in locale code.
+
+ * htlib/String.cc: Fix up code to be cleaner with memory
+ allocation, inline next_power_of_2.
+
+Mon Feb 22 22:13:49 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/String.cc, htlib/htString.h: Fix some memory leaks.
+
+Mon Feb 22 08:52:19 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/Dictionary.h, htlib/Dictionary.cc(hashCode): Check if key
+ can be converted to an integer using strtol. If so, use the
+ integer as the hash code.
+
+ * htlib/HtVector.h, htlib/HtVector.cc: Implement Release() method
+ and make sure delete calls are done properly.
+
+ * htsearch/ResultList.h, htsearch/ResultList.cc(elements): Use HtVector
+ instead of List.
+
+ * htsearch/parser.cc: Ditto.
+
+Sun Feb 21 16:13:59 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtHeap.h, htlib/HtHeap.cc: Add new class.
+
+ * htlib/Makefile.in: Compile it.
+
+ * htlib/HtVector.h, htlib/HtVector.cc: Add Assign() to assign to
+ elements of vectors.
+
+Sun Feb 21 14:45:26 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htsearch/htsearch.cc: Add patch from Jerome Alet <alet at unice.fr>
+ to allow '.' in config field but NOT './' for security reasons.
+
+ * htdig/HTML.cc: Add patch from Gabriele to ensure META
+ descriptions are parsed, even if 'description' is added to the
+ keyword list.
+
+Sun Feb 21 14:43:44 1999 Gilles Detillieux <grdetil at scrc.umanitoba.ca>
+
+ * htsearch/parser.h, htsearch/parser.cc: Clean up patch made for
+ error messages, made on Feb 16.
+
+Thu Feb 18 20:19:30 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * htlib/HtVector.h, htlib/HtVector.cc: Added new Vector class.
+
+ * htlib/Makefile.in: Compile it.
+
+ * htlib/strptime.c: Add new version from glibc-2.1, replacing
+ strptime.cc.
+
+ * htdig/Document.cc: Use it.
+
+ * htlib/regex.h, htlib/regex.c: Add new files from glibc-2.1.
+
+ * htlib/mktime.c: Update from glibc-2.1.
+
+Wed Feb 17 23:44:59 1999 Geoff Hutchison <ghutchis at wso.williams.edu>
+
+ * configure.in, configure, aclocal.m4: Add autoconf macro to
+ detect syntax of makefile includes.
+
+ * Makefile.in, Makefile.config.in, */Makefile.in: Change include
+ syntax to use it.
+
+Wed Feb 17 12:36:42 1999 Hans-Peter Nilsson <hp at bitrange.com>
+
+ * htcommon/defaults.cc (defaults): locale: change to "C".
+
+Local Variables:
+ add-log-time-format: current-time-string
+End:
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/FAQ.html b/debian/htdig/htdig-3.2.0b6/htdoc/FAQ.html
new file mode 100644
index 00000000..9f2db468
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/FAQ.html
@@ -0,0 +1,2590 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig Frequently Asked Questions</title>
+ <link rel="stylesheet" href="css/htdig.css">
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>Frequently Asked Questions</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr noshade size=4>
+ <p class="main">This FAQ is compiled by the ht://Dig developers and the
+ most recent version is available at &lt;<a
+ href="http://www.htdig.org/FAQ.html">http://www.htdig.org/FAQ.html</a>&gt;.
+ Questions (and answers!) are greatly appreciated.
+ Please send questions and/or answers to the ht://Dig user
+ mailing list at: &lt;<a href="mailto:htdig-general@lists.sourceforge.net">htdig-general@lists.sourceforge.net</a>&gt;.
+ </p>
+ <h2>Questions</h2>
+
+ <h3>1. General</h3>
+ 1.1. <a href="#q1.1">Can I search the internet with ht://Dig?</a><br>
+ 1.2. <a href="#q1.2">Can I index the internet with ht://Dig?</a><br>
+ 1.3. <a href="#q1.3">What's the difference between htdig and
+ ht://Dig?</a><br>
+ 1.4. <a href="#q1.4">I sent mail to Andrew or Geoff or
+ Gilles, but I never got a response!</a><br>
+ 1.5. <a href="#q1.5">I sent a question to the mailing list but I
+ never got a response!</a><br>
+ 1.6. <a href="#q1.6">I have a great idea/patch for ht://Dig!</a><br>
+ 1.7. <a href="#q1.7">Is ht://Dig Y2K compliant?</a><br>
+ 1.8. <a href="#q1.8">I think I found a bug. What should I do?</a><br>
+ 1.9. <a href="#q1.9">Does ht://Dig support phrase or near
+ matching?</a><br>
+ 1.10. <a href="#q1.10">What are the practical and/or theoretical
+ limits of ht://Dig?</a><br>
+ 1.11. <a href="#q1.11">Do any ISPs offer ht://Dig as part of
+ their web hosting services?</a><br>
+ 1.12. <a href="#q1.12">Can I use ht://Dig on a commercial website?</a><br>
+ 1.13. <a href="#q1.13">Why do you use a non-free product to
+ index PDF files?</a><br>
+ 1.14. <a href="#q1.14">Why do you have all those SourceForge
+ logos on your website?</a><br>
+ 1.15. <a href="#q1.15">My question isn't answered here. Where should I
+ go for help?</a><br>
+ 1.16. <a href="#q1.16">Why do the developers get annoyed when
+ I e-mail questions directly to them rather than the mailing list?</a><br>
+ 1.17. <a href="#q1.17">Why do replies to messages on the
+ mailing list only go to the sender and not to the list?</a><br>
+ 1.18. <a href="#q1.18">Can I use ht://Dig to index and search
+ an SQL database?</a><br>
+
+ <hr noshade size=2>
+
+ <h3>2. Getting ht://Dig</h3>
+ 2.1. <a href="#q2.1">What's the latest version of ht://Dig?</a><br>
+ 2.2. <a href="#q2.2">Are there binary distributions of ht://Dig?</a><br>
+ 2.3. <a href="#q2.3">Are there mirror sites for ht://Dig?</a><br>
+ 2.4. <a href="#q2.4">Is ht://Dig available by ftp?</a><br>
+ 2.5. <a href="#q2.5">Are patches around to upgrade between
+ versions?</a><br>
+ 2.6. <a href="#q2.6">Is there a Windows 95/98/2000/NT
+ version of ht://Dig?</a><br>
+ 2.7. <a href="#q2.7">Where can I find the documentation for my
+ version of ht://Dig?</a><br>
+
+ <hr noshade size=2>
+
+ <h3>3. Compiling</h3>
+ 3.1. <a href="#q3.1">When I compile ht://Dig I get an error
+ about libht.a.</a><br>
+ 3.2. <a href="#q3.2">I get an error about -lg</a><br>
+ 3.3. <a href="#q3.3">I'm compiling on Digital Unix and I get
+ messages about "unresolved" and "db_open."</a><br>
+ 3.4. <a href="#q3.4">I'm compiling on FreeBSD and I get lots
+ of messages about '___error' being unresolved.</a><br>
+ 3.5. <a href="#q3.5">I'm compiling on HP/UX and I get a complaint about
+ "Large Files not supported."</a><br>
+ 3.6. <a href="#q3.6">I'm compiling on Solaris and when I run the
+ programs I get complaints about not finding libstdc++.</a><br>
+ 3.7. <a href="#q3.7">I'm compiling on IRIX and I'm having
+ database problems when I run the program.</a><br>
+ 3.8. <a href="#q3.8">I'm compiling with gcc 3.2 and getting
+ all sorts of warnings/errors about ostream and such.</a><br>
+
+ <hr noshade size=2>
+
+ <h3>4. Configuration</h3>
+ 4.1. <a href="#q4.1">How come I can't index my site?</a><br>
+ 4.2. <a href="#q4.2">How can I change the output format of
+ htsearch?</a><br>
+ 4.3. <a href="#q4.3">How do I index pages that start with '~'?</a><br>
+ 4.4. <a href="#q4.4">Can I use multiple databases?</a><br>
+ 4.5. <a href="#q4.5">OK, I can use multiple databases. Can I
+ merge them into one?</a><br>
+ 4.6. <a href="#q4.6">Wow, ht://Dig eats up a lot of disk
+ space. How can I cut down?</a><br>
+ 4.7. <a href="#q4.7">Can I use SSI or other CGIs in my
+ htsearch results?</a><br>
+ 4.8. <a href="#q4.8">How do I index Word, Excel, PowerPoint
+ or PostScript documents?</a><br>
+ 4.9. <a href="#q4.9">How do I index PDF files?</a><br>
+ 4.10. <a href="#q4.10">How do I index documents in other
+ languages?</a><br>
+ 4.11. <a href="#q4.11">How do I get rotating banner ads in
+ search results?</a><br>
+ 4.12. <a href="#q4.12">How do I index numbers in documents?</a><br>
+ 4.13. <a href="#q4.13">How can I call htsearch from a hypertext
+ link, rather than from a search form?</a><br>
+ 4.14. <a href="#q4.14">How do I restrict a search to only meta
+ keywords entries in documents?</a><br>
+ 4.15. <a href="#q4.15">Can I use meta tags to prevent htdig from
+ indexing certain files?</a><br>
+ 4.16. <a href="#q4.16">How do I get htsearch to use the star image
+ in a different directory than the default /htdig?</a><br>
+ 4.17. <a href="#q4.17">How do I get htdig or htsearch to rewrite
+ URLs in the search results?</a><br>
+ 4.18. <a href="#q4.18">What are all the options in
+ htdig.conf, and are there others?</a><br>
+ 4.19. <a href="#q4.19">How do I get more than 10 pages of
+ 10 search results from htsearch?</a><br>
+ 4.20. <a href="#q4.20">How do I restrict a search to only
+ certain subdirectories or documents?</a><br>
+ 4.21. <a href="#q4.21">How can I allow people to search
+ while the index is updating?</a><br>
+ 4.22. <a href="#q4.22">How can I get htdig to ignore the
+ robots.txt file or meta robots tags?</a><br>
+ 4.23. <a href="#q4.23">How can I get htdig not to index
+ some directories, but still follow links?</a><br>
+ 4.24. <a href="#q4.24">How can I get rid of duplicates in
+ search results?</a><br>
+ 4.25. <a href="#q4.25">How can I change the scores in
+ search results, and what are the defaults?</a><br>
+ 4.26. <a href="#q4.26">How can I get htdig not to index
+ JavaScript code or CSS?</a><br>
+
+ <hr noshade size=2>
+
+ <h3>5. Troubleshooting</h3>
+ 5.1. <a href="#q5.1">I can't seem to index more than X documents
+ in a directory.</a><br>
+ 5.2. <a href="#q5.2">I can't index PDF files.</a><br>
+ 5.3. <a href="#q5.3">When I run "rundig," I get a message about
+ "DATABASE_DIR" not being found.</a><br>
+ 5.4. <a href="#q5.4">When I run htmerge, it stops with an "out
+ of diskspace" message.</a><br>
+ 5.5. <a href="#q5.5">I have problems running rundig from cron
+ under Linux.</a><br>
+ 5.6. <a href="#q5.6">When I run htmerge, it stops with an
+ "Unexpected file type" message.</a><br>
+ 5.7. <a href="#q5.7">When I run htsearch, I get lots of Internal
+ Server Errors (#500).</a><br>
+ 5.8. <a href="#q5.8">I'm having problems with indexing words
+ with accented characters.</a><br>
+ 5.9. <a href="#q5.9">When I run htmerge, it stops with a
+ "Word sort failed" message.</a><br>
+ 5.10. <a href="#q5.10">When htsearch has a lot of matches, it runs
+ extremely slowly.</a><br>
+ 5.11. <a href="#q5.11">When I run htsearch, it gives me a count of
+ matches, but doesn't list the matching documents.</a><br>
+ 5.12. <a href="#q5.12">I can't seem to index documents with names
+ like left_index.html with htdig.</a><br>
+ 5.13. <a href="#q5.13">I get Premature End of Script Headers errors
+ when running htsearch.</a><br>
+ 5.14. <a href="#q5.14">I get Segmentation faults when running
+ htdig, htsearch or htfuzzy.</a><br>
+ 5.15. <a href="#q5.15">Why does htdig 3.1.3 mangle URL parameters
+ that contain bare "&amp;" characters?</a><br>
+ 5.16. <a href="#q5.16">When I run htmerge, it stops with an
+ "Unable to open word list file '.../db.wordlist'" message.</a><br>
+ 5.17. <a href="#q5.17">When using Netscape, htsearch always returns the
+ "No match" page.</a><br>
+ 5.18. <a href="#q5.18">Why doesn't htdig follow links to other
+ pages in JavaScript code?</a><br>
+ 5.19. <a href="#q5.19">When I run htsearch from the web server,
+ it returns a bunch of binary data.</a><br>
+ 5.20. <a href="#q5.20">Why are the betas of 3.2 so slow at indexing?</a><br>
+ 5.21. <a href="#q5.21">Why does htsearch use ";" instead of
+ "&amp;" to separate URL parameters for the page buttons?</a><br>
+ 5.22. <a href="#q5.22">Why does htsearch show the
+ "&amp;" character as "&amp;amp;" in search results?</a><br>
+ 5.23. <a href="#q5.23">I get Internal Server or Unrecognized
+ character errors when running htsearch.</a><br>
+ 5.24. <a href="#q5.24">I took some settings out of
+ my htdig.conf but they're still set.</a><br>
+ 5.25. <a href="#q5.25">When I run htdig on my site,
+ it misses entire directories.</a><br>
+ 5.26. <a href="#q5.26">What do all the numbers and symbols
+ in the htdig -v output mean?</a><br>
+ 5.27. <a href="#q5.27">Why is htdig rejecting some of the
+ links in my documents?</a><br>
+ 5.28. <a href="#q5.28">When I run htdig or htmerge, I get a
+ "DB2 problem...: missing or empty key value specified" message.</a><br>
+ 5.29. <a href="#q5.29">When I run htdig on my site,
+ it seems to go on and on without ending.</a><br>
+ 5.30. <a href="#q5.30">Why does htsearch no longer recognize
+ the -c option when run from the web server?</a><br>
+ 5.31. <a href="#q5.31">I've set a config attribute exactly
+ as documented but it seems to have no effect.</a><br>
+ 5.32. <a href="#q5.32">When I run htsearch, it gives a page
+ with an "Unable to read configuration file" message.</a><br>
+ 5.33. <a href="#q5.33">How can I find out which version
+ of ht://Dig I have installed?</a><br>
+ 5.34. <a href="#q5.34">When running htdig, I get "Error (0):
+ PDF file is damaged - attempting to reconstruct xref table..."</a><br>
+ 5.35. <a href="#q5.35">When running htdig on Mandrake Linux,
+ I get "host not found" and "no server running" errors.</a><br>
+ 5.36. <a href="#q5.36">When I run htsearch, it gives me the
+ list of matching documents, but no header or footer.</a><br>
+ 5.37. <a href="#q5.37">When I index files with doc2html.pl,
+ it fails with the "UNABLE to convert" error.</a><br>
+ 5.38. <a href="#q5.38">Why do my searches find search terms
+ in pathnames, or how do I prevent matching filenames?</a><br>
+ 5.39. <a href="#q5.39">I set up an external parser but I still
+ can't index Word/Excel/PowerPoint/PDF documents.</a><br>
+
+ <hr noshade size=4>
+ <h2>Answers</h2>
+
+ <h3>1. General</h3>
+ <strong>1.1. <a name="q1.1">Can I search the internet with
+ ht://Dig?</a></strong><br>
+ <p>No, ht://Dig is a system for indexing and searching a
+ finite (not necessarily small) set of sites or an intranet. It
+ is not meant to replace any of the many internet-wide search
+ engines.</p>
+
+ <strong>1.2. <a name="q1.2">Can I index the internet with
+ ht://Dig?</a></strong><br>
+ <p>No, as above, ht://Dig is not meant as an
+ internet-wide search engine. While there is
+ <em>theoretically</em> nothing to stop you from indexing as
+ much as you wish, practical considerations (e.g. time, disk
+ space, memory, etc.) will limit this.</p>
+
+ <strong>1.3. <a name="q1.3">What's the difference between htdig and
+ ht://Dig?</a></strong><br>
+ <p>The complete ht://Dig package consists of several programs, one of
+ which is called "htdig." This program performs the "digging" or
+ indexing of the web pages. Of course an index doesn't do you much good
+ without a program to sort it, search through it, etc.</p>
+
+ <strong>1.4. <a name="q1.4">I sent mail to Andrew or Geoff
+ or Gilles, but I never got a response!</a></strong><br>
+ <p>Andrew no longer does much work on ht://Dig. He has started a
+ company, called <a href="http://www.contigo.com/">Contigo
+ Software</a> and is quite busy with that. To contact any of the
+ current developers, send mail to &lt;<a
+ href="mailto:htdig-dev@lists.sourceforge.net">htdig-dev</a>&gt;.
+ This list is intended primarily for the discussion of current
+ and future development of the software.</p>
+
+ <p>Geoff and Gilles are currently the maintainers of
+ ht://Dig, but they are both volunteers. So while they do
+ read all the e-mail they receive, they may not respond
+ immediately. Questions about ht://Dig in general, and especially
+ questions or requests for help in configuring the software,
+ should be posted to the &lt;<a
+ href="mailto:htdig-general@lists.sourceforge.net">htdig-general</a>&gt;
+ mailing list. When posting a followup to a message on the
+ list, you should use the "reply to all" or "group reply"
+ feature of your mail program, to make sure the mailing list
+ address is included in the reply, rather than replying only
+ to the author of the message.
+ See also question <a href="#q1.16">1.16</a> and the
+ <a href="http://www.htdig.org/mailarchive.html">mailing list</a>
+ page.</p>
+
+ <strong>1.5. <a name="q1.5">I sent a question to the mailing list but I
+ never got a response!</a></strong><br>
+ <p>Development of ht://Dig is done by volunteers. Since we all
+ have other jobs, it may take a while before someone gets back
+ to you. Please be patient and don't hound the volunteers with
+ direct or repeated requests. If you don't get a response after
+ 3 or 4 days, then a reminder may help.
+ See also question <a href="#q1.16">1.16</a>.</p>
+
+ <strong>1.6. <a name="q1.6">I have a great idea/patch for
+ ht://Dig!</a></strong><br>
+ <p>Great! Development of ht://Dig continues through suggestions
+ and improvements from users. If you have an idea (or even better,
+ a patch), please send it to the ht://Dig mailing list so others
+ can use it. For suggestions on how to submit patches, please check
+ the <a href="dev/patches.html">Guidelines for
+ Patch Submissions</a>. If you'd like to make a feature request,
+ you can do so through the <a href="bugs.html">ht://Dig bug
+ database</a>.</p>
+
+ <strong>1.7. <a name="q1.7">Is ht://Dig Y2K compliant?</a></strong><br>
+ <p>
+ ht://Dig should be y2k compliant since it never <em>stores</em> dates as
+ two-digit years. Under ht://Dig's copyright (GPL), there is no warranty
+ whatsoever as permitted by law. If you would like an iron-clad,
+ legally-binding guarantee, feel free to check the source code
+ itself. Versions prior to 3.1.2 did have a problem with the parsing
+ of the Last-Modified header returned by the HTTP server, which will
+ cause incorrect dates to be stored for documents modified after
+ February 28, 2000 (yes, it didn't recognize 2000 as a leap year).
+ Versions prior to 3.1.5 didn't correctly handle servers that return
+ two digit years in the Last-Modified header, for years after 99.
+ These problems are fixed in the current release.
+ If you discover something else, please let us know!
+ </p>
+
+ <strong>1.8. <a name="q1.8">I think I found a bug. What should I
+ do?</a></strong><br>
+ <p>Well, there are probably bugs out there. You have two options
+ for bug-reporting. You can either mail the ht://Dig mailing list
+ at &lt;<a href="mailto:htdig-general@lists.sourceforge.net">htdig-general@lists.sourceforge.net</a>&gt; or
+ better yet, report it to the <a href="bugs.html">bug
+ database</a>, which ensures it won't
+ become lost amongst all of the other mail on the list.
+ Please try to include as much information as possible, including
+ the version of ht://Dig (see question <a href="#q5.33">5.33</a>),
+ the OS, and anything else that might be helpful.
+ Often, running the programs with one "-v" or more
+ (e.g. "-vvv") gives useful debugging information.
+ If you are unsure whether the problem is a bug or a configuration
+ problem, you should discuss the problem on
+ &lt;<a href="mailto:htdig-general@lists.sourceforge.net">htdig-general</a>&gt;
+ (after carefully reading the FAQ and searching the
+ <a href="http://www.htdig.org/mailarchive.html">mail archive</a>
+ and <a href="#q2.5">patch archive</a>,
+ of course)
+ to sort out what it is. The mailing list has a wider audience, so
+ you're more likely to get help with configuration problems there
+ than by reporting them to the bug database.
+ </p>
+
+ <p>Whether reporting problems to the bug database or mailing
+ list, we cannot stress enough the importance of
+ <strong>always</strong> indicating <strong>which version of
+ ht://Dig you are running</strong>.
+ See question <a href="#q5.33">5.33</a>. There
+ are still a lot of users, ISPs and software distributors using
+ older versions, and there have been a lot of bug fixes and
+ new features added in recent versions. Knowing which version
+ you're running is absolutely essential in helping to find a
+ solution. If you're unsure if your version is current, or what
+ fixes and features have been added in more recent versions,
+ please see the <a href="RELEASE.html">
+ release notes</a>. See also question <a href="#q2.1">2.1</a>.</p>
+
+ <strong>1.9. <a name="q1.9">Does ht://Dig support phrase or near
+ matching?</a></strong><br>
+ <p>Phrase searching has been added for the 3.2 release,
+ which is currently in the beta phase
+ (<a href="http://www.htdig.org/files/htdig-3.2.0b6.tar.gz">3.2.0b6</a>
+ as of this writing). Near or proximity matching will probably be added
+ in a future beta.
+ </p>
+
+ <strong>1.10. <a name="q1.10">What are the practical and/or theoretical
+ limits of ht://Dig?</a></strong><br>
+ <p>The code itself doesn't put any real limit on the number of
+ pages. There are several sites in the hundreds of thousands
+ of pages. As for practical limits, it depends a lot on how
+ many pages you plan on indexing. Some operating systems limit
+ files to 2 GB in size, which can become a problem with a large
+ database. There are also slightly different limits to each of
+ the programs. Right now htmerge performs a sort on the words
+ indexed. Most sort programs use a fair amount of RAM and
+ temporary disk space as they assemble the sorted list. The
+ htdig program stores a fair amount of information about the
+ URLs it visits, in part to only index a page once. This takes
+ a fair amount of RAM. With cheap RAM, it never hurts to throw
+ more memory at indexing larger sites. In a pinch, swap will
+ work, but it obviously really slows things down.</p>
+
+ <p>The 3.2 development code helps with many of these
+ limitations. In particular, it generates the databases on the
+ fly, which means you don't have to sort them before
+ searching. Additionally, the new databases are compressed
+ significantly, making them usually around 50% the size of
+ those in previous versions.</p>
+
+ <strong>1.11. <a name="q1.11">Do any ISPs offer ht://Dig as part of
+ their web hosting services?</a></strong><br>
+ <p>Yes. A list of such ISPs is <a href="isp.html">available
+ here</a>.
+ </p>
+
+ <strong>1.12. <a name="q1.12">Can I use ht://Dig on a
+ commercial website?</a></strong><br>
+ <p>Sure! The <a href="COPYING">GNU Library General Public License (LGPL)</a> has no
+ restrictions on use. So you are free to use ht://Dig however you
+ want on your website, personal files, etc. The license only
+ restricts distribution. So if you're planning on a
+ commercial software product that includes ht://Dig, you will
+ have to provide source code including any modifications upon
+ request.
+ </p>
+
+ <strong>1.13. <a name="q1.13">Why do you use a non-free
+ product to index PDF files?</a></strong><br>
+ <p>
+ We don't. You <em>can</em> use the &quot;acroread&quot;
+ program to index PDF files, but this is no longer
+ recommended. Initially this program was the only reliable
+ way to extract data from PDF files. However, the <a
+ href="http://www.foolabs.com/xpdf/">xpdf package</a> is a
+ reliable, free software package for indexing and viewing PDF
+ files. See question <a href="#q4.9">4.9</a> for details on
+ using xpdf to index PDF files. We do not advocate using
+ acroread any longer because it is a proprietary product.
+ Additionally it is no longer reliable at extracting data.
+ </p>
+
+ <strong>1.14. <a name="q1.14">Why do you have all those SourceForge
+ logos on your website?</a></strong><br>
+ <p><a href="http://sourceforge.net/">SourceForge</a> is a
+ new service for open source software. You can host your
+ project on SourceForge servers and use many of their
+ services like bug-tracking and the like. The ht://Dig
+ project currently uses SourceForge for a mirror of the main
+ website at <a
+ href="http://htdig.sourceforge.net/">htdig.sourceforge.net</a>
+ as well as a mirror of ht://Dig releases and contributed
+ work.
+ </p>
+
+ <strong>1.15. <a name="q1.15">My question isn't answered here.
+ Where should I go for help?</a></strong><br>
+ <p>
+ Before you go anywhere else, think of other ways of phrasing your
+ question. Many times people have questions that are very similar to
+ other FAQ and while we try to phrase the queries in the FAQ closely to
+ the most common questions, we obviously can't get them all! The next
+ place to check is the documentation itself. In particular, take a
+ look at the list of configuration attributes, particularly the list <a
+ href="cf_byname.html">by name</a> and <a
+ href="cf_byprog.html">by program</a>. There are a
+ lot of them, but chances are there's something that might fit your needs.
+ You should also take a close look at all of
+ <a href="htsearch.html">htsearch</a>'s
+ documentation, especially the section "HTML form" which describes
+ all the CGI input parameters available for controlling the search,
+ including limiting the search to certain subdirectories.
+ You can find the answer yourself to almost all "how can I..."
+ questions by exploring what the various configuration attributes
+ and search form input parameters can do.
+ Also have a look at our collection of
+ <a href="http://www.htdig.org/contrib/guides.html">Contributed Guides</a>
+ for help on things like
+ <a href="http://www.htdig.org/files/contrib/guides/htmlhelp.html">HTML
+ forms</a> and CGI, tutorials on installing, configuring, using, and
+ internationalizing ht://Dig, as well as using PHP with htsearch.
+ </p>
+ <p>
+ Finally, if you've exhausted all the online documentation, there's the
+ <a href="mailto:htdig-general@lists.sourceforge.net">htdig-general</a> mailing list.
+ There are hundreds of users subscribed and chances are good that someone
+ has had a similar problem before or can suggest a solution.
+ </p>
+
+ <strong>1.16. <a name="q1.16">Why do the developers get annoyed when
+ I e-mail questions directly to them rather than the mailing list?</a></strong><br>
+ <p>The <a href="mailto:htdig-general@lists.sourceforge.net">htdig-general</a>
+ mailing list exists for dealing with questions about the
+ software, its installation, configuration, and problems with
+ it. E-mailing the developers directly circumvents this forum
+ and its benefits. Most annoyingly, it puts the onus on an
+ individual to answer, even if that individual is not the best or
+ most qualified person to answer. This is not a one-man show. It
+ also circumvents the <a href="http://www.htdig.org/mailarchive.html">archiving
+ mechanism</a> of the mailing list,
+ so not only do subscribers not see these private messages
+ and replies, but future users who may run into the exact same
+ problems won't see them. Remember that the developers are all
+ volunteers, and they don't work for free for your benefit alone.
+ They volunteer for the benefit of the whole ht://Dig user
+ community, so don't expect extra support from them outside of
+ that community. See also questions <a href="#q1.4">1.4</a>
+ and <a href="#q1.5">1.5</a>.</p>
+
+ <p>Note also that when you reply to a message on the list, you
+ should make sure the reply gets on the list as well, provided your
+ reply is still on-topic. See question <a href="#q1.17">1.17</a>
+ below.</p>
+
+ <strong>1.17. <a name="q1.17">Why do replies to messages on the
+ mailing list only go to the sender and not to the list?</a></strong><br>
+ <p>The simple answer is that, unlike some mailing lists, the
+ lists on SourceForge don't force replies back on the list. This
+ is actually a good thing, because you can reply to the sender
+ directly if you want to, or you can use your mail program's
+ "reply to all" capability (sometimes called "group reply")
+ to reply to the mailing list as well. It does mean you have to
+ think before you post a reply, but some would argue that this
+ is a good thing too. There are some compelling reasons to try to
+ keep on-topic discussions on the list, though (see questions
+ <a href="#q1.16">1.16</a> and <a href="#q1.4">1.4</a> above).</p>
+
+ <p>The technical answer is
+ <a href="http://sourceforge.net/docman/display_doc.php?docid=6693&amp;group_id=1">
+ SourceForge's policy on Reply-To: munging</a>, where you'll
+ find all the gory details about the pros and cons of the two
+ common ways of setting up a mailing list, and why SourceForge
+ turns off Reply-To munging. It so happens that the ht://Dig
+ maintainers agree with SourceForge's policy on this, even if
+ we did have a say in the matter. So, counterarguments to this
+ policy are rather moot, and it would be better not to waste
+ any more mailing list bandwidth debating them. (We've heard
+ all the arguments anyway.)</p>
+
+ <strong>1.18. <a name="q1.18">Can I use ht://Dig to index and search
+ an SQL database?</a></strong><br>
+ <p>You can if your database has a web-based front end that can
+ be "spidered" by ht://Dig. The requirement is that every search
+ result must resolve to a unique URL which can be accessed via
+ HTTP. The htdig program uses these URLs, which you feed it via
+ the <a href="attrs.html#start_url">start_url</a> attribute, to
+ fetch and index each page of information. The search results
+ will then give a list of URLs for all pages that match the
+ search terms. If you don't have such a front end to your
+ database, or the search results must be given as something
+ other than URLs, then ht://Dig is probably not the best way of
+ dealing with this problem: you may be better off using an SQL
+ query engine that works directly on your own database, rather
+ than building a separate ht://Dig database for searching.</p>
+
+ <p>Ted Stresen-Reuter had the following tips: "In my case,
+ because I like htdig's ability to rank results (and that
+ ranking can be modified), I created an index page that simply
+ walks through each record and indexes each record (with
+ <em>next</em> and <em>previous</em> links so the spider can
+ read all the records). And then I do one other thing: I make
+ the <code>&lt;title&gt;</code> tag start with the unique ID
+ of each record. Then, when I'm parsing the search results, I
+ do a lookup on the database using the title tag as the key."</p>
+
+ <hr noshade size=2>
+
+ <h3>2. Getting ht://Dig</h3>
+ <strong>2.1. <a name="q2.1">What's the latest version of ht://Dig?</a></strong><br>
+ <p>The latest version is 3.1.6 as of this writing. A beta
+ version of the 3.2 code,
+ <a href="http://www.htdig.org/files/htdig-3.2.0b6.tar.gz">3.2.0b6</a>,
+ is also available, for those who wish to test it.
+ You can find out about the latest version by reading the
+ <a href="RELEASE.html">release
+ notes</a>.</p>
+
+ <p><strong>Note</strong> that if you're running any version
+ older than 3.1.5 (including 3.2.0b1) on a public web site,
+ you should upgrade immediately, as older versions have a
+ rather serious security hole which is explained in detail in
+ this <a
+ href="http://www.htdig.org/htdig-dev/2000/02/0272.html">advisory</a>
+ which was sent to the Bugtraq mailing list.
+ Another slightly less serious, but still troubling security hole
+ exists in 3.1.5 and older (including 3.2.0b3 and older), so you
+ should upgrade if you're running one of these. You can view details
+ on this vulnerability from the
+ <a href="http://www.securityfocus.com/bid/3410">bugtraq mailing list.</a>
+ If you're unsure of which version you're running, see question
+ <a href="#q5.33">5.33</a>.</p>
+
+ <strong>2.2. <a name="q2.2">Are there binary distributions of
+ ht://Dig?</a></strong><br>
+ <p>We're trying to get consistent binary distributions for
+ popular platforms. Contributed binary releases will go in <a
+ href="http://www.htdig.org/files/contrib/binaries/">
+ the contributed binaries section</a>
+ and contributions should be mentioned to the <a
+ href="mailto:htdig-general@lists.sourceforge.net">htdig-general</a>
+ mailing list.</p>
+
+ <p>Anyone who would like to make consistent binary
+ distributions of ht://Dig at least should sign up to the <a
+ href="mailing.html">htdig-announce mailing list</a>.</p>
+
+ <strong>2.3. <a name="q2.3">Are there mirror sites for ht://Dig?</a></strong><br>
+ <p>Yes, see our <a href="mirrors.html">mirrors
+ listing</a>. If you'd like to mirror the site, please see
+ the <a href="howto-mirror.html">mirroring guide</a>.</p>
+
+ <strong>2.4. <a name="q2.4">Is ht://Dig available by ftp?</a></strong><br>
+ <p>Yes. You can find the current versions and several older
+ versions at various &lt;<a
+ href="mirrors.html">mirror sites</a>&gt;
+ as well as the other locations mentioned in the <a
+ href="where.html">download page</a>.</p>
+
+ <strong>2.5. <a name="q2.5">Are patches around to upgrade between
+ versions?</a></strong><br>
+ <p>Most versions are also distributed as a patch to the previous
+ version's source code. The most recent exception to this was
+ version 3.1.0b1. Since this version switched from the GDBM
+ database to DB2, the new database package needed to be shipped
+ with the distribution. This made the potential patch almost as large
+ as the regular distribution. Update patches resumed with version
+ 3.1.0b2. You can also find archives of patches submitted to
+ the htdig mailing lists, to fix specific bugs or add features,
+ at Joe Jah's <a href="ftp://ftp.ccsf.org/htdig-patches/">
+ htdig-patches ftp site</a>.</p>
+
+ <strong>2.6. <a name="q2.6">Is there a Windows 95/98/2000/NT
+ version of ht://Dig?</a></strong><br>
+ <p>The ht://Dig package can be built on the Win32 platform when
+ using the Cygwin package. For details, see the contributed guide,
+ <a href="http://www.htdig.org/files/contrib/guides/Installing_on_Win32.html">
+ <em>Idiot's Guide to Installing ht://Dig on Win32</em></a>.
+ </p>
+ <p>
+ As of the <a href="http://www.htdig.org/files/htdig-3.2.0b5.tar.gz">3.2.0b5</a>
+ beta release, there is also native Win32 support, thanks to
+ Neal Richter. (Installation docs will be written soon...)
+ </p>
+
+ <strong>2.7. <a name="q2.7">Where can I find the documentation for my
+ version of ht://Dig?</a></strong><br>
+ <p>The documentation for the most recent stable release is always
+ posted at <a href="http://www.htdig.org/">www.htdig.org</a>.
+ The documentation for the latest beta release can be found at
+ <a href="http://www.htdig.org/dev/htdig-3.2/">http://www.htdig.org/dev/htdig-3.2/</a>.
+ In all releases, the documentation is included in the
+ <strong>htdoc</strong> subdirectory of the source distribution, so
+ you always have access to the documentation for your current version.
+ </p>
+
+ <hr noshade size=2>
+
+ <h3>3. Compiling</h3>
+ <strong>3.1. <a name="q3.1">When I compile ht://Dig I get an error about
+ libht.a</a></strong><br>
+ <p>This usually indicates that either libstdc++ is not installed or
+ is installed incorrectly. To get libstdc++ or any other GNU tool,
+ check
+ <a
+ href="ftp://ftp.gnu.org/gnu/">ftp://ftp.gnu.org/gnu/</a>.
+ Note that the most recent versions of gcc come with
+ libstdc++ included and are available from <a
+ href="http://gcc.gnu.org/">http://gcc.gnu.org/</a>.</p>
+
+ <strong>3.2. <a name="q3.2">I get an error about -lg</a></strong><br>
+ <p>This is due to a bug in the Makefile.config.in of version
+ 3.1.0b1. Remove all flags "-ggdb" in Makefile.config.in. Then
+ type "./config.status" to rebuild the Makefiles and
+ recompile. This bug is fixed in version 3.1.0b2.</p>
+
+ <strong>3.3. <a name="q3.3">I'm compiling on Digital Unix and I get
+ messages about "unresolved" and "db_open."</a></strong><br>
+ <p>Answer contributed by George Adams
+ &lt;learningapache@my-dejanews.com&gt;</p>
+
+ <p>What you're seeing are problems related to the Berkeley DB
+ library. htdig needs a fairly modern version of db, which is
+ why it ships with one that works. (see that -L../db-2.4.14/dist
+ line? That's where htdig's db library is).<br>
+
+ The solution is to modify the c++ command so it explicitly
+ references the correct libdb.a . You can do this by replacing
+ the "-ldb" directive in the c++ command with
+ "../db-2.4.14/dist/libdb.a" This problem has been resolved as of
+ version 3.1.0.</p>
+
+ <strong>3.4. <a name="q3.4">I'm compiling on FreeBSD and I get lots
+ of messages about '___error' being unresolved.</a></strong><br>
+ <p>Answer contributed by Laura Wingerd &lt;laura@perforce.com&gt;<br>
+ I got a clean build of htdig-3.1.2 on FreeBSD 2.2.8 by taking
+ -D_THREAD_SAFE out of CPPFLAGS, and setting LIBS to null, in
+ db/dist/configure.</p>
+
+ <strong>3.5. <a name="q3.5">I'm compiling on HP/UX and I get a complaint about
+ "Large Files not supported."</a></strong><br>
+ <p>The db/ package, included with ht://Dig, seems to be unable to complete
+ on HP/UX 10.20 in particular. After running the top-level configure
+ script, cd into db/dist and type:</p>
+ <code>./configure --disable-bigfile</code>
+ <p>Then continue with the normal compilation.</p>
+
+ <strong>3.6. <a name="q3.6">I'm compiling on Solaris and when I run the
+ programs I get complaints about not finding libstdc++.</a></strong><br>
+ <p>Answer contributed by Adam Rice &lt;adam@newsquest.co.uk&gt;</p>
+ <p>The problem is that the Solaris loader can't find the library. The
+ best thing to do is set the LD_RUN_PATH environment variable <em>during compile</em>
+ to the directory where libstdc++.so.2.8.1.1 lives. This tells the linker
+ to search that directory at runtime.
+ </p>
+
+ <p>Note that LD_RUN_PATH is not to be confused with LD_LIBRARY_PATH.
+ The latter is parsed at run-time, while LD_RUN_PATH essentially
+ compiles in a library path into the executable, so that it doesn't
+ need a LD_LIBRARY_PATH setting to find its libraries. This allows
+ you to avoid all the complexities of setting an environment
+ variable for a CGI program run from the server. If all else fails,
+ you can always run your programs from wrapper shell scripts that
+ set the LD_LIBRARY_PATH environment variable appropriately.</p>
+
+ <p>Note also that while this answer is specific to Solaris, it may
+ work for other OSes too, so you may want to give it a try. However,
+ not all versions of the <code>ld</code> program on all OSes support
+ the LD_RUN_PATH environment variable, even if these systems support
+ shared libraries. Try "<code>man&nbsp;ld</code>" on your system to
+ find out the best way of setting the runtime search path for shared
+ libraries. If <code>ld</code> doesn't support LD_RUN_PATH, but does
+ support the <code>-R</code> option, you can add one or more of these
+ options to LIBDIRS in Makefile.config before running make on a 3.1.x
+ release. (For a 3.2 beta release, you can add these options to the
+ LDFLAGS environment variable before you run ./configure.)</p>
+
+ <strong>3.7. <a name="q3.7">I'm compiling on IRIX and I'm having
+ database problems when I run the program.</a></strong><br>
+ <p>
+ It is not entirely clear why these problems occur, though
+ they seem to only happen when older compilers are
+ used. Several people have reported that the problems go away
+ when using the latest version of <a href="http://gcc.gnu.org/">gcc</a>.
+ </p>
+
+ <strong>3.8. <a name="q3.8">I'm compiling with gcc 3.2 and getting
+ all sorts of warnings/errors about ostream and such.</a></strong><br>
+ <p>
+ With versions before 3.2.0b5,
+ you should use the following command to configure the ht://Dig
+ package so it can be built with gcc 3.2:
+<pre>
+CXXFLAGS=-Wno-deprecated CPPFLAGS=-Wno-deprecated ./configure
+</pre>
+ </p>
+
+ <hr noshade size=2>
+
+ <h3>4. Configuration</h3>
+ <strong>4.1. <a name="q4.1">How come I can't index my site?</a></strong><br>
+ <p>There are a variety of reasons ht://Dig won't index a
+ site. To get to the bottom of things, it's advisable to turn on
+ some debugging output from the htdig program. When running from
+ the command-line, try "-vvv" in addition to any other
+ flags. This will add debugging output, including the responses
+ from the server.</p>
+ <p>See also questions <a href="#q5.25">5.25</a>,
+ <a href="#q5.27">5.27</a>, <a href="#q5.16">5.16</a> and
+ <a href="#q5.18">5.18</a>.</p>
+
+ <strong>4.2. <a name="q4.2">How can I change the output format of htsearch?</a></strong><br>
+<p>Answer contributed by: Malki Cymbalista &lt;Malki.Cymbalista@weizmann.ac.il&gt;</p>
+
+<p>You can change the output format of htsearch by creating different
+header, footer and result files that specify how you want the output
+to look. You then create a configuration file that specifies which
+files to use. In the html document that links to the search, you
+specify which configuration file to use.</p>
+
+<p>So the configuration file would have the lines:</p>
+<pre>
+search_results_header: ${common_dir}/ccheader.html
+search_results_footer: ${common_dir}/ccfooter.html
+template_map: Long long builtin-long \
+ Short short builtin-short \
+ Default default ${common_dir}/ccresult.html
+template_name: Default
+</pre>
+<p>You would also put into the configuration file any other lines from the
+default configuration file that apply to htsearch.</p>
+
+<p>The files ${common_dir}/ccheader.html and
+${common_dir}/ccfooter.html and ${common_dir}/ccresult.html would be
+tailored to give the output in the desired format.</p>
+
+<p>Assuming your configuration file is called cc.conf, the html file that
+links to the search has to set the config parameter equal to cc. The
+following line would do it:<br>
+<code>&lt;input type="hidden" name="config" value="cc"&gt;</code></p>
+
+ <p><strong>Note:</strong> Don't just add the line above to your
+ <a href="hts_form.html">search form</a>
+ without checking if there isn't already a similar
+ line giving the config attribute a different value. The sample
+ search.html form that comes with the package includes a line
+ like this already, giving "config" the default value of "htdig".
+ If it's there, modify it instead of adding another definition.
+ The config input parameter doesn't need to be hidden either, and
+ you may want to define it as a pull-down list to select different
+ databases (see question <a href="#q4.4">4.4</a>).</p>
+
+ <strong>4.3. <a name="q4.3">How do I index pages that start with '~'?</a></strong><br>
+ <p>
+ ht://Dig should index pages starting with '~' as if it was another
+ web browser. If you are having problems with this, check your server
+ log files to see what file the server is attempting to return.
+ </p>
+
+ <strong>4.4. <a name="q4.4">Can I use multiple databases?</a></strong><br>
+ <p>Yes, though you may find it easier to have one larger
+ database and use restrict or exclude fields on searches. To use
+ multiple databases, you will need a config file for each
+ database. Then each file will set the
+ <a href="attrs.html#database_dir">database_dir</a> or
+ <a href="attrs.html#database_base">database_base</a> attribute to
+ change the name of the databases. The config file is selected
+ by the <strong>config</strong> input field in the search form.
+ <br>See also questions <a href="#q4.2">4.2</a> and
+ <a href="#q4.20">4.20</a>.</p>
+
+ <strong>4.5. <a name="q4.5">OK, I can use multiple databases. Can I
+ merge them into one?</a></strong><br>
+ <p>As of version 3.1.0, you can do this with the -m option to
+ <a href="htmerge.html">htmerge</a>.</p>
+
+ <strong>4.6. <a name="q4.6">Wow, ht://Dig eats up a lot of disk
+ space. How can I cut down?</a></strong><br>
+ <p>There are several ways to cut down on disk space. One is
+ not to use the "-a" option, which creates work copies of the
+ databases. Naturally this essentially doubles the disk
+ usage. If you don't need to index and search at the same time, you can
+ ignore this flag.</p>
+
+ <p>If you are running 3.2.0b5 or higher and don't have
+ <a href="dev/htdig-3.2/attrs.html#wordlist_compress_zlib">compression</a>
+ turned on, then turning that on will also save considerable space.</p>
+
+ <p>Changing configuration variables can also help cut
+ down on disk usage. Decreasing
+ <a href="attrs.html#max_head_length">max_head_length</a> and
+ <a href="attrs.html#max_meta_description_length">max_meta_description_length</a>
+ will cut down on the size of the excerpts stored (in fact, if you
+ don't have
+ <a href="attrs.html#use_meta_description">use_meta_description</a>
+ set, you can set
+ max_meta_description_length to 0!).</p>
+
+ <p>If you are running 3.2.0b6 or higher, you can turn off
+ <a href="dev/htdig-3.2/attrs.html#store_phrases">store_phrases</a>. This cuts the
+ database size by about 60%, at the expense of severely limiting
+ the effectiveness of phrase searches. It also reduces digging time
+ slightly.</p>
+
+ <p>Other techniques include removing the db.wordlist file and adding
+ more words to the <a href="attrs.html#bad_words">bad_words</a>
+ file.</p>
+
+ <p>The University of Leipzig has published
+ <a href="http://wortschatz.uni-leipzig.de/html/wliste.html">
+ word lists</a> containing the 100, 1000 and 10000 most often used
+ words in English, German, French and Dutch. No copyrights or
+ restrictions seem to be applied to the downloadable files. These
+ can be very handy when putting together a bad_words file. Thanks
+ to Peter Asemann for this tip.</p>
+
+ <strong>4.7. <a name="q4.7">Can I use SSI or other CGIs in my
+ htsearch results?</a></strong><br>
+ <p>Not really. Apache will not parse CGI output for SSI
+ statements (See the <a
+ href="http://www.apache.org/docs/misc/FAQ.html#ssi-part-iii">Apache
+ FAQ</a>). Thus, the htsearch CGI does not understand SSI
+ markup and thus cannot include other
+ CGIs. However, it is possible to do it the other way round:
+ you can have the htsearch results included in your dynamic
+ page.
+ </p>
+ <p>
+ The Apache project has mentioned that this will be a
+ feature added to the Apache 2.0 version, currently in development.
+ </p>
+
+ <p>The easiest approach in the meantime is using SSI with
+ the help of the <a
+ href="attrs.html#script_name">script_name</a> configuration
+ file attribute. See the <code>contrib/scriptname</code>
+ directory for a small example using SSI.</p>
+
+ <p>For CGI and PHP, you need a &quot;wrapper&quot; script to
+ do that. For perl script examples, see the files in
+ <code>contrib/ewswrap</code>. The PHP guide (see <a
+ href="http://www.htdig.org/contrib/guides.html">contributed
+ guides</a>) not only describes a wrapper script for PHP, but
+ also offers a step by step tutorial to the basics of
+ ht://dig and is well worth reading.
+ For other alternatives, see question <a href="#q4.11">4.11</a>.
+ </p>
+
+ <strong>4.8. <a name="q4.8">How do I index Word, Excel, PowerPoint
+ or PostScript documents?</a></strong><br>
+ <p>This must be done with an
+ <a href="attrs.html#external_parsers">external parser or converter</a>.
+ A sample of such an external converter is the
+ contrib/doc2html/doc2html.pl Perl script.
+ It will parse Word, PostScript, PDF and other documents, when used
+ with the appropriate document to text converters. It uses catdoc to
+ parse Word documents, and ps2ascii to parse PostScript files. The
+ comments in the Perl script and accompanying documentation
+ indicate where you can obtain these converters.</p>
+
+ <p>Versions of htdig before 3.1.4 don't support external converters,
+ so you have to use an external parser script such as
+ contrib/parse_doc.pl (or better yet, upgrade htdig if you can).
+ External converter scripts are simpler to write and maintain than a
+ full external parser, as they just convert input documents to
+ text/plain or text/html, and pass that back to htdig to be parsed.
+ Parsing is more consistent across document types with external
+ converters, because the final work is done by htdig's internal
+ parsers. External parser scripts tend to be hacks that don't
+ recognize a lot of the parsing attributes in your htdig.conf, so
+ they have to be hacked some more when you change your attributes.</p>
+
+ <p>The most recent versions of parse_doc.pl, conv_doc.pl and
+ the doc2html package are available on our <a
+ href="http://www.htdig.org/files/contrib/parsers/">web site</a>.<br>
+ See below for an example of doc2html.pl, or see the comments in
+ conv_doc.pl and parse_doc.pl, or the documentation for doc2html
+ for examples of their usage.
+ For help with troubleshooting, see questions
+ <a href="#q5.37">5.37</a> and <a href="#q5.39">5.39</a>.</p>
+
+ <strong>4.9. <a name="q4.9">How do I index PDF files?</a></strong><br>
+ <p>This too can be done with an
+ <a href="attrs.html#external_parsers">external parser or converter</a>,
+ in combination with the pdftotext program that is part of the
+ <a href="http://www.foolabs.com/xpdf/">xpdf</a> 0.90 package. A
+ sample of such a converter is the doc2html.pl Perl
+ script. It uses pdftotext to parse PDF documents, then processes
+ the text into external parser records.
+ The most recent version of doc2html.pl is available on our <a
+ href="http://www.htdig.org/files/contrib/parsers/">web
+ site</a>.</p>
+
+ <p>For example, you could put this in your configuration file:</p>
+<pre>
+<a href="attrs.html#external_parsers">external_parsers</a>: application/msword-&gt;text/html /usr/local/bin/doc2html.pl \
+ application/postscript-&gt;text/html /usr/local/bin/doc2html.pl \
+ application/pdf-&gt;text/html /usr/local/bin/doc2html.pl
+</pre>
+ <p>You would also need to configure the script to indicate where all
+ of the document to text converters are installed. See the DETAILS
+ file that comes with doc2html for more information.</p>
+
+ <p>Versions of htdig before 3.1.4 don't support external converters,
+ so you have to use an external parser script such as
+ contrib/parse_doc.pl (or better yet, upgrade htdig if you can).
+ See question <a href="#q4.8">4.8</a> above.</p>
+
+ <p>Whether you use this external parser or converter, or acroread
+ with the <a href="attrs.html#pdf_parser">pdf_parser</a> attribute,
+ to successfully index PDF files be sure to set the <a
+ href="attrs.html#max_doc_size">max_doc_size</a> attribute to
+ a value larger than the size of your largest PDF file. PDF
+ documents cannot be parsed if they are truncated.</p>
+
+ <p>This also raises the questions of why two different
+ methods of indexing PDFs are supported, and which method
+ is preferred. The built-in PDF support, which uses acroread
+ to convert the PDF to PostScript, was the first method which
+ was provided. It had a few problems with it: acroread is not
+ open source, it is not supported on all systems on which
+ ht://Dig can run, and for some PDFs, the PostScript that
+ acroread generated was very difficult to parse into indexable
+ text. Also, the built-in PDF support expected PDF documents to
+ use the same character encoding as is defined in your current
+ <a href="attrs.html#locale">locale</a>, which isn't always the
+ case. The external converters, which use pdftotext, were developed
+ to overcome these problems. xpdf 0.90 is free software, and its
+ pdftotext utility works very well as an indexing tool.
+ It also converts various PDF encodings to the Latin 1 set.
+ It is the opinion of the developers that this is the
+ preferred method. However, some users still prefer to stick
+ with acroread, as it works well for them, and is a little
+ easier to set up if you've already installed Acrobat.</p>
+
+ <p>Also, pdftotext still has some difficulty handling text in
+ landscape orientation, even with its new -raw option in 0.90,
+ so if you need to index such text in PDFs, you may still get
+ better results with acroread. The pdf_parser attribute has been
+ removed from the 3.2 beta releases of htdig, so to use acroread
+ with htdig 3.2.0b5 or other 3.2 betas, use the acroconv.pl
+ external converter script from our <a
+ href="http://www.htdig.org/files/contrib/parsers/">web site</a>.</p>
+
+ <p>See also question <a href="#q5.2">5.2</a> below and
+ question <a href="#q1.13">1.13</a> above.
+ See questions <a href="#q5.37">5.37</a> and <a href="#q5.39">5.39</a>
+ for troubleshooting tips.</p>
+
+ <strong>4.10. <a name="q4.10">How do I index documents in other
+ languages?</a></strong><br>
+ <p>The first and most important thing you must do,
+ to allow ht://Dig to properly support international
+ characters, is to define the correct locale for the
+ language and country you wish to support. This is done
+ by setting the <a href="attrs.html#locale">locale</a>
+ attribute (see question <a href="#q5.8">5.8</a>). The
+ next step is to configure ht://Dig to use dictionary and
+ affix files for the language of your choice. These can
+ be the same dictionary and affix files as are used by the
+ ispell software. A collection of these is available from
+ Geoff Kuenning's
+ <a href="http://fmg-www.cs.ucla.edu/geoff/ispell-dictionaries.html">
+ International Ispell Dictionaries page</a>, and we're slowly
+ building a collection of word lists on our <a
+ href="http://www.htdig.org/files/contrib/wordlists/">web site</a>.</p>
+ <p>For example, if you install German dictionaries in common/german,
+ you could use these lines in your configuration file:</p>
+<pre>
+<a href="attrs.html#locale">locale</a>: de_DE
+lang_dir: ${<a href="attrs.html#common_dir">common_dir</a>}/german
+<a href="attrs.html#bad_word_list">bad_word_list</a>: ${lang_dir}/bad_words
+<a href="attrs.html#endings_affix_file">endings_affix_file</a>: ${lang_dir}/german.aff
+<a href="attrs.html#endings_dictionary">endings_dictionary</a>: ${lang_dir}/german.0
+<a href="attrs.html#endings_root2word_db">endings_root2word_db</a>: ${lang_dir}/root2word.db
+<a href="attrs.html#endings_word2root_db">endings_word2root_db</a>: ${lang_dir}/word2root.db
+</pre>
+ <p>
+ You can build the endings database with <code>htfuzzy endings</code>.
+ (This command may actually take days to complete, for
+ releases older than 3.1.2. Current releases use faster regular
+ expression matching, which will speed this up by a few orders
+ of magnitude.) Note that the "*.0" files are not part of
+ the ispell dictionary distributions, but are easily made by
+ concatenating the partial dictionaries and sorting to remove
+ duplicates (e.g.: "<code>cat * | sort | uniq &gt; lang.0</code>"
+ in most cases). You will also need to redefine the synonyms
+ file if you wish to use the synonyms search algorithm. This
+ file is not included with most of the dictionaries, nor is the
+ <a href="attrs.html#bad_word_list">bad_words</a> file.</p>
+
+ <p>If you put all the language-specific
+ dictionaries and configuration files in separate directories,
+ and set all the attribute definitions accordingly in each
+ search config file to access the appropriate files, you can
+ have a multilingual setup where the user selects the language
+ by selecting the "config" input parameter value. In addition
+ to the attributes given in the example above, you may also
+ want custom settings for these language-specific attributes:
+ <a href="attrs.html#date_format">date_format</a>,
+ <a href="attrs.html#iso_8601">iso_8601</a>,
+ <a href="attrs.html#method_names">method_names</a>,
+ <a href="attrs.html#no_excerpt_text">no_excerpt_text</a>,
+ <a href="attrs.html#no_next_page_text">no_next_page_text</a>,
+ <a href="attrs.html#no_prev_page_text">no_prev_page_text</a>,
+ <a href="attrs.html#nothing_found_file">nothing_found_file</a>,
+ <a href="attrs.html#page_list_header">page_list_header</a>,
+ <a href="attrs.html#prev_page_text">prev_page_text</a>,
+ <a href="attrs.html#search_results_wrapper">search_results_wrapper</a>
+ (or <a href="attrs.html#search_results_header">search_results_header</a>
+ and <a href="attrs.html#search_results_footer">search_results_footer</a>),
+ <a href="attrs.html#sort_names">sort_names</a>,
+ <a href="attrs.html#synonym_db">synonym_db</a>,
+ <a href="attrs.html#synonym_dictionary">synonym_dictionary</a>,
+ <a href="attrs.html#syntax_error_file">syntax_error_file</a>,
+ <a href="attrs.html#template_map">template_map</a>, and of course
+ <a href="attrs.html#database_dir">database_dir</a> or
+ <a href="attrs.html#database_base">database_base</a> if you
+ maintain multiple databases for sites of different languages.
+ You could also change the definition of
+ <a href="attrs.html#common_dir">common_dir</a>, rather than
+ making up a lang_dir attribute as above, as many language-specific
+ files are defined relative to the common_dir setting.</p>
+
+ <p>If you're running version 3.1.6 of ht://Dig, you may also
+ be interested in the <strong>accents</strong> fuzzy match
+ algorithm in the
+ <a href="attrs.html#search_algorithm">search_algorithm</a>
+ attribute, which lets you treat accented and unaccented letters
+ as equivalent in words. Note that if you use the accents algorithm,
+ you need to rebuild the accents database each time you update your
+ word database, using <code>"htfuzzy accents"</code>. This command
+ isn't in the default rundig script, so you may want to add it there.
+ The accents fuzzy match algorithm is also in the 3.2 beta releases.
+ There are also the
+ <a href="attrs.html#boolean_keywords">boolean_keywords</a> and
+ <a href="attrs.html#boolean_syntax_errors">boolean_syntax_errors</a>
+ attributes in 3.1.6 for changing other language-specific messages
+ in htsearch.</p>
+
+ <p>Current versions of ht://Dig only support 8-bit
+ characters, so languages such as Chinese and Japanese, which
+ require 16-bit characters, are not currently supported.</p>
+
+ <p>Didier Lebrun has written a guide for configuring htdig to
+ support French, entitled
+ <a href="http://www.quartier-rural.org/dl/elucu/htdig-vf/lisezmoi.html">
+ Comment installer et configurer HtDig pour la langue fran&ccedil;aise</a>.
+ His "kit de francisation" is also available on
+ <a
+ href="http://www.htdig.org/files/contrib/wordlists/">our
+ web site</a>.</p>
+
+ <p>See also question <a href="#q4.2">4.2</a> for tips on customizing
+ htsearch, and question <a href="#q4.6">4.6</a> for tips on where to find
+ bad_words files.</p>
+
+ <strong>4.11. <a name="q4.11">How do I get rotating banner ads in
+ search results?</a></strong><br>
+ <p>While htsearch doesn't currently provide a means of doing
+ SSI on its output, or calling other CGI scripts, it does have
+ the capability of using environment variables in templates.</p>
+
+ <p>The easiest way to get rotating banners in htsearch is
+ to replace htsearch with a wrapper script that sets an
+ environment variable to the banner content, or whatever
+ dynamically generated content you want. Your script can then
+ call the real htsearch to do the work. The wrapper script can be
+ written as a shell script, or in Perl, C, C++, or whatever you
+ like. You'd then need to reference that environment variable
+ in header.html (or wrapper.html if that's what you're using),
+ to indicate where the dynamic content should be placed.</p>
+
+ <p>If the dynamic content is generated by a CGI script, your new
+ wrapper script which calls this CGI would then have to strip out
+ the parts that you don't want embedded in the output (headers,
+ some tags) so that only the relevant content gets put into the
+ environment variable you want. You'd also have to make sure
+ this CGI script doesn't grab the POST data or get confused by
+ the QUERY_STRING contents intended for htsearch. Your script
+ should not take anything out of, or add anything to, the
+ QUERY_STRING environment variable.</p>
+
+ <p>An alternative approach is to have a cron job that periodically
+ regenerates a different header.html or wrapper.html with the
+ new banner ad, or changes a link to a different pre-generated
+ header.html or wrapper.html file. For other alternatives, see
+ question <a href="#q4.7">4.7</a>.</p>
+
+ <strong>4.12. <a name="q4.12">How do I index numbers in documents?</a></strong><br>
+ <p>By default, htdig doesn't treat numbers without letters
+ as words, so it doesn't index them.
+ To change this behavior, you must set the
+ <a href="attrs.html#allow_numbers">allow_numbers</a>
+ attribute to true, and rebuild your index from scratch using
+ rundig or htdig with the -i option, so that bare numbers get
+ added to the index.</p>
+
+ <strong>4.13. <a name="q4.13">How can I call htsearch from a hypertext
+ link, rather than from a search form?</a></strong><br>
+ <p>If you change the search.html form to use the GET method
+ rather than POST, you can see the URLs complete with all the
+ arguments that htsearch needs for a query. Here is an example:<br>
+<code>
+http://www.grommetsRus.com/cgi-bin/htsearch?config=htdig&amp;restrict=&amp;exclude=&amp;method=and&amp;format=builtin-long&amp;words=grapple+grommets
+</code>
+ which can actually be simplified to:<br>
+<code>
+http://www.grommetsRus.com/cgi-bin/htsearch?method=and&amp;words=grapple+grommets
+</code>
+ with the current defaults. The "&amp;" character acts as a
+ separator for the input parameters, while the "+" character
+ acts as a space character within an input parameter.
+ In versions 3.1.5 or 3.2.0b2, or later, you can use a semicolon
+ character ";" as a parameter separator, rather than "&amp;", for
+ HTML 4.0 compliance.
+ Most non-alphanumeric characters should be hex-encoded following
+ the convention for URL encoding (e.g. "%" becomes "%25", "+"
+ becomes "%2B", etc). Any htsearch input parameter that you'd
+ use in a search form can be added to the URL in this way.
+ This can be embedded into an &lt;a href="..."&gt; tag.
+ <br>See also question <a href="#q5.21">5.21</a>.</p>
+
+ <strong>4.14. <a name="q4.14">How do I restrict a search to only meta
+ keywords entries in documents?</a></strong><br>
+ <p>First of all, you do <strong>not</strong> do this by using the
+ "keywords" field in the search form. This seems to be a
+ frequent cause of confusion. The "keywords" input parameter
+ to htsearch has absolutely nothing to do with searching meta
+ keywords fields. It actually predates the addition of meta
+ keyword support in 3.1.x. A better choice of name for the
+ parameter would have been "requiredwords", because that's what
+ it really means - a list of words that are all required to be
+ found somewhere in the document, in addition to the words the
+ user specifies in the search form.</p>
+
+ <p>As of 3.2.0b5, the most direct way to search for a particular
+ meta keyword is to specify the word as "keyword:&lt;word&gt;".
+ Similarly, "title:", "heading:", and "author:" restrict searches
+ to the respective fields. To search for words in the body of the
+ text, use "text:".</p>
+
+ <p>To restrict all search terms to meta keywords only, you can set all
+ <a href="attrs.html#heading_factor">factors</a> other than
+ keywords_factor to 0, and for 3.1.x, you
+ must then reindex your documents. In the 3.2 betas, you can
+ change factors at search time without needing to reindex.
+ As of 3.2.0b5, it is possible to restrict
+ the search in the query itself. Note that changing the scoring
+ factors in this way will only alter the scoring of search results,
+ and shift the low or zero scores to the end of the results when
+ sorting by score (as is done by default). For versions before
+ 3.2.0b5, the results with scores
+ of zero aren't actually removed from the search results.</p>
+
+ <strong>4.15. <a name="q4.15">Can I use meta tags to prevent htdig from
+ indexing certain files?</a></strong><br>
+ <p>Yes, in each HTML file you want to exclude, add the following
+ between the &lt;HEAD&gt; and &lt;/HEAD&gt; tags:</p>
+ <blockquote>
+ &lt;META NAME="robots" CONTENT="noindex, follow"&gt;
+ </blockquote>
+ <p>Doing so will allow htdig to still follow links to other documents,
+ but will prevent this document from being put into the index itself.
+ You can also use "nofollow" to prevent following of links. See
+ the section on <a href="meta.html">Recognized META information</a>
+ for more details. For documents produced automatically by MhonArc,
+ you can have that line inserted automatically by putting it in the
+ MhonArc resource file, in the sections IDXPGBEGIN and TIDXPGBEGIN.</p>
+
+ <p>You can also use the
+ <a href="attrs.html#noindex_start">noindex_start</a> and
+ <a href="attrs.html#noindex_end">noindex_end</a> attributes to
+ define one set of tags which will mark sections to be stripped out
+ of documents, so they don't get indexed, or you can mark sections
+ with the non-DTD &lt;noindex&gt; and &lt;/noindex&gt; tags.
+ The noindex_start and noindex_end attributes can also be used to
+ suppress in-line JavaScript code that wasn't properly enclosed in
+ HTML comment tags (see question <a href="#q4.26">4.26</a>).
+ In 3.1.6, you can also put a section between &lt;noindex follow&gt;
+ and &lt;/noindex&gt; tags to turn off indexing of text but still
+ allow htdig to follow links.</p>
+
+ <p>If you require much more elaborate schemes for avoiding indexing
+ certain parts of your HTML files, especially if you don't have
+ control over these files and can't add tags to them, you can
+ set up htdig's
+ <a href="attrs.html#external_parsers">external_parsers</a> attribute
+ with an external converter that will preprocess the HTML before
+ it's parsed and indexed by htdig. Examples of this are the
+ unhypermail.sh script in our
+ <a href="http://www.htdig.org/files/contrib/parsers/">contributed parsers</a>
+ and the ungeoify.sh script in our
+ <a href="http://www.htdig.org/files/contrib/scripts/">contributed scripts</a>.
+ By preprocessing the HTML, you can strip out parts you don't want, or
+ you can add or change tags wherever they're needed, if you're willing
+ to put in the effort to learn awk/sed/perl enough to do the job.</p>
+
+ <strong>4.16. <a name="q4.16">How do I get htsearch to use the star image
+ in a different directory than the default /htdig?</a></strong><br>
+ <p>You must set either the
+ <a href="attrs.html#image_url_prefix">image_url_prefix</a> attribute,
+ or both <a href="attrs.html#star_blank">star_blank</a> and
+ <a href="attrs.html#star_image">star_image</a> in your
+ htdig.conf, to refer to the URL path for these files. You should
+ also set this URL path similarly in common/header.html and
+ common/wrapper.html, as they also refer to the star.gif file.
+ If you want to relocate other graphics, such as the buttons or
+ the ht://Dig logo, you should change all references to these
+ in htdig.conf and common/*.html.</p>
+
+ <strong>4.17. <a name="q4.17">How do I get htdig or htsearch to rewrite
+ URLs in the search results?</a></strong><br>
+ <p>This can be done by using the <a
+ href="attrs.html#url_part_aliases">url_part_aliases</a>
+ configuration file attribute. You have to set up different
+ configuration files for htdig and htsearch, to define a
+ different setting of this attribute for each one.</p>
+
+ <p>A large number of users insist on ignoring that last point
+ and try to make do with just one definition, either for htdig
+ or htsearch, or sometimes for both. This seems to stem from
+ a fundamental misunderstanding of how this attribute works,
+ so perhaps a clarification is needed. The url_part_aliases
+ attribute uses a two stage process. In the first stage, htdig
+ encodes the URLs as they go into the database, by using the
+ pairs in url_part_aliases going from left to right. In the
+ second stage, htsearch decodes the encoded URLs taken from the
+ database, by using the pairs in url_part_aliases going from
+ right to left. If you have the same value for url_part_aliases
+ in htdig and htsearch, you end up with the same URLs in the
+ end. If you modify the first string (the from string) in
+ the pairs listed in url_part_aliases for htsearch, then when
+ htsearch decodes the URLs it ends up rewriting part of them.</p>
+
+ <p>While you might think that if you don't use url_part_aliases
+ in htdig, then you can use it in htsearch to alter unencoded
+ URLs, the reality is that if you don't encode parts of URLs
+ using url_part_aliases, they still get encoded automatically
+ by the <a href="attrs.html#common_url_parts">common_url_parts</a>
+ attribute. This helps to reduce the size of your databases. So,
+ trying to use url_part_aliases only in htsearch doesn't work
+ because there are no unencoded URLs in the database, so the
+ right hand strings in the pairs you define won't match anything.</p>
+
+ <p>You also can't put two different definitions of the
+ url_part_aliases attribute in a single configuration file, as
+ some users have attempted. When you define an attribute twice,
+ the second definition merely overrides the first. Pay close
+ attention to the description and examples for
+ <a href="attrs.html#url_part_aliases">url_part_aliases</a>.
+ You must put one definition of this attribute in your
+ configuration file for htdig, htmerge (or htpurge) and htnotify,
+ and a different definition of it in your configuration file
+ for htsearch.</p>
+
+ <strong>4.18. <a name="q4.18">What are all the options in
+ htdig.conf, and are there others?</a></strong><br>
+ <p>In ht://Dig's terminology, the settings in its configuration
+ files are called <a href="attrs.html">configuration attributes</a>,
+ to distinguish them from <a href="htdig.html">command line
+ options</a>, <a href="hts_form.html">CGI input parameters</a>
+ and <a href="hts_templates.html">template variables</a>. There are
+ many, many attributes that can be set to control almost all
+ aspects of indexing, searching, customization of output and
+ internationalization. All attributes have a built-in default
+ setting, and only a subset of these appear in the sample htdig.conf
+ file. See the documentation for all default values for attributes
+ not overridden in the configuration file, and for help on using
+ any of them.
+ See also question <a href="#q1.15">1.15</a>.</p>
+
+ <strong>4.19. <a name="q4.19">How do I get more than 10 pages of
+ 10 search results from htsearch?</a></strong><br>
+ <p>There are two attributes that control the number of matches per
+ page and the total number of pages. The number of matches per page
+ can be set in your configuration file, using the
+ <a href="attrs.html#matches_per_page">matches_per_page</a> attribute,
+ or in your <a href="hts_form.html">search form</a>, using the
+ <strong>matchesperpage</strong> input parameter.</p>
+
+ <p>The number of pages is controlled by the
+ <a href="attrs.html#maximum_pages">maximum_pages</a> attribute in
+ your search configuration file.
+ The current default for maximum_pages is 10 because the ht://Dig
+ package comes with 10 images, with numbers 1 through 10, for
+ use as page list buttons. If we increased the limit, we'd have
+ to field a whole lot more questions from users irate because
+ only the first 10 buttons are graphics, and the rest are text.
+ If you want more than 10 pages of results, change maximum_pages,
+ but you may also want to set the
+ <a href="attrs.html#page_number_text">page_number_text</a> and
+ <a href="attrs.html#no_page_number_text">no_page_number_text</a>
+ attributes in your search configuration file to nothing, or
+ remove them, to use text rather than images for the links to
+ other pages.</p>
+
+ <p>In versions of htsearch before 3.1.4, maximum_pages
+ limited only the number of page list buttons, and not the
+ actual number of pages. This was changed because there was no
+ means of limiting the total number of pages, but this ended up
+ frustrating users who wanted the ability to have more pages than
+ buttons. In 3.2.0b3 and 3.1.6 we introduced a
+ <a href="attrs.html#maximum_page_buttons">maximum_page_buttons</a>
+ attribute for this purpose.</p>
+
+ <strong>4.20. <a name="q4.20">How do I restrict a search to only
+ certain subdirectories or documents?</a></strong><br>
+ <p>That depends on whether you want to protect certain parts of
+ your site from prying eyes, or just limit the scope of search
+ results to certain relevant areas. For the latter, you just need
+ to set the <strong>restrict</strong> or <strong>exclude</strong>
+ input parameter in the <a href="hts_form.html">search form</a>.
+ This can be done using hidden input fields containing preset
+ values, text input fields, select lists, radio buttons or
+ checkboxes, as you see fit. If you use select lists, you can
+ propagate the choices to select lists in the follow-up search
+ forms using the
+ <a href="attrs.html#build_select_lists">build_select_lists</a>
+ configuration attribute.
+ The University at Albany has a good description of how to use
+ the <strong>restrict</strong> or <strong>exclude</strong> input
+ parameters: <a href="http://www.albany.edu/its/web/search/">
+ Constructing a local search using ht://Dig Search forms</a>.
+ <br>To include a hex encoded character (such as a %20 for a space)
+ in a restrict or exclude string, the '%' must again be encoded.
+ For example, to match a filename containing a space, the URL must
+ contain %20, and so the CGI parameter passed to htsearch must
+ contain %2520. The %25 encodes the '%'. (Note that this is only
+ necessary for CGI input parameters, not for the corresponding
+ configuration attributes in your htdig.conf file, as attributes
+ aren't subjected to the same hex decoding step as parameters are.)
+ <br>See also question <a href="#q4.4">4.4</a>.</p>
+
+ <p>If you wish to keep secure and non-secure areas on
+ your site separate, and avoid having unauthorized users
+ seeing documents from secure areas in their search results,
+ that takes a bit more effort. You certainly can't rely on
+ the <strong>restrict</strong> and <strong>exclude</strong>
+ parameters, or even the <strong>config</strong> parameter,
+ as any parameter in a search form can also be overridden
+ by the user in a URL with CGI parameters. The safest
+ option would be to host the secure and non-secure areas on
+ separate servers with independent installations of htsearch,
+ each with its own ht://Dig database, but that is often too
+ costly or impractical an option. The next best thing is to
+ host them on the same site, but make sure that everything
+ is very clearly separated to prevent any leakage of secure
+ data. You should maintain separate databases for the secure
+ and public areas of your site, by setting up different htdig
+ configuration files for each area. Use different settings
+ of the <a href="attrs.html#start_url">start_url</a>,
+ <a href="attrs.html#limit_urls_to">limit_urls_to</a>
+ and <a href="attrs.html#database_dir">database_dir</a>
+ configuration attributes, and possibly even different
+ <a href="attrs.html#common_dir">common_dir</a> settings as well.
+ Make sure your database_dir, and even your common_dir, are not
+ in any directories accessible from the web server. Run htdig
+ and htmerge (or rundig) with each separate configuration file,
+ to build your two databases.</p>
+
+ <p>The tricky part is to make sure your htsearch program is
+ secure. You don't want to use the same htsearch for the secure
+ and public sites, because otherwise the public site could
+ access the configuration for the secure database, making its
+ data publicly accessible. You must either compile two separate
+ versions of htsearch, with different settings of the CONFIG_DIR
+ <em>make</em> variable, or you must make a simple wrapper
+ script for htsearch that overrides the compiled-in CONFIG_DIR
+ setting by a different setting of the CONFIG_DIR environment
+ variable. Make sure the CONFIG_DIR for the secure area is
+ not a subdirectory of the CONFIG_DIR for the public area.
+ In this way, you can maintain separate directories of config
+ files for the public and secure sites, so that the secure
+ config files are not accessible from the public htsearch.</p>
+
+ <p>Put the htsearch binary or wrapper script for the secure site
+ in a different ScriptAlias'ed cgi-bin directory than the public
+ one, and protect the secure cgi-bin with a .htaccess file or
+ in your server configuration. Alternatively, you can put the
+ secure program, let's call it htssearch, in the same cgi-bin,
+ but protect that one CGI program in your server configuration,
+ e.g.:</p>
+<pre>
+&lt;Location /cgi-bin/htssearch&gt;
+AuthType Basic
+AuthName ....
+AuthUserFile ...
+AuthGroupFile ...
+&lt;Limit GET POST&gt;
+require group foo
+&lt;/Limit&gt;
+&lt;/Location&gt;
+</pre>
+ <p>This describes the setup for an Apache server. You'd need to
+ work out an equivalent configuration for your server if you're
+ not running Apache.</p>
+
+ <strong>4.21. <a name="q4.21">How can I allow people to search
+ while the index is updating?</a></strong><br>
+ <p>Answer contributed by Avi Rappoport &lt;avirr@searchtools.com&gt;</p>
+ <p>If you have enough disk space for two copies of the index
+ database, use -a with the htdig and htmerge processes. This will
+ make use of a copy of the index database with the extension
+ ".work", and update the copy instead of the originals.
+ This way, htsearch can use those originals while the update is
+ going on. When it's done, you can move the .work versions to
+ replace the originals, and htsearch will use them. The current
+ rundig script will do this for you if you supply the -a flag
+ to it. However, rundig builds the database from scratch each
+ time you run it. If you want to update an alternate copy of
+ the database, see the
+ <a href="http://www.htdig.org/files/contrib/scripts/rundig.sh">contributed
+ rundig.sh script</a>.</p>
+
+ <strong>4.22. <a name="q4.22">How can I get htdig to ignore the
+ robots.txt file or meta robots tags?</a></strong><br>
+ <p>You can't, and you shouldn't. The
+ <a href="http://www.robotstxt.org/wc/norobots.html">
+ Standard for Robot Exclusion</a> exists for a very good reason,
+ and any well behaved indexing engine or spider should conform to it.
+ If you have a problem with a robots.txt file, you really should
+ take it up with the site's webmaster. If they don't have a problem
+ with you indexing their site, they shouldn't mind setting up a
+ User-agent entry in their robots.txt file with a name you both
+ agree on. The user agent setting that htdig uses for matching
+ entries in robots.txt can be changed via the
+ <a href="attrs.html#robotstxt_name">robotstxt_name</a> attribute in
+ your config file.</p>
+
+ <p>If you have a problem with a robots meta tag in a document
+ (see question <a href="#q4.15">4.15</a>) you should take it up
+ with the author or maintainer of that page. These tags are an
+ all or nothing deal, as they can't be set up to allow some engines
+ and disallow others. If htdig encounters them, it has to give the
+ page's creator the benefit of the doubt and honour them. If
+ exceptions to the rule are wanted, this should be done with a
+ robots.txt file rather than a meta tag.</p>
+
+ <strong>4.23. <a name="q4.23">How can I get htdig not to index
+ some directories, but still follow links?</a></strong><br>
+ <p>You can simply add the directory name to your robots.txt file
+ or to the <a href="attrs.html#exclude_urls">exclude_urls</a>
+ attribute in your configuration, but that will exclude all files
+ under that directory. If you want the files in that directory to
+ be indexed, you have a couple of options. You can add an index.html
+ file to the directory, that will include a robots meta tag
+ (see question <a href="#q4.15">4.15</a>) to prevent indexing,
+ and will contain links to all your files in this directory.
+ The drawback of this is that you must maintain the index.html
+ file yourself, as it won't be automatically updated as new
+ files are added to the directory.</p>
+
+ <p>The other technique you can use, if you want the directory
+ index to be made by the web server, is to get the server to
+ insert the robots meta tag into the index page it generates.
+ In Apache, this is done using the
+ <a href="http://httpd.apache.org/docs/mod/mod_autoindex.html#headername">HeaderName</a>
+ and <a href="http://httpd.apache.org/docs/mod/mod_autoindex.html#indexoptions">IndexOptions</a>
+ directives in the directory's <strong>.htaccess</strong> file.
+ For example:</p>
+<pre> HeaderName .htrobots
+ IndexOptions FancyIndexing SuppressHTMLPreamble
+</pre>
+ <p>and in the .htrobots file:</p>
+<pre>&lt;HTML&gt;&lt;head&gt;
+&lt;META NAME="robots" CONTENT="noindex, follow"&gt;
+&lt;title&gt;Index of /this/dir&lt;/title&gt;
+&lt;/head&gt;
+</pre>
+
+ <p>If you don't mind getting just one copy of each directory,
+ but want to suppress the multiple copies generated by Apache's
+ FancyIndexing option, you can either turn off FancyIndexing or
+ you can add "?D=A ?D=D ?M=A ?M=D ?N=A ?N=D ?S=A ?S=D" to
+ the <a href="attrs.html#bad_querystr">bad_querystr</a> attribute
+ (without the quotes) to suppress the alternately sorted views of
+ the directory. For Apache 2.x, you'd use "C=D C=M C=N C=S O=A O=D"
+ instead in your bad_querystr setting.</p>
+
+ <strong>4.24. <a name="q4.24">How can I get rid of duplicates in
+ search results?</a></strong><br>
+ <p>This depends on the cause of the duplicate documents. htdig
+ does keep track of the URLs it visits, so it never puts the
+ same URL more than once in the database. So, if you have
+ duplicate documents in your search results, it's because the
+ same document appears under different URLs. Sometimes the
+ URLs vary only slightly, and in subtle ways, so you may have
+ to look hard to find out what the variation is. Here are some
+ common reasons, each requiring a different solution.</p>
+
+ <ul>
+ <li>You're indexing a case insensitive web
+ server (e.g. an NT based server), but the
+ <a href="attrs.html#case_sensitive">case_sensitive</a> attribute is
+ still set to true. In this case, if htdig encounters two URLs
+ pointing to the same document, but the case of the letters in
+ one is different than the other (even if it's only 1 letter),
+ it will not treat them as the same URL.<br><br>
+ <li>You have symbolic links (or hard links) to some of
+ these documents, so they can be reached by several URLs.
+ The solution here is to build an exclude list of URLs that
+ are actually symbolic links, and put these in
+ <a href="attrs.html#exclude_urls">exclude_urls</a>
+ (or in your robots.txt file). You can automate this using a
+ technique similar to the find command in question
+ <a href="#q5.25">5.25</a> which builds the start_url list, but
+ adding a -type l to find symbolic links.<br><br>
+ <li>You have copies of the same documents in different
+ locations. This is similar to the symbolic link problem above,
+ but harder to fix automatically.<br><br>
+ <li>The duplicate URLs result from CGI, SSI or other dynamic pages
+ that give the same content even though there may be variations in
+ the query string or other parts of the URL. The approach to
+ fix this is similar to the fix above, but may be less easy
+ to automate, depending on what the variations are. You can
+ add patterns to exclude_urls or bad_querystr to get rid of
+ unwanted variations. These are especially important to bring
+ under control, because in some cases, if left unchecked, they
+ can result in an <em>infinite virtual hierarchy</em> which htdig
+ will never be able to finish indexing. For example, in a CGI-based
+ calendar, htdig could go on following next month or next
+ year links to infinity, but this can be stopped by adding a
+ stop year to <a href="attrs.html#bad_querystr">bad_querystr</a>.
+ <br><br>Another common example happens when htdig hits a link
+ to an SSI page and the URL has an extra trailing slash. This
+ can happen with either .shtml pages or .html pages that use
+ the XBitHack. The trailing slash causes the URL to be misinterpreted
+ as a directory URL, and any relative URLs in the document are added
+ to the URL, creating longer and longer URLs that still lead to the
+ same SSI document. There are two things you can do:<ol>
+ <li>hunt down the pages with the incorrect links, i.e.
+ search for ".shtml/" or ".html/" in URLs in your documents,
+ and fix these links; or
+ <li>add .shtml/ and .html/ to your
+ <a href="attrs.html#exclude_urls">exclude_urls</a>
+ setting to get htdig to ignore these defective links.
+ </ol>The second option is easier, but you run the risk that htdig
+ will miss some SSI pages if the only links to them have the trailing
+ slash, so you may want to try hunting down the links anyway.
+ <br><br>See also question <a href="#q5.29">5.29</a>.<br><br>
+ <li>The duplicates result from session IDs in PHP or other dynamic
+ pages that give the same content even though the ID changes during
+ the indexing process. This can lead not only to duplicates, but
+ also to URLs that become unusable because of expired session IDs.
+ Session IDs are the bane of search engines, and you should avoid
+ using them if at all possible. If getting rid of them altogether
+ isn't an option, then you can at least remove them while indexing,
+ using the <a href="attrs.html#url_rewrite_rules">url_rewrite_rules</a>
+ attribute. This will only work if htdig can access the documents
+ without a session ID, as htdig rewrites the URL before fetching the
+ document, and htsearch presents the rewritten URL (without session
+ ID) in search results.
+ </ul>
+
+ <strong>4.25. <a name="q4.25">How can I change the scores in
+ search results, and what are the defaults?</a></strong><br>
+ <p>The scores are calculated mostly by htdig at indexing time,
+ with some tweaking done by htsearch at search time. There are
+ a number of <a href="attrs.html">configuration attributes</a>,
+ all called <em>&lt;something&gt;</em><strong>_factor</strong>,
+ which can control the scoring calculations. In addition, the
+ location of words within the document has an effect on score,
+ as word scores are also multiplied by a varying location
+ factor somewhere in between 1000 for words near the start
+ and 1 for words near the end of the document. As of yet,
+ there is no way to change this factor. For any of the scoring
+ factors you can configure, and which are used by htdig, you
+ will have to reindex your documents so the new factors take
+ effect. The default values for these scoring factors, as well as
+ information about whether they're used by htdig or htsearch,
+ are all listed in the <a href="attrs.html">configuration
+ attributes documentation</a>. Malcolm Austen has written some
+ <a href="http://wwwsearch.ox.ac.uk/scores.html">notes on page
+ scores</a> for 3.1.x which you may find helpful.</p>
+
+ <p>Note that the above applies to the 3.1.x releases, while
+ in the 3.2 beta releases, all scores are calculated at search
+ time with no weight being put on the location of words within
+ the document.</p>
+
+ <strong>4.26. <a name="q4.26">How can I get htdig not to index
+ JavaScript code or CSS?</a></strong><br>
+ <p>The HTML parser in htdig recognizes and parses only HTML,
+ which is all there should be within an HTML file. If your HTML
+ files contain in-line JavaScript code or Cascading Style Sheets
+ (CSS), these in-line codes, which are clearly not HTML, should
+ be enclosed within an HTML comment tag so they are hidden
+ from view from the HTML parser, or for that matter from any
+ web client that is not JavaScript-aware or CSS-aware. See
+ <a href="http://www.mcli.dist.maricopa.edu/show/interact/js_b.html">
+ Behind the Scenes with JavaScript</a> for a description of the
+ technique, which applies equally well to in-line style sheets.
+ If fixing up all non-HTML compliant JavaScript or CSS code in
+ your HTML files is not an option, then see question
+ <a href="#q4.15">4.15</a> for an alternate technique.</p>
+
+ <p>The HTML parser in htdig 3.1.6 tries skipping over bare
+ in-line JavaScript code in HTML, unlike previous versions,
+ but a small bug in the parser causes it to be thrown off by a
+ "&lt;" sign in the JavaScript, and it may then miss the closing
+ &lt;/script&gt; tag. This can be fixed by applying this
+ <a href="ftp://ftp.ccsf.org/htdig-patches/3.1.6/JavaScript.0">
+ patch</a>.</p>
+
+ <hr noshade size=2>
+
+ <h3>5. Troubleshooting</h3>
+ <strong>5.1. <a name="q5.1">I can't seem to index more than X documents
+ in a directory.</a></strong><br>
+ <p>This usually has to do with the default document size
+ limit. If you set <a href="attrs.html#max_doc_size">
+ max_doc_size</a> in your config file to
+ something large enough to read in the directory index (try 100000 for
+ 100K) this should fix this problem. Of course this will require
+ more memory to read the larger file. Don't set it to a value
+ larger than the amount of memory you have, and never more than
+ about 2 billion, the maximum value of a 32-bit integer.
+ If htdig is missing entire directories, see question
+ <a href="#q5.25">5.25</a>.</p>
+
+ <strong>5.2. <a name="q5.2">I can't index PDF files.</a></strong><br>
+ <p>As above, this usually has to do with the default document
+ size. What happens is ht://Dig will read in part of a PDF file
+ and try to index it. This usually fails. Try setting
+ <a href="attrs.html#max_doc_size">max_doc_size</a>
+ in your config file to a larger value than the
+ size of your largest PDF file. Don't go overboard, though, as
+ you don't want to overflow a 32-bit integer (about 2 billion),
+ and you don't want to allocate much more memory than you need
+ to store the largest document.</p>
+
+ <p>There is a bug in Adobe Acrobat Reader version 4, in its
+ handling of the -pairs option, which causes a segmentation
+ violation when using it with htdig 3.1.2 or earlier. There is
+ a workaround for this as of version 3.1.3 - you must remove
+ the -pairs option from your pdf_parser definition, if it's
+ there. However, acroread version 4 is still very unstable (on
+ Linux, anyway) so it is not recommended as a PDF parser. An
+ alternative is to use an external converter with the xpdf 0.90
+ package installed on your system, as described in question <a
+ href="#q4.9">4.9</a> above.</p>
+
+ <strong>5.3. <a name="q5.3">When I run "rundig," I get a message about
+ "DATABASE_DIR" not being found.</a></strong><br>
+ <p>This is due to a bug in the Makefile.in file in version
+ 3.1.0b1. The easiest fix is to edit the rundig file and change
+ the line "TMPDIR=@DATABASE_DIR@" to set TMPDIR to a directory
+ with a large amount of temporary disk space for htmerge. This
+ bug is fixed in version 3.1.0b2.</p>
+
+ <strong>5.4. <a name="q5.4">When I run htmerge, it stops with an "out
+ of diskspace" message.</a></strong><br>
+ <p>This means that htmerge has run out of temporary disk space
+ for sorting. Either in your "rundig" script (if you run htmerge
+ through that) or before you run htmerge, set the variable TMPDIR
+ to a temp directory with lots of space.</p>
+
+ <strong>5.5. <a name="q5.5">I have problems running rundig from cron
+ under Linux.</a></strong><br>
+ <p>This problem commonly occurs on Red Hat Linux 5.0 and 5.1,
+ because of a bug in vixie-cron. It causes htmerge to fail with a
+ "Word sort failed" error. It's fixed in Red Hat 5.2.
+ You can install vixie-cron-3.0.1-26.{arch}.rpm from a 5.2
+ distribution to fix the problem on 5.0 or 5.1. A quick fix for
+ the problem is to change the first line of rundig to "#!/bin/ash"
+ which will run the script through the ash shell, but this doesn't
+ solve the underlying problem.</p>
+
+ <strong>5.6. <a name="q5.6">When I run htmerge, it stops with an
+ "Unexpected file type" message.</a></strong><br>
+ <p>Often this is because the databases are corrupt. Try removing
+ them and rebuilding. If this doesn't work, some have found that
+ the solution for question <a href="#q3.2">3.2</a> works for this
+ as well. This should be fixed in versions from 3.1.x.</p>
+
+ <strong>5.7. <a name="q5.7">When I run htsearch, I get lots of Internal
+ Server Errors (#500).</a></strong><br>
+ <p>If you are running under Solaris, see <a href="#q3.6">3.6</a>.
+ The solution for Solaris may also work for other OSes that use shared
+ libraries in non-standard locations, so refer to question 3.6 if
+ you suspect a shared library problem. In any case, check your web
+ server error logs to see the cause of the internal server errors.
+ If it's not a problem with shared libraries, there's a good chance
+ that the error logs will still contain useful error messages that
+ will help you figure out what the problem is.
+ <br>See also questions <a href="#q5.13">5.13</a> and
+ <a href="#q5.23">5.23</a>.</p>
+
+ <strong>5.8. <a name="q5.8">I'm having problems with indexing words
+ with accented characters.</a></strong><br>
+ <p>
+ Most of the time, this is caused by either not setting or
+ incorrectly setting the <a
+ href="attrs.html#locale">locale</a> attribute. The default locale
+ for most systems is the "portable" locale, which strips
+ everything down to standard ASCII. Most systems expect
+ something like <code>locale: en_US</code> or
+ <code>locale: fr_FR</code>. Locale files are often found in
+ <code>/usr/share/locale</code> or the <tt>$LANGUAGE</tt>
+ environment variable. See also question <a href="#q4.10">4.10</a>.
+ </p>
+
+ <p>Setting the locale correctly seems to be a frequent source of
+ frustration for ht://Dig users, so here are a few pointers which
+ some have found useful. First of all, if you don't have any luck
+ with the settings of the <a href="attrs.html#locale">locale</a>
+ attribute that you try, make sure you use a locale that is
+ defined on your system. As mentioned above, these are usually
+ installed in <code>/usr/share/locale</code>, so look there
+ for a directory named for the locale you want to use. If
+ you don't find it, but find something close, try that locale
+ name. Note that the locale may not have to be specific to the
+ language you're indexing, as long as it uses the same character
+ set. E.g. most western European languages use the ISO-8859-1
+ Latin 1 character set, so on most systems the locales for
+ all these languages define the same character types table
+ and can be used interchangeably. Some systems, however,
+ define only the accented letters used for a given language,
+ so "your mileage may vary." The important thing is that the
+ directory for your locale definition <strong>must</strong>
+ have a file named <code>LC_CTYPE</code> in it. For example,
+ on many Linux distributions, a language-specific locale like
+ <code>fr</code> won't contain this file, but country-specific
+ locales like <code>fr_FR</code> or <code>fr_CA</code> will. If
+ you don't find any appropriate locales installed on your system,
+ try obtaining and installing the locale definition files from
+ your OS distribution. Also, once you've set your locale, you need
+ to reindex all your documents in order for the locale to take
+ effect in the word database. This means rerunning the "rundig"
+ script, or running "htdig -i" and htmerge (or htpurge in the 3.2
+ betas).</p>
+
+ <p>Note also that some UNIX systems and libc5-based Linux
+ systems just don't have a working implementation of locales,
+ so you may not be able to get locales working at all on certain
+ systems. The
+ <a href="http://www.htdig.org/files/contrib/other/testlocale.c">testlocale.c</a>
+ program on our web site can let you see the LC_CTYPE tables
+ for any locale, to aid in finding one that works. Carefully
+ follow the directions in the program's comments to know how to
+ use it and what to look for in its output.</p>
+
+ <strong>5.9. <a name="q5.9">When I run htmerge, it stops with a
+ "Word sort failed" message.</a></strong><br>
+ <p>There are three common causes of this. First of all, the sort
+ program may be running out of temporary file space. Fix this
+ by freeing up some space where sort puts its temporary files,
+ or change the setting of the TMPDIR environment variable to a
+ directory on a volume with more space. A second common problem
+ is on systems with a BSD version of the sort program (such as
+ FreeBSD or NetBSD). This program uses the -T option as a record
+ separator rather than an alternate temporary directory. On these
+ systems, you must remove the TMPDIR environment variable from
+ rundig, or change the code in htmerge/words.cc not to use the
+ -T option. A third cause is the cron program on Red Hat Linux
+ 5.0 or 5.1. (See question <a href="#q5.5">5.5</a> above.)</p>
+
+ <strong>5.10. <a name="q5.10">When htsearch has a lot of matches, it runs
+ extremely slowly.</a></strong><br>
+ <p>When you run htsearch with no customization, on a
+ large database, and it gets a lot of hits, it tends to
+ take a long time to process those hits. Some users with
+ large databases have reported much higher performance,
+ for searches that yield lots of hits, by setting the <a
+ href="attrs.html#backlink_factor">backlink_factor</a> attribute
+ in htdig.conf to 0, and sorting by score. The scores calculated
+ this way aren't quite as good, but htsearch can process hits
+ much faster when it doesn't need to look up the db.docdb record
+ for each hit, just to get the backlink count, date or title,
+ either for scoring or for sorting. This affects versions
+ 3.1.0b3 and up. In version 3.2, currently under development,
+ the databases will be structured differently, so it should
+ perform searches more quickly.</p>
+
+ <p>In version 3.1.6, the date range selection code also slows
+ down htsearch for the same reason. Unfortunately, a small bug
+ crept into the code so that even if you don't set any of the
+ date range input parameters (startyear, endyear, etc.), and
+ you set backlink_factor and date_factor to 0, htsearch still
+ looks at the date in the db.docdb record for each hit. You can
+ avoid this either by setting startyear to 1969 and endyear to
+ 2038 in your config file, or by applying this
+ <a href="ftp://ftp.ccsf.org/htdig-patches/3.1.6/timet_enddate.1">
+ patch</a>.</p>
+
+ <strong>5.11. <a name="q5.11">When I run htsearch, it gives me a count of
+ matches, but doesn't list the matching documents.</a></strong><br>
+ <p>This most commonly happens when you run htsearch while the
+ database is currently being rebuilt or updated by htdig.
+ If htdig and htmerge have run to completion, and the problem still
+ occurs, this is usually an indication of a corrupted database. If
+ it's finding matches, it's because it found the matching
+ words in db.words.db. However, it isn't finding the document
+ records themselves in db.docdb, which would suggest that either
+ db.docdb, or db.docs.index (which maps document IDs used in
+ db.words.db to URLs used to look up records in db.docdb), is
+ incomplete or messed up. You'll likely need to rebuild your
+ database from scratch if it's corrupted. Older versions of
+ ht://Dig were susceptible to database corruption of this
+ sort. Versions 3.1.2 and later are much more stable.</p>
+
+ <p>Another possible cause of this problem is unreadable result
+ template files. If you define external template files via the
+ <a href="attrs.html#template_map">template_map</a> attribute,
+ rather than using the builtin-short or builtin-long templates,
+ and the file names are incorrect or the files do not have
+ read permission for the user ID under which htsearch runs,
+ then htsearch won't be able to display the results. Also,
+ all directories leading up to these template files must be
+ searchable (i.e. executable) by htsearch, or it won't be able
+ to open the files. This is the opposite problem of that described
+ in question <a href="#q5.36">5.36</a>. If htsearch displays
+ nothing at all, you may have both problems.</p>
+
+ <strong>5.12. <a name="q5.12">I can't seem to index documents with names
+ like left_index.html with htdig.</a></strong><br>
+ <p>There is a bug in the implementation of the <a
+ href="attrs.html#remove_default_doc">remove_default_doc</a>
+ attribute in htdig versions 3.1.0, 3.1.1 and 3.1.2, which causes
+ it to match more than it should. The default value for this
+ attribute is "index.html", so any URL in which the filename ends
+ with this string (rather than matches it entirely) will have
+ the filename stripped off. This is fixed in version 3.1.3.</p>
+
+ <strong>5.13. <a name="q5.13">I get Premature End of Script Headers errors
+ when running htsearch.</a></strong><br>
+ <p>This happens when htsearch dies before putting out a
+ "Content-Type" header. If you are running Apache under Solaris,
+ or another system that may be using shared libraries in non-standard
+ locations,
+ first try the solution described in question <a href="#q3.6">3.6</a>.
+ If that doesn't work, or you're running on another system, try
+ running "htsearch -vvv" directly from the command line to see where
+ and why it's failing. It should prompt you for the search words,
+ as well as the format.
+ <br>If it works from the command line, but not from the web
+ server, it's almost certainly a web server configuration problem.
+ Check your web server's error log for any information related to
+ htsearch's failure. One increasingly common problem is Apache
+ configurations which expect all CGI scripts to be Perl,
+ rather than binary executables or other scripts, so they use
+ "perl-handler" rather than "cgi-handler".
+ <br>See also questions <a href="#q5.7">5.7</a>,
+ <a href="#q5.14">5.14</a> and <a href="#q5.23">5.23</a>.</p>
+
+ <strong>5.14. <a name="q5.14">I get Segmentation faults when running
+ htdig, htsearch or htfuzzy.</a></strong><br>
+ <p>Despite a great deal of debugging of these programs, we haven't
+ been able to completely eliminate all such problems on all platforms.
+ If you're running htsearch or htfuzzy on a BSDI system, a common
+ cause of core dumps is a conflict between the GNU regex
+ code bundled in htdig 3.1.2 and later, and the BSD C or C++ library.
+ The solution is to use the BSD library's own rx code instead,
+ using version 3.1.6 or newer as summarized by Joe Jah:</p>
+ <ul>
+ <li> ./configure --with-rx
+ <li> make
+ </ul>
+ <p>This solution may work on some other platforms as well (we haven't
+ heard one way or the other), but will definitely not work on some
+ platforms. For instance, on libc5-based Linux systems, the bundled
+ regex code works fine by default, but using libc5's regex code
+ causes core dumps.</p>
+
+ <p>Users of Cobalt Raq or Qube servers have complained of
+ segmentation faults in htdig. Apparently this is due to problems
+ in their C++ libraries, which are fixed in their experimental
+ compiler and libraries. The following commands should install
+ the packages you need:</p>
+ <blockquote>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/binutils-2.8.1-3C1.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/egcs-1.0.2-9.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/egcs-c++-1.0.2-9.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/egcs-g77-1.0.2-9.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/egcs-objc-1.0.2-9.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/libstdc++-2.8.0-9.mips.rpm<br>
+ rpm -Uvh ftp://ftp.cobaltnet.com/pub/experimental/libstdc++-devel-2.8.0-9.mips.rpm<br>
+ rpm -Uvh --force ftp://ftp.cobaltnet.com/pub/products/current/RPMS/gcc-2.7.2-C2.mips.rpm
+ </blockquote>
+ <p>You may have to remove the libg++ package, if you have it installed,
+ before installing libstdc++, because of conflicts in these packages.
+ Be sure to do a "make clean" before a "make", to remove any object
+ files compiled with the old compiler and headers.</p>
+
+ <p>For other causes of segmentation faults, or in other programs,
+ getting a stack backtrace after the fault can be useful in narrowing
+ down the problem. E.g.: try "gdb /path/to/htsearch /path/to/core",
+ then enter the command "bt". You can also try running the program
+ directly under the debugger, rather than attempting a post-mortem
+ analysis of the core dump. Options to the program can be given on
+ gdb's "run" command, and after the program is suspended on fault,
+ you can use the "bt" command. This may give you enough information
+ to find and fix the problem yourself, or at least it may help others
+ on the htdig mailing list to point out what to do next.</p>
+
+ <strong>5.15. <a name="q5.15">Why does htdig 3.1.3 mangle URL parameters
+ that contain bare "&amp;" characters?</a></strong><br>
+ <p>This is a known bug in 3.1.3, and is fixed with this
+ <a href="ftp://ftp.ccsf.org/htdig-patches/3.1.3/HTML.cc.0">
+ patch</a>. You can apply the patch by entering into the main
+ source directory for htdig-3.1.3, and using the command
+ "patch -p0 &lt; /path/to/HTML.cc.0". This is
+ also fixed as of version 3.1.4.</p>
+
+ <strong>5.16. <a name="q5.16">When I run htmerge, it stops with an
+ "Unable to open word list file '.../db.wordlist'" message.</a></strong><br>
+ <p>The most common cause of this error is that htdig did not
+ manage to index any documents, and so it did not create a word
+ list. You should repeat the htdig or rundig command with the
+ -vvv option to see where and why it is failing.
+ See question <a href="#q4.1">4.1</a>.</p>
+
+ <strong>5.17. <a name="q5.17">When using Netscape, htsearch always returns the
+ "No match" page.</a></strong><br>
+ <p>Check your search form. Chances are there is a hidden input
+ field with no value defined. For example, one user had<br>
+ <code>&lt;input type=hidden name=restrict&gt;</code>
+
+ in his search form, instead of<br>
+
+ <code>&lt;input type=hidden name=restrict value=""&gt;</code>
+
+ The problem is that Netscape sets the missing value to a default of " "
+ (two spaces), rather than an empty string. For the restrict parameter,
+ this is a problem, because htsearch won't likely find any URLs with two
+ spaces in them. Other input parameters may similarly pose a problem.
+ </p>
+
+ <p>Another possibility, if you're running 3.2.0b1 or 3.2.0b2, is
+ that you need to make the db.words.db_weakcmpr file writeable by
+ the user ID under which the web server runs. This is a bug, and
+ is fixed in the 3.2.0b5 beta.</p>
+
+
+ <strong>5.18. <a name="q5.18">Why doesn't htdig follow links to other
+ pages in JavaScript code?</a></strong><br>
+ <p>There probably isn't any indexing tool in existence
+ that follows JavaScript links, because they don't know how
+ to initiate JavaScript events. Realistically, it would take a
+ full JavaScript parser in order to be able to figure out all the
+ possible URLs that the code could generate, something that's way
+ beyond the means of any search engine. You have a few options:</p>
+ <ul>
+ <li>Add "backup" links using plain HTML &lt;a href=...&gt; tags to
+ all the pages that could be accessed through JavaScript,
+ <li>Add &lt;link&gt; tags to point to all these pages (see
+ <a href="http://www.w3.org/TR/html4/struct/links.html#h-12.3.3">Links
+ and search engines</a> in W3C's HTML 4.0 Specification - requires
+ htdig 3.1.3 or greater, but then <em>everyone</em> should be running
+ 3.1.6 or greater anyway),
+ <li>Compose a list of all the unreachable documents, or write
+ a program to do so, and feed that list as part of htdig's
+ <a href="attrs.html#start_url">start_url</a> attribute.
+ See also question <a href="#q5.25">5.25</a>.
+ </ul>
+
+ <strong>5.19. <a name="q5.19">When I run htsearch from the web server,
+ it returns a bunch of binary data.</a></strong><br>
+ <p>Your server is returning the contents of the htsearch binary.
+ Common causes of this are:</p>
+ <ul>
+ <li>no execute permission on the htsearch binary,
+ <li>the binary won't run on this system (it may be compiled
+ for the wrong system type), or
+ <li>the web server doesn't recognize the file as a CGI
+ (for Apache, you must have a ScriptAlias directive for the
+ program or the directory in which it's installed, or define
+ a cgi-script handler for some suffix, e.g. .cgi, and add that
+ suffix to the program file name).
+ </ul>
+ <p>By default, Apache is usually configured with one cgi-bin
+ directory as ScriptAlias, so all your CGI programs must go in
+ there, or have a .cgi suffix on them. Your configuration may
+ differ, however.</p>
+
+ <strong>5.20. <a name="q5.20">Why are the betas of 3.2 so
+ slow at indexing?</a></strong><br>
+ <p>
+ As the release notes for these versions suggest, they are
+ somewhat unoptimized and are made available for testing.
+ Since the 3.2 code indexes all locations of words to support
+ phrase searching and other advanced methods, this additional
+ data slows down the indexer. To compensate, the code has a
+ cache configured by the
+ <a href="dev/htdig-3.2/attrs.html#wordlist_cache_size">wordlist_cache_size</a>
+ attribute.
+ As of this writing, the word database code will slow down
+ considerably when the cache fills up. Setting the cache as
+ large as possible provides considerable performance
+ improvement. Development is in progress to improve cache
+ performance.
+ For 3.2.0b6 and higher, see also the
+ <a href="dev/htdig-3.2/attrs.html#store_phrases">store_phrases</a> attribute,
+ which can turn off support for phrase searches, improving the speed.
+ </p>
+
+ <strong>5.21. <a name="q5.21">Why does htsearch use ";" instead of
+ "&amp;" to separate URL parameters for the page buttons?</a></strong><br>
+ <p>In versions 3.1.5 and 3.2.0b2, and later, htsearch was
+ changed to use a semicolon character ";" as a parameter
+ separator for page button URLs, rather than "&amp;", for HTML
+ 4.0 compliance. It now allows both the "&amp;" and the ";" as
+ separators for input parameters, because the CGI specification
+ still uses the "&amp;". This change may cause some PHP or CGI
+ wrapper scripts to stop working, but these scripts should be
+ similarly changed to recognize both separator characters.
+ For the definitive reference on this issue, please refer to
+ section B.2.2 of W3C's HTML 4.0 Specification,
+ <a href="http://www.w3.org/TR/html4/appendix/notes.html#h-B.2.2">
+ Ampersands in URI attribute values</a>. We're all a little
+ tired of arguing about it. If you don't like the standard, you
+ can change the Display::createURL() code yourself to ignore it.
+ <br>See also question <a href="#q4.13">4.13</a>.</p>
+
+ <p>If you want to try working within the new standard, you may
+ find it helpful to know that recent versions of CGI.pm will
+ allow either the ampersand or semicolon as a parameter separator,
+ which should fix any Perl scripts that use this library.
+ In PHP, you can simply set the following in your php.ini file
+ to allow either separator:</p>
+<pre>arg_separator.input = ";&amp;"
+</pre>
+
+ <strong>5.22. <a name="q5.22">Why does htsearch show the
+ "&amp;" character as "&amp;amp;" in search results?</a></strong><br>
+ <p>In version 3.1.5, htsearch was fixed to properly
+ re-encode the characters &amp;, &lt;, &gt;, and &quot;
+ into SGML entities. However, the default value for the
+ <a href="attrs.html#translate_amp">translate_amp</a>,
+ <a href="attrs.html#translate_lt_gt">translate_lt_gt</a>
+ and <a href="attrs.html#translate_quot">translate_quot</a>
+ attributes is still false, so these entities don't get converted
+ by htdig. If you set these three attributes to true in your
+ htdig.conf and reindex, the problem will go away.</p>
+
+ <p>In the 3.2 betas there was a bug in the HTML parser that
+ caused it to fail when attempting to translate the "&amp;amp;"
+ entity. This has been fixed in 3.2.0b3. The translate_* attributes
+ are gone as of 3.2.0b2.</p>
+
+ <strong>5.23. <a name="q5.23">I get Internal Server or Unrecognized
+ character errors when running htsearch.</a></strong><br>
+ <p>An increasingly common problem is Apache configurations
+ which expect all CGI scripts to be Perl, rather than binary
+ executables or other scripts, so they use "perl-handler"
+ rather than "cgi-handler". The fix is to create a separate
+ directory for non-Perl CGI scripts, and define it as such in
+ your httpd.conf file. You should define it the same way as your
+ existing cgi-bin directory, but use "cgi-handler" instead of
+ "perl-handler". In any case, you should check your web server's
+ error log for any information related to htsearch's failure.
+ <br>See also questions <a href="#q5.7">5.7</a>,
+ <a href="#q5.14">5.14</a> and <a href="#q5.13">5.13</a>.</p>
+
+ <strong>5.24. <a name="q5.24">I took some settings out of
+ my htdig.conf but they're still set.</a></strong><br>
+ <p>All configuration file attributes have compiled-in, default
+ values. Taking an attribute out of the file is not the same
+ thing as setting it to an empty string, a 0, or a value of
+ false. See question <a href="#q4.18">4.18</a>.</p>
+
+ <strong>5.25. <a name="q5.25">When I run htdig on my site,
+ it misses entire directories.</a></strong><br>
+ <p>First of all, htdig doesn't look at directories itself. It
+ is a spider, and it follows hypertext links in HTML documents.
+ If htdig seems to be missing some documents or entire directory
+ sub-trees of your site, it is most likely because there are
+ no HTML links to these documents or directories. (See also
+ question <a href="#q5.18">5.18</a>.) If htdig does
+ not come across at least one hypertext link to a document
+ or directory, and it's not explicitly listed in the
+ <a href="attrs.html#start_url">start_url</a> attribute, then
+ this document or directory is essentially hidden from view
+ to htdig, or to any web browser or spider for that matter.
+ You can only get htdig to index directories, without providing
+ your own files with links to the contents of these directories,
+ by using your web server's automatic index generation feature.
+ In Apache, this is done with the mod_autoindex module, which
+ is usually compiled-in by default, and is enabled with the
+ "Indexes" option for a given directory hierarchy. For example,
+ you can put these directives in your Apache configuration:</p>
+<pre>
+&lt;Directory "/path/to/your/document/root"&gt;
+ Options Indexes FollowSymLinks Includes ExecCGI
+&lt;/Directory&gt;
+</pre>
+ <p>This will cause Apache to automatically generate an index
+ for any directory that does not have an index.html or other
+ "DirectoryIndex" file in it. Other web servers will have
+ similar features, which you should look for in your server
+ documentation.</p>
+
+ <p>As an alternative to relying on the web server's autoindex
+ feature, you can compose a list of all the unreachable
+ documents, or write a program to do so, and feed that list as
+ part of htdig's <a href="attrs.html#start_url">start_url</a>
+ attribute. Here is an example of a simple shell script to make
+ a file of URLs you can use with a configuration entry like
+ <code>start_url: `/path/to/your/file`</code>:</p>
+<pre>
+find /path/to/your/document/root -type f -name \*.html -print | \
+ sed -e 's|/path/to/your/document/root/|http://www.yourdomain.com/|' > \
+ /path/to/your/file
+</pre>
+ <p>Other reasons why htdig might be missing portions of your
+ site might be that they fall out of the bounds specified
+ by the <a href="attrs.html#limit_urls_to">limit_urls_to</a>
+ attribute (which takes on the value of start_url by default),
+ they are explicitly excluded using the
+ <a href="attrs.html#exclude_urls">exclude_urls</a> attribute,
+ or they are disallowed by a robots.txt file (see the
+ <a href="htdig.html">htdig</a> documentation for notes about
+ robot exclusion) or by a robots meta tag (see question
+ <a href="#q4.15">4.15</a>). If htdig seems to be missing the
+ last part of a large directory or document, see question
+ <a href="#q5.1">5.1</a>. For reasons why htdig may be rejecting
+ some links to parts of your site, see question
+ <a href="#q5.27">5.27</a>.</p>
+
+ <strong>5.26. <a name="q5.26">What do all the numbers and symbols
+ in the htdig -v output mean?</a></strong><br>
+ <p>Output from htdig -v typically looks like this:</p>
+<pre>
+23000:35506:2:http://xxx.yyy.zz/index.html: ***-+****--++***+ size = 4056
+</pre>
+ <p>The first number is the number of documents parsed so far,
+ the second is the DocID for this document, and the third is
+ the hop count of the document (number of hops from one of the
+ start_url documents). After the URL, it shows a "*" for a link
+ in the document that it already visited (or at least queued
+ for retrieval), a "+" for a new link it just queued, and a
+ "-" for a link it rejected for any of a number of reasons.
+ To find out what those reasons are, you need to run htdig
+ with at least 3 "v" options, i.e. -vvv. If there are no "*",
+ "+" or "-" symbols after the URL, it doesn't mean the document
+ was not parsed or was empty, but only that no links to other
+ documents were found within it.</p>
+
+ <strong>5.27. <a name="q5.27">Why is htdig rejecting some of the
+ links in my documents?</a></strong><br>
+ <p>When htdig parses documents and finds hypertext links to
+ other documents (hrefs), it may reject them for any of several
+ reasons. To find out what those reasons are, you need to run
+ htdig with at least 3 "v" options, i.e. -vvv. Here are the
+ meanings of some of the messages you might see at this verbosity
+ level.</p>
+ <dl>
+ <dt>Not an http or relative link!</dt>
+ <dd>In versions 3.1.5 and earlier, only "http://" URLs, or
+ URLs relative to those, are allowed.</dd>
+ <dt>Item in the exclude list: item # <em>n</em></dt>
+ <dd>A substring of the URL matches one of the items in the
+ <a href="attrs.html#exclude_urls">exclude_urls</a>
+ attribute. The given item number will indicate which
+ pattern matched, starting at 1. The 3.2.0 betas do not
+ give the item number.</dd>
+ <dt>Extension is invalid!</dt>
+ <dd>The file name extension or suffix matches one of those
+ listed in the
+ <a href="attrs.html#bad_extensions">bad_extensions</a>
+ attribute.</dd>
+ <dt>Extension is not valid!</dt>
+ <dd>The file name extension or suffix does not match one of those
+ listed in the
+ <a href="attrs.html#valid_extensions">valid_extensions</a>
+ attribute, if any are specified.</dd>
+ <dt>Invalid Querystring! <em>or</em><br>item in bad query list</dt>
+ <dd>The URL contains a query string which matches one of those
+ listed in the
+ <a href="attrs.html#bad_querystr">bad_querystr</a>
+ attribute.</dd>
+ <dt>URL not in the limits!</dt>
+ <dd>No substring of the URL entirely matches one of the items in the
+ <a href="attrs.html#limit_urls_to">limit_urls_to</a>
+ attribute. The purpose of this attribute is to keep htdig
+ from attempting to index the entire World Wide Web.</dd>
+ <dt>forbidden by server robots.txt!</dt>
+ <dd>A substring of the URL matches one of the items disallowed
+ in the server's robots.txt file. See
+ <a href="http://www.robotstxt.org/wc/norobots.html">
+ A Standard for Robot Exclusion</a>. This message exists
+ only in the 3.2.0 betas. In 3.1.5 and earlier, this condition
+ is only caught later, resulting in the message
+ "robots.txt: discarding '<em>URL</em>'" from htdig, and a
+ later "Deleted: no excerpt" message from htmerge.</dd>
+ <dt>url rejected: (level 2)</dt>
+ <dd>No substring of the URL entirely matches one of the items in the
+ <a href="attrs.html#limit_normalized">limit_normalized</a>
+ attribute. All the other rejections above will be indicated
+ as level 1. The 3.2.0 betas give the much more meaningful
+ message 'not in "limit_normalized" list!'</dd>
+ </dl>
+
+ <p>Another possibility, if none of the error messages above appear
+ for some of the links you think htdig should be accepting, is that
+ htdig isn't even finding the links at all. First, make sure you're
+ not making false assumptions about how htdig finds these. It only
+ reads links in HTML code, and not JavaScript, and it doesn't read
+ directories unless the HTTP server is feeding it directory listings.
+ You will need to take a close look at the htdig -vvv (or -vvvv)
+ output to see what htdig is finding, in and around the areas where
+ the desired links are supposed to be found in your HTML code, to see
+ if it's actually finding them.
+ See also question <a href="#q5.25">5.25</a>.</p>
+
+ <strong>5.28. <a name="q5.28">When I run htdig or htmerge, I get a
+ "DB2 problem...: missing or empty key value specified" message.</a></strong><br>
+ <p>The most common cause of this error is that htdig or
+ htmerge rejected any documents that had been put in the
+ database, leaving an empty database. You need to find out the
+ reasons for the rejection of these documents. See questions
+ <a href="#q4.1">4.1</a>, <a href="#q5.25">5.25</a> and
+ <a href="#q5.27">5.27</a>.</p>
+
+ <strong>5.29. <a name="q5.29">When I run htdig on my site,
+ it seems to go on and on without ending.</a></strong><br>
+ <p>There are some things that can cause htdig to run on without
+ ending, especially when indexing dynamic content (ASP, PHP,
+ SSI or CGI pages). This usually involves htdig getting caught
+ in an <em>infinite virtual hierarchy</em>. A sure sign of
+ this is if the current size of your database is much larger
+ than the total size of the site you are indexing, or if in the
+ verbose output of htdig (see question <a href="#q4.1">4.1</a>)
+ you see the same URLs come up again and again with only subtle
+ variations. In any case, you must figure out the reason htdig
+ keeps revisiting the same documents using different URLs, as
+ explained in question <a href="#q4.24">4.24</a>, and set your
+ <a href="attrs.html#exclude_urls">exclude_urls</a> and
+ <a href="attrs.html#bad_querystr">bad_querystr</a> attributes
+ appropriately to stop htdig from going down those paths.
+ </p>
+
+ <strong>5.30. <a name="q5.30">Why does htsearch no longer recognize
+ the -c option when run from the web server?</a></strong><br>
+ <p>This was a security hole in 3.1.5 and older, and 3.2.0b3 and
+ older releases of ht://Dig. (See question <a href="#q2.1">2.1</a>.)
+ There's a compile-time macro you can set in htsearch.cc to disable
+ this security fix, but that's a bad idea because it reopens the hole.
+ This should only be done as a last recourse, when all other avenues
+ fail. The -c option was only intended for testing htsearch from the
+ command line, and not for use when calling htsearch on the web server.
+ Unfortunately, far too many users have needlessly latched onto this
+ option for CGI scripts. The preferred ways of specifying the config
+ file are as follows, in order of preference:</p>
+ <ol>
+ <li>use the "config" input parameter in your
+ <a href="hts_form.html">search form</a>
+ (see question <a href="#q4.2">4.2</a>).
+ <li>if you need to get at files outside the default CONFIG_DIR, use a
+ wrapper script that redefines the CONFIG_DIR environment variable,
+ then use the config input parameter as above
+ (see question <a href="#q4.20">4.20</a>).
+ <li>use a wrapper script to force htsearch to use a specific config
+ file using the -c option. This is especially for cases where you
+ want to prevent the user from selecting other config files in your
+ CONFIG_DIR using the config input parameter. This should
+ be done by using the GET method to call the wrapper script, and in
+ this script you must unset the REQUEST_METHOD environment variable
+ and pass "$QUERY_STRING" as a single argument to htsearch.
+ (This safely gets around htsearch's test which disables -c.)
+ <li>configure and compile different htsearch binaries with different
+ compile-time definitions of CONFIG_DIR, so you can avoid wrapper
+ scripts altogether.
+ <li>define ALLOW_INSECURE_CGI_CONFIG in htsearch.cc and recompile
+ htsearch if all other approaches above fail for you.
+ </ol>
+
+ <strong>5.31. <a name="q5.31">I've set a config attribute exactly
+ as documented but it seems to have no effect.</a></strong><br>
+ <p>There are a few fairly common reasons why this might happen:</p>
+ <ol>
+ <li>You may have a typo. Spelling matters, so make sure the attribute
+ name is spelled exactly as it is in the
+ <a href="attrs.html">documentation</a>. Misspelled attribute
+ definitions are silently ignored. This is because you're allowed
+ to make up your own attribute definitions for use by other attribute
+ definitions, as <strong>${myownattribute}</strong>. Also remember
+ to put the colon ("<strong>:</strong>") separator between the
+ attribute name and value in your definition.
+ <li>The attribute isn't supported in your version of the software.
+ The <a href="attrs.html">documented configuration attributes</a>
+ on the www.htdig.org web site are for the most recent
+ <strong>stable</strong> release. See questions
+ <a href="#q2.1">2.1</a> and <a href="#q2.7">2.7</a> for details.
+ If you're running an older version, or even a more recent beta
+ release, you may not have the same set of attributes to work with.
+ Consult the appropriate documentation, or upgrade to the current
+ release.
+ <li>You're not modifying the right configuration file. The default
+ configuration file is specified when you first configure ht://Dig
+ before compiling, but other configuration files can be specified
+ at run time, using the -c command-line option for most programs,
+ or the <strong>config</strong> input parameter for htsearch
+ (see question <a href="#q4.2">4.2</a>).
+ <li>You've got more than one definition of the attribute. Only the
+ last occurrence of an attribute in the configuration file is the
+ definition that's used for that attribute, overriding earlier
+ definitions. This also applies for nested configuration files that
+ are loaded in via the <a href="attrs.html#include">include</a>
+ directive, so check for other definitions in all included files.
+ Similarly for htsearch, look out for multiple definitions of input
+ parameters in your search forms, as mentioned in question
+ <a href="#q4.2">4.2</a> - these don't override each other but they
+ get combined with a Ctrl-A as separator, which may not be what you
+ want either.
+ <li>Your attribute definition is being "swallowed up" by an
+ incomplete multi-line definition above it. Remember that when a line
+ of an attribute definition ends with a single backslash
+ ("<strong>\</strong>") before the end of the line (without any
+ space after the backslash), then the following line is appended to
+ it as a continuation of the same attribute definition. For an
+ attribute definition that spans several lines, all lines but the
+ last must end with a backslash. If you want a backslash to go into
+ the attribute definition literally, it must be doubled-up, as
+ <strong>\\</strong>.
+ <li>On a similar note, make sure your attribute definitions are all
+ terminated by a newline character. Beware of text editors that do
+ word wrapping. It may look like two separate lines on the screen,
+ when in fact you've got two attribute definitions on the same long
+ line, so the second is swallowed up as part of the first.
+ <li>Your attribute definition is being overridden by an htsearch
+ <a href="hts_form.html">CGI input parameter</a>. For example,
+ <a href="attrs.html#template_name">template_name</a> is ignored
+ if the <strong>format</strong> input parameter is defined. The
+ <a href="attrs.html#allow_in_form">allow_in_form</a> attribute
+ can define any number of new CGI input parameters that override
+ the attributes of the same name in your config file.
+ <li>Your attribute definition is being ignored or overridden
+ by a related attribute. Watch out for unexpected interactions
+ between different attributes. For instance, characters in
+ <a href="attrs.html#valid_punctuation">valid_punctuation</a>
+ are stripped out of words, so those characters may
+ not have the effect you want if you've added them to
+ <a href="attrs.html#extra_word_characters">extra_word_characters</a>
+ or
+ <a href="attrs.html#prefix_match_character">prefix_match_character</a>.
+ Also,
+ <a href="attrs.html#search_results_wrapper">search_results_wrapper</a>
+ will override
+ <a href="attrs.html#search_results_header">search_results_header</a>
+ and
+ <a href="attrs.html#search_results_footer">search_results_footer</a>,
+ but only if you've set up the wrapper file correctly.
+ <li>Watch out for possible "latent effects" of some attributes. For
+ example, when you change attributes used by htdig, they won't have
+ an immediate effect on entries already in the database, so you would
+ have to reindex your site before they take effect. Similarly,
+ attributes that affect how htfuzzy builds some of its databases
+ don't take effect until those databases are rebuilt. Another, more
+ subtle latent effect occurs with releases 3.1.6 and 3.2 betas:
+ when you interrupt htdig (i.e. with Control-C or a kill command),
+ it stores the list of currently queued URLs in db.log, in your
+ database directory, so that the next time you invoke htdig it can
+ resume the interrupted dig. A side-effect of this file is that if
+ you change some attributes like limit_urls_to or exclude_urls before
+ restarting, the URLs in the file are still taken as-is, having been
+ checked against the old settings of limit_urls_to or exclude_urls
+ before being queued. This might explain one reason htdig seems to
+ ignore your new settings of these.
+ </ol>
+
+ <strong>5.32. <a name="q5.32">When I run htsearch, it gives a page
+ with an "Unable to read configuration file" message.</a></strong><br>
+ <p>The most common causes of this error are:</p>
+ <ul>
+ <li>Your configuration file name is misspelled in the "config"
+ input parameter of your search form, or you have two definitions
+ of this parameter (see question <a href="#q4.2">4.2</a>).
+ <li>You didn't install your configuration file in the directory
+ defined by the CONFIG_DIR compile-time Makefile variable
+ (see also question <a href="#q4.20">4.20</a>). This is where
+ htsearch will look for the configuration file specified by the
+ "config" input parameter.
+ <li>The configuration file is not readable by the user ID under
+ which your web server, and thus htsearch, runs. Similarly,
+ if the directories from CONFIG_DIR up to the root directory
+ are not executable by this same user ID, htsearch won't be
+ able to access the configuration files.
+ </ul>
+
+ <strong>5.33. <a name="q5.33">How can I find out which version
+ of ht://Dig I have installed?</a></strong><br>
+ <p>You should always check which version of ht://Dig you're
+ running, before you report any problems, or even if you
+ suspect a problem. You can find out the version number of an
+ installed ht://Dig package by running the command:</p>
+ <blockquote>
+ <code>htdig -\? | head</code>
+ </blockquote>
+ <p>(or use "more" if you don't have a "head" command). The
+ full version number appears on the third line of output,
+ after "This program is part of ht://Dig", and it should also
+ include the snapshot date if you're running a pre-release
+ snapshot. Always include this full version number with any
+ bug report or problem report on a mailing list. You can save
+ yourself and others a lot of grief by being certain of which
+ version you're running, especially if you've installed more than
+ one. If you're running ht://Dig from an RPM package, you should
+ also report the package version and release number, which you
+ can determine with the command "<code>rpm -q htdig</code>",
+ and mention where you obtained the package. This will alert
+ us to the idiosyncrasies and/or patches in a particular RPM
+ package. Also, if you've applied any patches yourself (see
+ question <a href="#q2.5">2.5</a>) please mention which ones.
+ See also question <a href="#q1.8">1.8</a>, on reporting bugs
+ or configuration problems.</p>
+
+ <strong>5.34. <a name="q5.34">When running htdig, I get "Error (0):
+ PDF file is damaged - attempting to reconstruct xref table..."</a></strong><br>
+ <p>This message comes from the pdftotext utility, when a PDF file
+ has been truncated. Find the largest PDF file on the site you're
+ indexing, and set max_doc_size to at least that size (see question
+ <a href="#q5.2">5.2</a>). If you need to track down which PDF is
+ causing the error, try running "htdig -i -v &gt; log.txt 2&gt;&amp;1" so you
+ can see which URL is being indexed when the error occurs. The output
+ redirects in that command combine stdout (where htdig's output goes)
+ and stderr (where pdftotext's error messages go) into one output
+ stream. If you're using acroread to index PDF files, the error
+ message for a truncated PDF file is simply "Could not repair file."
+ It's also possible to get errors like this from PDF files that are
+ smaller than max_doc_size, if they're already truncated or corrupted
+ on the server.</p>
+
+ <strong>5.35. <a name="q5.35">When running htdig on Mandrake Linux,
+ I get "host not found" and "no server running" errors.</a></strong><br>
+ <p>The default htdig.conf configuration in Mandrake's RPM package
+ of htdig very stupidly enables the
+ <a href="attrs.html#local_urls_only">local_urls_only</a> attribute
+ by default, which means you can only index a limited set of files
+ on the local server. Anything else, where htdig would normally fall
+ back to using HTTP, will fail. To make matters worse, they put a very
+ misleading comment above that attribute setting, which throws users
+ off track. This attribute is useful in certain circumstances where
+ you never want htdig to fall back to HTTP, but enabling it by default
+ was a very bad judgement call on Mandrake's part.</p>
+
+ <strong>5.36. <a name="q5.36">When I run htsearch, it gives me the
+ list of matching documents, but no header or footer.</a></strong><br>
+ <p>The header and footer typically contain the followup search
+ form, an indication of the total number of matches, and buttons
+ to other pages of matches if the results don't fit on one
+ page. If these don't show up, it could be that in attempting
+ to customize these (see question <a href="#q4.2">4.2</a>),
+ you removed them or rendered them unusable. Even if you didn't
+ customize them, make sure you installed the
+ <a href="attrs.html#search_results_header">search_results_header</a>
+ and
+ <a href="attrs.html#search_results_footer">search_results_footer</a>
+ files (or the
+ <a href="attrs.html#search_results_wrapper">search_results_wrapper</a>
+ file) in the correct location (where you told ht://Dig they'd be
+ when you configured prior to compiling). Also make sure they
+ have read permission for the user ID under which htsearch runs,
+ and all directories leading up to these template files are
+ searchable (i.e. executable) by htsearch, or it won't be able
+ to open the files.</p>
+
+ <p>This is the opposite problem of that described in question
+ <a href="#q5.11">5.11</a>. If htsearch displays nothing at
+ all, you may have both problems or you may have no matches or
+ a boolean query syntax error and the
+ <a href="attrs.html#nothing_found_file">nothing_found_file</a>
+ or <a href="attrs.html#syntax_error_file">syntax_error_file</a>
+ is missing or unreadable.</p>
+
+ <strong>5.37. <a name="q5.37">When I index files with doc2html.pl,
+ it fails with the "UNABLE to convert" error.</a></strong><br>
+ <p>This is an indication that doc2html.pl wasn't configured
+ properly. Carefully follow all the directions for installation
+ in the DETAILS file that comes with the script. In addition to
+ installing doc2html.pl, you must:</p>
+ <ul>
+ <li>Install xpdf and check that pdftotext and pdfinfo work from
+ the command line,
+ <li>Configure pdf2html.pl to use pdftotext and pdfinfo and check
+ that it works from the command line,
+ <li>Configure doc2html.pl to use pdf2html.pl and check that it
+ works from the command line:
+<pre>doc2html.pl /full/path/to/sample/filename.pdf "application/pdf" url</pre>
+ </ul>
+ <p>You should repeat a similar set of steps to configure and test
+ doc2html.pl for other document types, such as Word, RTF and
+ Excel. See also questions <a href="#q4.8">4.8</a>,
+ <a href="#q4.9">4.9</a> and <a href="#q5.39">5.39</a>.</p>
+
+ <strong>5.38. <a name="q5.38">Why do my searches find search terms
+ in pathnames, or how do I prevent matching filenames?</a></strong><br>
+ <p>htdig doesn't normally add the URL components to the index
+ itself, but when you index a directory where the filenames are
+ used as link description text (such as an automatic DirectoryIndex
+ created by Apache's mod_autoindex) then these link descriptions
+ get indexed, carrying the weight assigned to them by the
+ <a href="attrs.html#description_factor">description_factor</a>
+ attribute. Thus, a search for a filename will match this link
+ description, and the file will show up in search results.
+ To avoid that, make sure your DirectoryIndexes don't get indexed
+ as detailed in question <a href="#q4.23">4.23</a>.</p>
+
+ <p>Conversely, there is no way to force htdig to index URL
+ components so that a search for a file name will yield a match
+ on that file, unless you index an HTML file (or several) containing
+ links to all the files you want, where the link description text
+ does contain the full URL or the pathname components you want.</p>
+
+ <strong>5.39. <a name="q5.39">I set up an external parser but I still
+ can't index Word/Excel/PowerPoint/PDF documents.</a></strong><br>
+ <p>You probably need to carefully re-read and follow questions
+ <a href="#q4.8">4.8</a>, <a href="#q4.9">4.9</a>,
+ <a href="#q5.25">5.25</a> and <a href="#q5.27">5.27</a>.
+ When you can't index documents with an external parser or converter,
+ there are three main issues, or points of failure, that you need
+ to resolve. You need to figure out on which of the three stages the
+ process is failing, and focus on that stage to get to the bottom of
+ why it's not working at that stage. You need to run htdig with
+ anywhere from 1 to 4 -v options, to get the debugging output you
+ need to see where it's failing and why. This may be an iterative
+ process, if htdig is failing at more than one stage: you might fix
+ one problem only to run into another.</p>
+
+ <ol>
+ <li>Is htdig actually finding links to the PDF, Word, etc. documents
+ you want to index? Make sure you're not making false assumptions
+ about how htdig finds these (questions <a href="#q5.25">5.25</a>
+ and <a href="#q5.18">5.18</a>), and then find out how htdig is
+ looking at the links in your HTML files to see if it's ignoring
+ or rejecting links to your externally parsed documents (questions
+ <a href="#q4.1">4.1</a> and <a href="#q5.27">5.27</a>).<br><br>
+ <li>If it is finding and accepting the links to these documents, is
+ it correctly fetching them and passing them on to the appropriate
+ external converter to be able to index them? Look at htdig -vvv
+ output, around the time it tries to fetch one of these, and see
+ what it does next. Does the file size look right? Are there any
+ error messages around there? If the external converter isn't even
+ being called, take a close look at your
+ <a href="attrs.html#external_parsers">external_parsers</a>
+ attribute setting to make sure it's correct (see question
+ <a href="#q5.31">5.31</a>).<br><br>
+ <li>If it is attempting to convert them, is the external converter
+ doing what it should, to feed some indexable text back into htdig's
+ parser? You can also try htdig -vvvv (4 -v options) to see if it's
+ actually parsing individual words from any of these. If this is
+ too much output to wade through, try setting
+ <a href="attrs.html#start_url">start_url</a> to the URL
+ of a single document that you want to test, so you can look in
+ detail at what htdig does with it. You can also try running the
+ external converter manually on one of these documents to see
+ what it spits out. See question <a href="#q5.37">5.37</a>.
+ Make sure your documents actually contain indexable text. Some
+ PDFs are nothing but scanned images of pages, so it looks like
+ text but it's just images with no computer-readable text.
+ </ol>
+
+ <br>
+
+ <hr noshade size=4>
+ Last modified: $Date: 2004/05/28 13:15:16 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.am b/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.am
new file mode 100644
index 00000000..c470a439
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.am
@@ -0,0 +1,58 @@
+
+include $(top_srcdir)/Makefile.config
+
+EXTRA_DIST = COPYING \
+ ChangeLog \
+ FAQ.html \
+ RELEASE.html \
+ THANKS.html \
+ TODO.html \
+ author.html \
+ bdot.gif \
+ bugs.html \
+ attrs.html attrs_head.html attrs_tail.html \
+ cf_byname.html cf_byname_head.html cf_byname_tail.html \
+ cf_byprog.html cf_byprog_head.html cf_byprog_tail.html \
+ cf_general.html \
+ cf_types.html \
+ cf_variables.html \
+ config.html \
+ confindex.html \
+ confmenu.html \
+ contents.html \
+ dot.gif \
+ htdig.gif \
+ htdig.html \
+ htdig_big.gif \
+ htfuzzy.html \
+ htmerge.html \
+ htnotify.html \
+ hts_form.html \
+ hts_general.html \
+ hts_menu.html \
+ hts_method.html \
+ hts_templates.html \
+ htsearch.html \
+ index.html \
+ install.html \
+ ma_menu.html \
+ mailarchive.html \
+ mailing.html \
+ main.html \
+ meta.html \
+ notification.html \
+ require.html \
+ triangle.gif \
+ up.gif \
+ uses.html \
+ where.html \
+ cf_generate.pl
+
+attrs.html.in: attrs_head.html attrs_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
+
+cf_byname.html: cf_byname_head.html cf_byname_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
+
+cf_byprog.html: cf_byprog_head.html cf_byprog_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.in b/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.in
new file mode 100644
index 00000000..f4e69b7a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/Makefile.in
@@ -0,0 +1,407 @@
+# Makefile.in generated by automake 1.7.9 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# To compile with profiling do the following:
+#
+# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_triplet = @host@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+APACHE = @APACHE@
+APACHE_MODULES = @APACHE_MODULES@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CGIBIN_DIR = @CGIBIN_DIR@
+COMMON_DIR = @COMMON_DIR@
+CONFIG_DIR = @CONFIG_DIR@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATABASE_DIR = @DATABASE_DIR@
+DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+FIND = @FIND@
+GUNZIP = @GUNZIP@
+HAVE_SSL = @HAVE_SSL@
+HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@
+HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@
+HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@
+IMAGE_DIR = @IMAGE_DIR@
+IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
+MAKEINFO = @MAKEINFO@
+MV = @MV@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+RANLIB = @RANLIB@
+RRDTOOL = @RRDTOOL@
+SEARCH_DIR = @SEARCH_DIR@
+SEARCH_FORM = @SEARCH_FORM@
+SED = @SED@
+SENDMAIL = @SENDMAIL@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TAR = @TAR@
+TESTS_FALSE = @TESTS_FALSE@
+TESTS_TRUE = @TESTS_TRUE@
+TIME = @TIME@
+TIMEV = @TIMEV@
+USER = @USER@
+VERSION = @VERSION@
+YACC = @YACC@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+exec_prefix = @exec_prefix@
+extra_ldflags = @extra_ldflags@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \
+ -I$(top_srcdir)/include -I$(top_srcdir)/htlib \
+ -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \
+ -I$(top_srcdir)/htword \
+ -I$(top_srcdir)/db -I$(top_builddir)/db \
+ $(LOCAL_DEFINES) $(PROFILING)
+
+
+HTLIBS = $(top_builddir)/htnet/libhtnet.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/htlib/libht.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/db/libhtdb.la \
+ $(top_builddir)/htlib/libht.la
+
+
+EXTRA_DIST = COPYING \
+ ChangeLog \
+ FAQ.html \
+ RELEASE.html \
+ THANKS.html \
+ TODO.html \
+ author.html \
+ bdot.gif \
+ bugs.html \
+ attrs.html attrs_head.html attrs_tail.html \
+ cf_byname.html cf_byname_head.html cf_byname_tail.html \
+ cf_byprog.html cf_byprog_head.html cf_byprog_tail.html \
+ cf_general.html \
+ cf_types.html \
+ cf_variables.html \
+ config.html \
+ confindex.html \
+ confmenu.html \
+ contents.html \
+ dot.gif \
+ htdig.gif \
+ htdig.html \
+ htdig_big.gif \
+ htfuzzy.html \
+ htmerge.html \
+ htnotify.html \
+ hts_form.html \
+ hts_general.html \
+ hts_menu.html \
+ hts_method.html \
+ hts_templates.html \
+ htsearch.html \
+ index.html \
+ install.html \
+ ma_menu.html \
+ mailarchive.html \
+ mailing.html \
+ main.html \
+ meta.html \
+ notification.html \
+ require.html \
+ triangle.gif \
+ up.gif \
+ uses.html \
+ where.html \
+ cf_generate.pl
+
+subdir = htdoc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES = attrs.html
+depcomp =
+am__depfiles_maybe =
+DIST_SOURCES =
+DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/Makefile.config \
+ COPYING ChangeLog Makefile.am attrs.html.in
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign htdoc/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+attrs.html: $(top_builddir)/config.status attrs.html.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/..
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile
+
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-libtool
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-info-am
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ distclean distclean-generic distclean-libtool distdir dvi \
+ dvi-am info info-am install install-am install-data \
+ install-data-am install-exec install-exec-am install-info \
+ install-info-am install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am \
+ uninstall-info-am
+
+
+attrs.html.in: attrs_head.html attrs_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
+
+cf_byname.html: cf_byname_head.html cf_byname_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
+
+cf_byprog.html: cf_byprog_head.html cf_byprog_tail.html ../htcommon/defaults.cc cf_generate.pl
+ @PERL@ ${srcdir}/cf_generate.pl ${top_srcdir}
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/RELEASE.html b/debian/htdig/htdig-3.2.0b6/htdoc/RELEASE.html
new file mode 100644
index 00000000..5caf2b79
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/RELEASE.html
@@ -0,0 +1,1542 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Release notes
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Release notes
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ These are notes that go with each release of ht://Dig. There
+ is also a <a href="ChangeLog">ChangeLog</a> file which has
+ more details on the code changes.
+ </p>
+
+ <p>
+ <strong>Release notes for htdig-3.2.0b6</strong> 20 Jun 2004<br>
+ The next beta release of ht://Dig, 3.2.0b6, is now available.
+ It fixes several bugs from 3.2.0b5, and runs somewhat faster,
+ although still much slower than 3.1.6. (No significant speed
+ improvements are expected in the near future, although we are
+ working on it.) Calling this release a "beta" simply means
+ that exhaustive testing, especially on non-Linux platforms, is
+ not yet complete. However, we consider it stable enough for
+ most production use.
+ </p>
+
+ <p>
+ As with 3.2.0b5, if you are upgrading
+ from a previous version, you should read the <a
+ href="upgrade.html">upgrade guide</a> first.
+ </p>
+ Bug fixes:
+ <ul>
+ <li>Correctly handle empty <code>disallow</code> entries in
+ robots.txt</li>
+ <li>No longer compile regular expressions for
+ every URL (improves performance)</li>
+ <li>Allow compressed databases on Cygwin</li>
+ <li>Fixed bugs in phrase searching</li>
+ <li>Improved parsing of the configuration file</li>
+ <li>bin/rundig -a handles multiple database directories</li>
+ <li>Ellipsis displayed correctly by htsearch</li>
+ <li>Allow '-' argument to '-m' ('minimal') runtime option to
+ htdig</li>
+ <li>Check validity of first URL from each server</li>
+ <li>No longer ignore empty configuration attributes</li>
+ <li>fixed bug in handling 'http_proxy', 'http_proxy_authorization',
+ 'authorization' attributes</li>
+ <li>remove stale md5_db if '-i' specified</li>
+ <li>Make 'server_alias' case insensitive</li>
+ <li>fixed bugs with zlib</li>
+ <li>Allow &amp;euro; HTML entity</li>
+ <li>fixed other minor bugs</li>
+ </ul>
+ New features:
+ <ul>
+ <li>added <a
+ href="attrs.html#allow_space_in_url">allow_space_in_url</a>
+ attribute: if set to true, htdig will handle URLs that
+ contain embedded spaces</li>
+ <li>added <a
+ href="attrs.html#store_phrases">store_phrases</a> attribute:
+ if it is false, htdig only stores the first occurrence
+ of each word in a document</li>
+ <li>added an improved version of RTF2HTML into the
+ contrib section</li>
+ <li>added <a href="http://www.openoffice.org/">OpenOffice.org</a>
+ support to doc2html in contrib section</li>
+ <li>improved date factor formula</li>
+ <li>improved tests</li>
+ <li>improved documentation</li>
+ <li>added man pages</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.2.0b5</strong> 10 Nov 2003<br>
+ This version was slated to be 3.2.0rc1, but some final testing
+ is still required. It primarily fixes many bugs in 3.2.0b3, with
+ some limited new functionality.
+ As with 3.2.0b1 and 3.2.0b2, if you are upgrading
+ from a previous version, you should read the <a
+ href="upgrade.html">upgrade guide</a> first.
+ </p>
+ <ul>
+ <li>Fixed database bugs. Introduced zlib compression to replace
+ buggy internal compression.</li>
+ <li>Forward-ported functionality from 3.1.6
+ (description_meta_tag_names, use_doc_date, ignore_alt_text,
+ ignore_dead_servers, boolean_keywords, boolean_syntax_errors,
+ multimatch_factor, translate_latin1)</li>
+ <li>Fixed bugs in phrase searching</li>
+ <li>Fixed compile problems due to deprecated C++ includes</li>
+ <li>Fixed bugs handling double slashes in URLs</li>
+ <li>Suppress display of matches with weight zero</li>
+ <li>Fixed bugs in nesting of tags which turn off indexing</li>
+ </ul>
+ <ul>
+ <li>Added Native Win32 support</li>
+ <li>Added http_proxy_authorization attribute</li>
+ <li>Improved networking code, with improved cookie handling and
+ accept_language support</li>
+ <li>Implemented field-restricted searches (e.g. title:word)</li>
+ <li>Handle noindex_start/noindex_end as string lists</li>
+ <li>Implemented external converters,
+ text/html-&gt;text/html-internal</li>
+ <li>Improved support for MIME types</li>
+ <li>Changed licence to LGPL from GPL</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.2.0b4</strong><br>
+ This beta was never issued.
+ </p>
+
+ <p>
+ <strong>Release notes for htdig-3.2.0b3</strong> 22 Feb 2001<br>
+ This version is still marked beta because it has still only
+ received limited testing and there are still revisions pending
+ for the 3.2 releases. However, it adds more functionality and
+ should address all serious bugs in the 3.2.0b2 release.
+ As with 3.2.0b1 and 3.2.0b2, if you are upgrading
+ from a previous version, you should read the <a
+ href="upgrade.html">upgrade guide</a> first.
+ </p>
+ <p>
+ <strong>Please note</strong> if you are updating from a prior
+ release (3.1 or 3.2), the htmerge program has changed syntax as noted
+ below. You will probably want to change your behavior to call
+ htpurge instead of htmerge after htdig as noted below.
+ </p>
+ <ul>
+ <li>Fixed several non-exploitable bugs in handling external
+ parsers or transport agents.</li>
+ <li>Fix bug where changes in the robots.txt would be
+ ignored. If a URL was indexed and later the robots.txt
+ changed to forbid it, the URL would be checked anyway.</li>
+ <li>Fixed scoring bugs introduced in 3.2.0b2.</li>
+ <li>Fixed a non-exploitable security issue where content-type
+ headers were passed incorrectly to external parsers or converters.</li>
+ <li>Fixed bugs in the accents fuzzy algorithm, cutting down
+ on the size of the accent database.</li>
+ <li>Fixed a bug where duplicate documents would be generated when
+ merging a database with itself.</li>
+ <li>Fixed a bug in the new regex handling for indexing limits
+ where large patterns could fail and would be silently ignored.</li>
+ <li>Fixed minor bugs with the HTTP/1.1 implementation.</li>
+ <li>Fix a bug where an extra config= portion of a URL would
+ be output when using collections.</li>
+ <li>Fixed a bug with content-type declarations in external parsers
+ with combined content-type; charset declarations.</li>
+ <li>Fixed a bug in the config parser that did not correctly
+ handle relative config <a
+ href="attrs.html#include">include</a> statements.</li>
+ <li>Fixed a bug in htfuzzy which would append to an existing
+ synonyms database rather than creating it anew.</li>
+ <li>Fixed problems with the configure script ignoring
+ --enable-bigfile flags.</li>
+ <li>Fixed problems with retrieval order--this could
+ potentially foul things up when limiting indexing by
+ hopcount.</li>
+ <li>Fixed some problems with the HTML in the included sample files.</li>
+ <li>Make the -l flag to <a href="htdig.html">htdig</a>
+ obsolete--this is now the default behavior -- the program
+ will intercept many signals and write a log file for a restart.</li>
+ <li>Updated database format from the mifluz/htword project.</li>
+ <li>Changed syntax of <a href="htmerge.html">htmerge</a>. The
+ program now <em>only</em> merges databases. The <a
+ href="htpurge.html">htpurge</a> program will &quot;clean
+ up&quot; databases after running htdig. The included
+ &quot;rundig&quot; script reflects this.</li>
+ <li>htload now properly loads ASCII word databases.</li>
+ <li>Enhanced <a
+ href="attrs.html#build_select_lists">build_select_lists</a>
+ attribute.</li>
+ <li>Added support for controlling the number of Page buttons
+ in htsearch with <a
+ href="attrs.html#maximum_page_buttons">maximum_page_buttons</a>.</li>
+ <li>Added the METADESCRIPTION htsearch template variable for
+ displaying the &lt;META&gt; description field in output along
+ with the normal description, instead of using the <a
+ href="attrs.html#use_meta_description">use_meta_description</a>
+ attribute.</li>
+ <li>Added support for permanent URL rewriting with the <a
+ href="attrs.html#url_rewrite_rules">url_rewrite_rules</a>
+ attribute. (As opposed to the <a
+ href="attrs.html#url_part_aliases">url_part_aliases</a>
+ attribute which can provide a different URL to htsearch and htdig.)</li>
+ <li>Added support for restricting a search to match only
+ documents between two dates as specified in the <a
+ href="hts_form.html">search form</a> as well as the <a
+ href="hts_templates.html">template variables</a> STARTYEAR,
+ STARTMONTH, STARTDAY, ENDYEAR, ENDMONTH, ENDDAY.</li>
+ <li>Added support for limiting duplicates based on MD5
+ signatures with the new attributes <a
+ href="attrs.html#check_unique_md5">check_unique_md5</a>, <a
+ href="attrs.html#check_unique_date">check_unique_date</a>, <a
+ href="attrs.html#md5_db">md5_db</a>.</li>
+ <li>The documentation has been revised to include a block:
+ portion to note if attributes can be included in URL or
+ Server blocks. See the <a href="confindex.html"
+ target="_top">configuration</a> documentation for more
+ information.</li>
+ <li>More attributes are set on a per-server or per-URL basis.</li>
+ <li>New support for nntp:// protocol.</li>
+ <li>Added support for auto-generating directory listings for
+ file:// URLs.</li>
+ <li>Set the default compilation to enable tests that can be
+ run with &quot;make check&quot;</li>
+ <li>Greatly improved htnotify program with one message per
+ e-mail address and support for message
+ templates using the new attributes <a
+ href="attrs.html#htnotify_webmaster">htnotify_webmaster</a>,
+ <a href="attrs.html#htnotify_replyto">htnotify_replyto</a>, <a
+ href="attrs.html#htnotify_prefix_file">htnotify_prefix_file</a>,
+ <a href="attrs.html#htnotify_suffix_file">htnotify_suffix_file</a>.</li>
+ <li>There are the usual variety of other fixes and
+ changes. See the <a href="ChangeLog">ChangeLog</a> for
+ more details.</li>
+ <li>Once again, a huge thank you to everyone who
+ contributed bug reports, fixes and patches!</li>
+ </ul>
+
+ <p><strong>Release notes for htdig-3.2.0b2</strong> 11 Apr 2000<br>
+ This version is still marked beta because it has still only
+ received limited testing. However, it adds more functionality
+ and should fix all known bugs in the previous 3.2.0b1 release,
+ including the security hole fixed in version 3.1.5 in
+ production versions. As with 3.2.0b1, if you are upgrading
+ from a previous version, you should read the <a
+ href="upgrade.html">upgrade guide</a> first.
+ </p>
+ <ul>
+ <li>Fixed several bugs in the new HTTP/1.1 implementation that would
+ cause problems with so-called &quot;Chunked&quot; data.</li>
+ <li>Fixed a bug in the new regex-based configuration options that
+ would ignore the case_sensitive attribute.</li>
+ <li>Fixed the robots.txt parsing to more rigorously stick to the
+ standard.</li>
+ <li>Fixed a bug where upper-case META robots directives would be
+ ignored.</li>
+ <li>Fixed a bug that could leave a connection open when it failed.</li>
+ <li>Fixed the timeout in the connection code to ensure that hung
+ connections are killed properly.</li>
+ <li>Fixed a bug where duplicates of modified documents could pile up
+ over time.</li>
+ <li>Fixed a bug in the SGML entity handling where numeric entities
+ would be ignored. (e.g. &amp;#162; -&gt; &#162;)</li>
+ <li>Fixed a bug in the new configuration parser that
+ wouldn't accept lists including numbers</li>
+ <li>Fixed a potential infinite loop in the phrase
+ searching parser that came up when fuzzy algorithms were
+ used.</li>
+ <li>The HTML parser now ignores anything between &lt;script&gt; tags,
+ much like it does for &lt;style&gt; tags.</li>
+ <li>Fixed some performance problems in the new word database code.</li>
+ <li>Removed the attributes translate_quot, translate_lt, translate_gt
+ and translate_amp since all SGML entities are now encoded and decoded
+ when displayed.</li>
+ <li>Removed the attribute uncoded_db_compatible since the 3.2
+ databases are no longer compatible with previous versions anyway.</li>
+ <li>Removed the attribute word_list because the db.wordlist file is no
+ longer generated. To get an ASCII version of the database, use the
+ word_dump attribute.</li>
+ <li>Removed the pdf_parser attribute. It is now preferred to use the
+ external parser or external converter support with xpdf.</li>
+ <li>The <a
+ href="attrs.html#wordlist_compress">wordlist_compress</a>
+ attribute is now turned on by default.</li>
+ <li>The output from htsearch and the default and included templates
+ should now be more HTML-4.0 compliant.</li>
+ <li>Added support for searching collections of multiple
+ databases. To use this, supply multiple config fields or
+ config names separated by &quot;|&quot; characters. Also
+ see the <a
+ href="attrs.html#collection_names">collection_names</a> attribute.</li>
+ <li>Added a new accents fuzzy algorithm, which treats
+ accented and unaccented words the same. You must create an
+ <a href="attrs.html#accents_db">accents_db</a> with
+ htfuzzy after indexing.</li>
+ <li>Added new attributes <a
+ href="attrs.html#tcp_max_retries">tcp_max_retries</a> and
+ <a href="attrs.html#tcp_wait_time">tcp_wait_time</a> to
+ control how many times a low-level connection is retried
+ and how long to wait on a hung connection.</li>
+ <li>Add <a href="attrs.html#any_keywords">any_keywords</a>
+ attribute to OR the keywords field in a search form
+ instead of AND-ing them together.</li>
+ <li>Add the attributes <a
+ href="attrs.html#search_results_order">search_results_order</a>
+ and <a href="attrs.html#url_seed_score">url_seed_score</a>
+ to control result ranking and scoring based on URL patterns.</li>
+ <li>Moved the htnotify program into the new httools directory.</li>
+ <li>Added the programs <a href="htdump.html">htdump</a>,
+ <a href="htload.html">htload</a>, <a
+ href="htstat.html">htstat</a> and <a
+ href="htpurge.html">htpurge</a>.</li>
+ <li>There are the usual variety of other fixes and
+ changes. See the <a href="ChangeLog">ChangeLog</a> for
+ more details.</li>
+ <li>Once again, a huge thank you to everyone who
+ contributed bug reports, fixes and patches!</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.5</strong> 25 Feb 2000<br>
+ This version cleans up some remaining bugs in the 3.1.4
+ release. As the latest stable release of ht://Dig, it is
+ recommended for all production servers.
+ </p>
+ <ul>
+ <li>Fixed a nasty security hole in htsearch, which would allow
+ users to view any file on your site that had read permission.</li>
+ <li>Fixed a bug that could cause problems with 8-bit
+ characters on some systems.</li>
+ <li>Made some attempts to get htsearch's output to be more HTML 4.0
+ compliant. It quotes all HTML tag parameters, and uses ";"
+ instead of "&amp;" as parameter separator in URLs for next
+ pages. Reserved characters in parameters are now
+ encoded. Please note that this may break a variety of CGI
+ wrappers, for example, those written in PHP3.</li>
+ <li>Fixed handling of SGML entities: htdig will still decode
+ them to store as single characters in the database, but
+ htsearch now encodes some of them back for compliant results.</li>
+ <li>Added two new formats for variables in htsearch templates,
+ $%(var), which escapes the variable for a URL, and $&(var),
+ which HTML-escapes the variable as necessary.</li>
+ <li>Fixed htdig's handling of robots.txt, such that only the first
+ applicable User-agent field bearing its name will be used, rather
+ than only the last.</li>
+ <li>Fixed htdig's handling of servers that return 2-digit years.</li>
+ <li>Fixed handling of embedded quotes in quoted string lists.</li>
+ <li>Fixed handling of relative URLs with trailing ".." or leading
+ "//".</li>
+ <li>Fixed handling of the
+ <a href="attrs.html#valid_extensions">valid_extensions</a>
+ attribute, which sometimes failed in the previous version.</li>
+ <li>Enhanced the handling of local filesystem indexing with the
+ <a href="attrs.html#local_urls">local_urls</a>,
+ <a href="attrs.html#local_user_urls">local_user_urls</a> or
+ <a href="attrs.html#local_default_doc">local_default_doc</a>
+ attributes, which now allow multiple directory or file names to
+ be tried.</li>
+ <li>Added the <a
+ href="attrs.html#build_select_lists">build_select_lists</a>
+ attribute to allow the config file to specify
+ &lt;select&gt; form elements in htsearch output as a
+ template variable, much like $(SORT) and $(METHOD).</li>
+ <li>Added support for two additional configuration attributes:
+ <a href="attrs.html#max_keywords">max_keywords</a>, and
+ <a href="attrs.html#nph">nph</a>.</li>
+ <li>A variety of other bug fixes, and many documentation updates.
+ See the <a href="ChangeLog">ChangeLog</a> for details.</li>
+ <li>Once again, thanks to everyone who reported bugs and bug
+ fixes.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.2.0b1</strong> 4 Feb 2000<br>
+ This marks the first beta version of the 3.2.0 codebase,
+ over a year in the works. Since it has not received as much
+ testing as the 3.1.x series, it is *not* recommended for
+ production environments. A full description of how to upgrade
+ is provided <a href="upgrade.html">here</a>.
+ </p>
+ <blockquote><strong>NOTE:</strong> Read this document before
+ upgrading. You have been warned.</blockquote>
+ <ul>
+ <li>Fixed a bug in htdig where hopcounts could be calculated
+ incorrectly between multiple servers.</li>
+ <li>Fixed a bug that could cause problems with 8-bit
+ characters on some systems.</li>
+ <li>Fixed handling of unreachable servers. First, the new <a
+ href="attrs.html#max_retries">max_retries</a> attribute allows
+ htdig to attempt multiple connections. Secondly, if the server
+ is not available, htdig will stop trying to connect.</li>
+ <li>Fixed handling of SGML entities: htdig will still decode
+ them to store as single characters in the database, but
+ htsearch now encodes them back for compliant results.</li>
+ <li>Rewrote the database formats, allowing room for more
+ sophisticated searches and compression of the word database
+ using the new attribute <a
+ href="attrs.html#wordlist_compress">wordlist_compress</a>.
+ These changes include the removal of the word_list file
+ (db.wordlist) and the addition of the new <a
+ href="attrs.html#doc_excerpt">doc_excerpt</a> database.</li>
+ <li>Cleaned up many parts of the code, including the URL and
+ HTML parsers. Additionally, on platforms that support it, much
+ of the code will be built as shared libraries, which should
+ help memory utilization, especially under high load.</li>
+ <li>Removed the modification_time_is_now attribute, which is
+ now on by default. This means the time at indexing is taken as
+ the date of the document if the server does not return a
+ date.</li>
+ <li>Added the new attribute <a
+ href="attrs.html#use_doc_date">use_doc_date</a> to use the
+ date specified in a META date tag.</li>
+ <li>Merged all heading_factor attributes into one new
+ attribute, <a
+ href="attrs.html#heading_factor">heading_factor</a>.</li>
+ <li>As a result of the new database format, all _factor
+ attributes (like <a
+ href="attrs.html#title_factor">title_factor</a> and <a
+ href="attrs.html#keywords_factor">keywords_factor</a>) are
+ now dynamic--you do not have to rebuild your database to
+ change the scaling.</li>
+ <li>Changed attributes <a
+ href="attrs.html#bad_querystr">bad_querystr</a>, <a
+ href="attrs.html#exclude_urls">exclude_urls</a>, <a
+ href="attrs.html#limit_urls_to">limit_urls_to</a>, <a
+ href="attrs.html#limit_normalized">limit_normalized</a>,
+ <a
+ href="attrs.html#http_proxy_exclude">http_proxy_exclude</a>
+ to allow full regular expressions when the regex are
+ surrounded by [ and ].</li>
+ <li>Changed htsearch fields restrict and exclude to allow
+ regular expressions when the regex are surrounded by [ and
+ ].</li>
+ <li>Added phrase searching support to htsearch--queries
+ enclosed in quotes will be checked to ensure the words
+ occur in that exact order in the documents.</li>
+ <li>Added the <a
+ href="attrs.html#build_select_lists">build_select_lists</a>
+ attribute to allow the config file to specify
+ &lt;select&gt; form elements in htsearch output as a
+ template variable, much like $(SORT) and $(METHOD).</li>
+ <li>Added a regex fuzzy method. This will allow searches to
+ include regex that match words. The fuzzy method will
+ return up to <a
+ href="attrs.html#regex_max_words">regex_max_words</a> matches.</li>
+ <li>Added a speling [sic] fuzzy method. This attempts several
+ simple spelling mistakes (like transposed letters and
+ extra letters) to find matches. This adds the new
+ attribute <a
+ href="attrs.html#minimum_speling_length">minimum_speling_length</a>
+ to restrict whether small words should be
+ checked. Transposing letters in smaller words can give
+ unrelated correctly-spelled words.</li>
+ <li>Added support for external transport methods, using the <a
+ href="attrs.html#external_protocols">external_protocols</a>
+ attribute, an analogue of the external_parsers system.</li>
+ <li>Added support for HTTP/1.1, including persistent
+ connections. This can be configured using the new attributes <a
+ href="attrs.html#persistent_connections">persistent_connections</a>,
+ <a href="attrs.html#head_before_get">head_before_get</a>,
+ and <a href="attrs.html#max_connection_requests">max_connection_requests</a>.
+ </li>
+ <li>Added support for file:// URLs and support for using the
+ <a href="attrs.html#mime_types">mime_types</a> file to
+ decide whether local files are parsable.</li>
+ <li>Added two new formats for variables in htsearch templates,
+ $%(var), which escapes the variable for a URL, and $&amp;(var),
+ which HTML-escapes the variable as necessary.</li>
+ <li>Added support for reading the list of URLs to index with
+ <a href="htdig.html">htdig</a> by supplying the
+ command-line option -.</li>
+ <li>Added a flag -m to <a href="htdig.html">htdig</a> to index <em>only</em> the
+ files given in the filename.</li>
+ <li>There are many more changes especially to the internal
+ code structure, so a huge thank you goes out to everyone
+ who helped make this release!</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.4</strong> 9 Dec 1999<br>
+ This version cleans up some remaining bugs in the 3.1.3
+ release. As the latest stable release of ht://Dig, it is
+ recommended for all production servers.
+ </p>
+ <ul>
+ <li>Fixed a nasty bug in URL parameter parsing, which was gobbling
+ up bare ampersands (&amp;) and CGI parameter names.</li>
+ <li>Fixed a bug where htdig would go into an infinite loop if an
+ entry in <a href="attrs.html#local_urls">local_urls</a>,
+ <a href="attrs.html#local_user_urls">local_user_urls</a> or
+ <a href="attrs.html#server_aliases">server_aliases</a> was
+ missing the "=".</li>
+ <li>Fixed a bug in htsearch, where it failed when reading long
+ queries via the POST method.</li>
+ <li>Fixed a bug in htdig, where it failed to close the connection
+ after certain errors.</li>
+ <li>Fixed a bug that clobbered the hop count of initial documents.</li>
+ <li>Fixed bugs in HTML parser's handling of META tags. It no longer
+ continues indexing meta tags when indexing is turned off for the
+ document, and it no longer gets confused by punctuation in META
+ descriptions and keywords.</li>
+ <li>Fixed a bug in the handling of the
+ <a href="attrs.html#case_sensitive">case_sensitive</a>
+ attribute, so that it's not limited to robots.txt
+ parsing. Now, if false, it causes URLs to be mapped to
+ lowercase, to avoid mixed case duplicates as expected.</li>
+ <li>HTML parser now indexes text in alt parameter of img tags, and
+ calculates word locations more accurately than before.</li>
+ <li>Digging via the local filesystem can now be done even without
+ an HTTP server running, and a few more file types can be indexed
+ locally, without having to rely on the server.</li>
+ <li>Sender name in htnotify's e-mail messages is now quoted.</li>
+ <li>The <a href="attrs.html#external_parsers">external_parsers</a>
+ attribute is now extended to support external converters, to avoid
+ a lot of the complications of writing external parsers.</li>
+ <li>Added support for several new configuration attributes:
+ <a href="attrs.html#authorization">authorization</a>,
+ <a href="attrs.html#start_highlight">start_highlight</a>,
+ <a href="attrs.html#end_highlight">end_highlight</a>,
+ <a href="attrs.html#local_urls_only">local_urls_only</a>,
+ <a href="attrs.html#page_number_separator">page_number_separator</a>,
+ <a href="attrs.html#script_name">script_name</a>,
+ <a href="attrs.html#template_patterns">template_patterns</a>, and
+ <a href="attrs.html#valid_extensions">valid_extensions</a>.</li>
+ <li>The keywords input parameter to htsearch is now propagated to
+ followup searches, as for other input parameters.</li>
+ <li>The query string can now be passed to htsearch as a single
+ command line argument, for use in scripts.</li>
+ <li>Added better examples and comments in sample htdig.conf, and
+ added boolean match type to sample search.html form.</li>
+ <li>The HTML parser in htdig now turns off indexing between
+ &lt;style&gt; and &lt;/style&gt; tags.</li>
+ <li>A variety of other bug fixes, and many documentation updates.
+ See the <a href="ChangeLog">ChangeLog</a> for details.</li>
+ <li>Once again, thanks to everyone who reported bugs and bug
+ fixes.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.3</strong> 22 Sep 1999<br>
+ This version fixes a number of bugs in the 3.1.2 release and
+ is the latest stable release of ht://Dig. It is the only version
+ recommended for production servers and users of all previous
+ versions are encouraged to upgrade.
+ </p>
+ <ul>
+ <li>Fixed a long-standing bug where search queries containing
+ punctuation would not be highlighted in excerpts.</li>
+ <li>Fixed a bug where SGML entities inside HTML tags were not
+ expanded.</li>
+ <li>Fixed the <a
+ href="attrs.html#server_aliases">server_aliases</a>
+ attribute to default to port 80 if omitted.</li>
+ <li>Fixed a bug in URL parsing, where documents ending in the
+ value used for remove_default_doc were ignored. For
+ example, a URL ending in /left_index.html would become /.</li>
+ <li>Fixed META robot parsing to correctly parse multiple
+ directives.</li>
+ <li>Fixed a coredump when generating the metaphone fuzzy
+ database on some systems.</li>
+ <li>Fixed the behavior of the <a
+ href="attrs.html#modification_time_is_now">modification_time_is_now</a>
+ attribute to work as documented.</li>
+ <li>Fixed the behavior of htdig to block out the
+ username/password set on the command-line in process
+ listing.</li>
+ <li>Fixed a bug with external parsers to prevent shell escapes
+ in filenames.</li>
+ <li>Fixed a bug on some systems, where printing a date might
+ crash.</li>
+ <li>Handles the ispell endings lists better so that suffixes
+ more closely match grammatical rules.</li>
+ <li>Changed the maximum word length to a run-time option, set
+ with the new attribute <a
+ href="attrs.html#maximum_word_length">maximum_word_length</a>.</li>
+ <li>Tests for the presence of alloca.h, which would cause
+ problems with compiling the regex code under non-GNU
+ compilers.</li>
+ <li>Added support for &lt;EMBED&gt;, &lt;OBJECT&gt;, and
+ &lt;LINK&gt; HTML tags.</li>
+ <li>A variety of other bugs were fixed, see the
+ <a href="ChangeLog">ChangeLog</a> for details.</li>
+ <li>When indexing, htdig should now attempt to index compound
+ words as separate words in addition to a compound word. For
+ example, "pdf_parser" would also be indexed as "pdf" and "parser."</li>
+ <li>Once again, thanks to everyone who reported bugs and bug
+ fixes.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.2</strong> 21 Apr 1999<br>
+ This version fixes a number of bugs in the 3.1.1 release and
+ is the latest stable release of ht://Dig. It is highly
+ recommended for production servers.
+ </p>
+ <ul>
+ <li>Fixed a bug that ignored META description tags when they
+ were also added to the meta_keywords attribute.</li>
+ <li>Fixed the HTML comment parsing to be more lenient about
+ non-standard comments.</li>
+ <li>Fixed problems in the date-parsing code that made it Y2K
+ incompatible. In particular, it forgot that 2000 is a leap
+ year and wouldn't correctly parse dates after 29 Feb
+ 2000.</li>
+ <li>Fixed a variety of bugs in the HTML parser.</li>
+ <li>Fixed an old bug that would exclude <strong>all</strong> URLs if
+ the exclude_urls attribute was left empty.</li>
+ <li>Fixed display of META description tags. Now it always
+ shows the top of a description. If no description exists, it
+ looks for the search terms in the excerpt as usual.</li>
+ <li>Fixed some small memory leaks.</li>
+ <li>Changed the htfuzzy endings algorithm to use a more
+ efficient regex system. Speed improvements on non-English
+ languages are noted, now taking minutes for generation that
+ would take days!</li>
+ <li>Changed the noindex_start and noindex_end attributes to
+ allow case-insensitive matching.</li>
+ <li>Added on-disk versions of the builtin templates to make it
+ more obvious how to change the results templates.</li>
+ <li>Added <a href="attrs.html#date_format">date_format</a>
+ attribute to change the format of dates output in search results.</li>
+ <li>Added <a href="attrs.html#extra_word_characters">extra_word_characters</a>
+ attribute that defines extra characters that should be
+ considered part of a word, rather than punctuation.</li>
+ <li>Several other, relatively minor bugs were also
+ fixed. Many thanks to those who sent in bug reports and to
+ Gilles Detillieux for coordinating this release.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.1</strong> 17 Feb 1999<br>
+ This version cleans up some remaining bugs in the 3.1.0
+ release. As the latest stable release of ht://Dig, it is
+ recommended for all production servers.
+ </p>
+ <ul>
+ <li>Fixed a bug in the configure script under IRIX and Solaris 7.
+ </li>
+ <li>Fixed a minor bug with the Berkeley database code under
+ AlphaLinux.</li>
+ <li>Fixed a serious bug causing bus errors on several platforms,
+ notably Solaris SPARC, caused by unaligned access to database
+ structures.</li>
+ <li>Fixed some bugs in the boolean search parser.</li>
+ <li>Replaced the contributed parse_word_doc.pl script with a
+ more capable parse_doc.pl script.</li>
+ <li>Fixed the htnotify program to parse dates as mentioned in the
+ <a href="notification.html">documentation</a>.</li>
+ <li>Cleaned up some minor mistakes in the documentation and moved
+ to HTML 4.0 Transitional syntax.</li>
+ <li>Fixed the documentation for the <a
+ href="attrs.html#pdf_parser">pdf_parser</a> attribute that was
+ changed in version 3.1.0. This attribute must call the parser with
+ all command-line options.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.0</strong> 9 Feb 1999<br>
+ This version marks the "full release" of version
+ 3.1.0. Naturally, this version adds a few new features and fixes a
+ large number of remaining bugs. This version is the latest stable
+ release of ht://Dig and is recommended for all production servers
+ for current bug-fixes and oft-requested
+ features.
+ </p>
+ <blockquote>
+ <p>
+ <strong>NOTE:</strong> You <em>must</em> rebuild
+ your databases from scratch after updating to this
+ version. Several database-related bugs were fixed and will remain
+ unless you rebuild from scratch. We're sorry for any
+ inconvenience.
+ </p>
+ </blockquote>
+ <ul>
+ <li>Fixed a variety of small memory leaks.</li>
+ <li>Fixed a bug that could duplicate documents in the document
+ databases.</li>
+ <li>Fixed a bug that would not remove documents marked as deleted.</li>
+ <li>Fixed a bug that could dump core with incorrectly defined
+ template_map attributes.</li>
+ <li>Fixed a bug that could dump core or produce bogus dates when
+ a server returns the date in an incorrect format.</li>
+ <li>Fixed a variety of string-matching bugs that caused problems
+ with restricting indexing and searching.</li>
+ <li>Fixed a bug that could dump core if logging searches and CGI
+ environment variables were not set.</li>
+ <li>Fixed a bug that would not highlight searches properly if they
+ contained punctuation.</li>
+ <li>Fixed PDF parsing to support programs beyond acroread.</li>
+ <li>Fixed a bug that caused problems with large robots.txt files.</li>
+ <li>Fixed a bug in the sample rundig script from a non-portable
+ test for the age of databases.</li>
+ <li>Fixed bugs in the fuzzy matching code that could prevent
+ searches from completing if fuzzy databases were not present.</li>
+ <li>Fixed bugs in the soundex and metaphone algorithms that
+ would only return the first word of several matching
+ words. <strong>Note</strong> that to completely fix this bug, you must
+ rebuild your soundex and metaphone databases.</li>
+ <li>Fixed up many compilation warnings and errors.</li>
+ <li>Fixed a performance slowdown in htsearch when
+ <a href="attrs.html#backlink_factor">backlink_factor</a> and
+ <a href="attrs.html#date_factor">date_factor</a> are zero and can
+ be ignored.</li>
+ <li>Improved performance when a server ignores the
+ If-Modified-Since request during update digs.</li>
+ <li>Added a warning message if the locale: option is set
+ to a locale that is not present.</li>
+ <li>Some minor performance improvements.</li>
+ <li>Allow "include" keyword in <a href="cf_general.html">config
+ file</a> to include other config files.</li>
+ <li>Uses latest (2.6.4) version of the Berkeley database.</li>
+ <li>Two databases may be merged together using
+ <a href="htmerge.html">htmerge</a>.</li>
+ <li>The <a href="htdig.html">htdig</a> program can be safely
+ stopped and restarted in the middle of a dig. The dig will write
+ the progress to the file specified by the new
+ <a href="attrs.html#url_log">url_log</a> option.</li>
+ <li>Added support for anchors in excerpts with the
+ <a href="attrs.html#add_anchors_to_excerpt">add_anchors_to_excerpt</a>
+ option and the ANCHOR template variable.</li>
+ <li>Added support for sorting results in increasing or
+ decreasing order of document date, size, title and score using
+ the <a href="hts_form.html">search form</a>. Note that changing
+ sort from the default of score will result in a performance
+ decrease.</li>
+ <li>Added config options <a href="attrs.html#sort">sort</a> and
+ <a href="attrs.html#sort_names">sort_names</a> to change the
+ default sort and names used in the SORT template variable.</li>
+ <li>Added the option <a
+ href="attrs.html#compression_level">compression_level</a> to
+ compress the document database if the zlib library is
+ present.</li>
+ <li>Added the options
+ <a href="attrs.html#noindex_start">noindex_start</a> and
+ <a href="attrs.html#noindex_end">noindex_end</a> to delimit
+ sections of HTML documents to be ignored.</li>
+ <li>Added the option
+ <a href="attrs.html#allow_in_form">allow_in_form</a> to allow
+ specific config options to be set in the search form.</li>
+ <li>Added the option
+ <a href="attrs.html#bad_querystr">bad_querystr</a> to ignore URLs
+ containing specified CGI queries.</li>
+ <li>Added the option
+ <a href="attrs.html#search_results_wrapper">search_results_wrapper</a>
+ to replace separate header and footer files. For more
+ information, see the <a href="hts_general.html">general
+ htsearch</a> documentation.</li>
+ <li>Added option
+ <a href="attrs.html#no_title_text">no_title_text</a> to allow
+ configuration of the text used when no title is found.</li>
+ <li>Added option
+ <a href="attrs.html#url_part_aliases">url_part_aliases</a> to allow
+ rewriting portions of URLs.</li>
+ <li>Added option
+ <a href="attrs.html#common_url_parts">common_url_parts</a> to
+ compress common portions of URLs. Requires rebuilding
+ databases when changed.</li>
+ <li>Added option
+ <a href="attrs.html#remove_default_doc">remove_default_doc</a> to
+ control whether ht://Dig strips off the default document in a
+ folder. Setting it to empty will prevent problems with servers that
+ treat / and /index.html as different URLs.</li>
+ <li>Of course there are many other bug-fixes and small
+ enhancements. Many thanks to everyone who reported a bug or
+ contributed code for this release!</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.0b4</strong> 22 Dec 1998<br>
+ This version fixes a security hole in htnotify. The hole has been
+ present in previous versions but was inadvertently made worse in
+ the 3.1.0 beta releases. Malicious users could construct pages
+ that executed commands running under the shell of the user running
+ htnotify. <strong>It is highly recommended that users of previous
+ versions switch to this release.</strong>
+ </p>
+ <ul>
+ <li>Fixed a memory leak in htnotify and htsearch.</li>
+ <li>Updated the contributed parse_word_doc.pl script.</li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.0b3</strong> 15 Dec 1998<br>
+ This version adds only a few features and a significant number of
+ bug fixes. This version has been pretty thoroughly tested. Though
+ there are a few remaining issues, it is hoped that this will be
+ near the end of the beta releases before version 3.1.0. Note that
+ it's recommended to update your databases to eliminate the
+ possibility of subtle changes in the database format.
+ </p>
+ <ul>
+ <li>Fixed a bug which would ignore the proxy settings,
+ introduced in version 3.1.0b2.</li>
+ <li>Fixed a bug where words would remain from deleted
+ documents.</li>
+ <li>Fixed a bug where SGML &lt; was considered part of a tag
+ in the HTML parser, introduced in version 3.1.0b2.</li>
+ <li>Fixed a bug where empty boolean searches would dump
+ core.</li>
+ <li>Fixed a bug where boolean "and," "or," and "not" would be
+ removed from a search string, causing a syntax error.</li>
+ <li>Fixed a bug which wouldn't keep track of the hopcounts
+ correctly.</li>
+ <li>Added support for META refresh tags, contributed by Aidas
+ Kasparas.</li>
+ <li>Added support for using CGI
+ <a href="http://hoohoo.ncsa.uiuc.edu/cgi/">environment
+ variables</a> in the search templates, contributed by Gilles
+ Detillieux.</li>
+ <li>Improved memory requirements <strong>slightly</strong> through
+ fixing a memory leak in htdig and a general system-wide
+ adjustment.</li>
+ <li>Improved support for multiple exclude and restrict items
+ through htsearch, contributed by William Rhee and Gilles.</li>
+ <li>Improved support to compile under CygWinB20, contributed
+ by Klaus Mueller.</li>
+ <li>Upgraded to the latest version (2.5.9) of the
+ <a href="http://www.sleepycat.com/">Berkeley DB</a>.</li>
+ <li>Added a new option
+ <a href="attrs.html#server_wait_time">server_wait_time</a> to
+ give a delay between connections to a server. Currently this
+ can also affect local filesystem digging if set.</li>
+ <li>Added a new option
+ <a href="attrs.html#server_max_docs">server_max_docs</a> to limit
+ the number of documents pulled down from a server in one dig.</li>
+ <li>Added a new option
+ <a href="attrs.html#http_proxy_exclude">http_proxy_exclude</a>
+ to ignore the proxy setting on certain URLs.</li>
+ <li>Added a new option
+ <a href="attrs.html#no_excerpt_show_top">no_excerpt_show_top</a> to
+ show the top of a document when there is no excerpt.</li>
+ <li>Added new options
+ <a href="attrs.html#date_factor">date_factor</a>,
+ <a href="attrs.html#backlink_factor">backlink_factor</a>, and
+ <a href="attrs.html#description_factor">description_factor</a> to
+ improve search rankings. Respectively, they can give higher
+ rankings to more recent documents, documents with a high
+ number of links pointing to them, and documents with relevant
+ URL descriptions pointing to them. See the documentation for
+ more information.</li>
+ <li>Added a set of contributed scripts called multidig to help
+ work with multiple sets of URLs and databases.</li>
+ <li>Fixed many compilation problems under AIX, thanks to
+ Alexander Bergolth!</li>
+ <li>
+ Many other bugs were fixed, so a big thanks to everyone
+ who submitted a bug report, patch or gave other feedback! See the
+ <a href="ChangeLog">ChangeLog</a> for more details.
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.1.0b2</strong> 1 Nov 1998<br>
+ This version adds a few minor features as well as many
+ bugfixes. It is still considered beta as some bug reports have not
+ been fully examined.
+ </p>
+ <ul>
+ <li>
+ Fixed a <strong>major</strong> database corruption
+ problem. Since this bug corrupted the document databases, to
+ completely fix it, you will need to rebuild your databases from
+ scratch.
+ </li>
+ <li>
+ Fixed many problems with the Makefiles and configure
+ scripts. Using <code>./configure --prefix=</code> now works.
+ </li>
+ <li>
+ Added fixes for connection problems with Digital Alpha-based
+ systems contributed by Paul J. Meyer!
+ </li>
+ <li>
+ Added support for syslog-based htsearch logging. See the
+ <a href="attrs.html#logging">config documentation</a> for more
+ details. Thanks to Leo Bergolth for this!
+ </li>
+ <li>
+ Added fixes to work with DNS aliases (as opposed to virtual
+ hosts) through the
+ <a href="attrs.html#server_aliases">server_aliases</a> and
+ <a href="attrs.html#limit_normalized">limit_normalized</a> options
+ as contributed by Leo Bergolth.
+ </li>
+ <li>
+ Added cleanups of the HTML parser and the connection timeout
+ code contributed by Ren&eacute; Seindal.
+ </li>
+ <li>
+ Now supports case insensitive servers through the
+ <a href="attrs.html#case_sensitive">case_sensitive</a> option.
+ </li>
+ <li>
+ Now supports ISO 8601 date format, using the
+ <a href="attrs.html#iso_8601">iso_8601</a> option.
+ </li>
+ <li>
+ Added a wrapper to emulate Excite for Web Servers (EWS)
+ contributed by John Grohol.
+ </li>
+ <li>
+ Added fixes to the contrib whatsnew.pl script to work with DB2
+ contributed by Jacques Reynes.
+ </li>
+ <li>
+ Added a new contributed synonyms file from John Banbury.
+ </li>
+ <li>
+ Added a new template variable: CURRENT, the number of the
+ current match, from a patch by Ren&eacute; Seindal.
+ </li>
+ <li>
+ Many other minor bugs were fixed, so a big thanks to everyone
+ who submitted a bug report or a patch! See the
+ <a href="ChangeLog">ChangeLog</a> for more details.
+ </li>
+ </ul>
+ <br>
+
+ <p>
+ <strong>Release notes for htdig-3.1.0b1</strong> 8 Sep
+ 1998<br>
+ This version adds several major new features as well as some
+ bug-fixes. It is considered a beta release since it has only seen
+ limited testing.
+ </p>
+ <blockquote>
+ <p>
+ <font face="Helvetica" size="+1">It is <strong>
+ extremely</strong> important that you rebuild all your databases made
+ with previous versions. This version no longer uses the GDBM database
+ format and databases produced with it will be incompatible with other
+ versions. Do not blame me for anything if you didn't do this. You have
+ been warned...</font>
+ </p>
+ </blockquote>
+ <ul>
+ <li>
+ Added patches made by Pasi Eronen to support local filesystem access
+ </li>
+ <li>
+ Added a PDF parser contributed by Sylvain Wallez
+ </li>
+ <li>
+ Added support for META description and robots tags
+ </li>
+ <li>
+ Converted the database code to use the BerkeleyDB format, contributed
+ by Esa Ahola and Jesse op den Brouw.
+ </li>
+ <li>
+ Added a prefix fuzzy algorithm, contributed by Esa and Jesse.
+ </li>
+ <li>
+ Various other bugs were fixed. Thanks for all the patches
+ that were sent to me and the mailing list!
+ </li>
+ </ul>
+ <br>
+
+ <p>
+ <strong>Release notes for htdig-3.0.8b2</strong> 15 Aug
+ 1997<br>
+ This new version contains most of the patches that Pasi Eronen
+ has posted to the list plus some other random fixes.
+ </p>
+
+ <p>
+ <strong>Release notes for htdig-3.0.8b1</strong>
+ 27-Apr-1997<br>
+ I consider this a beta release since I have not had time to
+ test everything. Use at your own risk...
+ </p>
+ <ul>
+ <li>
+ Base tag problem fixed
+ </li>
+ <li>
+ URL parser somewhat more robust
+ </li>
+ <li>
+ Date parsing bug fixed
+ </li>
+ <li>
+ Added Substring fuzzy algorithm.
+ </li>
+ <li>
+ Various other bugs were fixed. Thanks for all the patches
+ that were sent to me!
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.7</strong> 12-Jan-1997<br>
+ More bug fixes and some minor new functionality. Hopefully,
+ I'll be able to finish up work on version 3.1 at some point in
+ the near future.<br>
+ I have recently received some more patches for various things,
+ but I have not incorporated those, yet. Next version.
+ </p>
+ <ul>
+ <li>
+ The problem with the missing words has been fixed. This was
+ a problem in the Dictionary class.
+ </li>
+ <li>
+ htsearch is a *lot* faster due to a patch by Esa Ahola.
+ </li>
+ <li>
+ htfuzzy has some work done to it. With the addition of the
+ new rx-1.4 library, the endings algorithm now actually
+ works for languages other than English... It still takes an
+ awfully long time to build the tables for languages with
+ lots of rules.
+ </li>
+ <li>
+ URLs now can be of the dubious form http:foo.html I have
+ never seen this used and think it is bogus, but alas, it
+ works now.
+ </li>
+ <li>
+ A search form can now manually add words to any search
+ using the new <em>keywords</em> form attribute.
+ </li>
+ <li>
+ A problem in the plaintext parser used to cause bogus HTML
+ in search results. This has been fixed.
+ </li>
+ <li>
+ New documentation format. Lots of new documentation, as
+ well.
+ </li>
+ <li>
+ New robotstxt_name attribute. Used to match the
+ 'user-agent' lines in robots.txt files.
+ </li>
+ <li>
+ The &lt;base&gt; tag is now properly supported.
+ </li>
+ <li>
+ Preliminary support for lots of new features, including:
+ <ul>
+ <li>
+ External document parsers. You'll be able to write your
+ own document parser for that special document type that
+ ht://Dig doesn't know about.
+ </li>
+ <li>
+ New fuzzy search algorithms: substring, regex,
+ globbing, etc.
+ </li>
+ </ul>
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.6</strong> 26-Oct-1996<br>
+ Just a single bug fix and one additional feature in this
+ release.
+ </p>
+ <ul>
+ <li>
+ Fixed the problem that caused frequent crashes with virtual
+ memory exhausted.
+ </li>
+ <li>
+ Added a new attribute, keywords_meta_tag_names, which
+ should contain a list of meta tag names for which the
+ content should be used as keywords. The default is set to
+ "keywords htdig-keywords"
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.5</strong> 13-Oct-1996<br>
+ This release consists of more bug fixes.<br>
+ I want to thank Elliot Lee &lt;sopwith@cuc.edu&gt; for his
+ help with tracking down several bugs.
+ </p>
+ <ul>
+ <li>
+ Fixed problem with accent characters. Words with SGML
+ entities and iso-8859-1 characters will now be indexed
+ correctly.
+ </li>
+ <li>
+ Changed the auto configuration to detect the need for a
+ prototype for the gethostname() function. (This was
+ supposed to be fixed before, but wasn't)
+ </li>
+ <li>
+ Reduced the memory requirements for all the programs by
+ changing the rehash() method in the Dictionary class.
+ Access to hashes may be a little slower, but the memory
+ requirements were reduced by a factor 10 or so.
+ </li>
+ <li>
+ Hopefully fixed a problem with the time related functions
+ on certain platforms. More checks are done to make sure the
+ functions that are used are actually available.
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.4</strong> 2-Sep-1996<br>
+ The previous version failed to build under Linux. This should
+ be fixed now.
+ </p>
+ <ul>
+ <li>
+ Fixed problem with the time stuff which caused the build of
+ htdig to fail.
+ </li>
+ <li>
+ Fixed a memory problem in htdig
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.3</strong> 2-Sep-1996<br>
+ Bugs bugs bugs... Will they <em>ever</em> all be found?
+ </p>
+ <p>
+ <strong>NOTE</strong>: I made extensive changes to the htdig.conf file
+ that gets installed. I would advise you to remove or rename
+ your existing htdig.conf and let the installation process
+ create a new one for you that you can then modify.
+ </p>
+ <p>
+ Also, since the rundig script has changed, you should remove
+ the old one before installing ht://Dig. (The installation
+ will refuse to overwrite existing files...)
+ </p>
+ <ul>
+ <li>
+ The problem with htsearch crashing on some machines has
+ been fixed.
+ </li>
+ <li>
+ A bug caused the &lt;AREA&gt; tag to be ignored. Fixed.
+ </li>
+ <li>
+ A bug in SunOS caused dates to be all screwed up.
+ </li>
+ <li>
+ Added lots of comments to the example htdig.conf file. Also
+ added some additional example attributes.
+ </li>
+ <li>
+ Fixed a bug in the installation process which caused rundig
+ to be created incorrectly.
+ </li>
+ <li>
+ Added a sample synonyms file. Also modified rundig to
+ create a synonyms database for it.
+ </li>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0.2</strong> 22-Aug-1996<br>
+ More bug fixes.
+ </p>
+ <ul>
+ <li>
+ Multiple start URLs now actually work. Before they were
+ just documented to work, but didn't actually work.
+ </li>
+ <li>
+ htmerge now will refuse to remove database files if it
+ detects that the call to /bin/sort failed.
+ </li>
+ <li>
+ htmerge can now tell /bin/sort to use a specific temporary
+ directory. This is done by setting the TMPDIR environment
+ variable.
+ </li>
+ <li>
+ htsearch can now search for words with non-ASCII characters
+ in them.
+ </li>
+ <li>
+ Added support for finding URLs in the &lt;frame&gt; and
+ &lt;area&gt; tags.
+ </li>
+ <li>
+ There is a problem with htsearch under Linux. It causes a
+ segmentation violation after the first search result is
+ displayed. Don't know what the problem is, yet.
+ </li>
+ <li>
+ Fixed bug in the auto configuration which always set the
+ value for NEED_PROTO_GETHOSTNAME to 1. For most systems
+ this actually needs to be 0.
+ </li>
+ <li>
+ <strong>Release notes for htdig-3.0.1</strong>
+ 16-Aug-1996<br>
+ This is a maintenance release in response to several bug
+ reports.
+ <ul>
+ <li>
+ htdig now will display a list of errors when the
+ statistics option (-s) is used. The list gives the URL
+ that caused the error and a URL that referred to it.
+ Hopefully this information is useful for site
+ maintainers.
+ </li>
+ <li>
+ Some problems with the SGML character entities were
+ fixed. The major symptom was that the ';' that ends an
+ entity used to be included as well.
+ </li>
+ <li>
+ Major problems with htnotify were fixed. There were
+ many hardcoded things in this program that made it very
+ specific to SDSU and to me.
+ </li>
+ <li>
+ malloc.h should not be included anymore. All references
+ to it were replaced with stdlib.h instead. This should
+ make compiles on some platforms work better.
+ </li>
+ <li>
+ htsearch now will use the CONFIG_DIR environment
+ variable to override the compiled in default. (set in
+ the CONFIG file...) This was done so that htsearch can
+ be called from a simple wrapper that sets that
+ environment variable. Only the wrapper needs to be
+ modified to get different CONFIG_DIR values.
+ </li>
+ </ul>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0</strong>
+ 17-Jul-1996<br>
+ I decided to make this the <em>official</em> 3.0 release.
+ </p>
+ <blockquote>
+ <blockquote>
+ <font face="Helvetica" size="+1">It is <strong>
+ extremely</strong> important that you remove all traces
+ of earlier beta versions of the software before
+ installing this version or that you install in a
+ completely different location. Do not blame me for
+ anything if you didn't do this. You have been
+ warned...</font>
+ </blockquote>
+ </blockquote>
+ <ul>
+ <li>
+ htwrapper is no more. htsearch is now the CGI program
+ </li>
+ <li>
+ <a href="htsearch.html" target="_top">htsearch</a> now
+ uses templates to display the results. A template is
+ simply a piece of HTML code for a single match. The
+ HTML code includes variables that will be expanded to
+ the various items that are unique to each match, like
+ URL, EXCERPT, TITLE, etc. The template can be selected
+ at search time (through a menu). There are two builtin
+ templates: <code>builtin-short</code> and <code>
+ builtin-long</code>. The <code>builtin-short</code> template
+ just lists the stars and title while the <code>
+ builtin-long</code> template lists results in a similar
+ fashion to the way Alta Vista displays results.
+ </li>
+ <li>
+ Many runtime configuration options have been removed
+ and many new ones have been added. Check the
+ <a href="attrs.html">configuration file</a> documentation for
+ details. There are also some enhancements to the format
+ of the configuration file.
+ <ul>
+ <li>
+ Attribute values can now span multiple lines by
+ ending each line that needs to be continued with a
+ backslash ('\').
+ </li>
+ <li>
+ Attribute values can now include the contents of
+ files. Just put the filename in back-quotes. The
+ filename can use the normal variable expansion so
+ that things like:
+ <blockquote>
+ <code>someattribute: `${common_dir}/somefile`</code>
+ </blockquote>
+ will work. The file that is specified is read in and
+ all newlines and starting and trailing whitespaces are
+ reduced to a single space. If the file is not found,
+ nothing is included and no error is flagged.<br>
+ Note that the backquote character is used, not the
+ regular quote character.
+ </li>
+ </ul>
+ Notable attribute changes:
+ <ul>
+ <li>
+ All the attributes that set the heading text have
+ been removed. These attributes include:
+ <ul>
+ <li>
+ accessed_heading_text
+ </li>
+ <li>
+ datesize_heading_text
+ </li>
+ <li>
+ descriptions_heading_text
+ </li>
+ <li>
+ excerpt_heading_text
+ </li>
+ <li>
+ modified_heading_text
+ </li>
+ <li>
+ score_heading_text
+ </li>
+ <li>
+ size_heading_text
+ </li>
+ <li>
+ url_heading_text
+ </li>
+ <li>
+ wordlist_heading_text
+ </li>
+ <li>
+ field_order
+ </li>
+ </ul>
+ </li>
+ <li>
+ New attributes added:
+ <dl>
+ <dt>
+ <strong>http_proxy</strong>
+ </dt>
+ <dd>
+ Added to support the use of a HTTP proxy server
+ to index documents
+ </dd>
+ <dt>
+ <strong>locale</strong>
+ </dt>
+ <dd>
+ Added to support international character sets
+ </dd>
+ <dt>
+ <strong>match_method</strong>
+ </dt>
+ <dd>
+ New way of specifying if a search is an 'or',
+ 'and', or 'boolean' search
+ </dd>
+ <dt>
+ <strong>matches_per_page</strong>
+ </dt>
+ <dd>
+ The new paged results uses this
+ </dd>
+ <dt>
+ <strong>max_doc_size</strong>
+ </dt>
+ <dd>
+ Limit the size of documents retrieved
+ </dd>
+ <dt>
+ <strong>next_page_text</strong>
+ </dt>
+ <dd>
+ Used in the navigation between pages
+ </dd>
+ <dt>
+ <strong>no_excerpt_text</strong>
+ </dt>
+ <dd>
+ Text displayed if no excerpt was available
+ (this used to be hard-coded)
+ </dd>
+ <dt>
+ <strong>no_next_page_text</strong>
+ </dt>
+ <dd>
+ Used in the navigation between pages
+ </dd>
+ <dt>
+ <strong>no_prev_page_text</strong>
+ </dt>
+ <dd>
+ Used in the navigation between pages
+ </dd>
+ <dt>
+ <strong>prev_page_text</strong>
+ </dt>
+ <dd>
+ Used in the navigation between pages
+ </dd>
+ <dt>
+ <strong>star_patterns</strong>
+ </dt>
+ <dd>
+ Allow different star images to be used
+ depending on the match URL
+ </dd>
+ <dt>
+ <strong>synonym_dictionary</strong>
+ </dt>
+ <dd>
+ Support for the new synonyms fuzzy algorithm
+ </dd>
+ <dt>
+ <strong>synonym_db</strong>
+ </dt>
+ <dd>
+ Support for the new synonyms fuzzy algorithm
+ </dd>
+ <dt>
+ <strong>syntax_error_file</strong>
+ </dt>
+ <dd>
+ HTML file displayed if there was a boolean
+ expression syntax error
+ </dd>
+ <dt>
+ <strong>template_map</strong>
+ </dt>
+ <dd>
+ Used in the support for the new result display
+ templates
+ </dd>
+ <dt>
+ <strong>template_name</strong>
+ </dt>
+ <dd>
+ Sets the default template name
+ </dd>
+ <dt>
+ <strong>text_factor</strong>
+ </dt>
+ <dd>
+ Added to allow normal text to have a variable
+ weight (0, for example...)
+ </dd>
+ </dl>
+ </li>
+ </ul>
+ <ul>
+ <li>
+ Some form tag names have changed. The list of
+ recognized form tags is in the
+ <a href="htsearch.html" target="_top">htsearch</a>
+ documentation.
+ </li>
+ <li>
+ Multiple start urls can be specified as a value to the
+ 'start_url' attribute. This could be combined with the
+ file inclusion to read in a file of URLs to start with.
+ </li>
+ <li>
+ <a href="htdig.html">htdig</a> now sends the 'Referer:'
+ header in HTTP requests so that any link errors will be
+ logged in the server's log files.
+ </li>
+ <li>
+ In addition to the "htdig-keywords" META tag name,
+ <a href="htdig.html">htdig</a> now also supports just
+ "keywords". This is to make it more compatible with the
+ Alta Vista search engine.
+ </li>
+ <li>
+ The verbose display of <a href="htdig.html">htdig</a>
+ was enhanced to show '+' for a link that will be
+ followed and '-' for a link that was discarded.
+ </li>
+ <li>
+ <a href="htmerge.html">htmerge</a> was changed to use
+ the Unix sort program instead of doing its own sorting.
+ It no longer uses mmap() to map the words into memory.
+ This was causing problems on systems with limited
+ virtual memory available. (What??? You mean you DON'T
+ have at least a 1GB disk dedicated to swap???)
+ </li>
+ <li>
+ The Endings algorithm was fixed up to work properly
+ now. There were several well hidden bugs that made the
+ algorithm come up with illegal words.
+ </li>
+ <li>
+ The <strong>synonyms</strong> fuzzy algorithm was
+ added. This is simply a mapping of words to other
+ words. The input file is just a list of words which
+ causes the first word on a line to be mapped to the
+ rest of the words on that line. (We use this to map
+ course abbreviations to full course names)
+ </li>
+ <li>
+ SGML entities are now supported. They are translated to
+ their equivalent ISO-8859-1 encoding.
+ </li>
+ </ul>
+ </ul>
+
+ <p>
+ <strong>Release notes for htdig-3.0b5</strong>
+ </p>
+ <ul>
+ <li>
+ The configuration has changed. There is now a CONFIG
+ file which contains all the variables which control
+ where things get installed. 'make install' will now
+ actually attempt to set everything up with default or
+ example files.<br>
+ Note that some default directories have changed. For
+ example, the default configuration file location is not
+ /usr/local/etc/htdig.conf anymore. Instead it is now
+ defined in terms of CONFIG_DIR.
+ </li>
+ <li>
+ The htfuzzy/createDict.pl Perl program has been
+ obsoleted. Creating the endings database is now done by
+ htfuzzy itself. If you already have endings databases,
+ you don't need to recreate them, they will still work.
+ </li>
+ <li>
+ GNU rx-1.0 is now included with the distribution. This
+ is used by htfuzzy to create the endings databases.
+ </li>
+ <li>
+ The name of the whole search system has changed from
+ <em>HTDig</em> to <em>ht://Dig</em>.
+ </li>
+ <li>
+ The HTML documentation got a big facelift! This
+ includes the new logo for ht://Dig. (Thanks goes to
+ Keith Parks for the Images!)
+ </li>
+ <li>
+ htsearch got a new option '-r' which will allow it to
+ produce raw output. This output can easily be parsed by a
+ wrapper program to produce custom HTML or other output
+ for the search results.
+ </li>
+ </ul>
+
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/06/12 13:39:12 $
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/THANKS.html b/debian/htdig/htdig-3.2.0b6/htdoc/THANKS.html
new file mode 100644
index 00000000..3adfecda
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/THANKS.html
@@ -0,0 +1,104 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig: Thanks</title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>ht://Dig: Thanks</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ As with most development projects, contributions come from many
+ people and in many forms. The ht://Dig project would like to
+ thank its many contributors. Omissions are merely accidental,
+ please e-mail the
+ <a href="mailto:htdig-dev@lists.sourceforge.net">htdig-dev</a> list
+ if you have been left out or a contribution is not mentioned.
+ </p>
+<h2>Active Developers (in alphabetical order)</h2>
+<ul>
+<li>Lachlan Andrew for much testing, debugging, continual help and cracking of the whip.</li>
+<li>Gabriele Bartolini for the new htnet and date/time code and continual help.</li>
+<li>Jim Cole for testing, debugging, and being a big help on the mailing lists.</li>
+<li>Loic Dachary for Berkeley DB compression, automake/libtool customization, word database implementation, regression tests, fixes using Purify.</li>
+<li>Gilles Detillieux for uncountable bug-fixes, feature enhancements, RedHat RPMs, and documentation updates.</li>
+<li>Geoff Hutchison for filling in as maintainer, dreaming up implementations, and handling many of the little details.</li>
+<li>Torsten Neuer for a variety of bug-fixes, suggestions, and the contributed PHP wrapper.</li>
+<li>Toivo Pedaste for fixing memory leaks and various bugs and contributing md5 duplicate-detection code.</li>
+<li>Neal Richter for the native Win32 port, libhtdig development, testing and debugging.</li>
+<li>Quim Sanmarti for providing a new htsearch query parser in qtest.</li>
+</ul>
+<h2>Additional Contributors:</h2>
+<ul>
+<li>Sergey Abdurashitov for some nifty htsearch formatting options.</li>
+<li>D.J. Adams for contributing the doc2html external converter.</li>
+<li>Esa Ahola for the Prefix fuzzy algorithm, for the DB2 backend and many bug fixes.</li>
+<li>Jerome Alet for bugfixes in the htsearch CGI.</li>
+<li>Andy Armstrong for contributing regex-based URL rewriting.</li>
+<li>John Banbury for his contributed synonym file.</li>
+<li>Richard Beton for his rewrite of htnotify, including one message per e-mail address and customizable messages.</li>
+<li>Alexander Bergolth for bug fixes, htsearch logging, and other features.</li>
+<li>Andrew Bishop for many bug fixes and gentle prodding.</li>
+<li>Pontus Borg for fixes under AIX.</li>
+<li>Marcel Bosc for improvements in the word database implementation.</li>
+<li>Lorenzo Campedelli for fixing problems with phrase searching.</li>
+<li>Vadim Chekan for many bug fixes and the new configuration parser.</li>
+<li>Matthew Daniel for nph-CGI support.</li>
+<li>&Aacute;kos D&ouml;m&ouml;t&ouml;r for unaccented search-match code.</li>
+<li>Jennifer Dudley for fixing an infinite loop in htdig's local_urls handling.</li>
+<li>Peter Enderborg for bug fixes in the plaintext parser.</li>
+<li>Pasi Eronen for countless memory patches, virtual hosting, and local file digging.</li>
+<li>Iosif Fettich for improving internationalization support.</li>
+<li>Tim Frost for fixing URLs with single quotes.</li>
+<li>Didier Gautheron for fixing many bugs and general improvements.</li>
+<li>Joshua Gerth for contributing HTTPS support to 3.2 using OpenSSL.</li>
+<li>Peter Gray for contributing support for regex in config attributes.</li>
+<li>Bernhard Griener for argument checks in mystrncasecmp().</li>
+<li>John Grohol for the EWS wrapper script.</li>
+<li>Jason Haar for a fix to htnotify's e-mail header syntax.</li>
+<li>Zvi Har'El for bug reports and patches.</li>
+<li>Werner Hett for discovering a security hole in htnotify.</li>
+<li>Rajendra Inamdar for contributing htsearch collection support.</li>
+<li>Joe Jah for bug fixes and maintaining the ht://Dig patch archive at
+<a href="ftp://sol.ccsf.cc.ca.us/htdig-patches/">
+ ftp://sol.ccsf.cc.ca.us/htdig-patches/</a>.</li>
+<li>Richard Jones for his contributed MS Word external parser.</li>
+<li>Aidas Kasparas for contributing support for META refresh tags.</li>
+<li>Marjolein Katsma for implementing anchors in excerpts and parsing comments and many updates to the documentation.</li>
+<li>Ric Klaren for patches to the Makefiles and help with RPM support.</li>
+<li>Maren Leizoala for countless bug reports and thorough testing.</li>
+<li>David Lippi for testing and improving RTF2HTML.</li>
+<li>Benoit Majeau for contributing additional URL debugging info.</li>
+<li>Robert Marchand for contributing the accents fuzzy algorithm.</li>
+<li>Paul Meyer for fixing connection code on Dec Alpha environments.</li>
+<li>Alexis Mikhailov for contributing better support for local filesystem indexing.</li>
+<li>Hanno Mueller for script_name patch and examples.</li>
+<li>Klaus Mueller for working out a Win32 port using CygWin.</li>
+<li>Marco Nenciarini for the new autotools support and the conditional removal of deprecated include files.</li>
+<li>Hans-Peter Nilsson for many bug fixes and other contributions.</li>
+<li>Tobias Oetiker for fixes to the timeout system.</li>
+<li>Jesse op den Brouw for the DB2 backend and survey and the parse_word_doc script.</li>
+<li>Marc Pohl for 8-bit clean patch for Solaris, other bug fixes.</li>
+<li>Arthur Prokosch for fixing problems with phrase searching.</li>
+<li>William Rhee for improving support with multiple excludes and restricts.</li>
+<li>Jacques Reynes for updates to the whatsnew script.</li>
+<li>Robert Ribnitz and the Debian Project for maintaining the <em>man</em> pages.</li>
+<li>Chris-Jason Richards for fixing htnotify under sendmail.</li>
+<li>Phillippe Rochat for fixing whitespace detection in config files.</li>
+<li>Markus R&ouml;hricht for help debugging a database error under CygWin.</li>
+<li><a href="author.html">Andrew Scherpbier</a> for being the original author and leading the project.</li>
+<li>Ren&eacute; Seindal for updates to the HTML parser and the timeout mechanism.</li>
+<li>Benjamin Smedberg for help with the ht://Dig website.</li>
+<li>Jan Sorensen for allowing parsing of 8-bit text without compiler options.</li>
+<li>Sylvain Wallez for the PDF parser.</li>
+<li>Randy Winch for contributing zlib document compression.</li>
+<li>And many others who made bug reports and suggested changes to the code!</li>
+ </ul>
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/06/03 08:19:51 $
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/TODO.html b/debian/htdig/htdig-3.2.0b6/htdoc/TODO.html
new file mode 100644
index 00000000..10026371
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/TODO.html
@@ -0,0 +1,141 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: TODO list
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ TODO list
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ Here is a list of things that have been requested or that are in
+ the works. They are in no particular order. Tasks that are
+ outstanding are designated with a square. Tasks which are
+ complete but not in the current release yet are designated
+ with a circle, and tasks which have been completed and are in
+ the current release are designated with a bullet.
+ </p>
+ <p>
+ Feature requests and bug reports are always welcome. They should
+ be sent to
+ <a href="mailto:htdig3-bugs@htdig.org">&lt;htdig3-bugs@htdig.org&gt;</a>
+ </p>
+ <ul>
+ <li type="bullet">
+ Redesign the database backend to support additional enhancements:
+ <ul>
+ <li type="bullet">
+ Phrase searching
+ </li>
+ <li type="circle">
+ Field-based searching
+ </li>
+ <li type="bullet">
+ &quot;Collections&quot; of multiple databases
+ </li>
+ <li type="square">
+ Continual indexing
+ </li>
+ <li type="square">
+ Parallel indexing and searching
+ </li>
+ </ul>
+ </li>
+ <li type="bullet">
+ Add support for BSDI make program
+ </li>
+ <li type="bullet">
+ Add support for different transport protocols
+ <ul>
+ <li type="square">
+ Gopher
+ </li>
+ <li type="square">
+ FTP
+ </li>
+ <li type="bullet">
+ local filesystem
+ </li>
+ <li type="bullet">
+ HTTP-PROXY
+ </li>
+ <li type="square">
+ HTTPS
+ </li>
+ <li type="bullet">
+ HTTP/1.1
+ </li>
+ </ul>
+ </li>
+ <li type="square">
+ Better Internationalization
+ <ul>
+ <li type="square">
+ Support for UTF-8
+ </li>
+ <li type="bullet">
+ Allow character translation (e.g. remove accents)
+ </li>
+ </ul>
+ </li>
+ <li type="square">
+ Better examples of configuration and result templates
+ </li>
+ <li type="bullet">
+ Eliminate or detect duplicate documents
+ </li>
+ <li type="bullet">
+ Send one e-mail per author using htnotify
+ </li>
+ <li type="bullet">
+ Allow "external decoders," programs to perform some action on
+ files before parsing.
+ <ul>
+ <li type="bullet">Compress, gzip, bzip2, zlib decoders</li>
+ <li type="square">DVI, TeX -&gt; PS decoders</li>
+ </ul>
+ </li>
+ <li type="bullet">
+ AltaVista style +/- boolean queries
+ </li>
+ <li type="square">
+ Support for browser cookies
+ </li>
+ <li type="square">
+ Search improvements
+ <ul>
+ <li type="bullet">
+ Phrase searching
+ </li>
+ <li type="square">
+ New fuzzy search types
+ <ul>
+ <li type="bullet">
+ Automatic spelling correction
+ </li>
+ <li type="square">
+ Trigram matching
+ </li>
+ <li type="bullet">
+ Regex word match
+ </li>
+ <li type="bullet">
+ Accent stripping
+ </li>
+ </ul>
+ </li>
+ </ul>
+ </li>
+ </ul>
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/05/28 13:15:17 $
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/all.html b/debian/htdig/htdig-3.2.0b6/htdoc/all.html
new file mode 100644
index 00000000..1f625a57
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/all.html
@@ -0,0 +1,137 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Overview of Programs
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Overview of Programs
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ There are several programs in the ht://Dig package.
+ </p>
+ <h3>
+ <a href="htdig.html">htdig</a>
+ </h3>
+ <p>
+ Digging is the first step in creating a search database. This
+ system uses the word <em>digging</em> while other systems call
+ it <em>harvesting</em> or <em>gathering</em>. In the ht://Dig
+ system, the program <a href="htdig.html">htdig</a> performs
+ the information gathering stage. In this process, the program
+ will act as a regular web user, except that it will follow
+ <em>all</em> hyperlinks that it comes across. (Actually, it
+ will not follow all of them, just those that are within the
+ domain it needs to gather information on...)<br>
+ Each document it goes to is examined and all the unique
+ words in this document are extracted and stored.
+ </p>
+ <p>
+ The digging process will <em>only</em> follow links and has
+ no notion of JavaScript, applets, or user-input forms.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </h3>
+ <p>
+ Searching is where the users actually get to use all the
+ information that was gathered during the dig and merge
+ stages. The <a href="htsearch.html" target="_top">
+ htsearch</a> program performs the actual searches. It typically
+ produces <code>HTML</code> output which will be seen by the
+ users, though other text formats could be generated by
+ editing the output templates.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htmerge.html">htmerge</a>
+ </h3>
+ <p>
+ Merging does exactly that--it merges one database
+ into another. In previous versions of ht://Dig, the htmerge
+ program also formed databases for use by htsearch from the
+ htdig output. This process is now largely unnecessary except
+ for removal of invalid URLs which is now done by the htpurge
+ program.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htpurge.html">htpurge</a>
+ </h3>
+ <p>
+ Purging removes documents and the associated words from the
+ databases. This should be done after running htdig to remove
+ invalid URLs, documents marked not to be indexed, old
+ versions of modified documents, etc. You can also specify
+ specific URLs to be removed explicitly by htpurge.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htload.html">htload</a>
+ </h3>
+ <p>
+ Loading involves importing the contents of the databases
+ from formatted ASCII text documents as created by htdump or
+ the -t flag from htdig. This is, of course, destructive by
+ nature and data from the text files will replace any
+ conflicting data in the databases.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htdump.html">htdump</a>
+ </h3>
+ <p>
+ Dumping involves exporting the contents of the databases to
+ formatted ASCII text documents. This can be useful for
+ backups, transferring databases between different operating
+ systems, changing the compression or encodings in the
+ ht://Dig configuration, or parsing by external utilities. It is
+ <em>not</em> recommended to edit these files by hand, so be
+ warned! (Minor edits will probably be fine.)
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htstat.html">htstat</a>
+ </h3>
+ <p>
+ The htstat program returns statistics on the databases,
+ similar to the -s flags for some of the programs. In
+ addition, it can return a list of URLs in the databases.
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htnotify.html">htnotify</a>
+ </h3>
+ <p>
+ The ht://Dig system includes a handy reminder service which
+ allows HTML authors to add some ht://Dig specific <a href="meta.html">meta
+ information</a> in HTML documents. This meta information is
+ used to email authors after a specified date. Very useful
+ to maintain lists that contain those annoying &quot;new&quot;
+ graphics with new items. (Hint: Things really aren't all
+ that new anymore after 6 months!)<br>
+ </p>
+ <hr noshade>
+ <h3>
+ <a href="htfuzzy.html">htfuzzy</a>
+ </h3>
+ <p>
+ To allow the searches to use &quot;fuzzy&quot; algorithms to match
+ words, the <a href="htfuzzy.html">htfuzzy</a> program can
+ create indexes for several different algorithms.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:17 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/attrs.html.in b/debian/htdig/htdig-3.2.0b6/htdoc/attrs.html.in
new file mode 100644
index 00000000..16edce17
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/attrs.html.in
@@ -0,0 +1,14606 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ attrs_head.html and attrs_tail.html -->
+
+<html>
+ <head>
+ <title>ht://Dig: Configuration file attributes</title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>Configuration file format -- Attributes</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Alphabetical list of attributes
+ </h2>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="accents_db">
+ accents_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.accents.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The database file used for the fuzzy "accents" search
+ algorithm. This database is created by
+ <a href="htfuzzy.html">htfuzzy</a> and used by
+ <a href="htsearch.html" target="_top">htsearch</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ accents_db:
+ </td>
+ <td nowrap>
+ ${database_base}.uml.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="accept_language">
+ accept_language</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute allows you to restrict the set of natural languages
+ that are preferred as a response to an HTTP request performed by the
+ digger. This can be done by putting one or more language tags
+ (as defined by RFC 1766) in the preferred order, separated by spaces.
+ By doing this, when the server performs a content negotiation based
+ on the 'accept-language' given by the HTTP user agent, a different
+ content can be shown depending on the value of this attribute. If
+ set to an empty list, no language will be sent and the server default
+ will be returned.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ accept_language:
+ </td>
+ <td nowrap>
+ en-us en it
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="add_anchors_to_excerpt">
+ add_anchors_to_excerpt</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, the first occurrence of each matched
+ word in the excerpt will be linked to the closest
+ anchor in the document. This only has effect if the
+ <strong>EXCERPT</strong> variable is used in the output
+ template and the excerpt is actually going to be displayed.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ add_anchors_to_excerpt:
+ </td>
+ <td nowrap>
+ no
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="allow_double_slash">
+ allow_double_slash</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, strings of multiple slashes ('/') in URL paths
+ will be left intact, rather than being collapsed. This is necessary
+ for some search engine URLs which use slashes to separate fields rather
+ than to separate directory components. However, it can lead to multiple database
+ entries referring to the same file, and it causes '/foo//../' to
+ be equivalent to '/foo/', rather than to '/'.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ allow_double_slash:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="allow_in_form">
+ allow_in_form</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Allows the specified config file attributes to be specified
+ in search forms as separate fields. This could be used to
+ allow form writers to design their own headers and footers
+ and specify them in the search form. Another example would
+ be to offer a menu of search_algorithms in the form.
+ <table>
+ <tr>
+ <td nowrap>
+ <code>
+ &nbsp;&nbsp;&lt;SELECT NAME="search_algorithm"&gt;<br>
+ &nbsp;&nbsp;&lt;OPTION VALUE="exact:1 prefix:0.6 synonyms:0.5 endings:0.1" SELECTED&gt;fuzzy<br>
+ &nbsp;&nbsp;&lt;OPTION VALUE="exact:1"&gt;exact<br>
+ &nbsp;&nbsp;&lt;/SELECT&gt;
+ </code></td>
+ </tr>
+ </table>
+ The general idea behind this is to make an input parameter out
+ of any configuration attribute that's not already automatically
+ handled by an input parameter. You can even make up your own
+ configuration attribute names, for purposes of passing data from
+ the search form to the results output. You're not restricted to
+ the existing attribute names. The attributes listed in the
+ allow_in_form list will be settable in the search form using
+ input parameters of the same name, and will be propagated to
+ the follow-up search form in the results template using template
+ variables of the same name in upper-case.
+ You can also make select lists out of any of these input
+ parameters, in the follow-up search form, using the
+ <a href="#build_select_lists">build_select_lists</a>
+ configuration attribute.
+ <br>WARNING: Extreme care should be taken with this option, as
+ allowing CGI scripts to set file names can open security holes.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ allow_in_form:
+ </td>
+ <td nowrap>
+ search_algorithm search_results_header
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="allow_numbers">
+ allow_numbers</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, numbers are considered words. This
+ means that searches can be done on strings of digits as well as
+ regular words. All the same rules apply to numbers as
+ to words. This does not cause numbers containing a decimal point or
+ commas to be treated as a single entity.
+ When allow_numbers is false, words are still
+ allowed to contain digits, but they must also contain at
+ least one alphabetic character or
+ <a href="#extra_word_characters">extra word</a> character.
+ To disallow digits in words, add the digits to
+ <a href="#valid_punctuation">valid_punctuation</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ allow_numbers:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="allow_space_in_url">
+ allow_space_in_url</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, htdig will handle URLs that contain
+ embedded spaces. Technically, this is a violation of
+ RFC 2396, which says spaces should be stripped out
+ (as htdig does by default). However, many web browsers
+ and HTML code generators violate this standard already,
+ so enabling this attribute allows htdig to handle these
+ non-compliant URLs. Even with this attribute set, htdig
+ still strips out all white space (leading, trailing and
+ embedded), except that space characters embedded within
+ the URL will be encoded as %20.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ allow_space_in_url:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="allow_virtual_hosts">
+ allow_virtual_hosts</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.8b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, htdig will index virtual web sites as
+ expected. If false, all URL host names will be
+ normalized into whatever the DNS server claims the IP
+ address to map to. If this option is set to false,
+ there is no way to index either "soft" or "hard"
+ virtual web sites.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ allow_virtual_hosts:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="anchor_target">
+ anchor_target</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When the first matched word in the excerpt is linked
+ to the closest anchor in the document, this string
+ can be set to specify a target in the link so the
+ resulting page is displayed in the desired frame.
+ This value will only be used if the
+ <a href="#add_anchors_to_excerpt">add_anchors_to_excerpt</a>
+ attribute is set to true, the <strong>EXCERPT</strong>
+ variable is used in the output template and the
+ excerpt is actually displayed with a link.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ anchor_target:
+ </td>
+ <td nowrap>
+ body
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="any_keywords">
+ any_keywords</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, the words in the <strong>keywords</strong>
+ input parameter in the search form will be joined with logical
+ ORs rather than ANDs, so that any of the words provided will do.
+ Note that this has nothing to do with limiting the search to
+ words in META keywords tags. See the <a href="hts_form.html">
+ search form</a> documentation for details on this.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ any_keywords:
+ </td>
+ <td nowrap>
+ yes
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="author_factor">
+ author_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Weighting applied to words in a &lt;meta name="author" ... &gt;
+ tag.<br>
+ See also <a href="#heading_factor">heading_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ author_factor:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="authorization">
+ authorization</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This tells htdig to send the supplied
+ <em>username</em><strong>:</strong><em>password</em> with each HTTP request.
+ The credentials will be encoded using the "Basic" authentication
+ scheme. There <em>must</em> be a colon (:) between the username and
+ password.<br>
+ This attribute can also be specified on htdig's command line using
+ the -u option, and will be blotted out so it won't show up in a
+ process listing. If you use it directly in a configuration file,
+ be sure to protect it so it is readable only by you, and do not
+ use that same configuration file for htsearch.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ authorization:
+ </td>
+ <td nowrap>
+ myusername:mypassword
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="backlink_factor">
+ backlink_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 0.1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a weight of "how important" a page is, based on
+ the number of URLs pointing to it. It's actually
+ multiplied by the ratio of the incoming URLs (backlinks)
+ and outgoing URLs (links on the page), to balance out pages
+ with lots of links to pages that link back to them. The ratio
+ gives lower weight to "link farms", which often have many
+ links to them. This factor can
+ be changed without changing the database in any way.
+ However, setting this value to something other than 0
+ incurs a slowdown on search results.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ backlink_factor:
+ </td>
+ <td nowrap>
+ 501.1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="bad_extensions">
+ bad_extensions</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of extensions on URLs which are
+ considered non-parsable. This list is used mainly to
+ supplement the MIME-types that the HTTP server provides
+ with documents. Some HTTP servers do not have a correct
+ list of MIME-types and so can advertise certain
+ documents as text while they are some binary format.
+ If the list is empty, then all extensions are acceptable,
+ provided they pass other criteria for acceptance or rejection.
+ See also <a href="#valid_extensions">valid_extensions</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ bad_extensions:
+ </td>
+ <td nowrap>
+ .foo .bar .bad
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="bad_local_extensions">
+ bad_local_extensions</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ .php .shtml .cgi
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of extensions on URLs which must be retrieved
+ using the URL's true transport mechanism (such as HTTP).
+ If <a href="#local_urls">local_urls</a> is specified, URLs not
+ ending with these extensions may instead be retrieved through
+ the local filesystem for efficiency.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top"><em>No example provided</em></td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="bad_querystr">
+ bad_querystr</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of CGI query strings to be excluded from
+ indexing. This can be used in conjunction with CGI-generated
+ portions of a website to control which pages are
+ indexed.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ bad_querystr:
+ </td>
+ <td nowrap>
+ forum=private section=topsecret&amp;passwd=required
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="bad_word_list">
+ bad_word_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/bad_words
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a file which contains words which should
+ be excluded when digging or searching. This list should
+ include the most common words or other words that you
+ don't want to be able to search on (things like <em>
+ sex</em> or <em>smut</em> are examples of these.)<br>
+ The file should contain one word per line. A sample
+ bad words file is located in the <code>contrib/examples</code>
+ directory.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ bad_word_list:
+ </td>
+ <td nowrap>
+ ${common_dir}/badwords.txt
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="bin_dir">
+ bin_dir</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @bindir@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the directory in which the executables
+ related to ht://Dig are installed. It is never used
+ directly by any of the programs, but other attributes
+ can be defined in terms of this one.
+ <p>
+ The default value of this attribute is determined at
+ compile time.
+ </p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ bin_dir:
+ </td>
+ <td nowrap>
+ /usr/local/bin
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="boolean_keywords">
+ boolean_keywords</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ and or not
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These three strings are the keywords used in
+ constructing the
+ <a href="hts_templates.html#LOGICAL_WORDS">LOGICAL_WORDS</a>
+ template variable,
+ and in parsing the <a href="hts_form.html#words">words</a> input
+ parameter when the <a href="hts_form.html#method">method</a>
+ parameter or <a href="#match_method">match_method</a> attribute
+ is set to <code>boolean</code>.
+ See also the
+ <a href="#boolean_syntax_errors">boolean_syntax_errors</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ boolean_keywords:
+ </td>
+ <td nowrap>
+ et ou non
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="boolean_syntax_errors">
+ boolean_syntax_errors</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ Expected
+ &#39;a search word, a quoted phrase or a boolean expression between ()&#39;
+ &#39;at the end&#39; &#39;instead of&#39; &#39;end of expression&#39; quotes
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These six strings are the keywords used to
+ construct various syntax error messages for errors encountered in
+ parsing the <a href="hts_form.html#words">words</a> input
+ parameter when the <a href="hts_form.html#method">method</a> parameter
+ or <a href="#match_method">match_method</a> attribute
+ is set to <code>boolean</code>.
+ They are used in conjunction with the
+ <a href="#boolean_keywords">boolean_keywords</a> attribute, and
+ comprise all
+ English-specific parts of these error messages. The order in which
+ the strings are put together may not be ideal, or even grammatically
+ correct, for all languages, but they can be used to make fairly
+ intelligible messages in many languages.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ boolean_syntax_errors:
+ </td>
+ <td nowrap>
+ Attendait "un mot" "&agrave; la fin"
+ "au lieu de" "fin d'expression" "guillemet"
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="build_select_lists">
+ build_select_lists</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This list allows you to define any htsearch input parameter as
+ a select list for use in templates, provided you also define
+ the corresponding name list attribute which enumerates all the
+ choices to put in the list. It can be used for existing input
+ parameters, as well as any you define using the
+ <a href="#allow_in_form">allow_in_form</a>
+ attribute. The entries in this list each consist of an octuple,
+ a set of eight strings defining the variables and how they are to
+ be used to build a select list. The attribute can contain many
+ of these octuples. The strings in the string list are merely
+ taken eight at a time. For each octuple of strings specified in
+ build_select_lists, the elements have the following meaning:
+ <ol>
+ <li>the name of the template variable to be defined as a list,
+ optionally followed by a comma and the type of list, and
+ optional formatting codes
+ <li>the input parameter name that the select list will set
+ <li>the name of the user-defined attribute containing the
+ name list
+ <li>the tuple size used in the name list above
+ <li>the index into a name list tuple for the value
+ <li>the index for the corresponding label on the selector
+ <li>the configuration attribute where the default value for
+ this input parameter is defined
+ <li>the default label, if not an empty string, which will be
+ used as the label for an additional list item for the current
+ input parameter value if it doesn't match any value in the
+ given list
+ </ol>
+ See the <a href="hts_selectors.html">select list documentation</a>
+ for more information on this attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ build_select_lists:
+ </td>
+ <td nowrap>
+
+ MATCH_LIST matchesperpage matches_per_page_list \<br>
+ 1 1 1 matches_per_page "Previous Amount" \<br>
+ RESTRICT_LIST,multiple restrict restrict_names 2 1 2 restrict "" \<br>
+ FORMAT_LIST,radio format template_map 3 2 1 template_name ""
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="caps_factor">
+ caps_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ ??
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ TO BE COMPLETED<br>
+ See also <a href="#heading_factor">heading_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ caps_factor:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="case_sensitive">
+ case_sensitive</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies whether ht://Dig should consider URLs
+ case-sensitive or not. If your server is case-insensitive,
+ you should probably set this to false. <br>
+ Even if this is false,
+ <a href="#common_url_parts">common_url_parts</a>,
+ <a href="#url_part_aliases">url_part_aliases</a> and
+ <a href="#url_rewrite_rules">url_rewrite_rules</a>
+ are all still case sensitive, and
+ <a href="#server_aliases">server_aliases</a>
+ is still case insensitive.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ case_sensitive:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="check_unique_date">
+ check_unique_date</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Include the modification date of the page in the MD5 hash, to reduce the
+ problem with identical but physically separate pages in different parts of the tree pointing to
+ different pages.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ check_unique_date:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="check_unique_md5">
+ check_unique_md5</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Uses the MD5 hash of pages to reject aliases, which prevents multiple entries
+ in the index caused by such things as symbolic links.<br>
+ Note: May not do the right thing for incremental updates.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ check_unique_md5:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="collection_names">
+ collection_names</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of config file names that are used for searching multiple databases.
+ Simply put, htsearch will loop through the databases specified by each of these config
+ files and present the result of the search on all of the databases.
+ The corresponding config files are looked up in the <a href="#config_dir">config_dir</a> directory.
+ Each listed config file <strong>must</strong> exist, as well as the corresponding databases.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ collection_names:
+ </td>
+ <td nowrap>
+ htdig_docs htdig_bugs
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="common_dir">
+ common_dir</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @COMMON_DIR@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the directory for files that will or can be
+ shared among different search databases. The default
+ value for this attribute is defined at compile time.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ common_dir:
+ </td>
+ <td nowrap>
+ /tmp
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="common_url_parts">
+ common_url_parts</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .shtml /index.html /index.htm .com/ .com mailto:
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Sub-strings often found in URLs stored in the
+ database. These are replaced in the database by an
+ internal space-saving encoding. If a string
+ specified in <a href="#url_part_aliases">url_part_aliases</a>
+ overlaps any string in common_url_parts, the
+ common_url_parts string is ignored.<br>
+ Note that when this attribute is changed, the
+ database should be rebuilt, unless the effect of
+ "changing" the affected URLs in the database is
+ wanted.<br>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ common_url_parts:
+ </td>
+ <td nowrap>
+ http://www.htdig.org/ml/ \<br>
+.html \<br>
+http://dev.htdig.org/ \<br>
+http://www.htdig.org/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="compression_level">
+ compression_level</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 6
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If non-zero and the
+ <a href="http://www.cdrom.com/pub/infozip/zlib/">zlib</a>
+ compression library was available when compiled,
+ this attribute controls the amount of compression used in the
+ <a href="#doc_excerpt">doc_excerpt</a> file.
+ <br>This must be in the range 0-9, and must be non-zero when
+ <a href="#wordlist_compress_zlib">wordlist_compress_zlib</a>
+ is used.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ compression_level:
+ </td>
+ <td nowrap>
+ 0
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="config">
+ config</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ ??
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Name of configuration file to load.
+ For security reasons, restrictions are placed on the values which
+ can be specified on the command line to
+ <a href="htsearch.html" target="_top">htsearch</a>.
+ The default value of this attribute is determined at
+ compile time.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top"><em>No example provided</em></td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="config_dir">
+ config_dir</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @CONFIG_DIR@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the directory which contains all configuration
+ files related to ht://Dig. It is never used
+ directly by any of the programs, but other attributes
+ or the <a href="#include">include</a> directive
+ can be defined in terms of this one.
+ <p>
+ The default value of this attribute is determined at
+ compile time.
+ </p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ config_dir:
+ </td>
+ <td nowrap>
+ /var/htdig/conf
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="content_classifier">
+ content_classifier</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#bin_dir">bin_dir</a>}/HtFileType
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When ht://Dig can't determine the type of a <code>file://</code>
+ URL from its extension, this program is used to determine the type.
+ The program is called with one argument, the name of (possibly a
+ temporary copy of) the file.
+ <p>
+ See also <a href="#mime_types">mime_types</a>.
+ </p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ content_classifier:
+ </td>
+ <td nowrap>
+ file -i -b
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="cookies_input_file">
+ cookies_input_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the location of the file used for importing cookies
+ for the crawl. These cookies will be preloaded into htdig's
+ in-memory cookie jar, but aren't written back to the file.
+ Cookies are specified according to Netscape's format
+ (tab-separated fields). If this attribute is left blank,
+ no cookie file will be read.
+ For more information, see the sample cookies.txt file in the
+ ht://Dig source distribution.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ cookies_input_file:
+ </td>
+ <td nowrap>
+ ${common_dir}/cookies.txt
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="create_image_list">
+ create_image_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, a file with all the image URLs that
+ were seen will be created, one URL per line. This list
+ will not be in any order and there will be lots of
+ duplicates, so after htdig has completed, it should be
+ piped through <code>sort -u</code> to get a unique list.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ create_image_list:
+ </td>
+ <td nowrap>
+ yes
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="create_url_list">
+ create_url_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, a file with all the URLs that were seen
+ will be created, one URL per line. This list will not
+ be in any order and there will be lots of duplicates,
+ so after htdig has completed, it should be piped
+ through <code>sort -u</code> to get a unique list.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ create_url_list:
+ </td>
+ <td nowrap>
+ yes
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="database_base">
+ database_base</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_dir">database_dir</a>}/db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the common prefix for files that are specific
+ to a search database. Many different attributes use
+ this prefix to specify filenames. Several search
+ databases can share the same directory by just changing
+ this value for each of the databases.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ database_base:
+ </td>
+ <td nowrap>
+ ${database_dir}/sales
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="database_dir">
+ database_dir</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @DATABASE_DIR@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the directory which contains all database and
+ other files related to ht://Dig. It is never used
+ directly by any of the programs, but other attributes
+ are defined in terms of this one.
+ <p>
+ The default value of this attribute is determined at
+ compile time.
+ </p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ database_dir:
+ </td>
+ <td nowrap>
+ /var/htdig
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="date_factor">
+ date_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 0
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This factor gives higher
+ rankings to newer documents and lower rankings to older
+ documents. Before setting this factor, it's advised to
+ make sure your servers are returning accurate dates
+ (check the dates returned in the long format).
+ Additionally, setting this to a nonzero value incurs a
+ small performance hit on searching.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ date_factor:
+ </td>
+ <td nowrap>
+ 0.35
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="date_format">
+ date_format</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This format string determines the output format for
+ modification dates of documents in the search results.
+ It is interpreted by your system's <em>strftime</em>
+ function. Please refer to your system's manual page
+ for this function, for a description of available
+ format codes. If this format string is empty, as it
+ is by default,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ will pick a format itself. In this case, the <a
+ href="#iso_8601">iso_8601</a> attribute can be used
+ to modify the appearance of the date.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ date_format:
+ </td>
+ <td nowrap>
+ %Y-%m-%d
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="description_factor">
+ description_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 150
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Plain old "descriptions" are the text of a link pointing
+ to a document. This factor gives weight to the words of
+ these descriptions of the document. Not surprisingly,
+ these can be pretty accurate summaries of a document's
+ content. See also <a href="#heading_factor">heading_factor</a>
+ and <a href="#meta_description_factor">meta_description_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ description_factor:
+ </td>
+ <td nowrap>
+ 350
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="description_meta_tag_names">
+ description_meta_tag_names</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ description
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The words in this list are used to search for descriptions in HTML
+ <em>META</em> tags. This list can contain any number of strings
+ that each will be seen as the name for whatever description
+ convention is used. While words in any of the specified
+ description contents will be indexed, only the last meta tag
+ containing a description will be kept for the
+ <a href="hts_templates.html#METADESCRIPTION">METADESCRIPTION</a>
+ variable in search results. The order in
+ which the names are specified in this configuration attribute
+ is irrelevant, as it is the order in which the tags appear in
+ the documents that matters.<br> The <em>META</em> tags have the
+ following format:<br>
+ <tt> &nbsp;&nbsp;&lt;META name="<em>somename</em>"
+ content="<em>somevalue</em>"&gt; </tt><br>
+ See also <a href="#meta_description_factor">meta_description_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ description_meta_tag_names:
+ </td>
+ <td nowrap>
+ "description htdig-description"
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="disable_cookies">
+ disable_cookies</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This option, if set to true, will disable HTTP cookies.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ disable_cookies:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="doc_db">
+ doc_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.docdb
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file will contain a Berkeley database of documents
+ indexed by document number. It contains all the information
+ gathered for each document, except the document excerpts
+ which are stored in the <a href="#doc_excerpt"><em>
+ doc_excerpt</em></a> file.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ doc_db:
+ </td>
+ <td nowrap>
+ ${database_base}documents.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="doc_excerpt">
+ doc_excerpt</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.excerpts
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file will contain a Berkeley database of document excerpts
+ indexed by document number. It contains all the text
+ gathered for each document, so this file can become
+ rather large if <a href="#max_head_length"><em>
+ max_head_length</em></a> is set to a large value.
+ The size can be reduced by setting the
+ <a href="#compression_level"><em>compression_level</em></a>,
+ if supported on your system.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ doc_excerpt:
+ </td>
+ <td nowrap>
+ ${database_base}excerpts.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="doc_index">
+ doc_index</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.docs.index
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file contains a mapping of document numbers to URLs and is
+ used by htdig during indexing. It is used on updates if it exists.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ doc_index:
+ </td>
+ <td nowrap>
+ documents.index.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="doc_list">
+ doc_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htdump.html">htdump</a>,
+ <a href="htload.html">htload</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.docs
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file is basically a text version of the file
+ specified in <em><a href="#doc_db">doc_db</a></em>. Its
+ only use is to have a human readable database of all
+ documents. The file is easy to parse with tools like
+ perl or tcl.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ doc_list:
+ </td>
+ <td nowrap>
+ /tmp/documents.text
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endday">
+ endday</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Day component of last date allowed as last-modified date
+ of returned documents.
+ This is most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.
+ See also <a href="#startyear">startyear</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endday:
+ </td>
+ <td nowrap>
+ 31
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="end_ellipses">
+ end_ellipses</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;strong&gt;&lt;code&gt; ...&lt;/code&gt;&lt;/strong&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When excerpts are displayed in the search output, this
+ string will be appended to the excerpt if there is text
+ following the text displayed. This is just a visual
+ reminder to the user that the excerpt is only part of
+ the complete document.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ end_ellipses:
+ </td>
+ <td nowrap>
+ ...
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="end_highlight">
+ end_highlight</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;/strong&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When excerpts are displayed in the search output, matched
+ words will be highlighted using <a href="#start_highlight">
+ start_highlight</a> and this string.
+ You should ensure that highlighting tags are balanced,
+ that is, this string should close any formatting
+ tag opened by start_highlight.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ end_highlight:
+ </td>
+ <td nowrap>
+ &lt;/font&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endings_affix_file">
+ endings_affix_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/english.aff
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the location of the file which contains the
+ affix rules used to create the endings search algorithm
+ databases. Consult the documentation on
+ <a href="htfuzzy.html">htfuzzy</a> for more information on the
+ format of this file.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endings_affix_file:
+ </td>
+ <td nowrap>
+ /var/htdig/affix_rules
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endings_dictionary">
+ endings_dictionary</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/english.0
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the location of the file which contains the
+ dictionary used to create the endings search algorithm
+ databases. Consult the documentation on
+ <a href="htfuzzy.html">htfuzzy</a> for more information on the
+ format of this file.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endings_dictionary:
+ </td>
+ <td nowrap>
+ /var/htdig/dictionary
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endings_root2word_db">
+ endings_root2word_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/root2word.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute specifies the database filename to be
+ used in the 'endings' fuzzy search algorithm. The
+ database maps word roots to all legal words with that
+ root. For more information about this and other fuzzy
+ search algorithms, consult the
+ <a href="htfuzzy.html">htfuzzy</a> documentation.<br>
+ Note that the default value uses the
+ <a href="#common_dir">common_dir</a> attribute instead of the
+ <a href="#database_dir">database_dir</a> attribute.
+ This is because this database can be shared with
+ different search databases.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endings_root2word_db:
+ </td>
+ <td nowrap>
+ /var/htdig/r2w.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endings_word2root_db">
+ endings_word2root_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/word2root.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute specifies the database filename to be
+ used in the 'endings' fuzzy search algorithm. The
+ database maps words to their root. For more information
+ about this and other fuzzy search algorithms, consult
+ the <a href="htfuzzy.html">htfuzzy</a>
+ documentation.<br>
+ Note that the default value uses the
+ <a href="#common_dir">common_dir</a> attribute instead of the
+ <a href="#database_dir">database_dir</a> attribute.
+ This is because this database can be shared with
+ different search databases.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endings_word2root_db:
+ </td>
+ <td nowrap>
+ /var/htdig/w2r.bm
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endmonth">
+ endmonth</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Month component of last date allowed as last-modified date
+ of returned documents.
+ This is most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.
+ See also <a href="#startyear">startyear</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endmonth:
+ </td>
+ <td nowrap>
+ 12
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="endyear">
+ endyear</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Year component of last date allowed as last-modified date
+ of returned documents.
+ This is most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.
+ See also <a href="#startyear">startyear</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ endyear:
+ </td>
+ <td nowrap>
+ 2002
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="excerpt_length">
+ excerpt_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 300
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the maximum number of characters the displayed
+ excerpt will be limited to. The first matched word will
+ be highlighted in the middle of the excerpt so that there is
+ some surrounding context.<br>
+ The <em><a href="#start_ellipses">
+ start_ellipses</a></em> and
+ <em><a href="#end_ellipses">end_ellipses</a></em> are used to
+ indicate that the document contains text before and
+ after the displayed excerpt respectively.
+ The <em><a href="#start_highlight">start_highlight</a></em> and
+ <em><a href="#end_highlight">end_highlight</a></em> are used to
+ specify what formatting tags are used to highlight matched words.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ excerpt_length:
+ </td>
+ <td nowrap>
+ 500
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="excerpt_show_top">
+ excerpt_show_top</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, the excerpt of a match will always show
+ the top of the matching document. If it is false (the
+ default), the excerpt will attempt to show the part of
+ the document that actually contains one of the words.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ excerpt_show_top:
+ </td>
+ <td nowrap>
+ yes
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="exclude">
+ exclude</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If a URL contains any of the space separated patterns, it will be
+ discarded in the searching phase. This is used to exclude certain
+ URLs from search results. The list can be specified from within
+ the configuration file, and can be overridden with the "exclude"
+ input parameter in the search form.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ exclude:
+ </td>
+ <td nowrap>
+ myhost.com/mailarchive/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="exclude_urls">
+ exclude_urls</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ /cgi-bin/ .cgi
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If a URL contains any of the space separated patterns,
+ it will be rejected. This is used to exclude such
+ common things as an infinite virtual web-tree
+ which starts with cgi-bin.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ exclude_urls:
+ </td>
+ <td nowrap>
+ students.html cgi-bin
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="external_parsers">
+ external_parsers</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.7 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute is used to specify a list of
+ content-type/parsers that are to be used to parse
+ documents that cannot be parsed by any of the internal
+ parsers. The list of external parsers is examined
+ before the builtin parsers are checked, so this can be
+ used to override the internal behavior without
+ recompiling htdig.<br>
+ The external parsers are specified as pairs of
+ strings. The first string of each pair is the
+ content-type that the parser can handle while the
+ second string of each pair is the path to the external
+ parsing program. If quoted, it may contain parameters,
+ separated by spaces.<br>
+ External parsing can also be done with external
+ converters, which convert one content-type to
+ another. To do this, instead of just specifying
+ a single content-type as the first string
+ of a pair, you specify two types, in the form
+ <em>type1</em><strong>-&gt;</strong><em>type2</em>,
+ as a single string with no spaces. The second
+ string will define an external converter
+ rather than an external parser, to convert
+ the first type to the second. If the second
+ type is <strong>user-defined</strong>, then
+ it's up to the converter script to put out a
+ "Content-Type:&nbsp;<em>type</em>" header followed
+ by a blank line, to indicate to htdig what type it
+ should expect for the output, much like what a CGI
+ script would do. The resulting content-type must
+ be one that htdig can parse, either internally,
+ or with another external parser or converter.<br>
+ Only one external parser or converter can be
+ specified for any given content-type. However,
+ an external converter for one content-type can be
+ chained to the internal parser for the same type,
+ by appending <strong>-internal</strong> to the
+ second type string (e.g. text/html-&gt;text/html-internal)
+ to perform external preprocessing on documents of
+ this type before internal parsing.
+ There are two internal parsers, for text/html and
+ text/plain.<p>
+ The parser program takes four command-line
+ parameters, not counting any parameters already
+ given in the command string:<br>
+ <em>infile content-type URL configuration-file</em><br>
+ <table border="1">
+ <tr>
+ <th> Parameter </th>
+ <th> Description </th>
+ <th> Example </th>
+ </tr>
+ <tr>
+ <td valign="top"> infile </td>
+ <td> A temporary file with the contents to be parsed. </td>
+ <td> /var/tmp/htdext.14242 </td>
+ </tr>
+ <tr>
+ <td valign="top"> content-type </td>
+ <td> The MIME-type of the contents. </td>
+ <td> text/html </td>
+ </tr>
+ <tr>
+ <td valign="top"> URL </td>
+ <td> The URL of the contents. </td>
+ <td> http://www.htdig.org/attrs.html </td>
+ </tr>
+ <tr>
+ <td valign="top"> configuration-file </td>
+ <td> The configuration-file in effect. </td>
+ <td> /etc/htdig/htdig.conf </td>
+ </tr>
+ </table><p>
+ The external parser is to write information for
+ htdig on its standard output. Unless it is an
+ external converter, which will output a document
+ of a different content-type, then its output must
+ follow the format described here.<br>
+ The output consists of records, each record terminated
+ with a newline. Each record is a series of (unless
+ expressly allowed to be empty) non-empty tab-separated
+ fields. The first field is a single character
+ that specifies the record type. The rest of the fields
+ are determined by the record type.
+ <table border="1">
+ <tr>
+ <th> Record type </th>
+ <th> Fields </th>
+ <th> Description </th>
+ </tr>
+ <tr>
+ <th rowspan="3" valign="top"> w </th>
+ <td valign="top"> word </td>
+ <td> A word that was found in the document. </td>
+ </tr>
+ <tr>
+ <td valign="top"> location </td>
+ <td>
+ A number indicating the normalized location of
+ the word within the document. The number has to
+ fall in the range 0-1000 where 0 means the top of
+ the document.
+ </td>
+ </tr>
+ <tr>
+ <td valign="top"> heading level </td>
+ <td>
+ A heading level that is used to compute the
+ weight of the word depending on its context in
+ the document itself. The levels are in the range
+ 0-11 and are defined as follows:
+ <dl compact>
+ <dt> 0 </dt> <dd> Normal text </dd>
+ <dt> 1 </dt> <dd> Title text </dd>
+ <dt> 2 </dt> <dd> Heading 1 text </dd>
+ <dt> 3 </dt> <dd> Heading 2 text </dd>
+ <dt> 4 </dt> <dd> Heading 3 text </dd>
+ <dt> 5 </dt> <dd> Heading 4 text </dd>
+ <dt> 6 </dt> <dd> Heading 5 text </dd>
+ <dt> 7 </dt> <dd> Heading 6 text </dd>
+ <dt> 8 </dt> <dd> text alternative to images </dd>
+ <dt> 9 </dt> <dd> Keywords </dd>
+ <dt> 10 </dt> <dd> Meta-description </dd>
+ <dt> 11 </dt> <dd> Author </dd>
+ </dl>
+ </td>
+ </tr>
+ <tr>
+ <th rowspan="2" valign="top"> u </th>
+ <td valign="top"> document URL </td>
+ <td>
+ A hyperlink to another document that is
+ referenced by the current document. It must be
+ complete and non-relative, using the URL parameter to
+ resolve any relative references found in the document.
+ </td>
+ </tr>
+ <tr>
+ <td valign="top"> hyperlink description </td>
+ <td>
+ For HTML documents, this would be the text
+ between the &lt;a href...&gt; and &lt;/a&gt;
+ tags.
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> t </th>
+ <td valign="top"> title </td>
+ <td> The title of the document </td>
+ </tr>
+ <tr>
+ <th valign="top"> h </th>
+ <td valign="top"> head </td>
+ <td>
+ The top of the document itself. This is used to
+ build the excerpt. This should only contain
+ normal ASCII text
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> a </th>
+ <td valign="top"> anchor </td>
+ <td>
+ The label that identifies an anchor that can be
+ used as a target in an URL. This really only
+ makes sense for HTML documents.
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> i </th>
+ <td valign="top"> image URL </td>
+ <td>
+ An URL that points at an image that is part of
+ the document.
+ </td>
+ </tr>
+ <tr>
+ <th rowspan="3" valign="top"> m </th>
+ <td valign="top"> http-equiv </td>
+ <td>
+ The HTTP-EQUIV attribute of a
+ <a href="meta.html"><em>META</em> tag</a>.
+ May be empty.
+ </td>
+ </tr>
+ <tr>
+ <td valign="top"> name </td>
+ <td>
+ The NAME attribute of this
+ <a href="meta.html"><em>META</em> tag</a>.
+ May be empty.
+ </td>
+ </tr>
+ <tr>
+ <td valign="top"> contents </td>
+ <td>
+ The CONTENT attribute of this
+ <a href="meta.html"><em>META</em> tag</a>.
+ May be empty.
+ </td>
+ </tr>
+ </table>
+ <p><em>See also FAQ questions <a href="FAQ.html#q4.8">4.8</a> and
+ <a href="FAQ.html#q4.9">4.9</a> for more examples.</em></p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ external_parsers:
+ </td>
+ <td nowrap>
+ text/html /usr/local/bin/htmlparser \<br>
+ application/pdf /usr/local/bin/parse_doc.pl \<br>
+ application/msword-&gt;text/plain "/usr/local/bin/mswordtotxt -w" \<br>
+ application/x-gunzip-&gt;user-defined /usr/local/bin/ungzipper
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="external_protocols">
+ external_protocols</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute is a bit like
+ <a href="#external_parsers">external_parsers</a> since it specifies
+ a list of protocols/handlers that are used to download documents
+ that cannot be retrieved using the internal methods. This enables
+ htdig to index documents with URL schemes it does not understand,
+ or to use more advanced authentication for the documents it is
+ retrieving. This list is checked before HTTP or other methods,
+ so this can override the internal behavior without writing additional
+ code for htdig.<br>
+ The external protocols are specified as pairs of strings, the first
+ being the URL scheme that the script can handle while the second
+ is the path to the script itself. If the second is
+ quoted, then additional command-line arguments may be given.<br>
+ If the external protocol does not contain a colon (:), it is assumed
+ to have the standard format
+ "protocol://[user[:password]@]address[:port]/path".
+ If it ends with a colon, then it is assumed to have the simpler format
+ "protocol:path". If it ends with "://" then the standard form is
+ again assumed. <br>
+ The program takes three command-line parameters, not counting any
+ parameters already given in the command string:<br>
+ <em>protocol URL configuration-file</em><br>
+ <table border="1">
+ <tr>
+ <th> Parameter </th>
+ <th> Description </th>
+ <th> Example </th>
+ </tr>
+ <tr>
+ <td valign="top"> protocol </td>
+ <td> The URL scheme to be used. </td>
+ <td> https </td>
+ </tr>
+ <tr>
+ <td valign="top"> URL </td>
+ <td> The URL to be retrieved. </td>
+ <td> https://www.htdig.org:8008/attrs.html </td>
+ </tr>
+ <tr>
+ <td valign="top"> configuration-file </td>
+ <td> The configuration-file in effect. </td>
+ <td> /etc/htdig/htdig.conf </td>
+ </tr>
+ </table><p>
+ The external protocol script is to write information for htdig on the
+ standard output. The output must follow the form described here. The
+ output consists of a header followed by a blank line, followed by
+ the contents of the document. Each record in the header is terminated
+ with a newline. Each record is a series of (unless expressly
+ allowed to be empty) non-empty tab-separated fields. The first field
+ is a single character that specifies the record type. The rest of
+ the fields are determined by the record type.
+ <table border="1">
+ <tr>
+ <th> Record type </th>
+ <th> Fields </th>
+ <th> Description </th>
+ </tr>
+ <tr>
+ <th valign="top"> s </th>
+ <td valign="top"> status code </td>
+ <td>
+ An HTTP-style status code, e.g. 200, 404. Typical codes include:
+ <dl compact>
+ <dt> 200 </dt>
+ <dd> Successful retrieval </dd>
+ <dt> 304 </dt>
+ <dd>
+ Not modified (for example, if the document hasn't
+ changed since the last dig)
+ </dd>
+ <dt> 301 </dt>
+ <dd> Redirect (to another URL) </dd>
+ <dt> 401 </dt>
+ <dd> Not authorized </dd>
+ <dt> 404 </dt>
+ <dd> Not found </dd>
+ </dl>
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> r </th>
+ <td valign="top"> reason </td>
+ <td>
+ A text string describing the status code,
+ e.g. "Redirect" or "Not Found."
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> m </th>
+ <td valign="top"> modification time </td>
+ <td>
+ The modification time of this document. While the code is
+ fairly flexible about the time/date formats it accepts, it
+ is recommended to use something standard, like
+ RFC1123: Sun, 06 Nov 1994 08:49:37 GMT, or
+ ISO-8601: 1994-11-06 08:49:37 GMT.
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> t </th>
+ <td valign="top"> content-type </td>
+ <td>
+ A valid MIME type for the document, like text/html or text/plain.
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> l </th>
+ <td valign="top"> content-length </td>
+ <td>
+ The length of the document on the server, which may not
+ necessarily be the length of the buffer returned.
+ </td>
+ </tr>
+ <tr>
+ <th valign="top"> u </th>
+ <td valign="top"> url </td>
+ <td>
+ The URL of the document, or in the case of a redirect, the
+ URL that should be indexed as a result of the redirect.
+ </td>
+ </tr>
+ </table>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ external_protocols:
+ </td>
+ <td nowrap>
+ https /usr/local/bin/handler.pl \<br>
+ ftp /usr/local/bin/ftp-handler.pl
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="extra_word_characters">
+ extra_word_characters</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These characters are considered part of a word.
+ In contrast to the characters in the
+ <a href="#valid_punctuation">valid_punctuation</a>
+ attribute, they are treated just like letter
+ characters. See also the <a href="#allow_numbers">allow_numbers</a>
+ attribute.<br>
+ Note that the <a href="#locale">locale</a> attribute
+ is normally used to configure which characters
+ constitute letter characters.<br>
+ Note also that it is an error to have characters in both
+ extra_word_characters and
+ <a href="#valid_punctuation">valid_punctuation</a>.
+ To add one of the characters in the default valid_punctuation to
+ extra_word_characters, an explicit valid_punctuation entry must be
+ added to the configuration file.<br>
+ See also the comments about special characters at
+ <a href="#valid_punctuation">valid_punctuation</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ extra_word_characters:
+ </td>
+ <td nowrap>
+ _
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="head_before_get">
+ head_before_get</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, an HTTP/1.1 <em>HEAD</em>
+ call is made in order to retrieve header information about a document.
+ If the status code and the content-type returned show that the
+ document is parsable, then a subsequent 'GET' call is made. In
+ general, it is recommended that this attribute be set to 'true',
+ as it can really improve performance (especially when used with
+ persistent connections). This is particularly so during an
+ incremental dig, since in this case 'htdig' can ask the server if the
+ document has been modified since the last dig. However, there are a few
+ cases when it is better to switch it off:
+ <ul>
+ <li>the majority of documents are parsable (HTML or a type for which
+ an external parser has been provided) and must be retrieved anyway
+ (initial dig);</li>
+ <li>the server does not support the HEAD method or it is
+ disabled;</li>
+ <li>in some cases <a href="#persistent_connections">persistent_connections</a> may
+ not work properly and either the 'head_before_get' attribute or the
+ 'persistent_connections' attribute must be turned off.</li>
+ </ul>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ head_before_get:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="heading_factor">
+ heading_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 5
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a factor which will be used to multiply the
+ weight of words between &lt;h1&gt; and &lt;/h1&gt;
+ tags, as well as headings of levels &lt;h2&gt; through
+ &lt;h6&gt;. It is used to assign the level of importance
+ to headings. Setting a factor to 0 will cause words
+ in these headings to be ignored. The number may be a
+ floating point number. See also
+ <a href="#author_factor">author_factor</a>,
+ <a href="#backlink_factor">backlink_factor</a>,
+ <a href="#caps_factor">caps_factor</a>,
+ <a href="#date_factor">date_factor</a>,
+ <a href="#description_factor">description_factor</a>,
+ <a href="#keywords_factor">keywords_factor</a>,
+ <a href="#meta_description_factor">meta_description_factor</a>,
+ <a href="#text_factor">text_factor</a>,
+ <a href="#title_factor">title_factor</a> and
+ <a href="#url_text_factor">url_text_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ heading_factor:
+ </td>
+ <td nowrap>
+ 20
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="htnotify_prefix_file">
+ htnotify_prefix_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the file containing text to be inserted in each mail
+ message sent by htnotify before the list of expired webpages. If omitted,
+ nothing is inserted.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ htnotify_prefix_file:
+ </td>
+ <td nowrap>
+ ${common_dir}/notify_prefix.txt
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="htnotify_replyto">
+ htnotify_replyto</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the email address that htnotify email messages
+ include in the Reply-to: field.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ htnotify_replyto:
+ </td>
+ <td nowrap>
+ design-group@foo.com
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="htnotify_sender">
+ htnotify_sender</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ webmaster@www
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the email address that htnotify email
+ messages get sent out from. The address is forged using
+ /usr/lib/sendmail. Check htnotify/htnotify.cc for
+ details on how this is done.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ htnotify_sender:
+ </td>
+ <td nowrap>
+ bigboss@yourcompany.com
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="htnotify_suffix_file">
+ htnotify_suffix_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the file containing text to be inserted in each mail message
+ sent by htnotify after the list of expired webpages. If omitted, htnotify
+ will insert a standard message.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ htnotify_suffix_file:
+ </td>
+ <td nowrap>
+ ${common_dir}/notify_suffix.txt
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="htnotify_webmaster">
+ htnotify_webmaster</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ht://Dig Notification Service
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This provides a name for the From field, in addition to the email
+ address for the email messages sent out by htnotify.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ htnotify_webmaster:
+ </td>
+ <td nowrap>
+ Notification Service
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="http_proxy">
+ http_proxy</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When this attribute is set, all HTTP document
+ retrievals will be done using the HTTP-PROXY protocol.
+ The URL specified in this attribute points to the host
+ and port where the proxy server resides.<br>
+ Later, this should be able to be overridden by the
+ <code>http_proxy</code> environment variable, but it currently cannot.
+ The use of a proxy server greatly improves performance
+ of the indexing process.<br>
+ See also
+ <a href="#http_proxy_authorization">http_proxy_authorization</a> and
+ <a href="#http_proxy_exclude">http_proxy_exclude</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ http_proxy:
+ </td>
+ <td nowrap>
+ http://proxy.bigbucks.com:3128
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="http_proxy_authorization">
+ http_proxy_authorization</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This tells htdig to send the supplied
+ <em>username</em><strong>:</strong><em>password</em> with each HTTP request,
+ when using a proxy with authorization requested.
+ The credentials will be encoded using the "Basic" authentication
+ scheme. There <em>must</em> be a colon (:) between the username and
+ password.<br>
+ If you use this option, be sure to protect the configuration file
+ so it is readable only by you, and do not
+ use that same configuration file for htsearch.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ http_proxy_authorization:
+ </td>
+ <td nowrap>
+ myusername:mypassword
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="http_proxy_exclude">
+ http_proxy_exclude</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When this is set, URLs matching this will not use the
+ proxy. This is useful when you have a mixture of sites
+ near to the digging server and far away.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ http_proxy_exclude:
+ </td>
+ <td nowrap>
+ http://intranet.foo.com/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="ignore_alt_text">
+ ignore_alt_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set, this causes the text of the ALT field in an &lt;IMG...&gt; tag
+ not to be indexed as part of the text of the document, nor included in
+ excerpts.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ ignore_alt_text:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="ignore_dead_servers">
+ ignore_dead_servers</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Determines whether htdig will continue to index URLs from a
+ server after an attempted connection to the server fails as
+ &quot;no host found&quot; or &quot;host not found (port).&quot; If
+ set to false, htdig will try <em>every</em> URL from that server.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ ignore_dead_servers:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="image_list">
+ image_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.images
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the file that a list of image URLs gets written
+ to by <a href="htdig.html">htdig</a> when the
+ <a href="#create_image_list">create_image_list</a> attribute is set to
+ true. As image URLs are seen, they are just appended to
+ this file, so after htdig finishes it is probably a
+ good idea to run <code>sort -u</code> on the file to
+ eliminate duplicates from the file.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ image_list:
+ </td>
+ <td nowrap>
+ allimages
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="image_url_prefix">
+ image_url_prefix</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @IMAGE_URL_PREFIX@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the directory portion of the URL used
+ to display star images. This attribute isn't directly
+ used by htsearch, but is used in the default URL for
+ the <a href="#star_image">star_image</a> and
+ <a href="#star_blank">star_blank</a> attributes, and
+ other attributes may be defined in terms of this one.
+ <p>
+ The default value of this attribute is determined at
+ compile time.
+ </p>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ image_url_prefix:
+ </td>
+ <td nowrap>
+ /images/htdig
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="include">
+ include</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is not quite a configuration attribute, but
+ rather a directive. It can be used within one
+ configuration file to include the definitions of
+ another file. The last definition of an attribute
+ is the one that applies, so after including a file,
+ any of its definitions can be overridden with
+ subsequent definitions. This can be useful when
+ setting up many configurations that are mostly the
+ same, so all the common attributes can be maintained
+ in a single configuration file. The include directives
+ can be nested, but watch out for nesting loops.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ include:
+ </td>
+ <td nowrap>
+ ${config_dir}/htdig.conf
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="iso_8601">
+ iso_8601</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>,
+ <a href="htnotify.html">htnotify</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets whether dates should be output in ISO 8601
+ format. For example, this was written on: 1998-10-31 11:28:13 EST.
+ See also the <a
+ href="#date_format">date_format</a> attribute, which
+ can override any date format that
+ <a href="htsearch.html" target="_top">htsearch</a>
+ picks by default.<br>
+ This attribute also affects the format of the date
+ <a href="htnotify.html">htnotify</a> expects to find
+ in a <strong>htdig-notification-date</strong> field.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ iso_8601:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="keywords">
+ keywords</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ ??
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Keywords which <strong>must</strong> be found on all pages returned,
+ even if the "or" ("Any") <a href="#method">method</a> is
+ selected.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ keywords:
+ </td>
+ <td nowrap>
+ documentation
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="keywords_factor">
+ keywords_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 100
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a factor which will be used to multiply the
+ weight of words in the list of
+ <a href="#keywords_meta_tag_names">meta keywords</a> of a document.
+ The number may be a floating point number. See also the
+ <a href="#heading_factor">heading_factor</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ keywords_factor:
+ </td>
+ <td nowrap>
+ 12
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="keywords_meta_tag_names">
+ keywords_meta_tag_names</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ keywords htdig-keywords
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The words in this list are used to search for keywords
+ in HTML <em>META</em> tags. This list can contain any
+ number of strings that each will be seen as the name
+ for whatever keyword convention is used.<br>
+ The <em>META</em> tags have the following format:<br>
+<code>
+&nbsp;&nbsp;&lt;META name="<em>somename</em>" content="<em>somevalue</em>"&gt;
+</code>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ keywords_meta_tag_names:
+ </td>
+ <td nowrap>
+ keywords description
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="limit_normalized">
+ limit_normalized</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a set of patterns that all URLs have to
+ match against in order for them to be included in the
+ search. Unlike the limit_urls_to attribute, this is done
+ <strong>after</strong> the URL is normalized and the
+ <a href="#server_aliases">server_aliases</a>
+ attribute is applied. This allows filtering after any
+ hostnames and DNS aliases are resolved. Otherwise, this
+ attribute is the same as the <a
+ href="#limit_urls_to">limit_urls_to</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ limit_normalized:
+ </td>
+ <td nowrap>
+ http://www.mydomain.com
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="limit_urls_to">
+ limit_urls_to</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#start_url">start_url</a>}
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a set of patterns that all URLs have to
+ match against in order for them to be included in the
+ search. Any number of strings can be specified,
+ separated by spaces. If multiple patterns are given, at
+ least one of the patterns has to match the URL.<br>
+ Matching, by default, is a case-sensitive string match on the URL
+ to be used, unless the <a href="#case_sensitive">case_sensitive</a>
+ attribute is false. The match will be performed <em>after</em>
+ the relative references have been converted to a valid
+ URL. This means that the URL will <em>always</em> start
+ with a transport specifier (<code>http://</code> if none is
+ specified).<br>
+ Granted, this is not the perfect way of doing this,
+ but it is simple enough and it covers most cases.<br>
+ To limit URLs in htsearch, use
+ <a href="#restrict">restrict</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ limit_urls_to:
+ </td>
+ <td nowrap>
+ .sdsu.edu kpbs [.*\.html]
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="local_default_doc">
+ local_default_doc</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ index.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.8b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to the default documents in a directory used by the
+ server. This is used for local filesystem access,
+ using <a href="#local_urls">local_urls</a>, to
+ translate URLs like http://foo.com/ into something like
+ /home/foo.com/index.html
+ (see also <a href="#remove_default_doc">remove_default_doc</a>).
+ <br>The list should only contain names that the local server
+ recognizes as default documents for directory URLs, as defined
+ by the DirectoryIndex setting in Apache's srm.conf, for example.
+ As of version 3.1.5, this can be a string list rather than a single
+ name, and htdig will use the first name that works. Since this
+ requires a loop, setting the most common name first will improve
+ performance. Special characters can be embedded in these names
+ using %xx hex encoding.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ local_default_doc:
+ </td>
+ <td nowrap>
+ default.html default.htm index.html index.htm
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="local_urls">
+ local_urls</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.8b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to tell ht://Dig to access certain URLs through
+ local filesystems. At first ht://Dig will try to access
+ pages with URLs matching the patterns through the
+ filesystems specified. If it cannot find the file, or
+ if it doesn't recognize the file name extension, it will
+ try the URL through HTTP instead. Note the example--the
+ equal sign and the final slashes in both the URL and the
+ directory path are critical.
+ <br>The fallback to HTTP can be disabled by setting the
+ <a href="#local_urls_only">local_urls_only</a> attribute to true.
+ To access user directory URLs through the local filesystem,
+ set <a href="#local_user_urls">local_user_urls</a>.
+ File types which need processing by the HTTP server may be
+ specified by the
+ <a href="#bad_local_extensions">bad_local_extensions</a>
+ attribute.
+ As of version 3.1.5, you can provide multiple mappings of a given
+ URL to different directories, and htdig will use the first
+ mapping that works.
+ Special characters can be embedded in these names using %xx hex encoding.
+ For example, you can use %3D to embed an "=" sign in a URL pattern.
+ <br>
+ See also <a href="#local_default_doc">local_default_doc</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ local_urls:
+ </td>
+ <td nowrap>
+ http://www.foo.com/=/usr/www/htdocs/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="local_urls_only">
+ local_urls_only</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to tell ht://Dig to access files only through the
+ local filesystem, for URLs matching the patterns in the
+ <a href="#local_urls">local_urls</a> or
+ <a href="#local_user_urls">local_user_urls</a> attribute. If it
+ cannot find the file, it will give up rather than trying HTTP or
+ another protocol. With this option, even <code>file://</code> urls
+ are not retrieved, except through the local_urls mechanism.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ local_urls_only:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="local_user_urls">
+ local_user_urls</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.8b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to access user directory URLs through the local
+ filesystem. If you leave the "path" portion out, it will
+ look up the user's home directory in /etc/passwd (or NIS
+ or whatever). As with <a href="#local_urls">local_urls</a>,
+ if the files are not found, ht://Dig will try with HTTP or the
+ appropriate protocol. Again, note the
+ example's format. To map http://www.my.org/~joe/foo/bar.html
+ to /home/joe/www/foo/bar.html, try the example below.
+ <br>The fallback to HTTP can be disabled by setting the
+ <a href="#local_urls_only">local_urls_only</a> attribute to true.
+ As of version 3.1.5, you can provide multiple mappings of a given
+ URL to different directories, and htdig will use the first
+ mapping that works.
+ Special characters can be embedded in these names using %xx hex encoding.
+ For example, you can use %3D to embed an "=" sign in a URL pattern.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ local_user_urls:
+ </td>
+ <td nowrap>
+ http://www.my.org/=/home/,/www/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="locale">
+ locale</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ C
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to whatever locale you want your search
+ database to cover. It affects the way international
+ characters are dealt with. On most systems a list of
+ legal locales can be found in /usr/lib/locale. Also
+ check the <strong>setlocale(3C)</strong> man page.
+ Note that depending on the locale you choose, and whether
+ your system's locale implementation affects floating
+ point input, you may need to specify the decimal point
+ as a comma rather than a period. This will affect
+ settings of <a href="#search_algorithm">search_algorithm</a>
+ and any of the scoring factors.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ locale:
+ </td>
+ <td nowrap>
+ en_US
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="logging">
+ logging</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets whether htsearch should use syslog() to log
+ search requests. If set, this will log requests with a
+ default level of LOG_INFO and a facility of LOG_LOCAL5. For
+ details on redirecting the log into a separate file or other
+ actions, see the <strong>syslog.conf(5)</strong> man
+ page. To set the level and facility used in logging, change
+ LOG_LEVEL and LOG_FACILITY in the include/htconfig.h file
+ before compiling.
+ <dl>
+ <dt>
+ Each line logged by htsearch contains the following:
+ </dt>
+ <dd>
+ REMOTE_ADDR [config] (match_method) [words]
+ [logicalWords] (matches/matches_per_page) -
+ page, HTTP_REFERER
+ </dd>
+ </dl>
+ Where any of the above are null or empty, htsearch
+ puts in either '-' or 'default' (for config).
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ logging:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="maintainer">
+ maintainer</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ bogus@unconfigured.htdig.user
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This should be the email address of the person in
+ charge of the digging operation. This string is added
+ to the user-agent: field when the digger sends a
+ request to a server.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ maintainer:
+ </td>
+ <td nowrap>
+ ben.dover@uptight.com
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="match_method">
+ match_method</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ and
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the default method for matching that htsearch
+ uses. The valid choices are:
+ <ul>
+ <li> or </li>
+ <li> and </li>
+ <li> boolean </li>
+ </ul>
+ This attribute will only be used if the HTML form that
+ calls htsearch didn't have the
+ <a href="hts_form.html#method">method</a> value set.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ match_method:
+ </td>
+ <td nowrap>
+ boolean
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="matches_per_page">
+ matches_per_page</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 10
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If this is set to a relatively small number, the
+ matches will be shown in pages instead of all at once.
+ This attribute will only be used if the HTML form that
+ calls htsearch didn't have the
+ <a href="hts_form.html#matchesperpage">matchesperpage</a> value set.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ matches_per_page:
+ </td>
+ <td nowrap>
+ 999
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_connection_requests">
+ max_connection_requests</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ -1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute tells htdig to limit the number of requests it will
+ send to a server using a single, persistent HTTP connection. This
+ only applies when the
+ <a href="#persistent_connections">persistent_connections</a>
+ attribute is set. You may set the limit as high as you want,
+ but it must be at least 1. A value of -1 specifies no limit.
+ Requests in the queue for a server will be combined until either
+ the limit is reached, or the queue is empty.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_connection_requests:
+ </td>
+ <td nowrap>
+ 100
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_description_length">
+ max_description_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 60
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ While gathering descriptions of URLs,
+ <a href="htdig.html">htdig</a> will only record
+ up to this many bytes of hyperlink descriptions for use in the
+ <a href="hts_templates.html#DESCRIPTION">DESCRIPTION</a> template
+ variable. This is used mostly to deal with broken HTML. (If a
+ hyperlink is not terminated with a &lt;/a&gt; the
+ description will go on until the end of the document.)
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_description_length:
+ </td>
+ <td nowrap>
+ 40
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_descriptions">
+ max_descriptions</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 5
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ While gathering <a href="#description_factor">descriptions</a> of
+ URLs for the
+ <a href="hts_templates.html#DESCRIPTIONS">DESCRIPTIONS</a> template
+ variable, <a href="htdig.html">htdig</a> will only record up to this
+ number of descriptions, in the order in which it encounters
+ them. This is used to prevent the database entry for a document
+ from growing out of control if the document has a huge number
+ of links to it. <br>
+ Note that all descriptions are used for indexing.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_descriptions:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_doc_size">
+ max_doc_size</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 100000
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the upper limit to the amount of data retrieved
+ for documents (in bytes). This is mainly used to prevent
+ unreasonable memory consumption since each document
+ will be read into memory by <a href="htdig.html">
+ htdig</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_doc_size:
+ </td>
+ <td nowrap>
+ 5000000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_excerpts">
+ max_excerpts</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This value determines the maximum number of excerpts
+ that can be displayed for one matching document in the
+ search results.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_excerpts:
+ </td>
+ <td nowrap>
+ 10
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_head_length">
+ max_head_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 512
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ For each document retrieved, the top of the document is
+ stored. This attribute determines the size of this
+ block (in bytes). The text that will be stored is only the text;
+ no markup is stored.<br>
+ We found that storing 50,000 bytes will store about
+ 95% of all the documents completely. This really
+ depends on how much storage is available and how much
+ you want to show. Currently, this must not be 0.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_head_length:
+ </td>
+ <td nowrap>
+ 50000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_hop_count">
+ max_hop_count</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 999999
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Instead of limiting the indexing process by URL
+ pattern, it can also be limited by the number of hops
+ or clicks a document is removed from the starting URL.
+ <br>
+ The starting page or pages will have hop count 0.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_hop_count:
+ </td>
+ <td nowrap>
+ 4
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_keywords">
+ max_keywords</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ -1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute can be used to limit the number of keywords
+ per document that htdig will accept from meta keywords tags.
+ A value of -1 or less means no limit. This can help combat meta
+ keyword spamming, by limiting the number of keywords that will be
+ indexed, but it will not completely prevent irrelevant matches
+ in a search if the first few keywords in an offending document
+ are not relevant to its contents.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_keywords:
+ </td>
+ <td nowrap>
+ 10
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_meta_description_length">
+ max_meta_description_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 512
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ While gathering descriptions from meta description tags,
+ <a href="htdig.html">htdig</a> will only store up to
+ this much of the text (in bytes) for each document to fill the
+ <a href="hts_templates.html#METADESCRIPTION">METADESCRIPTION</a>
+ template variable. All words in the meta description are still
+ used for indexing.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_meta_description_length:
+ </td>
+ <td nowrap>
+ 1000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_prefix_matches">
+ max_prefix_matches</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1000
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The Prefix <a href="#search_algorithm">fuzzy algorithm</a>
+ could potentially match a
+ very large number of words. This value limits the
+ number of words each prefix can match. Note
+ that this does not limit the number of documents that
+ are matched in any way.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_prefix_matches:
+ </td>
+ <td nowrap>
+ 100
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_retries">
+ max_retries</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 3
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This option sets the maximum number of retries when retrieving a document
+ fails (mainly for reasons of connection).
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_retries:
+ </td>
+ <td nowrap>
+ 6
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="max_stars">
+ max_stars</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 4
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When stars are used to display the score of a match,
+ this value determines the maximum number of stars that
+ can be displayed.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ max_stars:
+ </td>
+ <td nowrap>
+ 6
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="maximum_page_buttons">
+ maximum_page_buttons</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#maximum_pages">maximum_pages</a>}
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This value limits the number of page links that will be
+ included in the page list at the bottom of the search
+ results page. By default, it takes on the value of the
+ <a href="#maximum_pages">maximum_pages</a>
+ attribute, but you can set it to something lower to allow
+ more pages than buttons. In this case, pages above this
+ number will have no corresponding button.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ maximum_page_buttons:
+ </td>
+ <td nowrap>
+ 20
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="maximum_pages">
+ maximum_pages</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 10
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This value limits the number of page links that will be
+ included in the page list at the bottom of the search
+ results page. As of version 3.1.4, this will limit the
+ total number of matching documents that are shown.
+ You can make the number of page buttons smaller than the
+ number of allowed pages by setting the
+ <a href="#maximum_page_buttons">maximum_page_buttons</a>
+ attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ maximum_pages:
+ </td>
+ <td nowrap>
+ 20
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="maximum_word_length">
+ maximum_word_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>,
+ <a href="htfuzzy.html">htfuzzy</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 32
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets the maximum length of words that will be
+ indexed. Words longer than this value will be silently
+ truncated when put into the index, or searched in the
+ index.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ maximum_word_length:
+ </td>
+ <td nowrap>
+ 15
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="md5_db">
+ md5_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.md5hash.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file holds a database of md5 and date hashes of pages to
+ catch and eliminate duplicates of pages. See also the
+ <a href="#check_unique_md5">check_unique_md5</a> and
+ <a href="#check_unique_date">check_unique_date</a> attributes.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ md5_db:
+ </td>
+ <td nowrap>
+ ${database_base}.md5.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="meta_description_factor">
+ meta_description_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 50
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a factor which will be used to multiply the
+ weight of words in any META description tags in a document.
+ The number may be a floating point number. See also the
+ <a href="#heading_factor">heading_factor</a> attribute and the
+ <a href="#description_factor">description_factor</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ meta_description_factor:
+ </td>
+ <td nowrap>
+ 20
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="metaphone_db">
+ metaphone_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.metaphone.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The database file used for the fuzzy "metaphone" search
+ algorithm. This database is created by
+ <a href="htfuzzy.html">htfuzzy</a> and used by
+ <a href="htsearch.html" target="_top">htsearch</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ metaphone_db:
+ </td>
+ <td nowrap>
+ ${database_base}.mp.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="method_names">
+ method_names</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ and All or Any boolean Boolean
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These values are used to create the <strong>
+ method</strong> menu. It consists of pairs. The first
+ element of each pair is one of the known methods, the
+ second element is the text that will be shown in the
+ menu for that method. This text needs to be quoted if
+ it contains spaces.
+ See the <a href="hts_selectors.html">select list documentation</a>
+ for more information on how this attribute is used.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ method_names:
+ </td>
+ <td nowrap>
+ or Or and And
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="mime_types">
+ mime_types</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#config_dir">config_dir</a>}/mime.types
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file is used by htdig for local file access and resolving
+ file:// URLs to ensure the files are parsable. If you are running
+ a webserver with its own MIME file, you should set this attribute
+ to point to that file.
+ <p>
+ See also <a href="#content_classifier">content_classifier</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ mime_types:
+ </td>
+ <td nowrap>
+ /etc/mime.types
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="minimum_prefix_length">
+ minimum_prefix_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets the minimum length of prefix matches used by the
+ "prefix" fuzzy matching algorithm. Words shorter than this
+ will not be used in prefix matching.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ minimum_prefix_length:
+ </td>
+ <td nowrap>
+ 2
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="minimum_speling_length">
+ minimum_speling_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 5
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets the minimum length of words used by the
+ "speling" fuzzy matching algorithm. Words shorter than this
+ will not be used in this fuzzy matching.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ minimum_speling_length:
+ </td>
+ <td nowrap>
+ 3
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="minimum_word_length">
+ minimum_word_length</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 3
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This sets the minimum length of words that will be
+ indexed. Words shorter than this value will be silently
+ ignored but still put into the excerpt.<br>
+ Note that by making this value less than 3, a lot more
+ words that are very frequent will be indexed. It might
+ be advisable to add some of these to the
+ <a href="#bad_word_list">bad_words list</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ minimum_word_length:
+ </td>
+ <td nowrap>
+ 2
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="multimatch_factor">
+ multimatch_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This factor gives higher rankings to documents that have more than
+ one matching search word when the <strong>or</strong>
+ <a href="#match_method">match_method</a> is used.
+ In version 3.1.6, the matching words' combined scores were multiplied
+ by this factor for each additional matching word. Currently, this
+ multiplier is applied at most once.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ multimatch_factor:
+ </td>
+ <td nowrap>
+ 1000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="next_page_text">
+ next_page_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ [next]
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text displayed in the hyperlink to go to the next
+ page of matches.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ next_page_text:
+ </td>
+ <td nowrap>
+ &lt;img src="/htdig/buttonr.gif"&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_excerpt_show_top">
+ no_excerpt_show_top</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If no excerpt is available, this option will act the
+ same as <a
+ href="#excerpt_show_top">excerpt_show_top</a>, that is,
+ it will show the top of the document.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ no_excerpt_show_top:
+ </td>
+ <td nowrap>
+ yes
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_excerpt_text">
+ no_excerpt_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;em&gt;(None of the search words were found in the top of this document.)&lt;/em&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This text will be displayed in place of the excerpt if
+ there is no excerpt available. If this attribute is set
+ to nothing (blank), the excerpt label will not be
+ displayed in this case.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top">no_excerpt_text:</td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_next_page_text">
+ no_next_page_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#next_page_text">next_page_text</a>}
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text displayed where there would normally be a
+ hyperlink to go to the next page of matches.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top">no_next_page_text:</td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_page_list_header">
+ no_page_list_header</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This text will be used as the value of the PAGEHEADER
+ variable, for use in templates or the
+ <a href="#search_results_footer">search_results_footer</a>
+ file, when all search results fit on a single page.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ no_page_list_header:
+ </td>
+ <td nowrap>
+ &lt;hr noshade size=2&gt;All results on this page.&lt;br&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_page_number_text">
+ no_page_number_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text strings in this list will be used when putting
+ together the PAGELIST variable, for use in templates or
+ the <a href="#search_results_footer">search_results_footer</a>
+ file, when search results fit on more than one page. The PAGELIST
+ is the list of links at the bottom of the search results page.
+ There should be as many strings in the list as there are
+ pages allowed by the <a href="#maximum_page_buttons">maximum_page_buttons</a>
+ attribute. If there are not enough, or the list is empty,
+ the page numbers alone will be used as the text for the links.
+ An entry from this list is used for the current page, as the
+ current page is shown in the page list without a hypertext link,
+ while entries from the <a href="#page_number_text">
+ page_number_text</a> list are used for the links to other pages.
+ The text strings can contain HTML tags to highlight page numbers
+ or embed images. The strings need to be quoted if they contain
+ spaces.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ no_page_number_text:
+ </td>
+ <td nowrap>
+
+ &lt;strong&gt;1&lt;/strong&gt; &lt;strong&gt;2&lt;/strong&gt; \<br>
+ &lt;strong&gt;3&lt;/strong&gt; &lt;strong&gt;4&lt;/strong&gt; \<br>
+ &lt;strong&gt;5&lt;/strong&gt; &lt;strong&gt;6&lt;/strong&gt; \<br>
+ &lt;strong&gt;7&lt;/strong&gt; &lt;strong&gt;8&lt;/strong&gt; \<br>
+ &lt;strong&gt;9&lt;/strong&gt; &lt;strong&gt;10&lt;/strong&gt;
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_prev_page_text">
+ no_prev_page_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#prev_page_text">prev_page_text</a>}
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text displayed where there would normally be a
+ hyperlink to go to the previous page of matches.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top">no_prev_page_text:</td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="no_title_text">
+ no_title_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ filename
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the text to use in search results when no
+ title is found in the document itself. If it is set to
+ filename, htsearch will use the name of the file itself,
+ enclosed in brackets (e.g. [index.html]).
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ no_title_text:
+ </td>
+ <td nowrap>
+ "No Title Found"
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="noindex_end">
+ noindex_end</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;!--/htdig_noindex--&gt; &lt;/SCRIPT&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These strings mark the end of a section of an HTML file that should be
+ completely ignored when indexing. Note that text between noindex_start
+ and noindex_end isn't even counted as white space; the text
+ "<code>foo&lt;!--htdig_noindex--&gt;something&lt;!--/htdig_noindex--&gt;bar</code>"
+ matches the word "foobar", not the phrase "foo bar". White space
+ following noindex_end <em>is</em> counted as white space. See also
+ <a href="#noindex_start">noindex_start</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ noindex_end:
+ </td>
+ <td nowrap>
+ &lt;/SCRIPT&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="noindex_start">
+ noindex_start</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;!--htdig_noindex--&gt; &lt;SCRIPT
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These strings mark the start of a section of an HTML file that should
+ be completely ignored when indexing. They work together with
+ <a href="#noindex_end">noindex_end</a>. Once a string in
+ noindex_start is found, text is ignored until the string at the
+ <em>same position</em> within <a href="#noindex_end">noindex_end</a>
+ is encountered. The sections marked off this way cannot overlap.
+ As in the first default pattern, this can be SGML comment
+ declarations that can be inserted anywhere in the documents to exclude
+ different sections from being indexed. However, existing tags can also
+ be used; this is especially useful to exclude some sections from being
+ indexed where the files to be indexed can not be edited. The second
+ default pattern shows how SCRIPT sections in 'uneditable' documents
+ can be skipped; note how noindex_start does not contain an ending
+ &gt;: this allows for all SCRIPT tags to be matched regardless of
+ attributes defined (different types or languages).
+ Note that the match for this string is case insensitive.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ noindex_start:
+ </td>
+ <td nowrap>
+ &lt;SCRIPT
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="nothing_found_file">
+ nothing_found_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/nomatch.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the file which contains the <code>
+ HTML</code> text to display when no matches were found.
+ The file should contain a complete <code>HTML</code>
+ document.<br>
+ Note that this attribute could also be defined in
+ terms of <a href="#database_base">database_base</a> to
+ make it specific to the current search database.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ nothing_found_file:
+ </td>
+ <td nowrap>
+ /www/searching/nothing.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="nph">
+ nph</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute determines whether htsearch sends out full HTTP
+ headers as required for an NPH (non-parsed header) CGI. Some
+ servers assume CGIs will act in this fashion, for example MS
+ IIS. If your server does not send out full HTTP headers, you
+ should set this to true.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ nph:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="page_list_header">
+ page_list_header</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;hr noshade size=2&gt;Pages:&lt;br&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This text will be used as the value of the PAGEHEADER
+ variable, for use in templates or the
+ <a href="#search_results_footer">search_results_footer</a>
+ file, when search results fit on more than one page.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top">page_list_header:</td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="page_number_separator">
+ page_number_separator</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &quot; &quot;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text strings in this list will be used when putting
+ together the PAGELIST variable, for use in templates or
+ the <a href="#search_results_footer">search_results_footer</a>
+ file, when search results fit on more than one page. The PAGELIST
+ is the list of links at the bottom of the search results page.
+ The strings in the list will be used in rotation, and will
+ separate individual entries taken from
+ <a href="#page_number_text">page_number_text</a> and
+ <a href="#no_page_number_text">no_page_number_text</a>.
+ There can be as many or as few strings in the list as you like.
+ If there are not enough for the number of pages listed, it goes
+ back to the start of the list. If the list is empty, a space is
+ used. The text strings can contain HTML tags. The strings need
+ to be quoted if they contain spaces, or to specify an empty string.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ page_number_separator:
+ </td>
+ <td nowrap>
+ "&lt;/td&gt; &lt;td&gt;"
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="page_number_text">
+ page_number_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text strings in this list will be used when putting
+ together the PAGELIST variable, for use in templates or
+ the <a href="#search_results_footer">search_results_footer</a>
+ file, when search results fit on more than one page. The PAGELIST
+ is the list of links at the bottom of the search results page.
+ There should be as many strings in the list as there are
+ pages allowed by the <a href="#maximum_page_buttons">maximum_page_buttons</a>
+ attribute. If there are not enough, or the list is empty,
+ the page numbers alone will be used as the text for the links.
+ Entries from this list are used for the links to other pages,
+ while an entry from the <a href="#no_page_number_text">
+ no_page_number_text</a> list is used for the current page, as the
+ current page is shown in the page list without a hypertext link.
+ The text strings can contain HTML tags to highlight page numbers
+ or embed images. The strings need to be quoted if they contain
+ spaces.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ page_number_text:
+ </td>
+ <td nowrap>
+
+ &lt;em&gt;1&lt;/em&gt; &lt;em&gt;2&lt;/em&gt; \<br>
+ &lt;em&gt;3&lt;/em&gt; &lt;em&gt;4&lt;/em&gt; \<br>
+ &lt;em&gt;5&lt;/em&gt; &lt;em&gt;6&lt;/em&gt; \<br>
+ &lt;em&gt;7&lt;/em&gt; &lt;em&gt;8&lt;/em&gt; \<br>
+ &lt;em&gt;9&lt;/em&gt; &lt;em&gt;10&lt;/em&gt;
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="persistent_connections">
+ persistent_connections</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, when servers make it possible, htdig can take advantage
+ of persistent connections, as defined by HTTP/1.1 (<em>RFC2616</em>). This makes it
+ possible to reduce the number of connection open/close operations when retrieving
+ documents with HTTP.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ persistent_connections:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="plural_suffix">
+ plural_suffix</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ s
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the value of the PLURAL_MATCHES template
+ variable used in the header, footer and template files.
+ This can be used for localization for non-English languages
+ where 's' is not the appropriate suffix.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ plural_suffix:
+ </td>
+ <td nowrap>
+ en
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="prefix_match_character">
+ prefix_match_character</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ *
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ A null prefix character means that prefix matching should be
+ applied to every search word. Otherwise prefix matching is
+ done on any search word ending with the characters specified
+ in this string, with the string being stripped off before
+ looking for matches. The "prefix" algorithm must be enabled
+ in <a href="#search_algorithm">search_algorithm</a>
+ for this to work. You may also want to set the <a
+ href="#max_prefix_matches">max_prefix_matches</a> and <a
+ href="#minimum_prefix_length">minimum_prefix_length</a> attributes
+ to get it working as you want.<br> As a special case, in version
+ 3.1.6 and later, if this string is non-null and is entered alone
+ as a search word, it is taken as a wildcard that matches all
+ documents in the database. If this string is null, the wildcard
+ for this special case will be <strong>*</strong>. This wildcard
+ doesn't require the prefix algorithm to be enabled.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ prefix_match_character:
+ </td>
+ <td nowrap>
+ ing
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="prev_page_text">
+ prev_page_text</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ [prev]
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The text displayed in the hyperlink to go to the
+ previous page of matches.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ prev_page_text:
+ </td>
+ <td nowrap>
+ &lt;img src="/htdig/buttonl.gif"&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="regex_max_words">
+ regex_max_words</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 25
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The "regex" <a href="#search_algorithm">fuzzy algorithm</a>
+ could potentially match a
+ very large number of words. This value limits the
+ number of words each regular expression can match. Note
+ that this does not limit the number of documents that
+ are matched in any way.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ regex_max_words:
+ </td>
+ <td nowrap>
+ 10
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="remove_bad_urls">
+ remove_bad_urls</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htpurge.html">htpurge</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If TRUE, htpurge will remove any URLs which were marked
+ as unreachable by htdig from the database. If FALSE, it
+ will not do this. When htdig is run in initial mode,
+ documents which were referred to but could not be
+ accessed should probably be removed, and hence this
+ option should then be set to TRUE. However, if htdig is
+ run to update the database, this may cause documents on
+ a server which is temporarily unavailable to be
+ removed. This is probably NOT what was intended, so
+ this option should be set to FALSE in that case.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ remove_bad_urls:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="remove_default_doc">
+ remove_default_doc</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ index.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Set this to the default documents in a directory used by the
+ servers you are indexing. These document names will be stripped
+ off of URLs when they are normalized, if one of these names appears
+ after the final slash, to translate URLs like
+ http://foo.com/index.html into http://foo.com/<br>
+ Note that you can disable stripping of these names during
+ normalization by setting the list to an empty string.
+ The list should only contain names that all servers you index
+ recognize as default documents for directory URLs, as defined
+ by the DirectoryIndex setting in Apache's srm.conf, for example.
+ This does not apply to file:/// or ftp:// URLs.
+ <br>See also <a href="#local_default_doc">local_default_doc</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ remove_default_doc:
+ </td>
+ <td nowrap>
+ default.html default.htm index.html index.htm
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="remove_unretrieved_urls">
+ remove_unretrieved_urls</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htpurge.html">htpurge</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If TRUE, htpurge will remove any URLs which were discovered
+ and included as stubs in the database but not yet retrieved. If FALSE, it
+ will not do this. When htdig is run in initial mode with no restrictions
+ on hopcount or maximum documents, these should probably be removed, so set this
+ to true. However, if you are hoping to index a small set of documents and
+ eventually get to the rest, you should probably leave this as false.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ remove_unretrieved_urls:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="restrict">
+ restrict</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ pattern list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a set of patterns that all URLs have to
+ match against in order for them to be included in the search
+ results. Any number of strings can be specified, separated by
+ spaces. If multiple patterns are given, at least one of the
+ patterns has to match the URL. The list can be specified
+ from within the configuration file, and can be overridden
+ with the "restrict" input parameter in the search form. Note
+ that the restrict list does not take precedence over the
+ <a href="#exclude">exclude</a> list - if a URL matches patterns
+ in both lists it is still excluded from the search results.
+ <br>To restrict URLs in htdig, use
+ <a href="#limit_urls_to">limit_urls_to</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ restrict:
+ </td>
+ <td nowrap>
+ http://www.acme.com/widgets/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="robotstxt_name">
+ robotstxt_name</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ htdig
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.7 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Sets the name that htdig will look for when parsing
+ robots.txt files. This can be used to make htdig appear
+ as a different spider than ht://Dig. Useful to
+ distinguish between a private and a global index.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ robotstxt_name:
+ </td>
+ <td nowrap>
+ myhtdig
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="script_name">
+ script_name</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Overrides the value of the SCRIPT_NAME
+ environment variable. This is useful if
+ htsearch is not being called directly as a CGI
+ program, but indirectly from within a dynamic
+ .shtml page using SSI directives. Previously,
+ you needed a wrapper script to do this, but
+ this configuration attribute makes wrapper
+ scripts obsolete for SSI and possibly for
+ other server scripting languages, as
+ well. (You still need a wrapper script when
+ using PHP, though.)<br>
+ Check out the <code>contrib/scriptname</code>
+ directory for a small example. Note that this
+ attribute also affects the value of the <a
+ href="hts_templates.html#CGI">CGI</a> variable
+ used in htsearch templates.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ script_name:
+ </td>
+ <td nowrap>
+ /search/results.shtml
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_algorithm">
+ search_algorithm</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ exact:1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the search algorithms and their weight to use
+ when searching. Each entry in the list consists of the
+ algorithm name, followed by a colon (:) followed by a
+ weight multiplier. The multiplier is a floating point
+ number between 0 and 1. Note that depending on your
+ <a href="#locale">locale</a> setting, and whether your
+ system's locale implementation affects floating point
+ input, you may need to specify the decimal point as a
+ comma rather than a period.<br>
+ <strong>Note:</strong> If the exact
+ method is not listed, the search may not work since the
+ original terms will not be used.<br>
+ Current algorithms supported are:
+ <dl>
+ <dt>
+ exact
+ </dt>
+ <dd>
+ The default exact word matching algorithm. This
+ will find only exactly matched words.
+ </dd>
+ <dt>
+ soundex
+ </dt>
+ <dd>
+ Uses a slightly modified <a href="http://www.sog.org.uk/cig/vol6/605tdrake.pdf">soundex</a> algorithm to match
+ words. This requires that the soundex database be
+ present. It is generated with the
+ <a href="htfuzzy.html">htfuzzy</a> program.
+ </dd>
+ <dt>
+ metaphone
+ </dt>
+ <dd>
+ Uses the metaphone algorithm for matching words.
+ This algorithm is more specific to the English
+ language than soundex. It requires the metaphone
+ database, which is generated with the <a
+ href="htfuzzy.html">htfuzzy</a> program.
+ </dd>
+ <dt>
+ accents
+ </dt>
+ <dd>
+ Uses the accents algorithm for matching words.
+ This algorithm will treat all accented letters
+ as equivalent to their unaccented counterparts.
+ It requires the accents database, which is
+ generated with the <a
+ href="htfuzzy.html">htfuzzy</a> program.
+ </dd>
+ <dt>
+ endings
+ </dt>
+ <dd>
+ This algorithm uses language specific word endings
+ to find matches. Each word is first reduced to its
+ word root and then all known legal endings are used
+ for the matching. This algorithm uses two databases
+ which are generated with <a href="htfuzzy.html">
+ htfuzzy</a>.
+ </dd>
+ <dt>
+ synonyms
+ </dt>
+ <dd>
+ Performs a dictionary lookup on all the words. This
+ algorithm uses a database generated with the <a
+ href="htfuzzy.html">htfuzzy</a> program.
+ </dd>
+ <dt>
+ substring
+ </dt>
+ <dd>
+ Matches all words containing the queries as
+ substrings. Since this requires checking every word in
+ the database, this can really slow down searches
+ considerably.
+ </dd>
+ <dt>
+ prefix
+ </dt>
+ <dd>
+ Matches all words beginning with the query
+ strings. Uses the option <a
+ href="#prefix_match_character">prefix_match_character</a>
+ to decide whether a query requires prefix
+ matching. For example "abc*" would perform prefix
+ matching on "abc" since * is the default
+ prefix_match_character.
+ </dd>
+ <dt>
+ regex
+ </dt>
+ <dd>
+ Matches all words that match the patterns given as regular
+ expressions. Since this requires checking every word in
+ the database, this can really slow down searches
+ considerably. The config file used for searching
+ must have the regex meta-characters (^$\[-]|.*)
+ included in <a href="#extra_word_characters">extra_word_characters</a>,
+ while the config file used for digging should not.
+ </dd>
+ <dt>
+ speling
+ </dt>
+ <dd>
+ A simple fuzzy algorithm that tries to find one-off spelling
+ mistakes, such as transposition of two letters or an extra character.
+ Since this usually generates just a few possibilities, it is
+ relatively quick.
+ </dd>
+ </dl>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_algorithm:
+ </td>
+ <td nowrap>
+ exact:1 soundex:0.3
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_results_contenttype">
+ search_results_contenttype</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ text/html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a Content-type to be output as an HTTP header
+ at the start of search results. If set to an empty string,
+ the Content-type header will be omitted altogether.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_results_contenttype:
+ </td>
+ <td nowrap>
+ text/xml
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_results_footer">
+ search_results_footer</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/footer.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a filename to be output at the end of
+ search results. While outputting the footer, some
+ variables will be expanded. Variables use the same
+ syntax as the Bourne shell. If there is a variable VAR,
+ the following will all be recognized:
+ <ul>
+ <li>
+ $VAR
+ </li>
+ <li>
+ $(VAR)
+ </li>
+ <li>
+ ${VAR}
+ </li>
+ </ul>
+ The following variables are available. See
+ <a href="hts_templates.html">hts_templates.html</a> for a complete
+ list.
+ <dl>
+ <dt>
+ MATCHES
+ </dt>
+ <dd>
+ The number of documents that were matched.
+ </dd>
+ <dt>
+ PLURAL_MATCHES
+ </dt>
+ <dd>
+ If MATCHES is not 1, this will be the string "s",
+ else it is an empty string. This can be used to say
+ something like "$(MATCHES)
+ document$(PLURAL_MATCHES) were found"
+ </dd>
+ <dt>
+ MAX_STARS
+ </dt>
+ <dd>
+ The value of the <a href="#max_stars">max_stars</a>
+ attribute.
+ </dd>
+ <dt>
+ LOGICAL_WORDS
+ </dt>
+ <dd>
+ A string of the search words with either "and" or
+ "or" between the words, depending on the type of
+ search.
+ </dd>
+ <dt>
+ WORDS
+ </dt>
+ <dd>
+ A string of the search words with spaces in
+ between.
+ </dd>
+ <dt>
+ PAGEHEADER
+ </dt>
+ <dd>
+ This expands to either the value of the
+ <a href="#page_list_header">page_list_header</a> or
+ <a href="#no_page_list_header">no_page_list_header</a>
+ attribute depending on how many pages there are.
+ </dd>
+ </dl>
+ Note that this file will <strong>NOT</strong> be output
+ if no matches were found. In this case the
+ <a href="#nothing_found_file">nothing_found_file</a>
+ attribute is used instead.
+ Also, this file will not be output if it is
+ overridden by defining the
+ <a href="#search_results_wrapper">search_results_wrapper</a>
+ attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_results_footer:
+ </td>
+ <td nowrap>
+ /usr/local/etc/ht/end-stuff.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_results_header">
+ search_results_header</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/header.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a filename to be output at the start of
+ search results. While outputting the header, some
+ variables will be expanded. Variables use the same
+ syntax as the Bourne shell. If there is a variable VAR,
+ the following will all be recognized:
+ <ul>
+ <li>
+ $VAR
+ </li>
+ <li>
+ $(VAR)
+ </li>
+ <li>
+ ${VAR}
+ </li>
+ </ul>
+ The following variables are available. See
+ <a href="hts_templates.html">hts_templates.html</a> for a complete
+ list.
+ <!-- Do these need to be listed for both _footer and _header? -->
+ <dl>
+ <dt>
+ MATCHES
+ </dt>
+ <dd>
+ The number of documents that were matched.
+ </dd>
+ <dt>
+ PLURAL_MATCHES
+ </dt>
+ <dd>
+ If MATCHES is not 1, this will be the string "s",
+ else it is an empty string. This can be used to say
+ something like "$(MATCHES)
+ document$(PLURAL_MATCHES) were found"
+ </dd>
+ <dt>
+ MAX_STARS
+ </dt>
+ <dd>
+ The value of the <a href="#max_stars">max_stars</a>
+ attribute.
+ </dd>
+ <dt>
+ LOGICAL_WORDS
+ </dt>
+ <dd>
+ A string of the search words with either "and" or
+ "or" between the words, depending on the type of
+ search.
+ </dd>
+ <dt>
+ WORDS
+ </dt>
+ <dd>
+ A string of the search words with spaces in
+ between.
+ </dd>
+ </dl>
+ Note that this file will <strong>NOT</strong> be output
+ if no matches were found. In this case the
+ <a href="#nothing_found_file">nothing_found_file</a>
+ attribute is used instead.
+ Also, this file will not be output if it is
+ overridden by defining the
+ <a href="#search_results_wrapper">search_results_wrapper</a>
+ attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_results_header:
+ </td>
+ <td nowrap>
+ /usr/local/etc/ht/start-stuff.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_results_order">
+ search_results_order</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a list of patterns for URLs in
+ search results. Results will be displayed in the
+ specified order, with the search algorithm result
+ as the second order. Remaining areas, that do not
+ match any of the specified patterns, can be placed
+ by using * as the pattern. If no * is specified,
+ one will be implicitly placed at the end of the
+ list.<br>
+ See also <a href="#url_seed_score">url_seed_score</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_results_order:
+ </td>
+ <td nowrap>
+
+ /docs/|faq.html * /maillist/ /testresults/
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_results_wrapper">
+ search_results_wrapper</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies a filename to be output at the start and
+ end of search results. This file replaces the
+ <a href="#search_results_header">search_results_header</a> and
+ <a href="#search_results_footer">search_results_footer</a>
+ files, with the contents of both in one file, and uses the
+ pseudo-variable <strong>$(HTSEARCH_RESULTS)</strong> as a
+ separator for the header and footer sections.
+ If the filename is not specified, the file is unreadable,
+ or the pseudo-variable above is not found, htsearch reverts
+ to the separate header and footer files instead.
+ While outputting the wrapper,
+ some variables will be expanded, just as for the
+ <a href="#search_results_header">search_results_header</a> and
+ <a href="#search_results_footer">search_results_footer</a>
+ files.<br>
+ Note that this file will <strong>NOT</strong> be output
+ if no matches were found. In this case the
+ <a href="#nothing_found_file">nothing_found_file</a>
+ attribute is used instead.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_results_wrapper:
+ </td>
+ <td nowrap>
+ ${common_dir}/wrapper.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="search_rewrite_rules">
+ search_rewrite_rules</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of pairs, <em>regex</em> <em>replacement</em>, used
+ to rewrite URLs in the search results. The left hand string is a
+ regular expression; the right hand string is a literal string with
+ embedded placeholders for fragments that matched inside brackets in
+ the regular expression. \0 is the whole matched string, \1 to \9
+ are bracketed substrings. The backslash must be doubled-up in the
+ attribute setting to get past the variable expansion parsing. Rewrite
+ rules are applied sequentially to each URL before it is displayed
+ or checked against the <a href="#restrict">restrict</a> or
+ <a href="#exclude">exclude</a> lists. Rewriting does not stop once a
+ match has been made, so multiple rules may affect a given URL. See
+ also <a href="#url_part_aliases">url_part_aliases</a> which allows
+ URLs to be of one form during indexing and translated for results,
+ and <a href="#url_rewrite_rules">url_rewrite_rules</a> which allows
+ URLs to be rewritten while indexing.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ search_rewrite_rules:
+ </td>
+ <td nowrap>
+ http://(.*)\\.mydomain\\.org/([^/]*) http://\\2.\\1.com \<br>
+ http://www\\.myschool\\.edu/myorgs/([^/]*) http://\\1.org
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="server_aliases">
+ server_aliases</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute tells the indexer that servers have several
+ DNS aliases, which all point to the same machine and are NOT
+ virtual hosts. This allows you to ensure pages are indexed
+ only once on a given machine, despite the alias used in a URL.
+ As shown in the example, the mapping goes from left to right,
+ so the server name on the right hand side is the one that is
+ used. As of version 3.1.3, the port number is optional, and is
+ assumed to be 80 if omitted. There is no easy way to map all
+ ports from one alias to another without listing them all.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ server_aliases:
+ </td>
+ <td nowrap>
+
+ foo.mydomain.com:80=www.mydomain.com:80 \<br>
+ bar.mydomain.com:80=www.mydomain.com:80
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="server_max_docs">
+ server_max_docs</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ -1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute tells htdig to limit the dig to retrieve a maximum
+ number of documents from each server. This can cause
+ unusual behavior on update digs since the old URLs are
+ stored alphabetically. Therefore, update digs will add
+ additional URLs in pseudo-alphabetical order, up to the
+ limit of the attribute. However, it is most useful to
+ partially index a server as the URLs of additional
+ documents are entered into the database, marked as never
+ retrieved.<br>
+ A value of -1 specifies no limit.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ server_max_docs:
+ </td>
+ <td nowrap>
+ 50
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="server_wait_time">
+ server_wait_time</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 0
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute tells htdig to ensure a server has had a
+ delay (in seconds) from the beginning of the last
+ connection. This can be used to prevent "server abuse"
+ by digging without delay. It's recommended to set this
+ to 10-30 (seconds) when indexing servers that you don't
+ monitor yourself. Additionally, this attribute can slow
+ down local indexing if set, which may or may not be what
+ you intended.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ server_wait_time:
+ </td>
+ <td nowrap>
+ 20
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="sort">
+ sort</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ score
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the default sorting method that htsearch
+ uses to determine the order in which matches are displayed.
+ The valid choices are:
+ <table border="0">
+ <tr>
+ <td>
+ <ul>
+ <li> score </li>
+ <li> time </li>
+ <li> title </li>
+ </ul>
+ </td>
+ <td>
+ <ul>
+ <li> revscore </li>
+ <li> revtime </li>
+ <li> revtitle </li>
+ </ul>
+ </td>
+ </tr>
+ </table>
+ This attribute will only be used if the HTML form that
+ calls htsearch didn't have the <strong>sort</strong>
+ value set. The words date and revdate can be used instead
+ of time and revtime, as both will sort by the time that
+ the document was last modified, if this information is
+ given by the server. The default is to sort by the score,
+ which ranks documents by best match. The sort methods that
+ begin with "rev" simply reverse the order of the
+ sort. Note that setting this to something other than
+ "score" will incur a slowdown in searches.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ sort:
+ </td>
+ <td nowrap>
+ revtime
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="sort_names">
+ sort_names</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ score Score time Time title Title revscore &#39;Reverse Score&#39; revtime &#39;Reverse Time&#39; revtitle &#39;Reverse Title&#39;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ These values are used to create the <strong>
+ sort</strong> menu. It consists of pairs. The first
+ element of each pair is one of the known sort methods, the
+ second element is the text that will be shown in the
+ menu for that sort method. This text needs to be quoted if
+ it contains spaces.
+ See the <a href="hts_selectors.html">select list documentation</a>
+ for more information on how this attribute is used.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ sort_names:
+ </td>
+ <td nowrap>
+
+ score 'Best Match' time Newest title A-Z \<br>
+ revscore 'Worst Match' revtime Oldest revtitle Z-A
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="soundex_db">
+ soundex_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.soundex.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The database file used for the fuzzy "soundex" search
+ algorithm. This database is created by
+ <a href="htfuzzy.html">htfuzzy</a> and used by
+ <a href="htsearch.html" target="_top">htsearch</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ soundex_db:
+ </td>
+ <td nowrap>
+ ${database_base}.snd.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="star_blank">
+ star_blank</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#image_url_prefix">image_url_prefix</a>}/star_blank.gif
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the URL to use to display a blank of the
+ same size as the star defined in the
+ <a href="#star_image">star_image</a> attribute or in the
+ <a href="#star_patterns">star_patterns</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ star_blank:
+ </td>
+ <td nowrap>
+ http://www.somewhere.org/icons/noelephant.gif
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="star_image">
+ star_image</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#image_url_prefix">image_url_prefix</a>}/star.gif
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the URL to use to display a star. This
+ allows you to use some other icon instead of a star.
+ (We like the star...)<br>
+ The display of stars can be turned on or off with the
+ <em><a href="#use_star_image">use_star_image</a></em>
+ attribute and the maximum number of stars that can be
+ displayed is determined by the
+ <em><a href="#max_stars">max_stars</a></em> attribute.<br>
+ Even though the image can be changed, the ALT value
+ for the image will always be a '*'.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ star_image:
+ </td>
+ <td nowrap>
+ http://www.somewhere.org/icons/elephant.gif
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="star_patterns">
+ star_patterns</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute allows the star image to be changed
+ depending on the URL or the match it is used for. This
+ is mainly to make a visual distinction between matches
+ on different web sites. The star image could be
+ replaced with the logo of the company the match refers
+ to.<br>
+ It is advisable to keep all the images the same size
+ in order to line things up properly in a short result
+ listing.<br>
+ The format is simple. It is a list of pairs. The first
+ element of each pair is a pattern, the second element
+ is a URL to the image for that pattern.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ star_patterns:
+ </td>
+ <td nowrap>
+
+ http://www.sdsu.edu /sdsu.gif \<br>
+ http://www.ucsd.edu /ucsd.gif
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="startday">
+ startday</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Day component of first date allowed as last-modified date
+ of returned documents.
+ This is most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.
+ See also <a href="#startyear">startyear</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ startday:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="start_ellipses">
+ start_ellipses</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;strong&gt;&lt;code&gt;... &lt;/code&gt;&lt;/strong&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When excerpts are displayed in the search output, this
+ string will be prepended to the excerpt if there is
+ text before the text displayed. This is just a visual
+ reminder to the user that the excerpt is only part of
+ the complete document.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ start_ellipses:
+ </td>
+ <td nowrap>
+ ...
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="start_highlight">
+ start_highlight</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ &lt;strong&gt;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ When excerpts are displayed in the search output, matched
+ words will be highlighted using this string and
+ <a href="#end_highlight">end_highlight</a>.
+ You should ensure that highlighting tags are balanced,
+ that is, any formatting tags that this string
+ opens should be closed by end_highlight.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ start_highlight:
+ </td>
+ <td nowrap>
+ &lt;font color="#FF0000"&gt;
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="startmonth">
+ startmonth</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Month component of first date allowed as last-modified date
+ of returned documents.
+ This is most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.
+ See also <a href="#startyear">startyear</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ startmonth:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="start_url">
+ start_url</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ http://www.htdig.org/
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the list of URLs that will be used to start a
+ dig when there is no existing database. Note that
+ multiple URLs can be given here.
+ <br>Note also that the value of <em>start_url</em>
+ will be the default value for
+ <a href="#limit_urls_to">limit_urls_to</a>, so if
+ you set start_url to the URLs for specific files,
+ rather than a site or subdirectory URL, you may need
+ to set limit_urls_to to something less restrictive
+ so htdig doesn't reject links in the documents.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ start_url:
+ </td>
+ <td nowrap>
+ http://www.somewhere.org/alldata/index.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="startyear">
+ startyear</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.6 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the year of the cutoff start date for
+ search results. If the start or end date are specified,
+ only results with a last modified date within this
+ range are shown. If a start or end date is specified, but startyear
+ is not, then it defaults to 1970.
+ See also <a href="#startday">startday</a>,
+ <a href="#startmonth">startmonth</a>,
+ <a href="#endday">endday</a>,
+ <a href="#endmonth">endmonth</a>,
+ <a href="#endyear">endyear</a>.
+ These are most usefully specified as a
+ <a href="hts_form.html#startyear">CGI argument</a>.<br>
+ For each component, if a negative number is given,
+ it is taken as relative to the current date.
+ Relative days can span several months or even years if desired,
+ and relative months can span several years. A startday of
+ -90 will select matching documents modified within
+ the last 90 days.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ startyear:
+ </td>
+ <td nowrap>
+ 2001
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="store_phrases">
+ store_phrases</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b5 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Causes htdig to record all occurrences of each word in a document,
+ to allow accurate phrase searches. If this is false, only the first
+ occurrence of each word will be stored, causing many phrases to be
+ missed. Setting this false increases indexing speed by about 20%,
+ and reduces disk requirements by about 60%.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top"><em>No example provided</em></td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="substring_max_words">
+ substring_max_words</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 25
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0.8b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ The Substring <a href="#search_algorithm">fuzzy algorithm</a>
+ could potentially match a
+ very large number of words. This value limits the
+ number of words each substring pattern can match. Note
+ that this does not limit the number of documents that
+ are matched in any way.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ substring_max_words:
+ </td>
+ <td nowrap>
+ 100
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="synonym_db">
+ synonym_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>,
+ <a href="htfuzzy.html">htfuzzy</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/synonyms.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Points to the database that <a href="htfuzzy.html">
+ htfuzzy</a> creates when the <strong>synonyms</strong>
+ algorithm is used.<br>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ uses this to perform synonym dictionary lookups.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ synonym_db:
+ </td>
+ <td nowrap>
+ ${database_base}.syn.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="synonym_dictionary">
+ synonym_dictionary</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htfuzzy.html">htfuzzy</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/synonyms
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This points to a text file containing the synonym
+ dictionary used for the synonyms search algorithm.<br>
+ Each line of this file has at least two words. The
+ first word is the word to replace, the rest of the
+ words are synonyms for that word.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ synonym_dictionary:
+ </td>
+ <td nowrap>
+ /usr/dict/synonyms
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="syntax_error_file">
+ syntax_error_file</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#common_dir">common_dir</a>}/syntax.html
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This points to the file which will be displayed if a
+ boolean expression syntax error was found.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ syntax_error_file:
+ </td>
+ <td nowrap>
+ ${common_dir}/synerror.html
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="tcp_max_retries">
+ tcp_max_retries</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This option sets the maximum number of attempts when a connection
+ <A href="#timeout">timeout</A>s.
+ After all these retries, the connection attempt results in &lt;timed out&gt;.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ tcp_max_retries:
+ </td>
+ <td nowrap>
+ 6
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="tcp_wait_time">
+ tcp_wait_time</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 5
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute sets the wait time (in seconds) after a connection
+ fails and the <A href="#timeout">timeout</A> is raised.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ tcp_wait_time:
+ </td>
+ <td nowrap>
+ 10
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="template_map">
+ template_map</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ quoted string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ Long builtin-long builtin-long Short builtin-short builtin-short
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This maps match template names to internal names and
+ template file names. It is a list of triplets. The
+ first element in each triplet is the name that will be
+ displayed in the FORMAT menu. The second element is the
+ name used internally and the third element is a
+ filename of the template to use.<br>
+ There are two predefined templates, namely <strong>
+ builtin-long</strong> and <strong>
+ builtin-short</strong>. If the filename is one of
+ those, they will be used instead.<br>
+ More information about templates can be found in the
+ <a href="htsearch.html" target="_top">htsearch</a>
+ documentation. The particular template is selected by the
+ <a href="hts_form.html#format">format</a> cgi argument, and the
+ default is given by <a href="#template_name">template_name</a> in
+ the config file.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ template_map:
+ </td>
+ <td nowrap>
+
+ Short short ${common_dir}/short.html \<br>
+ Normal normal builtin-long \<br>
+ Detailed detail ${common_dir}/detail.html
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="template_name">
+ template_name</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ builtin-long
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the default template if no
+ <a href="hts_form.html#format">format</a> field is given by the
+ search form. This needs to map to the
+ <a href="#template_map">template_map</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ template_name:
+ </td>
+ <td nowrap>
+ long
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="template_patterns">
+ template_patterns</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This attribute allows the results template to be changed
+ depending on the URL or the match it is used for. This
+ is mainly to make a visual distinction between matches
+ on different web sites. The results for each site could
+ thus be shown in a style matching that site.<br>
+ The format is simply a list of pairs. The first
+ element of each pair is a pattern, the second element
+ is the name of the template file for that pattern.<br>
+ More information about templates can be found in the
+ <a href="htsearch.html" target="_top">htsearch</a>
+ documentation.<br>
+ Normally, when using this template selection method, you
+ would disable user selection of templates via the <strong>format</strong>
+ input parameter in search forms, as the two methods were not
+ really designed to interact. Templates selected by URL patterns
+ would override any user selection made in the form. If you want
+ to use the two methods together, see the notes on
+ <a href="hts_selectors.html#template_patterns">combining</a>
+ them for an example of how to do this.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ template_patterns:
+ </td>
+ <td nowrap>
+
+ http://www.sdsu.edu ${common_dir}/sdsu.html \<br>
+ http://www.ucsd.edu ${common_dir}/ucsd.html
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="text_factor">
+ text_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a factor which will be used to multiply the
+ weight of words that are not in any special part of a
+ document. Setting a factor to 0 will cause normal words
+ to be ignored. The number may be a floating point
+ number. See also the <a href="#heading_factor"> heading_factor</a>
+ attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ text_factor:
+ </td>
+ <td nowrap>
+ 0
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="timeout">
+ timeout</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 30
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Specifies the time the digger will wait to complete a
+ network read. This is just a safeguard against
+ unforeseen things like the all too common
+ transformation from a network to a notwork.<br>
+ The timeout is specified in seconds.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ timeout:
+ </td>
+ <td nowrap>
+ 42
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="title_factor">
+ title_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 100
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a factor which will be used to multiply the
+ weight of words in the title of a document. Setting a
+ factor to 0 will cause words in the title to be
+ ignored. The number may be a floating point number. See
+ also the <a href="#heading_factor">
+ heading_factor</a> attribute.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ title_factor:
+ </td>
+ <td nowrap>
+ 12
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="translate_latin1">
+ translate_latin1</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b5 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to false, the SGML entities for ISO-8859-1 (or
+ Latin 1) characters above &amp;nbsp; (or &amp;#160;)
+ will not be translated into their 8-bit equivalents.
+ This attribute should be set to false when using a
+ <a href="#locale">locale</a> that doesn't use the
+ ISO-8859-1 character set, to avoid these entities
+ being mapped to inappropriate 8-bit characters, or
+ perhaps more importantly to avoid 8-bit characters from
+ your locale being mapped back to Latin 1 SGML entities
+ in search results.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ translate_latin1:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_list">
+ url_list</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.urls
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file is only created if
+ <em><a href="#create_url_list">create_url_list</a></em> is set to
+ true. It will contain a list of all URLs that were
+ seen.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_list:
+ </td>
+ <td nowrap>
+ /tmp/urls
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_log">
+ url_log</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.log
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If <a href="htdig.html">htdig</a> is
+ interrupted, it will write out its progress to this
+ file. Note that if it has a large number of URLs to write,
+ it may take some time to exit. This can especially happen
+ when running update digs and the run is interrupted soon
+ after beginning.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_log:
+ </td>
+ <td nowrap>
+ /tmp/htdig.progress
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_part_aliases">
+ url_part_aliases</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ A list of translation pairs <em>from</em> and
+ <em>to</em>, used when accessing the database.
+ If a part of an URL matches with the
+ <em>from</em>-string of each pair, it will be
+ translated into the <em>to</em>-string just before
+ writing the URL to the database, and translated
+ back just after reading it from the database.<br>
+ This is primarily used to provide an easy way to
+ rename parts of URLs for e.g. changing
+ www.example.com/~htdig to www.htdig.org. Two
+ different configuration files for digging and
+ searching are then used, with url_part_aliases
+ having different <em>from</em> strings, but
+ identical <em>to</em>-strings.<br>
+ See also <a
+ href="#common_url_parts">common_url_parts</a>.<br>
+ Strings that are normally incorrect in URLs or
+ very seldom used, should be used as
+ <em>to</em>-strings, since extra storage will be
+ used each time one is found as normal part of a
+ URL. Translations will be performed with priority
+ for the leftmost longest match. Each
+ <em>to</em>-string must be unique and not be a
+ part of any other <em>to</em>-string. It also helps
+ to keep the <em>to</em>-strings short to save space
+ in the database. Other than that, the choice of
+ <em>to</em>-strings is pretty arbitrary, as they
+ just provide a temporary, internal encoding in the
+ databases, and none of the characters in these
+ strings have any special meaning.<br>
+ Note that when this attribute is changed, the
+ database should be rebuilt, unless the effect of
+ "moving" the affected URLs in the database is
+ wanted, as described above.<br>
+ <strong>Please note:</strong> Don't just copy the
+ example below into a single configuration file.
+ There are two separate settings of
+ <em>url_part_aliases</em> below; the first one is
+ for the configuration file to be used by htdig,
+ htmerge, and htnotify, and the second one is for the
+ configuration file to be used by htsearch.
+ In this example, htdig will encode the URL
+ "http://search.example.com/~htdig/contrib/stuff.html"
+ as "*sitecontrib/stuff*2" in the databases, and
+ htsearch will decode it as
+ "http://www.htdig.org/contrib/stuff.htm".<br>
+ As of version 3.1.6, you can also do more complex
+ rewriting of URLs using
+ <a href="#url_rewrite_rules">url_rewrite_rules</a> and
+ <a href="#search_rewrite_rules">search_rewrite_rules</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_part_aliases:
+ </td>
+ <td nowrap>
+
+ http://search.example.com/~htdig *site \<br>
+ http://www.htdig.org/this/ *1 \<br>
+ .html *2
+
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">
+ url_part_aliases:
+ </td>
+ <td nowrap>
+
+ http://www.htdig.org/ *site \<br>
+ http://www.htdig.org/that/ *1 \<br>
+ .htm *2
+
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_rewrite_rules">
+ url_rewrite_rules</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b3 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of pairs, <em>regex</em> <em>replacement</em> used to
+ permanently rewrite URLs as they are indexed. The left hand string is
+ a regular expression; the right hand string is a literal string with
+ embedded placeholders for fragments that matched inside brackets in
+ the regex. \0 is the whole matched string, \1 to \9 are bracketed
+ substrings. Note that the <strong>entire</strong> URL is replaced by
+ the right hand string (not just the portion which matches the left hand
+ string). Thus, a leading and trailing (.*) should be included in the
+ pattern, with matching placeholders in the replacement string.<br>
+ Rewrite rules are applied sequentially to each
+ incoming URL before normalization occurs. Rewriting does not stop
+ once a match has been made, so multiple rules may affect a given URL.
+ See also <a href="#url_part_aliases">url_part_aliases</a> which
+ allows URLs to be of one
+form during indexing and translated for results.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_rewrite_rules:
+ </td>
+ <td nowrap>
+ (.*)\\?JServSessionIdroot=.* \\1 \<br>
+ (.*)\\&amp;JServSessionIdroot=.* \\1 \<br>
+ (.*)&amp;context=.* \\1<br>
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_seed_score">
+ url_seed_score</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of pairs, <em>pattern</em>
+ <em>formula</em>, used to weigh the score of
+ hits, depending on the URL of the document.<br>
+ The <em>pattern</em> part is a substring to match
+ against the URL. Pipe ('|') characters can be
+ used in the pattern to concatenate substrings for
+ web-areas that have the same formula.<br>
+ The formula describes a <em>factor</em> and a
+ <em>constant</em>, by which the hit score is
+ weighed. The <em>factor</em> part is multiplied
+ to the original score, then the <em>constant</em>
+ part is added.<br>
+ The format of the formula is the factor part:
+ &quot;*<em>N</em>&quot; optionally followed by comma and
+ spaces, followed by the constant part :
+ &quot;+<em>M</em>&quot;, where the plus sign may be omitted
+ for negative numbers. Either part is optional,
+ but must come in this order.<br>
+ The numbers <em>N</em> and <em>M</em> are floating
+ point constants.<br>
+ More straightforward is to think of the format as
+ &quot;newscore = oldscore*<em>N</em>+<em>M</em>&quot;,
+ but with the &quot;newscore = oldscore&quot; part left out.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_seed_score:
+ </td>
+ <td nowrap>
+
+ /mailinglist/ *.5-1e6 <br>
+ /docs/|/news/ *1.5 <br>
+ /testresults/ &quot;*.7 -200&quot; <br>
+ /faq-area/ *2+10000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="url_text_factor">
+ url_text_factor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 1
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ ??
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ TO BE COMPLETED<br>
+ See also <a href="#heading_factor">heading_factor</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ url_text_factor:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="use_doc_date">
+ use_doc_date</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, htdig will use META date tags in documents,
+ overriding the modification date returned by the server.
+ Any documents that do not have META date tags will retain
+ the last modified date returned by the server or found on
+ the local file system.
+ As of version 3.1.6, in addition to META date tags, htdig will also
+ recognize dc.date, dc.date.created and dc.date.modified.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ use_doc_date:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="use_meta_description">
+ use_meta_description</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, any META description tags will be used as
+ excerpts by htsearch. Any documents that do not have META
+ descriptions will retain their normal excerpts.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ use_meta_description:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="use_star_image">
+ use_star_image</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If set to true, the <em><a href="#star_image">
+ star_image</a></em> attribute is used to display up to
+ <em><a href="#max_stars">max_stars</a></em> images for
+ each match.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ use_star_image:
+ </td>
+ <td nowrap>
+ no
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="user_agent">
+ user_agent</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ htdig
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Server
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.0b2 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This allows customization of the user_agent: field sent when
+ the digger requests a file from a server.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ user_agent:
+ </td>
+ <td nowrap>
+ htdig-digger
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="valid_extensions">
+ valid_extensions</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string list
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ URL
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.1.4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is a list of extensions on URLs which are
+ the only ones considered acceptable. This list is used to
+ supplement the MIME-types that the HTTP server provides
+ with documents. Some HTTP servers do not have a correct
+ list of MIME-types and so can advertise certain
+ documents as text while they are some binary format.
+ If the list is empty, then all extensions are acceptable,
+ provided they pass other criteria for acceptance or rejection.
+ If the list is not empty, only documents with one of the
+ extensions in the list are parsed.
+ See also <a href="#bad_extensions">bad_extensions</a>.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ valid_extensions:
+ </td>
+ <td nowrap>
+ .html .htm .shtml
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="valid_punctuation">
+ valid_punctuation</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ .-_/!#\$%^&amp;&#39;
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the set of characters which may be deleted
+ from the document before determining what a word is.
+ This means that if a document contains something like
+ <code>half-hearted</code> the digger will see this as the three
+ words <code> half</code>, <code>hearted</code> and
+ <code>halfhearted</code>.<br>
+ These characters are also removed before keywords are passed to the
+ search engine, so a search for "half-hearted" works as expected.<br>
+ Note that the dollar sign ($) and backslash (\) must be escaped by a
+ backslash in both valid_punctuation and extra_word_characters.
+ Moreover, the backslash should not be the last character on the line.
+ There is currently no way to include a back-quote (`) in
+ extra_word_characters or valid_punctuation.<br>
+ See also the
+ <a href="#extra_word_characters">extra_word_characters</a>
+ and <a href="#allow_numbers">allow_numbers</a>
+ attributes.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ valid_punctuation:
+ </td>
+ <td nowrap>
+ -'
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="version">
+ version</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htsearch.html" target="_top">htsearch</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ @VERSION@
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This specifies the value of the VERSION
+ variable which can be used in search templates.
+ The default value of this attribute is determined
+ at compile time, and will not normally be set
+ in configuration files.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ version:
+ </td>
+ <td nowrap>
+ 3.2.0
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="word_db">
+ word_db</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.words.db
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ all
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This is the main word database. It is an index of all
+ the words to a list of documents that contain the
+ words. This database can grow large pretty quickly.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ word_db:
+ </td>
+ <td nowrap>
+ ${database_base}.allwords.db
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="word_dump">
+ word_dump</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htdump.html">htdump</a>,
+ <a href="htload.html">htload</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ ${<a href="#database_base">database_base</a>}.worddump
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This file is basically a text version of the file
+ specified in <em><a href="#word_db">word_db</a></em>. Its
+ only use is to have a human readable database of all
+ words. The file is easy to parse with tools like
+ perl or tcl.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ word_dump:
+ </td>
+ <td nowrap>
+ /tmp/words.txt
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_cache_inserts">
+ wordlist_cache_inserts</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="???.html">???</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ ???
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ If true, create a cache of size wordlist_cache_size/2 for class
+ WordListOne. <em>I don't know what this is for. Does anyone?</em>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_cache_inserts:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_cache_size">
+ wordlist_cache_size</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 10000000
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Size (in bytes) of memory cache used by Berkeley DB (DB used by the indexer).
+ IMPORTANT: It makes a <strong>huge</strong> difference. The rule
+ is that the cache size should be at least 2% of the expected index size. The
+ Berkeley DB file has 1% of internal pages that <em>must</em> be cached for good
+ performance. Giving an additional 1% leaves room for caching leaf pages.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_cache_size:
+ </td>
+ <td nowrap>
+ 40000000
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_compress">
+ wordlist_compress</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Enables or disables the default compression system for the indexer.
+ This currently attempts to compress the index by a factor of 8. If the
+ Zlib library is not found on the system, the default is false.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_compress:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_compress_zlib">
+ wordlist_compress_zlib</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ true
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b4 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Enables or disables the zlib compression system for the indexer.
+ Both <a href="#wordlist_compress">wordlist_compress</a> and
+ <a href="#compression_level">compression_level</a> must be true
+ (non-zero) to use this option!
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_compress_zlib:
+ </td>
+ <td nowrap>
+ false
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_monitor">
+ wordlist_monitor</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ boolean
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ false
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ This enables monitoring of what's happening in the indexer.
+ It can help to detect performance/configuration problems.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_monitor:
+ </td>
+ <td nowrap>
+ true
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_monitor_period">
+ wordlist_monitor_period</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ number
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 0
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Sets the number of seconds between each monitor output.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_monitor_period:
+ </td>
+ <td nowrap>
+ .1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_monitor_output">
+ wordlist_monitor_output</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Print monitoring output to the named file instead of the default stderr.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_monitor_output:
+ </td>
+ <td nowrap>
+ myfile
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_page_size">
+ wordlist_page_size</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ 0
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Size (in bytes) of pages used by Berkeley DB (DB used by the indexer).
+ Must be a power of two.
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_page_size:
+ </td>
+ <td nowrap>
+ 8192
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_verbose">
+ wordlist_verbose</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ integer
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ <em>No default</em>
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ wordlist_verbose 1 walk logic<br>
+ wordlist_verbose 2 walk logic details<br>
+ wordlist_verbose 3 walk logic lots of details<br>
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr>
+ <td valign="top">
+ wordlist_verbose:
+ </td>
+ <td nowrap>
+ 1
+ </td>
+ </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_wordkey_description">
+ wordlist_wordkey_description</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ Word/DocID 32/Flags 8/Location 16
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Internal key description: *not user configurable*
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top"><em>No example provided</em></td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ <dl>
+ <dt>
+ <strong><a name="wordlist_wordrecord_description">
+ wordlist_wordrecord_description</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ string
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ <a href="all.html">all</a>
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ DATA
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ Global
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ 3.2.0b1 or later
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>
+ Internal data description: *not user configurable*
+ </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+ <tr> <td valign="top"><em>No example provided</em></td> </tr>
+ </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+ Last modified: Sat Jun 12 23:26:34 EST 2004
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/attrs_head.html b/debian/htdig/htdig-3.2.0b6/htdoc/attrs_head.html
new file mode 100644
index 00000000..1629dad6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/attrs_head.html
@@ -0,0 +1,22 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ attrs_head.html and attrs_tail.html -->
+
+<html>
+ <head>
+ <title>ht://Dig: Configuration file attributes</title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>Configuration file format -- Attributes</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Alphabetical list of attributes
+ </h2>
+ <hr>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/attrs_tail.html b/debian/htdig/htdig-3.2.0b6/htdoc/attrs_tail.html
new file mode 100644
index 00000000..2c39ee75
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/attrs_tail.html
@@ -0,0 +1,3 @@
+ Last modified: $Date: 2002/02/01 22:49:31 $
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/author.html b/debian/htdig/htdig-3.2.0b6/htdoc/author.html
new file mode 100644
index 00000000..65141308
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/author.html
@@ -0,0 +1,39 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig: About the Author</title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>ht://Dig: About the Author</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ Andrew Scherpbier is the author of ht://Dig. He started work on
+ ht://Dig while working at <a href="http://www.sdsu.edu/">San Diego
+ State University</a>. In August 1996, he formed his own
+ software company, <a href="http://www.contigo.com/">Contigo
+ Software</a>, together with several partners. He currently is
+ Vice President of Research and Development at Contigo Software.
+ </p>
+ <p>
+ Unfortunately, since starting Contigo Software, Andrew has had
+ very little time to work on ht://Dig. This is also the reason
+ that email regarding ht://Dig sent directly to Andrew will most
+ likely go unanswered. Please direct questions to the
+ <a href="mailing.html">ht://Dig mailing list</a>. Also make sure
+ you consult the <a href="FAQ.html">Frequently Asked Questions</a>
+ page and browse or search the <a href="mailarchive.html">Mailing
+ list archive.</a>
+ </p>
+ <hr size="4" noshade>
+ Andrew Scherpbier &lt;andrew@contigo.com&gt;
+ Last modified: $Date: 2004/05/28 13:15:18 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/bdot.gif b/debian/htdig/htdig-3.2.0b6/htdoc/bdot.gif
new file mode 100644
index 00000000..ad59e8f3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/bdot.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/bugs.html b/debian/htdig/htdig-3.2.0b6/htdoc/bugs.html
new file mode 100644
index 00000000..14f7e9c2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/bugs.html
@@ -0,0 +1,55 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Bug Reporting
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Bug Reporting
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ If you are having problems or have suggestions for ht://Dig feel
+ free to fill out a bug report form. Before you do this, please do
+ the following:
+ </p>
+ <ul>
+ <li>Make sure you have the latest version, always available
+ at <a
+ href="http://www.htdig.org/where.html">http://www.htdig.org/where.html</a>.
+ You can also check the <a
+ href="http://www.htdig.org/RELEASE.html">release notes</a>
+ for what has changed in new versions.</li>
+ <li>Read the latest documentation, always available at
+ <a href="http://www.htdig.org/">http://www.htdig.org/</a></li>
+ <li>Read the FAQ, always available at
+ <a href="http://www.htdig.org/FAQ.html">http://www.htdig.org/FAQ.html</a></li>
+ <li>Look at the log from running with more verbosity. Usually
+ "-vvv" is helpful for debugging information. Please include any
+ related messages and as much information about your configuration
+ as possible.</li>
+ <li>Check the ht://Dig
+ <a href="http://cgi.htdig.org/cgi-bin/htdig3">bug database</a>.</li>
+ <li>If the bug involves dumping a core file, it also helps
+ if you can provide debugging information from the core. If
+ you don't know how to do this, feel free to ask.</li>
+ </ul>
+ <p>
+ If you've done this, you can easily submit a bug report or a
+ feature request through the
+ <a href="http://cgi.htdig.org/cgi-bin/htdig3">bug database</a>.
+ </p>
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/05/28 13:15:18 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_blocks.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_blocks.html
new file mode 100644
index 00000000..3fbe8b6f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_blocks.html
@@ -0,0 +1,58 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Configuration file format -- Block restrictions
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Configuration file format -- Restricting Attributes
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ Many attributes may be restricted in scope, specifically those
+ used by the <a href="htdig.html">htdig</a> indexer. These
+ attributes can be specified on a per-server or per-URL basis
+ and thus can be applied to only one site or even one
+ particular portion of a site. For example:
+ </p>
+ <dl compact>
+ <dd>
+ <code>
+ &lt;server: www.foo.com&gt;<br>
+ <a
+ href="attrs.html#server_wait_time">server_wait_time</a>: 5<br>
+ &lt;/server&gt;<br>
+ </code>
+ </dd>
+ </dl>
+ <p>
+ Here the portions inside the &lt;server:&gt; &lt;/server&gt;
+ <strong>block</strong> are normal attributes as specified in
+ the <a href="cf_general.html">general configuration</a>
+ documentation. However, rather than applying to all servers,
+ these attributes will apply <em>only</em> to the www.foo.com server.
+ </p>
+ <p>
+ It is also possible to have &lt;url:&gt; &lt;/url&gt;
+ blocks. With these, any URL matching the pattern specified in
+ the block will use the attributes within, overriding any other
+ configuration.
+ </p>
+ <p>
+ Not all attributes apply within blocks. Those that do are
+ listed with the appropriate context in the <a href="attrs.html">attribute
+ documentation</a>.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname.html
new file mode 100644
index 00000000..3ee0ddf7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname.html
@@ -0,0 +1,269 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ cf_byname_head.html and cf_byname_tail.html -->
+
+<html>
+ <head>
+ <title>Alphabetical Attributes</title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Alphabetical Attributes
+ </h2>
+ <p>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a><br>
+ &nbsp;&nbsp;&nbsp;<img src="up.gif" alt="^" width=9 height=9> <a href="confmenu.html" target="contents">Configuration file</a>
+ </p>
+ <strong>A</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#accents_db">accents_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#accept_language">accept_language</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#add_anchors_to_excerpt">add_anchors_to_excerpt</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_double_slash">allow_double_slash</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_in_form">allow_in_form</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_numbers">allow_numbers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_space_in_url">allow_space_in_url</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_virtual_hosts">allow_virtual_hosts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#anchor_target">anchor_target</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#any_keywords">any_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#author_factor">author_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#authorization">authorization</a><br>
+ </font> <br>
+ <strong>B</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#backlink_factor">backlink_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_extensions">bad_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_local_extensions">bad_local_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_querystr">bad_querystr</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_word_list">bad_word_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bin_dir">bin_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#boolean_keywords">boolean_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#boolean_syntax_errors">boolean_syntax_errors</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#build_select_lists">build_select_lists</a><br>
+ </font> <br>
+ <strong>C</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#caps_factor">caps_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#case_sensitive">case_sensitive</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#check_unique_date">check_unique_date</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#check_unique_md5">check_unique_md5</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#collection_names">collection_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#common_dir">common_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#common_url_parts">common_url_parts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#compression_level">compression_level</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#config">config</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#config_dir">config_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#content_classifier">content_classifier</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#cookies_input_file">cookies_input_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#create_image_list">create_image_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#create_url_list">create_url_list</a><br>
+ </font> <br>
+ <strong>D</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#database_base">database_base</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#database_dir">database_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#date_factor">date_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#date_format">date_format</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#description_factor">description_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#description_meta_tag_names">description_meta_tag_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#disable_cookies">disable_cookies</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_db">doc_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_excerpt">doc_excerpt</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_index">doc_index</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_list">doc_list</a><br>
+ </font> <br>
+ <strong>E</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endday">endday</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#end_ellipses">end_ellipses</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#end_highlight">end_highlight</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_affix_file">endings_affix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_dictionary">endings_dictionary</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_root2word_db">endings_root2word_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_word2root_db">endings_word2root_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endmonth">endmonth</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endyear">endyear</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#excerpt_length">excerpt_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#excerpt_show_top">excerpt_show_top</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#exclude">exclude</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#exclude_urls">exclude_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#external_parsers">external_parsers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#external_protocols">external_protocols</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#extra_word_characters">extra_word_characters</a><br>
+ </font> <br>
+ <strong>H</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#head_before_get">head_before_get</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#heading_factor">heading_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_prefix_file">htnotify_prefix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_replyto">htnotify_replyto</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_sender">htnotify_sender</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_suffix_file">htnotify_suffix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_webmaster">htnotify_webmaster</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy">http_proxy</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy_authorization">http_proxy_authorization</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy_exclude">http_proxy_exclude</a><br>
+ </font> <br>
+ <strong>I</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#ignore_alt_text">ignore_alt_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#ignore_dead_servers">ignore_dead_servers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#image_list">image_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#image_url_prefix">image_url_prefix</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#include">include</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#iso_8601">iso_8601</a><br>
+ </font> <br>
+ <strong>K</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords">keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords_factor">keywords_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords_meta_tag_names">keywords_meta_tag_names</a><br>
+ </font> <br>
+ <strong>L</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#limit_normalized">limit_normalized</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#limit_urls_to">limit_urls_to</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_default_doc">local_default_doc</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_urls">local_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_urls_only">local_urls_only</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_user_urls">local_user_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#locale">locale</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#logging">logging</a><br>
+ </font> <br>
+ <strong>M</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maintainer">maintainer</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#match_method">match_method</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#matches_per_page">matches_per_page</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_connection_requests">max_connection_requests</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_description_length">max_description_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_descriptions">max_descriptions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_doc_size">max_doc_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_excerpts">max_excerpts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_head_length">max_head_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_hop_count">max_hop_count</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_keywords">max_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_meta_description_length">max_meta_description_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_prefix_matches">max_prefix_matches</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_retries">max_retries</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_stars">max_stars</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_page_buttons">maximum_page_buttons</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_pages">maximum_pages</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_word_length">maximum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#md5_db">md5_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#meta_description_factor">meta_description_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#metaphone_db">metaphone_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#method_names">method_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#mime_types">mime_types</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_prefix_length">minimum_prefix_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_speling_length">minimum_speling_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_word_length">minimum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#multimatch_factor">multimatch_factor</a><br>
+ </font> <br>
+ <strong>N</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#next_page_text">next_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_excerpt_show_top">no_excerpt_show_top</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_excerpt_text">no_excerpt_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_next_page_text">no_next_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_page_list_header">no_page_list_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_page_number_text">no_page_number_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_prev_page_text">no_prev_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_title_text">no_title_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#noindex_end">noindex_end</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#noindex_start">noindex_start</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#nothing_found_file">nothing_found_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#nph">nph</a><br>
+ </font> <br>
+ <strong>P</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_list_header">page_list_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_number_separator">page_number_separator</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_number_text">page_number_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#persistent_connections">persistent_connections</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#plural_suffix">plural_suffix</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#prefix_match_character">prefix_match_character</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#prev_page_text">prev_page_text</a><br>
+ </font> <br>
+ <strong>R</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#regex_max_words">regex_max_words</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_bad_urls">remove_bad_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_default_doc">remove_default_doc</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_unretrieved_urls">remove_unretrieved_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#restrict">restrict</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#robotstxt_name">robotstxt_name</a><br>
+ </font> <br>
+ <strong>S</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#script_name">script_name</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_algorithm">search_algorithm</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_contenttype">search_results_contenttype</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_footer">search_results_footer</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_header">search_results_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_order">search_results_order</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_wrapper">search_results_wrapper</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_rewrite_rules">search_rewrite_rules</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_aliases">server_aliases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_max_docs">server_max_docs</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_wait_time">server_wait_time</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort">sort</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort_names">sort_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#soundex_db">soundex_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_blank">star_blank</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_image">star_image</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_patterns">star_patterns</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startday">startday</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_ellipses">start_ellipses</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_highlight">start_highlight</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startmonth">startmonth</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_url">start_url</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startyear">startyear</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#store_phrases">store_phrases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#substring_max_words">substring_max_words</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#synonym_db">synonym_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#synonym_dictionary">synonym_dictionary</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#syntax_error_file">syntax_error_file</a><br>
+ </font> <br>
+ <strong>T</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#tcp_max_retries">tcp_max_retries</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#tcp_wait_time">tcp_wait_time</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_map">template_map</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_name">template_name</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_patterns">template_patterns</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#text_factor">text_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#timeout">timeout</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#title_factor">title_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#translate_latin1">translate_latin1</a><br>
+ </font> <br>
+ <strong>U</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_list">url_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_log">url_log</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_part_aliases">url_part_aliases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_rewrite_rules">url_rewrite_rules</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_seed_score">url_seed_score</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_text_factor">url_text_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_doc_date">use_doc_date</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_meta_description">use_meta_description</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_star_image">use_star_image</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#user_agent">user_agent</a><br>
+ </font> <br>
+ <strong>V</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#valid_extensions">valid_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#valid_punctuation">valid_punctuation</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#version">version</a><br>
+ </font> <br>
+ <strong>W</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_db">word_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_dump">word_dump</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_cache_inserts">wordlist_cache_inserts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_cache_size">wordlist_cache_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_compress">wordlist_compress</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_compress_zlib">wordlist_compress_zlib</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor">wordlist_monitor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor_period">wordlist_monitor_period</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor_output">wordlist_monitor_output</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_page_size">wordlist_page_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_verbose">wordlist_verbose</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_wordkey_description">wordlist_wordkey_description</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_wordrecord_description">wordlist_wordrecord_description</a><br>
+ </font>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_head.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_head.html
new file mode 100644
index 00000000..a058cc37
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_head.html
@@ -0,0 +1,20 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ cf_byname_head.html and cf_byname_tail.html -->
+
+<html>
+ <head>
+ <title>Alphabetical Attributes</title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Alphabetical Attributes
+ </h2>
+ <p>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a><br>
+ &nbsp;&nbsp;&nbsp;<img src="up.gif" alt="^" width=9 height=9> <a href="confmenu.html" target="contents">Configuration file</a>
+ </p>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_tail.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_tail.html
new file mode 100644
index 00000000..08ef06e8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byname_tail.html
@@ -0,0 +1,10 @@
+ </font>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog.html
new file mode 100644
index 00000000..20b91e07
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog.html
@@ -0,0 +1,260 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ cf_byprog_head.html and cf_byprog_tail.html -->
+
+<html>
+ <head>
+ <title>Attributes by Program</title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Attributes by Program
+ </h2>
+ <p>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a><br>
+ &nbsp;&nbsp;&nbsp;<img src="up.gif" alt="^" width=9 height=9> <a href="confmenu.html" target="contents">Configuration file</a>
+ </p>
+ <br><strong><a href="???.html" target="body">???</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_cache_inserts">wordlist_cache_inserts</a><br>
+ <br><strong><a href="all.html" target="body">all</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bin_dir">bin_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#common_dir">common_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#common_url_parts">common_url_parts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#config">config</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#config_dir">config_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#database_base">database_base</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#database_dir">database_dir</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_db">doc_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_excerpt">doc_excerpt</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#include">include</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_part_aliases">url_part_aliases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_db">word_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_cache_size">wordlist_cache_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_compress">wordlist_compress</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_compress_zlib">wordlist_compress_zlib</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor">wordlist_monitor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor_period">wordlist_monitor_period</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_monitor_output">wordlist_monitor_output</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_page_size">wordlist_page_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_wordkey_description">wordlist_wordkey_description</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#wordlist_wordrecord_description">wordlist_wordrecord_description</a><br>
+ <br><strong><a href="htdig.html" target="body">htdig</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#accept_language">accept_language</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_double_slash">allow_double_slash</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_numbers">allow_numbers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_space_in_url">allow_space_in_url</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_virtual_hosts">allow_virtual_hosts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#authorization">authorization</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_extensions">bad_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_local_extensions">bad_local_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_querystr">bad_querystr</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_word_list">bad_word_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#case_sensitive">case_sensitive</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#check_unique_date">check_unique_date</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#check_unique_md5">check_unique_md5</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#compression_level">compression_level</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#content_classifier">content_classifier</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#cookies_input_file">cookies_input_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#create_image_list">create_image_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#create_url_list">create_url_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#description_meta_tag_names">description_meta_tag_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#disable_cookies">disable_cookies</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_index">doc_index</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_list">doc_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#exclude_urls">exclude_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#external_parsers">external_parsers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#external_protocols">external_protocols</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#extra_word_characters">extra_word_characters</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#head_before_get">head_before_get</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy">http_proxy</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy_authorization">http_proxy_authorization</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#http_proxy_exclude">http_proxy_exclude</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#ignore_alt_text">ignore_alt_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#ignore_dead_servers">ignore_dead_servers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#image_list">image_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords_meta_tag_names">keywords_meta_tag_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#limit_normalized">limit_normalized</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#limit_urls_to">limit_urls_to</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_default_doc">local_default_doc</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_urls">local_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_urls_only">local_urls_only</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#local_user_urls">local_user_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#locale">locale</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maintainer">maintainer</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_connection_requests">max_connection_requests</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_description_length">max_description_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_descriptions">max_descriptions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_doc_size">max_doc_size</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_head_length">max_head_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_hop_count">max_hop_count</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_keywords">max_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_meta_description_length">max_meta_description_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_retries">max_retries</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_word_length">maximum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#md5_db">md5_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#mime_types">mime_types</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_word_length">minimum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#noindex_end">noindex_end</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#noindex_start">noindex_start</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#persistent_connections">persistent_connections</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_default_doc">remove_default_doc</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#robotstxt_name">robotstxt_name</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_aliases">server_aliases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_max_docs">server_max_docs</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_wait_time">server_wait_time</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_url">start_url</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#store_phrases">store_phrases</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#tcp_max_retries">tcp_max_retries</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#tcp_wait_time">tcp_wait_time</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#timeout">timeout</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#translate_latin1">translate_latin1</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_list">url_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_log">url_log</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_rewrite_rules">url_rewrite_rules</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_doc_date">use_doc_date</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#user_agent">user_agent</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#valid_extensions">valid_extensions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#valid_punctuation">valid_punctuation</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_dump">word_dump</a><br>
+ <br><strong><a href="htdump.html" target="body">htdump</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_list">doc_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_dump">word_dump</a><br>
+ <br><strong><a href="htfuzzy.html" target="body">htfuzzy</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#accents_db">accents_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_affix_file">endings_affix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_dictionary">endings_dictionary</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_root2word_db">endings_root2word_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_word2root_db">endings_word2root_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_word_length">maximum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#metaphone_db">metaphone_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#soundex_db">soundex_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#synonym_db">synonym_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#synonym_dictionary">synonym_dictionary</a><br>
+ <br><strong><a href="htload.html" target="body">htload</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#doc_list">doc_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#word_dump">word_dump</a><br>
+ <br><strong><a href="htnotify.html" target="body">htnotify</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_prefix_file">htnotify_prefix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_replyto">htnotify_replyto</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_sender">htnotify_sender</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_suffix_file">htnotify_suffix_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#htnotify_webmaster">htnotify_webmaster</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#iso_8601">iso_8601</a><br>
+ <br><strong><a href="htpurge.html" target="body">htpurge</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_bad_urls">remove_bad_urls</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#remove_unretrieved_urls">remove_unretrieved_urls</a><br>
+ <br><strong><a href="htsearch.html" target="_top">htsearch</a></strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#accents_db">accents_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#add_anchors_to_excerpt">add_anchors_to_excerpt</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_in_form">allow_in_form</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#allow_numbers">allow_numbers</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#anchor_target">anchor_target</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#any_keywords">any_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#author_factor">author_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#backlink_factor">backlink_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#bad_word_list">bad_word_list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#boolean_keywords">boolean_keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#boolean_syntax_errors">boolean_syntax_errors</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#build_select_lists">build_select_lists</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#caps_factor">caps_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#collection_names">collection_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#date_factor">date_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#date_format">date_format</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#description_factor">description_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endday">endday</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#end_ellipses">end_ellipses</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#end_highlight">end_highlight</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_root2word_db">endings_root2word_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endings_word2root_db">endings_word2root_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endmonth">endmonth</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#endyear">endyear</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#excerpt_length">excerpt_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#excerpt_show_top">excerpt_show_top</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#exclude">exclude</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#extra_word_characters">extra_word_characters</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#heading_factor">heading_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#image_url_prefix">image_url_prefix</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#iso_8601">iso_8601</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords">keywords</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#keywords_factor">keywords_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#logging">logging</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#match_method">match_method</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#matches_per_page">matches_per_page</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_excerpts">max_excerpts</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_prefix_matches">max_prefix_matches</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_stars">max_stars</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_page_buttons">maximum_page_buttons</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_pages">maximum_pages</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#maximum_word_length">maximum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#meta_description_factor">meta_description_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#metaphone_db">metaphone_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#method_names">method_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_prefix_length">minimum_prefix_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_speling_length">minimum_speling_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#minimum_word_length">minimum_word_length</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#multimatch_factor">multimatch_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#next_page_text">next_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_excerpt_show_top">no_excerpt_show_top</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_excerpt_text">no_excerpt_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_next_page_text">no_next_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_page_list_header">no_page_list_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_page_number_text">no_page_number_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_prev_page_text">no_prev_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#no_title_text">no_title_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#nothing_found_file">nothing_found_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#nph">nph</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_list_header">page_list_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_number_separator">page_number_separator</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#page_number_text">page_number_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#plural_suffix">plural_suffix</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#prefix_match_character">prefix_match_character</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#prev_page_text">prev_page_text</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#regex_max_words">regex_max_words</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#restrict">restrict</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#script_name">script_name</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_algorithm">search_algorithm</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_contenttype">search_results_contenttype</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_footer">search_results_footer</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_header">search_results_header</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_order">search_results_order</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_results_wrapper">search_results_wrapper</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_rewrite_rules">search_rewrite_rules</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort">sort</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort_names">sort_names</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#soundex_db">soundex_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_blank">star_blank</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_image">star_image</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#star_patterns">star_patterns</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startday">startday</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_ellipses">start_ellipses</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#start_highlight">start_highlight</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startmonth">startmonth</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#startyear">startyear</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#substring_max_words">substring_max_words</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#synonym_db">synonym_db</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#syntax_error_file">syntax_error_file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_map">template_map</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_name">template_name</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#template_patterns">template_patterns</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#text_factor">text_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#title_factor">title_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#translate_latin1">translate_latin1</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_seed_score">url_seed_score</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#url_text_factor">url_text_factor</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_meta_description">use_meta_description</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#use_star_image">use_star_image</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#valid_punctuation">valid_punctuation</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#version">version</a><br>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_head.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_head.html
new file mode 100644
index 00000000..87128902
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_head.html
@@ -0,0 +1,20 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
+<!-- WARNING: this file was generated using cf_generate.pl from
+ information found in ../htcommon/defaults.cc and using
+ cf_byprog_head.html and cf_byprog_tail.html -->
+
+<html>
+ <head>
+ <title>Attributes by Program</title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Attributes by Program
+ </h2>
+ <p>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a><br>
+ &nbsp;&nbsp;&nbsp;<img src="up.gif" alt="^" width=9 height=9> <a href="confmenu.html" target="contents">Configuration file</a>
+ </p>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_tail.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_tail.html
new file mode 100644
index 00000000..eb4e8060
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_byprog_tail.html
@@ -0,0 +1,9 @@
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_general.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_general.html
new file mode 100644
index 00000000..838b8e89
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_general.html
@@ -0,0 +1,80 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Configuration file format -- General
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Configuration file format -- General
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ All programs in the ht://Dig system use a flexible configuration
+ file. This configuration file is a plain ASCII text file. Each line in
+ the file is either a comment or contains an attribute.
+ Comment lines are blank lines or lines that start with a '#'.
+ Attributes consist of a variable name and an associated
+ value:
+ </p>
+ <dl compact>
+ <dd>
+ <code>
+ &lt;name&gt;:&lt;whitespace&gt;&lt;value&gt;&lt;newline&gt;</code>
+ </dd>
+ </dl>
+ <p>
+ The &lt;name&gt; contains any alphanumeric character or
+ underline (_). The &lt;value&gt; can include any character
+ except newline. It also cannot start with spaces or tabs since
+ those are considered part of the whitespace after the colon. It
+ is important to keep in mind that any trailing spaces or tabs
+ will be included.
+ </p>
+ <p>
+ It is possible to split the &lt;value&gt; across several
+ lines of the configuration file by ending each line with a
+ backslash (\). The effect on the value is that a space is
+ added where the line split occurs.
+ </p>
+ <p>
+ Each program only extracts the attributes it needs and hence
+ it is often convenient to use the same configuration file with
+ all ht://Dig programs. If an attribute is specified more than
+ once in the file, only the last one will be used.
+ </p>
+ <p>
+ If a program needs a particular attribute and it is not in
+ the configuration file, it will use the default value which
+ is compiled into the program as defined in
+ <code>htcommon/defaults.cc</code>.
+ </p>
+ <p>
+ A configuration file can include another file, by using the special
+ &lt;name&gt;, <code>include</code>. The &lt;value&gt; is taken as
+ the file name of another configuration file to be read in at
+ this point. If the given file name is not fully qualified, it is
+ taken relative to the directory in which the current configuration
+ file is found. Variable expansion is permitted in the file name.
+ Multiple include statements, and nested includes are also permitted.
+ </p>
+ <dl>
+ <dt>
+ <em>Example:</em>
+ </dt>
+ <dd>
+ <code>include: common.conf</code>
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_generate.pl b/debian/htdig/htdig-3.2.0b6/htdoc/cf_generate.pl
new file mode 100755
index 00000000..a3193feb
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_generate.pl
@@ -0,0 +1,289 @@
+#
+# cf_generate.pl
+#
+# cf_generate: Build the files cf_byprog.html, cf_byname.html and
+# attrs.html from the information found
+# in ../htcommon/defaults.cc.
+# attrs.html : attrs_head.html + generation + attrs_tail.html
+# cf_byprog.html : cf_byprog_head.html + generation + cf_byprog_tail.html
+# cf_byname.html : cf_byname_head.html + generation + cf_byname_tail.html
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: cf_generate.pl,v 1.9 2004/06/05 04:30:47 lha Exp $
+#
+use strict;
+
+use vars qw(%char2quote);
+
+#
+# Entity replacement for each character that is special in HTML text
+# or in quoted attribute values.
+#
+%char2quote = (
+    '&' => '&amp;',
+    '<' => '&lt;',
+    '>' => '&gt;',
+    '"' => '&quot;',
+    "'" => '&#39;',
+);
+
+#
+# html_escape(TEXT): return a copy of TEXT in which & < > " and ' are
+# replaced by their %char2quote entries. An undefined argument yields
+# undef.
+#
+sub html_escape {
+    my $text = shift;
+
+    return undef unless defined $text;
+
+    $text =~ s/([&"<>'])/$char2quote{$1}/g;
+    return $text;
+}
+
+#
+# Read and parse attributes descriptions found in defaults.cc
+#
+
+#
+# The top of the source tree is the first command line argument and
+# defaults to the parent directory when no argument is given.
+#
+my($dir);
+if (scalar(@ARGV) == 0) {
+    $dir = '..';
+}
+else {
+    # Scalar element access; @ARGV[0] would be a one-element slice.
+    $dir = $ARGV[0];
+}
+
+# Undefine the input record separator so that each <FILE> read in this
+# script returns the whole remaining input as a single string.
+local($/) = undef;
+my($file) = $dir . "/htcommon/defaults.cc";
+my($content);
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+close(FILE);
+
+#
+# Rewrite the C++ ConfigDefaults initializer into the text of a Perl
+# array of arrays: change curly to square brackets to generate perl
+# arrays instead of hashes. Order is important.
+#
+$content =~ s/.*ConfigDefaults.*?\{(.*)\{0, 0.*/[$1]/s;
+$content =~ s/\s*\\*$//mg;
+$content =~ s/([\@\$])/\\$1/gs;
+$content =~ s/^\{/\[/mg;
+$content =~ s/^\"\s*\},$/\" \],/mg;
+#
+# Transform macro substituted strings by @strings@ (substituted by ../configure)
+# Three step process ( -> \@ string\@ -> \@string\@ -> @string@ )
+# as perl seems to get confused by @$2.
+#
+$content =~ s|^(\[ \"\w+\", )([A-Z].*?),\n|$1\"\\\@$2\\@\",\n|mg;
+#$content =~ s/^(\[ \"\w+\", )\"(.*?)\"(.*?)\"(.*?)\",\n/$1\"$2\\\"$3\\\"$4\",\n/mg;
+$content =~ s/BIN_DIR/bindir/g;
+my($config);
+# NOTE(review): this evaluates the rewritten text of defaults.cc as Perl
+# code, so the script must only be run against a trusted source tree.
+eval "\$config = $content";
+
+if(!$config) {
+    # $@ holds the eval error, if there was one; report it so that a
+    # syntax problem in the rewritten text can be diagnosed.
+    my($reason) = $@ ? " : $@" : "";
+    die "could not extract any configuration info from $file$reason";
+}
+
+#
+# Spit the HTML pages
+#
+
+# $file is already declared where defaults.cc is read; it is reused here
+# rather than redeclared, which would mask the earlier declaration.
+
+#
+# Complete list of attributes with descriptions and examples.
+# The output is written to the current directory; the head template is
+# read from the htdoc directory of the source tree.
+#
+$file = "attrs.html.in";
+open(ATTR, ">$file") or die "cannot open $file for writing : $!";
+
+$file = $dir . "/htdoc/attrs_head.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+print ATTR $content;
+close(FILE);
+
+#
+# Index by attribute name
+#
+$file = "cf_byname.html";
+open(BYNAME, ">$file") or die "cannot open $file for writing : $!";
+
+$file = $dir . "/htdoc/cf_byname_head.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+print BYNAME $content;
+close(FILE);
+
+#
+# Walk every attribute record once, adding an entry to the alphabetical
+# index (BYNAME) and a full description to the attribute reference (ATTR).
+#
+my($letter) = '';
+my($record);
+foreach $record (@$config) {
+    my($name, $default, $type, $programs, $block, $version, $category, $example, $description) = @$record;
+
+    # Start a new index section whenever the initial letter changes,
+    # closing the <font> opened by the previous section first.
+    if($letter ne uc(substr($name, 0, 1))) {
+        print BYNAME "\t</font> <br>\n" if($letter);
+        $letter = uc(substr($name, 0, 1));
+        print BYNAME "\t<strong>$letter</strong> <font face=\"helvetica,arial\" size=\"2\"><br>\n";
+    }
+
+    print BYNAME "\t <img src=\"dot.gif\" alt=\"*\" width=9 height=9> <a target=\"body\" href=\"attrs.html#$name\">$name</a><br>\n";
+
+    # One link per program using the attribute; the htsearch page is
+    # opened in the top frame.
+    my($used_by) = join(",\n\t\t\t",
+        map {
+            my($top) = $_ eq 'htsearch' ? " target=\"_top\"" : "";
+            "<a href=\"$_.html\"$top>$_</a>";
+        }
+        split(' ', $programs));
+
+    if ($block eq '') {
+        $block = "Global";
+    }
+
+    # String comparison: the version field is text such as 'all', so the
+    # numeric != operator is the wrong test here.
+    if($version ne 'all') {
+        $version = "$version or later";
+    }
+
+    # Turn the example text into table rows, one row per "name:" entry.
+    if(!($example =~ /^$name:/)) {
+        $example = "\t\t\t <tr> <td valign=\"top\"><em>No example provided</em></td> </tr>\n";
+    } elsif($example =~ /\A$name:\s*\Z/s) {
+        $example = "\t\t\t <tr> <td valign=\"top\">$name:</td> </tr>\n";
+    } else {
+        my($one);
+        my($html) = '';
+        foreach $one (split("$name:", $example)) {
+            next if($one =~ /^\s*$/);
+            $html .= <<EOF;
+ <tr>
+ <td valign="top">
+ $name:
+ </td>
+ <td nowrap>
+ $one
+ </td>
+ </tr>
+EOF
+        }
+        $example = $html;
+    }
+
+    if($default =~ /^\s*$/) {
+        $default = "<em>No default</em>";
+    } else {
+        $default =~ s/^([A-Z][A-Z_]*) \" (.*?)\"/$1 $2/; # for PDF_PARSER
+        $default = html_escape($default);
+        # hyperlink default values defined in terms of other attributes;
+        # the literal braces are escaped for newer perls and /g links
+        # every reference, not only the first one
+        $default =~ s/\$\{([A-Za-z_]*)\}/\${<a href=\"#$1\">$1<\/a>}/g;
+
+    }
+    print ATTR <<EOF;
+ <dl>
+ <dt>
+ <strong><a name="$name">
+ $name</a></strong>
+ </dt>
+ <dd>
+ <dl>
+ <dt>
+ <em>type:</em>
+ </dt>
+ <dd>
+ $type
+ </dd>
+ <dt>
+ <em>used by:</em>
+ </dt>
+ <dd>
+ $used_by
+ </dd>
+ <dt>
+ <em>default:</em>
+ </dt>
+ <dd>
+ $default
+ </dd>
+ <dt>
+ <em>block:</em>
+ </dt>
+ <dd>
+ $block
+ </dd>
+ <dt>
+ <em>version:</em>
+ </dt>
+ <dd>
+ $version
+ </dd>
+ <dt>
+ <em>description:</em>
+ </dt>
+ <dd>$description </dd>
+ <dt>
+ <em>example:</em>
+ </dt>
+ <dd>
+ <table border="0">
+$example </table>
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <hr>
+EOF
+
+}
+
+#
+# Capture the output of the date command; it becomes the "Last modified"
+# stamp of the attribute reference.
+#
+open(FILE, "date |") or die "cannot open pipe to date command for reading : $!";
+$content = <FILE>;
+close(FILE);
+my($date) = $content;
+
+# $file is reused, not redeclared: another "my" would mask the earlier
+# declaration in the same scope.
+$file = $dir . "/htdoc/attrs_tail.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+# The pattern consumes the newline ending the "Last modified" line;
+# presumably $date still carries the newline printed by date, which
+# takes its place.
+$content =~ s/Last modified: [^\n]*\n/Last modified: $date/;
+print ATTR $content;
+close(FILE);
+
+$file = $dir . "/htdoc/cf_byname_tail.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+print BYNAME $content;
+close(FILE);
+
+close(ATTR);
+close(BYNAME);
+
+#
+# Index by program name
+#
+$file = "cf_byprog.html";
+open(BYPROG, ">$file") or die "cannot open $file for writing : $!";
+
+$file = $dir . "/htdoc/cf_byprog_head.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+print BYPROG $content;
+close(FILE);
+
+#
+# Group the attribute records under every program listed in their
+# fourth field (a whitespace separated list of program names).
+#
+my(%prog2attr);
+my($attr);
+foreach $attr (@$config) {
+    my($programs) = $attr->[3];
+
+    my($prog);
+    foreach $prog (split(' ', $programs)) {
+        push(@{$prog2attr{$prog}}, $attr);
+    }
+}
+
+#
+# One section per program, in sorted order, listing its attributes.
+#
+my($prog);
+foreach $prog (sort(keys(%prog2attr))) {
+    my($top) = $prog eq 'htsearch' ? "target=\"_top\"" : "target=\"body\"";
+    print BYPROG "\t<br><strong><a href=\"$prog.html\" $top>$prog</a></strong> <font face=\"helvetica,arial\" size=\"2\"><br>\n";
+    foreach $attr (@{$prog2attr{$prog}}) {
+        # Only the attribute name (first field) is needed for the index.
+        my($name) = $attr->[0];
+        print BYPROG "\t <img src=\"dot.gif\" alt=\"*\" width=9 height=9> <a target=\"body\" href=\"attrs.html#$name\">$name</a><br>\n";
+    }
+    # Close the <font> opened in this section's heading so the elements
+    # do not nest from one program to the next.
+    print BYPROG "\t</font>\n";
+}
+
+# $file is reused, not redeclared, to avoid masking the earlier "my".
+$file = $dir . "/htdoc/cf_byprog_tail.html";
+open(FILE, "<$file") or die "cannot open $file for reading : $!";
+$content = <FILE>;
+print BYPROG $content;
+close(FILE);
+
+close(BYPROG);
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_types.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_types.html
new file mode 100644
index 00000000..006a6d90
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_types.html
@@ -0,0 +1,103 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Configuration file format -- Attribute Types
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Configuration file format -- Attribute Types
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ The meaning of the attribute
+ value depends on the attribute itself. In general, the values
+ can be of several types:
+ </p>
+ <dl>
+ <dt>
+ <strong>String</strong>
+ </dt>
+ <dd>
+ Any string of characters except newline.
+ </dd>
+ <dt>
+ <strong>String List</strong>
+ </dt>
+ <dd>
+ A sequence of strings separated by whitespace. Individual
+ strings within the list cannot be quoted and therefore
+ cannot contain whitespace.
+ </dd>
+ <dt>
+ <strong>Quoted String List</strong>
+ </dt>
+ <dd>
+ A sequence of strings separated by whitespace. Individual
+ strings within the list may be quoted using single or double
+ quotes. The quotes are needed when the individual strings
+ contain whitespace. If you want a quote mark or a backslash
+ to be inserted as-is into a string, you must precede it with
+ a backslash.
+ </dd>
+ <dt>
+ <strong>Pattern List</strong>
+ </dt>
+ <dd>
+ A sequence of patterns separated by whitespace. Normal
+ strings are considered patterns to be matched
+ exactly. Strings surrounded by [ and ] are considered
+ regular expressions (ignoring the outer [] characters).
+ </dd>
+ <dt>
+ <strong>Number</strong>
+ </dt>
+ <dd>
+ A string that represents a number. The attribute determines
+ if the number has to be a pure integer or if it can be a
+ floating point number.
+ </dd>
+ <dt>
+ <strong>Boolean</strong>
+ </dt>
+ <dd>
+ A string that represents a truth value. Acceptable truth
+ values are given below.<br>
+ For <em>true</em>:
+ <ul>
+ <li>
+ yes
+ </li>
+ <li>
+ true
+ </li>
+ <li>
+ 1
+ </li>
+ </ul>
+ For <em>false</em>:
+ <ul>
+ <li>
+ no
+ </li>
+ <li>
+ false
+ </li>
+ <li>
+ 0
+ </li>
+ </ul>
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/cf_variables.html b/debian/htdig/htdig-3.2.0b6/htdoc/cf_variables.html
new file mode 100644
index 00000000..678c12c9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/cf_variables.html
@@ -0,0 +1,69 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Configuration file format -- Variable Expansion
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Configuration file format -- Variable Expansion
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ Attribute values can contain
+ references to other attributes. This is done similarly to how
+ Perl, Bourne shell, and Makefiles use variables. The variables
+ that are referenced will be expanded when the variable is used,
+ not when it is defined. This means that attributes don't have
+ to be defined in the order that they are used.
+ </p>
+ <p>
+ Example attribute reference in an attribute value:
+ </p>
+ <blockquote>
+ database_base: ${database_dir}/htdig
+ </blockquote>
+ <p>
+ or
+ </p>
+ <blockquote>
+ database_base: $database_dir/htdig
+ </blockquote>
+ <p>
+ Many default values use the attribute expansion mechanism to
+ make configuration easier and more generic.
+ </p>
+ <p>
+ In addition to variable expansion, it is also possible to
+ include the contents of a file as the value of an attribute.
+ This is done by putting the filename in backquotes (`).
+ Within the backquotes, variable expansion will still work so
+ that files can be specified relative to some path. The
+ backquotes and the filename will be replaced by the contents
+ of the file if it exists. All whitespace (spaces, tabs,
+ newlines, etc) in the file are condensed to one space before
+ the substitution takes place. The following are some examples
+ of the use of the backquoting scheme:
+ </p>
+ <blockquote>
+ <table>
+ <tr>
+ <td nowrap>
+ start_url: `${common_dir}/starting_points`<br>
+ limit_urls_to: `${common_dir}/limit_list`
+ </td>
+ </tr>
+ </table>
+ </blockquote>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/config.html b/debian/htdig/htdig-3.2.0b6/htdoc/config.html
new file mode 100644
index 00000000..a732ef2c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/config.html
@@ -0,0 +1,509 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Configuration
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Configuration
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ ht://Dig requires a configuration file and several HTML files
+ to operate correctly. Fortunately, when ht://Dig is
+ installed, a very reasonable configuration is created and in
+ most cases only minor modifications to the files are
+ necessary.
+ </p>
+ <p>
+ Below, we will use the variables that were set in CONFIG to
+ designate specific paths.
+ </p>
+ <p>
+ Standard files:
+ </p>
+ <ul>
+ <li>
+ <a href="#htdig.conf">${CONFIG_DIR}/htdig.conf</a>
+ </li>
+ <li>
+ <a href="#search.html">${SEARCH_DIR}/search.html</a>
+ </li>
+ <li>
+ <a href="#header.html">${COMMON_DIR}/header.html</a>
+ </li>
+ <li>
+ <a href="#footer.html">${COMMON_DIR}/footer.html</a>
+ </li>
+ <li>
+ <a href="#wrapper.html">${COMMON_DIR}/wrapper.html</a>
+ </li>
+ <li>
+ <a href="#nomatch.html">${COMMON_DIR}/nomatch.html</a>
+ </li>
+ <li>
+ <a href="#syntax.html">${COMMON_DIR}/syntax.html</a>
+ </li>
+ </ul>
+ <hr noshade>
+ <h2>
+ <a name="htdig.conf">${CONFIG_DIR}/htdig.conf</a>
+ </h2>
+ <p>
+ This is the main runtime configuration file for all programs
+ that make up ht://Dig. The file is fully described in the
+ <a href="confindex.html" target="_top">Configuration file
+ manual</a>.
+ </p>
+ <p>
+ When ht://Dig is installed, several attributes will be
+ customized to your particular environment, but for reference,
+ here is a sample copy of what it can look like:
+ </p>
+<pre>
+<font size="-1">
+#
+# Example config file for ht://Dig.
+#
+# This configuration file is used by all the programs that make up ht://Dig.
+# Please refer to the attribute reference manual for more details on what
+# can be put into this file. (http://www.htdig.org/confindex.html)
+# Note that most attributes have very reasonable default values so you
+# really only have to add attributes here if you want to change the defaults.
+#
+# What follows are some of the common attributes you might want to change.
+#
+
+#
+# Specify where the database files need to go. Make sure that there is
+# plenty of free disk space available for the databases. They can get
+# pretty big.
+#
+database_dir: /opt/www/htdig/db
+
+#
+# This specifies the URL where the robot (htdig) will start. You can specify
+# multiple URLs here. Just separate them by some whitespace.
+# The example here will cause the ht://Dig homepage and related pages to be
+# indexed.
+# You could also index all the URLs in a file like so:
+# start_url: `${common_dir}/start.url`
+#
+start_url: http://www.htdig.org/
+
+#
+# This attribute limits the scope of the indexing process. The default is to
+# set it to the same as the start_url above. This way only pages that are on
+# the sites specified in the start_url attribute will be indexed and it will
+# reject any URLs that go outside of those sites.
+#
+# Keep in mind that the value for this attribute is just a list of string
+# patterns. As long as URLs contain at least one of the patterns it will be
+# seen as part of the scope of the index.
+#
+limit_urls_to: ${start_url}
+
+#
+# If there are particular pages that you definitely do NOT want to index, you
+# can use the exclude_urls attribute. The value is a list of string patterns.
+# If a URL matches any of the patterns, it will NOT be indexed. This is
+# useful to exclude things like virtual web trees or database accesses. By
+# default, all CGI URLs will be excluded. (Note that the /cgi-bin/ convention
+# may not work on your web server. Check the path prefix used on your web
+# server.)
+#
+exclude_urls: /cgi-bin/ .cgi
+
+#
+# Since ht://Dig does not (and cannot) parse every document type, this
+# attribute is a list of strings (extensions) that will be ignored during
+# indexing. These are *only* checked at the end of a URL, whereas
+# exclude_urls patterns are matched anywhere.
+#
+# Also keep in mind that while other attributes allow regex, these must be
+# actual strings.
+#
+bad_extensions: .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif \
+ .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css
+
+#
+# The string htdig will send in every request to identify the robot. Change
+# this to your email address.
+#
+maintainer: unconfigured@htdig.searchengine.maintainer
+
+#
+# The excerpts that are displayed in long results rely on stored information
+# in the index databases. The compiled default only stores 512 characters of
+# text from each document (this excludes any HTML markup...) If you plan on
+# using the excerpts you probably want to make this larger. The only concern
+# here is that more disk space is going to be needed to store the additional
+# information. Since disk space is cheap (! :-)) you might want to set this
+# to a value so that a large percentage of the documents that you are going
+# to be indexing are stored completely in the database. At SDSU we found
+# that by setting this value to about 50k the index would get 97% of all
+# documents completely and only 3% was cut off at 50k. You probably want to
+# experiment with this value.
+# Note that if you want to set this value low, you probably want to set the
+# excerpt_show_top attribute to false so that the top excerpt_length characters
+# of the document are always shown.
+#
+max_head_length: 10000
+
+#
+# To limit network connections, ht://Dig will only pull up to a certain limit
+# of bytes. This prevents the indexing from dying because the server keeps
+# sending information. However, several FAQs happen because people have files
+# bigger than the default limit of 100KB. This sets the default a bit higher.
+# (see &lt;http://www.htdig.org/FAQ.html&gt; for more)
+#
+max_doc_size: 200000
+
+#
+# Most people expect some sort of excerpt in results. By default, if the
+# search words aren't found in context in the stored excerpt, htsearch shows
+# the text defined in the no_excerpt_text attribute:
+# (None of the search words were found in the top of this document.)
+# This attribute instead will show the top of the excerpt.
+#
+no_excerpt_show_top: true
+
+#
+# Depending on your needs, you might want to enable some of the fuzzy search
+# algorithms. There are several to choose from and you can use them in any
+# combination you feel comfortable with. Each algorithm will get a weight
+# assigned to it so that in combinations of algorithms, certain algorithms get
+# preference over others. Note that the weights only affect the ranking of
+# the results, not the actual searching.
+# The available algorithms are:
+# accents
+# exact
+# endings
+# metaphone
+# prefix
+# regex
+# soundex
+# speling [sic]
+# substring
+# synonyms
+# By default only the "exact" algorithm is used with weight 1.
+# Note that if you are going to use the endings, metaphone, soundex, accents,
+# or synonyms algorithms, you will need to run htfuzzy to generate
+# the databases they use.
+#
+search_algorithm: exact:1 synonyms:0.5 endings:0.1
+
+#
+# The following are the templates used in the builtin search results
+# The default is to use compiled versions of these files, which produces
+# slightly faster results. However, uncommenting these lines makes it
+# very easy to change the format of search results.
+# See &lt;http://www.htdig.org/hts_templates.html&gt; for more details.
+#
+# template_map: Long long ${common_dir}/long.html \
+# Short short ${common_dir}/short.html
+# template_name: long
+
+#
+# The following are used to change the text for the page index.
+# The defaults are just boring text numbers. These images spice
+# up the result pages quite a bit. (Feel free to do whatever, though)
+#
+next_page_text: &lt;img src="/htdig/buttonr.gif" border="0" align="middle" width="30" height="30" alt="next"&gt;
+no_next_page_text:
+prev_page_text: &lt;img src="/htdig/buttonl.gif" border="0" align="middle" width="30" height="30" alt="prev"&gt;
+no_prev_page_text:
+page_number_text: '&lt;img src="/htdig/button1.gif" border="0" align="middle" width="30" height="30" alt="1"&gt;' \
+ '&lt;img src="/htdig/button2.gif" border="0" align="middle" width="30" height="30" alt="2"&gt;' \
+ '&lt;img src="/htdig/button3.gif" border="0" align="middle" width="30" height="30" alt="3"&gt;' \
+ '&lt;img src="/htdig/button4.gif" border="0" align="middle" width="30" height="30" alt="4"&gt;' \
+ '&lt;img src="/htdig/button5.gif" border="0" align="middle" width="30" height="30" alt="5"&gt;' \
+ '&lt;img src="/htdig/button6.gif" border="0" align="middle" width="30" height="30" alt="6"&gt;' \
+ '&lt;img src="/htdig/button7.gif" border="0" align="middle" width="30" height="30" alt="7"&gt;' \
+ '&lt;img src="/htdig/button8.gif" border="0" align="middle" width="30" height="30" alt="8"&gt;' \
+ '&lt;img src="/htdig/button9.gif" border="0" align="middle" width="30" height="30" alt="9"&gt;' \
+ '&lt;img src="/htdig/button10.gif" border="0" align="middle" width="30" height="30" alt="10"&gt;'
+#
+# To make the current page stand out, we will put a border around the
+# image for that page.
+#
+no_page_number_text: '&lt;img src="/htdig/button1.gif" border="2" align="middle" width="30" height="30" alt="1"&gt;' \
+ '&lt;img src="/htdig/button2.gif" border="2" align="middle" width="30" height="30" alt="2"&gt;' \
+ '&lt;img src="/htdig/button3.gif" border="2" align="middle" width="30" height="30" alt="3"&gt;' \
+ '&lt;img src="/htdig/button4.gif" border="2" align="middle" width="30" height="30" alt="4"&gt;' \
+ '&lt;img src="/htdig/button5.gif" border="2" align="middle" width="30" height="30" alt="5"&gt;' \
+ '&lt;img src="/htdig/button6.gif" border="2" align="middle" width="30" height="30" alt="6"&gt;' \
+ '&lt;img src="/htdig/button7.gif" border="2" align="middle" width="30" height="30" alt="7"&gt;' \
+ '&lt;img src="/htdig/button8.gif" border="2" align="middle" width="30" height="30" alt="8"&gt;' \
+ '&lt;img src="/htdig/button9.gif" border="2" align="middle" width="30" height="30" alt="9"&gt;' \
+ '&lt;img src="/htdig/button10.gif" border="2" align="middle" width="30" height="30" alt="10"&gt;'
+</font>
+</pre>
+ <hr noshade>
+ <h2>
+ <a name="search.html">${SEARCH_DIR}/search.html</a>
+ </h2>
+ <p>
+ This is the default search form. It is an example interface to
+ the search engine, htsearch. The file contains a form with as
+ its action a call to htsearch. There are several form variables
+ which htsearch will use. More about those can be found in the
+ <a href="htsearch.html" target="_top">htsearch
+ documentation</a>.
+ </p>
+ <p>
+ An example file can be as follows:
+ </p>
+<pre>
+<font size="-1">
+&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"&gt;
+&lt;html&gt;
+&lt;head&gt;
+&lt;title&gt;ht://Dig WWW Search&lt;/title&gt;
+&lt;/head&gt;
+&lt;body bgcolor="#eef7ff"&gt;
+&lt;h1&gt;
+&lt;a href="http://www.htdig.org"&gt;&lt;IMG SRC="/htdig/htdig.gif" align="bottom" alt="ht://Dig" border="0"&gt;&lt;/a&gt;
+WWW Site Search&lt;/h1&gt;
+&lt;hr noshade size="4"&gt;
+This search will allow you to search the contents of
+all the publicly available WWW documents at this site.
+&lt;br&gt;
+&lt;p&gt;
+&lt;form method="post" action="/cgi-bin/htsearch"&gt;
+&lt;font size="-1"&gt;
+Match: &lt;select name="method"&gt;
+&lt;option value="and"&gt;All
+&lt;option value="or"&gt;Any
+&lt;option value="boolean"&gt;Boolean
+&lt;/select&gt;
+Format: &lt;select name="format"&gt;
+&lt;option value="builtin-long"&gt;Long
+&lt;option value="builtin-short"&gt;Short
+&lt;/select&gt;
+Sort by: &lt;select name="sort"&gt;
+&lt;option value="score"&gt;Score
+&lt;option value="time"&gt;Time
+&lt;option value="title"&gt;Title
+&lt;option value="revscore"&gt;Reverse Score
+&lt;option value="revtime"&gt;Reverse Time
+&lt;option value="revtitle"&gt;Reverse Title
+&lt;/select&gt;
+&lt;/font&gt;
+&lt;input type="hidden" name="config" value="htdig"&gt;
+&lt;input type="hidden" name="restrict" value=""&gt;
+&lt;input type="hidden" name="exclude" value=""&gt;
+&lt;br&gt;
+Search:
+&lt;input type="text" size="30" name="words" value=""&gt;
+&lt;input type="submit" value="Search"&gt;
+&lt;/form&gt;
+&lt;hr noshade size="4"&gt;
+&lt;/body&gt;
+&lt;/html&gt;
+
+</font>
+</pre>
+ <hr noshade>
+ <h2>
+ <a name="header.html">${COMMON_DIR}/header.html</a>
+ </h2>
+ <p>
+ This file is the file that is output before any of the search
+ results are produced in a search. This file can be customized
+ to reflect your particular web look-and-feel, for example. Take
+ note that this file is only the top part of the full HTML
+ document that is produced when search results are displayed.
+ This means that it should start with the proper HTML
+ introductory tags and title.
+ </p>
+ <p>
+ This file will not just simply be copied. Instead, the search
+ engine will look for special variables inside the file. These
+ variables will be replaced with the appropriate values for
+ the particular search it is used for. For more details of the
+ use of these variables, consult the
+ <a href="hts_templates.html">htsearch templates</a> documentation.
+ </p>
+ <p>
+ Below is the default header.html file that gets installed.
+ Note that it contains a form to allow the user to refine the
+ search.
+ </p>
+<pre>
+<font size="-1">
+&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"&gt;
+&lt;html&gt;&lt;head&gt;&lt;title&gt;Search results for '$&(WORDS)'&lt;/title&gt;&lt;/head&gt;
+&lt;body bgcolor="#eef7ff"&gt;
+&lt;h2&gt;&lt;img src="/htdig/htdig.gif" alt="ht://Dig"&gt;
+Search results for '$&(LOGICAL_WORDS)'&lt;/h2&gt;
+&lt;hr noshade size="4"&gt;
+&lt;form method="get" action="$(CGI)"&gt;
+&lt;font size="-1"&gt;
+&lt;input type="hidden" name="config" value="$&(CONFIG)"&gt;
+&lt;input type="hidden" name="restrict" value="$&(RESTRICT)"&gt;
+&lt;input type="hidden" name="exclude" value="$&(EXCLUDE)"&gt;
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort by: $(SORT)
+&lt;br&gt;
+Refine search:
+&lt;input type="text" size="30" name="words" value="$&(WORDS)"&gt;
+&lt;input type="submit" value="Search"&gt;
+&lt;/font&gt;
+&lt;/form&gt;
+&lt;hr noshade size="1"&gt;
+&lt;strong&gt;Documents $(FIRSTDISPLAYED) - $(LASTDISPLAYED) of $(MATCHES) matches.
+More &lt;img src="/htdig/star.gif" alt="*"&gt;'s indicate a better match.
+&lt;/strong&gt;
+&lt;hr noshade size="1"&gt;
+</font>
+</pre>
+ <hr noshade>
+ <h2>
+ <a name="footer.html">${COMMON_DIR}/footer.html</a>
+ </h2>
+ <p>
+ This file is output after all the search results have been
+ displayed. All the same header.html rules apply to this file,
+ except that it is supposed to contain all the ending HTML tags.
+ </p>
+ <p>
+ Below is the default footer.html file that gets installed.
+ Note that it contains the page navigation stuff.
+ </p>
+<pre>
+<font size="-1">
+$(PAGEHEADER)
+$(PREVPAGE) $(PAGELIST) $(NEXTPAGE)
+&lt;hr noshade size="4"&gt;
+&lt;a href="http://www.htdig.org/"&gt;
+&lt;img src="/htdig/htdig.gif" border="0" alt="ht://Dig"&gt;ht://Dig $(VERSION)&lt;/a&gt;
+&lt;/body&gt;&lt;/html&gt;
+</font>
+</pre>
+ <hr noshade>
+ <h2>
+ <a name="wrapper.html">${COMMON_DIR}/wrapper.html</a>
+ </h2>
+ <p>
+ This file may be used in place of the header.html and footer.html
+ files above. It is simply the concatenation of these two files,
+ with the pseudo-variable <strong>$(HTSEARCH_RESULTS)</strong> as
+ a separator for the header and footer sections.
+ All the same header.html and footer.html rules apply to this file.
+ To make this file override the header and footer files above, you
+ must define the <a href="attrs.html#search_results_wrapper">
+ search_results_wrapper</a> attribute.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="nomatch.html">${COMMON_DIR}/nomatch.html</a>
+ </h2>
+ <p>
+ If a search produces no matches, this file is displayed. All
+ the relevant variables will be replaced as in the header.html
+ and footer.html files. The default nomatch.html is little more
+ than header.html and footer.html appended:
+ </p>
+<pre>
+<font size="-1">
+&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"&gt;
+&lt;html&gt;&lt;head&gt;&lt;title&gt;No match for '$&(LOGICAL_WORDS)'&lt;/title&gt;&lt;/head&gt;
+&lt;body bgcolor="#eef7ff"&gt;
+&lt;h1&gt;&lt;img src="/htdig/htdig.gif" alt="ht://Dig"&gt;
+Search results&lt;/h1&gt;
+&lt;hr noshade size="4"&gt;
+&lt;h2&gt;No matches were found for '$&(LOGICAL_WORDS)'&lt;/h2&gt;
+&lt;p&gt;
+Check the spelling of the search word(s) you used.
+If the spelling is correct and you only used one word,
+try using one or more similar search words with "&lt;strong&gt;Any&lt;/strong&gt;."
+&lt;/p&gt;&lt;p&gt;
+If the spelling is correct and you used more than one
+word with "&lt;strong&gt;Any&lt;/strong&gt;," try using one or more similar search
+words with "&lt;strong&gt;Any&lt;/strong&gt;."&lt;/p&gt;&lt;p&gt;
+If the spelling is correct and you used more than one
+word with "&lt;strong&gt;All&lt;/strong&gt;," try using one or more of the same words
+with "&lt;strong&gt;Any&lt;/strong&gt;."&lt;/p&gt;
+&lt;hr noshade size="4"&gt;
+&lt;form method="get" action="$(CGI)"&gt;
+&lt;font size="-1"&gt;
+&lt;input type="hidden" name="config" value="$&(CONFIG)"&gt;
+&lt;input type="hidden" name="restrict" value="$&(RESTRICT)"&gt;
+&lt;input type="hidden" name="exclude" value="$&(EXCLUDE)"&gt;
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort by: $(SORT)
+&lt;br&gt;
+Refine search:
+&lt;input type="text" size="30" name="words" value="$&(WORDS)"&gt;
+&lt;input type="submit" value="Search"&gt;
+&lt;/font&gt;
+&lt;/form&gt;
+&lt;hr noshade size="4"&gt;
+&lt;a href="http://www.htdig.org/"&gt;
+&lt;img src="/htdig/htdig.gif" border="0" alt="ht://Dig"&gt;ht://Dig $(VERSION)&lt;/a&gt;
+&lt;/body&gt;&lt;/html&gt;
+</font>
+</pre>
+ <hr noshade>
+ <h2>
+ <a name="syntax.html">${COMMON_DIR}/syntax.html</a>
+ </h2>
+ <p>
+ If a boolean expression search causes a syntax error, this file
+ will be displayed.
+ </p>
+<pre>
+<font size="-1">
+&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"&gt;
+&lt;html&gt;&lt;head&gt;&lt;title&gt;Error in Boolean search for '$&(WORDS)'&lt;/title&gt;&lt;/head&gt;
+&lt;body bgcolor="#eef7ff"&gt;
+&lt;h1&gt;&lt;img src="/htdig/htdig.gif" alt="ht://Dig"&gt;
+Error in Boolean search for '$&(LOGICAL_WORDS)'&lt;/h1&gt;
+&lt;hr noshade size="4"&gt;
+Boolean expressions need to be 'correct' in order for the search
+system to use them.
+The expression you entered has errors in it.&lt;p&gt;
+Examples of correct expressions are: &lt;strong&gt;cat and dog&lt;/strong&gt;, &lt;strong&gt;cat
+not dog&lt;/strong&gt;, &lt;strong&gt;cat or (dog not nose)&lt;/strong&gt;.&lt;br&gt;Note that
+the operator &lt;strong&gt;not&lt;/strong&gt; has the meaning of 'without'.
+&lt;blockquote&gt;&lt;strong&gt;
+$(SYNTAXERROR)
+&lt;/strong&gt;&lt;/blockquote&gt;
+&lt;hr noshade size="4"&gt;
+&lt;form method="get" action="$(CGI)"&gt;
+&lt;font size="-1"&gt;
+&lt;input type="hidden" name="config" value="$&(CONFIG)"&gt;
+&lt;input type="hidden" name="restrict" value="$&(RESTRICT)"&gt;
+&lt;input type="hidden" name="exclude" value="$&(EXCLUDE)"&gt;
+Match: $(METHOD)
+Format: $(FORMAT)
+Sort: $(SORT)
+&lt;br&gt;
+Refine search:
+&lt;input type="text" size="30" name="words" value="$&(WORDS)"&gt;
+&lt;input type="submit" value="Search"&gt;
+&lt;/font&gt;
+&lt;/form&gt;
+&lt;hr noshade size="4"&gt;
+&lt;a href="http://www.htdig.org/"&gt;
+&lt;img src="/htdig/htdig.gif" border="0" alt="ht://Dig"&gt;ht://Dig $(VERSION)&lt;/a&gt;
+&lt;/body&gt;&lt;/html&gt;
+
+</font>
+</pre>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/confindex.html b/debian/htdig/htdig-3.2.0b6/htdoc/confindex.html
new file mode 100644
index 00000000..ce6311aa
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/confindex.html
@@ -0,0 +1,12 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig -- Internet search engine software
+ </title>
+ </head>
+ <frameset cols="212, *" frameborder="1" framespacing="5" border="1">
+ <frame name="contents" src="confmenu.html">
+ <frame name="body" src="cf_general.html">
+ </frameset>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/confmenu.html b/debian/htdig/htdig-3.2.0b6/htdoc/confmenu.html
new file mode 100644
index 00000000..37bc3476
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/confmenu.html
@@ -0,0 +1,34 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ Configuration file
+ </title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Configuration file
+ </h2>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a> <br>
+ <br>
+ <strong>File format</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="cf_general.html" target="body">General</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="cf_blocks.html" target="body">Block restrictions</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="cf_types.html" target="body">Attribute types</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="cf_variables.html" target="body">Variable expansion</a><br>
+ </font><br>
+ <strong>Attributes</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="cf_byprog.html" target="contents">By program</a><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="cf_byname.html" target="contents">Alphabetical</a><br>
+ </font>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/contents.html b/debian/htdig/htdig-3.2.0b6/htdoc/contents.html
new file mode 100644
index 00000000..7ec9e315
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/contents.html
@@ -0,0 +1,59 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig Table of Contents
+ </title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ Contents
+ </h2>
+ <strong>General</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="main.html" target="body">ht://Dig</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="require.html" target="body">Features and Requirements</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="where.html" target="body">Where to get it</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="install.html" target="body">Installation</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="config.html" target="body">Configuration</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="running.html" target="body">Running ht://Dig</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="FAQ.html" target="body">FAQ</a><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="mailarchive.html" target="_top">Mailing list</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="uses.html" target="body">Uses of ht://Dig</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="COPYING" target="body">License information</a><br>
+ </font> <br>
+ <strong>Reference</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="all.html" target="body">Overview</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="rundig.html" target="body">rundig</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htdig.html" target="body">htdig</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htmerge.html" target="body">htmerge</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htnotify.html" target="body">htnotify</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htdump.html" target="body">htdump</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htload.html" target="body">htload</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htpurge.html" target="body">htpurge</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htstat.html" target="body">htstat</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="htfuzzy.html" target="body">htfuzzy</a><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="htsearch.html" target="_top">htsearch</a><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="confindex.html" target="_top">Configuration file</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="meta.html" target="body">META tags</a><br>
+ </font> <br>
+ <strong>Other</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="THANKS.html" target="body">Contributors</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="RELEASE.html" target="body">Release notes</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="ChangeLog" target="body">ChangeLog</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="TODO.html" target="body">TODO</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="bugs.html" target="body">Bug Reporting</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/rel_projects.html" target="body">Related Projects</a><br>
+ <img src="triangle.gif" alt="&gt;" width=9 height=9> <a href="http://www.htdig.org/contrib/" target="_top">Contributed Work</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://sourceforge.net/projects/htdig/" target="_top">Developer Site</a><br>
+ </font><br>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ <input type=hidden name=config value=htdig>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/dot.gif b/debian/htdig/htdig-3.2.0b6/htdoc/dot.gif
new file mode 100644
index 00000000..3ea026b7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/dot.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htdig.gif b/debian/htdig/htdig-3.2.0b6/htdoc/htdig.gif
new file mode 100644
index 00000000..35443fb6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htdig.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htdig.html b/debian/htdig/htdig-3.2.0b6/htdoc/htdig.html
new file mode 100644
index 00000000..0416c90b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htdig.html
@@ -0,0 +1,256 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htdig
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htdig
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htdig [<em>options</em>] [<em>start_url_file</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htdig retrieves HTML documents using the HTTP protocol and
+ gathers information from these documents which can later be
+ used to search these documents. This program can be
+ referred to as the search robot.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htdig to append <em>
+ .work</em> to database files, causing a second copy of
+ the database to be built. This allows the original
+ files to be used by htsearch during the indexing run. When
+ used without the "-i" flag for an update dig, htdig will
+ use any existing .work files for the databases to update.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <em>configfile</em> file instead of the
+ default.
+ </dd>
+ <dt>
+ -h <em>maxhops</em>
+ </dt>
+ <dd>
+ Restrict the dig to documents that are at most <em>
+ maxhops</em> links away from the starting document.
+ </dd>
+ <dt>
+ -i
+ </dt>
+ <dd>
+ Initial. Do not use any old databases. This is
+ accomplished by first erasing the databases.
+ </dd>
+ <dt>
+ -m <em>url_file</em>
+ </dt>
+ <dd>
+ Minimal. Index only the URLs listed in
+ <em>url_file</em> and no others.
+ A file name of "-" reads from STDIN.
+ See also the <em>start_url_file</em> argument.
+ </dd>
+ <dt>
+ -s
+ </dt>
+ <dd>
+ Print statistics about the dig after completion.
+ </dd>
+ <dt>
+ -t
+ </dt>
+ <dd>
+ Create an ASCII version of the document database. This
+ database is easy to parse with other programs so that
+ information can be extracted from it for purposes other
+ than searching. One could gather some interesting
+ statistics from this database.
+ <p>Each line in the file starts with the document id
+ followed by a list of
+ <strong>\t<em>fieldname</em>:<em>value</em></strong>.
+ The fields always appear in the order listed below:
+ </p>
+ <table border=0>
+ <tr> <th>fieldname</th><th>value</th></tr>
+ <tr> <td>u</td><td>URL</td></tr>
+ <tr> <td>t</td><td>Title</td></tr>
+ <tr> <td>a</td><td>State (0 = normal, 1 = not found, 2
+ = not indexed, 3 = obsolete)</td></tr>
+ <tr> <td>m</td><td>Last modification time as reported
+ by the server</td></tr>
+ <tr> <td>s</td><td>Size in bytes</td></tr>
+ <tr> <td>H</td><td>Excerpt</td></tr>
+ <tr> <td>h</td><td>Meta description</td></tr>
+ <tr> <td>l</td><td>Time of last retrieval</td></tr>
+ <tr> <td>L</td><td>Count of the links in the document
+ (outgoing links)</td></tr>
+ <tr> <td>b</td><td>Count of the links to the document
+ (incoming links or backlinks)</td></tr>
+ <tr> <td>c</td><td>HopCount of this document</td></tr>
+ <tr> <td>g</td><td>Signature of the document used for
+ duplicate-detection</td></tr>
+ <tr> <td>e</td><td>E-mail address to use for a
+ notification message from htnotify</td></tr>
+ <tr> <td>n</td><td>Date to send out a notification
+ e-mail message</td></tr>
+ <tr> <td>S</td><td>Subject for a notification e-mail
+ message</td></tr>
+ <tr> <td>d</td><td>The text of links pointing to this
+ document. (e.g. &lt;a
+ href=&quot;docURL&quot;&gt;description&lt;/a&gt;)</td></tr>
+ <tr> <td>A</td><td>Anchors in the document (i.e. &lt;A
+ NAME=...)</td></tr>
+ </table>
+ </dd>
+ <dt>
+ -u <em>username:password</em>
+ </dt>
+ <dd>
+ Tells htdig to send the supplied username and password
+ with each HTTP request. The credentials will be encoded
+ using the 'Basic' authentication scheme. There <strong>
+ HAS</strong> to be a colon (:) between the username and
+ password.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This increases the verbosity of the
+ program. Using more than 2 is probably only useful for
+ debugging purposes. The default verbose mode (using
+ only one -v) gives a nice progress report while
+ digging. This progress report can be a bit
+ cryptic, so here is a brief explanation. A line
+ is shown for each URL, with 3 numbers before the
+ URL and some symbols after the URL. The first
+ number is the number of documents parsed so
+ far, the second is the DocID for this document,
+ and the third is the hop count of the document
+ (number of hops from one of the start_url
+ documents). After the URL, it shows a "*" for
+ a link in the document that it already visited,
+ a "+" for a new link it just queued, and a "-"
+ for a link it rejected for any of a number of
+ reasons. To find out what those reasons are,
+ you need to run htdig with at least 3 -v options,
+ i.e. -vvv. If there are no "*", "+" or "-" symbols
+ after the URL, it doesn't mean the document was
+ not parsed or was empty, but only that no links
+ to other documents were found within it. With
+ more verbose output, these symbols will get
+ interspersed in several lines of debugging output.
+ </dd>
+ <dt>
+ <em>start_url_file</em>
+ </dt>
+ <dd>
+ A file containing a list of URLs to start indexing
+ from, or "-" for STDIN. This will augment the default
+ <a href="attrs.html#start_url">start_url</a>
+ and override the file supplied to
+ [-m <em>url_file</em>].
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.docdb
+ </dt>
+ <dd>
+ Stores data about each document (title, url, etc.).
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.words.db,
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.words.db_weakcmpr
+ </dt>
+ <dd>
+ Record which documents each word occurs in.
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.excerpts
+ </dt>
+ <dd>
+ Stores start of each document to show context of
+ matches.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htmerge.html">htmerge</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>,
+ <a href="attrs.html">Configuration file format</a>, and
+ <a href="http://www.robotstxt.org/wc/norobots.html">
+ A Standard for Robot Exclusion</a>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htdig_big.gif b/debian/htdig/htdig-3.2.0b6/htdoc/htdig_big.gif
new file mode 100644
index 00000000..1a8ada54
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htdig_big.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htdigconfig.8.in b/debian/htdig/htdig-3.2.0b6/htdoc/htdigconfig.8.in
new file mode 100644
index 00000000..0355a3af
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htdigconfig.8.in
@@ -0,0 +1,18 @@
+.TH htdigconfig 8 "23 April 1999"
+.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection
+.\" other parms are allowed: see man(7), man(1)
+.SH NAME
+htdigconfig \- script to create fuzzy databases for ht://Dig
+.SH SYNOPSIS
+.B htdigconfig
+.SH "DESCRIPTION"
+.I htdigconfig
+is a script to create fuzzy databases such as word2root, root2word and
+synonyms databases for the ht://Dig search engine.
+.SH "SEE ALSO"
+Please refer to the HTML pages (in the htdig-doc package)
+.B (THIS MUST BE CUSTOMISED...)
+.B /usr/share/doc/htdig/html/index.html
+for a detailed description of ht://Dig and its commands.
+.SH AUTHOR
+This manual page was written by Gergely Madarasz, modified by Stijn de Bekker.
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htdump.html b/debian/htdig/htdig-3.2.0b6/htdoc/htdump.html
new file mode 100644
index 00000000..5462a929
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htdump.html
@@ -0,0 +1,201 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htdump
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htdump
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htdump [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htdump writes out an ASCII-text version of the document and word
+ databases in the same form as the -t option of htdig.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htdump to append <em>
+ .work</em> to database files, allowing it to
+ operate on a second set of databases.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <em>configfile</em> file instead of the
+ default.
+ </dd>
+ <dt>
+ -d
+ </dt>
+ <dd>
+ Do <strong>not</strong> dump the document database.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This doesn't have much effect.
+ </dd>
+ <dt>
+ -w
+ </dt>
+ <dd>
+ Do <strong>not</strong> dump the word database.
+ </dd>
+
+ </dl>
+ </dd>
+ </dl>
+
+ <dl>
+ <dd>
+ <h2>
+ File Formats
+ </h2>
+ </dd>
+ <dl>
+ <dt>
+ <h3>Document Database</h3>
+ </dt>
+ <dd>
+ <p>Each line in the file starts with the document id
+ followed by a list of
+ <strong><em>fieldname</em>:<em>value</em></strong>
+ separated by tabs. The fields always appear in the
+ order listed below:
+ </p>
+ <table border=0>
+ <tr> <th>fieldname</th> <th align="left">value</th></tr>
+ <tr> <td>u</td><td>URL</td></tr>
+ <tr> <td>t</td><td>Title</td></tr>
+ <tr> <td>a</td><td>State (0 = normal, 1 = not found, 2
+ = not indexed, 3 = obsolete)</td></tr>
+ <tr> <td>m</td><td>Last modification time as reported
+ by the server</td></tr>
+ <tr> <td>s</td><td>Size in bytes</td></tr>
+ <tr> <td>H</td><td>Excerpt</td></tr>
+ <tr> <td>h</td><td>Meta description</td></tr>
+ <tr> <td>l</td><td>Time of last retrieval</td></tr>
+ <tr> <td>L</td><td>Count of the links in the document
+ (outgoing links)</td></tr>
+ <tr> <td>b</td><td>Count of the links to the document
+ (incoming links or backlinks)</td></tr>
+ <tr> <td>c</td><td>HopCount of this document</td></tr>
+ <tr> <td>g</td><td>Signature of the document used for
+ duplicate-detection</td></tr>
+ <tr> <td>e</td><td>E-mail address to use for a
+ notification message from htnotify</td></tr>
+ <tr> <td>n</td><td>Date to send out a notification
+ e-mail message</td></tr>
+ <tr> <td>S</td><td>Subject for a notification e-mail
+ message</td></tr>
+ <tr> <td>d</td><td>The text of links pointing to this
+ document. (e.g. &lt;a
+ href=&quot;docURL&quot;&gt;description&lt;/a&gt;)</td></tr>
+ <tr> <td>A</td><td>Anchors in the document (i.e. &lt;A
+ NAME=...)</td></tr>
+ </table>
+ </dd>
+ <dt>
+ <h3>Word Database</h3>
+ </dt>
+ <dd>
+ <p>
+ The first line of the ASCII word database is a comment,
+ prefixed with '#' and specifies the columns of the file
+ separated by tabs.
+ The fields are:</p>
+ <blockquote>
+ <em>word</em><br>
+ <em>document id</em><br>
+ <em>flags</em><br>
+ <em>location</em><br>
+ <em>anchor</em><br>
+ </blockquote>
+ </dd>
+ </dl>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.docs
+ </dt>
+ <dd>
+ The default ASCII document database file.
+ </dd>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.worddump
+ </dt>
+ <dd>
+ The default ASCII word database file.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htload.html">htload</a> and
+ <a href="attrs.html">Configuration file format</a>
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htfuzzy.html b/debian/htdig/htdig-3.2.0b6/htdoc/htfuzzy.html
new file mode 100644
index 00000000..2acec1d2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htfuzzy.html
@@ -0,0 +1,239 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htfuzzy
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htfuzzy
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htfuzzy [-c <em>configfile</em>][-v] <em>algorithm</em> ...
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htfuzzy creates indexes for different "fuzzy" search
+ algorithms. These indexes can then be used by the
+ <a href="htsearch.html" target="_top">htsearch</a> program.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified configuration file instead of the
+ default.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. Used once will provide progress feedback,
+ used more than once will overflow even the biggest
+ buffers. :-)
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Algorithms
+ </h2>
+ </dd>
+ <dd>
+ Indexes for the following search algorithms can currently
+ be created:
+ <dl>
+ <dt>
+ <strong>soundex</strong>
+ </dt>
+ <dd>
+ Creates a slightly modified <a href="http://www.sog.org.uk/cig/vol6/605tdrake.pdf">soundex</a> key database.
+ A soundex key encodes letters as digits, with similar
+ sounding letters (c, k, q) given the same digit. Vowels
+ are not coded.
+ Differences with the standard soundex algorithm are:
+ <ul>
+ <li>
+ Keys are 6 digits.
+ </li>
+ <li>
+ The first letter is also encoded.
+ </li>
+ </ul>
+ </dd>
+ <dt>
+ <strong>metaphone</strong>
+ </dt>
+ <dd>
+ Creates a metaphone key database. This algorithm is
+ more specific to English, but will get fewer "weird"
+ matches than the soundex algorithm.
+ </dd>
+ <dt>
+ <strong>accents</strong>
+ </dt>
+ <dd>
+ Creates an accents key database. This algorithm will
+ map all accented letters to their unaccented
+ counterparts, so that a search for the unaccented
+ word will yield all variations of this word with
+ accents.
+ </dd>
+ <dt>
+ <strong>endings</strong>
+ </dt>
+ <dd>
+ Creates two databases which can be used to match common
+ word endings. The creation of these databases requires
+ a list of affix rules and a dictionary which uses those
+ affix rules. The formats of the affix rules and
+ dictionary files are the ones used by the
+ <a href="http://fmg-www.cs.ucla.edu/fmg-members/geoff/ispell.html">
+ ispell</a> program. Included with the distribution are
+ the affix rules for English and a fairly small English
+ dictionary. Other languages can be supported by getting
+ the appropriate affix rules and dictionaries. These are
+ available for many languages; check the ispell
+ distribution for more details.
+ </dd>
+ <dt>
+ <strong>synonyms</strong>
+ </dt>
+ <dd>
+ Creates a database of synonyms for words. It reads a
+ text database of synonyms and creates a database that
+ htsearch can then use. Each line of the text database
+ consists of words where the first word will have the
+ other words on that line as synonyms.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.accents.db
+ </dt>
+ <dd>
+ (Output) Maps between characters with and without
+ accents for accents fuzzy rule
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.metaphone.db
+ </dt>
+ <dd>
+ (Output) Database of similar-sounding words for
+ metaphone fuzzy rule
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.soundex.db
+ </dt>
+ <dd>
+ (Output) Database of similar-sounding words for soundex
+ fuzzy rule
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/english.0, <a href="attrs.html#common_dir">COMMON_DIR</a>/english.aff
+ </dt>
+ <dd>
+ (Input) List of words and affix rules used to generate
+ endings
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/root2word.db, <a href="attrs.html#common_dir">COMMON_DIR</a>/word2root.db
+ </dt>
+ <dd>
+ (Output) Database used for endings fuzzy rule
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/synonyms
+ </dt>
+ <dd>
+ (Input) List of groups of words considered synonymous
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/synonyms.db
+ </dt>
+ <dd>
+ (Output) Database used for synonyms fuzzy rule
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htmerge.html">htmerge</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>,
+ <a href="attrs.html">Configuration file format</a>, and
+ <a href="http://fmg-www.cs.ucla.edu/fmg-members/geoff/ispell.html">
+ ispell</a>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htload.html b/debian/htdig/htdig-3.2.0b6/htdoc/htload.html
new file mode 100644
index 00000000..76211d97
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htload.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htload
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htload
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htload [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htload reads in an ASCII-text version of the document and word
+ databases in the same form as the -t option of htdig
+ and htdump. Note that this will overwrite data in your
+ databases, so this should be used with great care.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htload to append <em>
+ .work</em> to database files, allowing it to
+ operate on a second set of databases.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <em>configfile</em> file instead of the
+ default.
+ </dd>
+ <dt>
+ -d
+ </dt>
+ <dd>
+ Do <strong>not</strong> load the document database.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This doesn't have much effect.
+ </dd>
+ <dt>
+ -w
+ </dt>
+ <dd>
+ Do <strong>not</strong> load the word database.
+ </dd>
+
+ </dl>
+ </dd>
+ </dl>
+
+ <dl>
+ <dd>
+ <h2>
+ File Formats
+ </h2>
+ </dd>
+ <dl>
+ <dt>
+ <h3>Document Database</h3>
+ </dt>
+ <dd>
+ <p>Each line in the file starts with the document id
+ followed by a list of
+ <strong><em>fieldname</em>:<em>value</em></strong>
+ separated by tabs. The fields always appear in the
+ order listed below:
+ </p>
+ <table border=0>
+ <tr> <th>fieldname</th> <th align="left">value</th></tr>
+ <tr> <td>u</td><td>URL</td></tr>
+ <tr> <td>t</td><td>Title</td></tr>
+ <tr> <td>a</td><td>State (0 = normal, 1 = not found, 2
+ = not indexed, 3 = obsolete)</td></tr>
+ <tr> <td>m</td><td>Last modification time as reported
+ by the server</td></tr>
+ <tr> <td>s</td><td>Size in bytes</td></tr>
+ <tr> <td>H</td><td>Excerpt</td></tr>
+ <tr> <td>h</td><td>Meta description</td></tr>
+ <tr> <td>l</td><td>Time of last retrieval</td></tr>
+ <tr> <td>L</td><td>Count of the links in the document
+ (outgoing links)</td></tr>
+ <tr> <td>b</td><td>Count of the links to the document
+ (incoming links or backlinks)</td></tr>
+ <tr> <td>c</td><td>HopCount of this document</td></tr>
+ <tr> <td>g</td><td>Signature of the document used for
+ duplicate-detection</td></tr>
+ <tr> <td>e</td><td>E-mail address to use for a
+ notification message from htnotify</td></tr>
+ <tr> <td>n</td><td>Date to send out a notification
+ e-mail message</td></tr>
+ <tr> <td>S</td><td>Subject for a notification e-mail
+ message</td></tr>
+ <tr> <td>d</td><td>The text of links pointing to this
+ document. (e.g. &lt;a
+ href=&quot;docURL&quot;&gt;description&lt;/a&gt;)</td></tr>
+ <tr> <td>A</td><td>Anchors in the document (i.e. &lt;A
+ NAME=...)</td></tr>
+ </table>
+ </dd>
+ <dt>
+ <h3>Word Database</h3>
+ </dt>
+ <dd>
+ <p>
+ The first line of the ASCII word database is a comment,
+ prefixed with '#' and specifies the columns of the file
+ separated by tabs.
+ The fields are:</p>
+ <blockquote>
+ <em>word</em><br>
+ <em>document id</em><br>
+ <em>flags</em><br>
+ <em>location</em><br>
+ <em>anchor</em><br>
+ </blockquote>
+ </dd>
+ </dl>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ CONFIG_DIR/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ <dt>
+ DATABASE_DIR/db.docs
+ </dt>
+ <dd>
+ The default ASCII document database file.
+ </dd>
+ <dt>
+ DATABASE_DIR/db.worddump
+ </dt>
+ <dd>
+ The default ASCII word database file.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htdump.html">htdump</a> and
+ <a href="attrs.html">Configuration file format</a>
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htmerge.html b/debian/htdig/htdig-3.2.0b6/htdoc/htmerge.html
new file mode 100644
index 00000000..6f160096
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htmerge.html
@@ -0,0 +1,160 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htmerge
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htmerge
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htmerge [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htmerge is used to create a document index and word
+ database from the files that were created by
+ <a href="htdig.html">htdig</a>. These databases are then used by
+ <a href="htsearch.html" target="_top">htsearch</a> to perform
+ the actual searches.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htmerge to append <em>
+ .work</em> to database files, causing a second copy of
+ the database to be built. This allows the original
+ files to be used by htsearch during the indexing run.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified configuration file instead of the
+ default.
+ </dd>
+ <dt>
+ -d
+ </dt>
+ <dd>
+ Prevent the document index from being created.
+ </dd>
+ <dt>
+ -m <em>merge_configfile</em>
+ </dt>
+ <dd>
+ Merge the databases specified by merge_configfile
+ into the databases specified by -c or the default.
+ You will need a separate config file for each of
+ the two databases. Then each file will set the
+ <a href="attrs.html#database_dir">database_dir</a> or
+ <a href="attrs.html#database_base">database_base</a>
+ attribute to change the name of the databases, so
+ they are in different directories or files.<br>
+ <strong>Note:</strong> You <em>must</em> run htmerge
+ separately on each of the databases created by
+ <a href="htdig.html">htdig</a> before merging them
+ together with this option. This is because merging
+ the two wordlists together requires wordlists that
+ have already been cleaned up by htmerge.
+ Because the -m option allows you to specify only
+ one database to be merged into the other, and only
+ one -m option may be specified, if you need to merge
+ three or more databases together you must run htmerge
+ multiple times with the -m option.
+ </dd>
+ <dt>
+ -s
+ </dt>
+ <dd>
+ Print statistics about the document and word databases
+ after htmerge has finished.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Run in verbose mode. This will provide some hints as to
+ the progress of the merge. This can be useful when
+ running htmerge interactively since some parts
+ (especially the word database creation) can take a very
+ long time.
+ </dd>
+ <dt>
+ -w
+ </dt>
+ <dd>
+ Prevent the word database from being created.
+ </dd>
+ </dl>
+ In addition to the command line options, the environment
+ variable <strong>TMPDIR</strong> will be used to designate the
+ directory where intermediate files are stored during the
+ sorting process.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htsearch.html" target="_top">htsearch</a> and
+ <a href="attrs.html">Configuration file format</a>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htnotify.html b/debian/htdig/htdig-3.2.0b6/htdoc/htnotify.html
new file mode 100644
index 00000000..6bce3623
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htnotify.html
@@ -0,0 +1,120 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htnotify
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htnotify
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htnotify [-b <em>database</em>][-c <em>configfile</em>][-v]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htnotify scans the document database created by
+ <a href="htmerge.html">htmerge</a> and sends an email message for
+ every page that is out of date. Look in the
+ <a href="notification.html">notification</a> manual for
+ instructions to set up this service.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -b <em>database</em>
+ </dt>
+ <dd>
+ Specifies an alternative database to the one
+ specified in the configuration file.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified configuration file instead of the
+ default.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This increases the verbosity of the
+ program. Used once will display a log of what email
+ messages were sent. Used more than once will display
+ information about each document that has email
+ notification set.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ <dl>
+ <dt>
+ <a href="attrs.html#database_dir">DATABASE_DIR</a>/db.docdb
+ </dt>
+ <dd>
+ Stores data about each document (title, url, etc.).
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htmerge.html">htmerge</a>,
+ <a href="notification.html">Email notification service</a> and
+ <a href="attrs.html">Configuration file format</a>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htpurge.html b/debian/htdig/htdig-3.2.0b6/htdoc/htpurge.html
new file mode 100644
index 00000000..d8dcb086
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htpurge.html
@@ -0,0 +1,127 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htpurge
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htpurge
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htpurge [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htpurge functions to remove specified URLs from the
+ databases as well as bad URLs, unretrieved URLs,
+ obsolete documents, etc. It is recommended that
+ htpurge be run after htdig to clean out any documents
+ of this sort.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -
+ </dt>
+ <dd>
+ URL input. Read in a list of URLs to remove
+ from the standard input, one per line.
+ </dd>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htpurge to
+ append .work to the database files allowing it
+ to operate on a second set of databases.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <em>configfile</em> file instead of the
+ default.
+ </dd>
+ <dt>
+ -u
+ </dt>
+ <dd>
+ URL input. Add this URL to the list of URLs to remove.
+ This can be specified multiple times.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This increases the verbosity of the
+ program. Using more than 2 is probably only useful
+ for debugging purposes. The default verbose mode
+ gives a progress report on what it is doing and where it is.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htmerge.html">htmerge</a>, and
+ <a href="attrs.html">Configuration file format</a>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+ The ht://Dig Group <a
+ href="mailto:htdig@htdig.org">&lt;htdig@htdig.org&gt;</a>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_form.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_form.html
new file mode 100644
index 00000000..932dae77
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_form.html
@@ -0,0 +1,209 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htsearch
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ HTML Form
+ </h2>
+ <p>
+ The primary interface to htsearch is through an HTML form.
+ When the form is submitted, the htsearch program will take
+ values from the form and perform the actual search. The
+ search can be modified in many ways with either hidden input
+ fields or other HTML form tags. Study the examples to get a
+ feel of what things are possible.
+ </p>
+ <p>
+ The HTML form is expected to contain at least an input text
+ field named <strong>words</strong>. This is where the user will enter
+ the search words. Other values are also recognized but have
+ appropriate defaults in case they are not used:
+ </p>
+ <dl>
+ <dt>
+ <strong>
+ <a name="config">config</a></strong>
+ </dt>
+ <dd>
+ Specifies the name of the configuration file. The name here
+ is the name without the path and without the <em>.conf</em>
+ at the end. This file is assumed to be located in the
+ CONFIG_DIR directory. Periods are <strong>not</strong> allowed in this
+ field for security reasons (to prevent HTML authors from
+ pointing all around at your files).<br>
+ The default is <em>htdig</em>
+ </dd>
+ <dt>
+ <strong><a name="exclude">exclude</a></strong>
+ </dt>
+ <dd>
+ This value is a pattern that specifies which URLs are to be
+ excluded from the search results. If a URL matches one of
+ these patterns it is discarded. Multiple patterns can be
+ given, separated by a bar ("|"), or multiple definitions
+ of the exclude input parameter can be given. This pattern
+ may include regular expressions when enclosed within [ and ]
+ characters.<br>
+ The default is specified by the <a href="attrs.html#exclude">exclude</a>
+ attribute in the configuration file.
+
+ </dd>
+ <dt>
+ <strong><a name="format">format</a></strong>
+ </dt>
+ <dd>
+ This specifies the name of the template to display the
+ search results in. There are two builtin templates named
+ <em>builtin-long</em> and <i>builtin-short</i> which can be
+ used, but any number of custom templates can also be
+ defined. Find out more about the templates in the
+ <a href="hts_templates.html">Output Templates</a> section.<br>
+ The <em>format</em> value can be specified as either a
+ hidden input field or a drop down menu.<br>
+ The default is specified by the <a href="attrs.html#template_name">template_name</a>
+ attribute in the configuration file, and the template variable
+ is <a href="hts_templates.html#SELECTED_FORMAT">SELECTED_FORMAT</a>.
+ </dd>
+ <dt>
+ <strong><a name="keywords">keywords</a></strong>
+ </dt>
+ <dd>
+ Used to specify a list of required words that have to be in
+ the documents. This list of words is added to the normal
+ <em>words</em> value using logical "and"s, or logical "or"s
+ if the <a href="attrs.html#any_keywords">any_keywords</a>
+ attribute is set to true in the configuration file.<br>
+ An example use for this value is to make it a drop down
+ menu with a limited set of predetermined categories or
+ keywords to restrict the search. This can be very useful
+ for very structured pages.
+ <br>Note that the words may appear anywhere in the document.
+ The scope of these required words is <strong>not</strong> limited to
+ words in META tags with the "keywords" or "htdig-keywords"
+ property, despite what the parameter name may suggest.<br>
+ The default is specified by the <a href="attrs.html#keywords">keywords</a>
+ attribute in the configuration file.
+ </dd>
+ <dt>
+ <strong><a name="matchesperpage">matchesperpage</a></strong>
+ </dt>
+ <dd>
+ Specifies how many matches will be displayed on each page
+ of results.<br>
+ The default is specified by the <a href="attrs.html#matches_per_page">matches_per_page</a>
+ attribute in the configuration file, and the template variable
+ is <a href="hts_templates.html#MATCHES_PER_PAGE">MATCHES_PER_PAGE</a>. Since this value has
+ to be a number, it either needs to be set using a hidden
+ input field or with a drop down menu.
+ </dd>
+ <dt>
+ <strong><a name="method">method</a></strong>
+ </dt>
+ <dd>
+ This can be one of <em>and</em>, <i>or</i>, or <em>
+ boolean</em>. It determines what type of search will be
+ performed.<br>
+ The default is specified by the
+ <a href="attrs.html#match_method">match_method</a> attribute in
+ the configuration file and the template variable is
+ <a href="hts_templates.html#SELECTED_METHOD">SELECTED_METHOD</a>.
+ It is quite useful to make this item a drop down menu so the
+ user can select the type of search at search time.
+ </dd>
+ <dt>
+ <strong><a name="page">page</a></strong>
+ </dt>
+ <dd>
+ This should normally not be used. It is generated by the
+ paged results display.
+ </dd>
+ <dt>
+ <strong><a name="restrict">restrict</a></strong>
+ </dt>
+ <dd>
+ This value is a pattern that all URLs of the search results
+ will have to match. This can be used to restrict the search
+ to a particular subtree or subsection of a bigger database.
+ Multiple patterns can be given, separated by a bar ("|"), or
+ multiple definitions of the restrict input parameter can be
+ given. Any URL in the search results will have to match at
+ least one of these patterns. The pattern may include regular
+ expressions when the expression is enclosed by [ and ]
+ characters.<br>
+ Note that the restrict list does not take precedence over the
+ exclude list - if a URL matches patterns in both lists it is
+ still excluded from the search results.<br>
+ The default is specified by the <a href="attrs.html#restrict">restrict</a>
+ attribute in the configuration file.
+ </dd>
+ <dt>
+ <strong><a name="sort">sort</a></strong>
+ </dt>
+ <dd>
+ This can be one of <em>score</em>, <i>time</i>, <i>date</i>,
+ <em>title</em>, <i>revscore</i>, <i>revtime</i>, <i>revdate</i>,
+ or <em>revtitle</em>. It determines what type of sort will be
+ performed on the search results. The types <em>time</em> and
+ <em>date</em> are synonymous, as are <i>revtime</i> and
+ <em>revdate</em>, as all four sort on the time that the
+ documents were last modified, if this information is given
+ by the server.
+ The sort methods that begin with <em>rev</em> simply reverse
+ the order of the sort.<br>
+ The default is specified by the
+ <a href="attrs.html#sort">sort</a> attribute in the
+ configuration file, and the template variable is
+ <a href="hts_templates.html#SELECTED_SORT">SELECTED_SORT</a>.
+ It is quite useful to make this item a drop down menu so the
+ user can select the type of sort at search time.
+ </dd>
+ <dt>
+ <strong><a name="startyear">startyear</a></strong>, <strong>startmonth</strong>, <strong>startday</strong>,
+ <strong>endyear</strong>, <strong>endmonth</strong>, <strong>endday</strong>
+ </dt>
+ <dd>
+ These values specify the range of document
+ modification dates allowed in the search results.
+ They can be used to restrict the search
+ to particular "ages" of documents, new or old.<br>
+ If the year is specified by two digits (e.g. 02), then it
+ is assumed to be in the 1900s if it is in the range 70-99, and
+ in the 2000s if it is in the range 00-69. If the year is not
+ specified, the search does <strong>not</strong> exclude
+ documents outside the range of dates within the year. Thus
+ it is impossible, for example, to restrict a search to
+ documents dated "December".<br>
+ Incompletely specified end dates are interpreted as follows:<br>
+ <table>
+ <tr><th>Date</th> <th>Becomes</th></tr>
+ <tr><td>04-31</td> <td>04-31- end of time</td></tr>
+ <tr><td>05-1999</td><td>05-31-1999</td></tr>
+ <tr><td>1999</td> <td>12-31-1999</td></tr>
+ </table><br>
+ The default is the full range of documents in the database.
+ These values can also be specified by configuration attributes
+ of the same names in the
+ <a href="attrs.html#startyear">configuration file</a>.
+ If a negative number is given for any of these, it is taken
+ as relative to the current year, month or day.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_general.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_general.html
new file mode 100644
index 00000000..3fc05c03
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_general.html
@@ -0,0 +1,72 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htsearch
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ Htsearch is the actual search engine of the ht://Dig search
+ system. It is a CGI program that is expected to be invoked by
+ an HTML form. It will accept both the GET and POST methods of
+ passing data to the CGI program.
+ </p>
+ <h2>
+ Files used by htsearch
+ </h2>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/header.html
+ </dt>
+ <dd>
+ The default search results header file
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/footer.html
+ </dt>
+ <dd>
+ The default search results footer file
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/wrapper.html
+ </dt>
+ <dd>
+ The default search results wrapper file, that contains the
+ header and footer together in one file
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/nomatch.html
+ </dt>
+ <dd>
+ The default 'no matches found' HTML file
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/syntax.html
+ </dt>
+ <dd>
+ The default file that explains boolean expression syntax
+ errors
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_menu.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_menu.html
new file mode 100644
index 00000000..f719aeec
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_menu.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ htsearch
+ </h2>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a> <br>
+ <br>
+ <strong>htsearch</strong> <font face="helvetica,arial" size="2"><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="hts_general.html" target="body">General</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="hts_form.html" target="body">HTML form</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="hts_templates.html" target="body">Templates</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="hts_method.html" target="body">Search method</a><br>
+ </font> <br>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_method.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_method.html
new file mode 100644
index 00000000..d4a7c676
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_method.html
@@ -0,0 +1,102 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htsearch
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Search Method Used
+ </h2>
+ <p>
+ The way htsearch performs its search and applies its ranking
+ rules is fairly complicated. This is an attempt at explaining
+ in global terms what goes on when htsearch searches.
+ </p>
+ <p>
+ htsearch gets a list of (case insensitive) words from the HTML
+ form that invoked
+ it. If htsearch was invoked with boolean expression parsing
+ enabled, it will do a quick syntax check on the input words.
+ If there are syntax errors, it will display the syntax error
+ file that is specified with the
+ <a href="attrs.html#syntax_error_file">syntax_error_file</a>
+ attribute.
+ </p>
+ <p>
+ If the boolean parser was not enabled, the list of words is
+ converted into a boolean expression by putting either "and"s
+ or "or"s between the words. (This depends on the search
+ type.) Phrases within double quotes (") specify that the words
+ must occur sequentially within the document.
+ </p>
+ <p>
+ If a word is immediately preceded by a field specifier
+ (title:, heading:, author:, keyword:, descr:, link:, url:)
+ then it will only match documents in which the word occurred
+ within that field. For example, descr:foo only matches documents
+ containing &lt;meta name="description" content="... foo ..."&gt;.
+ The link: field refers to the text in the hyperlinks to a document,
+ rather than text within the document itself. Similarly url:
+ (will eventually) refer to the actual URL of the document, not any
+ of its contents.
+ The prefixes exact: and hidden: are also accepted.
+ The former (will) cause the
+ <a href="attrs.html#search_algorithm">fuzzy search algorithm</a>
+ not to be applied to this word, while the latter causes the word
+ not to be displayed in the query string of the results page.
+ </p>
+ <p>
+ Each of the words in the list (but not within a phrase) is now
+ expanded using the search algorithms that were specified in the
+ <a href="attrs.html#search_algorithm">search_algorithm</a>
+ attribute. For example, the endings algorithm will convert a
+ word like "person" into "person or persons". In this fashion,
+ all the specified algorithms are used on each of the words
+ and the result is a new boolean expression.
+ </p>
+ <p>
+ The next step is to perform database lookups on the words in
+ the expression. The results of these lookups are then passed
+ to the boolean expression parser.
+ </p>
+ <p>
+ The boolean expression parser is a simple recursive descent
+ parser with an operand stack. It knows how to deal with
+ "not", "and", "or" and parentheses. The result of the parser
+ will be one set of matches.<br>
+ Note that the operator "not" is used as the word 'without' and
+ is binary: You can not write "cat and not dog" or just "not
+ dog" but you can write "cat not dog".
+ </p>
+ <p>
+ At this point, the matches are ranked. The rank of a match is
+ determined by the weight of the words that caused the match
+ and the weight of the algorithm that generated the word. Word
+ weights are generally determined by the importance of the
+ word in a document. For example, words in the title of a
+ document have a much higher weight than words at the bottom
+ of the document.
+ </p>
+ <p>
+ Finally, when the document ranks have been determined and the
+ documents sorted, the resulting matches are displayed. If
+ paged output is required, only a subset of all the matches
+ will be displayed.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_selectors.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_selectors.html
new file mode 100644
index 00000000..e05037f9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_selectors.html
@@ -0,0 +1,324 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htsearch
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Input parameter select lists
+ </h2>
+ <p>
+ The primary interface to htsearch is through an HTML
+ <a href="hts_form.html">form</a>.
+ The input fields in this form can be defined as any type that
+ HTML allows, but some of these are best handled through HTML
+ <em>select</em> lists.
+ </p>
+ <h3>
+ <a name="predefined"></a>
+ Pre-defined template select lists
+ </h3>
+ <p>
+ In order to propagate these select lists to the search form
+ in the results template, so that the default <em>selected</em>
+ value will be the one the user last selected, htsearch defines
+ a few of these select lists as
+ <a href="hts_templates.html">template variables</a>.
+ These variables are:
+ </p>
+ <ul>
+ <li>
+ <strong>FORMAT</strong>, which selects the <b>format</b> input parameter
+ for follow-up searches, with the current value being the
+ default. The current value is taken from the <strong>format</strong>
+ input parameter, if one was provided, or from the
+ <a href="attrs.html#template_name">template_name</a>
+ attribute otherwise, and this value will also be given in
+ the <strong>SELECTED_FORMAT</strong> template variable.
+ The various parameter values, and their corresponding
+ labels for the select list, are taken from the
+ <a href="attrs.html#template_map">template_map</a>
+ attribute.
+ </li>
+ <li>
+ <strong>METHOD</strong>, which selects the <b>method</b> input parameter
+ for follow-up searches, with the current value being the
+ default. The current value is taken from the <strong>method</strong>
+ input parameter, if one was provided, or from the
+ <a href="attrs.html#match_method">match_method</a>
+ attribute otherwise, and this value will also be given in
+ the <strong>SELECTED_METHOD</strong> template variable.
+ The various parameter values, and their corresponding
+ labels for the select list, are taken from the
+ <a href="attrs.html#method_names">method_names</a>
+ attribute.
+ </li>
+ <li>
+ <strong>SORT</strong>, which selects the <b>sort</b> input parameter
+ for follow-up searches, with the current value being the
+ default. The current value is taken from the <strong>sort</strong>
+ input parameter, if one was provided, or from the
+ <a href="attrs.html#sort">sort</a>
+ attribute otherwise, and this value will also be given in
+ the <strong>SELECTED_SORT</strong> template variable.
+ The various parameter values, and their corresponding
+ labels for the select list, are taken from the
+ <a href="attrs.html#sort_names">sort_names</a>
+ attribute.
+ </li>
+ </ul>
+ <p>
+ In addition to these template variables, htsearch makes
+ use of a number of other input parameters, all of which
+ have corresponding template variables and configuration
+ attributes. It's also possible, within htsearch, to make an
+ input parameter out of any <a href="attrs.html">configuration
+ attribute</a> that's not already automatically handled by an
+ input parameter. This is accomplished by means of the
+ <a href="attrs.html#allow_in_form">allow_in_form</a> attribute.
+ The attributes listed in the allow_in_form list will be settable
+ in the search form using input parameters of the same name,
+ and will be propagated to the follow-up search form in the
+ results template using template variables of the same name
+ in upper-case.
+ </p>
+ <h3>
+ <a name="custom"></a>
+ Custom template select lists
+ </h3>
+ <p>
+ This gives you a great deal of flexibility in configuring
+ htsearch, but all of these template variables still contain
+ only the parameter value, and not a select list to choose
+ the value. In order to use any input parameters as select
+ lists, other than the three pre-defined variables above, one
+ must either statically define a select list in the results
+ template follow-up form, just as in the initial search form,
+ or instruct htsearch to build one as a template variable.
+ Statically defining new select lists is easier, as you have
+ to do it for the initial search form anyway, but this has the
+ drawback that the user's selection from the initial form does
+ not appear as the default selection in the follow-up form,
+ because the default is static.
+ </p>
+ <p>
+ To overcome this drawback, you must use the
+ <a href="attrs.html#build_select_lists">build_select_lists</a>
+ configuration attribute. Its usage is a bit complicated, but
+ it's extremely flexible, allowing you to define any htsearch
+ input parameter as a select list for use in templates, provided
+ you also define the corresponding name list attribute which
+ enumerates all the choices to put in the list. It can be used
+ for existing input parameters, as well as any you define using
+ the allow_in_form attribute.
+ </p>
+ <p>
+ The entries in this list each consist of an <em>octuple</em>, a
+ set of eight strings defining the variables and how they are to
+ be used to build a select list. The attribute can contain many
+ of these octuples. The strings in the string list are merely
+ taken eight at a time. For each octuple of strings specified in
+ build_select_lists, the elements have the following meaning:
+ </p>
+ <ol>
+ <li>
+ the name of the template variable to be defined as a list,
+ optionally followed by a comma and the type of list, and
+ optional formatting codes
+ </li>
+ <li>
+ the input parameter name that the select list will set
+ </li>
+ <li>
+ the name of the user-defined attribute containing the
+ <em>name list</em>, that is the list of values and labels
+ for the select list items, much like the template_map,
+ method_names and sort_names attributes
+ </li>
+ <li>
+ the <em>tuple</em> size used in the name list above
+ </li>
+ <li>
+ the index into a name list tuple for the value
+ </li>
+ <li>
+ the index for the corresponding label to be displayed on
+ the selector
+ </li>
+ <li>
+ the configuration attribute where the default value for
+ this input parameter is defined, which may or may not be
+ the same name as the input parameter
+ </li>
+ <li>
+ the default label, if not an empty string, which will be
+ used as the label for an additional list item for the
+ current input parameter value if it doesn't match any value
+ in the given list
+ </li>
+ </ol>
+ <p>
+ The first element in an entry is actually a comma separated
+ list. The first item within this list is the name of the
+ template variable to be created. The next item, if specified,
+ is the type of select list or input list to be created in this
+ template variable. Choices are <strong>select</strong>,
+ <strong>multiple</strong>, <strong>radio</strong>,
+ and <strong>checkbox</strong>, with the default being
+ <strong>select</strong>. The word <strong>multiple</strong>
+ refers to a &lt;select multiple&gt; type of select list,
+ where more than one option can be selected. The choices
+ <strong>radio</strong> and <strong>checkbox</strong> will build
+ lists of &lt;input&gt; tags of these types, rather than a select
+ list with &lt;option&gt; tags. The optional third and fourth
+ items in this comma separated list are text or formatting tags
+ that will be prepended and appended, respectively, to each item
+ in the built list, before the &lt;option&gt; or &lt;input&gt;
+ tag and after the label for that tag. This first element is
+ parsed as a quoted string list within a quoted string list,
+ so you can embed quotes and commas within elements of this
+ inner list if you use correct quoting. See examples below.
+ </p>
+ <p>
+ The name list that you define will most commonly consist
+ of pairs of values, and therefore you'd use a tuple size of
+ 2. The method_names and sort_names attributes are samples of
+ such name lists. You can give any name you'd like to the name
+ list attributes you define yourself, but you should try to
+ avoid using any of the names of <a href="attrs.html">existing
+ attributes</a>. The value and label for the pairs in lists you
+ define yourself can appear in either order, depending on the
+ index you specify for each. In the case where you'd want the
+ labels on the selector to be the same as the actual parameter
+ values used, which would make sense for lists of numbers,
+ you can use a tuple size of 1, and indexes of 1, to avoid
+ having to duplicate all the numbers in the list.
+ </p>
+ <p>
+ Any of the strings in an octuple may be quoted, and should be if
+ you want to include spaces or define an empty string. If the
+ default label, the eighth element in an entry, is an empty
+ string, the select list will not have a <em>selected</em> item
+ if the current input parameter value doesn't match any value
+ in the name list. If a default label is given, an additional
+ list item will be added to the list using this label, if the
+ current input parameter value doesn't match.
+ </p>
+ <p>
+ If the seventh element, the configuration attribute name, is
+ an empty string, the default value will be taken from
+ the input parameter, the second element, instead. This is
+ especially useful for input parameters that don't get mapped
+ to a configuration attribute, or for checkboxes or multiple
+ selects, where the separation between individual choices may
+ get lost when the input parameter is mapped to an attribute.
+ </p>
+ <p>
+ Here is an example of its usage, which illustrates different
+ tuple sizes and orders:
+ </p>
+ <pre>
+build_select_lists: MATCH_LIST,radio matchesperpage matches_per_page_list \
+ 1 1 1 matches_per_page "Previous Amount" \
+ RESTRICT_LIST,multiple restrict restrict_names 2 1 2 restrict "" \
+ FORMAT_LIST format template_map 3 2 1 template_name ""
+
+matches_per_page_list: 1 5 10 20 100 500
+
+restrict_names: "http://www.myschool.edu/Admin/" "Admin Web Pages" \
+ "http://www.myschool.edu/Faculty/" "Faculty Web Pages" \
+ "http://www.myschool.edu/Student/" "Student Web Pages" \
+ "" "Whole Web Site"
+ </pre>
+ <p>
+ The FORMAT_LIST example should give something equivalent to the FORMAT
+ template variable, which is already set by htsearch. It is included as
+ an additional example of how to specify the tuple size and indices of
+ values and labels in a tuple.
+ </p>
+ <p>
+ Here is an example which illustrates additional formatting tags,
+ and the quoting that may be required:
+ </p>
+ <pre>
+build_select_lists: "RESTRICT_LIST,checkbox,'&lt;font face=\\"Arial,Helvetica\\" size=\\"+2\\"&gt;',&lt;/font&gt;&lt;br&gt;" \
+ restrict restrict_names 2 1 2 restrict ""
+ </pre>
+ <p>
+ In this example, the font tag will be inserted before each
+ checkbox input tag, and the closing font tag and line break will
+ be appended after each label that follows the input tag. Because
+ the font tag has an embedded comma, which is also the separator
+ for the list in the first element, the whole tag (i.e. the whole
+ third item in the comma separated list) is quoted, and it's
+ quoted with single quotes so as not to conflict with the double
+ quotes enclosing the whole element. Finally, to embed a double
+ quote in the font tag, it must be escaped with two backslashes -
+ the first of these is absorbed by the variable expansion phase
+ that all attribute values go through, and the second is used
+ to embed the double quote within a double quoted string.
+ </p>
+ <h3>
+ <a name="template_patterns"></a>
+ Combining the format select list with template_patterns
+ </h3>
+ <p>
+ The addition of the <a href="attrs.html#template_patterns">
+ template_patterns</a> attribute has added a new wrinkle to
+ the pre-defined select list for the <strong>format</strong> parameter.
+ If a document URL matches an entry in template_patterns, its
+ corresponding result template will override any template the
+ user selected with the format parameter. The problem stems
+ from the fact that the two were not originally intended to
+ be used together in the same htsearch configuration. When
+ configuring htsearch, you'd normally set it up to use one
+ mechanism or the other, but not a combination of the two.
+ </p>
+ <p>
+ However, the following example would allow user-selected
+ templates to work together with template_patterns:
+ </p>
+ <pre>
+template_map: Long long ${common_dir}/long.html \
+ Short short ${common_dir}/short.html
+template_name: long
+template_patterns: .pdf ${common_dir}/${template_name}-pdffile.html \
+ .ps ${common_dir}/${template_name}-psfile.html \
+ .doc ${common_dir}/${template_name}-docfile.html
+ </pre>
+ <p>
+ This works because the template_name attribute gets set
+ internally in htsearch, to the user-selected value of the
+ <strong>format</strong> input parameter, before the variable expansion
+ in template_patterns takes place. As long as you stick to
+ a template file naming convention that uses the internal
+ name element of template_map (second element in a triple) as
+ part of the file name, this should work like a charm. Your
+ common directory would have to contain the template files
+ long-pdffile.html, long-psfile.html, long-docfile.html,
+ short-pdffile.html, short-psfile.html, and short-docfile.html,
+ for the example above to work, in addition to the long.html
+ and short.html files, which will be used for URLs that don't
+ match any of the patterns. Of course, these patterns can be
+ any URL parts, and not just suffixes. Be sure to also change
+ the values for format in your initial search form, search.html,
+ to use your chosen internal names.
+ </p>
+
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/hts_templates.html b/debian/htdig/htdig-3.2.0b6/htdoc/hts_templates.html
new file mode 100644
index 00000000..1f9afb27
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/hts_templates.html
@@ -0,0 +1,513 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htsearch
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htsearch
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Output Templates
+ </h2>
+ <p>
+ The htsearch program will normally produce HTML output. In this
+ process it makes extensive use of templates in which variables
+ will be substituted. The templates are specified in the
+ configuration file. The configuration file attributes defining
+ these templates are:
+ </p>
+ <ul>
+ <li>
+ <a href="attrs.html#search_results_header">
+ search_results_header</a>
+ </li>
+ <li>
+ <a href="attrs.html#search_results_footer">
+ search_results_footer</a>
+ </li>
+ <li>
+ <a href="attrs.html#search_results_wrapper">
+ search_results_wrapper</a>
+ </li>
+ <li>
+ <a href="attrs.html#nothing_found_file">
+ nothing_found_file</a>
+ </li>
+ <li>
+ <a href="attrs.html#syntax_error_file">
+ syntax_error_file</a>
+ </li>
+ </ul>
+ <p>
+ In addition to these files, the search results are also
+ produced with the use of templates. The result templates are
+ a bit more complicated because they can be specified at
+ runtime using an HTML menu. They are specified using the <a
+ href="attrs.html#template_map">template_map</a> configuration
+ file attribute. This attribute contains a list of string
+ triplets. For each triplet of strings, the elements have the
+ following meaning:
+ </p>
+ <ol>
+ <li>
+ the name that will appear in the FORMAT menu (see below)
+ </li>
+ <li>
+ the internal name used by htsearch for this result template
+ </li>
+ <li>
+ the base filename for the template.
+ </li>
+ </ol>
+ <p>
+ There are two predefined templates that are used by default.
+ They have the internal names <em>builtin-long</em> and <em>
+ builtin-short</em>. As such, the default value for the <em>
+ template_map</em> attribute is as follows:
+ </p>
+ <blockquote>
+ Long builtin-long builtin-long \<br>
+ Short builtin-short builtin-short
+ </blockquote>
+ <p>
+ Which means that there will be two ways to display the search
+ results: a "Long" and a "Short" way. The first template listed
+ will always be the default one for the first search.
+ Thereafter, the default will be whatever was selected for the
+ previous search.
+ </p>
+ <h4>
+ Result template files
+ </h4>
+ <p>
+ When a custom template is required, one or more template files
+ need to be created and referenced in the <em>template_map</em>
+ attribute. These templates are user-selectable from the search
+ form. It is also possible to select result templates based on
+ URL patterns of the search matches, using the
+ <em>template_patterns</em> attribute. This allows distinct visual
+ styles to be used for matches on different web sites.
+ </p>
+ <p>
+ There are many variables that can be substituted into these
+ templates. Not all of them make sense for each file, so not
+ all of them will be substituted for every file, as noted below.
+ <!-- Should explain "matchTemplate" more explicitly... How? -->
+ In addition, all
+ of the standard CGI environment variables are available, and
+ listed in the <a href="http://hoohoo.ncsa.uiuc.edu/cgi/">cgi
+ specification</a>. Variables will be substituted normally
+ with the format $(VAR), escaped for use in a URL with the
+ format $%(VAR), URL-encoding decoded with the format $=(VAR),
+ and HTML-escaped with the format $&amp;(VAR). The variables are:
+ </p>
+ <dl>
+ <dt>
+ <strong>ANCHOR</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ Expands to the named anchor found just before the first match; it
+ includes "#" at the start so it can be directly appended to the URL
+ variable. If no anchor was found, ANCHOR is empty.
+ </dd>
+ <dt>
+ <strong>BACKLINKS</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The number of links <strong>to</strong> the current match.
+ </dd>
+ <dt>
+ <strong><a name="CGI">CGI</a></strong>
+ </dt>
+ <dd>
+ Unless the configuration file sets a different value,
+ this expands to whatever the SCRIPT_NAME environment
+ variable is. See the
+ <a href="attrs.html#script_name">script_name</a>
+ configuration file attribute for more information.
+ </dd>
+ <dt>
+ <strong>CONFIG</strong>
+ </dt>
+ <dd>
+ The <a href="hts_form.html#config">configuration file</a>.
+ </dd>
+ <dt>
+ <strong>CURRENT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The number of the current match.
+ </dd>
+ <dt>
+ <strong><a name="DESCRIPTION">DESCRIPTION</a></strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The first URL description for the matched document. See below.
+ </dd>
+ <dt>
+ <strong><a name="DESCRIPTIONS">DESCRIPTIONS</a></strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ A list of <a href="attrs.html#description_factor">URL text
+ descriptions</a> for the matched document. The
+ entries in the list are separated by &lt;br&gt;. This is the
+ text used between the &lt;a href...&gt; and &lt;/a&gt; tags.
+ These are controlled by the
+ <a href="attrs.html#max_descriptions">max_descriptions</a> and
+ <a href="attrs.html#max_description_length">max_description_length</a>
+ configuration attributes.
+ </dd>
+ <dt>
+ <strong>DOCID</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The internal ID for the document for the current match.
+ </dd>
+ <dt>
+ <strong>EXCERPT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The relevant <a href="attrs.html#excerpt_show_top">excerpt</a>
+ for the current match.
+ </dd>
+ <dt>
+ <strong>EXCLUDE</strong>
+ </dt>
+ <dd>
+ A <a href="hts_form.html#exclude">list of words excluded</a>
+ from the search, separated by '|'.
+ </dd>
+ <dt>
+ <strong>FIRSTDISPLAYED</strong>
+ </dt>
+ <dd>
+ The index of the first match on this page.
+ </dd>
+ <dt>
+ <strong>FORMAT</strong>
+ </dt>
+ <dd>
+ Expands to an HTML menu of all the available formats. The
+ current format will be the default one.
+ The menu is composed of choices itemized in the
+ <a href="attrs.html#template_map">template_map</a>
+ attribute. The expansion of this template variable is
+ described in more detail in the
+ <a href="hts_selectors.html">select list documentation</a>.
+ </dd>
+ <dt>
+ <strong>HOPCOUNT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The distance of this match away from the starting
+ document(s).
+ </dd>
+ <dt>
+ <strong>KEYWORDS</strong>
+ </dt>
+ <dd>
+ A string of the search keywords with spaces in between, as
+ specified in the
+ <a href="hts_form.html#keywords"><em>keywords</em> input
+ parameter</a>.
+ </dd>
+ <dt>
+ <strong>LASTDISPLAYED</strong>
+ </dt>
+ <dd>
+ The index of the last match on this page.
+ </dd>
+ <dt>
+ <strong><a name="LOGICAL_WORDS">LOGICAL_WORDS</a></strong>
+ </dt>
+ <dd>
+ A string of the <a href="hts_form.html#words">search words</a>
+ with "and", "or" or "not" between the words, depending on the
+ <a href="hts_form.html#match_method">type of search</a>.
+ (These values are overridden by the
+ <a href="attrs.html#boolean_keywords">boolean_keywords</a>
+ attribute.)
+ </dd>
+ <dt>
+ <strong>MATCH_MESSAGE</strong>
+ </dt>
+ <dd>
+ This is either <code>all</code> or <code>any</code> depending on
+ the <a href="hts_form.html#match_method">match method</a> used.
+ (These values are overridden by the
+ <a href="attrs.html#method_names">method_names</a>
+ attribute.)
+ </dd>
+ <dt>
+ <strong>MATCHES</strong>
+ </dt>
+ <dd>
+ The total number of matches that were found.
+ </dd>
+ <dt>
+ <strong><a name="MATCHES_PER_PAGE">MATCHES_PER_PAGE</a></strong>
+ </dt>
+ <dd>
+ The configured <a href="hts_form.html#matchesperpage">maximum
+ number of matches</a> on this page.
+ </dd>
+ <dt>
+ <strong>MAX_STARS</strong>
+ </dt>
+ <dd>
+ The configured <a href="attrs.html#max_stars">maximum number
+ of stars</a> to display in matches.
+ </dd>
+ <dt>
+ <strong><a name="METADESCRIPTION">METADESCRIPTION</a></strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The meta description text (if any) for the matched document.
+ </dd>
+ <dt>
+ <strong>METHOD</strong>
+ </dt>
+ <dd>
+ Expands to an HTML menu of all the available matching
+ methods. The current method will be the default one.
+ The menu is composed of choices itemized in the
+ <a href="attrs.html#method_names">method_names</a>
+ attribute. The expansion of this template variable is
+ described in more detail in the
+ <a href="hts_selectors.html">select list documentation</a>.
+ </dd>
+ <dt>
+ <strong>MODIFIED</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The date and time the document was last modified
+ </dd>
+ <dt>
+ <strong>NEXTPAGE</strong>
+ </dt>
+ <dd>
+ This expands to the value of the
+ <a href="attrs.html#next_page_text">next_page_text</a> or
+ <a href="attrs.html#no_next_page_text">no_next_page_text</a>
+ attributes depending on whether there is a next page or not.
+ If there is only one page of output, this is empty, regardless
+ of the setting of no_next_page_text.
+ </dd>
+ <dt>
+ <strong>NSTARS</strong>
+ </dt>
+ <dd>
+ The number of stars calculated for this document as an
+ integer, up to a maximum specified by the <a
+ href="attrs.html#max_stars">max_stars</a> attribute.
+ </dd>
+ <dt>
+ <strong>PAGE</strong>
+ </dt>
+ <dd>
+ The current page number. Equal to the
+ <a href="hts_form.html#page">page</a> CGI argument, or 1 by
+ default.
+ </dd>
+ <dt>
+ <strong>PAGEHEADER</strong>
+ </dt>
+ <dd>
+ This expands to either the value of the
+ <a href="attrs.html#page_list_header">page_list_header</a> or
+ <a href="attrs.html#no_page_list_header">no_page_list_header</a>
+ attributes depending on how many pages there are.
+ </dd>
+ <dt>
+ <strong>PAGELIST</strong>
+ </dt>
+ <dd>
+ This expands to a list of hyperlinks using the
+ <a href="attrs.html#page_number_text">page_number_text</a> and
+ <a href="attrs.html#no_page_number_text">no_page_number_text</a>
+ attributes. For the current page, it displays
+ no_page_number_text without a hyperlink. Other pages have a
+ hyperlink, and use page_number_text.
+ </dd>
+ <dt>
+ <strong>PAGES</strong>
+ </dt>
+ <dd>
+ The total number of pages.
+ </dd>
+ <dt>
+ <strong>PERCENT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The match score as a percentage. Its range is 1 to 100,
+ without a percent sign. The minimum is always 1 so the
+ variable can be used as the value for an HTML WIDTH
+ attribute.
+ </dd>
+ <dt>
+ <strong>PLURAL_MATCHES</strong>
+ </dt>
+ <dd>
+ If the <strong>MATCHES</strong> variable is other than 1, this
+ will be a single 's'.
+ (This value is overridden by the
+ <a href="attrs.html#plural_suffix">plural_suffix</a>
+ attribute.)
+ </dd>
+ <dt>
+ <strong>PREVPAGE</strong>
+ </dt>
+ <dd>
+ This expands to the value of the
+ <a href="attrs.html#prev_page_text">prev_page_text</a> or
+ <a href="attrs.html#no_prev_page_text">no_prev_page_text</a>
+ attributes depending on whether there is a previous page or not.
+ If there is only one page of output, this is empty, regardless
+ of the setting of no_prev_page_text.
+ </dd>
+ <dt>
+ <strong>RESTRICT</strong>
+ </dt>
+ <dd>
+ The <a href="hts_form.html#restrict">list of patterns</a> which
+ must be matched by the returned URLs, separated by '|'.
+ </dd>
+ <dt>
+ <strong>SCORE</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The score of the current match
+ </dd>
+ <dt>
+ <strong><a name="SELECTED_FORMAT">SELECTED_FORMAT</a></strong>
+ </dt>
+ <dd>
+ The currently selected <a href="hts_form.html#format">format</a>.
+ </dd>
+ <dt>
+ <strong><a name="SELECTED_METHOD">SELECTED_METHOD</a></strong>
+ </dt>
+ <dd>
+ The currently selected <a href="hts_form.html#method">matching
+ method</a>.
+ </dd>
+ <dt>
+ <strong><a name="SELECTED_SORT">SELECTED_SORT</a></strong>
+ </dt>
+ <dd>
+ The currently selected <a href="hts_form.html#sort">sorting
+ method</a>.
+ </dd>
+ <dt>
+ <strong>SIZE</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The size of the document for the current match
+ </dd>
+ <dt>
+ <strong>SIZEK</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The size in kilobytes of the document for the current match
+ </dd>
+ <dt>
+ <strong>SORT</strong>
+ </dt>
+ <dd>
+ Expands to an HTML menu of all the available sorting
+ methods. The current method will be the default one.
+ The menu is composed of choices itemized in the
+ <a href="attrs.html#sort_names">sort_names</a>
+ attribute. The expansion of this template variable is
+ described in more detail in the
+ <a href="hts_selectors.html">select list documentation</a>.
+ </dd>
+ <dt>
+ <strong>STARSLEFT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ A set of HTML &lt;img&gt; tags with the
+ <a href="attrs.html#star_image">stars</a> aligned on the left.
+ </dd>
+ <dt>
+ <strong>STARSRIGHT</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ A set of HTML &lt;img&gt; tags with the
+ <a href="attrs.html#star_image">stars</a> aligned on the right.
+ </dd>
+ <dt>
+ <strong>STARTYEAR</strong>, <strong>STARTMONTH</strong>, <strong>STARTDAY</strong>,
+ <strong>ENDYEAR</strong>, <strong>ENDMONTH</strong>, <strong>ENDDAY</strong>
+ </dt>
+ <dd>
+ The currently specified <a href="hts_form.html#startyear">date
+ range</a> for restricting search results.
+ </dd>
+ <dt>
+ <strong>SYNTAXERROR</strong>
+ (Only in
+ <a href="attrs.html#syntax_error_file">syntax_error_file</a>)
+ </dt>
+ <dd>
+ The text of the boolean expression syntax error.
+ </dd>
+ <dt>
+ <strong>TITLE</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The title of the document for the current match
+ </dd>
+ <dt>
+ <strong>URL</strong>
+ (Only in matchTemplate)
+ </dt>
+ <dd>
+ The URL to the document for the current match
+ </dd>
+ <dt>
+ <strong>VERSION</strong>
+ </dt>
+ <dd>
+ The ht://Dig <a href="attrs.html#version">version number</a>.
+ </dd>
+ <dt>
+ <strong>WORDS</strong>
+ </dt>
+ <dd>
+ A string of the <a href="hts_form.html#words">search words</a>
+ with spaces in between.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htsearch.html b/debian/htdig/htdig-3.2.0b6/htdoc/htsearch.html
new file mode 100644
index 00000000..16446345
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htsearch.html
@@ -0,0 +1,12 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig -- Internet search engine software
+ </title>
+ </head>
+ <frameset cols="160, *" frameborder="1" framespacing="5" border="1">
+ <frame name="contents" src="hts_menu.html">
+ <frame name="body" src="hts_general.html">
+ </frameset>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/htstat.html b/debian/htdig/htdig-3.2.0b6/htdoc/htstat.html
new file mode 100644
index 00000000..c24199b7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/htstat.html
@@ -0,0 +1,116 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: htstat
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ htstat
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ htstat [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Htstat returns statistics on the document and word
+ databases, much like the -s option to htdig or
+ htmerge.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htstat to
+ append <em>.work</em> to the database files
+ allowing it to operate on a second set of
+ databases.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <em>configfile</em> file instead of the
+ default.
+ </dd>
+ <dt>
+ -u
+ </dt>
+ <dd>
+ Output the list of URLs in the document database.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This increases the verbosity of the
+ program. This has little effect.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htmerge.html">htmerge</a>, and
+ <a href="attrs.html">Configuration file format</a>
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+ The ht://Dig Group <a
+ href="mailto:htdig@htdig.org">&lt;htdig@htdig.org&gt;</a>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/index.html b/debian/htdig/htdig-3.2.0b6/htdoc/index.html
new file mode 100644
index 00000000..b5254f40
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig -- Internet search engine software
+ </title>
+ </head>
+ <frameset cols="160, *" frameborder="1" framespacing="5" border="1">
+ <frame name="contents" src="contents.html">
+ <frame name="body" src="main.html">
+ </frameset>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/install.html b/debian/htdig/htdig-3.2.0b6/htdoc/install.html
new file mode 100644
index 00000000..440222f3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/install.html
@@ -0,0 +1,475 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Installation
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Installation
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ This document will attempt to show the steps needed to build
+ and install the ht://Dig system.<br>
+ The main sections are:
+ </p>
+ <ul>
+ <li>
+ <a href="#simple">For the impatient...</a>
+ </li>
+ <li>
+ <a href="where.html">Getting the software</a>
+ </li>
+ <li>
+ <a href="#extract">Extracting the software</a>
+ </li>
+ <li>
+ <a href="#configure">Configure</a>
+ </li>
+ <li>
+ <a href="#compile">Compile</a>
+ </li>
+ <li>
+ <a href="#test">Test</a>
+ </li>
+ <li>
+ <a href="#install">Install</a>
+ </li>
+ <li>
+ <a href="#sharelib">Shared Libraries</a>
+ </li>
+ <li>
+ <a href="#OSs">Notes for particular operating systems</a>
+ </li>
+ </ul>
+ <hr noshade>
+ <h2>
+ <a name="simple">For the impatient...</a>
+ </h2>
+ <p>
+ The standard GNU installation process works for ht://Dig.<br>
+ <code>./configure --prefix=/usr/local</code><br>
+ <code>make</code><br>
+ <code>make install</code><br>
+ <code>vi /usr/local/conf/htdig.conf</code><br>
+ <code>/usr/local/bin/rundig</code><br>
+ (The final three commands must be issued as root.)
+ </p>
+ <hr noshade>
+ <p></p>
+ <hr noshade>
+ <h2>
+ <a name="extract">Extracting the software</a>
+ </h2>
+ <p>
+ The distribution of ht://Dig is in the form of a gzipped tar
+ file. The name of the file will be something like <code>
+ htdig-3.2.0.tar.gz</code>. To extract, you can use the GNU
+ tar command as follows:
+ </p>
+ <blockquote>
+ % <strong>tar xzf <em>tarfile.tar.gz</em></strong>
+ </blockquote>
+ <p>
+ If you do not have GNU tar, you can do the following:
+ </p>
+ <blockquote>
+ % <strong>gunzip -c <em>tarfile.tar.gz</em> | tar xf -</strong>
+ </blockquote>
+ <p>
+ In either case, a new directory will be created under which the
+ distribution will be extracted. The directory it creates will
+ be <code>htdig-</code> followed by the version number.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="configure">Configure</a>
+ </h2>
+ <p>
+ Once the distribution has been extracted, change to the newly
+ created directory.<br>
+ In this directory you will need to execute the <code>configure</code> program:</p>
+ <blockquote>
+ % <strong>./configure</strong>
+ </blockquote>
+ <p>
+ This program will attempt to determine what your particular
+ system can and can't do.
+ </p>
+<!--
+ <p>
+ The <strong>configure</strong> program may produce some warnings about
+ libguile and guile. Ignore those warnings since they do not
+ affect the ht://Dig program at all.
+ </p>
+-->
+ <p>
+ If you are going to change any of the sources to ht://Dig, then
+ you can optionally add dependency information
+ to all the Makefiles before running configure with the command
+ </p>
+ <blockquote>
+ % <strong>automake</strong>
+ </blockquote>
+ <p>
+ This <strong>only</strong> needs to be done if you are going to change
+ any of the sources to ht://Dig and requires <strong>GCC</strong>.
+ </p>
+ <p>
+ The <strong>configure</strong> program has some ht://Dig specific options
+ in addition to the standard ones that you get when running
+ <pre>
+ ./configure --help
+ </pre>
+ </p>
+ <dl>
+ <dt>
+ <code>--prefix=DIR</code>
+ </dt>
+ <dd>
+ This is where all of the ht://Dig parts will be installed.
+ Various other variables will use this value as their base.
+ [default=/opt/www]
+ </dd>
+ <dt>
+ <code>--bindir=DIR</code>
+ </dt>
+ <dd>
+ All the ht://Dig executable programs will go here
+ [default=/opt/www/bin].
+ </dd>
+ <dt>
+ <code>--with-config-dir=DIR</code>
+ </dt>
+ <dd>
+ where your config directory is [default=/opt/www/conf]
+ </dd>
+ <dt>
+ <code>--with-default-config-file=FILE</code>
+ </dt>
+ <dd>
+ Point this to where all the tools will look for the
+ configuration file [default=/opt/www/conf/htdig.conf].
+ </dd>
+ <dt>
+ <code>--with-common-dir=DIR</code>
+ </dt>
+ <dd>
+ This directory is for files which can be shared between
+ different search databases [default=/opt/www/share/htdig].
+ </dd>
+ <dt>
+ <code>--with-database-dir=DIR</code>
+ </dt>
+ <dd>
+ Set this to the directory where the search databases are
+ going to be. (Make sure there is plenty of space on the
+ partition you put this on!) [default=/opt/www/var/htdig].
+ </dd>
+ <dt>
+ <code>--with-cgi-bin-dir=DIR</code>
+ </dt>
+ <dd>
+ The directory where your HTTP server looks for CGI
+ programs. This is where htsearch will get installed
+ [default=/opt/www/cgi-bin].
+ </dd>
+ <dt>
+ <code>--with-image-dir=DIR</code>
+ </dt>
+ <dd>
+ Define this to be a place that can be accessed by your web
+ server. This is where a couple of images will be installed
+ [default=/opt/www/htdocs/htdig].
+ </dd>
+ <dt>
+ <code>--with-image-url-prefix=LOCATION</code>
+ </dt>
+ <dd>
+ This is the URL which points to the directory specified by
+ the <code>--with-image-dir=DIR</code> option above
+ [default=/htdig].
+ </dd>
+ <dt>
+ <code>--with-search-dir=DIR</code>
+ </dt>
+ <dd>
+ where the sample search form should be installed
+ [default=/opt/www/htdocs/htdig].
+ </dd>
+ <dt>
+ <code>--with-search-form=FILE</code>
+ </dt>
+ <dd>
+ The name of the file in which the sample search form will
+ be installed, relative to the directory specified with
+ <code>--with-search-dir=DIR</code> [default=search.html].
+ </dd>
+ </dl>
+ <hr noshade>
+ <h2>
+ <a name="compile">Compile</a>
+ </h2>
+ <p>
+ The configure program will have created Makefiles in all the
+ important directories.</p>
+ <p>
+ If <strong>make</strong> is not able to deal with the generated Makefiles,
+ you should probably obtain
+ <a href="ftp://ftp.gnu.org/pub/gnu/">GNU make</a>.
+ </p>
+ <p>
+ Now build the complete system with
+ </p>
+ <blockquote>
+ % <strong>make</strong>
+ </blockquote>
+ <p>
+ (Sit back and relax for a while...)
+ </p>
+ <p>
+ If the compilation failed with the error that it cannot find
+ libht.a, the most likely problem is that your system does not
+ have libstdc++ installed. Please check the
+ <a href="require.html">system requirements</a> for details on this.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="test">Test</a>
+ </h2>
+ <p>
+ Once built, you can run tests if you provided the
+ <code>--enable-tests</code> option at configure time. These tests
+ require that you have a working
+ <a href="http://www.apache.org/">Apache</a> daemon available with
+ version &gt;= 1.3.1. To run the tests execute:
+ </p>
+ <p>
+ If your <a href="http://www.apache.org/">Apache</a> daemon is
+ installed in a non-standard place, specify it with the
+ <code>--with-apache=PATH</code> configure option.
+ </p>
+ <blockquote>
+ % <strong>make check</strong>
+ </blockquote>
+ <hr noshade>
+ <h2>
+ <a name="install">Install</a>
+ </h2>
+ <p>
+ Everything should have built at this point. To install the
+ software, you need to execute
+ </p>
+ <blockquote>
+ % <strong>make install</strong>
+ </blockquote>
+ <p>
+ This will perform several tasks. It will first attempt to
+ create the directories that you specified to <strong>./configure</strong>.
+ It will then copy the following programs to the <code>
+ --bindir=DIR</code> directory:
+ </p>
+ <ul>
+ <li>
+ htdig
+ </li>
+ <li>
+ htmerge
+ </li>
+ <li>
+ htfuzzy
+ </li>
+ <li>
+ htnotify
+ </li>
+ <li>
+ htdump
+ </li>
+ <li>
+ htstat
+ </li>
+ <li>
+ htload
+ </li>
+ </ul>
+ <p>
+ It will also copy the htsearch program to your
+ <code>--with-cgi-bin-dir=DIR</code> directory.
+ </p>
+ <p>
+ After this, several files will be customized and installed.
+ Here is a list of the files that get installed:
+ </p>
+ <blockquote>
+ <dl>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"><code>--with-config-dir=DIR</code></em>/htdig.conf
+ </dt>
+ <dd>
+ A minimal config file which can be used to create a
+ search database for http://www.htdig.org/
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"><code>--with-search-form=FILE</code></em>
+ </dt>
+ <dd>
+ A sample HTML document that contains a search form.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/footer.html
+ </dt>
+ <dd>
+ A sample HTML document that can be used as the search
+ results footer.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/header.html
+ </dt>
+ <dd>
+ A sample HTML document that can be used as the search
+ results header.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/nomatch.html
+ </dt>
+ <dd>
+ A sample HTML document that can be used if nothing was
+ found.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/syntax.html
+ </dt>
+ <dd>
+ A sample HTML document that will be displayed if the
+ user entered an illegal boolean expression.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/english.0
+ </dt>
+ <dd>
+ Default list of words with affixes that is used by
+ htfuzzy.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-common-dir=DIR</code></em>/english.aff
+ </dt>
+ <dd>
+ Default affix rule database that is used by htfuzzy.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-image-dir=DIR</code></em>/star.gif
+ </dt>
+ <dd>
+ The default star image that is used to rank matches.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-image-dir=DIR</code></em>/star_blank.gif
+ </dt>
+ <dd>
+ The default placeholder image that is the same size as
+ the star, but is blank. This is used to align the
+ results in the short listing.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-image-dir=DIR</code></em>/htdig.gif
+ </dt>
+ <dd>
+ The nifty ht://Dig logo.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--with-image-dir=DIR</code></em>/button*.gif
+ </dt>
+ <dd>
+ Sample images used to show the search result pages.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--bindir=DIR</code></em>/rundig
+ </dt>
+ <dd>
+ A sample shell script which will create a database.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--prefix=DIR</code>/lib/htdig</em>/*.{a,so}
+ </dt>
+ <dd>
+ The shared and static libraries.
+ </dd>
+ <dt>
+ <em><img src="bdot.gif" width=9 height=9 alt="*"> <code>--prefix=DIR</code>/include/htdig</em>/*.h
+ </dt>
+ <dd>
+ The header files that allow anyone to develop a program
+ based on the internals of htdig.
+ </dd>
+ </dl>
+ </blockquote>
+ <p>
+ Note that these files will <em>never</em> replace any existing
+ files that may already be installed.
+ </p>
+ <p>
+ It is also important to note that these files are mostly
+ examples. As they stand, they will work, but you probably
+ want to modify them to reflect your needs.
+ </p>
+ <p>
+ After the installation, you will be ready to test out
+ everything. You can use the <code>rundig</code> script to make a
+ test database of the online documentation at
+ http://www.htdig.org/
+ </p>
+ <p>
+ The only thing left to do is to modify the <code>
+ htdig.conf</code> config file which was placed in <em>
+ <code>--with-config-dir=DIR</code></em>/htdig.conf. The <a href="attrs.html">
+ Configuration</a> manual has the details on what
+ attributes are needed.
+ Then, you'll be ready to begin <a href="running.html">
+ running ht://Dig</a>.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="sharelib">Shared Libraries</a>
+ </h2>
+ <p>
+ By default ht://Dig is compiled with shared libraries. If
+ running on a platform other than Linux or FreeBSD, this may
+ be a problem. We recommend that you compile with the
+ <code>--disable-shared</code> configure option.
+ </p>
+ <p>
+ If you installed with shared libraries, you must be sure the
+ system will find them. Usually it's done by adding the
+ prefix/lib/htdig directory to the LD_LIBRARY_PATH.
+ This is really system dependent and you must check your
+ documentation.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="OSs">Notes for particular operating systems</a>
+ </h2>
+ <p>
+ <strong>Mac OS X</strong> cannot handle ht://Dig's shared libraries.
+ Use<br> <code>./configure --disable-shared --enable-static</code>.
+ </p>
+ <p>
+ <strong>Solaris</strong> cc has problems with long file offsets.
+ Use<br> <code>./configure --disable-bigfile</code>.
+ </p>
+ <p>
+ <strong>HP-UX 10.20</strong> does not handle ./configure. Sorry.
+ </p>
+ <hr size="4" noshade>
+ <a href="author.html">Andrew Scherpbier &lt;andrew@contigo.com&gt; &amp; the ht://Dig Group</a>
+ <br>
+ Last modified: $Date: 2004/05/28 13:15:18 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/isp.html b/debian/htdig/htdig-3.2.0b6/htdoc/isp.html
new file mode 100644
index 00000000..cb32bb49
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/isp.html
@@ -0,0 +1,87 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig: ISPs that Offer ht://Dig</title>
+ </head>
+<body bgcolor="#eef7ff">
+ <h1>ISPs that Offer ht://Dig</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ From time to time, we get requests for recommendations for ISPs
+ that offer ht://Dig hosting services. This list is provided as a
+ service for such requests and does not represent any sort of
+ endorsement of the companies listed. If you are an ISP, you may be
+ listed if you provide web hosting services for third parties and
+ offer ht://Dig as a search engine for clients as a pre-installed option.
+ If you would like
+ to be added to the list, please fill out the form below.
+ </p>
+ <form method="get" action="http://cgi.htdig.org/cgi-bin/cgiemail/isp.txt">
+ Your name: <input name="required-name" size=40><br>
+ Your e-mail: <input name="required-mail"><br>
+ Page title: <input name="title" size=50><br>
+ Page URL: <input name="required-url" size=50><br>
+ <input type="hidden" name="success" value="/linksub.html">
+ <input type="submit" name="submit" value="Add the Link">
+ <input type="reset" name="reset" value="Clear">
+ </form>
+<ul>
+<li><a target="_new" href="http://www.4exposure.com/">4Exposure.com WebHosting Solutions</a></li>
+<li><a target="_new" href="http://www.appleonline.net/">AppleOnline</a></li>
+<li><a target="_new" href="http://www.arges.tempo.at/">Arge Daten (Vienna/Austria)</a></li>
+<li><a target="_new" href="http://www.clari.net.au/">Australia: ClariNET Internet Solutions</a></li>
+<li><a target="_new" href="http://www.caladan.co.uk/">Caladan Communications</a></li>
+<li><a target="_new" href="http://www.chelseadata.com/">Chelsea Data</a></li>
+<li><a target="_new" href="http://www.citrin.ch/">Citrin - Softwareentwicklung und Internet Dienste</a></li>
+<li><a target="_new" href="http://www.climbers.net/">Climbers.Net</a></li>
+<li><a target="_new" href="http://www.crossnet.se/">Crossnet Internethotell AB</a></li>
+<li><a target="_new" href="http://www.dataway.ch/">dataway GmbH (Zurich, Switzerland)</a></li>
+<li><a target="_new" href="http://search.dca.net/">DCANet search</a></li>
+<li><a target="_new" href="http://www.debugnet.com/">DEBUGnet Network</a></li>
+<li><a target="_new" href="http://www.austria.eu.net/">EUnet Austria</a></li>
+<li><a target="_new" href="http://www.europeanservers.net/">EuropeanServers</a></li>
+<li><a target="_new" href="http://www.freedom2surf.net/">Freedom To Surf</a></li>
+<li><a target="_new" href="http://www.hostasite.com/">hostasite.com a DELTA internet service</a></li>
+<li><a target="_new" href="http://www.netconcepts.com/">Internet Concepts</a></li>
+<li><a target="_new" href="http://www.inwise.de/">InWise - Wirtschaftlich-Wissenschaftlicher Internet Service GmbH</a></li>
+<li><a target="_new" href="http://www.ipn.de/">Info Pool Network GmbH (Berlin, Germany)</a></li>
+<li><a target="_new" href="http://www.islandhosting.com/">islandhosting.com</a></li>
+<li><a target="_new" href="http://www.itsamac.com/">Itsamac - Mac OS X Powered Web Hosting Services</a></li>
+<li><a target="_new" href="http://www.kia.net/">KIA Internet Solutions, Inc.</a></li>
+<li><a target="_new" href="http://www.raketti.net/">Kuopion Telephone Ltd. / Raketti Internet Services</a></li>
+<li><a target="_new" href="http://www.marth.com/">Marth.com - WebServers</a></li>
+<li><a target="_new" href="http://www.maytech.net/">Maytech Ltd.</a></li>
+<li><a target="_new" href="http://mwd.com.pl/">Medical Web Designs (Poland)</a></li>
+<li><a target="_new" href="http://www.mindspring.com/">MindSpring Enterprises</a></li>
+<li><a target="_new" href="http://www.missoulaweb.com/">MissoulaWeb Hosting, Programming &amp; Design</a></li>
+<li><a target="_new" href="http://www.netg.se/">NetGuide Internet Services</a></li>
+<li><a target="_new" href="http://www.netsoft.ro/">NetSoft - Romanian ISP</a></li>
+<li><a target="_new" href="http://www.northwestdesign.com/">northwest design + communications (Canada)</a></li>
+<li><a target="_new" href="http://www.overcoffee.com">OverCoffee Web Design and Hosting</a></li>
+<li><a target="_new" href="http://www.quickhosts.com/">Quickhosts.com - Providing Internet Solutions</a></li>
+<li><a target="_new" href="http://www.redestv.net">RedesTV. Proveedor de servicios de internet.</a></li>
+<li><a target="_new" href="http://www.saargate.de/">SaarGate - Der eBusiness Provider im Südwesten Deutschlands</a></li>
+<li><a target="_new" href="http://www.sdv.fr/">SdV Plurimedia</a></li>
+<li><a target="_new" href="http://www.smartcanuk.com/">Smartcanuk Internet/Domain Registration Services</a></li>
+<li><a target="_new" href="http://www.shn.nu/">Sosik-Hamor Networks</a></li>
+<li><a target="_new" href="http://www.swcp.com/">Southwest Cyberport</a></li>
+<li><a target="_new" href="http://www.via-net-works.de">VIA NET.WORKS Deutschland GmbH (Germany)</a></li>
+<li><a target="_new" href="http://www.wanfear.com/">WANfear</a></li>
+<li><a target="_new" href="http://www.web2000.ru/">Web 2000 (Russia)</a></li>
+<li><a target="_new" href="http://www.opurk.nl/">Welkom op Urk.nl</a></li>
+</ul>
+
+<hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+
+</body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/ma_menu.html b/debian/htdig/htdig-3.2.0b6/htdoc/ma_menu.html
new file mode 100644
index 00000000..bc512726
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/ma_menu.html
@@ -0,0 +1,65 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig: Mailing list archive</title>
+ </head>
+ <body bgcolor="#5a7b8c" text="#ffffff" link="#d0d0d0" vlink="#adc0c0">
+ <h2 align="center">
+ <img src="htdig.gif" alt="" width=81 height=54><br>
+ mail archive
+ </h2>
+ <strong><em>Navigate</em></strong><br>
+ <img src="up.gif" alt="^" width=9 height=9> <a href="index.html" target="_top">ht://Dig</a> <br>
+ <br>
+ <strong>mailing list archive</strong><br>
+ <font face="helvetica,arial" size="2">
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="mailing.html" target="body">General</a><br>
+ <h3 align="center">1997</h3>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-01/" target="body">January</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-02/" target="body">February</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-03/" target="body">March</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-04/" target="body">April</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-05/" target="body">May</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-06/" target="body">June</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-07/" target="body">July</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-08/" target="body">August</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-09/" target="body">September</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-10/" target="body">October</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-11/" target="body">November</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1997-12/" target="body">December</a><br>
+ <h3 align="center">1998</h3>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-01/" target="body">January</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-02/" target="body">February</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-03/" target="body">March</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-04/" target="body">April</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-05/" target="body">May</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-06/" target="body">June</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-07/" target="body">July</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-08/" target="body">August</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-09/" target="body">September</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-10/" target="body">October</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-11/" target="body">November</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1998-12/" target="body">December</a><br>
+ <h3 align="center">1999</h3>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-01/" target="body">January</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-02/" target="body">February</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-03/" target="body">March</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-04/" target="body">April</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-05/" target="body">May</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-06/" target="body">June</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-07/" target="body">July</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-08/" target="body">August</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-09/" target="body">September</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-10/" target="body">October</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-11/" target="body">November</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a href="http://www.htdig.org/mail/1999-12/" target="body">December</a><br>
+ </font><br>
+ <form action="http://www.htdig.org/cgi-bin/htsearch" target=body>
+ <strong>Quick Search:</strong><br>
+ <font size="-1">
+ <input type=text name=words size=15>
+ <input type=hidden name=method value=and>
+ </font>
+ </form>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/mailarchive.html b/debian/htdig/htdig-3.2.0b6/htdoc/mailarchive.html
new file mode 100644
index 00000000..e1ff0078
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/mailarchive.html
@@ -0,0 +1,12 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig -- Internet search engine software
+ </title>
+ </head>
+ <frameset cols="160, *" frameborder="1" framespacing="5" border="1">
+ <frame name="contents" src="http://www.htdig.org/mail/menu.html">
+ <frame name="body" src="mailing.html">
+ </frameset>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/mailing.html b/debian/htdig/htdig-3.2.0b6/htdoc/mailing.html
new file mode 100644
index 00000000..96b97d03
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/mailing.html
@@ -0,0 +1,60 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Mailing list
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ ht://Dig mailing list
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ There are now several mailing lists related to ht://Dig.
+ </p>
+ <blockquote>
+ <table>
+ <tr><td><strong>htdig-general</strong> is a relatively high
+ volume list for discussion of ht://Dig and related
+ announcements. Major announcements, such as new releases, are
+ sent to this list as well as the htdig-announce list
+ below. List traffic is <a href="mailarchive.html"
+ target="_top">archived here</a>.</td></tr>
+ <tr><td><strong>htdig-announce</strong> is a
+ read-only list only for announcements of ht://Dig releases
+ from the main htdig list.</td></tr>
+ <tr><td><strong>htdig-dev</strong> is a fairly
+ moderate volume list for discussion of ht://Dig development
+ issues and patches. List traffic is <a
+ href="dev/devmailarchives.html"
+ target="_top">archived here</a>.</td></tr>
+ <tr><td><strong>htdig-updates</strong> is a
+ read-only, relatively high volume list that tracks changes to
+ the CVS source tree.</td></tr>
+ </table>
+ </blockquote>
+ <p>
+ To sign up for a list, you should go to the SourceForge <a
+ href="http://sourceforge.net/mail/?group_id=4593">mailing list
+ page</a>. Please note that the archives on SourceForge are
+ only from the middle of January 2001, while the above archives
+ comprise messages dating back much further.
+ </p>
+ <p>
+ In addition to the mailing lists, bug reports and feature requests
+ should be sent to the online bug-tracking database
+ through the <a href="bugs.html">bug reporting page</a>.
+ </p>
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/main.html b/debian/htdig/htdig-3.2.0b6/htdoc/main.html
new file mode 100644
index 00000000..1305a242
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/main.html
@@ -0,0 +1,108 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Overview
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1 align="center">
+ <img src="htdig_big.gif" alt="ht://Dig" width=199 height=133><br>
+ WWW Search Engine Software
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Recent News
+ </h2>
+ <p>
+
+ <A HREF="http://sourceforge.net/forum/forum.php?forum_id=327276"><B>Release of 3.2.0b5</B></A>
+ <BR>&nbsp;&nbsp;&nbsp;&nbsp;<I>lha - 2003-11-10 03:57</I> &nbsp; - &nbsp; <A HREF="http://sourceforge.net/projects/htdig/">ht://Dig</A><BR>After being asked &quot;Is ht://Dig dead?&quot; once too often, the ht://Dig group is very happy to announce the release of ht://Dig version 3.2.0b5. This fourth beta release of 3.2 (yes, 3.2.0b4 was cancelled) should fix all bugs in previous 3.2 releases and introduces a few new features. As a beta release, it has not received exhaustive testing. However, we believe it to be almost stable enough for production use, and hope that you consider giving it a try to provide feedback.<div align="center"> <A HREF="http://sourceforge.net/forum/forum.php?forum_id=327276">[Read More/Comment]</a></div><HR width="100%" size="1" noshade>
+
+ <A HREF="http://sourceforge.net/forum/forum.php?forum_id=149333"><B>Release of 3.1.6</B></A>
+ <BR>&nbsp;&nbsp;&nbsp;&nbsp;<I>ghutchis - 2002-02-01 07:49</I> &nbsp; - &nbsp; <A HREF="http://sourceforge.net/projects/htdig/">ht://Dig</A><BR>The ht://Dig group is quite happy to announce, at long last, the release of version 3.1.6. This new production version fixes a number of important bugs and adds a few heavily-requested features. As the latest stable release, it is recommended for all production servers. For more details, check the Release Notes at <a href="http://www.htdig.org/RELEASE.html" target="_new">http://www.htdig.org/RELEASE.html</a> or download 3.1.6 from <a href="http://www.htdig.org/where.html" target="_new">http://www.htdig.org/where.html</a> or <a href="http://www.htdig.org/mirrors.html" target="_new">http://www.htdig.org/mirrors.html</a><div align="center">(5 Comments) <A HREF="http://sourceforge.net/forum/forum.php?forum_id=149333">[Read More/Comment]</a></div><HR width="100%" size="1" noshade>
+ <A HREF="http://sourceforge.net/forum/forum.php?forum_id=134785"><B>FTP Site ftp.htdig.org Offline</B></A>
+ <BR>&nbsp;&nbsp;&nbsp;&nbsp;<I>ghutchis - 2001-11-30 07:42</I> &nbsp; - &nbsp; <A HREF="http://sourceforge.net/projects/htdig/">ht://Dig</A><BR>SourceForge has stopped hosting project ftp services. So the main FTP repository, ftp.htdig.org is no longer. <BR> <BR>Files available from FTP have also always been available from <a href="http://www.htdig.org/files/" target="_new">http://www.htdig.org/files/</a><div align="center">(1 Comment) <A HREF="http://sourceforge.net/forum/forum.php?forum_id=134785">[Read More/Comment]</a></div><HR width="100%" size="1" noshade>
+ <A HREF="http://sourceforge.net/forum/forum.php?forum_id=117866"><B>Security Vulnerabilities in 3.1.5 and 3.2.0b3</B></A>
+ <BR>&nbsp;&nbsp;&nbsp;&nbsp;<I>ghutchis - 2001-10-15 12:51</I> &nbsp; - &nbsp; <A HREF="http://sourceforge.net/projects/htdig/">ht://Dig</A><BR>The current released versions are vulnerable to a security hole in the htsearch CGI program. Pre-release snapshots of 3.2.0b4 and 3.1.6 are available from the development snapshots directory that fix the problem: <a href="http://www.htdig.org/files/snapshots/" target="_new">http://www.htdig.org/files/snapshots/</a> <div align="center">(6 Comments) <A HREF="http://sourceforge.net/forum/forum.php?forum_id=117866">[Read More/Comment]</a></div><HR width="100%" size="1" noshade>
+ <A HREF="http://sourceforge.net/forum/forum.php?forum_id=67069"><B>Release of 3.2.0b3</B></A>
+ <BR>&nbsp;&nbsp;&nbsp;&nbsp;<I>ghutchis - 2001-02-22 18:20</I> &nbsp; - &nbsp; <A HREF="http://sourceforge.net/projects/htdig/">ht://Dig</A><BR>The ht://Dig group is quite happy to announce, at long last, the release of ht://Dig version 3.2.0b3. This third beta release of 3.2 offers several long-awaited features and should fix all bugs in previous 3.2 releases. As a beta release, it still has only received limited testing. However, as the final release of 3.2.0 nears, we hope that you consider giving it a try to provide feedback.<div align="center">(5 Comments) <A HREF="http://sourceforge.net/forum/forum.php?forum_id=67069">[Read More/Comment]</a></div><HR width="100%" size="1" noshade><div align="center"><a href="http://sourceforge.net/news/?group_id=4593">[News archive]</a></div>
+ <p>
+ <hr size="4" noshade>
+ <h2>
+ Introduction
+ </h2>
+ <p>
+ The ht://Dig system is a complete world wide web indexing and
+ searching system for a domain or intranet. This system
+ is <strong>not</strong> meant to replace the need for
+ powerful internet-wide search systems like Lycos, Infoseek,
+ Google and AltaVista. Instead it is meant to cover the
+ search needs for a single company, campus, or even a
+ particular subsection of a web site.<br>
+ As opposed to some WAIS-based or web-server based search
+ engines, ht://Dig can easily span several web servers. The
+ type of these different web servers doesn't matter as long as
+ they understand common protocols like HTTP.
+ </p>
+ <p>
+ ht://Dig was developed at <a href="http://www.sdsu.edu/">San
+ Diego State University</a> as a way to search the various web
+ servers on the campus network. Here are some examples of the
+ application of ht://Dig on the SDSU network:
+ </p>
+ <ul>
+ <li>
+ <a href="http://www.sdsu.edu/">A "Quick" search on the main
+ campus home page</a>
+ </li>
+ <li>
+ <a href="http://www.sdsu.edu/search/">A more comprehensive
+ interface to the same</a>
+ </li>
+ <li>
+ <a href="http://libweb.sdsu.edu/catalog/Search.html">A
+ search of the online SDSU General Catalog</a>
+ </li>
+ <li>
+ <a href="http://www.sdsu.edu/doc/">Searches through various
+ online documentation</a>
+ </li>
+ </ul>
+ <form action="http://cgi.htdig.org/cgi-bin/htsearch" method="post">
+ <p>
+ You can search this documentation as well:<br>
+ <input type="hidden" name="config" value="htdig">
+ <input type="hidden" name="method" value="and">
+ <input type="text" name="words" size="40"><br>
+ Restrict to: <select name="restrict">
+ <option value="">Everything
+ <option value="www.htdig.org">www.htdig.org only
+ <option value="www.htdig.org/dev/">Developer pages only
+ <option value="/mail/|/htdig-dev/">Mailing list archives
+ <option value="/mail/">htdig@htdig.org archive
+ <option value="/htdig-dev/">htdig3-dev@htdig.org archive
+ </select>
+ <input type="submit" value="Search">
+ </p>
+ </form>
+ <p>
+ Many different types of searches can be set up using only a
+ single search database. For example, the online documentation
+ search above uses the same database as the campus main
+ search. The difference between the searches is that the
+ documentation search will only show results related to the
+ online documentation.
+ </p>
+<hr size="4" noshade>
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/meta.html b/debian/htdig/htdig-3.2.0b6/htdoc/meta.html
new file mode 100644
index 00000000..8369937e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/meta.html
@@ -0,0 +1,269 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Recognized META information in HTML documents
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Recognized META information in HTML documents
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Introduction
+ </h2>
+ <p>
+ As the <a href="index.html">ht://Dig</a> system will index
+ all HTML pages on a system, individual authors of pages may
+ want to control some of the aspects of the indexing
+ operation. To this end, ht://Dig will recognize some special
+ &lt;META&gt; tag attributes. The following things can be
+ controlled in this manner:
+ </p>
+ <ul>
+ <li>
+ Do not index the document
+ </li>
+ <li>
+ Notify a user that the document has expired
+ </li>
+ <li>
+ Set keywords for the document
+ </li>
+ </ul>
+ <hr>
+ <h2>
+ General &lt;META&gt; tag use
+ </h2>
+ <p>
+ In HTML, any number of &lt;META&gt; tags can be used between
+ the &lt;HEAD&gt; and &lt;/HEAD&gt; tags of a document. There
+ are three possible attributes in this tag, two of which are
+ recognized by ht://Dig:
+ </p>
+ <dl>
+ <dt>
+ NAME
+ </dt>
+ <dd>
+ Used to name a specific property.
+ </dd>
+ <dt>
+ CONTENT
+ </dt>
+ <dd>
+ Used to supply the value for a named property.
+ </dd>
+ </dl>
+ <p>
+ A document could start with something like the following:
+ </p>
+ <blockquote>
+ &lt;HTML&gt;<br>
+ &lt;HEAD&gt;<br>
+ &lt;META NAME="htdig-keywords" CONTENT="phone telephone
+ online electronic directory"&gt;<br>
+ &lt;META NAME="htdig-email"
+ CONTENT="pat.user@nowhere.net"&gt;<br>
+ &lt;TITLE&gt;Some document title&lt;/TITLE&gt;<br>
+ &lt;/HEAD&gt;<br>
+ &lt;BODY&gt;
+ <blockquote>
+ <em>Body of document</em>
+ </blockquote>
+ &lt;/BODY&gt;<br>
+ &lt;/HTML&gt;
+ </blockquote>
+ <hr>
+ <h2>
+ Recognized properties
+ </h2>
+ <p>
+ The following properties are recognized by ht://Dig:
+ </p>
+ <ul>
+ <li>
+ htdig-keywords
+ </li>
+ <li>
+ htdig-noindex
+ </li>
+ <li>
+ htdig-email
+ </li>
+ <li>
+ htdig-notification-date
+ </li>
+ <li>
+ htdig-email-subject
+ </li>
+ <li>
+ robots
+ </li>
+ <li>
+ keywords
+ </li>
+ <li>
+ description
+ </li>
+ <li>
+ author
+ </li>
+ </ul>
+ <p>
+ Detailed information about the <em>htdig-email</em>, <em>
+ htdig-notification-date</em>, and <em>
+ htdig-email-subject</em> properties can be found in the
+ <a href="notification.html">Email notification service</a>
+ document.
+ </p>
+ <p>
+ Descriptions of the properties and their values:
+ </p>
+ <dl>
+ <dt>
+ <strong>htdig-keywords</strong>
+ </dt>
+ <dd>
+ The value of this property should be a blank separated list
+ of keywords which will get a very high weight when
+ searching. This can be used to get around some problems
+ with common synonyms for words in the document. For
+ example, if a document is a telephone directory, possible
+ keywords could be "telephone phone directory book list".
+ Now, regardless of what text is actually in the document,
+ it can be found if these keywords are used in the search.
+ The weight that words in the content string will have in
+ search results is controlled by the
+ <a href="attrs.html#keywords_factor">
+ keywords_factor</a> attribute in your configuration.
+ </dd>
+ <dt>
+ <strong>htdig-noindex</strong>
+ </dt>
+ <dd>
+ This property has no value associated with it. If it is
+ used, the document will NOT be included in any searches.
+ Example uses of this could be:
+ <ul>
+ <li>
+ A document which is dynamic, i.e. the contents change
+ continually.
+ </li>
+ <li>
+ A temporary document which is not officially available yet.
+ </li>
+ <li>
+ A document you just don't want to be found.
+ </li>
+ </ul>
+ </dd>
+ <dt>
+ <strong>htdig-email</strong>
+ </dt>
+ <dd>
+ The value is the email address a notification message
+ should be sent to. Multiple email addresses can be given by
+ separating them by commas. If no email address is given, no
+ notification will be sent.<br>
+ (Please check the <a href="notification.html">Email
+ notification service</a> documentation for more details on
+ this.)
+ </dd>
+ <dt>
+ <strong>htdig-notification-date</strong>
+ </dt>
+ <dd>
+ The value is the date on or after which the notification
+ should be sent. The format is simply <em>month / day /
+ year</em>, or if the <a href="attrs.html#iso_8601">iso_8601</a>
+ attribute is set, <em>year - month - day</em>.
+ Make sure that the year has the century with it
+ as well. This means that you should use <em>1995</em>
+ instead of <em>95</em>.<br>
+ If no date is given, no notification will be sent. (Please
+ check the <a href="notification.html">Email notification
+ service</a> documentation for more details on this.)
+ </dd>
+ <dt>
+ <strong>htdig-email-subject</strong>
+ </dt>
+ <dd>
+ The value specifies the subject of the notification message.
+ This is an optional property. (Please check the
+ <a href="notification.html">Email notification service</a>
+ documentation for more details on this.)
+ </dd>
+ <dt>
+ <a name="robots"><strong>robots</strong></a>
+ </dt>
+ <dd>
+ The value specifies restrictions on robots (including ht://Dig)
+ for the current page. These restrictions can be "noindex" to
+ prevent indexing the document but allowing the robot to follow
+ links from the page, "nofollow" to allow indexing but preventing
+ links from being followed, or "none" to prevent
+ both. Additionally, ht://Dig supports the values "index",
+ "follow" and "all", which are the opposites of the other
+ values and are the default behavior. For more information on
+ META robots tags, check out the
+ <a href="http://www.robotstxt.org/wc/meta-user.html">
+ HTML Author's Guide to the Robots META tag</a>.
+ </dd>
+ <dt>
+ <strong>keywords</strong>
+ </dt>
+ <dd>
+ The value of this property should be a blank separated list
+ of keywords, just as for the htdig-keywords property.
+ They are treated as equivalent by htdig. The reason for
+ two different properties is that the keywords property
+ is used by other search engines as well, while the
+ htdig-keywords property can be used for words you want
+ indexed only by htdig. You can get htdig to treat other
+ property names as equivalent to htdig-keywords, or disable
+ the htdig-keywords or keywords properties, by changing the
+ <a href="attrs.html#keywords_meta_tag_names">
+ keywords_meta_tag_names</a> attribute in your configuration.
+ </dd>
+ <dt>
+ <strong>description</strong>
+ </dt>
+ <dd>
+ The value allows you to specify an alternate excerpt
+ (description) of a page. If the config-file attribute
+ <a href="attrs.html#use_meta_description">
+ use_meta_description</a> is used, then any documents with
+ descriptions will use them instead of the automatically
+ generated excerpts.
+ The weight that words in the content string will have in
+ search results is controlled by the
+ <a href="attrs.html#meta_description_factor">
+ meta_description_factor</a> attribute in your configuration.
+ </dd>
+ <dt>
+ <strong>author</strong>
+ </dt>
+ <dd>
+ The value specifies the name, email address and/or affiliation
+ of the creator or authoriser of a page.
+ The weight that words in the content string will have in
+ search results is controlled by the
+ <a href="attrs.html#author_factor">author_factor</a>
+ attribute in your configuration.
+ A search for "author:<em>name</em>" will
+ look only in these fields for the word <em>name</em>.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/notification.html b/debian/htdig/htdig-3.2.0b6/htdoc/notification.html
new file mode 100644
index 00000000..2c3bbde1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/notification.html
@@ -0,0 +1,185 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Email notification service
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ <img alt="ht://Dig" src="htdig.gif" align="bottom" width=81 height=54> Email
+ notification service
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <h2>
+ Introduction
+ </h2>
+ <p>
+ As any HTML author knows, information is only useful if it is
+ valid. Unfortunately, a lot of information has an inherent
+ expiration date. Examples include meeting schedules, announcements
+ of upcoming events, and pages with those annoying yellow
+ 'NEW' images by certain links.
+ </p>
+ <p>
+ <a href="index.html">ht://Dig</a> is a WWW index/search
+ system developed at <a href="http://www.sdsu.edu/">San Diego
+ State University</a>. Since this index system already scans
+ all HTML documents, it was the logical choice to incorporate
+ a reminder service into it.
+ </p>
+ <p>
+ ht://Dig can be told to remind you about an HTML page
+ sometime in the future. The reminder/notification will come
+ by email and will contain the URL to the page plus some other
+ information.
+ </p>
+ <hr>
+ <h2>
+ Use
+ </h2>
+ <p>
+ ht://Dig detects special use of the &lt;META&gt; tag in HTML
+ documents. The &lt;META&gt; tags should go between
+ &lt;HEAD&gt; and &lt;/HEAD&gt; of an HTML document.
+ </p>
+ <p>
+ Example:
+ </p>
+ <blockquote>
+ &lt;HTML&gt;<br>
+ &lt;HEAD&gt;<br>
+ &lt;META NAME=&quot;htdig-email&quot;
+ CONTENT=&quot;pat.user@nowhere.net&quot;&gt;<br>
+ &lt;META NAME=&quot;htdig-email-subject&quot; CONTENT=&quot;Reminder to
+ update a page&quot;&gt;<br>
+ &lt;META NAME=&quot;htdig-notification-date&quot;
+ CONTENT=&quot;8/28/1995&quot;&gt;<br>
+ &lt;TITLE&gt;Someone's homepage.&lt;/TITLE&gt;<br>
+ &lt;/HEAD&gt;<br>
+ &lt;BODY&gt;<br>
+ <blockquote>
+ <em>Body of document</em>
+ </blockquote>
+ &lt;/BODY&gt;<br>
+ &lt;/HTML&gt;
+ </blockquote>
+ <p>
+ After 8/28/1995 pat.user@nowhere.net will get a mail message
+ which will look something like this:
+ </p>
+ <blockquote>
+ <strong>From:</strong> ht://Dig email notification service<br>
+ <strong>Subject:</strong> WWW notification: Reminder to
+ update a page<br>
+ <strong>To:</strong> pat.user@nowhere.net<br>
+ <br>
+ The following page was tagged to notify you after 8/28/1995.<br>
+ <br>
+ URL: http://www.sdsu.edu/~turtle/index.html<br>
+ Date: 8/28/1995<br>
+ Subject: Reminder to update a page<br>
+ </blockquote>
+ <hr>
+ <h2>
+ Attributes
+ </h2>
+ <p>
+ The special ht://Dig &lt;META&gt; tag attributes related to
+ notification are:
+ </p>
+ <ul>
+ <li>
+ NAME=&quot;htdig-email&quot; CONTENT=&quot;<em>email address [, email
+ address] ...</em>&quot;
+ </li>
+ <li>
+ NAME=&quot;htdig-notification-date&quot; CONTENT=&quot;<em>earliest
+ notification date</em>&quot;
+ </li>
+ <li>
+ NAME=&quot;htdig-email-subject&quot; CONTENT=&quot;<em>notification message
+ subject</em>&quot;
+ </li>
+ </ul>
+ <p>
+ Descriptions of the values for the attributes:
+ </p>
+ <dl>
+ <dt>
+ <strong>htdig-email</strong>
+ </dt>
+ <dd>
+ This is the email address the notification message should
+ be sent to. Multiple email addresses can be given by
+ separating them by commas. If no email address is given, no
+ notification will be sent.
+ </dd>
+ <dt>
+ <strong>htdig-notification-date</strong>
+ </dt>
+ <dd>
+ This is the date on or after which the notification should
+ be sent. The format is simply <em>month / day / year</em>,
+ or if the <a href="attrs.html#iso_8601">iso_8601</a>
+ attribute is set, <em>year - month - day</em>.
+ Make sure that the year has the century with it as well.
+ This means that you should use <em>1995</em> instead of
+ <em>95</em>.<br>
+ The format of dates is actually a little more flexible than
+ this. Any punctuation or white space can be used as separators,
+ and if the year, month and date do not appear as expected in the
+ order listed above, the notification service will try to make
+ sense of the order used, if the date can be resolved unambiguously
+ in another order. Using four-digit years avoids ambiguity between
+ the year and the month or day. The format <em>year - month -
+ day</em> is accepted without ambiguity when a four-digit year is
+ used, whether the iso_8601 attribute is set or not. When the
+ year is given after the month and day, the format is either
+ <em>day - month - year</em>, if iso_8601 is true and the year
+ has four digits, or <em>month - day - year</em> otherwise.<br>
+ If no date is given, no notification will be sent. If a date is
+ given but is malformed, a notification of this error will be sent.
+ For correct dates, a notification will be sent every time the
+ system runs the notification service, on or after the date given.
+ To end the notifications, you must update or remove the
+ notification date in the document.
+ </dd>
+ <dt>
+ <strong>htdig-email-subject</strong>
+ </dt>
+ <dd>
+ This specifies the subject of the notification message. This
+ is an optional attribute. Note that if you want to put
+ spaces in the subject, you <strong>have</strong> to put
+ double quotes (&quot;) around it.
+ </dd>
+ </dl>
+ <p>
+ There are a couple of other META attributes which are
+ recognized. These can be found in the more general
+ <a href="meta.html">ht://Dig META tag documentation</a>.
+ </p>
+ <hr>
+ <h2>
+ Disclaimer
+ </h2>
+ <p>
+ This service is a free service for all HTML maintainers whose
+ documents are covered by the ht://Dig search system. If any
+ of the attributes are improperly formatted, no notifications
+ will be mailed. Email notification may not always occur on
+ the date that you specified. We can only guarantee a
+ notification of at most once a week.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/require.html b/debian/htdig/htdig-3.2.0b6/htdoc/require.html
new file mode 100644
index 00000000..d1975701
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/require.html
@@ -0,0 +1,392 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Features and System requirements
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Features and System requirements
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr noshade>
+ <h2>
+ Features
+ </h2>
+ <p>
+ Here are some of the major features of ht://Dig. They are in
+ no particular order.
+ </p>
+ <blockquote>
+ <dl>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Intranet searching</strong>
+ </dt>
+ <dd>
+ ht://Dig has the ability to search through many servers
+ on a network by acting as a WWW browser.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ It is free</strong>
+ </dt>
+ <dd>
+ The whole system is released under the
+ <a href="COPYING">GNU Library General Public License (LGPL)</a>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Robot exclusion is supported</strong>
+ </dt>
+ <dd>
+ The <a href="http://www.robotstxt.org/wc/norobots.html">
+ Standard for Robot Exclusion</a> is
+ <a href="meta.html#robots">supported by ht://Dig.</a>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Boolean expression searching</strong>
+ </dt>
+ <dd>
+ Searches can be arbitrarily complex using boolean
+ expressions.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Phrase searching</strong>
+ </dt>
+ <dd>
+ A phrase can be searched for by enclosing it in quotes.
+ Phrase searches can be combined with word searches, as in
+ <code>Linux and "high quality"</code>.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Configurable search results</strong>
+ </dt>
+ <dd>
+ The output of a search can easily be tailored to your
+ needs by means of providing HTML templates.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Fuzzy searching</strong>
+ </dt>
+ <dd>
+ Searches can be performed using various
+ <a href="attrs.html#search_algorithm">configurable algorithms</a>.
+ Currently the following algorithms are
+ supported (in any combination):
+ <ul>
+ <li>
+ exact
+ </li>
+ <li>
+ soundex
+ </li>
+ <li>
+ metaphone
+ </li>
+ <li>
+ common word endings
+ </li>
+ <li>
+ synonyms
+ </li>
+ <li>
+ accent stripping
+ </li>
+ <li>
+ substring and prefix
+ </li>
+ <li>
+ regular expressions
+ </li>
+ <li>
+ simple spelling corrections
+ </li>
+ </ul>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Searching of many file formats</strong>
+ </dt>
+ <dd>
+ Both HTML documents and plain text files can be
+ searched directly by ht://Dig itself. There is also a
+ <a href="attrs.html#external_parsers">mechanism
+ to allow external programs ("external parsers")</a> to be used
+ while building the database so that arbitrary file formats
+ can be searched. <br>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Document retrieval using many transport services</strong>
+ </dt>
+ <dd>
+ Several transport services can be handled by ht://Dig,
+ including http://, ftp:// and file:///.
+ There is also a
+ <a href="attrs.html#external_protocols">mechanism
+ to allow external programs ("external protocols")</a> to be used
+ while building the database so that arbitrary transport
+ services can be used. <br>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Keywords can be added to HTML documents</strong>
+ </dt>
+ <dd>
+ Any number of <a href="meta.html">keywords</a>
+ can be added to HTML documents
+ which will not show up when the document is viewed.
+ This is used to make a document more likely to be found
+ and also to make it appear higher in the list of
+ matches.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Email notification of expired documents</strong>
+ </dt>
+ <dd>
+ Special meta information can be added to HTML documents
+ which can be used to
+ <a href="notification.html">notify the maintainer</a> of those
+ documents at a certain time. It is handy to get
+ reminded when to remove the "New" images from a certain
+ page, for example.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ A Protected server can be indexed</strong>
+ </dt>
+ <dd>
+ ht://Dig can be told to use a specific
+ <a href="attrs.html#authorization">username and password</a>
+ when it retrieves documents. This can be used
+ to index a server or parts of a server that are
+ protected by a username and password.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Searches on subsections of the database</strong>
+ </dt>
+ <dd>
+ It is easy to set up a search which only returns
+ documents whose
+ <a href="hts_form.html#restrict">URL matches a certain pattern.</a>
+ This becomes very useful for people who want to make their
+ own data searchable without having to use a separate
+ search engine or database.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Full source code included</strong>
+ </dt>
+ <dd>
+ The search engine comes with full source code. The
+ whole system is released under the terms and conditions
+ of the <a href="COPYING">GNU Library General Public License (LGPL) version
+ 2.0</a>
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ The depth of the search can be limited</strong>
+ </dt>
+ <dd>
+ Instead of limiting the search to a set of machines, it
+ can also be restricted to documents that are a certain
+ number of <a href="attrs.html#max_hop_count">"mouse-clicks"</a>
+ away from the start document.
+ </dd>
+ <dt>
+ <strong><img src="bdot.gif" width=9 height=9 alt="*">
+ Full support for the ISO-Latin-1 character set</strong>
+ </dt>
+ <dd>
+ Both SGML entities like '&amp;agrave;' and ISO-Latin-1
+ characters can be indexed and searched.
+ </dd>
+ </dl>
+ </blockquote>
+ <hr size="4" noshade>
+ <h1>
+ Requirements to build ht://Dig
+ </h1>
+ <p>
+ ht://Dig was developed under Unix using C++.
+ </p>
+ <p>
+ For this reason, you will need a Unix machine, a C compiler
+ and a C++ compiler. (The C compiler is needed to compile some
+ of the GNU libraries)
+ </p>
+ <p>
+ Unfortunately, we only have access to a couple of different
+ Unix machines. ht://Dig has been tested on these machines:
+ </p>
+ <ul>
+<!--
+ <li>
+ Sun Solaris 2.5 SPARC (using gcc/g++ 2.7.2)
+ </li>
+ <li>
+ Sun SunOS 4.1.4 SPARC (using gcc/gcc 2.7.0)
+ </li>
+ <li>
+ HP/UX A.09.01 (using gcc/g++ 2.6.0)
+ </li>
+ <li>
+ IRIX 5.3 (SGI C++ compiler. Don't know the version)
+ </li>
+ <li>
+ Debian Linux 2.0 (using egcs 1.1b)
+ </li>
+-->
+ <li>
+ FreeBSD 4.6 (using gcc 2.95.3) <!-- lha -->
+ </li>
+ <li>
+ Mandrake Linux 8.2 (using gcc 3.2) <!-- lha -->
+ </li>
+ <li>
+ Debian, 2.2.19 kernel (using gcc 2.95.4) <!-- lha -->
+ </li>
+ <li>
+ Debian on an Alpha <!-- lha -->
+ </li>
+ <li>
+ RedHat 7.3, 8.0 <!-- Jim Cole -->
+ </li>
+ <li>
+ Sun Solaris 2.8 = SunOS 5.8 (using gcc 3.1) <!-- lha -->
+ </li>
+ <li>
+ Sun Solaris 2.8 = SunOS 5.8 (using Sun's cc / g++ 3.1) <!-- lha -->
+ </li>
+ <li>
+ Mac OS X 10.2 (using gcc) <!-- Jim Cole -->
+ </li>
+
+ </ul>
+ There are reports of ht://Dig working on a number of other platforms.
+ <h3>
+ libstdc++
+ </h3>
+ <p>
+ If you plan on using g++ to compile ht://Dig, you have to make
+ sure that libstdc++ has been installed. Unfortunately, libstdc++ is a
+ separate package from gcc/g++. You can get libstdc++ from the
+ <a href="ftp://ftp.gnu.org/pub/gnu/">GNU software archive</a>.
+ </p>
+
+<!-- The current Makefiles don't use include...
+ <h3>
+ Berkeley 'make'
+ </h3>
+ <p>
+ The building relies heavily on the make program. The problem
+ with this is that not all make programs are the same. The
+ requirement for the make program is that it understands the
+ 'include' statement as in
+ </p>
+ <blockquote>
+ <code>include somefile otherfile</code>
+ </blockquote>
+ <p>
+ The Berkeley 4.4 make program doesn't use this syntax, instead
+ it wants
+ </p>
+ <blockquote>
+ <code>.include "somefile"</code><br>
+ <code>.include "otherfile"</code>
+ </blockquote>
+ <p>
+ and hence it cannot be used to build ht://Dig.
+ </p>
+ <p>
+ If your make program doesn't understand the right 'include'
+ syntax, it is best if you get and install
+ <a href="ftp://ftp.gnu.org/pub/gnu/">gnumake</a> before you try
+ to compile everything. The alternative is to change all the
+ Makefiles.
+ </p>
+-->
+ <hr noshade>
+ <h1>
+ Disk space requirements
+ </h1>
+ <p>
+ The search engine will require lots of disk space to store
+ its databases. Unfortunately, there is no exact formula to
+ compute the space requirements. It depends on the number of
+ documents you are going to index but also on the various
+ options you use.
+ </p>
+ <p>As a temporary measure, 3.2 betas use a very inefficient
+ database structure to enable phrase searching. This will be
+ fixed before the release of 3.2.0. Currently, indexing a site of
+ around 10,000 documents gives a database of around 400MB using the
+ default setting for
+ <a href="attrs.html#max_doc_size">maximum document size</a> and storing the
+ <a href="attrs.html#max_head_length">first 50,000 bytes of each document</a>
+ to enable context to be displayed.
+ <!-- To give you an idea of the space
+ requirements, here is what I have deduced from our own
+ database size at San Diego State University.
+ </p>
+ <p>
+ If you keep around the wordlist database (for update digging
+ instead of initial digging) I found that multiplying the
+ number of documents covered by 12,000 will come pretty close
+ to the space required.
+ </p>
+ <p>
+ We have about 13,000 documents:
+ </p>
+<pre>
+ 13,000
+ 12,000 x
+ ===========
+ 156,000,000
+</pre>
+ or about 150 MB.
+ <p>
+ Without the wordlist database, the factor drops down to about
+ 7500:
+ </p>
+<pre>
+ 13,000
+ 7,500 x
+ ===========
+ 97,500,000
+</pre>
+ or about 93 MB.
+-->
+ <p>
+ Keep in mind that we keep at most 50,000 bytes of each
+ document. This may seem like a lot, but most documents aren't very
+ big and it gives us a big enough chunk to almost always show
+ an excerpt of the matches.
+ </p>
+ <p>
+ You may find that if you store most of each document, the
+ databases are almost the same size, or even larger than the
+ documents themselves! Remember that if you're storing a
+ significant portion of each document (say 50,000 bytes as
+ above), you have that requirement, plus the size of the word
+ database and all the additional information about each document
+ (size, URL, date, etc.) required for searching.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/rundig.html b/debian/htdig/htdig-3.2.0b6/htdoc/rundig.html
new file mode 100644
index 00000000..0ac320e4
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/rundig.html
@@ -0,0 +1,190 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: rundig
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ rundig
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <dl>
+ <dd>
+ <h2>
+ Synopsis
+ </h2>
+ </dd>
+ <dd>
+ rundig [<em>options</em>]
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Description
+ </h2>
+ </dd>
+ <dd>
+ Rundig is a shell script that builds the databases necessary
+ for a typical ht://Dig setup. It runs <a href="htdig.html">
+ htdig</a> first to build the initial database, then it runs
+ <a href="htpurge.html">htpurge</a> to clean up the document
+ and word databases that were created by htdig.
+ It then runs <a href="htnotify.html">htnotify</a>, and finally
+ runs <a href="htfuzzy.html">htfuzzy</a> if necessary, to build
+ the endings and synonyms databases if they're missing or
+ outdated. Rundig uses the "-i" option to htdig, so it always
+ reindexes your web site from scratch when you run it.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Options
+ </h2>
+ </dd>
+ <dd>
+ <dl compact>
+ <dt>
+ -a
+ </dt>
+ <dd>
+ Use alternate work files. Tells htdig and htpurge to
+ append <em>.work</em> to database files, causing a
+ second copy of the database to be built. This allows
+ the original files to be used by htsearch during the
+ indexing run. After htdig and htpurge are done building
+ the .work database files, rundig will move them into
+ place, replacing the original files.
+ <br>This option only works for building a new second
+ copy of the database from scratch, and not for updating
+ an existing database. To do that would require further
+ customization of the rundig script. It's not sufficient
+ to simply remove the "-i" option from htdig in the
+ script. See the <em>Customization</em> section below.
+ </dd>
+ <dt>
+ -c <em>configfile</em>
+ </dt>
+ <dd>
+ Use the specified <i>configfile</i> file instead of the
+ default.
+ </dd>
+ <dt>
+ -s
+ </dt>
+ <dd>
+ Print statistics about the dig after completion.
+ </dd>
+ <dt>
+ -v
+ </dt>
+ <dd>
+ Verbose mode. This increases the verbosity of the
+ programs. Using more than 2 is probably only useful for
+ debugging purposes. The default verbose mode (using
+ only one -v) gives a nice progress report while
+ digging and merging. This progress report can be a bit
+ cryptic, but is explained in the <a href="htdig.html">
+ htdig</a> documentation.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Files
+ </h2>
+ </dd>
+ <dd>
+ <dl>
+ <dt>
+ <a href="attrs.html#bin_dir">BIN_DIR</a>/htdig, htpurge, htnotify, htfuzzy
+ </dt>
+ <dd>
+ The executable programs called by rundig.
+ </dd>
+ <dt>
+ <a href="attrs.html#config_dir">CONFIG_DIR</a>/htdig.conf
+ </dt>
+ <dd>
+ The default configuration file.
+ </dd>
+ <dt>
+ <a href="attrs.html#common_dir">COMMON_DIR</a>/english.0, english.aff, synonyms
+ </dt>
+ <dd>
+ The default fuzzy match dictionary (input) files.
+ </dd>
+ </dl>
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ See Also
+ </h2>
+ </dd>
+ <dd>
+ <a href="htdig.html">htdig</a>,
+ <a href="htpurge.html">htpurge</a>,
+ <a href="htnotify.html">htnotify</a>,
+ <a href="htfuzzy.html">htfuzzy</a>,
+ <a href="htsearch.html" target="_top">htsearch</a>, and
+ <a href="attrs.html">Configuration file format</a>.
+ </dd>
+ </dl>
+ <dl>
+ <dd>
+ <h2>
+ Customization
+ </h2>
+ </dd>
+ <dd>
+ Because rundig is a shell script, it's easy to customize
+ it as needed. It's also necessary to customize it if you
+ change the settings of your
+ <a href="attrs.html#database_dir">database_dir</a> or
+ <a href="attrs.html#common_dir">common_dir</a> attributes
+ (you'll need to make the corresponding changes to the DBDIR
+ and COMMONDIR variables in the script), if you decide to
+ use other fuzzy algorithms that need their own databases
+ rebuilt, or if you change the names of the endings or
+ synonyms databases or source files. Before customizing the
+ script, be sure to familiarize yourself with the individual
+ programs in the package and how they interact.
+ <br>
+ <br>A common mistake is to assume that you can simply take
+ the "-i" option off the htdig command line in rundig to make
+ rundig update rather than reindexing from scratch. This will
+ work as long as you don't use the "-a" option to rundig.
+ The file manipulations that rundig -a does assume that you've
+ created a new database from scratch, and all files are moved
+ from .work files to the original file locations. However,
+ nothing is done to move or copy the
+ db.wordlist and db.docdb to .work files before running htdig.
+ The end result is that a rundig -a will still reindex
+ your site from scratch. When it comes to more drastic changes
+ such as this, many users choose to write their own custom
+ script, rather than modifying rundig. For example, see the
+ <a href="http://www.htdig.org/files/contrib/scripts/rundig.sh">
+ rundig.sh</a> script in our contributed files.
+ </dd>
+ </dl>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/06/12 13:39:13 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/running.html b/debian/htdig/htdig-3.2.0b6/htdoc/running.html
new file mode 100644
index 00000000..36445f92
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/running.html
@@ -0,0 +1,137 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ Running ht://Dig
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Running ht://Dig
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ This document will attempt to show the steps needed to use
+ the ht://Dig system, after <a href="where.html">obtaining</a>,
+ <a href="install.html">installing</a> and
+ <a href="config.html">configuring</a> it.<br>
+ The main sections are:
+ </p>
+ <ul>
+ <li>
+ <a href="#rundig">Building the databases</a>
+ </li>
+ <li>
+ <a href="#testing">Testing and troubleshooting</a>
+ </li>
+ <li>
+ <a href="#maintenance">Maintaining the system</a>
+ </li>
+ </ul>
+ <hr noshade>
+ <h2>
+ <a name="rundig">Building the databases</a>
+ </h2>
+ <p>
+ After setting up all the <a href="config.html">configuration
+ files</a>, you can build the required databases simply by running
+ <a href="rundig.html">rundig</a>. This script will run
+ <a href="htdig.html">htdig</a> first to build the initial database,
+ then it runs <a href="htpurge.html">htpurge</a> to clean up the
+ document and word databases that were created by htdig.
+ It then runs <a href="htnotify.html">htnotify</a>, and finally
+ runs <a href="htfuzzy.html">htfuzzy</a> if necessary, to build
+ the endings and synonyms databases if they're missing or outdated.
+ The rundig script can be customized for your specific needs, or
+ you can develop your own script that runs any of these programs.
+ Read the reference sections for each of these programs to get a
+ better understanding of what each one does.
+ </p>
+ <p>
+ The <a href="htfuzzy.html">htfuzzy</a> program deserves a bit more
+ explaining. It is used to build databases that are used by some
+ of the fuzzy match algorithms selected by
+ <a href="htsearch.html" target="_top">htsearch</a>'s
+ <a href="attrs.html#search_algorithm">search_algorithm</a>
+ attribute. The <em>endings</em> and <em>synonyms</em> algorithms
+ use static dictionaries, so their databases only need to be rebuilt
+ by htfuzzy when the dictionary files are changed, or when ht://Dig
+ is initially installed. The rundig script handles the building of
+ these two databases as needed for the default setup. A few of the
+ other fuzzy match algorithms use databases that are derived from
+ the word database built by htdig/htpurge, so if you use these
+ algorithms you should rebuild their databases with htfuzzy every
+ time you update your index. This isn't done in rundig, but the
+ comments in the script show where you can add your htfuzzy commands
+ as needed. Some fuzzy match algorithms don't need their own
+ database, as they just operate on the word database, so they don't
+ need any special setup.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="testing">Testing and troubleshooting</a>
+ </h2>
+ <p>
+ Once the databases are built, you should test out htsearch.
+ It's recommended that you first try a few queries running
+ htsearch on the command line, as it helps to separate problems
+ that are specific to ht://Dig from web server or CGI problems.
+ Once you have that working, try running htsearch from your web
+ browser, using the search form you configured.
+ </p>
+ <p>
+ If you run into problems at any point in the building and testing
+ of your databases, there are many things you can do. All ht://Dig
+ programs feature a <strong>-v</strong> option to get some debugging
+ output. The more of these options you put on the command line, the
+ more output you'll usually get. To get help with common problems,
+ or with interpreting some of the debugging output, please look to
+ the ht://Dig <a href="FAQ.html">FAQ</a> (frequently asked questions)
+ as your first line of support. Most of the problems that ht://Dig
+ users have are explained there, and the on-line
+ <a href="http://www.htdig.org/FAQ.html">FAQ on the website</a> is
+ updated frequently as new problems arise. The FAQ will also tell
+ you where you can turn if your question isn't answered there.
+ Remember that questions may not be phrased exactly as you'd state
+ them, so look carefully for anything that seems similar to the
+ problem you're trying to solve.
+ </p>
+ <hr noshade>
+ <h2>
+ <a name="maintenance">Maintaining the system</a>
+ </h2>
+ <p>
+ Once everything is running, you have to deal with the question of
+ how you can keep everything running and up to date. The databases
+ don't automatically update themselves, of course, so you'll need
+ to figure out how to schedule automatic updates of the database.
+ Most users use the <strong>crontab</strong> facility on their
+ systems to schedule daily or weekly updates of their database.
+ This can be as simple as running "rundig" or "rundig -a" from
+ your crontab, or from a file in /etc/cron.daily if your system
+ uses this, to rebuild from scratch every night. For a small site,
+ this may take only a few minutes to run. Other sites will run
+ more elaborate update scripts, to update their existing databases
+ nightly, and schedule complete rebuilds less frequently, such as
+ monthly.
+ </p>
+ <p>
+ You need to pay close attention to how long updates take to run.
+ There are no database lockouts in ht://Dig, so you don't want to
+ schedule update or reindexing runs so frequently that they run
+ into each other.
+ </p>
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/triangle.gif b/debian/htdig/htdig-3.2.0b6/htdoc/triangle.gif
new file mode 100644
index 00000000..4cf2baf9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/triangle.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/up.gif b/debian/htdig/htdig-3.2.0b6/htdoc/up.gif
new file mode 100644
index 00000000..6272042f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/up.gif
Binary files differ
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/upgrade.html b/debian/htdig/htdig-3.2.0b6/htdoc/upgrade.html
new file mode 100644
index 00000000..9ea2708e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/upgrade.html
@@ -0,0 +1,73 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Upgrading to 3.2.x
+ </title>
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Upgrading from 3.1.x to 3.2.x
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ This guide is intended for those upgrading from 3.1.x or
+ earlier versions to 3.2.x. The best advice is
+ <em>don't</em>. That is, it is highly recommended that you
+ install the 3.2 release in a separate directory tree and treat
+ everything as if you were installing it for the first
+ time. This is not just because this is a beta release. There
+ are several main reasons for this:
+ <ul>
+ <li>The htmerge program now <strong>only</strong> merges. You
+ do not need to run it after running htdig as before. The
+ &quot;cleanup&quot; duties have been moved to the <a
+ href="htpurge.html">htpurge</a> program that also deletes
+ URLs. The sample rundig script is modified accordingly.</li>
+ <li>The default directory structure of this release is
+ different from previous releases, more in line with other UNIX
+ programs.</li>
+ <li>The databases are all in different formats. This version
+ cannot read databases from previous versions since the new
+ formats have significantly more information and this cannot be
+ reconstructed from old databases. This goes for fuzzy
+ databases as well. Future versions can use the new <a
+ href="htdump.html">htdump</a> and <a
+ href="htload.html">htload</a> programs to dump and load
+ databases to/from text files. These will also allow databases
+ to be transferred between platforms.
+ </li>
+ </ul>
+ Text files, such as config files, result templates, etc. are
+ the same between releases, so you will probably wish to copy
+ these to your new installation.
+ </p>
+
+ <p>
+ Additionally, you may find things a bit rough at first. We
+ appreciate your comments and suggestions, especially in the
+ form of performance comparisons with the 3.1.x
+ releases. Additionally, there are several things planned for
+ the final 3.2.0 release that have not been implemented yet:
+ <ul>
+ <li>Proximity (near) searching.</li>
+ <li>Phrase searching method (in addition to and, or, and boolean).</li>
+ <li>Field-based searching--restricting to a header, title, etc.</li>
+ <li>Date-based searching--restricting to a range of dates.</li>
+ <li>Implementation of a more accurate scoring system.</li>
+ <li>Parallel indexing and searching.</li>
+ <li>Support for https: and ftp: access</li>
+ </ul>
+ Please send comments, patches, etc. to the &lt;<a
+ href="mailto:htdig3-dev@htdig.org">htdig3-dev@htdig.org</a>&gt;
+ mailing list.
+
+ <hr size="4" noshade>
+ Last modified: $Date: 2004/05/28 13:15:19 $
+ </body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/uses.html b/debian/htdig/htdig-3.2.0b6/htdoc/uses.html
new file mode 100644
index 00000000..12186f4b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/uses.html
@@ -0,0 +1,652 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>ht://Dig: Sites that Use ht://Dig</title>
+ </head>
+<body bgcolor="#eef7ff">
+ <h1>Uses of ht://Dig</h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for
+ license information.
+ </p>
+ <hr size="4" noshade>
+ <p>
+ The following is a list of publicly available sites that use
+ ht://Dig. If you use ht://Dig on your public site and would like it to be on this list as
+ well, please fill out the form below. Your link will be added in the next update.<br>
+ <form method="get" action="http://cgi.htdig.org/cgi-bin/cgiemail/uses.txt">
+ Your name: <input name="required-name" size=40><br>
+ Your e-mail: <input name="required-mail"><br>
+ Page title: <input name="title" size=50><br>
+ Page URL: <input name="required-url" size=50><br>
+ <input type="hidden" name="success" value="/linksub.html">
+ <input type="submit" name="submit" value="Add the Link">
+ <input type="reset" name="reset" value="Clear">
+ </form>
+ Please note that this list does not represent any sort of
+ endorsement or recommendation of the sites or companies
+ listed. It's merely a list of known sites that use
+ ht://Dig. Additionally we maintain a related <a
+ href="isp.html">list of ISPs</a> that offer ht://Dig as part of
+ their web hosting services.
+ </p>
+<ul>
+<li><a target="_top" href="http://www.htdig.org/">ht://Dig Search Software (yes, the developers use it)</a></li>
+</ul>
+
+<strong>A</strong>
+<ul>
+<li><a target="_new" href="http://www.rubin.ch/pgp/pgp">About PGP</a></li>
+<li><a target="_new" href="http://www.accessreports.com/statutes.html">Access Reports, Freedom of Information and Privacy</a></li>
+<li><a target="_new" href="http://www.active-high.co.uk/">Active High, Ltd.</a></li>
+<li><a target="_new" href="http://www.affaires-publiques.com/">Affaires publiques</a></li>
+<li><a target="_new" href="http://www.agbms.ch">AGB Media Services</a></li>
+<li><a target="_new" href="http://www.zf.jcu.cz/htdig/">Agricultural Faculty, University of South Bohemia</a></li>
+<li><a target="_new" href="http://www.akadia.com/">Akadia Information Technology, Bern, Switzerland</a></li>
+<li><a target="_new" href="http://www.aliacom.fr/">Aliacom</a></li>
+<li><a target="_new" href="http://www.alfred.edu/">Alfred University</a></li>
+<li><a target="_new" href="http://www.alphastate.com/">Alphastate Productions</a></li>
+<li><a target="_new" href="http://www.alzheimers-westput.org">Alzheimer's Association: Westchester / Putnam County</a></li>
+<li><a target="_new" href="http://www.fb.org/">American Farm Voice of Agriculture</a></li>
+<li><a target="_new" href="http://www.melander.dk/">Anders Melander's Lean Mean Delphi Machine</a></li>
+<li><a target="_new" href="http://www.andyary.com/">Andy's Art Attack</a></li>
+<li><a target="_new" href="http://www.projects.ml.org/">Another Linux Project Server</a></li>
+<li><a target="_new" href="http://gbi.aotea.org/">Aotea - Great Barrier Island</a></li>
+<li><a target="_new" href="http://www.uky.edu/OtherOrgs/AppalFor/">AppalFor - Appalachian Sustainable Forestry</a></li>
+<li><a target="_new" href="http://aquaweb.pair.com/">AquaWeb Fish Resources</a></li>
+<li><a target="_new" href="http://www.archis.org/">Archis, a magazine on architecture, city, and visual culture</a></li>
+<li><a target="_new" href="http://infosoc.uni-koeln.de/php/">Archive of German PHP Mailinglist</a></li>
+<li><a target="_new" href="http://acept.la.asu.edu/">Arizona Collaborative for Excellence in the Preparation of Teachers</a></li>
+<li><a target="_new" href="http://wildcat.arizona.edu/">Arizona Daily Wildcat</a></li>
+<li><a target="_new" href="http://www.east.asu.edu/">Arizona State University East</a></li>
+<li><a target="_new" href="http://www.artdsm.com/">The Art Department</a></li>
+<li><a target="_new" href="http://sgml.dgsca2.unam.mx/abcd/a/index.htm">Arte y Bibliotecas en los Medios. DGSCA / UNAM</a></li>
+<li><a target="_new" href="http://airlab.elet.polimi.it/">Artificial Intelligence &amp; Robotic Lab - Politecnico di Milano</a></li>
+<li><a target="_new" href="http://www.aslib.co.uk/">Aslib, The Association for Information Management</a></li>
+<li><a target="_new" href="http://adl.opengroup.org/">Assertion Definition Language Homepage</a></li>
+<li><a target="_new" href="http://w3.aspwire.net/">Associated Student Press</a></li>
+<li><a target="_new" href="http://www.aui.fr/">Association des Utilisateurs d'Internet</a></li>
+<li><a target="_new" href="http://www.masscolleges.org/">Association of Independent Colleges and Universities in Massachusetts</a></li>
+<li><a target="_new" href="http://www.astroinfo.ch/">Astronomical Information in Cyberspace</a></li>
+<li><a target="_new" href="http://www.astronomy.net/">Astronomy Net</a></li>
+<li><a target="_new" href="http://www.anart.no/">Atelier Nord</a></li>
+<li><a target="_new" href="http://www.atlasf1.com">Atlas F1 - The Journal of Formula One Motorsport</a></li>
+<li><a target="_new" href="http://www.austria.org/">Austria Press and Information Service</a></li>
+<li><a target="_new" href="http://www.alia.org.au/">Australian Library and Information Association</a></li>
+<li><a target="_new" href="http://www.austria-tourism.at/">Austrian National Tourist Office</a></li>
+<li><a target="_new" href="http://www.ansto.gov.au/">Australian Nuclear Science and Technology Organisation (ANSTO)</a></li>
+<li><a target="_new" href="http://www.parlament.gv.at/">The Austrian Parliament</a></li>
+</ul>
+
+<strong>B</strong>
+<ul>
+<li><a target="_new" href="http://www.bps.go.id/">Badan Pusat Statistik, Republic of Indonesia</a></li>
+<li><a target="_new" href="http://www.woelbern.de/">Bankhaus Woelbern</a></li>
+<li><a target="_new" href="http://www.barwil.com/">Barwil Agencies AS</a></li>
+<li><a target="_new" href="http://resnet.uoregon.edu/~gurney_j/">Base of Operations for John-Mark Gurney</a></li>
+<li><a target="_new" href="http://www.beeker.net/">Beeker.NET</a></li>
+<li><a target="_new" href="http://bezip.de/">BeZip - BeOS Shareware-Collection</a></li>
+<li><a target="_new" href="http://www.bibliomania.com/">Bibliomania: The Network Library</a></li>
+<li><a target="_new" href="http://www.uni-flensburg.de/">Bildungswissenschaftlichen Hochschule Flensburg, Universit&auml;t</a></li>
+<li><a target="_new" href="http://www.blizzardgames.com/">Blizzard Games Network</a></li>
+<li><a target="_new" href="http://www.bolek.com">Bolek's Lair</a></li>
+<li><a target="_new" href="http://www.boll.ch/">BOLL Engineering AG</a></li>
+<li><a target="_new" href="http://www.bluewaterweb.com/">Bluewater Books &amp; Charts</a></li>
+<li><a target="_new" href="http://bmerc-www.bu.edu/index.html">BMERC: Computational Biology</a></li>
+<li><a target="_new" href="http://www.bcc.cuny.edu/">Bronx Community College</a></li>
+<li><a target="_new" href="http://www.bruker.de/analytic/nmr-dep/nmr-dep.htm">Bruker NMR</a></li>
+<li><a target="_new" href="http://webclass.cqu.edu.au/">Building a Web-based Education System</a></li>
+<li><a target="_new" href="http://www.bullets.net/">Bullets.Net</a></li>
+<li><a target="_new" href="http://www.bff-online.de/">Bundesamt f&uuml;r Finanzen (BfF)</a></li>
+<li><a target="_new" href="http://www.brandenburg.de">Bundesland Brandenburg (Germany)</a></li>
+<li><a target="_new" href="http://www.bcl.co.nz">Business Computers Ltd, Wellington, New Zealand</a></li>
+</ul>
+
+<strong>C</strong>
+<ul>
+<li><a target="_new" href="http://www.callaw.com/">Cal Law</a></li>
+<li><a target="_new" href="http://www.caldera.com/">Caldera, Inc. Vendor of OpenLinux</a></li>
+<li><a target="_new" href="http://www.courtinfo.ca.gov">California Courts</a></li>
+<li><a target="_new" href="http://www.csusb.edu/">California State University, San Bernardino</a></li>
+<li><a target="_new" href="http://www.damtp.cam.ac.uk/">Cambridge University Mathematics</a></li>
+<li><a target="_new" href="http://www.tibet.ca/">Canada Tibet Committee</a></li>
+<li><a target="_new" href="http://www.castrol.de/">Castrol</a></li>
+<li><a target="_new" href="http://www.cua.edu/">Catholic University of America</a></li>
+<li><a target="_new" href="http://www.cavaliers.org/">The Cavaliers Drum &amp; Bugle Corps</a></li>
+<li><a target="_new" href="http://www.cd-info.com">The CD Information Center</a></li>
+<li><a target="_new" href="http://www.ci.cambridge.ma.us/">City of Cambridge, Massachusetts</a></li>
+<li><a target="_new" href="http://www.cjc-online.ca/">Canadian Journal of Communication</a></li>
+<li><a target="_new" href="http://www.cta-otc.gc.ca/">Canadian Transportation Agency</a></li>
+<li><a target="_new" href="http://lawwww.cwru.edu/">Case Western Reserve University School of Law</a></li>
+<li><a target="_new" href="http://info.ccone.at/">CCONE</a></li>
+<li><a target="_new" href="http://www.centenary.edu/">Centenary College of Louisiana</a></li>
+<li><a target="_new" href="http://www.centre.edu/">Centre College</a></li>
+<li><a target="_new" href="http://www.cenatoulouse.dgac.fr/">Centre d'&Eacute;tudes de la Navigation Aerienne</a></li>
+<li><a target="_new" href="http://www.cl-cpa.com/">Cheatham &amp; Lansford, CPA</a></li>
+<li><a target="_new" href="http://www.cheek.com/">Cheek Consulting</a></li>
+<li><a target="_new" href="http://www.optc.com/chemed-l-thread">Chemistry Education Discussion List Archive</a></li>
+<li><a target="_new" href="http://www.chemshow.com/">ChemShow</a></li>
+<li><a target="_new" href="http://www.chemie.uni-bonn.de/">Chemische Institute der Universitaet Bonn/Germany</a></li>
+<li><a target="_new" href="http://www.ivcfne.org/scripture.shtml">Christian Documents/Resources (Lydia)</a></li>
+<li><a target="_new" href="http://www.ladwp.com/">City of Los Angeles Dept of Water and Power</a></li>
+<li><a target="_new" href="http://ciumix.ci.uminho.pt/pesquisar/">CIUMix -- Pesquisar</a></li>
+<li><a target="_new" href="http://www.sneezy.org/clarinet/Databases/">The Clarinet Databases</a></li>
+<li><a target="_new" href="http://www.artdsm.com/clark/">The Clark / Stoecker Family Website</a></li>
+<li><a target="_new" href="http://www.clarkson.edu/">Clarkson University</a></li>
+<li><a target="_new" href="http://www.citfi.org/">Club de Informática y Telemática de la Facultad de Informatica Universidad Politécnica de Madrid</a></li>
+<li><a target="_new" href="http://www.dsm.org/">Club DSM</a></li>
+<li><a target="_new" href="http://dobc.unipv.it/scrineum/CDB/cdbhome.htm">Codice Diplomatico Bresciano</a></li>
+<li><a target="_new" href="http://www.coinfo.hu/">COINFO</a></li>
+<li><a target="_new" href="http://www.anglocampinas.com.br/">Colégio Anglo/Campinas</a></li>
+<li><a target="_new" href="http://www.cmmtypsych.net/">Community Psychology Network</a></li>
+<li><a target="_new" href="http://www.cdo.it/">Compagnia delle Opere - Official Website</a></li>
+<li><a target="_new" href="http://www.considines.com.au">Considines Customs Brokers</a></li>
+<li><a target="_new" href="http://www.cam-i.org/">Consortium for Advanced Manufacturing - International</a></li>
+<li><a target="_new" href="http://www.contesting.com/">Contesting Online</a></li>
+<li><a target="_new" href="http://www.contigo.com/">Contigo Software</a></li>
+<li><a target="_new" href="http://www.graphics.cornell.edu/">Cornell Program of Computer Graphics</a></li>
+<li><a target="_new" href="http://www.csp.org/">Council on Spiritual Practices</a></li>
+<li><a target="_new" href="http://www.cranfield.ac.uk/">Cranfield University</a></li>
+<li><a target="_new" href="http://ctan.loria.fr/">CTAN Navigator</a></li>
+<li><a target="_new" href="http://cyberspace.mit.edu/">cyberspace.mit.edu &amp; narrative.mit.edu</a></li>
+<li><a target="_new" href="http://www.bassplace.com/">The Cyberstore for Bass Players</a></li>
+<li><a target="_new" href="http://www.drumplace.com/">The Cyberstore for Drum Players</a></li>
+<li><a target="_new" href="http://www.guitarplace.com/">The Cyberstore for Guitar Players</a></li>
+<li><a target="_new" href="http://www.pianoplace.com/">The Cyberstore for Piano Players</a></li>
+<li><a target="_new" href="http://www.cygnus.com/">Cygnus Solutions</a></li>
+</ul>
+
+<strong>D</strong>
+<ul>
+<li><a target="_new" href="http://www.dailycal.org/">The Daily Californian Online</a></li>
+<li><a target="_new" href="http://www.dal.ca/">Dalhousie University</a></li>
+<li><a target="_new" href="http://www.dims.co.uk/">David Ives Musical Services</a></li>
+<li><a target="_new" href="http://www.deam.de/">DeaM - Die etwas andere Medizin</a></li>
+<li><a target="_new" href="http://www.deis.unibo.it/">DEIS Home Page</a></li>
+<li><a target="_new" href="http://www.cpt.stm.tudelft.nl/cpt/">DelftChemTech: Delft department of Chemical Technology</a></li>
+<li><a target="_new" href="http://www-ti.informatik.uni-tuebingen.de/englisch/">Department of Computer Engineering, University of Tübingen</a></li>
+<li><a target="_new" href="http://mcb.berkeley.edu/">Department of Molecular and Cell Biology, University of California, Berkeley</a></li>
+<li><a target="_new" href="http://oe.soest.hawaii.edu/">Department of Ocean Engineering, University of Hawaii</a></li>
+<li><a target="_new" href="http://www.phys.ttu.edu/">Department of Physics, Texas Tech University</a></li>
+<li><a target="_new" href="http://dimm.alc.upv.es/">Departamento de Ingenieria Mecanica y Materiales - EPSA</a></li>
+<li><a target="_new" href="http://www.depaul.edu/">Depaul University</a></li>
+<li><a target="_new" href="http://www.dernieres.com/">Dernieres.com : tous les titres, tous les jours</a></li>
+<li><a target="_new" href="http://www.desertsunonline.com/news/">The Desert Sun Newspaper- Palm Springs, CA</a></li>
+<li><a target="_new" href="http://www.messe.de/">Deutsche Messe AG Hannover</a></li>
+<li><a target="_new" href="http://www.disinfo.com/">|d|i|s|i|n|f|o|r|m|a|t|i|o|n|</a></li>
+<li><a target="_new" href="http://www.dolphins.ch/">Dolphins Network Systems</a></li>
+<li><a target="_new" href="http://www.downcity.net/">DownCity Internet</a></li>
+<li><a target="_new" href="http://www.drew.edu/">Drew University</a></li>
+</ul>
+
+<strong>E</strong>
+<ul>
+<li><a target="_new" href="http://eclipt.uni-klu.ac.at/">ECLiPt Homepage</a></li>
+<li><a target="_new" href="http://www.esil.univ-mrs.fr/">Ecole Superieure d'Ingenieurs de Luminy</a></li>
+<li><a target="_new" href="http://esm2.imt-mrs.fr/htdig/">L'Ecole Superieure de Mecanique de Marseille</a></li>
+<li><a target="_new" href="http://edtech.sandi.net/">Educational Technology @ San Diego City Schools</a></li>
+<li><a target="_new" href="http://www.eltern.de/">ELTERN Magazin</a></li>
+<li><a target="_new" href="http://ingenieria.udea.edu.co/">Engineering Faculty of University of Antioquia</a></li>
+<li><a target="_new" href="http://www.emsl.pnl.gov/">Environmental Molecular Sciences Laboratory, Pacific Northwest National Laboratory</a></li>
+<li><a target="_new" href="http://www.erachampion.com/">ERA Champion Real Estate</a></li>
+<li><a target="_new" href="http://www.kostat.com/">ESD &amp; EMI Shielding</a></li>
+<li><a target="_new" href="http://www.etext.org/">The ETEXT Archives</a></li>
+<li><a target="_new" href="http://www.4j.lane.edu/">Eugene School District 4J</a></li>
+<li><a target="_new" href="http://www.belgium.eu.net/">EUnet Belgium</a></li>
+<li><a target="_new" href="http://www.eic-npdc.org/">Euro Info Centre</a></li></ul>
+
+<strong>F</strong>
+<ul>
+<li><a target="_new" href="http://www.opensound.com/">4Front Technologies</a></li>
+<li><a target="_new" href="http://www-derecho.unex.es/">Facultad de Derecho de la Universidad de Extremadura (Spain)</a></li>
+<li><a target="_new" href="http://www.ed.brocku.ca/">Faculty of Education Brock University</a></li>
+<li><a target="_new" href="http://fanac.org/">FANAC Fan History Project</a></li>
+<li><a target="_new" href="http://www.finaid.org/">FinAid! The SmartStudent Guide to Financial Aid</a></li>
+<li><a target="_new" href="http://www.fitrex.com/">Fitrex.com - Your Free Internet Fitness Partner</a></li>
+<li><a target="_new" href="http://www.flame.org/htdig/">Flame.org</a></li>
+<li><a target="_new" href="http://www.hammerhart.de/floh/">Flohs Cannabis Archiv</a></li>
+<li><a target="_new" href="http://www.foebud.org/">FoeBuD e.V.</a></li>
+<li><a target="_new" href="http://cafe.fiifo.u-psud.fr/">Formation d'Ing&eacute;nieurs Informatique de la Facult&eacute; d'Orsay</a></li>
+<li><a target="_new" href="http://www.fz-rossendorf.de/">Forschungszentrum Rossendorf</a></li>
+<li><a target="_new" href="http://FourThought.com/">FourThought LLC: IT Consultants</a></li>
+<li><a target="_new" href="http://www.fhcrc.org/">Fred Hutchinson Cancer Research Center</a></li>
+<li><a target="_new" href="http://www.fxbbs.com/">FX Bulletin Board Systems</a></li>
+<li><a target="_new" href="http://www.flug.dk/">Fyns Linux User Group</a></li>
+</ul>
+
+<strong>G</strong>
+<ul>
+<li><a target="_new" href="http://www.gams.de/">GAMS GmbH</a></li>
+<li><a target="_new" href="http://www.rummage.co.za/">Garden Route Web Search</a></li>
+<li><a target="_new" href="http://www.gensuisse.ch/">Gen Suisse</a></li>
+<li><a target="_new" href="http://www.globalcardiology.org/">Global Cardiology Network</a></li>
+<li><a target="_new" href="http://www.gnu.org/">The GNU Project</a></li>
+<li><a target="_new" href="http://www.GoErie.com/">Go Erie.com</a></li>
+<li><a target="_new" href="http://www.godrules.net/">GodRules.NET</a></li>
+<li><a target="_new" href="http://www.gold.ac.uk/">Goldsmiths College - London University</a></li>
+<li><a target="_new" href="http://www.goofball.com">Goofball.com - Your Portal to Stupidity</a></li>
+<li><a target="_new" href="http://www.grapvinenet.com/">The GrapeVine Network, Ltd.</a></li>
+<li><a target="_new" href="http://www.greenengineer.com/">The Green Engineer</a></li>
+<li><a target="_new" href="http://www.greenpeace.org/">Greenpeace</a></li>
+<li><a target="_new" href="http://www.partenor.com/">Groupe Partenor</a></li>
+<li><a target="_new" href="http://developer.grup.com.tr/">Grup Yazilim Dokumantasyon Projesi</a></li>
+<li><a target="_new" href="http://www.gwensjewelry.com/">Gwen's Fine Jewelry and Gift Shop</a></li>
+</ul>
+
+<strong>H</strong>
+<ul>
+<li><a target="_new" href="http://lbs.hh.schule.de/">Hamburger Bildungsserver</a></li>
+<li><a target="_new" href="http://www.hsc.edu/">Hampden-Sydney College</a></li>
+<li><a target="_new" href="http://www.handmade.com/">Handmade Software</a></li>
+<li><a target="_new" href="http://www.hiid.harvard.edu/">Harvard Institute for International Development</a></li>
+<li><a target="_new" href="http://www.hnonline.de/">Heilbronn Online</a></li>
+<li><a target="_new" href="http://www.helpdepression.com/">HelpDepression.com</a></li>
+<li><a target="_new" href="http://www.hco.hagen.de">Historisches Centrum Online</a></li>
+<li><a target="_new" href="http://hno-worms.de">HNO-Gemeinschaftspraxis Worms</a></li>
+<li><a target="_new" href="http://home.odi.ca/">home.odi.ca</a></li>
+<li><a target="_new" href="http://www.hondamotor.ru">HONDA in Russia</a></li>
+<li><a target="_new" href="http://www.crhsc.umontreal.ca/">l'H&ocirc;pital Sacr&eacute;-Coeur de Montr&eacute;al</a></li>
+<li><a target="_new" href="http://web.horde.org/">The HORDE Project</a></li>
+<li><a target="_new" href="http://www.civicsymphony.org/">Houston Civic Symphony</a></li>
+<li><a target="_new" href="http://www.hpvelotechnik.com/">HP Velotechnik</a></li>
+<li><a target="_new" href="http://www.humanite.presse.fr/journal/recherche.html">L'Humanit&eacute; - Formulaire de recherche</a></li>
+</ul>
+
+<strong>I</strong>
+<ul>
+<li><a target="_new" href="http://iafol.iam.it/">IAFoL : it.arti.fumetti on line!</a></li>
+<li><a target="_new" href="http://www.8025.org/">IEEE 802.5 Token Ring Standards</a></li>
+<li><a target="_new" href="http://www.duesberg.com/">Infectious AIDS: Have We Been Misled?</a></li>
+<li><a target="_new" href="http://www.alte-buecher.de/">Informationen fuer Buechersammler und Antiquare</a></li>
+<li><a target="_new" href="http://www.itso.iu.edu/">Information Technology Security Office at Indiana University</a></li>
+<li><a target="_new" href="http://www.inonu.edu.tr/">Inonu University</a></li>
+<li><a target="_new" href="http://ib.ksc.komi.ru/">Institute of Biology of Komi Scientific Centre</a></li>
+<li><a target="_new" href="http://iecl.iuscomp.org/">Institute of European and Comparative Law</a></li>
+<li><a target="_new" href="http://www.imel.demokritos.gr/">Institute of Microelectronics - NCSR Demokritos</a></li>
+<li><a target="_new" href="http://www.interix.com/">Interix</a></li>
+<li><a target="_new" href="http://www.iasc.org.uk/">International Accounting Standards Committee</a></li>
+<li><a target="_new" href="http://www.hepatitis-c.de/">International Hepatitis C Forum</a></li>
+<li><a target="_new" href="http://www.invivo.net/">inVivo</a></li>
+<li><a target="_new" href="http://www.inwise.de/">InWise Internet Service</a></li>
+<li><a target="_new" href="http://www.ireq.ca/">IREQ (Hydro-Quebec's research institute)</a></li>
+<li><a target="_new" href="http://it.linst.ac.uk/">IT Dept for The London Institute</a></li>
+<li><a target="_new" href="http://www.grenoble.iufm.fr/">IUFM Grenoble</a></li>
+<li><a target="_new" href="http://iut.univ-tln.fr">IUT de Toulon et du Var</a></li>
+</ul>
+
+<strong>J</strong>
+<ul>
+<li><a target="_new" href="http://www.jcu.edu.au/">James Cook University, Australia</a></li>
+<li><a target="_new" href="http://www.kaldor.com.au/">John Kaldor Fabricmaker</a></li>
+</ul>
+
+<strong>K</strong>
+<ul>
+<li><a target="_new" href="http://www.kamat.com/">Kamat's Potpourri</a></li>
+<li><a target="_new" href="http://www.karicobs.com/">KARICO Business Services</a></li>
+<li><a target="_new" href="http://www.karobio.se/">Karo Bio AB</a></li>
+<li><a target="_new" href="http://www.kenyon.edu/">Kenyon College</a></li>
+<li><a target="_new" href="http://www.kevii.nus.edu.sg/">King Edward VII Hall, NUS.</a></li>
+<li><a target="_new" href="http://www.kitchenlink.com/">The Kitchen Link - What's Cooking on the Net</a></li>
+<li><a target="_new" href="http://kldp.org/">KLDP: Korean Linux Documentation Project</a></li>
+<li><a target="_new" href="http://www.konkursradet.no/">Konkursrådet</a></li>
+<li><a target="_new" href="http://atlantis.dvxs.nl/~kraken/">Kraken-mailinglijst</a></li>
+<li><a target="_new" href="http://www.kraeber.com/">Kraeber GmbH &amp; Co.</a></li>
+<li><a target="_new" href="http://www.plbio.kvl.dk/">KVL Department of Plant Biology</a></li>
+</ul>
+
+<strong>L</strong>
+<ul>
+<li><a target="_new" href="http://www.lirmm.fr/">Laboratoire d'Informatique, de Robotique et de Micro-Electronique de Montpellier</a></li>
+<li><a target="_new" href="http://www.linz.govt.nz">Land Information New Zealand</a></li>
+<li><a target="_new" href="http://larp-welt.de">LARP-Welt (search-engine and catalog for LARP-Pages/german)</a></li>
+<li><a target="_new" href="http://salvyhost.swappoint.com/ledders/home.html">Ledders Mailing List Archive</a></li>
+<li><a target="_new" href="http://www.letu.edu/">LeTourneau University</a></li>
+<li><a target="_new" href="http://www.ucolick.org/">Lick Observatory</a></li>
+<li><a target="_new" href="http://linuxcol.uniandes.edu.co">LinuxCOL: Usuarios Colombianos de Linux</a></li>
+<li><a target="_new" href="http://www.linux.com/">Linux.com</a></li>
+<li><a target="_new" href="http://metalab.unc.edu/LDP/">Linux Documentation Project</a></li>
+<li><a target="_new" href="http://www.linuxgazette.com/">Linux Gazette</a></li>
+<li><a target="_new" href="http://www.linuxjournal.com/">Linux Journal</a></li>
+<li><a target="_new" href="http://linuxkb.cheek.com/">Linux Knowledge Base</a></li>
+<li><a target="_new" href="http://linux-mandrake.com/">Linux-Mandrake</a></li>
+<li><a target="_new" href="http://www.linuxresources.com/">Linux Resources</a></li>
+<li><a target="_new" href="http://www.linuxtech.ch/">LinuxTech: Search Linux Newsgroups </a></li>
+<li><a target="_new" href="http://lwn.net/">Linux Weekly News</a></li>
+<li><a target="_new" href="http://www.lse.ac.uk/">The London School of Economics</a></li>
+<li><a target="_new" href="http://www.loonygames.com/">loonygames</a></li>
+<li><a target="_new" href="http://www.lycaeum.org/">the Lycaeum: The World's Largest Entheogenic Library and Community</a></li>
+</ul>
+
+<strong>M</strong>
+<ul>
+<li><a target="_new" href="http://www.mactech.com/">MacTech Magazine</a></li>
+<li><a target="_new" href="http://www.macup.com/">MACup Online</a></li>
+<li><a target="_new" href="http://www.mail-archive.com/">The Mail Archive</a></li>
+<li><a target="_new" href="http://www.mir.com.my/">Malaysian Internet Resources</a></li>
+<li><a target="_new" href="http://www.my-opensource.org">Malaysian Open-Source Group</a></li>
+<li><a target="_new" href="http://www.mvv.de/">Mannheimer Versorgungs- und Verkehrsgesellschaft</a></li>
+<li><a target="_new" href="http://www.mrs.org/">Materials Research Society</a></li>
+<li><a target="_new" href="http://www.maps.org">MAPS: Multidisciplinary Association for Psychedelic Studies</a></li>
+<li><a target="_new" href="http://www.mathconsult.ch/">MathConsult</a></li>
+<li><a target="_new" href="http://www.gorski.net/scripts/">Matt's Script Archive Mailing Lists</a></li>
+<li><a target="_new" href="http://www.mlanet.org/">Medical Library Association's Network of Health Information</a></li>
+<li><a target="_new" href="http://www.mediumgreen.com/">MediumGreen.com - A Home for Fiestaware Collectors</a></li>
+<li><a target="_new" href="http://www.cmhc.com/">Mental Health Net</a></li>
+<li><a target="_new" href="http://mercedes.pair.com/archives/">Mercedes Mailing List Archives</a></li>
+<li><a target="_new" href="http://www.MerlinsSolutions.com">Merlin's Solutions International Co., Ltd.</a></li>
+<li><a target="_new" href="http://www.messe-fn.de/">Messe Friedrichshafen</a></li>
+<li><a target="_new" href="http://www.metrolink.com/">Metro Link Incorporated</a></li>
+<li><a target="_new" href="http://www.michiganhotels.org/">Michigan Hotel Motel &amp; Resort Association</a></li>
+<li><a target="_new" href="http://www.cis.state.mi.us/mpsc">Michigan Public Service Commission</a></li>
+<li><a target="_new" href="http://www.mtac.pitt.edu/">Mid-Atlantic Technology Applications Center</a></li>
+<li><a target="_new" href="http://search.merp.com">Middle Earth Role Playing Search</a></li>
+<li><a target="_new" href="http://www.middleweb.com/">MiddleWeb</a></li>
+<li><a target="_new" href="http://www.mindspring.net/">MindSpring Enterprises</a></li>
+<li><a target="_new" href="http://www.pca.state.mn.us/">Minnesota Pollution Control Agency</a></li>
+<li><a target="_new" href="http://www.msstate.edu/">Mississippi State University</a></li>
+<li><a target="_new" href="http://www-tech.mit.edu/">The MIT Tech</a></li>
+<li><a target="_new" href="http://www.modulo.com.br/">Modulo Security Solutions</a></li>
+<li><a target="_new" href="http://www.monde-diplomatique.fr/">Le Monde Diplomatique</a></li>
+<li><a target="_new" href="http://www.monroe.lib.in.us/">Monroe County (IN) Public Library</a></li>
+<li><a target="_new" href="http://www.mortgagestats.com">MortgageStats.com</a></li>
+<li><a target="_new" href="http://www.Morrissey-solo.com/">Morrissey-solo.com</a></li>
+<li><a target="_new" href="http://www.mpei.ac.ru/">Moscow Power Engineering Institute</a></li>
+<li><a target="_new" href="http://www.mozilla.org/">Mozilla.org</a></li>
+<li><a target="_new" href="http://www.hgu.mrc.ac.uk/">MRC Human Genetics Unit (Edinburgh, UK)</a></li>
+<li><a target="_new" href="http://www.mahj.org/">Mus&eacute;e d'art et d'histoire du Judaisme</a></li>
+<li><a target="_new" href="http://www.music.ch/">music.ch - the yahoo of swiss music</a></li>
+<li><a target="_new" href="http://www.rockymusic.org/">The Musical World of Rocky Horror</a></li>
+</ul>
+
+<strong>N</strong>
+<ul>
+<li><a target="_new" href="http://library.gsfc.nasa.gov/">NASA Goddard Space Flight Center Library</a></li>
+<li><a target="_new" href="http://www.ksc.nasa.gov/">NASA - Kennedy Space Center</a></li>
+<li><a target="_new" href="http://shemesh.larc.nasa.gov/fm/">NASA Langley's Formal Methods Program</a></li>
+<li><a target="_new" href="http://www.nbi.ac.za/">National Botanical Institute, South Africa</a></li>
+<li><a target="_new" href="http://www.ncte.org/">National Council of Teachers of English</a></li>
+<li><a target="_new" href="http://www.nhgri.nih.gov/Search/">National Human Genome Research Institute</a></li>
+<li><a target="_new" href="http://www.nifl.gov/">National Institute for Literacy</a></li>
+<li><a target="_new" href="http://www.lib.cult.cu/">National Library of Cuba</a></li>
+<li><a target="_new" href="http://www.nrao.edu/">National Radio Astronomy Observatory</a></li>
+<li><a target="_new" href="http://www.nrc.ca/">National Research Council of Canada</a></li>
+<li><a target="_new" href="http://www.naturesgift.com/">Nature's Gift - Aromatherapy</a></li>
+<li><a target="_new" href="http://www.nllgg.nl/">Nederlandse Linux Gebruikers Groep</a></li>
+<li><a target="_new" href="http://www.nl.linux.org/">Nederlandse Linux Homepage</a></li>
+<li><a target="_new" href="http://mail-index.netbsd.org/mlist/">NetBSD Mailing Lists</a></li>
+<li><a target="_new" href="http://www.netbill.com/">NetBill Central</a></li>
+<li><a target="_new" href="http://netnews.org/">Netnews Association</a></li>
+<li><a target="_new" href="http://www.netobjectdays.org/">Net.ObjectDays 2000 conference</a></li>
+<li><a target="_new" href="http://www.netprolive.com/">NetProfessional Magazine</a></li>
+<li><a target="_new" href="http://www.networktechinc.com/">Network Technologies Inc.</a></li>
+<li><a target="_new" href="http://www.newham.gov.uk/">Newham Council - London Borough of Newham</a></li>
+<li><a target="_new" href="http://www.zerocut.com/">New Century Pictures-Avid Editing and Film Production</a></li>
+<li><a target="_new" href="http://www.nkl.spb.ru/">New Communication ltd. Saint-Petersburg, Russia</a></li>
+<li><a target="_new" href="http://www.nmt.edu/">New Mexico Institute of Mining and Technology</a></li>
+<li><a target="_new" href="http://www.nysaes.cornell.edu/">New York State Agricultural Experiment Station</a></li>
+<li><a target="_new" href="http://shop.nmc-schleswig.de/">NMC Nordland Medien Company, Schleswig</a></li>
+<li><a target="_new" href="http://www.csc.noaa.gov/">NOAA Coastal Services Center</a></li>
+<li><a target="_new" href="http://www.nobel.se/">The Nobel Foundation</a></li>
+<li><a target="_new" href="http://www.nokia.hu/">Nokia Magyarorszag</a></li>
+<li><a target="_new" href="http://www.nordlink.org/">Nordlink e.V.</a></li>
+<li><a target="_new" href="http://www.ndsu.nodak.edu/">North Dakota State University</a></li>
+<li><a target="_new" href="http://www.nlna.org/">Northern Liberties Neighbors</a></li>
+<li><a target="_new" href="http://www.nhh.no/">Norwegian School of Economics and Business Admin</a></li>
+<li><a target="_new" href="http://nw-raves.skylab.org/">NW-Raves List Archives</a></li>
+</ul>
+
+<strong>O</strong>
+<ul>
+<li><a target="_new" href="http://www.open.k12.or.us/">OPEN Clearinghouse</a></li>
+<li><a target="_new" href="http://www.oakland.edu/">Oakland University</a></li>
+<li><a target="_new" href="http://www.oasi.gpa.it/">OASI web pages</a></li>
+<li><a target="_new" href="http://www.oit.duke.edu/">Office of Information Technology, Duke University</a></li>
+<li><a target="_new" href="http://www.officepanelsystems.com/">Office Panel Systems</a></li>
+<li><a target="_new" href="http://www.arlo.net/">The Official Arlo Guthrie Homepage</a></li>
+<li><a target="_new" href="http://www.omegafilters.com/">Omega Optical, Inc.</a></li>
+<li><a target="_new" href="http://www.omni.it">Omnitech Internet service provider</a></li>
+<li><a target="_new" href="http://www.theonering.net/">TheOneRing.net - The complete Lord of the Rings site</a></li>
+<li><a target="_new" href="http://www.ocregister.com/">The Orange County Register</a></li>
+<li><a target="_new" href="http://www.outsourcing-search.com/">Outsourcing Information</a></li>
+<li><a target="_new" href="http://www.overheid.nl">Overheid.NL</a></li>
+<li><a target="_new" href="http://www.ox.ac.uk/">Oxford University</a></li>
+</ul>
+
+<strong>P</strong>
+<ul>
+<li><a target="_new" href="http://www.pnf.org/">Pacific Northwest Foundation</a></li>
+<li><a target="_new" href="http://bowling-france.net/">La Page du Bowling [LPDB]</a></li>
+<li><a target="_new" href="http://www.rabenou.org/">Pages juridiques de J&eacute;r&ocirc;me Rabenou</a></li>
+<li><a target="_new" href="http://www.laneta.apc.org/">Paginas en La Neta. Organizaciones de la Sociedad Civil Mexicana</a></li>
+<li><a target="_new" href="http://www.pcbs.org/">Palestinian Central Bureau of Statistics</a></li>
+<li><a target="_new" href="http://www.parliament.ge/">Parliament of Georgia</a></li>
+<li><a target="_new" href="http://www.pals.iastate.edu/">PALS - Partnerships to Advance Learning in Science</a></li>
+<li><a target="_new" href="http://www.pmsd.k12.pa.us/">Penn Manor School District - Lancaster PA</a></li>
+<li><a target="_new" href="http://www.pepsan.com">Pepsan.com</a></li>
+<li><a target="_new" href="http://www.psubs.org/">Personal Submersibles Organization</a></li>
+<li><a target="_new" href="http://pestalozzi.hbi-stuttgart.de/index2.html">Pestalozzi: biography-site in German (FH Stuttgart - HBI)</a></li>
+<li><a target="_new" href="http://www.pfaffenwinkel.net/">Pfaffenwinkel Region</a></li>
+<li><a target="_new" href="http://www.phelpsdodge.com/">Phelps Dodge Corporation</a></li>
+<li><a target="_new" href="http://www.physionet.org/">PhysioNet: Research Resource for Complex Physiologic Signals</a></li>
+<li><a target="_new" href="http://php.net/">PHP: Hypertext Preprocessor</a></li>
+<li><a target="_new" href="http://www.ptb.de/">Physikalisch-Technische Bundesanstalt</a></li>
+<li><a target="_new" href="http://www.plattsburgh.edu/">Plattsburgh State University</a></li>
+<li><a target="_new" href="http://playerbbs.com/">Player Net</a></li>
+<li><a target="_new" href="http://www.po-net.prato.it/home.htm">PO-Net Rete Civica Provinciale di Prato (Italia)</a></li>
+<li><a target="_new" href="http://www.politik-digital.de/">Politik Digital (German)</a></li>
+<li><a target="_new" href="http://plug.skylab.org/">Portland Linux Users' Group Archives</a></li>
+<li><a target="_new" href="http://www.providence.edu/">Providence College</a></li>
+<li><a target="_new" href="http://www.psn.net/">PSN.net</a></li>
+</ul>
+
+<strong>Q</strong>
+<ul>
+<li><a target="_new" href="http://www.quartier-rural.org/">Quartier-Rural.org - Accueil</a></li>
+<li><a target="_new" href="http://www.austinrocky.org/">Queerios!</a></li>
+</ul>
+
+<strong>R</strong>
+<ul>
+<li><a target="_new" href="http://www.rainy-day-games.com/">Rainy Day Games - Premiere Portland Gaming Center</a></li>
+<li><a target="_new" href="http://www.resi.at/">Regionales Informationssystem</a></li>
+<li><a target="_new" href="http://www.reohr.com/">The Reohr Group</a></li>
+<li><a target="_new" href="http://www.ris.org/">Research on Internet in Slovenia</a></li>
+<li><a target="_new" href="http://www.ishwar.com/">Resource for religious texts</a></li>
+<li><a target="_new" href="http://www.cm.deakin.edu.au/rhodos/">The RHODOS Project</a></li>
+<li><a target="_new" href="http://www.thesaurus.com/">Roget's Thesaurus</a></li>
+<li><a target="_new" href="http://www.rsmas.miami.edu/">Rosenstiel School of Marine and Atmospheric Science, University of Miami</a></li>
+<li><a target="_new" href="http://www.kvl.dk/">Royal Veterinary and Agricultural University of Denmark</a></li>
+<li><a target="_new" href="http://docs.rutgers.edu/">Rutgers University Computing Documentation</a></li>
+</ul>
+
+<strong>S</strong>
+<ul>
+<li><a target="_new" href="http://qmail.sailnet.com/">SailNet Mailing List Archives</a></li>
+<li><a target="_new" href="http://it.sac.edu.ph/">St. Anthony's College Information Technology</a></li>
+<li><a target="_new" href="http://www.unity.unitysam.ru/">Samara Municipal Charity Public Foundation "UNITY"</a></li>
+<li><a target="_new" href="http://samsara.law.cwru.edu/">Samsara's Web Server</a></li>
+<li><a target="_new" href="http://www.sdsu.edu/">San Diego State University</a></li>
+<li><a target="_new" href="http://www.sfsu.edu/">San Francisco State University</a></li>
+<li><a target="_new" href="http://www.saphari.com/">Saphari Communications</a></li>
+<li><a target="_new" href="http://www.sbanetweb.com/">SBA.NET.WEB Internet Consultants</a></li>
+<li><a target="_new" href="http://www.cit.nepean.uws.edu.au/">School of Computing and Information Technology, UWS Nepean</a></li>
+<li><a target="_new" href="http://www.scd.ucar.edu/">Scientific Computing Division, National Center for Atmospheric Research</a></li>
+<li><a target="_new" href="http://www-rocq.inria.fr/scilab/">Scilab</a></li>
+<li><a target="_new" href="http://www.sckcen.be/">SCK-CEN Belgian Nuclear Research Centre</a></li>
+<li><a target="_new" href="http://www.scriptics.com/">Scriptics - The Tcl Platform Company</a></li>
+<li><a target="_new" href="http://www.scotangling.co.uk/">ScotAngling fishing guide for Scotland</a></li>
+<li><a target="_new" href="http://www.scottwishard.com/">Scott Wishard, Realtor</a></li>
+<li><a target="_new" href="http://www.seewithlasik.com">SeeWithLasik - Lasik and Laser Vision Correction</a></li>
+<li><a target="_new" href="http://www.riponce.com/">Serendipity Software</a></li>
+<li><a target="_new" href="http://www.diderotp7.jussieu.fr/">Serveur Web de l'Universit&eacute; Paris 7</a></li>
+<li><a target="_new" href="http://limbo.ime.usp.br/">Servidor de HyperNews do IME/USP</a></li>
+<li><a target="_new" href="http://www.secular.org/">The Secular Web</a></li>
+<li><a target="_new" href="http://www.sheflug.co.uk/">Sheffield Linux User Group</a></li>
+<li><a target="_new" href="http://www.shopinryeny.com/">ShopInRyeNY.com Rye's Home on the Internet</a></li>
+<li><a target="_new" href="http://www.simplenet.com/">SimpleNet</a></li>
+<li><a target="_new" href="http://pilotlist.decollage.org/">Le site de la liste de diffusion pilote</a></li>
+<li><a target="_new" href="http://www.sslug.dk">Skåne Sjælland Linux User Group</a></li>
+<li><a target="_new" href="http://www.soziologie.ch/suchen/">soziologie.ch</a></li>
+<li><a target="_new" href="http://www.unboxed.com/">Software Unboxed</a></li>
+<li><a target="_new" href="http://www.infographix.qc.ca/">Solution Infographix</a></li>
+<li><a target="_new" href="http://www.sorcery.net/">SorceryNet IRC Network</a></li>
+<li><a target="_new" href="http://seek.cmcog.state.sc.us/">South Carolina Web Seek</a></li>
+<li><a target="_new" href="http://www.sna.org/">Southern Nursery Association</a></li>
+<li><a target="_new" href="http://www.sedl.org/">Southwest Educational Development Laboratory (SEDL)</a></li>
+<li><a target="_new" href="http://www.southwestern.edu/">Southwestern University</a></li>
+<li><a target="_new" href="http://harvest.rutgers.edu/projects/spectator">Spectator Project</a></li>
+<li><a target="_new" href="http://www.scrc.umanitoba.ca/">Spinal Cord Research Centre, University of Manitoba</a></li>
+<li><a target="_new" href="http://www.stamps.com">Stamps.com</a></li>
+<li><a target="_new" href="http://www.stardust.com/">Stardust.com</a></li>
+<li><a target="_new" href="http://www.cis.state.mi.us/">State of Michigan, Department of Consumer Industries</a></li>
+<li><a target="_new" href="http://www.state.wy.us/">The State of Wyoming</a></li>
+<li><a target="_new" href="http://www.stiefel.ca/">Stiefel Canada</a></li>
+<li><a target="_new" href="http://www.sssk.se/">Stockholms Skridskoseglarklubb, SSSK</a></li>
+<li><a target="_new" href="http://www.stone.com/">Stone Design - Mac OS X and WebObjects Software</a></li>
+<li><a target="_new" href="http://www.saas.nsw.edu.au/">Studio of Arts And Sciences - University of New South Wales</a></li>
+<li><a target="_new" href="http://www.summerworks.on.ca/">SummerWorks Theatre Festival</a></li>
+<li><a target="_new" href="http://www.summus.com/">Summus Limited</a></li>
+<li><a target="_new" href="http://www.hds.utc.fr/~huttler/support/">Support Technique UTC</a></li>
+<li><a target="_new" href="http://www.surfer.ch/">Surfer.ch - Swiss Internet Portal</a></li>
+<li><a target="_new" href="http://www.surfnetkids.com/">Surfing the Net with Kids</a></li>
+<li><a target="_new" href="http://www.surpac.com/">Surpac Software International</a></li>
+<li><a target="_new" href="http://srm.net/skoleside/">Svenn sin skoleside</a></li>
+<li><a target="_new" href="http://www.swin.edu.au/">Swinburne University of Technology</a></li>
+<li><a target="_new" href="http://search.ee.ethz.ch/">Swiss Federal Institute of Technology Zurich, Department of Electrical Engineering</a></li>
+<li><a target="_new" href="http://www.ch-open.ch/">Swiss Open Systems User Group</a></li>
+</ul>
+
+<strong>T</strong>
+<ul>
+<li><a target="_new" href="http://tacnet.missouri.org/">TACnet: Truman Area Community Network, Inc.</a></li>
+<li><a target="_new" href="http://www.tagnet.org/">TAGnet: Three Angels Global Networking</a></li>
+<li><a target="_new" href="http://www.tahoe.com/">Tahoe.com, Home of the Tahoe-Carson Area Newspapers</a></li>
+<li><a target="_new" href="http://www.tdyc.com/">TDYC!</a></li>
+<li><a target="_new" href="http://www.tu-chemnitz.de/">Technische Universit&auml;t Chemnitz</a></li>
+<li><a target="_new" href="http://www.tu-dresden.de/">Technische Universitaet Dresden</a></li>
+<li><a target="_new" href="http://www.tue.nl/">Technische Universiteit Eindhoven</a></li>
+<li><a target="_new" href="http://www.tenantsunion.org/">The Tenants Union</a></li>
+<li><a target="_new" href="http://www.tva.gov/">Tennessee Valley Authority</a></li>
+<li><a target="_new" href="http://www.tenon.com/">Tenon Intersystems</a></li>
+<li><a target="_new" href="http://terhune.net/">terhune.net</a></li>
+<li><a target="_new" href="http://www.ticam.utexas.edu/">Texas Institute for Computational and Applied Mathematics</a></li>
+<li><a target="_new" href="http://www.tyc.state.tx.us/">Texas Youth Commission</a></li>
+<li><a target="_new" href="http://www.trakis.hu/">TRAKIS Nagyk&#337;rös Kft. - Home of professional welding machines</a></li>
+<li><a target="_new" href="http://www.trentu.ca/">Trent University</a></li>
+<li><a target="_new" href="http://www.together.net/">Together Networks</a></li>
+<li><a target="_new" href="http://www.maths.tcd.ie/">Trinity College Dublin, Maths Department</a></li>
+<li><a target="_new" href="http://www.twu.ca/">Trinity Western University</a></li>
+<li><a target="_new" href="http://www.tudols.com/">TuDols: The Ultimate Directory of Linux Software</a></li>
+<li><a target="_new" href="http://www.amft.tu-graz.ac.at/">TU-Graz Institut f&uuml;r Apparatebau, Mechanische Verfahrenstechnik und Feuerungstechnik</a></li>
+</ul>
+
+<strong>U</strong>
+<ul>
+<li><a target="_new" href="http://www.online.com.ua/">Ukraine Online -- Ukrainian Information Resources</a></li>
+<li><a target="_new" href="http://www.jura.uni-muenchen.de/suchen.html">&Uuml;ber den Web-Server der Juristischen Fakult&auml;t der LMU M&uuml;nchen</a></li>
+<li><a target="_new" href="http://www.kaththeol.uni-muenchen.de/suchen.html">&Uuml;ber einige Web-Server der Ludwig-Maximilians-Universit&auml;t M&uuml;nchen</a></li>
+<li><a target="_new" href="http://unipress.vecco.com/">Unipress Equipment - High Pressure Research Centre of Polish Academy of Sciences</a></li>
+<li><a target="_new" href="http://www.upalumni.org/">United Progressive Alumni</a></li>
+<li><a target="_new" href="http://www.uisp.it/">UISP Unione Italiana Sport Per tutti</a></li>
+<li><a target="_new" href="http://www.uni-giessen.de/htdig/">Universit&auml;t Giessen</a></li>
+<li><a target="_new" href="http://www.uni-hamburg.de/">UNIVERSIT&Auml;T HAMBURG</a></li>
+<li><a target="_new" href="http://www.ucv.cl/">Universidad Cat&oacute;lica de Valpara&iacute;so - Chile</a></li>
+<li><a target="_new" href="http://www.unicamp.br">Universidade Estadual de Campinas - Brasil</a></li>
+<li><a target="_new" href="http://www.uni-ulm.de/">Universit&auml;t Ulm</a></li>
+<li><a target="_new" href="http://www.ups-tlse.fr/">Universit&eacute; Paul Sabatier - Toulouse III - France</a></li>
+<li><a target="_new" href="http://www.uqam.ca/">Universit&eacute; du Qu&eacute;bec &agrave; Montr&eacute;al</a></li>
+<li><a target="_new" href="http://www.unige.it/">Universit&agrave; degli Studi di Genova</a></li>
+<li><a target="_new" href="http://www.unipd.it/">Universita' degli Studi di Padova</a></li>
+<li><a target="_new" href="http://www.bath.ac.uk/">University of Bath</a></li>
+<li><a target="_new" href="http://www.uib.no/">University of Bergen, Norway</a></li>
+<li><a target="_new" href="http://www.brad.ac.uk/">University of Bradford</a></li>
+<li><a target="_new" href="http://www.uct.ac.za/">University of Cape Town</a></li>
+<li><a target="_new" href="http://www.uch.gr">University of Crete, Greece</a></li>
+<li><a target="_new" href="http://www.udayton.edu/">University of Dayton</a></li>
+<li><a target="_new" href="http://www.uea.ac.uk/">University of East Anglia, UK</a></li>
+<li><a target="_new" href="http://www.uga.edu/">University of Georgia</a></li>
+<li><a target="_new" href="http://www.hawaii.edu/">University of Hawaii</a></li>
+<li><a target="_new" href="http://www.le.ac.uk/">University of Leicester</a></li>
+<li><a target="_new" href="http://library.unomaha.edu/">University Library, University of Nebraska at Omaha</a></li>
+<li><a target="_new" href="http://www.ukc.ac.uk/">University of Kent at Canterbury</a></li>
+<li><a target="_new" href="http://www.som.umd.umich.edu/">University of Michigan - Dearborn - School of Management</a></li>
+<li><a target="_new" href="http://www.olemiss.edu/">University of Mississippi</a></li>
+<li><a target="_new" href="http://www.missouri.edu/">University of Missouri-Columbia</a></li>
+<li><a target="_new" href="http://www.cs.unc.edu/">University of North Carolina, Chapel Hill Computer Science Department</a></li>
+<li><a target="_new" href="http://www-lehre.informatik.uni-osnabrueck.de/">University of Osnabrueck, Germany</a></li>
+<li><a target="_new" href="http://www.oulu.fi/">University of Oulu, Finland</a></li>
+<li><a target="_new" href="http://www.library.uq.edu.au/">University of Queensland Library</a></li>
+<li><a target="_new" href="http://www.salford.ac.uk/">University of Salford</a></li>
+<li><a target="_new" href="http://www.univ-tlse1.fr/">Universit&eacute; des Sciences Sociales de Toulouse</a></li>
+<li><a target="_new" href="http://www.scar.utoronto.ca/">University of Toronto at Scarborough</a></li>
+<li><a target="_new" href="http://www.uwm.edu/">University of Wisconsin, Milwaukee</a></li>
+<li><a target="_new" href="http://www.uwosh.edu/">University of Wisconsin, Oshkosh</a></li>
+<li><a target="_new" href="http://www.uwrf.edu/">University of Wisconsin, River Falls</a></li>
+<li><a target="_new" href="http://www.wlv.ac.uk/">University of Wolverhampton</a></li>
+</ul>
+
+<strong>V</strong>
+<ul>
+<li><a target="_new" href="http://www.valinux.com">VA Linux Systems</a></li>
+<li><a target="_new" href="http://www.vallnet.com/">Valley Internet</a></li>
+<li><a target="_new" href="http://www.buchhandel-bayern.de/">Verband Bayerischer Verlage und Buchhandlungen e.V.</a></li>
+<li><a target="_new" href="http://vfr.tni.fr/">VFR Mailing List</a></li>
+<li><a target="_new" href="http://www.vidyanikethan.edu/chairman.htm">Vidyanikethan.Edu</a></li>
+<li><a target="_new" href="http://www.ville.montreal.qc.ca/">La Ville de Montr&eacute;al</a></li>
+<li><a target="_new" href="http://www.vtourist.com/">Virtual Tourist - Travel Community</a></li>
+<li><a target="_new" href="http://www.vpl.umd.edu/">Visualization and Presentation Laboratory, Univ. of Maryland</a></li>
+</ul>
+
+<strong>W</strong>
+<ul>
+<li><a target="_new" href="http://www.walsrode-net.de/">Walsrode Online</a></li>
+<li><a target="_new" href="http://www.id.wustl.edu/">Washington University Infectious Diseases Division</a></li>
+<li><a target="_new" href="http://www.wclv.com/">WCLV 95.5FM</a></li>
+<li><a target="_new" href="http://www.webmartial.com/">Webmartial.com, le portail des arts martiaux</a></li>
+<li><a target="_new" href="http://www.webline.dk/film/">WebLine Film list</a></li>
+<li><a target="_new" href="http://www.theweeklyjournal.com">The Weekly Journal</a></li>
+<li><a target="_new" href="http://www.weizmann.ac.il/">Weizmann Institute of Science</a></li>
+<li><a target="_new" href="http://www.WVoutside.com/">West Virginia Outside</a></li>
+<li><a target="_new" href="http://www.win.org/">Westplex Information Network</a></li>
+<li><a target="_new" href="http://www.whid.net/">WHiD.net</a></li>
+<li><a target="_new" href="http://www.darryl.com/winmac/">WinMac: The Windows-MacOS Cooperation List</a></li>
+<li><a target="_new" href="http://wso.williams.edu/">Williams Students Online</a></li>
+<li><a target="_new" href="http://www.willows.com/">Willows Software</a></li>
+<li><a target="_new" href="http://www.lexa.ru/">Wizards Guild: Russian software designers</a></li>
+<li><a target="_new" href="http://www.wmc.com.au/">WMC, a global mineral resources company</a></li>
+<li><a target="_new" href="http://historia.et.tudelft.nl/">Working Group on History, Technical University Delft</a></li>
+<li><a target="_new" href="http://wvde.state.wv.us/">WV Department of Education</a></li>
+<li><a target="_new" href="http://www.free.de/">www.free.de</a></li>
+<li><a target="_new" href="http://vlib.org/">The WWW Virtual Library</a></li>
+</ul>
+
+<strong>X</strong>
+<ul>
+<li><a target="_new" href="http://xmlfr.org/">&lt;XML&gt;fr</a></li>
+<li><a target="_new" href="http://www.xmoto.com/">XMoto</a></li>
+<li><a target="_new" href="http://xmmom.physics.ucsb.edu/">X-Ray Multi-Mirror Mission Optical Monitor</a></li>
+<li><a target="_new" href="http://www.xstrata.com/">Xstrata AG online</a></li>
+</ul>
+
+<strong>Y</strong>
+<ul>
+<li><a target="_new" href="http://www.library.yale.edu/">Yale University Library</a></li>
+<li><a target="_new" href="http://www.successpro.ws">Your Link to Success!</a></li>
+</ul>
+
+<strong>Z</strong>
+<ul>
+<li><a target="_new" href="http://www.zoos.de/">Zoos Suchseite</a></li>
+</ul>
+
+<hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+ <a href="http://sourceforge.net/">
+ <img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+
+</body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/htdoc/where.html b/debian/htdig/htdig-3.2.0b6/htdoc/where.html
new file mode 100644
index 00000000..94985beb
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htdoc/where.html
@@ -0,0 +1,126 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+ <head>
+ <title>
+ ht://Dig: Where to get it
+ </title>
+ <link rel="stylesheet" href="css/htdig.css">
+ </head>
+ <body bgcolor="#eef7ff">
+ <h1>
+ Where to get ht://Dig
+ </h1>
+ <p>
+ ht://Dig Copyright &copy; 1995-2004 <a href="THANKS.html">The ht://Dig Group</a><br>
+ Please see the file <a href="COPYING">COPYING</a> for license information.
+ </p>
+ <hr size="4" noshade>
+ <ul>
+ <li>The current production release of ht://Dig is <strong>3.1.6</strong>.</li>
+ <li>The current beta release of ht://Dig is <strong>3.2.0b6</strong>.</li>
+ </ul>
+
+ <p class="main">
+ The ht://Dig source releases are available from multiple sources around
+ the world. Note that releases are distributed as gzipped tar files (.tar.gz).
+ You will need the GNU gunzip program, part of the gzip package,
+ to extract the files in the archive. You can get the latest
+ version of any of the GNU tools from <a href="ftp://ftp.gnu.org/gnu/">
+ ftp://ftp.gnu.org/gnu/</a>.
+ </p>
+ <p class="main">
+ The latest documentation of ht://Dig is always available at
+ <a href="http://www.htdig.org/" target="_top">http://www.htdig.org/</a>
+ or any of the <a href="http://www.htdig.org/mirrors.html">ht://Dig
+ mirrors</a>.
+ The documentation for the latest beta release can be found at
+ <a href="http://www.htdig.org/dev/htdig-3.2/" target="_top">http://www.htdig.org/dev/htdig-3.2/</a>,
+ or the <a href="dev/htdig-3.2/" target="_top">dev/htdig-3.2</a>
+ subdirectory of most mirrors.
+ </p>
+
+<!--
+ <p><strong>Please Note:</strong> The current 3.2 beta version (3.2.0b3)
+ is vulnerable to a security hole in the htsearch CGI
+ program (the current production version 3.1.6 has been updated
+ to fix this). You can view details on the vulnerability from the
+ <a href="http://www.securityfocus.com/bid/3410">bugtraq
+ mailing list.</a> Pre-release snapshots of 3.2.0b4
+ are available from the development snapshots directory that
+ fix the problem. You can find them <a
+ href="http://www.htdig.org/files/snapshots/">here.</a>
+ </p>
+-->
+
+ <hr size="4" noshade>
+ <table border=0 cellspacing=5 cellpadding=3 align="center">
+ <tr><th>Site</th>
+ <th>Stable Release (3.1.6)</th><th>Beta Release (3.2.0b6)</th>
+ <th>Prior Releases</th><th>Contributed Binaries</th></tr>
+
+ <tr><td>htdig.org</td>
+ <td><a href="http://www.htdig.org/files/htdig-3.1.6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://www.htdig.org/files/htdig-3.2.0b6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://www.htdig.org/files/">(HTTP)</a></td>
+ <td><a href="http://www.htdig.org/files/binaries/">(HTTP)</a></td>
+ </tr>
+
+ <tr><td>htdig.sourceforge.net</td>
+ <td><a href="http://htdig.sourceforge.net/files/htdig-3.1.6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://htdig.sourceforge.net/files/htdig-3.2.0b6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://htdig.sourceforge.net/files/">(HTTP)</a></td>
+ <td><a href="http://htdig.sourceforge.net/files/binaries/">(HTTP)</a></td>
+ </tr>
+
+ <tr><td>download.sourceforge.net</td>
+ <td><a href="http://download.sourceforge.net/htdig/htdig-3.1.6.tar.gz">(HTTP)</a>
+ <a href="ftp://download.sourceforge.net/pub/sourceforge/htdig/htdig-3.1.6.tar.gz">(FTP)</a></td>
+ <td>&nbsp;</td>
+ <td><a href="http://download.sourceforge.net/htdig/">(HTTP)</a>
+ <a href="ftp://download.sourceforge.net/pub/sourceforge/htdig/">(FTP)</a></td>
+ <td>&nbsp;</td>
+ </tr>
+
+ <!-- commented out because they don't have a mirror anymore
+ <tr><td>htdig.europeanservers.net</td>
+ <td><a href="http://htdig.europeanservers.net/files/htdig-3.1.6.tar.gz">(HTTP)</a>
+ </td>
+ <td><a href="http://htdig.europeanservers.net/files/htdig-3.2.0b3.tar.gz">(HTTP)</a>
+ </td>
+ <td><a href="http://htdig.europeanservers.net/files/">(HTTP)</a>
+ </td>
+ <td><a href="http://htdig.europeanservers.net/files/binaries/">(HTTP)</a>
+ </td>
+ </tr>
+ -->
+
+ <tr><td>www.it.htdig.org</td>
+ <td><a href="http://www.it.htdig.org/files/htdig-3.1.6.tar.gz">(HTTP)</a>
+ <a href="ftp://www.it.htdig.org/pub/htdig/htdig-3.1.6.tar.gz">(FTP)</a></td>
+ <td><a href="http://www.it.htdig.org/files/htdig-3.2.0b6.tar.gz">(HTTP)</a>
+ <a href="ftp://www.it.htdig.org/pub/htdig/htdig-3.2.0b6.tar.gz">(FTP)</a></td>
+ <td><a href="http://ftp.it.htdig.org/pub/htdig/">(HTTP)</a>
+ <a href="ftp://ftp.it.htdig.org/pub/htdig/">(FTP)</a></td>
+ <td><a href="http://ftp.it.htdig.org/pub/htdig/binaries/">(HTTP)</a>
+ <a href="ftp://ftp.it.htdig.org/pub/htdig/binaries/">(FTP)</a></td>
+
+ </tr>
+
+ <tr><td>opdenbrouw.nl</td>
+ <td><a href="http://www.opdenbrouw.nl/htdig/files/htdig-3.1.6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://www.opdenbrouw.nl/htdig/files/htdig-3.2.0b6.tar.gz">(HTTP)</a></td>
+ <td><a href="http://www.opdenbrouw.nl/htdig/files/">(HTTP)</a></td>
+ <td><a href="http://www.opdenbrouw.nl/htdig/files/binaries/">(HTTP)</a></td>
+ </tr>
+
+ </table>
+
+ <hr size="4" noshade>
+
+ Last modified: $Date: 2004/05/28 13:15:19 $
+<br>
+
+ <a href="http://sourceforge.net/">
+<img src="http://sourceforge.net/sflogo.php?group_id=4593&amp;type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a>
+ </body>
+</html>