summaryrefslogtreecommitdiffstats
path: root/indexlib
diff options
context:
space:
mode:
authortoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
committertoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
commit460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree67208f7c145782a7e90b123b982ca78d88cc2c87 /indexlib
downloadtdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.tar.gz
tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.zip
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'indexlib')
-rw-r--r--indexlib/GPL_V2280
-rw-r--r--indexlib/Makefile.am11
-rw-r--r--indexlib/README26
-rw-r--r--indexlib/bitio.h73
-rw-r--r--indexlib/bitio.tcc91
-rw-r--r--indexlib/bitstream.cpp108
-rw-r--r--indexlib/bitstream.h64
-rw-r--r--indexlib/boost-compat/.sconsign4
-rw-r--r--indexlib/boost-compat/README3
-rw-r--r--indexlib/boost-compat/checked_delete.hpp69
-rw-r--r--indexlib/boost-compat/config.hpp70
-rw-r--r--indexlib/boost-compat/config/abi/borland_prefix.hpp27
-rw-r--r--indexlib/boost-compat/config/abi/borland_suffix.hpp12
-rw-r--r--indexlib/boost-compat/config/abi/msvc_prefix.hpp8
-rw-r--r--indexlib/boost-compat/config/abi/msvc_suffix.hpp8
-rw-r--r--indexlib/boost-compat/config/abi_prefix.hpp20
-rw-r--r--indexlib/boost-compat/config/abi_suffix.hpp23
-rw-r--r--indexlib/boost-compat/config/auto_link.hpp336
-rw-r--r--indexlib/boost-compat/config/compiler/borland.hpp177
-rw-r--r--indexlib/boost-compat/config/compiler/comeau.hpp61
-rw-r--r--indexlib/boost-compat/config/compiler/common_edg.hpp53
-rw-r--r--indexlib/boost-compat/config/compiler/compaq_cxx.hpp20
-rw-r--r--indexlib/boost-compat/config/compiler/digitalmars.hpp49
-rw-r--r--indexlib/boost-compat/config/compiler/gcc.hpp96
-rw-r--r--indexlib/boost-compat/config/compiler/greenhills.hpp28
-rw-r--r--indexlib/boost-compat/config/compiler/hp_acc.hpp69
-rw-r--r--indexlib/boost-compat/config/compiler/intel.hpp151
-rw-r--r--indexlib/boost-compat/config/compiler/kai.hpp35
-rw-r--r--indexlib/boost-compat/config/compiler/metrowerks.hpp97
-rw-r--r--indexlib/boost-compat/config/compiler/mpw.hpp51
-rw-r--r--indexlib/boost-compat/config/compiler/sgi_mipspro.hpp24
-rw-r--r--indexlib/boost-compat/config/compiler/sunpro_cc.hpp89
-rw-r--r--indexlib/boost-compat/config/compiler/vacpp.hpp58
-rw-r--r--indexlib/boost-compat/config/compiler/visualc.hpp146
-rw-r--r--indexlib/boost-compat/config/platform/aix.hpp33
-rw-r--r--indexlib/boost-compat/config/platform/amigaos.hpp15
-rw-r--r--indexlib/boost-compat/config/platform/beos.hpp26
-rw-r--r--indexlib/boost-compat/config/platform/bsd.hpp70
-rw-r--r--indexlib/boost-compat/config/platform/cygwin.hpp48
-rw-r--r--indexlib/boost-compat/config/platform/hpux.hpp62
-rw-r--r--indexlib/boost-compat/config/platform/irix.hpp31
-rw-r--r--indexlib/boost-compat/config/platform/linux.hpp98
-rw-r--r--indexlib/boost-compat/config/platform/macos.hpp68
-rw-r--r--indexlib/boost-compat/config/platform/solaris.hpp21
-rw-r--r--indexlib/boost-compat/config/platform/win32.hpp50
-rw-r--r--indexlib/boost-compat/config/posix_features.hpp87
-rw-r--r--indexlib/boost-compat/config/requires_threads.hpp92
-rw-r--r--indexlib/boost-compat/config/select_compiler_config.hpp83
-rw-r--r--indexlib/boost-compat/config/select_platform_config.hpp86
-rw-r--r--indexlib/boost-compat/config/select_stdlib_config.hpp68
-rw-r--r--indexlib/boost-compat/config/stdlib/dinkumware.hpp106
-rw-r--r--indexlib/boost-compat/config/stdlib/libcomo.hpp46
-rw-r--r--indexlib/boost-compat/config/stdlib/libstdcpp3.hpp51
-rw-r--r--indexlib/boost-compat/config/stdlib/modena.hpp30
-rw-r--r--indexlib/boost-compat/config/stdlib/msl.hpp54
-rw-r--r--indexlib/boost-compat/config/stdlib/roguewave.hpp123
-rw-r--r--indexlib/boost-compat/config/stdlib/sgi.hpp111
-rw-r--r--indexlib/boost-compat/config/stdlib/stlport.hpp201
-rw-r--r--indexlib/boost-compat/config/stdlib/vacpp.hpp18
-rw-r--r--indexlib/boost-compat/config/suffix.hpp543
-rw-r--r--indexlib/boost-compat/config/user.hpp124
-rw-r--r--indexlib/boost-compat/intrusive_ptr.hpp272
-rw-r--r--indexlib/boost-compat/next_prior.hpp51
-rw-r--r--indexlib/boost-compat/noncopyable.hpp36
-rw-r--r--indexlib/boost-compat/remove_cv.hpp61
-rw-r--r--indexlib/boost-compat/scoped_ptr.hpp118
-rw-r--r--indexlib/boost-compat/shared_ptr.hpp473
-rw-r--r--indexlib/boost-compat/smart_ptr.hpp31
-rw-r--r--indexlib/boost-compat/static_assert.hpp11
-rw-r--r--indexlib/boost-compat/weak_ptr.hpp192
-rw-r--r--indexlib/compat.h58
-rw-r--r--indexlib/compressed.cpp185
-rw-r--r--indexlib/compressed.h127
-rw-r--r--indexlib/configure.in.in22
-rw-r--r--indexlib/create.cpp116
-rw-r--r--indexlib/create.h81
-rw-r--r--indexlib/docs/report.pdfbin0 -> 215750 bytes
-rw-r--r--indexlib/exception.cpp47
-rw-r--r--indexlib/exception.h63
-rw-r--r--indexlib/format.h26
-rw-r--r--indexlib/ifile.cpp177
-rw-r--r--indexlib/ifile.h74
-rw-r--r--indexlib/index.h114
-rw-r--r--indexlib/index_slow.h71
-rw-r--r--indexlib/indexlib-config.in60
-rw-r--r--indexlib/leafdata.cpp166
-rw-r--r--indexlib/leafdata.h147
-rw-r--r--indexlib/leafdatavector.cpp108
-rw-r--r--indexlib/leafdatavector.h57
-rw-r--r--indexlib/lockfile.cpp77
-rw-r--r--indexlib/lockfile.h72
-rw-r--r--indexlib/logfile.cpp54
-rw-r--r--indexlib/logfile.h42
-rw-r--r--indexlib/main.cpp211
-rw-r--r--indexlib/manager.h69
-rw-r--r--indexlib/match.cpp79
-rw-r--r--indexlib/match.h76
-rw-r--r--indexlib/mempool.h160
-rw-r--r--indexlib/mempool.tcc241
-rw-r--r--indexlib/memreference.h118
-rw-r--r--indexlib/memvector.h224
-rw-r--r--indexlib/memvector.tcc80
-rw-r--r--indexlib/mmap_manager.cpp100
-rw-r--r--indexlib/mmap_manager.h72
-rw-r--r--indexlib/path.h79
-rw-r--r--indexlib/pointer.h107
-rw-r--r--indexlib/quotes.cpp92
-rw-r--r--indexlib/quotes.h61
-rw-r--r--indexlib/result.h59
-rw-r--r--indexlib/slow.cpp64
-rw-r--r--indexlib/slow.h69
-rw-r--r--indexlib/stringarray.cpp114
-rw-r--r--indexlib/stringarray.h67
-rw-r--r--indexlib/stringset.cpp106
-rw-r--r--indexlib/stringset.h161
-rw-r--r--indexlib/tests/Makefile.am9
-rw-r--r--indexlib/tests/configure.in.in8
-rw-r--r--indexlib/tests/create-test.cpp28
-rw-r--r--indexlib/tests/ifile-test.cpp156
-rwxr-xr-xindexlib/tests/large-scale/do-test.zsh55
-rw-r--r--indexlib/tests/large-scale/generate.py51
-rw-r--r--indexlib/tests/leafdatavector-test.cpp129
-rw-r--r--indexlib/tests/match-test.cpp99
-rw-r--r--indexlib/tests/mempool-test.cpp53
-rw-r--r--indexlib/tests/memvector-test.cpp258
-rwxr-xr-xindexlib/tests/run-tests.zsh52
-rw-r--r--indexlib/tests/slow-test.cpp13
-rw-r--r--indexlib/tests/stringarray-test.cpp104
-rw-r--r--indexlib/tests/stringset-test.cpp194
-rw-r--r--indexlib/tests/testdriver.cpp61
-rw-r--r--indexlib/tests/tokenizer-test.cpp69
-rw-r--r--indexlib/thing.h168
-rw-r--r--indexlib/tokenizer.cpp300
-rw-r--r--indexlib/tokenizer.h28
-rw-r--r--indexlib/version.h14
135 files changed, 12229 insertions, 0 deletions
diff --git a/indexlib/GPL_V2 b/indexlib/GPL_V2
new file mode 100644
index 000000000..49166ad31
--- /dev/null
+++ b/indexlib/GPL_V2
@@ -0,0 +1,280 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
diff --git a/indexlib/Makefile.am b/indexlib/Makefile.am
new file mode 100644
index 000000000..3e5cdbda3
--- /dev/null
+++ b/indexlib/Makefile.am
@@ -0,0 +1,11 @@
+# Automake input for indexlib: builds the libindex libtool library and
+# installs its public headers plus the indexlib-config script.
+#
+# compat.h is force-included into every translation unit via -include.
+# NOTE(review): USE_EXCEPTIONS is defined outside this file; presumably it
+# carries the compiler flag that enables C++ exceptions -- confirm.
+KDE_CXXFLAGS=-include $(srcdir)/compat.h $(USE_EXCEPTIONS)
+INCLUDES = $(all_includes)
+
+# The library itself (libtool archive).
+lib_LTLIBRARIES = libindex.la
+
+# Every source file compiled into libindex.la.
+libindex_la_SOURCES = bitstream.cpp compressed.cpp create.cpp exception.cpp ifile.cpp leafdata.cpp leafdatavector.cpp lockfile.cpp logfile.cpp match.cpp mmap_manager.cpp quotes.cpp slow.cpp stringarray.cpp stringset.cpp tokenizer.cpp
+
+# Public headers are installed under $(includedir)/index.
+indexlibincludedir=$(includedir)/index
+indexlibinclude_HEADERS = create.h index.h lockfile.h
+# Helper script installed into the binary directory.
+bin_SCRIPTS = indexlib-config
+
diff --git a/indexlib/README b/indexlib/README
new file mode 100644
index 000000000..04b189dc2
--- /dev/null
+++ b/indexlib/README
@@ -0,0 +1,26 @@
+WHAT'S THIS?
+
+This is indexlib, an indexing library.
+
+HOW TO USE INDEXLIB
+
+The most important files are index.h and create.h which are the programmer's interface (API).
+
+HOW TO START HACKING INDEXLIB INTERNALS
+
+1. Understand the basics about how everything is really kept on disk and the file formats.
+ Reading the docs in the docs/ directory should be good enough.
+
+2. Email me (luis@luispedro.org) if you have any questions.
+
+HOW TO DEBUG
+
+1. Get boost (http://www.boost.org/) and install it. There are packages for most Linux distributions.
+
+2. Enable debugging output (Recompile with boost, define DEBUG).
+
+3. Run the test suite (compile test.cpp and link with indexlib and boost_unit_test).
+
+4. If the unit tests are all OK, but you still think the code is broken, try to write a test case which catches it.
+
+
diff --git a/indexlib/bitio.h b/indexlib/bitio.h
new file mode 100644
index 000000000..8072d74b3
--- /dev/null
+++ b/indexlib/bitio.h
@@ -0,0 +1,73 @@
+#ifndef LPC_BITIO_H1103129408_INCLUDE_GUARD_
+#define LPC_BITIO_H1103129408_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <inttypes.h>
+#include "boost-compat/remove_cv.hpp"
+
+/**
+ * \namespace byte_io
+ *
+ * This namespace aggregates all input/output functions
+ * for our on-disk format, as well as information relating to
+ * that format.
+ *
+ */
+namespace byte_io {
+
+    /**
+     * Stores \a value at \a out in the on-disk representation of T.
+     * Only declared here; the definitions live in bitio.tcc.
+     */
+    template <typename T>
+    void write( unsigned char* out, const T value );
+
+    /**
+     * Decodes a T from the bytes starting at \a in.
+     * Only declared here; the definitions live in bitio.tcc.
+     */
+    template <typename T>
+    T read( const unsigned char* in );
+
+    /**
+     * Trait carrying the on-disk size of T in its \c value member.
+     * The primary template is deliberately empty: a type has a size
+     * only if a specialization provides one.
+     */
+    template <typename T>
+    struct byte_lenght_struct {
+    };
+
+    /** A const-qualified type occupies as many bytes as the plain type. */
+    template <typename T>
+    struct byte_lenght_struct<const T> {
+        static const int value = byte_lenght_struct<T>::value;
+    };
+
+    /**
+     * Number of bytes a T takes up on disk. Compiles only for types
+     * that have a byte_lenght_struct specialization.
+     */
+    template <typename T>
+    unsigned byte_lenght() {
+        const unsigned on_disk_size = byte_lenght_struct<T>::value;
+        return on_disk_size;
+    }
+}
+
+#include "bitio.tcc"
+
+#endif /* LPC_BITIO_H1103129408_INCLUDE_GUARD_ */
diff --git a/indexlib/bitio.tcc b/indexlib/bitio.tcc
new file mode 100644
index 000000000..2779cfedb
--- /dev/null
+++ b/indexlib/bitio.tcc
@@ -0,0 +1,91 @@
+#include "boost-compat/static_assert.hpp"
+#include "boost-compat/remove_cv.hpp"
+#ifdef HAVE_BOOST
+#include <boost/type_traits/is_same.hpp>
+#endif
+
+
+namespace byte_io {
+
+    /**
+     * Returns a plain copy of \a v, i.e. without the const/volatile
+     * qualification carried by the parameter.
+     */
+    template <typename T>
+    inline T no_const( const volatile T v ) {
+        return v;
+    }
+
+    /**
+     * Generic write(): takes an unqualified copy of \a d and calls
+     * write() again. The actual encoders are the explicit
+     * specializations further down.
+     *
+     * NOTE(review): for a type without an explicit specialization this
+     * looks like it ends up calling itself -- confirm that only the
+     * specialized types are ever written.
+     */
+    template<typename T>
+    inline
+    void write( unsigned char* out, const volatile T d ) {
+        write( out, no_const( d ) );
+    }
+
+    /**
+     * Generic read(): removes cv-qualifiers from \a T and forwards to
+     * read() for the unqualified type.
+     */
+    template <typename T>
+    inline
+    T read( const unsigned char* out ) {
+        //BOOST_STATIC_ASSERT( !( ::boost::is_same<T,typename ::boost::remove_cv<T>::type>::value ) );
+        return read<typename ::boost::remove_cv<T>::type>( out );
+    }
+
+    /** Writes a single byte. */
+    template<>
+    inline
+    void write<uint8_t>( unsigned char* out, uint8_t d ) {
+        *out = d;
+    }
+
+    /** Reads a single byte. */
+    template<>
+    inline
+    uint8_t read<uint8_t>( const unsigned char* in ) {
+        return *in;
+    }
+
+    /** A uint8_t takes one byte on disk. */
+    template<>
+    struct byte_lenght_struct<uint8_t> {
+        static const int value = 1;
+    };
+
+    /** Writes \a d as two bytes, least-significant byte first. */
+    template<>
+    inline
+    void write<uint16_t>( unsigned char* out, uint16_t d ) {
+        out[ 0 ] = static_cast<unsigned char>( ( d >> 0 ) & 0xff );
+        out[ 1 ] = static_cast<unsigned char>( ( d >> 8 ) & 0xff );
+    }
+
+    /** Reads two bytes, least-significant byte first. */
+    template<>
+    inline
+    uint16_t read<uint16_t>( const unsigned char* in ) {
+        // Do the arithmetic in an unsigned type instead of relying on
+        // the implicit promotion of unsigned char to (signed) int.
+        const unsigned low = in[ 0 ];
+        const unsigned high = in[ 1 ];
+        return static_cast<uint16_t>( low | ( high << 8 ) );
+    }
+
+    /** A uint16_t takes two bytes on disk. */
+    template<>
+    struct byte_lenght_struct<uint16_t> {
+        static const int value = 2;
+    };
+
+    /** Writes \a d as four bytes, least-significant byte first. */
+    template<>
+    inline
+    void write<uint32_t>( unsigned char* out, uint32_t d ) {
+        out[ 0 ] = static_cast<unsigned char>( ( d >> 0 ) & 0xff );
+        out[ 1 ] = static_cast<unsigned char>( ( d >> 8 ) & 0xff );
+        out[ 2 ] = static_cast<unsigned char>( ( d >> 16 ) & 0xff );
+        out[ 3 ] = static_cast<unsigned char>( ( d >> 24 ) & 0xff );
+    }
+
+    /** Reads four bytes, least-significant byte first. */
+    template<>
+    inline
+    uint32_t read<uint32_t>( const unsigned char* in ) {
+        // Widen every byte to uint32_t *before* shifting. An unsigned
+        // char is promoted to signed int, so shifting a byte >= 0x80
+        // left by 24 would push a one into the sign bit.
+        const uint32_t b0 = in[ 0 ];
+        const uint32_t b1 = in[ 1 ];
+        const uint32_t b2 = in[ 2 ];
+        const uint32_t b3 = in[ 3 ];
+        return b0 | ( b1 << 8 ) | ( b2 << 16 ) | ( b3 << 24 );
+    }
+
+    /** A uint32_t takes four bytes on disk. */
+    template<>
+    struct byte_lenght_struct<uint32_t> {
+        static const int value = 4;
+    };
+}
+
diff --git a/indexlib/bitstream.cpp b/indexlib/bitstream.cpp
new file mode 100644
index 000000000..62e8803cd
--- /dev/null
+++ b/indexlib/bitstream.cpp
@@ -0,0 +1,108 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "bitstream.h"
+#include "logfile.h"
+#include "format.h"
+
+#include <assert.h>
+
+// Builds a stream over a caller-supplied mutable buffer. The stream is
+// flagged as writeable and the bit cursor starts at position 0.
+// `size` is the limit putbit() checks byte indices against.
+bitstream::bitstream( unsigned char* data, unsigned size )
+    : bytes_( data ), writeable_( true ), size_( size ), cur_( 0 )
+{
+}
+
+// Builds a stream over a caller-supplied read-only buffer. Constness is
+// cast away so both constructors can share one pointer member; the
+// stream is flagged as not writeable and the bit cursor starts at 0.
+bitstream::bitstream( const unsigned char* data, unsigned size )
+    : bytes_( const_cast<unsigned char*>( data ) ), writeable_( false ), size_( size ), cur_( 0 )
+{
+}
+
+// Tells whether bit number `idx` of `v` is set (0 = least significant).
+bool bit( unsigned v, unsigned idx ) {
+    const unsigned shifted = v >> idx;
+    return ( shifted & 1u ) != 0;
+}
+
+// Appends the 16 bits of `x` to the stream, least-significant bit first.
+bitstream& bitstream::operator << ( uint16_t x ) {
+    unsigned pos = 0;
+    while ( pos != 16 ) {
+        putbit( bit( x, pos ) );
+        ++pos;
+    }
+    return *this;
+}
+
+// Appends the 32 bits of `x` to the stream, least-significant bit first.
+bitstream& bitstream::operator << ( uint32_t x ) {
+    unsigned pos = 0;
+    while ( pos != 32 ) {
+        putbit( bit( x, pos ) );
+        ++pos;
+    }
+    return *this;
+}
+
+// Extracts the next 16 bits from the stream into `v`.
+//
+// operator<<( uint16_t ) emits bit 0 first, so the i-th bit read here is
+// stored at position i. (Shifting the accumulator left on every step, as
+// was done before, handed back the bit-reversed value.)
+bitstream& bitstream::operator >> ( uint16_t& v ) {
+    uint16_t value = 0;
+    for ( unsigned i = 0; i != 16; ++i ) {
+        if ( getbit() ) value |= static_cast<uint16_t>( 1u << i );
+    }
+    v = value;
+    return *this;
+}
+
+// Extracts the next 32 bits from the stream into `v`.
+//
+// operator<<( uint32_t ) emits bit 0 first, so the i-th bit read here is
+// stored at position i. (Shifting the accumulator left on every step, as
+// was done before, handed back the bit-reversed value.)
+bitstream& bitstream::operator >> ( uint32_t& v ) {
+    uint32_t value = 0;
+    for ( unsigned i = 0; i != 32; ++i ) {
+        if ( getbit() ) value |= ( static_cast<uint32_t>( 1 ) << i );
+    }
+    v = value;
+    return *this;
+}
+
+// Moves the cursor back by 16 bits. The argument's value is ignored;
+// its type only selects this overload.
+void bitstream::putback( uint16_t ) {
+    assert( cur_ >= 16 );
+    cur_ = cur_ - 16;
+}
+
+// Moves the cursor back by 32 bits. The argument's value is ignored;
+// its type only selects this overload.
+void bitstream::putback( uint32_t ) {
+    assert( cur_ >= 32 );
+    cur_ = cur_ - 32;
+}
+
+// Returns the bit under the cursor and advances the cursor by one.
+// Bits are numbered from the least-significant end of each byte.
+bool bitstream::getbit() {
+    const unsigned byte = cur_ / 8;
+    const unsigned inbyte = cur_ % 8;
+    // Same bounds check putbit() performs: never touch bytes past the buffer.
+    assert( byte < size_ );
+    ++cur_;
+    return ( bytes_[ byte ] >> inbyte ) & 1;
+}
+
+// Stores `value` in the bit under the cursor and advances the cursor by
+// one. Bits are numbered from the least-significant end of each byte.
+void bitstream::putbit( bool value ) {
+    // A stream built from a const buffer only holds a const_cast'ed
+    // pointer; writing through it must not happen.
+    assert( writeable_ );
+    const unsigned byte = cur_ / 8;
+    const unsigned inbyte = cur_ % 8;
+    assert( byte < size_ );
+    const unsigned mask = 1u << inbyte;
+    if ( value ) bytes_[ byte ] |= mask;
+    else bytes_[ byte ] &= ~mask;
+    ++cur_;
+}
+
+
diff --git a/indexlib/bitstream.h b/indexlib/bitstream.h
new file mode 100644
index 000000000..7cc491a0c
--- /dev/null
+++ b/indexlib/bitstream.h
@@ -0,0 +1,64 @@
+#ifndef LPC_BITSTREAM_H1102530057_INCLUDE_GUARD_
+#define LPC_BITSTREAM_H1102530057_INCLUDE_GUARD_
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+#include <vector>
+#include <inttypes.h>
+
+struct bitstream { // cursor-based bit reader/writer over a caller-supplied byte buffer; members are defined in bitstream.cpp
+ public:
+ bitstream( const unsigned char* data, unsigned size ); // wraps data and records writeable_ = false
+ bitstream( unsigned char* data, unsigned size ); // wraps data and records writeable_ = true
+
+ bitstream& operator << ( uint16_t ); // appends the 16 bits of the argument via putbit()
+ bitstream& operator << ( uint32_t ); // appends the 32 bits of the argument via putbit()
+
+ bitstream& operator >> ( uint16_t& ); // fills the argument from the next 16 bits via getbit()
+ bitstream& operator >> ( uint32_t& ); // fills the argument from the next 32 bits via getbit()
+
+ void putback( uint16_t ); // moves the cursor back 16 bits; the argument value is unused
+ void putback( uint32_t ); // moves the cursor back 32 bits; the argument value is unused
+
+ unsigned nbytes() const { return size_ / 8 + bool( size_ % 8 ); } // NOTE(review): rounds size_ up as if it counted bits, yet putbit() asserts byte < size_ (size_ as bytes) - confirm the unit
+ unsigned nbits() const { return size_; } // returns size_ unchanged
+ const unsigned char* as_byte_vector() const { return &bytes_[ 0 ]; } // address of the first byte of the wrapped buffer
+ private:
+ bool getbit(); // reads the bit at cur_ and advances the cursor
+ unsigned char* bytes_; // buffer handed to the constructor; no destructor is declared, so it is never freed here
+ const bool writeable_; // true only when built through the non-const constructor
+ const unsigned size_; // the constructor's size argument, stored as given
+ unsigned cur_; // cursor: bit offset used by the next getbit()/putbit()
+ void putbit( bool ); // writes one bit at cur_ and advances the cursor
+};
+
+template <typename T> // declaration only: no definition or specialization of bits_for appears in this header
+ unsigned bits_for(); // presumably the bit width used for T - TODO confirm against wherever it is defined
+
+#endif /* LPC_BITSTREAM_H1102530057_INCLUDE_GUARD_ */
diff --git a/indexlib/boost-compat/.sconsign b/indexlib/boost-compat/.sconsign
new file mode 100644
index 000000000..1324a92c7
--- /dev/null
+++ b/indexlib/boost-compat/.sconsign
@@ -0,0 +1,4 @@
+}q(Ustatic_assert.hppq(cSCons.Node.FS
+BuildInfo
+qoq}q(U timestampqJBUcsigqU 6c52b131fa324e8acff6db05b9a45438qubU remove_cv.hppq (hoq
+}q (hJBhU 494545128702963678f24d62a2bf7b38q ubUnoncopyable.hppq (hoq}q(hJBhU 3e704fc343b05e7287bed7d572f7611equbu. \ No newline at end of file
diff --git a/indexlib/boost-compat/README b/indexlib/boost-compat/README
new file mode 100644
index 000000000..de8231e93
--- /dev/null
+++ b/indexlib/boost-compat/README
@@ -0,0 +1,3 @@
+This is a copy of a small part of Boost,
+plus a couple of headers that provide non-functional stand-ins
+for certain heavier facilities which are only used for debugging.
diff --git a/indexlib/boost-compat/checked_delete.hpp b/indexlib/boost-compat/checked_delete.hpp
new file mode 100644
index 000000000..9bb84e8e1
--- /dev/null
+++ b/indexlib/boost-compat/checked_delete.hpp
@@ -0,0 +1,69 @@
+#ifndef BOOST_CHECKED_DELETE_HPP_INCLUDED
+#define BOOST_CHECKED_DELETE_HPP_INCLUDED
+
+// MS compatible compilers support #pragma once
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1020)
+# pragma once
+#endif
+
+//
+// boost/checked_delete.hpp
+//
+// Copyright (c) 2002, 2003 Peter Dimov
+// Copyright (c) 2003 Daniel Frey
+// Copyright (c) 2003 Howard Hinnant
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/utility/checked_delete.html for documentation.
+//
+
+namespace boost
+{
+
+// verify that types are complete for increased safety
+
+template<class T> inline void checked_delete(T * x) // deletes x, but refuses to compile when T is an incomplete type
+{
+ // intentionally complex - simplification causes regressions
+ typedef char type_must_be_complete[ sizeof(T)? 1: -1 ]; // ill-formed for incomplete T: sizeof(T) is rejected, and a 0 result would give array size -1
+ (void) sizeof(type_must_be_complete); // presumably keeps the typedef from being reported as unused
+ delete x;
+}
+
+template<class T> inline void checked_array_delete(T * x) // array form: same completeness check, then delete []
+{
+ typedef char type_must_be_complete[ sizeof(T)? 1: -1 ]; // same compile-time completeness check as in checked_delete
+ (void) sizeof(type_must_be_complete);
+ delete [] x;
+}
+
+template<class T> struct checked_deleter // function object whose call operator forwards to checked_delete
+{
+ typedef void result_type;
+ typedef T * argument_type;
+
+ void operator()(T * x) const
+ {
+ // boost:: disables ADL
+ boost::checked_delete(x);
+ }
+};
+
+template<class T> struct checked_array_deleter // function object whose call operator forwards to checked_array_delete
+{
+ typedef void result_type;
+ typedef T * argument_type;
+
+ void operator()(T * x) const
+ {
+ boost::checked_array_delete(x); // qualified call, as in checked_deleter
+ }
+};
+
+} // namespace boost
+
+#endif // #ifndef BOOST_CHECKED_DELETE_HPP_INCLUDED
diff --git a/indexlib/boost-compat/config.hpp b/indexlib/boost-compat/config.hpp
new file mode 100644
index 000000000..055a27855
--- /dev/null
+++ b/indexlib/boost-compat/config.hpp
@@ -0,0 +1,70 @@
+// Boost config.hpp configuration header file ------------------------------//
+
+// (C) Copyright John Maddock 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/config for most recent version.
+
+// Boost config.hpp policy and rationale documentation has been moved to
+// http://www.boost.org/libs/config
+//
+// CAUTION: This file is intended to be completely stable -
+// DO NOT MODIFY THIS FILE!
+//
+
+#ifndef BOOST_CONFIG_HPP
+#define BOOST_CONFIG_HPP
+
+// if we don't have a user config, then use the default location:
+#if !defined(BOOST_USER_CONFIG) && !defined(BOOST_NO_USER_CONFIG)
+# define BOOST_USER_CONFIG <boost/config/user.hpp>
+#endif
+// include it first:
+#ifdef BOOST_USER_CONFIG
+# include BOOST_USER_CONFIG
+#endif
+
+// if we don't have a compiler config set, try and find one:
+#if !defined(BOOST_COMPILER_CONFIG) && !defined(BOOST_NO_COMPILER_CONFIG) && !defined(BOOST_NO_CONFIG)
+# include <boost/config/select_compiler_config.hpp>
+#endif
+// if we have a compiler config, include it now:
+#ifdef BOOST_COMPILER_CONFIG
+# include BOOST_COMPILER_CONFIG
+#endif
+
+// if we don't have a std library config set, try and find one:
+#if !defined(BOOST_STDLIB_CONFIG) && !defined(BOOST_NO_STDLIB_CONFIG) && !defined(BOOST_NO_CONFIG)
+# include <boost/config/select_stdlib_config.hpp>
+#endif
+// if we have a std library config, include it now:
+#ifdef BOOST_STDLIB_CONFIG
+# include BOOST_STDLIB_CONFIG
+#endif
+
+// if we don't have a platform config set, try and find one:
+#if !defined(BOOST_PLATFORM_CONFIG) && !defined(BOOST_NO_PLATFORM_CONFIG) && !defined(BOOST_NO_CONFIG)
+# include <boost/config/select_platform_config.hpp>
+#endif
+// if we have a platform config, include it now:
+#ifdef BOOST_PLATFORM_CONFIG
+# include BOOST_PLATFORM_CONFIG
+#endif
+
+// get config suffix code:
+#include <boost/config/suffix.hpp>
+
+#endif // BOOST_CONFIG_HPP
+
+
+
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/abi/borland_prefix.hpp b/indexlib/boost-compat/config/abi/borland_prefix.hpp
new file mode 100644
index 000000000..49f424949
--- /dev/null
+++ b/indexlib/boost-compat/config/abi/borland_prefix.hpp
@@ -0,0 +1,27 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// for C++ Builder the following options affect the ABI:
+//
+// -b (on or off - affects enum sizes)
+// -Vx (on or off - empty members)
+// -Ve (on or off - empty base classes)
+// -aX (alignment - 5 options).
+// -pX (Calling convention - 4 options)
+// -VmX (member pointer size and layout - 5 options)
+// -VC (on or off, changes name mangling)
+// -Vl (on or off, changes struct layout).
+
+// In addition the following warnings are sufficiently annoying (and
+// unfixable) to have them turned off by default:
+//
+// 8027 - functions containing [for|while] loops are not expanded inline
+// 8026 - functions taking class by value arguments are not expanded inline
+
+#pragma nopushoptwarn
+# pragma option push -Vx -Ve -a8 -b -pc -Vmv -VC- -Vl- -w-8027 -w-8026
+
+
+
diff --git a/indexlib/boost-compat/config/abi/borland_suffix.hpp b/indexlib/boost-compat/config/abi/borland_suffix.hpp
new file mode 100644
index 000000000..940535f38
--- /dev/null
+++ b/indexlib/boost-compat/config/abi/borland_suffix.hpp
@@ -0,0 +1,12 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# pragma option pop
+#pragma nopushoptwarn
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/abi/msvc_prefix.hpp b/indexlib/boost-compat/config/abi/msvc_prefix.hpp
new file mode 100644
index 000000000..3d3905c21
--- /dev/null
+++ b/indexlib/boost-compat/config/abi/msvc_prefix.hpp
@@ -0,0 +1,8 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma pack(push,8)
+
+
diff --git a/indexlib/boost-compat/config/abi/msvc_suffix.hpp b/indexlib/boost-compat/config/abi/msvc_suffix.hpp
new file mode 100644
index 000000000..a64d783eb
--- /dev/null
+++ b/indexlib/boost-compat/config/abi/msvc_suffix.hpp
@@ -0,0 +1,8 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma pack(pop)
+
+
diff --git a/indexlib/boost-compat/config/abi_prefix.hpp b/indexlib/boost-compat/config/abi_prefix.hpp
new file mode 100644
index 000000000..1733dc036
--- /dev/null
+++ b/indexlib/boost-compat/config/abi_prefix.hpp
@@ -0,0 +1,20 @@
+// abi_prefix header -------------------------------------------------------//
+
+// Copyright John Maddock 2003
+
+// Use, modification and distribution are subject to the Boost Software License,
+// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt).
+
+#ifndef BOOST_CONFIG_ABI_PREFIX_HPP
+# define BOOST_CONFIG_ABI_PREFIX_HPP
+#else
+# error double inclusion of header boost/config/abi_prefix.hpp is an error
+#endif
+
+#include <boost/config.hpp>
+
+// this must occur after all other includes and before any code appears:
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
diff --git a/indexlib/boost-compat/config/abi_suffix.hpp b/indexlib/boost-compat/config/abi_suffix.hpp
new file mode 100644
index 000000000..6339da631
--- /dev/null
+++ b/indexlib/boost-compat/config/abi_suffix.hpp
@@ -0,0 +1,23 @@
+// abi_suffix header -------------------------------------------------------//
+
+// Copyright John Maddock 2003
+
+// Use, modification and distribution are subject to the Boost Software License,
+// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt).
+
+// This header should be #included AFTER code that was preceded by a #include
+// <boost/config/abi_prefix.hpp>.
+
+#ifndef BOOST_CONFIG_ABI_PREFIX_HPP
+# error Header boost/config/abi_prefix.hpp must only be used after boost/config/abi_prefix.hpp
+#else
+# undef BOOST_CONFIG_ABI_PREFIX_HPP
+#endif
+
+// the suffix header occurs after all of our code:
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
+
diff --git a/indexlib/boost-compat/config/auto_link.hpp b/indexlib/boost-compat/config/auto_link.hpp
new file mode 100644
index 000000000..b4e580ffa
--- /dev/null
+++ b/indexlib/boost-compat/config/auto_link.hpp
@@ -0,0 +1,336 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+ /*
+ * LOCATION: see http://www.boost.org for most recent version.
+ * FILE auto_link.hpp
+ * VERSION see <boost/version.hpp>
+ * DESCRIPTION: Automatic library inclusion for Borland/Microsoft compilers.
+ */
+
+/*************************************************************************
+
+USAGE:
+~~~~~~
+
+Before including this header you must define one or more of the following macros:
+
+BOOST_LIB_NAME: Required: A string containing the basename of the library,
+ for example boost_regex.
+BOOST_DYN_LINK: Optional: when set link to dll rather than static library.
+BOOST_LIB_DIAGNOSTIC: Optional: when set the header will print out the name
+ of the library selected (useful for debugging).
+
+These macros will be undef'ed at the end of the header, further this header
+has no include guards - so be sure to include it only once from your library!
+
+Algorithm:
+~~~~~~~~~~
+
+Libraries for Borland and Microsoft compilers are automatically
+selected here, the name of the lib is selected according to the following
+formula:
+
+BOOST_LIB_PREFIX
+ + BOOST_LIB_NAME
+ + "-"
+ + BOOST_LIB_TOOLSET
+ + BOOST_LIB_THREAD_OPT
+ + BOOST_LIB_RT_OPT
+ + "-"
+ + BOOST_LIB_VERSION
+
+These are defined as:
+
+BOOST_LIB_PREFIX: "lib" for static libraries otherwise "".
+
+BOOST_LIB_NAME: The base name of the lib ( for example boost_regex).
+
+BOOST_LIB_TOOLSET: The compiler toolset name (vc6, vc7, bcb5 etc).
+
+BOOST_LIB_THREAD_OPT: "-mt" for multithread builds, otherwise nothing.
+
+BOOST_LIB_RT_OPT: A suffix that indicates the runtime library used,
+ contains one or more of the following letters after
+ a hyphen:
+
+ s static runtime (dynamic if not present).
+ d debug build (release if not present).
+ g debug/diagnostic runtime (release if not present).
+ p STLPort Build.
+
+BOOST_LIB_VERSION: The Boost version, in the form x_y, for Boost version x.y.
+
+
+***************************************************************************/
+
+#ifdef __cplusplus
+# ifndef BOOST_CONFIG_HPP
+# include <boost/config.hpp>
+# endif
+#elif defined(_MSC_VER) && !defined(__MWERKS__) && !defined(__EDG_VERSION__)
+//
+// C language compatibility (no, honestly)
+//
+# define BOOST_MSVC _MSC_VER
+# define BOOST_STRINGIZE(X) BOOST_DO_STRINGIZE(X)
+# define BOOST_DO_STRINGIZE(X) #X
+#endif
+//
+// Only include what follows for known and supported compilers:
+//
+#if defined(BOOST_MSVC) \
+ || defined(__BORLANDC__) \
+ || (defined(__MWERKS__) && defined(_WIN32) && (__MWERKS__ >= 0x3000)) \
+ || (defined(__ICL) && defined(_MSC_EXTENSIONS) && (_MSC_VER >= 1200))
+
+#ifndef BOOST_VERSION_HPP
+# include <boost/version.hpp>
+#endif
+
+#ifndef BOOST_LIB_NAME
+# error "Macro BOOST_LIB_NAME not set (internal error)"
+#endif
+
+//
+// error check:
+//
+#if defined(__MSVC_RUNTIME_CHECKS) && !defined(_DEBUG)
+# pragma message("Using the /RTC option without specifying a debug runtime will lead to linker errors")
+# pragma message("Hint: go to the code generation options and switch to one of the debugging runtimes")
+# error "Incompatible build options"
+#endif
+//
+// select toolset:
+//
+#if defined(BOOST_MSVC) && (BOOST_MSVC == 1200)
+
+ // vc6:
+# define BOOST_LIB_TOOLSET "vc6"
+
+#elif defined(BOOST_MSVC) && (BOOST_MSVC == 1300)
+
+ // vc7:
+# define BOOST_LIB_TOOLSET "vc7"
+
+#elif defined(BOOST_MSVC) && (BOOST_MSVC == 1310)
+
+ // vc71:
+# define BOOST_LIB_TOOLSET "vc71"
+
+#elif defined(BOOST_MSVC) && (BOOST_MSVC >= 1400)
+
+ // vc80:
+# define BOOST_LIB_TOOLSET "vc80"
+
+#elif defined(__BORLANDC__)
+
+ // CBuilder 6:
+# define BOOST_LIB_TOOLSET "bcb"
+
+#elif defined(__ICL)
+
+ // Intel C++, no version number:
+# define BOOST_LIB_TOOLSET "iw"
+
+#elif defined(__MWERKS__) && (__MWERKS__ <= 0x31FF )
+
+ // Metrowerks CodeWarrior 8.x
+# define BOOST_LIB_TOOLSET "cw8"
+
+#elif defined(__MWERKS__) && (__MWERKS__ <= 0x32FF )
+
+ // Metrowerks CodeWarrior 9.x
+# define BOOST_LIB_TOOLSET "cw9"
+
+#endif
+
+//
+// select thread opt:
+//
+#if defined(_MT) || defined(__MT__)
+# define BOOST_LIB_THREAD_OPT "-mt"
+#else
+# define BOOST_LIB_THREAD_OPT
+#endif
+
+#if defined(_MSC_VER) || defined(__MWERKS__)
+
+# ifdef _DLL
+
+# if (defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) && (defined(_STLP_OWN_IOSTREAMS) || defined(__STL_OWN_IOSTREAMS))
+
+# if defined(_DEBUG) && (defined(__STL_DEBUG) || defined(_STLP_DEBUG))
+# define BOOST_LIB_RT_OPT "-gdp"
+# elif defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-gdp"
+# pragma message("warning: STLPort debug versions are built with /D_STLP_DEBUG=1")
+# error "Build options aren't compatible with pre-built libraries"
+# else
+# define BOOST_LIB_RT_OPT "-p"
+# endif
+
+# elif defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)
+
+# if defined(_DEBUG) && (defined(__STL_DEBUG) || defined(_STLP_DEBUG))
+# define BOOST_LIB_RT_OPT "-gdpn"
+# elif defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-gdpn"
+# pragma message("warning: STLPort debug versions are built with /D_STLP_DEBUG=1")
+# error "Build options aren't compatible with pre-built libraries"
+# else
+# define BOOST_LIB_RT_OPT "-pn"
+# endif
+
+# else
+
+# if defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-gd"
+# else
+# define BOOST_LIB_RT_OPT
+# endif
+
+# endif
+
+# else
+
+# if (defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) && (defined(_STLP_OWN_IOSTREAMS) || defined(__STL_OWN_IOSTREAMS))
+
+# if defined(_DEBUG) && (defined(__STL_DEBUG) || defined(_STLP_DEBUG))
+# define BOOST_LIB_RT_OPT "-sgdp"
+# elif defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-sgdp"
+# pragma message("warning: STLPort debug versions are built with /D_STLP_DEBUG=1")
+# error "Build options aren't compatible with pre-built libraries"
+# else
+# define BOOST_LIB_RT_OPT "-sp"
+# endif
+
+# elif defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)
+
+# if defined(_DEBUG) && (defined(__STL_DEBUG) || defined(_STLP_DEBUG))
+# define BOOST_LIB_RT_OPT "-sgdpn"
+# elif defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-sgdpn"
+# pragma message("warning: STLPort debug versions are built with /D_STLP_DEBUG=1")
+# error "Build options aren't compatible with pre-built libraries"
+# else
+# define BOOST_LIB_RT_OPT "-spn"
+# endif
+
+# else
+
+# if defined(_DEBUG)
+# define BOOST_LIB_RT_OPT "-sgd"
+# else
+# define BOOST_LIB_RT_OPT "-s"
+# endif
+
+# endif
+
+# endif
+
+#elif defined(__BORLANDC__)
+
+//
+// figure out whether we want the debug builds or not:
+//
+#pragma defineonoption BOOST_BORLAND_DEBUG -v
+//
+// sanity check:
+//
+#if defined(__STL_DEBUG) || defined(_STLP_DEBUG)
+#error "Pre-built versions of the Boost libraries are not provided in STLPort-debug form"
+#endif
+
+# ifdef _RTLDLL
+
+# ifdef BOOST_BORLAND_DEBUG
+# define BOOST_LIB_RT_OPT "-d"
+# else
+# define BOOST_LIB_RT_OPT
+# endif
+
+# else
+
+# ifdef BOOST_BORLAND_DEBUG
+# define BOOST_LIB_RT_OPT "-sd"
+# else
+# define BOOST_LIB_RT_OPT "-s"
+# endif
+
+# endif
+
+#endif
+
+//
+// select linkage opt:
+//
+#if (defined(_DLL) || defined(_RTLDLL)) && defined(BOOST_DYN_LINK)
+# define BOOST_LIB_PREFIX
+#elif defined(BOOST_DYN_LINK)
+# error "Mixing a dll boost library with a static runtime is a really bad idea..."
+#else
+# define BOOST_LIB_PREFIX "lib"
+#endif
+
+//
+// now include the lib:
+//
+#if defined(BOOST_LIB_NAME) \
+ && defined(BOOST_LIB_PREFIX) \
+ && defined(BOOST_LIB_TOOLSET) \
+ && defined(BOOST_LIB_THREAD_OPT) \
+ && defined(BOOST_LIB_RT_OPT) \
+ && defined(BOOST_LIB_VERSION)
+
+# pragma comment(lib, BOOST_LIB_PREFIX BOOST_STRINGIZE(BOOST_LIB_NAME) "-" BOOST_LIB_TOOLSET BOOST_LIB_THREAD_OPT BOOST_LIB_RT_OPT "-" BOOST_LIB_VERSION ".lib")
+#ifdef BOOST_LIB_DIAGNOSTIC
+# pragma message ("Linking to lib file: " BOOST_LIB_PREFIX BOOST_STRINGIZE(BOOST_LIB_NAME) "-" BOOST_LIB_TOOLSET BOOST_LIB_THREAD_OPT BOOST_LIB_RT_OPT "-" BOOST_LIB_VERSION ".lib")
+#endif
+
+#else
+# error "some required macros where not defined (internal logic error)."
+#endif
+
+
+#endif // _MSC_VER || __BORLANDC__
+
+//
+// finally undef any macros we may have set:
+//
+#ifdef BOOST_LIB_PREFIX
+# undef BOOST_LIB_PREFIX
+#endif
+#if defined(BOOST_LIB_NAME)
+# undef BOOST_LIB_NAME
+#endif
+#if defined(BOOST_LIB_TOOLSET)
+# undef BOOST_LIB_TOOLSET
+#endif
+#if defined(BOOST_LIB_THREAD_OPT)
+# undef BOOST_LIB_THREAD_OPT
+#endif
+#if defined(BOOST_LIB_RT_OPT)
+# undef BOOST_LIB_RT_OPT
+#endif
+#if defined(BOOST_LIB_LINK_OPT)
+# undef BOOST_LIB_LINK_OPT
+#endif
+#if defined(BOOST_LIB_DEBUG_OPT)
+# undef BOOST_LIB_DEBUG_OPT
+#endif
+#if defined(BOOST_DYN_LINK)
+# undef BOOST_DYN_LINK
+#endif
+
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/borland.hpp b/indexlib/boost-compat/config/compiler/borland.hpp
new file mode 100644
index 000000000..531691eff
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/borland.hpp
@@ -0,0 +1,177 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Borland C++ compiler setup:
+
+// Version 5.0 and below:
+# if __BORLANDC__ <= 0x0550
+// Borland C++Builder 4 and 5:
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+# if __BORLANDC__ == 0x0550
+// Borland C++Builder 5, command-line compiler 5.5:
+# define BOOST_NO_OPERATORS_IN_NAMESPACE
+# endif
+# endif
+
+// Version 5.51 and below:
+#if (__BORLANDC__ <= 0x551)
+# define BOOST_NO_CV_SPECIALIZATIONS
+# define BOOST_NO_CV_VOID_SPECIALIZATIONS
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+# define BOOST_NO_DEDUCED_TYPENAME
+// workaround for missing WCHAR_MAX/WCHAR_MIN:
+#include <climits>
+#include <cwchar>
+#ifndef WCHAR_MAX
+# define WCHAR_MAX 0xffff
+#endif
+#ifndef WCHAR_MIN
+# define WCHAR_MIN 0
+#endif
+#endif
+
+#if (__BORLANDC__ <= 0x564)
+# define BOOST_NO_SFINAE
+#endif
+
+// Version 7.0 (Kylix) and below:
+#if (__BORLANDC__ <= 0x570)
+# define BOOST_NO_INTEGRAL_INT64_T
+# define BOOST_NO_DEPENDENT_NESTED_DERIVATIONS
+# define BOOST_NO_PRIVATE_IN_AGGREGATE
+# define BOOST_NO_USING_TEMPLATE
+# define BOOST_BCB_PARTIAL_SPECIALIZATION_BUG
+# define BOOST_NO_TEMPLATE_TEMPLATES
+# define BOOST_NO_USING_DECLARATION_OVERLOADS_FROM_TYPENAME_BASE
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+ // we shouldn't really need this - but too many things choke
+ // without it, this needs more investigation:
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+# ifdef NDEBUG
+ // fix broken <cstring> so that Boost.test works:
+# include <cstring>
+# undef strcmp
+# endif
+
+//
+// new bug in 5.61:
+#if (__BORLANDC__ >= 0x561) && (__BORLANDC__ <= 0x570)
+ // this seems to be needed by the command line compiler, but not the IDE:
+# define BOOST_NO_MEMBER_FUNCTION_SPECIALIZATIONS
+#endif
+
+# ifdef _WIN32
+# define BOOST_NO_SWPRINTF
+# elif defined(linux) || defined(__linux__) || defined(__linux)
+ // we should really be able to do without this
+ // but the wcs* functions aren't imported into std::
+# define BOOST_NO_STDC_NAMESPACE
+ // _CPPUNWIND doesn't get automatically set for some reason:
+# pragma defineonoption BOOST_CPPUNWIND -x
+# endif
+#endif
+
+//
+// Post 0x561 we have long long and stdint.h:
+#if __BORLANDC__ >= 0x561
+# ifndef __NO_LONG_LONG
+# define BOOST_HAS_LONG_LONG
+# endif
+ // On non-Win32 platforms let the platform config figure this out:
+# ifdef _WIN32
+# define BOOST_HAS_STDINT_H
+# endif
+#endif
+
+// Borland C++Builder 6 defaults to using STLPort. If _USE_OLD_RW_STL is
+// defined, then we have 0x560 or greater with the Rogue Wave implementation
+// which presumably has the std::DBL_MAX bug.
+#if ((__BORLANDC__ >= 0x550) && (__BORLANDC__ < 0x560)) || defined(_USE_OLD_RW_STL)
+// <climits> is partly broken, some macros define symbols that are really in
+// namespace std, so you end up having to use illegal constructs like
+// std::DBL_MAX, as a fix we'll just include float.h and have done with:
+#include <float.h>
+#endif
+//
+// __int64:
+//
+#if (__BORLANDC__ >= 0x530) && !defined(__STRICT_ANSI__)
+# define BOOST_HAS_MS_INT64
+#endif
+//
+// check for exception handling support:
+//
+#if !defined(_CPPUNWIND) && !defined(BOOST_CPPUNWIND) && !defined(__EXCEPTIONS)
+# define BOOST_NO_EXCEPTIONS
+#endif
+//
+// all versions have a <dirent.h>:
+//
+#ifndef __STRICT_ANSI__
+# define BOOST_HAS_DIRENT_H
+#endif
+//
+// all versions support __declspec:
+//
+#ifndef __STRICT_ANSI__
+# define BOOST_HAS_DECLSPEC
+#endif
+//
+// ABI fixing headers:
+//
+#if __BORLANDC__ < 0x600 // not implemented for version 6 compiler yet
+#ifndef BOOST_ABI_PREFIX
+# define BOOST_ABI_PREFIX "boost/config/abi/borland_prefix.hpp"
+#endif
+#ifndef BOOST_ABI_SUFFIX
+# define BOOST_ABI_SUFFIX "boost/config/abi/borland_suffix.hpp"
+#endif
+#endif
+//
+// Disable Win32 support in ANSI mode:
+//
+#if __BORLANDC__ < 0x600
+# pragma defineonoption BOOST_DISABLE_WIN32 -A
+#elif defined(__STRICT_ANSI__)
+# define BOOST_DISABLE_WIN32
+#endif
+//
+// MSVC compatibility mode does some nasty things:
+//
+#if defined(_MSC_VER) && (_MSC_VER <= 1200)
+# define BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP
+# define BOOST_NO_VOID_RETURNS
+#endif
+
+#define BOOST_COMPILER "Borland C++ version " BOOST_STRINGIZE(__BORLANDC__)
+
+//
+// versions check:
+// we don't support Borland prior to version 5.4:
+#if __BORLANDC__ < 0x540
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 1536 (Builder X preview):
+#if (__BORLANDC__ > 1536)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# else
+# pragma message( "Unknown compiler version - please run the configure tests and report the results")
+# endif
+#endif
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/comeau.hpp b/indexlib/boost-compat/config/compiler/comeau.hpp
new file mode 100644
index 000000000..16a1b93cb
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/comeau.hpp
@@ -0,0 +1,61 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Douglas Gregor 2001.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright Aleksey Gurtovoy 2003.
+// (C) Copyright Beman Dawes 2003.
+// (C) Copyright Jens Maurer 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Comeau C++ compiler setup:
+
+#include "boost/config/compiler/common_edg.hpp"
+
+#if (__COMO_VERSION__ <= 4245)
+
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+
+# if defined(_MSC_VER) && _MSC_VER <= 1300
+# if _MSC_VER > 100
+ // only set this in non-strict mode:
+# define BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP
+# endif
+# endif
+
+// Void returns don't work when emulating VC 6 (Peter Dimov)
+
+# if defined(_MSC_VER) && (_MSC_VER == 1200)
+# define BOOST_NO_VOID_RETURNS
+# endif
+
+#endif // version 4245
+
+//
+// enable __int64 support in VC emulation mode
+//
+# if defined(_MSC_VER) && (_MSC_VER >= 1200)
+# define BOOST_HAS_MS_INT64
+# endif
+
+#define BOOST_COMPILER "Comeau compiler version " BOOST_STRINGIZE(__COMO_VERSION__)
+
+//
+// versions check:
+// we don't know Comeau prior to version 4245:
+#if __COMO_VERSION__ < 4245
+# error "Compiler not configured - please reconfigure"
+#endif
+//
+// last known and checked version is 4245:
+#if (__COMO_VERSION__ > 4245)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/common_edg.hpp b/indexlib/boost-compat/config/compiler/common_edg.hpp
new file mode 100644
index 000000000..c7c0b56ee
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/common_edg.hpp
@@ -0,0 +1,53 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// (C) Copyright Jens Maurer 2001.
+// (C) Copyright David Abrahams 2002.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+//
+// Options common to all edg based compilers.
+//
+// This is included from within the individual compiler mini-configs.
+
+#ifndef __EDG_VERSION__
+# error This file requires that __EDG_VERSION__ be defined.
+#endif
+
+#if (__EDG_VERSION__ <= 238)
+# define BOOST_NO_INTEGRAL_INT64_T
+# define BOOST_NO_SFINAE
+#endif
+
+#if (__EDG_VERSION__ <= 240)
+# define BOOST_NO_VOID_RETURNS
+#endif
+
+#if (__EDG_VERSION__ <= 241) && !defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
+# define BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP
+#endif
+
+#if (__EDG_VERSION__ <= 244) && !defined(BOOST_NO_TEMPLATE_TEMPLATES)
+# define BOOST_NO_TEMPLATE_TEMPLATES
+#endif
+
+// See also kai.hpp which checks a Kai-specific symbol for EH
+# if !defined(__KCC) && !defined(__EXCEPTIONS)
+# define BOOST_NO_EXCEPTIONS
+# endif
+
+# if !defined(__NO_LONG_LONG)
+# define BOOST_HAS_LONG_LONG
+# endif
+
+#ifdef c_plusplus
+// EDG has "long long" in non-strict mode
+// However, some libraries have insufficient "long long" support
+// #define BOOST_HAS_LONG_LONG
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/compaq_cxx.hpp b/indexlib/boost-compat/config/compiler/compaq_cxx.hpp
new file mode 100644
index 000000000..3c0c6b7ff
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/compaq_cxx.hpp
@@ -0,0 +1,20 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Dec Alpha True64 C++ compiler setup:
+
+#define BOOST_COMPILER "Dec Alpha True64 " BOOST_STRINGIZE(__DECCXX_VER)
+
+#include "boost/config/compiler/common_edg.hpp"
+
+//
+// versions check:
+// Nothing to do here?
+
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+
+
diff --git a/indexlib/boost-compat/config/compiler/digitalmars.hpp b/indexlib/boost-compat/config/compiler/digitalmars.hpp
new file mode 100644
index 000000000..32fc71faf
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/digitalmars.hpp
@@ -0,0 +1,49 @@
+// Copyright (C) Christof Meerwald 2003
+// Copyright (C) Dan Watkins 2003
+//
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// Digital Mars C++ compiler setup:
+#define BOOST_COMPILER __DMC_VERSION_STRING__
+
+#define BOOST_HAS_LONG_LONG
+#define BOOST_HAS_PRAGMA_ONCE
+
+#if (__DMC__ <= 0x833)
+#define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+#define BOOST_NO_TEMPLATE_TEMPLATES
+#define BOOST_NEEDS_TOKEN_PASTING_OP_FOR_TOKENS_JUXTAPOSING
+#define BOOST_NO_ARRAY_TYPE_SPECIALIZATIONS
+#define BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+#endif
+#if (__DMC__ <= 0x840) || !defined(BOOST_STRICT_CONFIG)
+#define BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+#define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+#define BOOST_NO_OPERATORS_IN_NAMESPACE
+#define BOOST_NO_UNREACHABLE_RETURN_DETECTION
+#define BOOST_NO_SFINAE
+#define BOOST_NO_USING_TEMPLATE
+#define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+#endif
+
+//
+// has macros:
+#if (__DMC__ >= 0x840)
+#define BOOST_HAS_DIRENT_H
+#define BOOST_HAS_STDINT_H
+#define BOOST_HAS_WINTHREADS
+#endif
+
+
+// check for exception handling support:
+#ifndef _CPPUNWIND
+# define BOOST_NO_EXCEPTIONS
+#endif
+
+#if (__DMC__ < 0x840)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
diff --git a/indexlib/boost-compat/config/compiler/gcc.hpp b/indexlib/boost-compat/config/compiler/gcc.hpp
new file mode 100644
index 000000000..4dc3f608b
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/gcc.hpp
@@ -0,0 +1,96 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001 - 2002.
+// (C) Copyright Jens Maurer 2001 - 2002.
+// (C) Copyright Beman Dawes 2001 - 2003.
+// (C) Copyright Douglas Gregor 2002.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Synge Todo 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// GNU C++ compiler setup:
+
+# if __GNUC__ == 2 && __GNUC_MINOR__ == 91
+ // egcs 1.1 won't parse shared_ptr.hpp without this:
+# define BOOST_NO_AUTO_PTR
+# endif
+# if __GNUC__ == 2 && __GNUC_MINOR__ < 95
+ //
+ // Prior to gcc 2.95 member templates only partly
+ // work - define BOOST_MSVC6_MEMBER_TEMPLATES
+ // instead since inline member templates mostly work.
+ //
+# define BOOST_NO_MEMBER_TEMPLATES
+# if __GNUC_MINOR__ >= 9
+# define BOOST_MSVC6_MEMBER_TEMPLATES
+# endif
+# endif
+
+# if __GNUC__ == 2 && __GNUC_MINOR__ < 96
+# define BOOST_NO_SFINAE
+# endif
+
+# if __GNUC__ == 2 && __GNUC_MINOR__ <= 97
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+# define BOOST_NO_OPERATORS_IN_NAMESPACE
+# endif
+
+# if __GNUC__ < 3
+# define BOOST_NO_USING_DECLARATION_OVERLOADS_FROM_TYPENAME_BASE
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+# endif
+
+#ifndef __EXCEPTIONS
+# define BOOST_NO_EXCEPTIONS
+#endif
+
+//
+// Bug specific to gcc 3.1 and 3.2:
+//
+#if (__GNUC__ == 3) && ((__GNUC_MINOR__ == 1) || (__GNUC_MINOR__ == 2))
+# define BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+#endif
+
+//
+// Threading support: Turn this on unconditionally here (except for
+// those platforms where we can know for sure). It will get turned off again
+// later if no threading API is detected.
+//
+#if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__)
+# define BOOST_HAS_THREADS
+#endif
+
+//
+// gcc has "long long"
+//
+#define BOOST_HAS_LONG_LONG
+
+//
+// gcc implements the named return value optimization since version 3.1
+//
+#if __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 1 )
+#define BOOST_HAS_NRVO
+#endif
+
+#define BOOST_COMPILER "GNU C++ version " __VERSION__
+
+//
+// versions check:
+// we don't know gcc prior to version 2.90:
+#if (__GNUC__ == 2) && (__GNUC_MINOR__ < 90)
+# error "Compiler not configured - please reconfigure"
+#endif
+//
+// last known and checked version is 3.4:
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 4))
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# else
+# warning "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
diff --git a/indexlib/boost-compat/config/compiler/greenhills.hpp b/indexlib/boost-compat/config/compiler/greenhills.hpp
new file mode 100644
index 000000000..038b6b2b5
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/greenhills.hpp
@@ -0,0 +1,28 @@
+// (C) Copyright John Maddock 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Greenhills C++ compiler setup:
+
+#define BOOST_COMPILER "Greenhills C++ version " BOOST_STRINGIZE(__ghs)
+
+#include "boost/config/compiler/common_edg.hpp"
+
+//
+// versions check:
+// we don't support Greenhills prior to version 0:
+#if __ghs < 0
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 0:
+#if (__ghs > 0)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
diff --git a/indexlib/boost-compat/config/compiler/hp_acc.hpp b/indexlib/boost-compat/config/compiler/hp_acc.hpp
new file mode 100644
index 000000000..b5c587ab7
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/hp_acc.hpp
@@ -0,0 +1,69 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001 - 2003.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Toon Knapen 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// HP aCC C++ compiler setup:
+
+#if (__HP_aCC <= 33100)
+# define BOOST_NO_INTEGRAL_INT64_T
+# define BOOST_NO_OPERATORS_IN_NAMESPACE
+# if !defined(_NAMESPACE_STD)
+# define BOOST_NO_STD_LOCALE
+# define BOOST_NO_STRINGSTREAM
+# endif
+#endif
+
+#if (__HP_aCC <= 33300)
+// member templates are sufficiently broken that we disable them for now
+# define BOOST_NO_MEMBER_TEMPLATES
+# define BOOST_NO_DEPENDENT_NESTED_DERIVATIONS
+# define BOOST_NO_USING_DECLARATION_OVERLOADS_FROM_TYPENAME_BASE
+#endif
+
+#if (__HP_aCC <= 33900) || !defined(BOOST_STRICT_CONFIG)
+# define BOOST_NO_UNREACHABLE_RETURN_DETECTION
+# define BOOST_NO_TEMPLATE_TEMPLATES
+# define BOOST_NO_SWPRINTF
+# define BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS
+// std lib config should set this one already:
+//# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+// optional features rather than defects:
+#if (__HP_aCC >= 33900)
+# define BOOST_HAS_LONG_LONG
+# define BOOST_HAS_PARTIAL_STD_ALLOCATOR
+#endif
+
+#if (__HP_aCC >= 50000 ) && (__HP_aCC <= 53800 ) || (__HP_aCC < 31300 )
+# define BOOST_NO_MEMBER_TEMPLATE_KEYWORD
+#endif
+
+#define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+
+#define BOOST_COMPILER "HP aCC version " BOOST_STRINGIZE(__HP_aCC)
+
+//
+// versions check:
+// we don't support HP aCC prior to version 33000:
+#if __HP_aCC < 33000
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 53800:
+#if (__HP_aCC > 53800)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/intel.hpp b/indexlib/boost-compat/config/compiler/intel.hpp
new file mode 100644
index 000000000..060338bcf
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/intel.hpp
@@ -0,0 +1,151 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright Jens Maurer 2001.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Aleksey Gurtovoy 2002 - 2003.
+// (C) Copyright Guillaume Melquiond 2002 - 2003.
+// (C) Copyright Beman Dawes 2003.
+// (C) Copyright Martin Wille 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Intel compiler setup:
+
+#include "boost/config/compiler/common_edg.hpp"
+
+#if defined(__INTEL_COMPILER)
+# define BOOST_INTEL_CXX_VERSION __INTEL_COMPILER
+#elif defined(__ICL)
+# define BOOST_INTEL_CXX_VERSION __ICL
+#elif defined(__ICC)
+# define BOOST_INTEL_CXX_VERSION __ICC
+#elif defined(__ECC)
+# define BOOST_INTEL_CXX_VERSION __ECC
+#endif
+
+#define BOOST_COMPILER "Intel C++ version " BOOST_STRINGIZE(BOOST_INTEL_CXX_VERSION)
+#define BOOST_INTEL BOOST_INTEL_CXX_VERSION
+
+#if defined(_WIN32) || defined(_WIN64)
+# define BOOST_INTEL_WIN BOOST_INTEL
+#else
+# define BOOST_INTEL_LINUX BOOST_INTEL
+#endif
+
+#if (BOOST_INTEL_CXX_VERSION <= 500) && defined(_MSC_VER)
+# define BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+# define BOOST_NO_TEMPLATE_TEMPLATES
+#endif
+
+#if (BOOST_INTEL_CXX_VERSION <= 600)
+
+# if defined(_MSC_VER) && (_MSC_VER <= 1300) // added check for <= VC 7 (Peter Dimov)
+
+// Boost libraries assume strong standard conformance unless otherwise
+// indicated by a config macro. As configured by Intel, the EDG front-end
+// requires certain compiler options be set to achieve that strong conformance.
+// Particularly /Qoption,c,--arg_dep_lookup (reported by Kirk Klobe & Thomas Witt)
+// and /Zc:wchar_t,forScope. See boost-root/tools/build/intel-win32-tools.jam for
+// details as they apply to particular versions of the compiler. When the
+// compiler does not predefine a macro indicating if an option has been set,
+// this config file simply assumes the option has been set.
+// Thus BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP will not be defined, even if
+// the compiler option is not enabled.
+
+# define BOOST_NO_SWPRINTF
+# endif
+
+// Void returns, 64 bit integrals don't work when emulating VC 6 (Peter Dimov)
+
+# if defined(_MSC_VER) && (_MSC_VER <= 1200)
+# define BOOST_NO_VOID_RETURNS
+# define BOOST_NO_INTEGRAL_INT64_T
+# endif
+
+#endif
+
+#if (BOOST_INTEL_CXX_VERSION <= 710) && defined(_WIN32)
+# define BOOST_NO_POINTER_TO_MEMBER_TEMPLATE_PARAMETERS
+#endif
+
+// See http://aspn.activestate.com/ASPN/Mail/Message/boost/1614864
+#if BOOST_INTEL_CXX_VERSION < 600
+# define BOOST_NO_INTRINSIC_WCHAR_T
+#else
+// We should test the macro _WCHAR_T_DEFINED to check if the compiler
+// supports wchar_t natively. *BUT* there is a problem here: the standard
+// headers define this macro if they typedef wchar_t. Anyway, we're lucky
+// because they define it without a value, while Intel C++ defines it
+// to 1. So we can check its value to see if the macro was defined natively
+// or not.
+// Under UNIX, the situation is exactly the same, but the macro _WCHAR_T
+// is used instead.
+# if ((_WCHAR_T_DEFINED + 0) == 0) && ((_WCHAR_T + 0) == 0)
+# define BOOST_NO_INTRINSIC_WCHAR_T
+# endif
+#endif
+
+//
+// Verify that we have actually got BOOST_NO_INTRINSIC_WCHAR_T
+// set correctly. If we don't do this now, we will get errors later
+// in type_traits code among other things; getting this correct
+// for the Intel compiler is actually remarkably fragile and tricky:
+//
+#if defined(BOOST_NO_INTRINSIC_WCHAR_T)
+#include <cwchar>
+template< typename T > struct assert_no_intrinsic_wchar_t;
+template<> struct assert_no_intrinsic_wchar_t<wchar_t> { typedef void type; };
+// if you see an error here (only the wchar_t specialization is complete, so wchar_t is
+// not the same type as unsigned short) then unset BOOST_NO_INTRINSIC_WCHAR_T where it is defined above:
+typedef assert_no_intrinsic_wchar_t<unsigned short>::type assert_no_intrinsic_wchar_t_;
+#else
+template< typename T > struct assert_intrinsic_wchar_t;
+template<> struct assert_intrinsic_wchar_t<wchar_t> {};
+// if you see an error here (duplicate specialization: wchar_t is the same type as unsigned short) then define BOOST_NO_INTRINSIC_WCHAR_T on the command line:
+template<> struct assert_intrinsic_wchar_t<unsigned short> {};
+#endif
+
+
+#if (BOOST_INTEL_CXX_VERSION <= 800) || !defined(BOOST_STRICT_CONFIG)
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+#endif
+
+#if _MSC_VER+0 >= 1000
+# if _MSC_VER >= 1200
+# define BOOST_HAS_MS_INT64
+# endif
+# define BOOST_NO_SWPRINTF
+#elif defined(_WIN32)
+# define BOOST_DISABLE_WIN32
+#endif
+
+// I checked version 6.0 build 020312Z, it implements the NRVO.
+// Correct this as you find out which version of the compiler
+// implemented the NRVO first. (Daniel Frey)
+#if (BOOST_INTEL_CXX_VERSION >= 600)
+# define BOOST_HAS_NRVO
+#endif
+
+//
+// versions check:
+// we don't support Intel prior to version 5.0:
+#if BOOST_INTEL_CXX_VERSION < 500
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version:
+#if (BOOST_INTEL_CXX_VERSION > 810)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# elif defined(_MSC_VER)
+# pragma message("Unknown compiler version - please run the configure tests and report the results")
+# endif
+#endif
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/kai.hpp b/indexlib/boost-compat/config/compiler/kai.hpp
new file mode 100644
index 000000000..de16f1a67
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/kai.hpp
@@ -0,0 +1,35 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright David Abrahams 2002.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Kai C++ compiler setup:
+
+#include "boost/config/compiler/common_edg.hpp"
+
+# if (__KCC_VERSION <= 4001) || !defined(BOOST_STRICT_CONFIG)
+ // at least on Sun, the contents of <cwchar> are not in namespace std
+# define BOOST_NO_STDC_NAMESPACE
+# endif
+
+// see also common_edg.hpp which needs a special check for __KCC
+# if !defined(_EXCEPTIONS)
+# define BOOST_NO_EXCEPTIONS
+# endif
+
+#define BOOST_COMPILER "Kai C++ version " BOOST_STRINGIZE(__KCC_VERSION)
+
+//
+// last known and checked version is 4001:
+#if (__KCC_VERSION > 4001)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/metrowerks.hpp b/indexlib/boost-compat/config/compiler/metrowerks.hpp
new file mode 100644
index 000000000..a74cadef1
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/metrowerks.hpp
@@ -0,0 +1,97 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Darin Adler 2001.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright David Abrahams 2001 - 2002.
+// (C) Copyright Beman Dawes 2001 - 2003.
+// (C) Copyright Stefan Slapeta 2004.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Metrowerks C++ compiler setup:
+
+// locale support is disabled when linking with the dynamic runtime
+# ifdef _MSL_NO_LOCALE
+# define BOOST_NO_STD_LOCALE
+# endif
+
+# if __MWERKS__ <= 0x2301 // 5.3
+# define BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+# define BOOST_NO_POINTER_TO_MEMBER_CONST
+# define BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS
+# define BOOST_NO_MEMBER_TEMPLATE_KEYWORD
+# endif
+
+# if __MWERKS__ <= 0x2401 // 6.2
+//# define BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+# endif
+
+# if(__MWERKS__ <= 0x2407) // 7.x
+# define BOOST_NO_MEMBER_FUNCTION_SPECIALIZATIONS
+# define BOOST_NO_UNREACHABLE_RETURN_DETECTION
+# endif
+
+# if(__MWERKS__ <= 0x3003) // 8.x
+# define BOOST_NO_SFINAE
+# endif
+
+# if(__MWERKS__ <= 0x3204) // 9.3
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+# endif
+
+#if !__option(wchar_type)
+# define BOOST_NO_INTRINSIC_WCHAR_T
+#endif
+
+#if !__option(exceptions)
+# define BOOST_NO_EXCEPTIONS
+#endif
+
+#if (__INTEL__ && _WIN32) || (__POWERPC__ && macintosh)
+# if __MWERKS__ == 0x3000
+# define BOOST_COMPILER_VERSION 8.0
+# elif __MWERKS__ == 0x3001
+# define BOOST_COMPILER_VERSION 8.1
+# elif __MWERKS__ == 0x3002
+# define BOOST_COMPILER_VERSION 8.2
+# elif __MWERKS__ == 0x3003
+# define BOOST_COMPILER_VERSION 8.3
+# elif __MWERKS__ == 0x3200
+# define BOOST_COMPILER_VERSION 9.0
+# elif __MWERKS__ == 0x3201
+# define BOOST_COMPILER_VERSION 9.1
+# elif __MWERKS__ == 0x3202
+# define BOOST_COMPILER_VERSION 9.2
+# elif __MWERKS__ == 0x3204
+# define BOOST_COMPILER_VERSION 9.3
+# else
+# define BOOST_COMPILER_VERSION __MWERKS__
+# endif
+#else
+# define BOOST_COMPILER_VERSION __MWERKS__
+#endif
+
+#define BOOST_COMPILER "Metrowerks CodeWarrior C++ version " BOOST_STRINGIZE(BOOST_COMPILER_VERSION)
+
+//
+// versions check:
+// we don't support Metrowerks prior to version 5.3:
+#if __MWERKS__ < 0x2301
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version:
+#if (__MWERKS__ > 0x3204)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/mpw.hpp b/indexlib/boost-compat/config/compiler/mpw.hpp
new file mode 100644
index 000000000..8ab2aacb6
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/mpw.hpp
@@ -0,0 +1,51 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// MPW C++ compilers setup:
+
+# if defined(__SC__)
+# define BOOST_COMPILER "MPW SCpp version " BOOST_STRINGIZE(__SC__)
+# elif defined(__MRC__)
+# define BOOST_COMPILER "MPW MrCpp version " BOOST_STRINGIZE(__MRC__)
+# else
+# error "Using MPW compiler configuration by mistake. Please update."
+# endif
+
+//
+// MPW 8.90:
+//
+#if (MPW_CPLUS <= 0x890) || !defined(BOOST_STRICT_CONFIG)
+# define BOOST_NO_CV_SPECIALIZATIONS
+# define BOOST_NO_DEPENDENT_NESTED_DERIVATIONS
+# define BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+# define BOOST_NO_INTRINSIC_WCHAR_T
+# define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
+# define BOOST_NO_USING_TEMPLATE
+
+# define BOOST_NO_CWCHAR
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+
+# define BOOST_NO_STD_ALLOCATOR /* actually a bug with const reference overloading */
+#endif
+
+//
+// versions check:
+// we don't support MPW prior to version 8.9:
+#if MPW_CPLUS < 0x890
+# error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 0x890:
+#if (MPW_CPLUS > 0x890)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
diff --git a/indexlib/boost-compat/config/compiler/sgi_mipspro.hpp b/indexlib/boost-compat/config/compiler/sgi_mipspro.hpp
new file mode 100644
index 000000000..689b67eeb
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/sgi_mipspro.hpp
@@ -0,0 +1,24 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// SGI C++ compiler setup:
+
+#define BOOST_COMPILER "SGI Irix compiler version " BOOST_STRINGIZE(_COMPILER_VERSION)
+
+#include "boost/config/compiler/common_edg.hpp"
+
+//
+// Threading support:
+// Turn this on unconditionally here, it will get turned off again later
+// if no threading API is detected.
+//
+#define BOOST_HAS_THREADS
+//
+// version check:
+// probably nothing to do here?
+
+
diff --git a/indexlib/boost-compat/config/compiler/sunpro_cc.hpp b/indexlib/boost-compat/config/compiler/sunpro_cc.hpp
new file mode 100644
index 000000000..8a61199f8
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/sunpro_cc.hpp
@@ -0,0 +1,89 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Jens Maurer 2001 - 2003.
+// (C) Copyright Peter Dimov 2002.
+// (C) Copyright Aleksey Gurtovoy 2002 - 2003.
+// (C) Copyright David Abrahams 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Sun C++ compiler setup:
+
+# if __SUNPRO_CC <= 0x500
+# define BOOST_NO_MEMBER_TEMPLATES
+# define BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+# endif
+
+# if (__SUNPRO_CC <= 0x520)
+ //
+ // Sunpro 5.2 and earlier:
+ //
+ // although sunpro 5.2 supports the syntax for
+ // inline initialization it often gets the value
+ // wrong, especially where the value is computed
+ // from other constants (J Maddock 6th May 2001)
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+
+ // Although sunpro 5.2 supports the syntax for
+ // partial specialization, it often seems to
+ // bind to the wrong specialization. Better
+ // to disable it until support becomes more stable
+ // (J Maddock 6th May 2001).
+# define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
+# endif
+
+# if (__SUNPRO_CC <= 0x530) || !defined(BOOST_STRICT_CONFIG)
+ // Requesting debug info (-g) with Boost.Python results
+ // in an internal compiler error for "static const"
+ // initialized in-class.
+ // >> Assertion: (../links/dbg_cstabs.cc, line 611)
+ // while processing ../test.cpp at line 0.
+ // (Jens Maurer according to Gottfried Ganauge 04 Mar 2002)
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+
+ // SunPro 5.3 has better support for partial specialization,
+ // but breaks when compiling std::less<shared_ptr<T> >
+ // (Jens Maurer 4 Nov 2001).
+
+ // std::less specialization fixed as reported by George
+ // Heintzelman; partial specialization re-enabled
+ // (Peter Dimov 17 Jan 2002)
+
+//# define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
+
+ // integral constant expressions with 64 bit numbers fail
+# define BOOST_NO_INTEGRAL_INT64_T
+# endif
+
+# if (__SUNPRO_CC <= 0x540) || !defined(BOOST_STRICT_CONFIG)
+# define BOOST_NO_TEMPLATE_TEMPLATES
+ // see http://lists.boost.org/MailArchives/boost/msg47184.php
+ // and http://lists.boost.org/MailArchives/boost/msg47220.php
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+# define BOOST_NO_SFINAE
+# define BOOST_NO_ARRAY_TYPE_SPECIALIZATIONS
+# endif
+
+#define BOOST_COMPILER "Sun compiler version " BOOST_STRINGIZE(__SUNPRO_CC)
+
+//
+// versions check:
+// we don't support sunpro prior to version 4:
+#if __SUNPRO_CC < 0x400
+#error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 0x530:
+#if (__SUNPRO_CC > 0x530)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/vacpp.hpp b/indexlib/boost-compat/config/compiler/vacpp.hpp
new file mode 100644
index 000000000..4cf0de7c1
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/vacpp.hpp
@@ -0,0 +1,58 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Toon Knapen 2001 - 2003.
+// (C) Copyright Lie-Quan Lee 2001.
+// (C) Copyright Markus Schoepflin 2002 - 2003.
+// (C) Copyright Beman Dawes 2002 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Visual Age (IBM) C++ compiler setup:
+
+#if __IBMCPP__ <= 501
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+# define BOOST_NO_MEMBER_FUNCTION_SPECIALIZATIONS
+#endif
+
+#if (__IBMCPP__ <= 502)
+// Actually the compiler supports inclass member initialization but it
+// requires a definition for the class member and it doesn't recognize
+// it as an integral constant expression when used as a template argument.
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+# define BOOST_NO_INTEGRAL_INT64_T
+# define BOOST_NO_MEMBER_TEMPLATE_KEYWORD
+#endif
+
+#if (__IBMCPP__ <= 600) || !defined(BOOST_STRICT_CONFIG)
+# define BOOST_NO_POINTER_TO_MEMBER_TEMPLATE_PARAMETERS
+# define BOOST_MPL_CFG_ASSERT_USE_RELATION_NAMES 1
+#endif
+
+//
+// On AIX thread support seems to be indicated by _THREAD_SAFE:
+//
+#ifdef _THREAD_SAFE
+# define BOOST_HAS_THREADS
+#endif
+
+#define BOOST_COMPILER "IBM Visual Age version " BOOST_STRINGIZE(__IBMCPP__)
+
+//
+// versions check:
+// we don't support Visual age prior to version 5:
+#if __IBMCPP__ < 500
+#error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 600:
+#if (__IBMCPP__ > 600)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# endif
+#endif
+
+
+
+
diff --git a/indexlib/boost-compat/config/compiler/visualc.hpp b/indexlib/boost-compat/config/compiler/visualc.hpp
new file mode 100644
index 000000000..323532603
--- /dev/null
+++ b/indexlib/boost-compat/config/compiler/visualc.hpp
@@ -0,0 +1,146 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001 - 2002.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright Aleksey Gurtovoy 2002.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Beman Dawes 2002 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Microsoft Visual C++ compiler setup:
+
+#define BOOST_MSVC _MSC_VER
+
+// turn off the warnings before we #include anything
+#pragma warning( disable : 4503 ) // warning: decorated name length exceeded
+
+#if _MSC_VER < 1300 // 1200 == VC++ 6.0, 1201 == EVC4.2
+#pragma warning( disable : 4786 ) // ident trunc to '255' chars in debug info
+# define BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS
+# define BOOST_NO_VOID_RETURNS
+# define BOOST_NO_EXCEPTION_STD_NAMESPACE
+ // disable min/max macro defines on vc6:
+ //
+#endif
+
+#if (_MSC_VER <= 1300) // 1300 == VC++ 7.0
+
+#if !defined(_MSC_EXTENSIONS) && !defined(BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS) // VC7 bug with /Za
+# define BOOST_NO_DEPENDENT_TYPES_IN_TEMPLATE_VALUE_PARAMETERS
+#endif
+
+# define BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+# define BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+# define BOOST_NO_PRIVATE_IN_AGGREGATE
+# define BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP
+# define BOOST_NO_INTEGRAL_INT64_T
+# define BOOST_NO_DEDUCED_TYPENAME
+# define BOOST_NO_USING_DECLARATION_OVERLOADS_FROM_TYPENAME_BASE
+
+// VC++ 6/7 has member templates but they have numerous problems including
+// cases of silent failure, so for safety we define:
+# define BOOST_NO_MEMBER_TEMPLATES
+// For VC++ experts wishing to attempt workarounds, we define:
+# define BOOST_MSVC6_MEMBER_TEMPLATES
+
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+# define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
+# define BOOST_NO_CV_VOID_SPECIALIZATIONS
+# define BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+# define BOOST_NO_USING_TEMPLATE
+# define BOOST_NO_SWPRINTF
+# define BOOST_NO_TEMPLATE_TEMPLATES
+# define BOOST_NO_SFINAE
+# define BOOST_NO_POINTER_TO_MEMBER_TEMPLATE_PARAMETERS
+# if (_MSC_VER > 1200)
+# define BOOST_NO_MEMBER_FUNCTION_SPECIALIZATIONS
+# endif
+
+#endif
+
+#if _MSC_VER < 1310 // 1310 == VC++ 7.1
+# define BOOST_NO_SWPRINTF
+#endif
+
+#if _MSC_VER <= 1400 // 1400 == VC++ 8.0
+# define BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+#endif
+
+#ifndef _NATIVE_WCHAR_T_DEFINED
+# define BOOST_NO_INTRINSIC_WCHAR_T
+#endif
+
+#ifdef _WIN32_WCE
+# define BOOST_NO_THREADEX
+# define BOOST_NO_GETSYSTEMTIMEASFILETIME
+#endif
+
+//
+// check for exception handling support:
+#ifndef _CPPUNWIND
+# define BOOST_NO_EXCEPTIONS
+#endif
+
+//
+// __int64 support:
+//
+#if (_MSC_VER >= 1200)
+# define BOOST_HAS_MS_INT64
+#endif
+#if (_MSC_VER >= 1310) && defined(_MSC_EXTENSIONS)
+# define BOOST_HAS_LONG_LONG
+#endif
+//
+// disable Win32 APIs if compiler extensions are
+// turned off:
+//
+#ifndef _MSC_EXTENSIONS
+# define BOOST_DISABLE_WIN32
+#endif
+
+//
+// all versions support __declspec:
+//
+#define BOOST_HAS_DECLSPEC
+//
+// prefix and suffix headers:
+//
+#ifndef BOOST_ABI_PREFIX
+# define BOOST_ABI_PREFIX "boost/config/abi/msvc_prefix.hpp"
+#endif
+#ifndef BOOST_ABI_SUFFIX
+# define BOOST_ABI_SUFFIX "boost/config/abi/msvc_suffix.hpp"
+#endif
+
+# if _MSC_VER == 1200
+# define BOOST_COMPILER_VERSION 6.0
+# elif _MSC_VER == 1300
+# define BOOST_COMPILER_VERSION 7.0
+# elif _MSC_VER == 1310
+# define BOOST_COMPILER_VERSION 7.1
+# elif _MSC_VER == 1400
+# define BOOST_COMPILER_VERSION 8.0
+# else
+# define BOOST_COMPILER_VERSION _MSC_VER
+# endif
+
+#define BOOST_COMPILER "Microsoft Visual C++ version " BOOST_STRINGIZE(BOOST_COMPILER_VERSION)
+
+//
+// versions check:
+// we don't support Visual C++ prior to version 6:
+#if _MSC_VER < 1200
+#error "Compiler not supported or configured - please reconfigure"
+#endif
+//
+// last known and checked version is 1400:
+#if (_MSC_VER > 1400)
+# if defined(BOOST_ASSERT_CONFIG)
+# error "Unknown compiler version - please run the configure tests and report the results"
+# else
+# pragma message("Unknown compiler version - please run the configure tests and report the results")
+# endif
+#endif
diff --git a/indexlib/boost-compat/config/platform/aix.hpp b/indexlib/boost-compat/config/platform/aix.hpp
new file mode 100644
index 000000000..894ef42ce
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/aix.hpp
@@ -0,0 +1,33 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// IBM/Aix specific config options:
+
+#define BOOST_PLATFORM "IBM Aix"
+
+#define BOOST_HAS_UNISTD_H
+#define BOOST_HAS_NL_TYPES_H
+#define BOOST_HAS_NANOSLEEP
+#define BOOST_HAS_CLOCK_GETTIME
+
+// This needs support in "boost/cstdint.hpp" exactly like FreeBSD.
+// This platform has a header named <inttypes.h> which includes all
+// the things needed.
+#define BOOST_HAS_STDINT_H
+
+// Threading API's:
+#define BOOST_HAS_PTHREADS
+#define BOOST_HAS_PTHREAD_DELAY_NP
+#define BOOST_HAS_SCHED_YIELD
+//#define BOOST_HAS_PTHREAD_YIELD
+
+// boilerplate code:
+#include <boost/config/posix_features.hpp>
+
+
+
+
diff --git a/indexlib/boost-compat/config/platform/amigaos.hpp b/indexlib/boost-compat/config/platform/amigaos.hpp
new file mode 100644
index 000000000..34bcf4128
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/amigaos.hpp
@@ -0,0 +1,15 @@
+// (C) Copyright John Maddock 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+#define BOOST_PLATFORM "AmigaOS"
+
+#define BOOST_DISABLE_THREADS
+#define BOOST_NO_CWCHAR
+#define BOOST_NO_STD_WSTRING
+#define BOOST_NO_INTRINSIC_WCHAR_T
+
+
diff --git a/indexlib/boost-compat/config/platform/beos.hpp b/indexlib/boost-compat/config/platform/beos.hpp
new file mode 100644
index 000000000..48c3d8dc5
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/beos.hpp
@@ -0,0 +1,26 @@
+// (C) Copyright John Maddock 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// BeOS specific config options:
+
+#define BOOST_PLATFORM "BeOS"
+
+#define BOOST_NO_CWCHAR
+#define BOOST_NO_CWCTYPE
+#define BOOST_HAS_UNISTD_H
+
+#define BOOST_HAS_BETHREADS
+
+#ifndef BOOST_DISABLE_THREADS
+# define BOOST_HAS_THREADS
+#endif
+
+// boilerplate code:
+#include <boost/config/posix_features.hpp>
+
+
+
diff --git a/indexlib/boost-compat/config/platform/bsd.hpp b/indexlib/boost-compat/config/platform/bsd.hpp
new file mode 100644
index 000000000..4f04ed2a0
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/bsd.hpp
@@ -0,0 +1,70 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001.
+// (C) Copyright Douglas Gregor 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// generic BSD config options:
+
+#if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
+#error "This platform is not BSD"
+#endif
+
+#ifdef __FreeBSD__
+#define BOOST_PLATFORM "FreeBSD " BOOST_STRINGIZE(__FreeBSD__)
+#elif defined(__NetBSD__)
+#define BOOST_PLATFORM "NetBSD " BOOST_STRINGIZE(__NetBSD__)
+#elif defined(__OpenBSD__)
+#define BOOST_PLATFORM "OpenBSD " BOOST_STRINGIZE(__OpenBSD__)
+#endif
+
+//
+// is this the correct version check?
+// FreeBSD has <nl_types.h> but does not
+// advertise the fact in <unistd.h>:
+//
+#if defined(__FreeBSD__) && (__FreeBSD__ >= 3)
+# define BOOST_HAS_NL_TYPES_H
+#endif
+
+//
+// FreeBSD 3.x has pthreads support, but defines _POSIX_THREADS in <pthread.h>
+// and not in <unistd.h>
+//
+#if defined(__FreeBSD__) && (__FreeBSD__ <= 3)
+# define BOOST_HAS_PTHREADS
+#endif
+
+//
+// No wide character support in the BSD header files:
+//
+#define BOOST_NO_CWCHAR
+
+//
+// The BSD <ctype.h> has macros only, no functions:
+//
+#if !defined(__OpenBSD__)
+# define BOOST_NO_CTYPE_FUNCTIONS
+#endif
+
+//
+// thread API's not auto detected:
+//
+#define BOOST_HAS_SCHED_YIELD
+#define BOOST_HAS_NANOSLEEP
+#define BOOST_HAS_GETTIMEOFDAY
+#define BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+#define BOOST_HAS_SIGACTION
+
+// boilerplate code:
+#define BOOST_HAS_UNISTD_H
+#include <boost/config/posix_features.hpp>
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/platform/cygwin.hpp b/indexlib/boost-compat/config/platform/cygwin.hpp
new file mode 100644
index 000000000..0fd2ebe2d
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/cygwin.hpp
@@ -0,0 +1,48 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// cygwin specific config options:
+
+#define BOOST_PLATFORM "Cygwin"
+#define BOOST_NO_CWCTYPE
+#define BOOST_NO_CWCHAR
+#define BOOST_NO_SWPRINTF
+#define BOOST_HAS_DIRENT_H
+
+//
+// Threading API:
+// See if we have POSIX threads, if we do use them, otherwise
+// revert to native Win threads.
+#define BOOST_HAS_UNISTD_H
+#include <unistd.h>
+#if defined(_POSIX_THREADS) && (_POSIX_THREADS+0 >= 0) && !defined(BOOST_HAS_WINTHREADS)
+# define BOOST_HAS_PTHREADS
+# define BOOST_HAS_SCHED_YIELD
+# define BOOST_HAS_GETTIMEOFDAY
+# define BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+# define BOOST_HAS_SIGACTION
+#else
+# if !defined(BOOST_HAS_WINTHREADS)
+# define BOOST_HAS_WINTHREADS
+# endif
+# define BOOST_HAS_FTIME
+#endif
+
+//
+// find out if we have a stdint.h, there should be a better way to do this:
+//
+#include <sys/types.h>
+#ifdef _STDINT_H
+#define BOOST_HAS_STDINT_H
+#endif
+
+// boilerplate code:
+#include <boost/config/posix_features.hpp>
+
+
+
+
diff --git a/indexlib/boost-compat/config/platform/hpux.hpp b/indexlib/boost-compat/config/platform/hpux.hpp
new file mode 100644
index 000000000..21049059e
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/hpux.hpp
@@ -0,0 +1,62 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001 - 2003.
+// (C) Copyright David Abrahams 2002.
+// (C) Copyright Toon Knapen 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// hpux specific config options:
+
+#define BOOST_PLATFORM "HP-UX"
+
+// In principle, HP-UX has a nice <stdint.h> under the name <inttypes.h>
+// However, it has the following problem:
+// Use of UINT32_C(0) results in "0u l" for the preprocessed source
+// (verifiable with gcc 2.95.3, assumed for HP aCC)
+// #define BOOST_HAS_STDINT_H
+
+#define BOOST_NO_SWPRINTF
+#define BOOST_NO_CWCTYPE
+
+#ifdef __GNUC__
+ // GNU C on HP-UX does not support threads (checked up to gcc 3.3)
+# define BOOST_DISABLE_THREADS
+#endif
+
+// boilerplate code:
+#define BOOST_HAS_UNISTD_H
+#include <boost/config/posix_features.hpp>
+
+// the following are always available:
+#ifndef BOOST_HAS_GETTIMEOFDAY
+# define BOOST_HAS_GETTIMEOFDAY
+#endif
+#ifndef BOOST_HAS_SCHED_YIELD
+# define BOOST_HAS_SCHED_YIELD
+#endif
+#ifndef BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+# define BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+#endif
+#ifndef BOOST_HAS_NL_TYPES_H
+# define BOOST_HAS_NL_TYPES_H
+#endif
+#ifndef BOOST_HAS_NANOSLEEP
+# define BOOST_HAS_NANOSLEEP
+#endif
+#ifndef BOOST_HAS_GETTIMEOFDAY
+# define BOOST_HAS_GETTIMEOFDAY
+#endif
+#ifndef BOOST_HAS_DIRENT_H
+# define BOOST_HAS_DIRENT_H
+#endif
+#ifndef BOOST_HAS_CLOCK_GETTIME
+# define BOOST_HAS_CLOCK_GETTIME
+#endif
+#ifndef BOOST_HAS_SIGACTION
+# define BOOST_HAS_SIGACTION
+#endif
+
+
diff --git a/indexlib/boost-compat/config/platform/irix.hpp b/indexlib/boost-compat/config/platform/irix.hpp
new file mode 100644
index 000000000..aeae49c8b
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/irix.hpp
@@ -0,0 +1,31 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+
+// See http://www.boost.org for most recent version.
+
+// SGI Irix specific config options:
+
+#define BOOST_PLATFORM "SGI Irix"
+
+#define BOOST_NO_SWPRINTF
+//
+// these are not auto detected by POSIX feature tests:
+//
+#define BOOST_HAS_GETTIMEOFDAY
+#define BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+
+#ifdef __GNUC__
+ // GNU C on IRIX does not support threads (checked up to gcc 3.3)
+# define BOOST_DISABLE_THREADS
+#endif
+
+// boilerplate code:
+#define BOOST_HAS_UNISTD_H
+#include <boost/config/posix_features.hpp>
+
+
+
diff --git a/indexlib/boost-compat/config/platform/linux.hpp b/indexlib/boost-compat/config/platform/linux.hpp
new file mode 100644
index 000000000..51ae13347
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/linux.hpp
@@ -0,0 +1,98 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// linux specific config options:
+
+#define BOOST_PLATFORM "linux"
+
+// make sure we have __GLIBC_PREREQ if available at all
+#include <cstdlib>
+
+//
+// <stdint.h> added to glibc 2.1.1
+// We can only test for 2.1 though:
+//
+#if defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 1)))
+ // <stdint.h> defines int64_t unconditionally, but <sys/types.h> defines
+ // int64_t only if __GNUC__. Thus, assume a fully usable <stdint.h>
+ // only when using GCC.
+# if defined __GNUC__
+# define BOOST_HAS_STDINT_H
+# endif
+#endif
+
+#if defined(__LIBCOMO__)
+ //
+ // como on linux doesn't have std:: c functions:
+ // NOTE: versions of libcomo prior to beta28 have octal version numbering,
+ // e.g. version 25 is 21 (dec)
+ //
+# if __LIBCOMO_VERSION__ <= 20
+# define BOOST_NO_STDC_NAMESPACE
+# endif
+
+# if __LIBCOMO_VERSION__ <= 21
+# define BOOST_NO_SWPRINTF
+# endif
+
+#endif
+
+//
+// If glibc is version 2 or later then we definitely have
+// gettimeofday; earlier versions may or may not have it:
+//
+#if defined(__GLIBC__) && (__GLIBC__ >= 2)
+# define BOOST_HAS_GETTIMEOFDAY
+#endif
+
+#ifdef __USE_POSIX199309
+# define BOOST_HAS_NANOSLEEP
+#endif
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+// __GLIBC_PREREQ is available since 2.1.2
+
+ // swprintf is available since glibc 2.2.0
+# if !__GLIBC_PREREQ(2,2) || (!defined(__USE_ISOC99) && !defined(__USE_UNIX98))
+# define BOOST_NO_SWPRINTF
+# endif
+#else
+# define BOOST_NO_SWPRINTF
+#endif
+
+// boilerplate code:
+#define BOOST_HAS_UNISTD_H
+#include <boost/config/posix_features.hpp>
+
+#ifndef __GNUC__
+//
+// if the compiler is not gcc we still need to be able to parse
+// the GNU system headers, some of which (mainly <stdint.h>)
+// use GNU specific extensions:
+//
+# ifndef __extension__
+# define __extension__
+# endif
+# ifndef __const__
+# define __const__ const
+# endif
+# ifndef __volatile__
+# define __volatile__ volatile
+# endif
+# ifndef __signed__
+# define __signed__ signed
+# endif
+# ifndef __typeof__
+# define __typeof__ typeof
+# endif
+# ifndef __inline__
+# define __inline__ inline
+# endif
+#endif
+
+
diff --git a/indexlib/boost-compat/config/platform/macos.hpp b/indexlib/boost-compat/config/platform/macos.hpp
new file mode 100644
index 000000000..3a5f41303
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/macos.hpp
@@ -0,0 +1,68 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001 - 2002.
+// (C) Copyright Bill Kempf 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Mac OS specific config options:
+
+#define BOOST_PLATFORM "Mac OS"
+
+#if __MACH__ && !defined(_MSL_USING_MSL_C)
+
+// Using the Mac OS X system BSD-style C library.
+
+# ifndef BOOST_HAS_UNISTD_H
+# define BOOST_HAS_UNISTD_H
+# endif
+// boilerplate code:
+# ifndef TARGET_CARBON
+# include <boost/config/posix_features.hpp>
+# endif
+# ifndef BOOST_HAS_STDINT_H
+# define BOOST_HAS_STDINT_H
+# endif
+
+//
+// BSD runtime has pthreads, sigaction, sched_yield and gettimeofday,
+// of these only pthreads are advertised in <unistd.h>, so set the
+// other options explicitly:
+//
+# define BOOST_HAS_SCHED_YIELD
+# define BOOST_HAS_GETTIMEOFDAY
+# define BOOST_HAS_SIGACTION
+
+# if (__GNUC__ < 3) && !defined( __APPLE_CC__)
+
+// GCC strange "ignore std" mode works better if you pretend everything
+// is in the std namespace, for the most part.
+
+# define BOOST_NO_STDC_NAMESPACE
+# endif
+
+#else
+
+// Using the MSL C library.
+
+// We will eventually support threads in non-Carbon builds, but we do
+// not support this yet.
+# if TARGET_CARBON
+
+# define BOOST_HAS_MPTASKS
+
+// The MP task implementation of Boost Threads aims to replace MP-unsafe
+// parts of the MSL, so we turn on threads unconditionally.
+# define BOOST_HAS_THREADS
+
+// The remote call manager depends on this.
+# define BOOST_BIND_ENABLE_PASCAL
+
+# endif
+
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/platform/solaris.hpp b/indexlib/boost-compat/config/platform/solaris.hpp
new file mode 100644
index 000000000..700dc3ce6
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/solaris.hpp
@@ -0,0 +1,21 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// sun specific config options:
+
+#define BOOST_PLATFORM "Sun Solaris"
+
+#define BOOST_HAS_GETTIMEOFDAY
+
+// boilerplate code:
+#define BOOST_HAS_UNISTD_H
+#include <boost/config/posix_features.hpp>
+
+
+
+
diff --git a/indexlib/boost-compat/config/platform/win32.hpp b/indexlib/boost-compat/config/platform/win32.hpp
new file mode 100644
index 000000000..548bff280
--- /dev/null
+++ b/indexlib/boost-compat/config/platform/win32.hpp
@@ -0,0 +1,50 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Bill Kempf 2001.
+// (C) Copyright Aleksey Gurtovoy 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Win32 specific config options:
+
+#define BOOST_PLATFORM "Win32"
+
+#if defined(__GNUC__) && !defined(BOOST_NO_SWPRINTF)
+# define BOOST_NO_SWPRINTF
+#endif
+
+#if !defined(__GNUC__) && !defined(BOOST_HAS_DECLSPEC)
+# define BOOST_HAS_DECLSPEC
+#endif
+
+#if defined(__MINGW32__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)))
+# define BOOST_HAS_STDINT_H
+# define __STDC_LIMIT_MACROS
+#endif
+
+//
+// Win32 will normally be using native Win32 threads,
+// but there is a pthread library available as an option,
+// we used to disable this when BOOST_DISABLE_WIN32 was
+// defined but no longer - this should allow some
+// files to be compiled in strict mode - while maintaining
+// a consistent setting of BOOST_HAS_THREADS across
+// all translation units (needed for shared_ptr etc).
+//
+
+#ifdef _WIN32_WCE
+# define BOOST_NO_ANSI_APIS
+#endif
+
+#ifndef BOOST_HAS_PTHREADS
+# define BOOST_HAS_WINTHREADS
+#endif
+
+#ifndef BOOST_DISABLE_WIN32
+// WEK: Added
+#define BOOST_HAS_FTIME
+#define BOOST_WINDOWS 1
+
+#endif
diff --git a/indexlib/boost-compat/config/posix_features.hpp b/indexlib/boost-compat/config/posix_features.hpp
new file mode 100644
index 000000000..4afb476b7
--- /dev/null
+++ b/indexlib/boost-compat/config/posix_features.hpp
@@ -0,0 +1,87 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+
+// See http://www.boost.org for most recent version.
+
+// All POSIX feature tests go in this file,
+// Note that we test _POSIX_C_SOURCE and _XOPEN_SOURCE as well as
+// _POSIX_VERSION and _XOPEN_VERSION: on some systems POSIX APIs
+// may be present but non-functional unless _POSIX_C_SOURCE and
+// _XOPEN_SOURCE have been defined to the right value (it's up
+// to the user to do this *before* including any header, although
+// in most cases the compiler will do this for you).
+
+# if defined(BOOST_HAS_UNISTD_H)
+# include <unistd.h>
+
+ // XOpen has <nl_types.h>, but is this the correct version check?
+# if defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 3)
+# define BOOST_HAS_NL_TYPES_H
+# endif
+
+ // POSIX version 6 requires <stdint.h>
+# if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200100)
+# define BOOST_HAS_STDINT_H
+# endif
+
+ // POSIX version 2 requires <dirent.h>
+# if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 199009L)
+# define BOOST_HAS_DIRENT_H
+# endif
+
+ // POSIX version 3 requires <signal.h> to have sigaction:
+# if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 199506L)
+# define BOOST_HAS_SIGACTION
+# endif
+ // POSIX defines _POSIX_THREADS > 0 for pthread support,
+ // however some platforms define _POSIX_THREADS without
+ // a value, hence the (_POSIX_THREADS+0 >= 0) check.
+ // Strictly speaking this may catch platforms with a
+ // non-functioning stub <pthread.h>, but such occurrences should
+ // occur very rarely if at all.
+# if defined(_POSIX_THREADS) && (_POSIX_THREADS+0 >= 0) && !defined(BOOST_HAS_WINTHREADS) && !defined(BOOST_HAS_MPTASKS)
+# define BOOST_HAS_PTHREADS
+# endif
+
+ // BOOST_HAS_NANOSLEEP:
+ // This is predicated on _POSIX_TIMERS or _XOPEN_REALTIME:
+# if (defined(_POSIX_TIMERS) && (_POSIX_TIMERS+0 >= 0)) \
+ || (defined(_XOPEN_REALTIME) && (_XOPEN_REALTIME+0 >= 0))
+# define BOOST_HAS_NANOSLEEP
+# endif
+
+ // BOOST_HAS_CLOCK_GETTIME:
+ // This is predicated on _POSIX_TIMERS (also on _XOPEN_REALTIME
+ // but at least one platform - linux - defines that flag without
+ // defining clock_gettime):
+# if (defined(_POSIX_TIMERS) && (_POSIX_TIMERS+0 >= 0))
+# define BOOST_HAS_CLOCK_GETTIME
+# endif
+
+ // BOOST_HAS_SCHED_YIELD:
+ // This is predicated on _POSIX_PRIORITY_SCHEDULING or
+ // on _POSIX_THREAD_PRIORITY_SCHEDULING or on _XOPEN_REALTIME.
+# if defined(_POSIX_PRIORITY_SCHEDULING) && (_POSIX_PRIORITY_SCHEDULING+0 > 0)\
+ || (defined(_POSIX_THREAD_PRIORITY_SCHEDULING) && (_POSIX_THREAD_PRIORITY_SCHEDULING+0 > 0))\
+ || (defined(_XOPEN_REALTIME) && (_XOPEN_REALTIME+0 >= 0))
+# define BOOST_HAS_SCHED_YIELD
+# endif
+
+ // BOOST_HAS_GETTIMEOFDAY:
+ // BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE:
+ // These are predicated on _XOPEN_VERSION, and appear to have been first released
+ // in issue 4, version 2 (_XOPEN_VERSION >= 500).
+# if defined(_XOPEN_VERSION) && (_XOPEN_VERSION+0 >= 500)
+# define BOOST_HAS_GETTIMEOFDAY
+# if defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE+0 >= 500)
+# define BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+# endif
+# endif
+
+# endif
+
+
+
diff --git a/indexlib/boost-compat/config/requires_threads.hpp b/indexlib/boost-compat/config/requires_threads.hpp
new file mode 100644
index 000000000..cfaff2302
--- /dev/null
+++ b/indexlib/boost-compat/config/requires_threads.hpp
@@ -0,0 +1,92 @@
+// (C) Copyright John Maddock 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+
+#ifndef BOOST_CONFIG_REQUIRES_THREADS_HPP
+#define BOOST_CONFIG_REQUIRES_THREADS_HPP
+
+#ifndef BOOST_CONFIG_HPP
+# include <boost/config.hpp>
+#endif
+
+#if defined(BOOST_DISABLE_THREADS)
+
+// Special case: gcc on HP-UX and Irix cannot build multi-threaded programs, so
+// give a targeted diagnostic. The minor version is only compared when the major
+// version is exactly 3, so that e.g. gcc 4.1 is not mistaken for "<= 3.3".
+#if defined(__GNUC__) && ((__GNUC__ < 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ <= 3)) || !defined(BOOST_STRICT_CONFIG))
+//
+// this has been checked for gcc releases up to and including 3.3; unless
+// BOOST_STRICT_CONFIG is defined, newer releases are assumed to behave alike:
+#if defined(__sgi) || defined(__hpux)
+# error "Multi-threaded programs are not supported by gcc on HPUX or Irix (last checked with gcc 3.3)"
+#endif
+
+#endif
+
+# error "Threading support unavaliable: it has been explicitly disabled with BOOST_DISABLE_THREADS"
+
+#elif !defined(BOOST_HAS_THREADS)
+
+# if defined __COMO__
+// Comeau C++
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -D_MT (Windows) or -D_REENTRANT (Unix)"
+
+#elif defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC) || defined(__ECC)
+// Intel
+#ifdef _WIN32
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: either /MT /MTd /MD or /MDd"
+#else
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -openmp"
+#endif
+
+# elif defined __GNUC__
+// GNU C++:
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)"
+
+#elif defined __sgi
+// SGI MIPSpro C++
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -D_SGI_MP_SOURCE"
+
+#elif defined __DECCXX
+// Compaq Tru64 Unix cxx
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -pthread"
+
+#elif defined __BORLANDC__
+// Borland
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -tWM"
+
+#elif defined __MWERKS__
+// Metrowerks CodeWarrior
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: either -runtime sm, -runtime smd, -runtime dm, or -runtime dmd"
+
+#elif defined __SUNPRO_CC
+// Sun Workshop Compiler C++
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -mt"
+
+#elif defined __HP_aCC
+// HP aCC
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: -mt"
+
+#elif defined(__IBMCPP__)
+// IBM Visual Age
+# error "Compiler threading support is not turned on. Please compile the code with the xlC_r compiler"
+
+#elif defined _MSC_VER
+// Microsoft Visual C++
+//
+// Must remain the last #elif since some other vendors (Metrowerks, for
+// example) also #define _MSC_VER
+# error "Compiler threading support is not turned on. Please set the correct command line options for threading: either /MT /MTd /MD or /MDd"
+
+#else
+
+# error "Compiler threading support is not turned on. Please consult your compiler's documentation for the appropriate options to use"
+
+#endif // compilers
+
+#endif // BOOST_HAS_THREADS
+
+#endif // BOOST_CONFIG_REQUIRES_THREADS_HPP
diff --git a/indexlib/boost-compat/config/select_compiler_config.hpp b/indexlib/boost-compat/config/select_compiler_config.hpp
new file mode 100644
index 000000000..3453f1a35
--- /dev/null
+++ b/indexlib/boost-compat/config/select_compiler_config.hpp
@@ -0,0 +1,83 @@
+// Boost compiler configuration selection header file
+
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Martin Wille 2003.
+// (C) Copyright Guillaume Melquiond 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// locate which compiler we are using and define
+// BOOST_COMPILER_CONFIG as needed:
+
+# if defined __COMO__
+// Comeau C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/comeau.hpp"
+
+#elif defined __DMC__
+// Digital Mars C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/digitalmars.hpp"
+
+#elif defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC) || defined(__ECC)
+// Intel
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/intel.hpp"
+
+# elif defined __GNUC__
+// GNU C++:
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/gcc.hpp"
+
+#elif defined __KCC
+// Kai C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/kai.hpp"
+
+#elif defined __sgi
+// SGI MIPSpro C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/sgi_mipspro.hpp"
+
+#elif defined __DECCXX
+// Compaq Tru64 Unix cxx
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/compaq_cxx.hpp"
+
+#elif defined __ghs
+// Greenhills C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/greenhills.hpp"
+
+#elif defined __BORLANDC__
+// Borland
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/borland.hpp"
+
+#elif defined __MWERKS__
+// Metrowerks CodeWarrior
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/metrowerks.hpp"
+
+#elif defined __SUNPRO_CC
+// Sun Workshop Compiler C++
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/sunpro_cc.hpp"
+
+#elif defined __HP_aCC
+// HP aCC
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/hp_acc.hpp"
+
+#elif defined(__MRC__) || defined(__SC__)
+// MPW MrCpp or SCpp
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/mpw.hpp"
+
+#elif defined(__IBMCPP__)
+// IBM Visual Age
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/vacpp.hpp"
+
+#elif defined _MSC_VER
+// Microsoft Visual C++
+//
+// Must remain the last #elif since some other vendors (Metrowerks, for
+// example) also #define _MSC_VER
+# define BOOST_COMPILER_CONFIG "boost/config/compiler/visualc.hpp"
+
+#elif defined (BOOST_ASSERT_CONFIG)
+// this must come last - generate an error if we don't
+// recognise the compiler:
+# error "Unknown compiler - please configure (http://www.boost.org/libs/config/config.htm#configuring) and report the results to the main boost mailing list (http://www.boost.org/more/mailing_lists.htm#main)"
+
+#endif
diff --git a/indexlib/boost-compat/config/select_platform_config.hpp b/indexlib/boost-compat/config/select_platform_config.hpp
new file mode 100644
index 000000000..5699b2a2a
--- /dev/null
+++ b/indexlib/boost-compat/config/select_platform_config.hpp
@@ -0,0 +1,86 @@
+// Boost platform configuration selection header file
+
+// (C) Copyright John Maddock 2001 - 2002.
+// (C) Copyright Jens Maurer 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// locate which platform we are on and define BOOST_PLATFORM_CONFIG as needed.
+// Note that we define the headers to include using "header_name" not
+// <header_name> in order to prevent macro expansion within the header
+// name (for example "linux" is a macro on linux systems).
+
+#if defined(linux) || defined(__linux) || defined(__linux__)
+// linux:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/linux.hpp"
+
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+// BSD:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/bsd.hpp"
+
+#elif defined(sun) || defined(__sun)
+// solaris:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/solaris.hpp"
+
+#elif defined(__sgi)
+// SGI Irix:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/irix.hpp"
+
+#elif defined(__hpux)
+// hp unix:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/hpux.hpp"
+
+#elif defined(__CYGWIN__)
+// cygwin is not win32:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/cygwin.hpp"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+// win32:
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/win32.hpp"
+
+#elif defined(__BEOS__)
+// BeOS
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/beos.hpp"
+
+#elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__)
+// MacOS
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/macos.hpp"
+
+#elif defined(__IBMCPP__)
+// IBM
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/aix.hpp"
+
+#elif defined(__amigaos__)
+// AmigaOS
+# define BOOST_PLATFORM_CONFIG "boost/config/platform/amigaos.hpp"
+
+#else
+
+# if defined(unix) \
+ || defined(__unix) \
+ || defined(_XOPEN_SOURCE) \
+ || defined(_POSIX_SOURCE)
+
+ // generic unix platform:
+
+# ifndef BOOST_HAS_UNISTD_H
+# define BOOST_HAS_UNISTD_H
+# endif
+
+# include <boost/config/posix_features.hpp>
+
+# endif
+
+# if defined (BOOST_ASSERT_CONFIG)
+ // this must come last - generate an error if we don't
+ // recognise the platform:
+# error "Unknown platform - please configure and report the results to boost.org"
+# endif
+
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/select_stdlib_config.hpp b/indexlib/boost-compat/config/select_stdlib_config.hpp
new file mode 100644
index 000000000..b7bf59143
--- /dev/null
+++ b/indexlib/boost-compat/config/select_stdlib_config.hpp
@@ -0,0 +1,68 @@
+// Boost standard library configuration selection header file
+
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001 - 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+
+// See http://www.boost.org for most recent version.
+
+// locate which std lib we are using and define BOOST_STDLIB_CONFIG as needed:
+
+// we need to include a std lib header here in order to detect which
+// library is in use, use <utility> as it's about the smallest
+// of the std lib headers - do not rely on this header being included -
+// users can short-circuit this header if they know whose std lib
+// they are using.
+
+#include <utility>
+
+#if defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)
+// STLPort library; this _must_ come first, otherwise since
+// STLport typically sits on top of some other library, we
+// can end up detecting that first rather than STLport:
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/stlport.hpp"
+
+#elif defined(__LIBCOMO__)
+// Comeau STL:
+#define BOOST_STDLIB_CONFIG "boost/config/stdlib/libcomo.hpp"
+
+#elif defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER)
+// Rogue Wave library:
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/roguewave.hpp"
+
+#elif defined(__GLIBCPP__) || defined(__GLIBCXX__)
+// GNU libstdc++ 3
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/libstdcpp3.hpp"
+
+#elif defined(__STL_CONFIG_H)
+// generic SGI STL
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/sgi.hpp"
+
+#elif defined(__MSL_CPP__)
+// MSL standard lib:
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/msl.hpp"
+
+#elif defined(__IBMCPP__)
+// take the default VACPP std lib
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/vacpp.hpp"
+
+#elif defined(MSIPL_COMPILE_H)
+// Modena C++ standard library
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/modena.hpp"
+
+#elif (defined(_YVALS) && !defined(__IBMCPP__)) || defined(_CPPLIB_VER)
+// Dinkumware Library (this has to appear after any possible replacement libraries):
+# define BOOST_STDLIB_CONFIG "boost/config/stdlib/dinkumware.hpp"
+
+#elif defined (BOOST_ASSERT_CONFIG)
+// this must come last - generate an error if we don't
+// recognise the library:
+# error "Unknown standard library - please configure and report the results to boost.org"
+
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/dinkumware.hpp b/indexlib/boost-compat/config/stdlib/dinkumware.hpp
new file mode 100644
index 000000000..aa214fc1b
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/dinkumware.hpp
@@ -0,0 +1,106 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright David Abrahams 2002.
+// (C) Copyright Guillaume Melquiond 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Dinkumware standard library config:
+
+#if !defined(_YVALS) && !defined(_CPPLIB_VER)
+#include <utility>
+#if !defined(_YVALS) && !defined(_CPPLIB_VER)
+#error This is not the Dinkumware lib!
+#endif
+#endif
+
+
+#if defined(_CPPLIB_VER) && (_CPPLIB_VER >= 306)
+ // full dinkumware 3.06 and above
+ // fully conforming provided the compiler supports it:
+# if !(defined(_GLOBAL_USING) && (_GLOBAL_USING+0 > 0)) && !defined(__BORLANDC__) && !defined(_STD) && !(defined(__ICC) && (__ICC >= 700)) // can be defined in yvals.h
+# define BOOST_NO_STDC_NAMESPACE
+# endif
+# if !(defined(_HAS_MEMBER_TEMPLATES_REBIND) && (_HAS_MEMBER_TEMPLATES_REBIND+0 > 0)) && !(defined(_MSC_VER) && (_MSC_VER > 1300)) && defined(BOOST_MSVC)
+# define BOOST_NO_STD_ALLOCATOR
+# endif
+# define BOOST_HAS_PARTIAL_STD_ALLOCATOR
+# if defined(BOOST_MSVC) && (BOOST_MSVC < 1300)
+ // if this lib version is set up for vc6 then there is no std::use_facet:
+# define BOOST_NO_STD_USE_FACET
+# define BOOST_HAS_TWO_ARG_USE_FACET
+ // C lib functions aren't in namespace std either:
+# define BOOST_NO_STDC_NAMESPACE
+ // and nor is <exception>
+# define BOOST_NO_EXCEPTION_STD_NAMESPACE
+# endif
+// There's no numeric_limits<long long> support unless _LONGLONG is defined:
+# if !defined(_LONGLONG) && (_CPPLIB_VER <= 310)
+# define BOOST_NO_MS_INT64_NUMERIC_LIMITS
+# endif
+// 3.06 appears to have (non-sgi versions of) <hash_set> & <hash_map>,
+// and no <slist> at all
+#else
+# define BOOST_MSVC_STD_ITERATOR 1
+# define BOOST_NO_STD_ITERATOR
+# define BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
+# define BOOST_NO_STD_ALLOCATOR
+# define BOOST_NO_STDC_NAMESPACE
+# define BOOST_NO_STD_USE_FACET
+# define BOOST_NO_STD_OUTPUT_ITERATOR_ASSIGN
+# define BOOST_HAS_MACRO_USE_FACET
+# ifndef _CPPLIB_VER
+ // Updated Dinkum libraries define _CPPLIB_VER and provide their own
+ // min and max definitions; without it assume std::min/max are missing.
+# define BOOST_NO_STD_MIN_MAX
+# define BOOST_NO_MS_INT64_NUMERIC_LIMITS
+# endif
+#endif
+
+//
+// std extension namespace is stdext for vc7.1 and later,
+// the same applies to other compilers that sit on top
+// of vc7.1 (Intel and Comeau):
+//
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) && !defined(__BORLANDC__)
+# define BOOST_STD_EXTENSION_NAMESPACE stdext
+#endif
+
+
+#if (defined(_MSC_VER) && (_MSC_VER <= 1300) && !defined(__BORLANDC__)) || !defined(_CPPLIB_VER) || (_CPPLIB_VER < 306)
+ // if we're using a dinkum lib that's
+ // been configured for VC6/7 then there is
+ // no iterator traits (true even for icl)
+# define BOOST_NO_STD_ITERATOR_TRAITS
+#endif
+
+#if defined(__ICL) && (__ICL < 800) && defined(_CPPLIB_VER) && (_CPPLIB_VER <= 310)
+// Intel C++ chokes over any non-trivial use of <locale>
+// this may be an overly restrictive define, but regex fails without it:
+# define BOOST_NO_STD_LOCALE
+#endif
+
+#ifdef _CPPLIB_VER
+# define BOOST_DINKUMWARE_STDLIB _CPPLIB_VER
+#else
+# define BOOST_DINKUMWARE_STDLIB 1
+#endif
+
+#ifdef _CPPLIB_VER
+# define BOOST_STDLIB "Dinkumware standard library version " BOOST_STRINGIZE(_CPPLIB_VER)
+#else
+# define BOOST_STDLIB "Dinkumware standard library version 1.x"
+#endif
+
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/libcomo.hpp b/indexlib/boost-compat/config/stdlib/libcomo.hpp
new file mode 100644
index 000000000..b2c8e4400
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/libcomo.hpp
@@ -0,0 +1,46 @@
+// (C) Copyright John Maddock 2002 - 2003.
+// (C) Copyright Jens Maurer 2002 - 2003.
+// (C) Copyright Beman Dawes 2002 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Comeau STL:
+
+#if !defined(__LIBCOMO__)
+# include <utility>
+# if !defined(__LIBCOMO__)
+# error "This is not the Comeau STL!"
+# endif
+#endif
+
+//
+// std::streambuf<wchar_t> is non-standard
+// NOTE: versions of libcomo prior to beta28 have octal version numbering,
+// e.g. version 25 is 21 (dec)
+#if __LIBCOMO_VERSION__ <= 22
+# define BOOST_NO_STD_WSTREAMBUF
+#endif
+
+#if (__LIBCOMO_VERSION__ <= 31) && defined(_WIN32)
+#define BOOST_NO_SWPRINTF
+#endif
+
+#if __LIBCOMO_VERSION__ >= 31
+# define BOOST_HAS_HASH
+# define BOOST_HAS_SLIST
+#endif
+
+//
+// Intrinsic type_traits support.
+// The SGI STL has its own __type_traits class, which
+// has intrinsic compiler support with SGI's compilers.
+// Either way, map SGI style type traits to boost equivalents:
+//
+#define BOOST_HAS_SGI_TYPE_TRAITS
+
+#define BOOST_STDLIB "Comeau standard library " BOOST_STRINGIZE(__LIBCOMO_VERSION__)
+
+
diff --git a/indexlib/boost-compat/config/stdlib/libstdcpp3.hpp b/indexlib/boost-compat/config/stdlib/libstdcpp3.hpp
new file mode 100644
index 000000000..9774e8761
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/libstdcpp3.hpp
@@ -0,0 +1,51 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Jens Maurer 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// config for libstdc++ v3
+// not much to go in here:
+
+#ifdef __GLIBCXX__
+#define BOOST_STDLIB "GNU libstdc++ version " BOOST_STRINGIZE(__GLIBCXX__)
+#else
+#define BOOST_STDLIB "GNU libstdc++ version " BOOST_STRINGIZE(__GLIBCPP__)
+#endif
+
+#if !defined(_GLIBCPP_USE_WCHAR_T) && !defined(_GLIBCXX_USE_WCHAR_T)
+# define BOOST_NO_CWCHAR
+# define BOOST_NO_CWCTYPE
+# define BOOST_NO_STD_WSTRING
+# define BOOST_NO_STD_WSTREAMBUF
+#endif
+
+#if defined(__osf__) && !defined(_REENTRANT) && defined(_GLIBCXX_HAVE_GTHR_DEFAULT)
+// GCC 3.4 on Tru64 forces the definition of _REENTRANT when any std lib header
+// file is included, therefore for consistency we define it here as well.
+# define _REENTRANT
+#endif
+
+#ifdef __GLIBCXX__ // gcc 3.4 and greater:
+# ifdef _GLIBCXX_HAVE_GTHR_DEFAULT
+ //
+ // If the std lib has thread support turned on, then turn it on in Boost
+ // as well. We do this because some gcc-3.4 std lib headers define _REENTRANT
+ // while others do not...
+ //
+# define BOOST_HAS_THREADS
+# else
+# define BOOST_DISABLE_THREADS
+# endif
+#endif
+
+
+#if !defined(_GLIBCPP_USE_LONG_LONG) \
+ && !defined(_GLIBCXX_USE_LONG_LONG)\
+ && defined(BOOST_HAS_LONG_LONG)
+// May have been set by compiler/*.hpp, but "long long" without library
+// support is useless.
+# undef BOOST_HAS_LONG_LONG
+#endif
diff --git a/indexlib/boost-compat/config/stdlib/modena.hpp b/indexlib/boost-compat/config/stdlib/modena.hpp
new file mode 100644
index 000000000..61e31b7d1
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/modena.hpp
@@ -0,0 +1,30 @@
+// (C) Copyright Jens Maurer 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Modena C++ standard library (comes with KAI C++)
+// (accept either spelling of the marker macro after pulling in a std header)
+#if !defined(MSIPL_COMPILE_H)
+# include <utility>
+# if !defined(MSIPL_COMPILE_H) && !defined(__MSIPL_COMPILE_H)
+# error "This is not the Modena C++ library!"
+# endif
+#endif
+
+#ifndef MSIPL_NL_TYPES
+#define BOOST_NO_STD_MESSAGES
+#endif
+
+#ifndef MSIPL_WCHART
+#define BOOST_NO_STD_WSTRING
+#endif
+
+#define BOOST_STDLIB "Modena C++ standard library"
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/msl.hpp b/indexlib/boost-compat/config/stdlib/msl.hpp
new file mode 100644
index 000000000..f8ad3d9ad
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/msl.hpp
@@ -0,0 +1,54 @@
+// (C) Copyright John Maddock 2001.
+// (C) Copyright Darin Adler 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Metrowerks standard library:
+
+#ifndef __MSL_CPP__
+# include <utility>
+# ifndef __MSL_CPP__
+# error This is not the MSL standard library!
+# endif
+#endif
+
+#if __MSL_CPP__ >= 0x6000 // Pro 6
+# define BOOST_HAS_HASH
+# define BOOST_STD_EXTENSION_NAMESPACE Metrowerks
+#endif
+#define BOOST_HAS_SLIST
+
+#if __MSL_CPP__ < 0x6209
+# define BOOST_NO_STD_MESSAGES
+#endif
+
+// check C lib version for <stdint.h>
+#include <cstddef>
+
+#if defined(__MSL__) && (__MSL__ >= 0x5000)
+# define BOOST_HAS_STDINT_H
+# if !defined(__PALMOS_TRAPS__)
+# define BOOST_HAS_UNISTD_H
+# endif
+ // boilerplate code:
+# include <boost/config/posix_features.hpp>
+#endif
+
+#if defined(_MWMT) || _MSL_THREADSAFE
+# define BOOST_HAS_THREADS
+#endif
+
+
+#define BOOST_STDLIB "Metrowerks Standard Library version " BOOST_STRINGIZE(__MSL_CPP__)
+
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/roguewave.hpp b/indexlib/boost-compat/config/stdlib/roguewave.hpp
new file mode 100644
index 000000000..ec3d881b5
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/roguewave.hpp
@@ -0,0 +1,123 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Jens Maurer 2001.
+// (C) Copyright David Abrahams 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Rogue Wave std lib:
+
+#if !defined(__STD_RWCOMPILER_H__) && !defined(_RWSTD_VER)
+# include <utility>
+# if !defined(__STD_RWCOMPILER_H__) && !defined(_RWSTD_VER)
+# error This is not the Rogue Wave standard library
+# endif
+#endif
+//
+// figure out a consistent version number:
+//
+#ifndef _RWSTD_VER
+# define BOOST_RWSTD_VER 0x010000
+#elif _RWSTD_VER < 0x010000
+# define BOOST_RWSTD_VER (_RWSTD_VER << 8)
+#else
+# define BOOST_RWSTD_VER _RWSTD_VER
+#endif
+
+#ifndef _RWSTD_VER
+# define BOOST_STDLIB "Rogue Wave standard library version (Unknown version)"
+#else
+# define BOOST_STDLIB "Rogue Wave standard library version " BOOST_STRINGIZE(_RWSTD_VER)
+#endif
+
+//
+// Prior to version 2.2.0 the primary template for std::numeric_limits
+// does not have compile time constants, even though specializations of that
+// template do:
+//
+#if BOOST_RWSTD_VER < 0x020200
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+#endif
+
+// Sun CC 5.5 patch 113817-07 adds long long specialization, but does not change the
+// library version number (http://sunsolve6.sun.com/search/document.do?assetkey=1-21-113817):
+#if BOOST_RWSTD_VER <= 0x020101 && (!defined(__SUNPRO_CC) || (__SUNPRO_CC < 0x550))
+# define BOOST_NO_LONG_LONG_NUMERIC_LIMITS
+# endif
+
+//
+// Borland version of numeric_limits lacks __int64 specialisation:
+//
+#ifdef __BORLANDC__
+# define BOOST_NO_MS_INT64_NUMERIC_LIMITS
+#endif
+
+//
+// No std::iterator if it can't figure out default template args:
+//
+#if defined(_RWSTD_NO_SIMPLE_DEFAULT_TEMPLATES) || defined(RWSTD_NO_SIMPLE_DEFAULT_TEMPLATES) || (BOOST_RWSTD_VER < 0x020000)
+# define BOOST_NO_STD_ITERATOR
+#endif
+
+//
+// No iterator traits without partial specialization:
+//
+#if defined(_RWSTD_NO_CLASS_PARTIAL_SPEC) || defined(RWSTD_NO_CLASS_PARTIAL_SPEC)
+# define BOOST_NO_STD_ITERATOR_TRAITS
+#endif
+
+//
+// Prior to version 2.0, std::auto_ptr was buggy, and there were no
+// new-style iostreams, and no conformant std::allocator:
+//
+#if (BOOST_RWSTD_VER < 0x020000)
+# define BOOST_NO_AUTO_PTR
+# define BOOST_NO_STRINGSTREAM
+# define BOOST_NO_STD_ALLOCATOR
+# define BOOST_NO_STD_LOCALE
+#endif
+
+//
+// No template iterator constructors without member template support:
+//
+#if defined(RWSTD_NO_MEMBER_TEMPLATES) || defined(_RWSTD_NO_MEMBER_TEMPLATES)
+# define BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
+#endif
+
+//
+// RW defines _RWSTD_ALLOCATOR if the allocator is conformant and in use
+// (the __HP_aCC test is a hack - the library seems to define _RWSTD_ALLOCATOR
+// on HP aCC systems even though the allocator is in fact broken):
+//
+#if !defined(_RWSTD_ALLOCATOR) || (defined(__HP_aCC) && __HP_aCC <= 33100)
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+//
+// If we have a std::locale, we still may not have std::use_facet:
+//
+#if defined(_RWSTD_NO_TEMPLATE_ON_RETURN_TYPE) && !defined(BOOST_NO_STD_LOCALE)
+# define BOOST_NO_STD_USE_FACET
+# define BOOST_HAS_TWO_ARG_USE_FACET
+#endif
+
+//
+// There's no std::distance prior to version 2, or without
+// partial specialization support:
+//
+#if (BOOST_RWSTD_VER < 0x020000) || defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
+ #define BOOST_NO_STD_DISTANCE
+#endif
+
+//
+// Some versions of the rogue wave library don't have assignable
+// OutputIterators:
+//
+#if BOOST_RWSTD_VER < 0x020100
+# define BOOST_NO_STD_OUTPUT_ITERATOR_ASSIGN
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/sgi.hpp b/indexlib/boost-compat/config/stdlib/sgi.hpp
new file mode 100644
index 000000000..67f7a0a4b
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/sgi.hpp
@@ -0,0 +1,111 @@
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001.
+// (C) Copyright Jens Maurer 2001 - 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// generic SGI STL:
+
+#if !defined(__STL_CONFIG_H)
+# include <utility>
+# if !defined(__STL_CONFIG_H)
+# error "This is not the SGI STL!"
+# endif
+#endif
+
+//
+// No std::iterator traits without partial specialisation:
+//
+#if !defined(__STL_CLASS_PARTIAL_SPECIALIZATION)
+# define BOOST_NO_STD_ITERATOR_TRAITS
+#endif
+
+//
+// No std::stringstream with gcc < 3
+//
+#if defined(__GNUC__) && (__GNUC__ < 3) && \
+ ((__GNUC_MINOR__ < 95) || (__GNUC_MINOR__ == 96)) && \
+ !defined(__STL_USE_NEW_IOSTREAMS) || \
+ defined(__APPLE_CC__)
+ // Note that we only set this for GNU C++ prior to 2.95 since the
+ // latest patches for that release do contain a minimal <sstream>
+ // If you are running a 2.95 release prior to 2.95.3 then this will need
+ // setting, but there is no way to detect that automatically (other
+ // than by running the configure script).
+ // Also, the unofficial GNU C++ 2.96 included in RedHat 7.1 doesn't
+ // have <sstream>.
+# define BOOST_NO_STRINGSTREAM
+#endif
+
+//
+// Assume no std::locale without own iostreams (this may be an
+// incorrect assumption in some cases):
+//
+#if !defined(__SGI_STL_OWN_IOSTREAMS) && !defined(__STL_USE_NEW_IOSTREAMS)
+# define BOOST_NO_STD_LOCALE
+#endif
+
+//
+// Original native SGI streams have a non-standard std::messages facet
+// (NOTE(review): this disables all of std::locale, not just messages - confirm):
+#if defined(__sgi) && (_COMPILER_VERSION <= 650) && !defined(__SGI_STL_OWN_IOSTREAMS)
+# define BOOST_NO_STD_LOCALE
+#endif
+
+//
+// SGI's new iostreams have missing "const" in messages<>::open
+//
+#if defined(__sgi) && (_COMPILER_VERSION <= 740) && defined(__STL_USE_NEW_IOSTREAMS)
+# define BOOST_NO_STD_MESSAGES
+#endif
+
+//
+// No template iterator constructors, or std::allocator
+// without member templates:
+//
+#if !defined(__STL_MEMBER_TEMPLATES)
+# define BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+//
+// We always have SGI style hash_set, hash_map, and slist:
+//
+#define BOOST_HAS_HASH
+#define BOOST_HAS_SLIST
+
+//
+// If this is GNU libstdc++2, then no <limits> and no std::wstring:
+//
+#if (defined(__GNUC__) && (__GNUC__ < 3))
+# include <string>
+# if defined(__BASTRING__)
+# define BOOST_NO_LIMITS
+// Note: <boost/limits.hpp> will provide compile-time constants
+# undef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+# define BOOST_NO_STD_WSTRING
+# endif
+#endif
+
+//
+// There is no standard iterator unless we have namespace support:
+//
+#if !defined(__STL_USE_NAMESPACES)
+# define BOOST_NO_STD_ITERATOR
+#endif
+
+//
+// Intrinsic type_traits support.
+// The SGI STL has its own __type_traits class, which
+// has intrinsic compiler support with SGI's compilers.
+// Either way, map SGI style type traits to boost equivalents:
+//
+#define BOOST_HAS_SGI_TYPE_TRAITS
+
+#define BOOST_STDLIB "SGI standard library"
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/stlport.hpp b/indexlib/boost-compat/config/stdlib/stlport.hpp
new file mode 100644
index 000000000..4843ea59b
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/stlport.hpp
@@ -0,0 +1,201 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// (C) Copyright Darin Adler 2001.
+// (C) Copyright Jens Maurer 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// STLPort standard library config:
+
+#if !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION)
+# include <utility>
+# if !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION)
+# error "This is not STLPort!"
+# endif
+#endif
+
+//
+// __STL_STATIC_CONST_INIT_BUG implies BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+// for versions prior to 4.1(beta)
+//
+#if (defined(__STL_STATIC_CONST_INIT_BUG) || defined(_STLP_STATIC_CONST_INIT_BUG)) && (__SGI_STL_PORT <= 0x400)
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+#endif
+
+//
+// If STLport thinks that there is no partial specialisation, then there is no
+// std::iterator traits:
+//
+#if !(defined(_STLP_CLASS_PARTIAL_SPECIALIZATION) || defined(__STL_CLASS_PARTIAL_SPECIALIZATION))
+# define BOOST_NO_STD_ITERATOR_TRAITS
+#endif
+
+//
+// No new style iostreams on GCC without STLport's iostreams enabled:
+//
+#if (defined(__GNUC__) && (__GNUC__ < 3)) && !(defined(__SGI_STL_OWN_IOSTREAMS) || defined(_STLP_OWN_IOSTREAMS))
+# define BOOST_NO_STRINGSTREAM
+#endif
+
+//
+// No new iostreams implies no std::locale, and no std::stringstream:
+//
+#if defined(__STL_NO_IOSTREAMS) || defined(__STL_NO_NEW_IOSTREAMS) || defined(_STLP_NO_IOSTREAMS) || defined(_STLP_NO_NEW_IOSTREAMS)
+# define BOOST_NO_STD_LOCALE
+# define BOOST_NO_STRINGSTREAM
+#endif
+
+//
+// If the streams are not native, and we have a "using ::x" compiler bug
+// then the io stream facets are not available in namespace std::
+//
+#ifdef _STLPORT_VERSION
+# if !defined(_STLP_OWN_IOSTREAMS) && defined(_STLP_USE_NAMESPACES) && defined(BOOST_NO_USING_TEMPLATE) && !defined(__BORLANDC__)
+# define BOOST_NO_STD_LOCALE
+# endif
+#else
+# if !defined(__SGI_STL_OWN_IOSTREAMS) && defined(__STL_USE_NAMESPACES) && defined(BOOST_NO_USING_TEMPLATE) && !defined(__BORLANDC__)
+# define BOOST_NO_STD_LOCALE
+# endif
+#endif
+
+//
+// Without member template support enabled, there are no templated
+// iterator constructors, and no std::allocator:
+//
+#if !(defined(__STL_MEMBER_TEMPLATES) || defined(_STLP_MEMBER_TEMPLATES))
+# define BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+//
+// however we always have at least a partial allocator:
+//
+#define BOOST_HAS_PARTIAL_STD_ALLOCATOR
+
+#if !defined(_STLP_MEMBER_TEMPLATE_CLASSES)
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+#if defined(_STLP_NO_MEMBER_TEMPLATE_KEYWORD) && defined(BOOST_MSVC) && (BOOST_MSVC <= 1300)
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+//
+// If STLport thinks there is no wchar_t at all, then we have to disable
+// the support for the relevant specializations of std:: templates.
+//
+#if !defined(_STLP_HAS_WCHAR_T) && !defined(_STLP_WCHAR_T_IS_USHORT)
+# ifndef BOOST_NO_STD_WSTRING
+# define BOOST_NO_STD_WSTRING
+# endif
+# ifndef BOOST_NO_STD_WSTREAMBUF
+# define BOOST_NO_STD_WSTREAMBUF
+# endif
+#endif
+
+//
+// We always have SGI style hash_set, hash_map, and slist:
+//
+#define BOOST_HAS_HASH
+#define BOOST_HAS_SLIST
+
+//
+// STLport does a good job of importing names into namespace std::,
+// but doesn't always get them all, so define BOOST_NO_STDC_NAMESPACE, since our
+// workaround does not conflict with STLport's:
+//
+//
+// Harold Howe says:
+// Borland switched to STLport in BCB6. Defining BOOST_NO_STDC_NAMESPACE with
+// BCB6 does cause problems. If we detect C++ Builder, then don't define
+// BOOST_NO_STDC_NAMESPACE
+//
+#if !defined(__BORLANDC__) && !defined(__DMC__)
+//
+// If STLport is using its own namespace, and the real names are in
+// the global namespace, then we duplicate STLport's using declarations
+// (by defining BOOST_NO_STDC_NAMESPACE). We do this because STLport doesn't
+// necessarily import all the names we need into namespace std::
+//
+# if (defined(__STL_IMPORT_VENDOR_CSTD) \
+ || defined(__STL_USE_OWN_NAMESPACE) \
+ || defined(_STLP_IMPORT_VENDOR_CSTD) \
+ || defined(_STLP_USE_OWN_NAMESPACE)) \
+ && (defined(__STL_VENDOR_GLOBAL_CSTD) || defined (_STLP_VENDOR_GLOBAL_CSTD))
+# define BOOST_NO_STDC_NAMESPACE
+# define BOOST_NO_EXCEPTION_STD_NAMESPACE
+# endif
+#elif defined(__BORLANDC__) && __BORLANDC__ < 0x560
+// STLport doesn't import std::abs correctly:
+#include <stdlib.h>
+namespace std { using ::abs; }
+// and strcmp/strcpy don't get imported either ('cos they are macros)
+#include <string.h>
+#ifdef strcpy
+# undef strcpy
+#endif
+#ifdef strcmp
+# undef strcmp
+#endif
+#ifdef _STLP_VENDOR_CSTD
+namespace std{ using _STLP_VENDOR_CSTD::strcmp; using _STLP_VENDOR_CSTD::strcpy; }
+#endif
+#endif
+
+//
+// std::use_facet may be non-standard, uses a class instead:
+//
+#if defined(__STL_NO_EXPLICIT_FUNCTION_TMPL_ARGS) || defined(_STLP_NO_EXPLICIT_FUNCTION_TMPL_ARGS)
+# define BOOST_NO_STD_USE_FACET
+# define BOOST_HAS_STLP_USE_FACET
+#endif
+
+//
+// If STLport thinks there are no wide functions, <cwchar> etc. is not working; but
+// only if BOOST_NO_STDC_NAMESPACE is not defined (if it is then we do the import
+// into std:: ourselves).
+//
+#if defined(_STLP_NO_NATIVE_WIDE_FUNCTIONS) && !defined(BOOST_NO_STDC_NAMESPACE)
+# define BOOST_NO_CWCHAR
+# define BOOST_NO_CWCTYPE
+#endif
+
+//
+// If STLport for some reason was configured so that it thinks that wchar_t
+// is not an intrinsic type, then we have to disable the support for it as
+// well (we would be missing required specializations otherwise).
+//
+#if !defined( _STLP_HAS_WCHAR_T) || defined(_STLP_WCHAR_T_IS_USHORT)
+# undef BOOST_NO_INTRINSIC_WCHAR_T
+# define BOOST_NO_INTRINSIC_WCHAR_T
+#endif
+
+//
+// Borland ships a version of STLport with C++ Builder 6 that lacks
+// hashtables and the like:
+//
+#if defined(__BORLANDC__) && (__BORLANDC__ == 0x560)
+# undef BOOST_HAS_HASH
+#endif
+
+//
+// gcc-2.95.3/STLPort does not like the using declarations we use to get ADL with std::min/max
+//
+#if defined(__GNUC__) && (__GNUC__ < 3)
+# include <algorithm> // for std::min and std::max
+# define BOOST_USING_STD_MIN() ((void)0)
+# define BOOST_USING_STD_MAX() ((void)0)
+namespace boost { using std::min; using std::max; }
+#endif
+
+#define BOOST_STDLIB "STLPort standard library version " BOOST_STRINGIZE(__SGI_STL_PORT)
+
+
+
+
+
+
+
+
diff --git a/indexlib/boost-compat/config/stdlib/vacpp.hpp b/indexlib/boost-compat/config/stdlib/vacpp.hpp
new file mode 100644
index 000000000..8321ee0cc
--- /dev/null
+++ b/indexlib/boost-compat/config/stdlib/vacpp.hpp
@@ -0,0 +1,18 @@
+// (C) Copyright John Maddock 2001 - 2002.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+#if __IBMCPP__ <= 501
+# define BOOST_NO_STD_ALLOCATOR
+#endif
+
+#define BOOST_HAS_MACRO_USE_FACET
+#define BOOST_NO_STD_MESSAGES
+
+#define BOOST_STDLIB "Visual Age default standard library"
+
+
+
diff --git a/indexlib/boost-compat/config/suffix.hpp b/indexlib/boost-compat/config/suffix.hpp
new file mode 100644
index 000000000..77d9decac
--- /dev/null
+++ b/indexlib/boost-compat/config/suffix.hpp
@@ -0,0 +1,543 @@
+// Boost config.hpp configuration header file ------------------------------//
+
+// (C) Copyright John Maddock 2001 - 2003.
+// (C) Copyright Darin Adler 2001.
+// (C) Copyright Peter Dimov 2001.
+// (C) Copyright Bill Kempf 2002.
+// (C) Copyright Jens Maurer 2002.
+// (C) Copyright David Abrahams 2002 - 2003.
+// (C) Copyright Gennaro Prota 2003.
+// (C) Copyright Eric Friedman 2003.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for most recent version.
+
+// Boost config.hpp policy and rationale documentation has been moved to
+// http://www.boost.org/libs/config
+//
+// This file is intended to be stable, and relatively unchanging.
+// It should contain boilerplate code only - no compiler specific
+// code unless it is unavoidable - no changes unless unavoidable.
+
+#ifndef BOOST_CONFIG_SUFFIX_HPP
+#define BOOST_CONFIG_SUFFIX_HPP
+
+//
+// look for long long by looking for the appropriate macros in <limits.h>.
+// Note that we use limits.h rather than climits for maximal portability,
+// remember that since these just declare a bunch of macros, there should be
+// no namespace issues from this.
+//
+#include <limits.h>
+# if !defined(BOOST_HAS_LONG_LONG) \
+ && !(defined(BOOST_MSVC) && BOOST_MSVC <=1300) && !defined(__BORLANDC__) \
+ && (defined(ULLONG_MAX) || defined(ULONG_LONG_MAX) || defined(ULONGLONG_MAX))
+# define BOOST_HAS_LONG_LONG
+#endif
+#if !defined(BOOST_HAS_LONG_LONG) && !defined(BOOST_NO_INTEGRAL_INT64_T)
+# define BOOST_NO_INTEGRAL_INT64_T
+#endif
+
+// GCC 3.x will clean up all of those nasty macro definitions that
+// BOOST_NO_CTYPE_FUNCTIONS is intended to help work around, so undefine
+// it under GCC 3.x.
+#if defined(__GNUC__) && (__GNUC__ >= 3) && defined(BOOST_NO_CTYPE_FUNCTIONS)
+# undef BOOST_NO_CTYPE_FUNCTIONS
+#endif
+
+
+//
+// Assume any extensions are in namespace std:: unless stated otherwise:
+//
+# ifndef BOOST_STD_EXTENSION_NAMESPACE
+# define BOOST_STD_EXTENSION_NAMESPACE std
+# endif
+
+//
+// If cv-qualified specializations are not allowed, then neither are cv-void ones:
+//
+# if defined(BOOST_NO_CV_SPECIALIZATIONS) \
+ && !defined(BOOST_NO_CV_VOID_SPECIALIZATIONS)
+# define BOOST_NO_CV_VOID_SPECIALIZATIONS
+# endif
+
+//
+// If there is no numeric_limits template, then it can't have any compile time
+// constants either!
+//
+# if defined(BOOST_NO_LIMITS) \
+ && !defined(BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS)
+# define BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS
+# define BOOST_NO_MS_INT64_NUMERIC_LIMITS
+# define BOOST_NO_LONG_LONG_NUMERIC_LIMITS
+# endif
+
+//
+// if there is no long long then there is no specialisation
+// for numeric_limits<long long> either:
+//
+#if !defined(BOOST_HAS_LONG_LONG) && !defined(BOOST_NO_LONG_LONG_NUMERIC_LIMITS)
+# define BOOST_NO_LONG_LONG_NUMERIC_LIMITS
+#endif
+
+//
+// if there is no __int64 then there is no specialisation
+// for numeric_limits<__int64> either:
+//
+#if !defined(BOOST_HAS_MS_INT64) && !defined(BOOST_NO_MS_INT64_NUMERIC_LIMITS)
+# define BOOST_NO_MS_INT64_NUMERIC_LIMITS
+#endif
+
+//
+// if member templates are supported then so is the
+// VC6 subset of member templates:
+//
+# if !defined(BOOST_NO_MEMBER_TEMPLATES) \
+ && !defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+# define BOOST_MSVC6_MEMBER_TEMPLATES
+# endif
+
+//
+// Without partial specialization, can't test for partial specialisation bugs:
+//
+# if defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) \
+ && !defined(BOOST_BCB_PARTIAL_SPECIALIZATION_BUG)
+# define BOOST_BCB_PARTIAL_SPECIALIZATION_BUG
+# endif
+
+//
+// Without partial specialization, we can't have array-type partial specialisations:
+//
+# if defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) \
+ && !defined(BOOST_NO_ARRAY_TYPE_SPECIALIZATIONS)
+# define BOOST_NO_ARRAY_TYPE_SPECIALIZATIONS
+# endif
+
+//
+// Without partial specialization, std::iterator_traits can't work:
+//
+# if defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) \
+ && !defined(BOOST_NO_STD_ITERATOR_TRAITS)
+# define BOOST_NO_STD_ITERATOR_TRAITS
+# endif
+
+//
+// Without member template support, we can't have template constructors
+// in the standard library either:
+//
+# if defined(BOOST_NO_MEMBER_TEMPLATES) \
+ && !defined(BOOST_MSVC6_MEMBER_TEMPLATES) \
+ && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
+# define BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
+# endif
+
+//
+// Without member template support, we can't have a conforming
+// std::allocator template either:
+//
+# if defined(BOOST_NO_MEMBER_TEMPLATES) \
+ && !defined(BOOST_MSVC6_MEMBER_TEMPLATES) \
+ && !defined(BOOST_NO_STD_ALLOCATOR)
+# define BOOST_NO_STD_ALLOCATOR
+# endif
+
+//
+// Without ADL support, function-scope using declarations will break ADL as well:
+//
+#if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP) && !defined(BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL)
+# define BOOST_FUNCTION_SCOPE_USING_DECLARATION_BREAKS_ADL
+#endif
+
+//
+// If we have a standard allocator, then we have a partial one as well:
+//
+#if !defined(BOOST_NO_STD_ALLOCATOR)
+# define BOOST_HAS_PARTIAL_STD_ALLOCATOR
+#endif
+
+//
+// We can't have a working std::use_facet if there is no std::locale:
+//
+# if defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_NO_STD_USE_FACET)
+# define BOOST_NO_STD_USE_FACET
+# endif
+
+//
+// We can't have a std::messages facet if there is no std::locale:
+//
+# if defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_NO_STD_MESSAGES)
+# define BOOST_NO_STD_MESSAGES
+# endif
+
+//
+// We can't have a working std::wstreambuf if there is no std::locale:
+//
+# if defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_NO_STD_WSTREAMBUF)
+# define BOOST_NO_STD_WSTREAMBUF
+# endif
+
+//
+// We can't have a <cwctype> if there is no <cwchar>:
+//
+# if defined(BOOST_NO_CWCHAR) && !defined(BOOST_NO_CWCTYPE)
+# define BOOST_NO_CWCTYPE
+# endif
+
+//
+// We can't have a swprintf if there is no <cwchar>:
+//
+# if defined(BOOST_NO_CWCHAR) && !defined(BOOST_NO_SWPRINTF)
+# define BOOST_NO_SWPRINTF
+# endif
+
+//
+// If Win32 support is turned off, then we must turn off
+// threading support also, unless there is some other
+// thread API enabled:
+//
+#if defined(BOOST_DISABLE_WIN32) && defined(_WIN32) \
+ && !defined(BOOST_DISABLE_THREADS) && !defined(BOOST_HAS_PTHREADS)
+# define BOOST_DISABLE_THREADS
+#endif
+
+//
+// Turn on threading support if the compiler thinks that it's in
+// multithreaded mode. We put this here because there are only a
+// limited number of macros that identify this (if there's any missing
+// from here then add to the appropriate compiler section):
+//
+#if (defined(__MT__) || defined(_MT) || defined(_REENTRANT) \
+ || defined(_PTHREADS)) && !defined(BOOST_HAS_THREADS)
+# define BOOST_HAS_THREADS
+#endif
+
+//
+// Turn threading support off if BOOST_DISABLE_THREADS is defined:
+//
+#if defined(BOOST_DISABLE_THREADS) && defined(BOOST_HAS_THREADS)
+# undef BOOST_HAS_THREADS
+#endif
+
+//
+// Turn threading support off if we don't recognise the threading API:
+//
+#if defined(BOOST_HAS_THREADS) && !defined(BOOST_HAS_PTHREADS)\
+ && !defined(BOOST_HAS_WINTHREADS) && !defined(BOOST_HAS_BETHREADS)\
+ && !defined(BOOST_HAS_MPTASKS)
+# undef BOOST_HAS_THREADS
+#endif
+
+//
+// Turn threading detail macros off if we don't (want to) use threading
+//
+#ifndef BOOST_HAS_THREADS
+# undef BOOST_HAS_PTHREADS
+# undef BOOST_HAS_PTHREAD_MUTEXATTR_SETTYPE
+# undef BOOST_HAS_WINTHREADS
+# undef BOOST_HAS_BETHREADS
+# undef BOOST_HAS_MPTASKS
+#endif
+
+//
+// If the compiler claims to be C99 conformant, then it had better
+// have a <stdint.h>:
+//
+# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)
+# define BOOST_HAS_STDINT_H
+# endif
+
+//
+// Define BOOST_NO_SLIST and BOOST_NO_HASH if required.
+// Note that this is for backwards compatibility only.
+//
+# ifndef BOOST_HAS_SLIST
+# define BOOST_NO_SLIST
+# endif
+
+# ifndef BOOST_HAS_HASH
+# define BOOST_NO_HASH
+# endif
+
+// BOOST_HAS_ABI_HEADERS
+// This macro gets set if we have headers that fix the ABI,
+// and prevent ODR violations when linking to external libraries:
+#if defined(BOOST_ABI_PREFIX) && defined(BOOST_ABI_SUFFIX) && !defined(BOOST_HAS_ABI_HEADERS)
+# define BOOST_HAS_ABI_HEADERS
+#endif
+
+#if defined(BOOST_HAS_ABI_HEADERS) && defined(BOOST_DISABLE_ABI_HEADERS)
+# undef BOOST_HAS_ABI_HEADERS
+#endif
+
+// BOOST_NO_STDC_NAMESPACE workaround --------------------------------------//
+// Because std::size_t usage is so common, even in boost headers which do not
+// otherwise use the C library, the <cstddef> workaround is included here so
+// that ugly workaround code need not appear in many other boost headers.
+// NOTE WELL: This is a workaround for non-conforming compilers; <cstddef>
+// must still be #included in the usual places so that <cstddef> inclusion
+// works as expected with standard conforming compilers. The resulting
+// double inclusion of <cstddef> is harmless.
+
+# ifdef BOOST_NO_STDC_NAMESPACE
+# include <cstddef>
+ namespace std { using ::ptrdiff_t; using ::size_t; }
+# endif
+
+// Workaround for the unfortunate min/max macros defined by some platform headers
+
+#define BOOST_PREVENT_MACRO_SUBSTITUTION
+
+#ifndef BOOST_USING_STD_MIN
+# define BOOST_USING_STD_MIN() using std::min
+#endif
+
+#ifndef BOOST_USING_STD_MAX
+# define BOOST_USING_STD_MAX() using std::max
+#endif
+
+// BOOST_NO_STD_MIN_MAX workaround -----------------------------------------//
+
+# ifdef BOOST_NO_STD_MIN_MAX
+
+namespace std {
+    template <class _Tp> inline const _Tp& min BOOST_PREVENT_MACRO_SUBSTITUTION (const _Tp& __a, const _Tp& __b) {
+        if (__b < __a) return __b; // a strictly smaller second argument wins
+        return __a;                // otherwise (ties included) the first argument is returned
+    }
+    template <class _Tp> inline const _Tp& max BOOST_PREVENT_MACRO_SUBSTITUTION (const _Tp& __a, const _Tp& __b) {
+        if (__a < __b) return __b; // a strictly larger second argument wins
+        return __a;                // otherwise (ties included) the first argument is returned
+    }
+}
+
+# endif
+
+// BOOST_STATIC_CONSTANT workaround --------------------------------------- //
+// On compilers which don't allow in-class initialization of static integral
+// constant members, we must use enums as a workaround if we want the constants
+// to be available at compile-time. This macro gives us a convenient way to
+// declare such constants.
+
+# ifdef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+# define BOOST_STATIC_CONSTANT(type, assignment) enum { assignment }
+# else
+# define BOOST_STATIC_CONSTANT(type, assignment) static const type assignment
+# endif
+
+// BOOST_USE_FACET / HAS_FACET workaround ----------------------------------//
+// When the standard library does not have a conforming std::use_facet there
+// are various workarounds available, but they differ from library to library.
+// The same problem occurs with has_facet.
+// These macros provide a consistent way to access a locale's facets.
+// Usage:
+// replace
+// std::use_facet<Type>(loc);
+// with
+// BOOST_USE_FACET(Type, loc);
+// Note do not add a std:: prefix to the front of BOOST_USE_FACET!
+// Use of BOOST_HAS_FACET is analogous.
+
+#if defined(BOOST_NO_STD_USE_FACET)
+# ifdef BOOST_HAS_TWO_ARG_USE_FACET
+# define BOOST_USE_FACET(Type, loc) std::use_facet(loc, static_cast<Type*>(0))
+# define BOOST_HAS_FACET(Type, loc) std::has_facet(loc, static_cast<Type*>(0))
+# elif defined(BOOST_HAS_MACRO_USE_FACET)
+# define BOOST_USE_FACET(Type, loc) std::_USE(loc, Type)
+# define BOOST_HAS_FACET(Type, loc) std::_HAS(loc, Type)
+# elif defined(BOOST_HAS_STLP_USE_FACET)
+# define BOOST_USE_FACET(Type, loc) (*std::_Use_facet<Type >(loc))
+# define BOOST_HAS_FACET(Type, loc) std::has_facet< Type >(loc)
+# endif
+#else
+# define BOOST_USE_FACET(Type, loc) std::use_facet< Type >(loc)
+# define BOOST_HAS_FACET(Type, loc) std::has_facet< Type >(loc)
+#endif
+
+// BOOST_NESTED_TEMPLATE workaround ------------------------------------------//
+// Member templates are supported by some compilers even though they can't use
+// the A::template member<U> syntax, as a workaround replace:
+//
+// typedef typename A::template rebind<U> binder;
+//
+// with:
+//
+// typedef typename A::BOOST_NESTED_TEMPLATE rebind<U> binder;
+
+#ifndef BOOST_NO_MEMBER_TEMPLATE_KEYWORD
+# define BOOST_NESTED_TEMPLATE template
+#else
+# define BOOST_NESTED_TEMPLATE
+#endif
+
+// BOOST_UNREACHABLE_RETURN(x) workaround -------------------------------------//
+// Normally evaluates to nothing, unless BOOST_NO_UNREACHABLE_RETURN_DETECTION
+// is defined, in which case it evaluates to return x; Use when you have a return
+// statement that can never be reached.
+
+#ifdef BOOST_NO_UNREACHABLE_RETURN_DETECTION
+# define BOOST_UNREACHABLE_RETURN(x) return x;
+#else
+# define BOOST_UNREACHABLE_RETURN(x)
+#endif
+
+// BOOST_DEDUCED_TYPENAME workaround ------------------------------------------//
+//
+// Some compilers don't support the use of `typename' for dependent
+// types in deduced contexts, e.g.
+//
+// template <class T> void f(T, typename T::type);
+// ^^^^^^^^
+// Replace these declarations with:
+//
+// template <class T> void f(T, BOOST_DEDUCED_TYPENAME T::type);
+
+#ifndef BOOST_NO_DEDUCED_TYPENAME
+# define BOOST_DEDUCED_TYPENAME typename
+#else
+# define BOOST_DEDUCED_TYPENAME
+#endif
+
+// long long workaround ------------------------------------------//
+// On gcc (and maybe other compilers?) long long is always supported
+// but its use may generate either warnings (with -ansi), or errors
+// (with -pedantic -ansi) unless its use is prefixed by __extension__
+//
+#if defined(BOOST_HAS_LONG_LONG)
+namespace boost{ // boost::long_long_type / boost::ulong_long_type: aliases for (unsigned) long long
+# ifdef __GNUC__ // GCC branch: same typedefs, each prefixed with __extension__
+ __extension__ typedef long long long_long_type;
+ __extension__ typedef unsigned long long ulong_long_type;
+# else // every other compiler: plain typedefs
+ typedef long long long_long_type;
+ typedef unsigned long long ulong_long_type;
+# endif
+}
+#endif
+
+// BOOST_[APPEND_]EXPLICIT_TEMPLATE_[NON_]TYPE macros --------------------------//
+//
+// Some compilers have problems with function templates whose
+// template parameters don't appear in the function parameter
+// list (basically they just link one instantiation of the
+// template in the final executable). These macros provide a
+// uniform way to cope with the problem with no effects on the
+// calling syntax.
+
+// Example:
+//
+// #include <iostream>
+// #include <ostream>
+// #include <typeinfo>
+//
+// template <int n>
+// void f() { std::cout << n << ' '; }
+//
+// template <typename T>
+// void g() { std::cout << typeid(T).name() << ' '; }
+//
+// int main() {
+// f<1>();
+// f<2>();
+//
+// g<int>();
+// g<double>();
+// }
+//
+// With VC++ 6.0 the output is:
+//
+// 2 2 double double
+//
+// To fix it, write
+//
+// template <int n>
+// void f(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, n)) { ... }
+//
+// template <typename T>
+// void g(BOOST_EXPLICIT_TEMPLATE_TYPE(T)) { ... }
+//
+
+
+#if defined BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+
+# include "boost/type.hpp"
+# include "boost/non_type.hpp"
+
+# define BOOST_EXPLICIT_TEMPLATE_TYPE(t) boost::type<t>* = 0
+# define BOOST_EXPLICIT_TEMPLATE_TYPE_SPEC(t) boost::type<t>*
+# define BOOST_EXPLICIT_TEMPLATE_NON_TYPE(t, v) boost::non_type<t, v>* = 0
+# define BOOST_EXPLICIT_TEMPLATE_NON_TYPE_SPEC(t, v) boost::non_type<t, v>*
+
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(t) \
+ , BOOST_EXPLICIT_TEMPLATE_TYPE(t)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(t) \
+ , BOOST_EXPLICIT_TEMPLATE_TYPE_SPEC(t)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(t, v) \
+ , BOOST_EXPLICIT_TEMPLATE_NON_TYPE(t, v)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE_SPEC(t, v) \
+ , BOOST_EXPLICIT_TEMPLATE_NON_TYPE_SPEC(t, v)
+
+#else
+
+// no workaround needed: expand to nothing
+
+# define BOOST_EXPLICIT_TEMPLATE_TYPE(t)
+# define BOOST_EXPLICIT_TEMPLATE_TYPE_SPEC(t)
+# define BOOST_EXPLICIT_TEMPLATE_NON_TYPE(t, v)
+# define BOOST_EXPLICIT_TEMPLATE_NON_TYPE_SPEC(t, v)
+
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(t)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(t)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(t, v)
+# define BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE_SPEC(t, v)
+
+
+#endif // defined BOOST_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS
+
+
+// ---------------------------------------------------------------------------//
+
+//
+// Helper macro BOOST_STRINGIZE:
+// Converts the parameter X to a string after macro replacement
+// on X has been performed.
+//
+#define BOOST_STRINGIZE(X) BOOST_DO_STRINGIZE(X)
+#define BOOST_DO_STRINGIZE(X) #X
+
+//
+// Helper macro BOOST_JOIN:
+// The following piece of macro magic joins the two
+// arguments together, even when one of the arguments is
+// itself a macro (see 16.3.1 in C++ standard). The key
+// is that macro expansion of macro arguments does not
+// occur in BOOST_DO_JOIN2 but does in BOOST_DO_JOIN.
+//
+#define BOOST_JOIN( X, Y ) BOOST_DO_JOIN( X, Y )
+#define BOOST_DO_JOIN( X, Y ) BOOST_DO_JOIN2(X,Y)
+#define BOOST_DO_JOIN2( X, Y ) X##Y
+
+//
+// Set some default values for compiler/library/platform names.
+// These are for debugging config setup only:
+//
+# ifndef BOOST_COMPILER
+# define BOOST_COMPILER "Unknown ISO C++ Compiler"
+# endif
+# ifndef BOOST_STDLIB
+# define BOOST_STDLIB "Unknown ISO standard library"
+# endif
+# ifndef BOOST_PLATFORM
+# if defined(unix) || defined(__unix) || defined(_XOPEN_SOURCE) \
+ || defined(_POSIX_SOURCE)
+# define BOOST_PLATFORM "Generic Unix"
+# else
+# define BOOST_PLATFORM "Unknown"
+# endif
+# endif
+
+#endif
+
+
+
diff --git a/indexlib/boost-compat/config/user.hpp b/indexlib/boost-compat/config/user.hpp
new file mode 100644
index 000000000..5a4a9d477
--- /dev/null
+++ b/indexlib/boost-compat/config/user.hpp
@@ -0,0 +1,124 @@
+// boost/config/user.hpp ---------------------------------------------------//
+
+// (C) Copyright John Maddock 2001.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// Do not check in modified versions of this file,
+// This file may be customized by the end user, but not by boost.
+
+//
+// Use this file to define a site and compiler specific
+// configuration policy:
+//
+
+// define this to locate a compiler config file:
+// #define BOOST_COMPILER_CONFIG <myheader>
+
+// define this to locate a stdlib config file:
+// #define BOOST_STDLIB_CONFIG <myheader>
+
+// define this to locate a platform config file:
+// #define BOOST_PLATFORM_CONFIG <myheader>
+
+// define this to disable compiler config,
+// use if your compiler config has nothing to set:
+// #define BOOST_NO_COMPILER_CONFIG
+
+// define this to disable stdlib config,
+// use if your stdlib config has nothing to set:
+// #define BOOST_NO_STDLIB_CONFIG
+
+// define this to disable platform config,
+// use if your platform config has nothing to set:
+// #define BOOST_NO_PLATFORM_CONFIG
+
+// define this to disable all config options,
+// excluding the user config. Use if your
+// setup is fully ISO compliant, and has no
+// useful extensions, or for autoconf generated
+// setups:
+// #define BOOST_NO_CONFIG
+
+// define this to make the config "optimistic"
+// about unknown compiler versions. Normally
+// unknown compiler versions are assumed to have
+// all the defects of the last known version; however,
+// setting this flag causes the config to assume
+// that unknown compiler versions are fully conformant
+// with the standard:
+// #define BOOST_STRICT_CONFIG
+
+// define this to cause the config to halt compilation
+// with an #error if it encounters anything unknown --
+// either an unknown compiler version or an unknown
+// compiler/platform/library:
+// #define BOOST_ASSERT_CONFIG
+
+
+// define if you want to disable threading support, even
+// when available:
+// #define BOOST_DISABLE_THREADS
+
+// define when you want to disable Win32 specific features
+// even when available:
+// #define BOOST_DISABLE_WIN32
+
+// BOOST_DISABLE_ABI_HEADERS: Stops boost headers from including any
+// prefix/suffix headers that normally control things like struct
+// packing and alignment.
+// #define BOOST_DISABLE_ABI_HEADERS
+
+// BOOST_ABI_PREFIX: A prefix header to include in place of whatever
+// boost.config would normally select, any replacement should set up
+// struct packing and alignment options as required.
+// #define BOOST_ABI_PREFIX my-header-name
+
+// BOOST_ABI_SUFFIX: A suffix header to include in place of whatever
+// boost.config would normally select, any replacement should undo
+// the effects of the prefix header.
+// #define BOOST_ABI_SUFFIX my-header-name
+
+// BOOST_ALL_DYN_LINK: Forces all libraries that have separate source,
+// to be linked as dll's rather than static libraries on Microsoft Windows
+// (this macro is used to turn on __declspec(dllimport) modifiers, so that
+// the compiler knows which symbols to look for in a dll rather than in a
+// static library). Note that there may be some libraries that can only
+// be statically linked (Boost.Test for example) and others which may only
+// be dynamically linked (Boost.Threads for example), in these cases this
+// macro has no effect.
+// #define BOOST_ALL_DYN_LINK
+
+// BOOST_WHATEVER_DYN_LINK: Forces library "whatever" to be linked as a dll
+// rather than a static library on Microsoft Windows: replace the WHATEVER
+// part of the macro name with the name of the library that you want to
+// dynamically link to, for example use BOOST_DATE_TIME_DYN_LINK or
+// BOOST_REGEX_DYN_LINK etc (this macro is used to turn on __declspec(dllimport)
+// modifiers, so that the compiler knows which symbols to look for in a dll
+// rather than in a static library).
+// Note that there may be some libraries that can only be statically linked
+// (Boost.Test for example) and others which may only be dynamically linked
+// (Boost.Threads for example), in these cases this macro is unsupported.
+// #define BOOST_WHATEVER_DYN_LINK
+
+// BOOST_ALL_NO_LIB: Tells the config system not to automatically select
+// which libraries to link against.
+// Normally if a compiler supports #pragma lib, then the correct library
+// build variant will be automatically selected and linked against,
+// simply by the act of including one of that library's headers.
+// This macro turns that feature off.
+// #define BOOST_ALL_NO_LIB
+
+// BOOST_WHATEVER_NO_LIB: Tells the config system not to automatically
+// select which library to link against for library "whatever",
+// replace WHATEVER in the macro name with the name of the library;
+// for example BOOST_DATE_TIME_NO_LIB or BOOST_REGEX_NO_LIB.
+// Normally if a compiler supports #pragma lib, then the correct library
+// build variant will be automatically selected and linked against, simply
+// by the act of including one of that library's headers. This macro turns
+// that feature off.
+// #define BOOST_WHATEVER_NO_LIB
+
+
+
diff --git a/indexlib/boost-compat/intrusive_ptr.hpp b/indexlib/boost-compat/intrusive_ptr.hpp
new file mode 100644
index 000000000..7efbadeea
--- /dev/null
+++ b/indexlib/boost-compat/intrusive_ptr.hpp
@@ -0,0 +1,272 @@
+#ifndef BOOST_INTRUSIVE_PTR_HPP_INCLUDED
+#define BOOST_INTRUSIVE_PTR_HPP_INCLUDED
+
+//
+// intrusive_ptr.hpp
+//
+// Copyright (c) 2001, 2002 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/smart_ptr/intrusive_ptr.html for documentation.
+//
+
+#include <boost/config.hpp>
+
+#ifdef BOOST_MSVC // moved here to work around VC++ compiler crash
+# pragma warning(push)
+# pragma warning(disable:4284) // odd return type for operator->
+#endif
+
+#include <boost/assert.hpp>
+#include <boost/detail/workaround.hpp>
+
+#include <functional> // for std::less
+#include <iosfwd> // for std::basic_ostream
+
+
+namespace boost
+{
+
+//
+// intrusive_ptr
+//
+// A smart pointer that uses intrusive reference counting.
+//
+// Relies on unqualified calls to
+//
+// void intrusive_ptr_add_ref(T * p);
+// void intrusive_ptr_release(T * p);
+//
+// (p != 0)
+//
+// The object is responsible for destroying itself.
+//
+
+template<class T> class intrusive_ptr
+{
+private:
+
+ typedef intrusive_ptr this_type;
+
+public:
+
+ typedef T element_type;
+ // Creates an empty pointer (p_ is null).
+ intrusive_ptr(): p_(0)
+ {
+ }
+ // Stores p; a non-null p gets one intrusive_ptr_add_ref call unless add_ref is false.
+ intrusive_ptr(T * p, bool add_ref = true): p_(p)
+ {
+ if(p_ != 0 && add_ref) intrusive_ptr_add_ref(p_);
+ }
+
+#if !defined(BOOST_NO_MEMBER_TEMPLATES) || defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+ // Converting copy: initialises p_ from rhs.get() (U * must convert to T *) and adds a reference if non-null.
+ template<class U> intrusive_ptr(intrusive_ptr<U> const & rhs): p_(rhs.get())
+ {
+ if(p_ != 0) intrusive_ptr_add_ref(p_);
+ }
+
+#endif
+ // Copy: shares rhs's pointer and adds a reference if it is non-null.
+ intrusive_ptr(intrusive_ptr const & rhs): p_(rhs.p_)
+ {
+ if(p_ != 0) intrusive_ptr_add_ref(p_);
+ }
+ // Gives up this pointer's reference through intrusive_ptr_release (skipped when null).
+ ~intrusive_ptr()
+ {
+ if(p_ != 0) intrusive_ptr_release(p_);
+ }
+
+#if !defined(BOOST_NO_MEMBER_TEMPLATES) || defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+ // Assignment builds a temporary from rhs and swaps with it; the temporary's destructor then releases the old pointer.
+ template<class U> intrusive_ptr & operator=(intrusive_ptr<U> const & rhs)
+ {
+ this_type(rhs).swap(*this);
+ return *this;
+ }
+
+#endif
+ // Same-type assignment, using the same temporary-and-swap scheme.
+ intrusive_ptr & operator=(intrusive_ptr const & rhs)
+ {
+ this_type(rhs).swap(*this);
+ return *this;
+ }
+ // Raw-pointer assignment: the temporary is built with add_ref defaulted to true, so a non-null rhs gains a reference.
+ intrusive_ptr & operator=(T * rhs)
+ {
+ this_type(rhs).swap(*this);
+ return *this;
+ }
+ // Returns the stored raw pointer (may be null); no reference-count call is made.
+ T * get() const
+ {
+ return p_;
+ }
+ // Dereferences p_; there is no null check in this function.
+ T & operator*() const
+ {
+ return *p_;
+ }
+ // Member access through p_; again without a null check.
+ T * operator->() const
+ {
+ return p_;
+ }
+
+#if defined(__SUNPRO_CC) && BOOST_WORKAROUND(__SUNPRO_CC, <= 0x530)
+ // This branch converts directly to bool: true when p_ is non-null.
+ operator bool () const
+ {
+ return p_ != 0;
+ }
+
+#elif defined(__MWERKS__) && BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
+ typedef T * (this_type::*unspecified_bool_type)() const;
+ // This branch converts to a pointer-to-member-function: null when p_ is null, &this_type::get otherwise.
+ operator unspecified_bool_type() const // never throws
+ {
+ return p_ == 0? 0: &this_type::get;
+ }
+
+#else
+ // Default branch converts to a pointer-to-data-member: null when p_ is null, &this_type::p_ otherwise.
+ typedef T * this_type::*unspecified_bool_type;
+
+ operator unspecified_bool_type () const
+ {
+ return p_ == 0? 0: &this_type::p_;
+ }
+
+#endif
+
+ // operator! is a Borland-specific workaround
+ bool operator! () const
+ {
+ return p_ == 0;
+ }
+ // Exchanges the raw pointers of *this and rhs; no add_ref/release calls are made.
+ void swap(intrusive_ptr & rhs)
+ {
+ T * tmp = p_;
+ p_ = rhs.p_;
+ rhs.p_ = tmp;
+ }
+
+private:
+ // The referenced object, or null.
+ T * p_;
+};
+
+template<class T, class U> inline bool operator==(intrusive_ptr<T> const & lhs, intrusive_ptr<U> const & rhs)
+{   // Equal when both hold the same raw pointer.
+    return !(lhs.get() != rhs.get());
+}
+
+template<class T, class U> inline bool operator!=(intrusive_ptr<T> const & lhs, intrusive_ptr<U> const & rhs)
+{   // Unequal when the two raw pointers differ.
+    return !(lhs.get() == rhs.get());
+}
+
+template<class T> inline bool operator==(intrusive_ptr<T> const & smart, T * raw)
+{   // Compares the held pointer with a raw pointer.
+    return raw == smart.get();
+}
+
+template<class T> inline bool operator!=(intrusive_ptr<T> const & smart, T * raw)
+{   // True when the held pointer differs from the raw pointer.
+    return raw != smart.get();
+}
+
+template<class T> inline bool operator==(T * raw, intrusive_ptr<T> const & smart)
+{   // Raw pointer on the left-hand side: same comparison, operands mirrored.
+    return smart.get() == raw;
+}
+
+template<class T> inline bool operator!=(T * raw, intrusive_ptr<T> const & smart)
+{   // Raw pointer on the left-hand side: true when the two pointers differ.
+    return smart.get() != raw;
+}
+
+#if __GNUC__ == 2 && __GNUC_MINOR__ <= 96
+
+// Resolve the ambiguity between our op!= and the one in rel_ops
+
+template<class T> inline bool operator!=(intrusive_ptr<T> const & lhs, intrusive_ptr<T> const & rhs)
+{   // Same-type overload, compiled only for the old GCC versions selected by the enclosing #if.
+    return !(lhs.get() == rhs.get());
+}
+
+#endif
+
+template<class T> inline bool operator<(intrusive_ptr<T> const & lhs, intrusive_ptr<T> const & rhs)
+{   // Orders by raw pointer value, going through std::less<T *> rather than the built-in '<'.
+    std::less<T *> before; return before(lhs.get(), rhs.get());
+}
+
+template<class T> void swap(intrusive_ptr<T> & first, intrusive_ptr<T> & second)
+{   // Free-function form of intrusive_ptr::swap; simply forwards to the member.
+    first.swap(second);
+}
+
+// mem_fn support
+
+template<class T> T * get_pointer(intrusive_ptr<T> const & holder)
+{   // Returns holder.get(), i.e. the raw pointer, which may be null.
+    T * const raw = holder.get(); return raw;
+}
+
+template<class T, class U> intrusive_ptr<T> static_pointer_cast(intrusive_ptr<U> const & source)
+{   // static_cast the raw pointer, then wrap the result in a new intrusive_ptr<T>.
+    return intrusive_ptr<T>(static_cast<T *>(source.get()));
+}
+
+template<class T, class U> intrusive_ptr<T> const_pointer_cast(intrusive_ptr<U> const & source)
+{   // const_cast the raw pointer, then wrap the result in a new intrusive_ptr<T>.
+    return intrusive_ptr<T>(const_cast<T *>(source.get()));
+}
+
+template<class T, class U> intrusive_ptr<T> dynamic_pointer_cast(intrusive_ptr<U> const & source)
+{   // dynamic_cast the raw pointer; a failed cast yields null and therefore an empty intrusive_ptr<T>.
+    return intrusive_ptr<T>(dynamic_cast<T *>(source.get()));
+}
+
+// operator<<
+
+#if defined(__GNUC__) && (__GNUC__ < 3)
+
+template<class Y> std::ostream & operator<< (std::ostream & out, intrusive_ptr<Y> const & ptr)
+{
+    // Streams the result of ptr.get() and hands the stream back for chaining.
+    return out << ptr.get();
+}
+
+#else
+
+# if defined(BOOST_MSVC) && BOOST_WORKAROUND(BOOST_MSVC, <= 1200 && __SGI_STL_PORT)
+// MSVC6 has problems finding std::basic_ostream through the using declaration in namespace _STL
+using std::basic_ostream;
+template<class E, class T, class Y> basic_ostream<E, T> & operator<< (basic_ostream<E, T> & os, intrusive_ptr<Y> const & p)
+# else
+template<class E, class T, class Y> std::basic_ostream<E, T> & operator<< (std::basic_ostream<E, T> & os, intrusive_ptr<Y> const & p)
+# endif
+{ // one body shared by whichever signature the #if above selected
+ os << p.get(); // streams the raw pointer returned by get()
+ return os;
+}
+
+#endif
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+# pragma warning(pop)
+#endif
+
+#endif // #ifndef BOOST_INTRUSIVE_PTR_HPP_INCLUDED
diff --git a/indexlib/boost-compat/next_prior.hpp b/indexlib/boost-compat/next_prior.hpp
new file mode 100644
index 000000000..e1d2e4289
--- /dev/null
+++ b/indexlib/boost-compat/next_prior.hpp
@@ -0,0 +1,51 @@
+// Boost next_prior.hpp header file ---------------------------------------//
+
+// (C) Copyright Dave Abrahams and Daniel Walker 1999-2003. Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/utility for documentation.
+
+// Revision History
+// 13 Dec 2003 Added next(x, n) and prior(x, n) (Daniel Walker)
+
+#ifndef BOOST_NEXT_PRIOR_HPP_INCLUDED
+#define BOOST_NEXT_PRIOR_HPP_INCLUDED
+
+#include <iterator>
+
+namespace boost {
+
+// Helper functions for classes like bidirectional iterators not supporting
+// operator+ and operator-
+//
+// Usage:
+// const std::list<T>::iterator p = get_some_iterator();
+// const std::list<T>::iterator prev = boost::prior(p);
+// const std::list<T>::iterator next = boost::next(prev, 2);
+
+// Contributed by Dave Abrahams
+
+template <class T> // returns an incremented copy; the argument is taken by value, so the caller's object is untouched
+inline T next(T pos) { return ++pos; }
+
+template <class T, class Distance>
+inline T next(T pos, Distance steps)
+{   // std::advance moves the by-value copy 'pos'; the caller's iterator is left alone.
+    std::advance(pos, steps);
+    return pos;
+}
+
+template <class T> // returns a decremented copy; the argument is taken by value, so the caller's object is untouched
+inline T prior(T pos) { return --pos; }
+
+template <class T, class Distance> // returns a copy of x moved back by n positions; x itself is passed by value
+inline T prior(T x, Distance n)
+{   // Negate in the iterator's signed difference_type: -n on an unsigned Distance wraps to a huge forward step.
+    std::advance(x, -static_cast<typename std::iterator_traits<T>::difference_type>(n));
+    return x;
+}
+
+} // namespace boost
+
+#endif // BOOST_NEXT_PRIOR_HPP_INCLUDED
diff --git a/indexlib/boost-compat/noncopyable.hpp b/indexlib/boost-compat/noncopyable.hpp
new file mode 100644
index 000000000..7770bdbd3
--- /dev/null
+++ b/indexlib/boost-compat/noncopyable.hpp
@@ -0,0 +1,36 @@
+// Boost noncopyable.hpp header file --------------------------------------//
+
+// (C) Copyright Beman Dawes 1999-2003. Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/utility for documentation.
+
+#ifndef BOOST_NONCOPYABLE_HPP_INCLUDED
+#define BOOST_NONCOPYABLE_HPP_INCLUDED
+
+namespace boost {
+
+// Private copy constructor and copy assignment ensure classes derived from
+// class noncopyable cannot be copied.
+
+// Contributed by Dave Abrahams
+
+namespace noncopyable_ // protection from unintended ADL
+{
+    class noncopyable
+    {
+        // Copying is switched off: both operations are private (class default) and only declared here.
+        noncopyable( const noncopyable& );
+        const noncopyable& operator=( const noncopyable& );
+    protected: // construction and destruction are reserved for derived classes
+        noncopyable() {}
+        ~noncopyable() {}
+    };
+}
+
+typedef noncopyable_::noncopyable noncopyable;
+
+} // namespace boost
+
+#endif // BOOST_NONCOPYABLE_HPP_INCLUDED
diff --git a/indexlib/boost-compat/remove_cv.hpp b/indexlib/boost-compat/remove_cv.hpp
new file mode 100644
index 000000000..ec7c1a956
--- /dev/null
+++ b/indexlib/boost-compat/remove_cv.hpp
@@ -0,0 +1,61 @@
+#ifndef BOOST_TT_REMOVE_CV_HPP_INCLUDED
+#define BOOST_TT_REMOVE_CV_HPP_INCLUDED
+#ifndef BOOST_TT_DETAIL_CV_TRAITS_IMPL_HPP_INCLUDED
+#define BOOST_TT_DETAIL_CV_TRAITS_IMPL_HPP_INCLUDED
+// ADAPTED (TAKEN) FROM BOOST
+//
+// (C) Copyright Dave Abrahams, Steve Cleary, Beman Dawes, Howard
+// Hinnant & John Maddock 2000.
+// Use, modification and distribution are subject to the Boost Software License,
+// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt).
+//
+// See http://www.boost.org/libs/type_traits for most recent version including documentation.
+//
+
+
+
+
+namespace boost {
+namespace detail {
+
+// Implementation helper: remove_cv<T> instantiates this with T* so that the
+// specialisations below can pattern-match T's cv-qualifiers and drop them.
+template <typename T> struct cv_traits_imp {};
+
+template <typename T>
+struct cv_traits_imp<T*>
+{
+    typedef T unqualified_type;
+};
+
+template <typename T>
+struct cv_traits_imp<const T*>
+{
+    typedef T unqualified_type;
+};
+
+template <typename T>
+struct cv_traits_imp<volatile T*>
+{
+    typedef T unqualified_type;
+};
+
+template <typename T>
+struct cv_traits_imp<const volatile T*>
+{
+    typedef T unqualified_type;
+};
+
+} // namespace detail
+// remove_cv<T>::type is T with any top-level const/volatile removed.
+template <typename T> struct remove_cv {
+    typedef typename detail::cv_traits_imp<T*>::unqualified_type type;
+};
+template <typename T> struct remove_cv<T&> { typedef T& type; }; // T&* cannot be formed; a reference has no top-level cv to strip
+} // namespace boost
+
+
+
+#endif
+#endif
diff --git a/indexlib/boost-compat/scoped_ptr.hpp b/indexlib/boost-compat/scoped_ptr.hpp
new file mode 100644
index 000000000..1260066ad
--- /dev/null
+++ b/indexlib/boost-compat/scoped_ptr.hpp
@@ -0,0 +1,118 @@
+#ifndef BOOST_SCOPED_PTR_HPP_INCLUDED
+#define BOOST_SCOPED_PTR_HPP_INCLUDED
+
+// ADAPTED FOR indexlib
+
+// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+// Copyright (c) 2001, 2002 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// http://www.boost.org/libs/smart_ptr/scoped_ptr.htm
+//
+
+#include "checked_delete.hpp"
+
+#include <memory> // for std::auto_ptr
+#include <assert.h>
+
+namespace boost
+{
+
+
+// scoped_ptr mimics a built-in pointer except that it guarantees deletion
+// of the object pointed to, either on destruction of the scoped_ptr or via
+// an explicit reset(). scoped_ptr is a simple solution for simple needs;
+// use shared_ptr or std::auto_ptr if your needs are more complex.
+
+template<class T> class scoped_ptr // noncopyable
+{
+private:
+ // The owned object, or null.
+ T * ptr;
+ // Copy construction and copy assignment are private and only declared in this class.
+ scoped_ptr(scoped_ptr const &);
+ scoped_ptr & operator=(scoped_ptr const &);
+
+ typedef scoped_ptr<T> this_type;
+
+public:
+
+ typedef T element_type;
+ // Takes ownership of p (null by default).
+ explicit scoped_ptr(T * p = 0): ptr(p) // never throws
+ {
+ }
+
+ // Takes over whatever the auto_ptr argument holds by calling p.release().
+ explicit scoped_ptr(std::auto_ptr<T> p): ptr(p.release()) // never throws
+ {
+ }
+ // Destroys the owned object through boost::checked_delete.
+ ~scoped_ptr() // never throws
+ {
+ boost::checked_delete(ptr);
+ }
+ // Replaces the owned object with p: a temporary owning p is swapped in, and its destructor deletes the old object.
+ void reset(T * p = 0) // never throws
+ {
+ assert(p == 0 || p != ptr); // catch self-reset errors
+ this_type(p).swap(*this);
+ }
+ // Dereference; asserts that the stored pointer is non-null.
+ T & operator*() const // never throws
+ {
+ assert(ptr != 0);
+ return *ptr;
+ }
+ // Member access; asserts that the stored pointer is non-null.
+ T * operator->() const // never throws
+ {
+ assert(ptr != 0);
+ return ptr;
+ }
+ // Returns the stored pointer (may be null) without giving up ownership.
+ T * get() const // never throws
+ {
+ return ptr;
+ }
+
+ // implicit conversion to "bool"
+
+ typedef T * this_type::*unspecified_bool_type;
+ // Yields a null member pointer when ptr is null and &this_type::ptr otherwise.
+ operator unspecified_bool_type() const // never throws
+ {
+ return ptr == 0? 0: &this_type::ptr;
+ }
+ // True when no object is owned.
+ bool operator! () const // never throws
+ {
+ return ptr == 0;
+ }
+ // Exchanges the stored pointers of *this and b; nothing is deleted.
+ void swap(scoped_ptr & b) // never throws
+ {
+ T * tmp = b.ptr;
+ b.ptr = ptr;
+ ptr = tmp;
+ }
+};
+
+template<class T> inline void swap(scoped_ptr<T> & first, scoped_ptr<T> & second) // never throws
+{   // Free-function form of scoped_ptr::swap; simply forwards to the member.
+    first.swap(second);
+}
+
+// get_pointer(p) is a generic way to say p.get()
+
+template<class T> inline T * get_pointer(scoped_ptr<T> const & holder)
+{   // Returns holder.get(), i.e. the stored raw pointer, which may be null.
+    T * const raw = holder.get(); return raw;
+}
+
+} // namespace boost
+
+#endif // #ifndef BOOST_SCOPED_PTR_HPP_INCLUDED
diff --git a/indexlib/boost-compat/shared_ptr.hpp b/indexlib/boost-compat/shared_ptr.hpp
new file mode 100644
index 000000000..0a3bf6d86
--- /dev/null
+++ b/indexlib/boost-compat/shared_ptr.hpp
@@ -0,0 +1,473 @@
+#ifndef BOOST_SHARED_PTR_HPP_INCLUDED
+#define BOOST_SHARED_PTR_HPP_INCLUDED
+
+//
+// shared_ptr.hpp
+//
+// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+// Copyright (c) 2001, 2002, 2003 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/smart_ptr/shared_ptr.htm for documentation.
+//
+
+#include <boost/config.hpp> // for broken compiler workarounds
+
+#if defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+#include <boost/detail/shared_ptr_nmt.hpp>
+#else
+
+#include <boost/assert.hpp>
+#include <boost/checked_delete.hpp>
+#include <boost/throw_exception.hpp>
+#include <boost/detail/shared_count.hpp>
+#include <boost/detail/workaround.hpp>
+
+#include <memory> // for std::auto_ptr
+#include <algorithm> // for std::swap
+#include <functional> // for std::less
+#include <typeinfo> // for std::bad_cast
+#include <iosfwd> // for std::basic_ostream
+
+#ifdef BOOST_MSVC // moved here to work around VC++ compiler crash
+# pragma warning(push)
+# pragma warning(disable:4284) // odd return type for operator->
+#endif
+
+namespace boost
+{
+
+template<class T> class weak_ptr;
+template<class T> class enable_shared_from_this;
+
+namespace detail
+{
+
+// Empty tag types. Each one selects the shared_ptr converting constructor
+// that applies the correspondingly named cast to the source pointer.
+struct static_cast_tag {};
+struct const_cast_tag {};
+struct dynamic_cast_tag {};
+struct polymorphic_cast_tag {};
+
+// Supplies the return type of shared_ptr<T>::operator*. The primary template
+// yields T &; the specialisations below yield plain void because a
+// reference to (cv-qualified) void cannot be formed.
+template<class T> struct shared_ptr_traits
+{
+ typedef T & reference;
+};
+
+template<> struct shared_ptr_traits<void>
+{
+ typedef void reference;
+};
+
+// The cv-qualified void specialisations are skipped on compilers for which
+// BOOST_NO_CV_VOID_SPECIALIZATIONS is defined.
+#if !defined(BOOST_NO_CV_VOID_SPECIALIZATIONS)
+
+template<> struct shared_ptr_traits<void const>
+{
+ typedef void reference;
+};
+
+template<> struct shared_ptr_traits<void volatile>
+{
+ typedef void reference;
+};
+
+template<> struct shared_ptr_traits<void const volatile>
+{
+ typedef void reference;
+};
+
+#endif
+
+// enable_shared_from_this support
+
+// Overload chosen when the pointer passed as pe converts to
+// enable_shared_from_this<T> const *. If pe is non-null, the object's
+// _internal_weak_this member is assigned px (with const cast away) and pn.
+template<class T, class Y> void sp_enable_shared_from_this( shared_count const & pn, boost::enable_shared_from_this<T> const * pe, Y const * px )
+{
+ if(pe != 0) pe->_internal_weak_this._internal_assign(const_cast<Y*>(px), pn);
+}
+
+// Fallback overload (C-style variadic): accepts any remaining arguments and
+// does nothing.
+inline void sp_enable_shared_from_this( shared_count const & /*pn*/, ... )
+{
+}
+
+} // namespace detail
+
+
+//
+// shared_ptr
+//
+// An enhanced relative of scoped_ptr with reference counted copy semantics.
+// The object pointed to is deleted when the last shared_ptr pointing to it
+// is destroyed or reset.
+//
+
+// Smart pointer made of two members: px, the raw pointer returned by get(),
+// and pn, a detail::shared_count acting as the reference counter.
+template<class T> class shared_ptr
+{
+private:
+
+ // Borland 5.5.1 specific workaround
+ typedef shared_ptr<T> this_type;
+
+public:
+
+ typedef T element_type;
+ typedef T value_type;
+ typedef T * pointer;
+ typedef typename detail::shared_ptr_traits<T>::reference reference;
+
+ // Constructs an empty shared_ptr: null px, default-constructed pn.
+ shared_ptr(): px(0), pn() // never throws in 1.30+
+ {
+ }
+
+ // Takes p and builds the count with a checked_deleter<Y>; then gives
+ // sp_enable_shared_from_this the chance to record the new count in *p.
+ template<class Y>
+ explicit shared_ptr(Y * p): px(p), pn(p, checked_deleter<Y>()) // Y must be complete
+ {
+ detail::sp_enable_shared_from_this( pn, p, p );
+ }
+
+ //
+ // Requirements: D's copy constructor must not throw
+ //
+ // shared_ptr will release p by calling d(p)
+ //
+
+ template<class Y, class D> shared_ptr(Y * p, D d): px(p), pn(p, d)
+ {
+ detail::sp_enable_shared_from_this( pn, p, p );
+ }
+
+// generated copy constructor, assignment, destructor are fine...
+
+// except that Borland C++ has a bug, and g++ with -Wsynth warns
+#if defined(__BORLANDC__) || defined(__GNUC__)
+
+ // Explicit copy assignment: copies the pointer and the count.
+ shared_ptr & operator=(shared_ptr const & r) // never throws
+ {
+ px = r.px;
+ pn = r.pn; // shared_count::op= doesn't throw
+ return *this;
+ }
+
+#endif
+
+ // Construction from a weak_ptr. The count is built first (this is the
+ // step that may throw); px is copied only afterwards.
+ template<class Y>
+ explicit shared_ptr(weak_ptr<Y> const & r): pn(r.pn) // may throw
+ {
+ // it is now safe to copy r.px, as pn(r.pn) did not throw
+ px = r.px;
+ }
+
+ // Converting copy constructor: relies on the implicit conversion
+ // from Y * to T * when initialising px.
+ template<class Y>
+ shared_ptr(shared_ptr<Y> const & r): px(r.px), pn(r.pn) // never throws
+ {
+ }
+
+ // Casting constructors, selected by the tag argument. Each copies r's
+ // count and stores the result of the named cast applied to r.px.
+ template<class Y>
+ shared_ptr(shared_ptr<Y> const & r, detail::static_cast_tag): px(static_cast<element_type *>(r.px)), pn(r.pn)
+ {
+ }
+
+ template<class Y>
+ shared_ptr(shared_ptr<Y> const & r, detail::const_cast_tag): px(const_cast<element_type *>(r.px)), pn(r.pn)
+ {
+ }
+
+ // dynamic_cast variant: when the cast yields 0, pn is replaced by a
+ // default-constructed count instead of keeping r's.
+ template<class Y>
+ shared_ptr(shared_ptr<Y> const & r, detail::dynamic_cast_tag): px(dynamic_cast<element_type *>(r.px)), pn(r.pn)
+ {
+ if(px == 0) // need to allocate new counter -- the cast failed
+ {
+ pn = detail::shared_count();
+ }
+ }
+
+ // polymorphic_cast variant: when the dynamic_cast yields 0,
+ // std::bad_cast is raised through boost::throw_exception.
+ template<class Y>
+ shared_ptr(shared_ptr<Y> const & r, detail::polymorphic_cast_tag): px(dynamic_cast<element_type *>(r.px)), pn(r.pn)
+ {
+ if(px == 0)
+ {
+ boost::throw_exception(std::bad_cast());
+ }
+ }
+
+#ifndef BOOST_NO_AUTO_PTR
+
+ // Construction from std::auto_ptr. The raw pointer is read into tmp
+ // before the count is built from r, so it is still available for the
+ // sp_enable_shared_from_this call afterwards.
+ template<class Y>
+ explicit shared_ptr(std::auto_ptr<Y> & r): px(r.get()), pn()
+ {
+ Y * tmp = r.get();
+ pn = detail::shared_count(r);
+ detail::sp_enable_shared_from_this( pn, tmp, tmp );
+ }
+
+#endif
+
+#if !defined(BOOST_MSVC) || (BOOST_MSVC > 1200)
+
+ // Converting assignment from shared_ptr<Y>.
+ template<class Y>
+ shared_ptr & operator=(shared_ptr<Y> const & r) // never throws
+ {
+ px = r.px;
+ pn = r.pn; // shared_count::op= doesn't throw
+ return *this;
+ }
+
+#endif
+
+#ifndef BOOST_NO_AUTO_PTR
+
+ // Assignment from std::auto_ptr: builds a temporary from r and swaps
+ // it with *this.
+ template<class Y>
+ shared_ptr & operator=(std::auto_ptr<Y> & r)
+ {
+ this_type(r).swap(*this);
+ return *this;
+ }
+
+#endif
+
+ // The reset() overloads all construct a temporary shared_ptr from their
+ // arguments and swap it with *this.
+ void reset() // never throws in 1.30+
+ {
+ this_type().swap(*this);
+ }
+
+ template<class Y> void reset(Y * p) // Y must be complete
+ {
+ BOOST_ASSERT(p == 0 || p != px); // catch self-reset errors
+ this_type(p).swap(*this);
+ }
+
+ template<class Y, class D> void reset(Y * p, D d)
+ {
+ this_type(p, d).swap(*this);
+ }
+
+ // Dereference; px must not be null (checked with BOOST_ASSERT).
+ reference operator* () const // never throws
+ {
+ BOOST_ASSERT(px != 0);
+ return *px;
+ }
+
+ // Member access; px must not be null (checked with BOOST_ASSERT).
+ T * operator-> () const // never throws
+ {
+ BOOST_ASSERT(px != 0);
+ return px;
+ }
+
+ // Returns the stored raw pointer.
+ T * get() const // never throws
+ {
+ return px;
+ }
+
+ // implicit conversion to "bool"
+ // Three compiler-dependent variants follow; each yields a value that
+ // tests false exactly when px is 0.
+
+#if defined(__SUNPRO_CC) && BOOST_WORKAROUND(__SUNPRO_CC, <= 0x530)
+
+ operator bool () const
+ {
+ return px != 0;
+ }
+
+#elif defined(__MWERKS__) && BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
+ typedef T * (this_type::*unspecified_bool_type)() const;
+
+ operator unspecified_bool_type() const // never throws
+ {
+ return px == 0? 0: &this_type::get;
+ }
+
+#else
+
+ typedef T * this_type::*unspecified_bool_type;
+
+ operator unspecified_bool_type() const // never throws
+ {
+ return px == 0? 0: &this_type::px;
+ }
+
+#endif
+
+ // operator! is redundant, but some compilers need it
+
+ bool operator! () const // never throws
+ {
+ return px == 0;
+ }
+
+ // unique() and use_count() forward to the count object pn.
+ bool unique() const // never throws
+ {
+ return pn.unique();
+ }
+
+ long use_count() const // never throws
+ {
+ return pn.use_count();
+ }
+
+ // Exchanges both the pointer and the count with other.
+ void swap(shared_ptr<T> & other) // never throws
+ {
+ std::swap(px, other.px);
+ pn.swap(other.pn);
+ }
+
+ // Ordering helper used by operator<: compares the count objects, not
+ // the stored pointers.
+ template<class Y> bool _internal_less(shared_ptr<Y> const & rhs) const
+ {
+ return pn < rhs.pn;
+ }
+
+ // Helper used by get_deleter(): forwards the type_info to the count.
+ void * _internal_get_deleter(std::type_info const & ti) const
+ {
+ return pn.get_deleter(ti);
+ }
+
+// Tasteless as this may seem, making all members public allows member templates
+// to work in the absence of member template friends. (Matthew Langston)
+
+#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+
+private:
+
+ template<class Y> friend class shared_ptr;
+ template<class Y> friend class weak_ptr;
+
+
+#endif
+
+ T * px; // contained pointer
+ detail::shared_count pn; // reference counter
+
+}; // shared_ptr
+
+// Equality compares the stored raw pointers (get()), not the counts.
+template<class T, class U> inline bool operator==(shared_ptr<T> const & a, shared_ptr<U> const & b)
+{
+ return a.get() == b.get();
+}
+
+// Inequality compares the stored raw pointers (get()), not the counts.
+template<class T, class U> inline bool operator!=(shared_ptr<T> const & a, shared_ptr<U> const & b)
+{
+ return a.get() != b.get();
+}
+
+// Same-type overload, compiled only for g++ 2.x up to 2.96.
+#if __GNUC__ == 2 && __GNUC_MINOR__ <= 96
+
+// Resolve the ambiguity between our op!= and the one in rel_ops
+
+template<class T> inline bool operator!=(shared_ptr<T> const & a, shared_ptr<T> const & b)
+{
+ return a.get() != b.get();
+}
+
+#endif
+
+// Ordering delegates to _internal_less, which compares the count objects
+// of a and b rather than their raw pointers.
+template<class T, class U> inline bool operator<(shared_ptr<T> const & a, shared_ptr<U> const & b)
+{
+ return a._internal_less(b);
+}
+
+// Non-member swap for shared_ptr: forwards to the member function a.swap(b).
+template<class T> inline void swap(shared_ptr<T> & a, shared_ptr<T> & b)
+{
+ a.swap(b);
+}
+
+// Builds a shared_ptr<T> from r through the static_cast_tag constructor.
+template<class T, class U> shared_ptr<T> static_pointer_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::static_cast_tag());
+}
+
+// Builds a shared_ptr<T> from r through the const_cast_tag constructor.
+template<class T, class U> shared_ptr<T> const_pointer_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::const_cast_tag());
+}
+
+// Builds a shared_ptr<T> from r through the dynamic_cast_tag constructor;
+// that constructor resets the count when the dynamic_cast yields 0.
+template<class T, class U> shared_ptr<T> dynamic_pointer_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::dynamic_cast_tag());
+}
+
+// shared_*_cast names are deprecated. Use *_pointer_cast instead.
+
+// Deprecated spelling of static_pointer_cast (same implementation).
+template<class T, class U> shared_ptr<T> shared_static_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::static_cast_tag());
+}
+
+// Deprecated spelling of dynamic_pointer_cast (same implementation).
+template<class T, class U> shared_ptr<T> shared_dynamic_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::dynamic_cast_tag());
+}
+
+// Uses the polymorphic_cast_tag constructor, which raises std::bad_cast
+// (via boost::throw_exception) when the dynamic_cast yields 0.
+template<class T, class U> shared_ptr<T> shared_polymorphic_cast(shared_ptr<U> const & r)
+{
+ return shared_ptr<T>(r, detail::polymorphic_cast_tag());
+}
+
+// Static downcast whose validity is checked only by BOOST_ASSERT: the
+// assertion compares the dynamic_cast result with the original pointer,
+// then the conversion itself is done with shared_static_cast.
+template<class T, class U> shared_ptr<T> shared_polymorphic_downcast(shared_ptr<U> const & r)
+{
+ BOOST_ASSERT(dynamic_cast<T *>(r.get()) == r.get());
+ return shared_static_cast<T>(r);
+}
+
+// get_pointer() enables boost::mem_fn to recognize shared_ptr
+
+// Returns p.get(); ownership is unaffected.
+template<class T> inline T * get_pointer(shared_ptr<T> const & p)
+{
+ return p.get();
+}
+
+// operator<<
+
+// Stream insertion writes the raw pointer value (p.get()) to os.
+// g++ before 3.x gets a plain std::ostream overload; all other compilers
+// get the basic_ostream template, with an extra spelling for MSVC6 + STLport.
+#if defined(__GNUC__) && (__GNUC__ < 3)
+
+template<class Y> std::ostream & operator<< (std::ostream & os, shared_ptr<Y> const & p)
+{
+ os << p.get();
+ return os;
+}
+
+#else
+
+# if defined(BOOST_MSVC) && BOOST_WORKAROUND(BOOST_MSVC, <= 1200 && __SGI_STL_PORT)
+// MSVC6 has problems finding std::basic_ostream through the using declaration in namespace _STL
+using std::basic_ostream;
+template<class E, class T, class Y> basic_ostream<E, T> & operator<< (basic_ostream<E, T> & os, shared_ptr<Y> const & p)
+# else
+template<class E, class T, class Y> std::basic_ostream<E, T> & operator<< (std::basic_ostream<E, T> & os, shared_ptr<Y> const & p)
+# endif
+{
+ os << p.get();
+ return os;
+}
+
+#endif
+
+// get_deleter (experimental)
+
+// get_deleter<D>(p): asks p for its deleter via _internal_get_deleter,
+// passing typeid(D), and casts the returned void pointer to D *.
+// The two variants differ only in how that cast is spelled.
+#if (defined(__GNUC__) && (__GNUC__ < 3)) || (defined(__EDG_VERSION__) && (__EDG_VERSION__ <= 238))
+
+// g++ 2.9x doesn't allow static_cast<X const *>(void *)
+// apparently EDG 2.38 also doesn't accept it
+
+template<class D, class T> D * get_deleter(shared_ptr<T> const & p)
+{
+ void const * q = p._internal_get_deleter(typeid(D));
+ return const_cast<D *>(static_cast<D const *>(q));
+}
+
+#else
+
+template<class D, class T> D * get_deleter(shared_ptr<T> const & p)
+{
+ return static_cast<D *>(p._internal_get_deleter(typeid(D)));
+}
+
+#endif
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+# pragma warning(pop)
+#endif
+
+#endif // #if defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+
+#endif // #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
diff --git a/indexlib/boost-compat/smart_ptr.hpp b/indexlib/boost-compat/smart_ptr.hpp
new file mode 100644
index 000000000..c10de4572
--- /dev/null
+++ b/indexlib/boost-compat/smart_ptr.hpp
@@ -0,0 +1,31 @@
+#ifndef LPC_SMART_PTR_HPP1119293317_INCLUDE_GUARD_
+#define LPC_SMART_PTR_HPP1119293317_INCLUDE_GUARD_
+
+//
+// smart_ptr.hpp
+//
+// For convenience, this header includes the rest of the smart
+// pointer library headers.
+//
+// Copyright (c) 2003 Peter Dimov Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// http://www.boost.org/libs/smart_ptr/smart_ptr.htm
+//
+
+#include <boost/config.hpp>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+
+#if !defined(BOOST_NO_MEMBER_TEMPLATES) || defined(BOOST_MSVC6_MEMBER_TEMPLATES)
+# include <boost/weak_ptr.hpp>
+# include <boost/intrusive_ptr.hpp>
+# include <boost/enable_shared_from_this.hpp>
+#endif
+
+
+#endif /* LPC_SMART_PTR_HPP1119293317_INCLUDE_GUARD_ */
diff --git a/indexlib/boost-compat/static_assert.hpp b/indexlib/boost-compat/static_assert.hpp
new file mode 100644
index 000000000..76de9c49b
--- /dev/null
+++ b/indexlib/boost-compat/static_assert.hpp
@@ -0,0 +1,11 @@
+#ifndef LPC_STATIC_ASSERT_HPP1119293317_INCLUDE_GUARD_
+#define LPC_STATIC_ASSERT_HPP1119293317_INCLUDE_GUARD_
+
+// With HAVE_BOOST defined the real <boost/static_assert.hpp> is used.
+// Otherwise, unless the macro already exists, BOOST_STATIC_ASSERT( x )
+// expands to nothing, so the condition is NOT checked in that build.
+#ifdef HAVE_BOOST
+#include <boost/static_assert.hpp>
+#elif !defined( BOOST_STATIC_ASSERT )
+#define BOOST_STATIC_ASSERT( x )
+#endif
+
+
+#endif /* LPC_STATIC_ASSERT_HPP1119293317_INCLUDE_GUARD_ */
diff --git a/indexlib/boost-compat/weak_ptr.hpp b/indexlib/boost-compat/weak_ptr.hpp
new file mode 100644
index 000000000..c23850079
--- /dev/null
+++ b/indexlib/boost-compat/weak_ptr.hpp
@@ -0,0 +1,192 @@
+#ifndef BOOST_WEAK_PTR_HPP_INCLUDED
+#define BOOST_WEAK_PTR_HPP_INCLUDED
+
+//
+// weak_ptr.hpp
+//
+// Copyright (c) 2001, 2002, 2003 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// See http://www.boost.org/libs/smart_ptr/weak_ptr.htm for documentation.
+//
+
+#include <boost/shared_ptr.hpp>
+
+#ifdef BOOST_MSVC // moved here to work around VC++ compiler crash
+# pragma warning(push)
+# pragma warning(disable:4284) // odd return type for operator->
+#endif
+
+namespace boost
+{
+
+// Companion of shared_ptr made of two members: px, a raw pointer, and pn,
+// a detail::weak_count. A shared_ptr is obtained from it with lock().
+template<class T> class weak_ptr
+{
+private:
+
+ // Borland 5.5.1 specific workarounds
+ typedef weak_ptr<T> this_type;
+
+public:
+
+ typedef T element_type;
+
+ // Constructs an empty weak_ptr: null px, default-constructed pn.
+ weak_ptr(): px(0), pn() // never throws in 1.30+
+ {
+ }
+
+// generated copy constructor, assignment, destructor are fine
+
+
+//
+// The "obvious" converting constructor implementation:
+//
+// template<class Y>
+// weak_ptr(weak_ptr<Y> const & r): px(r.px), pn(r.pn) // never throws
+// {
+// }
+//
+// has a serious problem.
+//
+// r.px may already have been invalidated. The px(r.px)
+// conversion may require access to *r.px (virtual inheritance).
+//
+// It is not possible to avoid spurious access violations since
+// in multithreaded programs r.px may be invalidated at any point.
+//
+
+ // Converting constructor: px is taken from r.lock().get() instead of
+ // r.px, for the reason explained in the comment above.
+ template<class Y>
+ weak_ptr(weak_ptr<Y> const & r): pn(r.pn) // never throws
+ {
+ px = r.lock().get();
+ }
+
+ // Construction from a shared_ptr: copies its pointer and builds the
+ // weak count from its count.
+ template<class Y>
+ weak_ptr(shared_ptr<Y> const & r): px(r.px), pn(r.pn) // never throws
+ {
+ }
+
+#if !defined(BOOST_MSVC) || (BOOST_MSVC > 1200)
+
+ // Converting assignment; px again comes from r.lock().get().
+ template<class Y>
+ weak_ptr & operator=(weak_ptr<Y> const & r) // never throws
+ {
+ px = r.lock().get();
+ pn = r.pn;
+ return *this;
+ }
+
+ // Assignment from a shared_ptr.
+ template<class Y>
+ weak_ptr & operator=(shared_ptr<Y> const & r) // never throws
+ {
+ px = r.px;
+ pn = r.pn;
+ return *this;
+ }
+
+#endif
+
+ // Returns a shared_ptr built from *this, or an empty shared_ptr when
+ // expired() is true (or, in the threaded build, when constructing the
+ // shared_ptr throws bad_weak_ptr).
+ shared_ptr<T> lock() const // never throws
+ {
+#if defined(BOOST_HAS_THREADS)
+
+ // optimization: avoid throw overhead
+ if(expired())
+ {
+ return shared_ptr<element_type>();
+ }
+
+ try
+ {
+ return shared_ptr<element_type>(*this);
+ }
+ catch(bad_weak_ptr const &)
+ {
+ // Q: how can we get here?
+ // A: another thread may have invalidated r after the use_count test above.
+ return shared_ptr<element_type>();
+ }
+
+#else
+
+ // optimization: avoid try/catch overhead when single threaded
+ return expired()? shared_ptr<element_type>(): shared_ptr<element_type>(*this);
+
+#endif
+ }
+
+ // Forwards to the count object.
+ long use_count() const // never throws
+ {
+ return pn.use_count();
+ }
+
+ // True when the count object reports a use count of zero.
+ bool expired() const // never throws
+ {
+ return pn.use_count() == 0;
+ }
+
+ // Swaps *this with a default-constructed temporary.
+ void reset() // never throws in 1.30+
+ {
+ this_type().swap(*this);
+ }
+
+ // Exchanges both the pointer and the count with other.
+ void swap(this_type & other) // never throws
+ {
+ std::swap(px, other.px);
+ pn.swap(other.pn);
+ }
+
+ // Sets both members directly; called by sp_enable_shared_from_this in
+ // shared_ptr.hpp.
+ void _internal_assign(T * px2, detail::shared_count const & pn2)
+ {
+ px = px2;
+ pn = pn2;
+ }
+
+ // Ordering helper used by operator<: compares the count objects.
+ template<class Y> bool _internal_less(weak_ptr<Y> const & rhs) const
+ {
+ return pn < rhs.pn;
+ }
+
+// Tasteless as this may seem, making all members public allows member templates
+// to work in the absence of member template friends. (Matthew Langston)
+
+#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS
+
+private:
+
+ template<class Y> friend class weak_ptr;
+ template<class Y> friend class shared_ptr;
+
+#endif
+
+ T * px; // contained pointer
+ detail::weak_count pn; // reference counter
+
+}; // weak_ptr
+
+// Ordering delegates to _internal_less, which compares the count objects.
+template<class T, class U> inline bool operator<(weak_ptr<T> const & a, weak_ptr<U> const & b)
+{
+ return a._internal_less(b);
+}
+
+// Non-member swap for weak_ptr: forwards to the member function a.swap(b).
+template<class T> void swap(weak_ptr<T> & a, weak_ptr<T> & b)
+{
+ a.swap(b);
+}
+
+// deprecated, provided for backward compatibility
+// Equivalent to calling r.lock().
+template<class T> shared_ptr<T> make_shared(weak_ptr<T> const & r)
+{
+ return r.lock();
+}
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+# pragma warning(pop)
+#endif
+
+#endif // #ifndef BOOST_WEAK_PTR_HPP_INCLUDED
diff --git a/indexlib/compat.h b/indexlib/compat.h
new file mode 100644
index 000000000..f7c1c2702
--- /dev/null
+++ b/indexlib/compat.h
@@ -0,0 +1,58 @@
+#ifndef LPC_COMPAT_H_INCLUDE_GUARD_
+#define LPC_COMPAT_H_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Leo Savernik <l.savernik@aon.at>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+// When HAVE_STLNAMESPACE is not defined, supply a minimal stand-in for
+// std::iterator: a struct that only publishes the five iterator typedefs
+// derived from its template parameters.
+#ifndef HAVE_STLNAMESPACE
+# include <stddef.h>
+// fake std::iterator
+namespace std {
+template <class _Category, class _Tp, class _Distance = ptrdiff_t,
+ class _Pointer = _Tp*, class _Reference = _Tp&>
+struct iterator {
+ typedef _Category iterator_category;
+ typedef _Tp value_type;
+ typedef _Distance difference_type;
+ typedef _Pointer pointer;
+ typedef _Reference reference;
+};
+}
+#endif
+
+/* compatibility replacements for functions not defined in older libstdc++ */
+// Returns a copy of the smaller of a and b, compared with operator<.
+template<class T> T kMin(const T &a, const T &b)
+{
+    if (a < b) return a;
+    return b;
+}
+// Returns a copy of the larger of a and b, compared with operator>.
+template<class T> T kMax(const T &a, const T &b)
+{
+    if (a > b) return a;
+    return b;
+}
+
+#endif /* LPC_COMPAT_H_INCLUDE_GUARD_ */
diff --git a/indexlib/compressed.cpp b/indexlib/compressed.cpp
new file mode 100644
index 000000000..fdcc0219b
--- /dev/null
+++ b/indexlib/compressed.cpp
@@ -0,0 +1,185 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "compressed.h"
+#include "logfile.h"
+#include "bitio.h"
+#include "path.h"
+#include "format.h"
+#include <zlib.h>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+// File-local pointer to the memory_manager installed through
+// set_compression_manager() and returned by get_comp_p(); starts out null.
+namespace {
+ memory_manager* cmanager = 0;
+}
+
+// Stores m as the current compression manager. Only the pointer is kept.
+void set_compression_manager( memory_manager* m ) {
+ cmanager = m;
+}
+// Returns the pointer last passed to set_compression_manager(), or 0 if
+// none has been set.
+memory_manager* get_comp_p() {
+ return cmanager;
+}
+
+
+// Builds the two backing stores from path_concat( base, "table" ) and
+// path_concat( base, "data" ). Entry 0 of the table holds the logical file
+// size (see size()), so an empty table is seeded with a single 0.
+compressed_file::compressed_file( std::string base ):
+ auxdata_( path_concat( base, "table" ) ),
+ data_( std::auto_ptr<memory_manager>( new mmap_manager( path_concat( base, "data" ) ) ) )
+{
+ if ( auxdata_.empty() ) auxdata_.push_back( 0 );
+}
+
+// Flushes dirty pages to the compressed store, then frees the page cache.
+// The pages are allocated with new in ronly_base() and owned by pages_;
+// without the deletes below every cached page would be leaked.
+compressed_file::~compressed_file() {
+    write_back();
+    for ( std::vector<page*>::iterator it = pages_.begin(); it != pages_.end(); ++it ) {
+        delete *it; // entries for pages never loaded are null; delete of null is a no-op
+    }
+    pages_.clear();
+}
+
+// Returns a read-only pointer to byte idx of the uncompressed image.
+//
+// The image is handled in pages of page_bytes bytes. On first access the
+// page containing idx is inflated from the compressed store into a newly
+// allocated page object that is cached in pages_; later calls for the same
+// page reuse the cached buffer. idx must be smaller than size() (asserted).
+// Any zlib failure ends up in zlibcheck(), which aborts the process.
+const unsigned char* compressed_file::ronly_base( const unsigned idx ) const {
+    logfile() << format( "%s ( addr: %s ) page: %s \n" ) % __PRETTY_FUNCTION__ % idx % pageidx( idx );
+    assert( idx < size() );
+    const unsigned pagei = pageidx( idx );
+    if ( pages_.size() <= pagei ) pages_.resize( pagei + 1 );
+    if ( !pages_[ pagei ] ) {
+        logfile() << format( "%s allocating a page %s\n" ) % __PRETTY_FUNCTION__ % pagei;
+        // NOTE(review): the page is created with dirty_ == true, so pages
+        // that are only ever read are still recompressed by write_back().
+        // Kept as is; confirm whether this is intentional.
+        page* p = new page( true );
+        pages_[ pagei ] = p;
+        p->origin_ = compressed_pageptr::cast_from_uint32( compressed_data_for( pagei ) );
+
+        // zlib requires next_in, avail_in, zalloc, zfree and opaque to be
+        // initialised before inflateInit(); zero everything, then fill in
+        // the input description before initialising the stream.
+        z_stream stream = z_stream();
+        stream.next_in = reinterpret_cast<unsigned char*>( p->origin_->data() );
+        stream.avail_in = ( 1 << p->origin_->capacity() ) - compressed_page::header_size;
+        zlibcheck( inflateInit( &stream ) );
+        stream.next_out = p->data_;
+        // data_ holds exactly page_bytes bytes; advertising more would let
+        // zlib write past the end of the buffer on malformed input.
+        stream.avail_out = page_bytes;
+        zlibcheck( inflate( &stream, Z_FINISH ), Z_STREAM_END );
+        zlibcheck( inflateEnd( &stream ) );
+    }
+    return & ( pages_[ pagei ]->data_[ inpageidx( idx ) ] );
+}
+
+// Debug output: forwards to the print() of the underlying pool data_.
+void compressed_file::print( std::ostream& out ) const {
+ data_.print( out );
+}
+
+// Removes both backing stores of the compressed file rooted at base:
+// the "table" memvector and the "data" mmap_manager store.
+void compressed_file::remove( std::string base ) {
+ memvector<uint32_t>::remove( path_concat( base, "table" ) );
+ mmap_manager::remove( path_concat( base, "data" ) );
+}
+
+
+// Logical size in bytes of the uncompressed image; stored in entry 0 of
+// auxdata_ and set by resize() to a whole number of pages.
+unsigned compressed_file::size() const {
+ return auxdata_[ 0 ];
+}
+
+// Grows the logical file so that it holds at least n_s bytes, rounded up to
+// a whole number of pages. Requests that do not exceed the current size are
+// ignored (the file never shrinks). Every page added is stored as the
+// compressed form of an all-zero page.
+void compressed_file::resize( const unsigned n_s ) {
+    logfile() << format( "%s ( %s )\n" ) % __PRETTY_FUNCTION__ % n_s;
+    if ( size() >= n_s ) return;
+
+    const unsigned first_new_page = size() >> page_bits;
+    assert( !( size() % page_bytes ) );
+
+    // Round the request up to full pages.
+    const unsigned partial_page = ( n_s % page_bytes ) ? 1 : 0;
+    const unsigned targetpages = ( n_s >> page_bits ) + partial_page;
+    auxdata_[ 0 ] = targetpages << page_bits;
+    logfile() << format( "Size set to %s (pages = %s, n_s = %s ) \n" ) % size() % targetpages % n_s;
+    auxdata_.resize( targetpages + 1 ); // 1 for the auxdata_[0]
+
+    // Compress one zero-filled page once; every new page gets a copy of it.
+    static unsigned char empty[ page_bytes ] = { 0 };
+    unsigned char empty_compress[ compress_buffer_size ];
+    uLongf compressed_len = compress_buffer_size;
+    zlibcheck( compress( empty_compress, &compressed_len, empty, page_bytes ) );
+
+    for ( unsigned pagei = first_new_page; pagei < targetpages; ++pagei ) {
+        compressed_pageptr fresh = data_.allocate( compressed_len + compressed_page::header_size );
+        compressed_page::init( fresh );
+        fresh->grow_to_size( compressed_len );
+        std::memcpy( fresh->data(), empty_compress, compressed_len );
+        compressed_data_for ( pagei ) = fresh.cast_to_uint32();
+    }
+}
+
+// Writable counterpart of ronly_base(): makes sure the page containing idx
+// is loaded, marks it dirty so that write_back() stores it again, and
+// returns a non-const pointer to the same byte.
+unsigned char* compressed_file::rw_base( unsigned idx ) const {
+    // ronly_base() guarantees that pages_[ pageidx( idx ) ] exists afterwards.
+    unsigned char* const writable = const_cast<unsigned char*>( ronly_base( idx ) );
+    pages_[ pageidx( idx ) ]->dirty_ = true;
+    logfile() << format( "rw_base( %s ), pageidx=%s returning %s\n" ) % idx % pageidx( idx ) % ( void* )writable;
+    return writable;
+}
+
+// Recompresses every cached page whose dirty_ flag is set and stores the
+// result in the compressed pool, reallocating the pool block when the new
+// compressed form no longer fits in it. Pages that are not loaded or not
+// dirty are only logged. Any zlib failure ends up in zlibcheck(), which
+// aborts the process. The dirty_ flag is left unchanged.
+void compressed_file::write_back() {
+    logfile() <<format( "%s\n" ) % __PRETTY_FUNCTION__;
+    for ( unsigned pagei = 0, npages = pages_.size(); pagei != npages; ++pagei ) {
+        page* p = pages_[ pagei ];
+        if ( !p || !p->dirty_ ) {
+            logfile() << format( "write_back() not saving %s\n" ) % pagei;
+            continue;
+        }
+
+        unsigned char buffer[ compress_buffer_size ];
+        uLongf compressed_size = compress_buffer_size;
+        zlibcheck( compress( buffer, &compressed_size, p->data_, page_bytes ) );
+        {
+            // Self-check: inflate what was just produced and compare it
+            // with the page contents (the comparison itself is an assert).
+            unsigned char buffer2[ page_bytes ] = {0};
+            // zlib requires next_in/avail_in/zalloc/zfree/opaque to be
+            // initialised before inflateInit().
+            z_stream stream = z_stream();
+            stream.next_in = buffer;
+            stream.avail_in = compressed_size;
+            zlibcheck( inflateInit( &stream ) );
+            stream.next_out = buffer2;
+            // buffer2 holds exactly page_bytes bytes; never advertise more.
+            stream.avail_out = page_bytes;
+            zlibcheck( inflate( &stream, Z_FINISH ), Z_STREAM_END );
+            zlibcheck( inflateEnd( &stream ) );
+
+            assert( !std::memcmp( buffer2, p->data_, page_bytes ) );
+        }
+
+        logfile() << format( "Compressed page %s to size %s\n" ) % pagei % compressed_size;
+        // Payload room of the block currently backing this page.
+        const unsigned original = ( 1 << p->origin_->capacity() ) - compressed_page::header_size;
+        if ( compressed_size > original ) {
+            p->origin_ = data_.reallocate( p->origin_, compressed_size + compressed_page::header_size );
+            p->origin_->grow_to_size( compressed_size );
+        }
+        std::memcpy( p->origin_->data(), buffer, compressed_size );
+        compressed_data_for( pagei ) = p->origin_.cast_to_uint32();
+    }
+}
+
+
+// Verifies a zlib return code. When err differs from expected, the code is
+// reported on std::cerr and the process is terminated with std::abort();
+// otherwise the call has no effect.
+void compressed_file::zlibcheck( int err, int expected ) const {
+    if ( err == expected ) return;
+    std::cerr << "zlib reports an error: " << err << std::endl;
+    // throw compressed_file::zlib_exception( err );
+    std::abort();
+}
+
diff --git a/indexlib/compressed.h b/indexlib/compressed.h
new file mode 100644
index 000000000..7397c8215
--- /dev/null
+++ b/indexlib/compressed.h
@@ -0,0 +1,127 @@
+#ifndef LPC_COMPRESSED_H1108569807_INCLUDE_GUARD_
+#define LPC_COMPRESSED_H1108569807_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <iostream>
+#include "memvector.h"
+#include "manager.h"
+#include "mempool.h"
+#include "thing.h"
+#include "pointer.h"
+#include "format.h"
+
+void set_compression_manager( memory_manager* );
+memory_manager* get_comp_p();
+
+// compressed_page: one block in the compressed-data pool. It consists of a
+// one-byte header (header_size) followed by the payload returned by data().
+// The header stores "capacity", the log2 of the block size: the block size
+// is 1 << capacity() bytes, so the payload room is that minus header_size.
+// NOTE(review): START_THING / MEMBER / MY_BASE / END_THING / DO_POINTER_SPECS
+// are macros defined elsewhere; MEMBER presumably generates the capacity()
+// and set_capacity() accessors used below -- confirm against thing.h.
+START_THING( compressed_page, thing<thing_manager<get_comp_p> > )
+ public: // private:
+ MEMBER( uint8_t, capacity, 0 )
+ enum { header_size = 1 };
+ MY_BASE( header_size )
+ public:
+ // Initialises a new block with capacity 4, i.e. 1 << 4 = 16 bytes.
+ static void init( pointer<compressed_page> p ) {
+ p->set_capacity( 4 );
+ }
+ void * data() { return my_base(); }
+ const void * data() const { return my_base(); }
+ // Raises capacity one step at a time (recursively) until the payload
+ // room, ( 1 << capacity() ) - header_size, is at least size. Only the
+ // header field is updated here.
+ void grow_to_size( const unsigned size ) {
+ unsigned cur = ( 1 << capacity() ) - header_size;
+ if ( size <= cur ) return;
+ set_capacity( capacity() + 1 );
+ grow_to_size( size );
+ }
+ // Prints the block size, 1 << capacity().
+ void print( std::ostream& out ) {
+ out << format( "\tsized %s\n" ) % ( 1 << capacity() );
+ }
+END_THING( compressed_page )
+DO_POINTER_SPECS( compressed_page )
+
+// Traits describing compressed_page blocks; passed as the template argument
+// of mempool in compressed_file.
+struct compressed_page_traits {
+ typedef compressed_page value_type;
+ typedef compressed_pageptr pointer;
+
+ // A block is treated as free when its capacity byte is 0.
+ static bool is_free( pointer p ) { return p->capacity() == 0; }
+ static void mark_free( pointer p ) { p->set_capacity( 0 ); }
+ // Block size in bytes, header included.
+ static unsigned size_of( pointer p ) { return 1 << p->capacity(); }
+
+ static unsigned type_offset() { return compressed_page::header_size; }
+
+ // NOTE(review): max_size() is 4096, while compressed_file may request up
+ // to compress_buffer_size + header_size bytes (more than 4096) for a
+ // page that does not compress; confirm how mempool treats such requests.
+ static unsigned min_size() { return 16; } // zlib needs minimum 12 bytes
+ static unsigned max_size() { return 1024 * 4; }
+
+ static void set_manager( memory_manager* m ) { set_compression_manager( m ); }
+ static void print( std::ostream& out, pointer p ) { out << '[' << p << "] compressed_page:\n"; p->print( out ); }
+};
+
+// A memory_manager whose contents are kept zlib-compressed, one page
+// (page_bytes bytes) at a time. Pages are inflated on first access, cached
+// in pages_, and recompressed by write_back(), which the destructor calls.
+// NOTE(review): the class holds raw page pointers and declares no copy
+// constructor or copy assignment; it presumably must not be copied.
+class compressed_file : public memory_manager {
+ struct page;
+ friend struct page;
+ public:
+ // The argument is the base path; the stores "table" and "data" are
+ // derived from it with path_concat().
+ compressed_file( std::string );
+ ~compressed_file();
+ // Pointer to uncompressed byte idx; idx must be below size().
+ const unsigned char* ronly_base( unsigned idx ) const;
+ // Same as ronly_base(), but also marks the page dirty.
+ unsigned char* rw_base( unsigned idx ) const;
+ unsigned size() const;
+ // Grows the file to at least the given size, rounded up to whole pages;
+ // never shrinks it.
+ void resize( unsigned );
+ void print( std::ostream& ) const;
+ static void remove( std::string base );
+ private:
+
+ void write_back();
+ // NOTE(review): zlibchecktrue() has no definition in compressed.cpp.
+ void zlibchecktrue( bool ) const;
+ // Aborts the process when err != expected.
+ void zlibcheck( int err, int expected = 0 ) const;
+
+ // page_bytes is 4096; compress_buffer_size is the size of the scratch
+ // buffers handed to zlib's compress() for one page.
+ enum { page_bits = 12, page_bytes = 1 << page_bits, compress_buffer_size = page_bytes + 12 + page_bytes/50 };
+ // Page number containing byte idx, and the offset of idx in that page.
+ static unsigned pageidx( unsigned idx ) { return idx >> page_bits; }
+ static unsigned inpageidx( unsigned idx ) { return idx & ( ( 1 << page_bits ) - 1 ); }
+ // One cached, uncompressed page.
+ struct page {
+ explicit page( bool d = false ):
+ dirty_( d ) {
+ }
+ compressed_pageptr origin_; // block in data_ holding the compressed form
+ bool dirty_; // true when write_back() must store the page
+ unsigned char data_[ compressed_file::page_bytes ];
+ };
+ // Page cache indexed by page number; null entries are pages not loaded.
+ mutable std::vector<page*> pages_;
+ // Entry 0: logical size in bytes. Entry i + 1: location of page i's
+ // compressed block, as produced by cast_to_uint32().
+ mutable memvector<uint32_t> auxdata_;
+ memory_reference<uint32_t> compressed_data_for( unsigned idx ) const {
+ return auxdata_[ idx + 1 ];
+ }
+ // Pool holding the compressed blocks.
+ mempool<compressed_page_traits> data_;
+};
+
+
+
+#endif /* LPC_COMPRESSED_H1108569807_INCLUDE_GUARD_ */
diff --git a/indexlib/configure.in.in b/indexlib/configure.in.in
new file mode 100644
index 000000000..1f1a806a4
--- /dev/null
+++ b/indexlib/configure.in.in
@@ -0,0 +1,22 @@
+dnl The line below isn't really a comment, so don't remove
+dnl AC_OUTPUT(indexlib/indexlib-config)
+
+dnl Optional --with-boost-includedir=DIR; the value is kept in
+dnl $boost_includedir.
+AC_ARG_WITH(boost-includedir,
+ AC_HELP_STRING([--with-boost-includedir],
+ [Specify the path to boost's headers]),
+ boost_includedir="$withval")
+
+dnl If a directory was given, prepend it to CXXFLAGS as an include path
+dnl before probing for the header.
+AC_LANG_PUSH(C++)
+if test "x$boost_includedir" != "x"; then
+ CXXFLAGS="-I$boost_includedir $CXXFLAGS"
+fi
+
+dnl Probe for boost/format.hpp and record the result in $have_boost.
+KDE_CHECK_HEADER(boost/format.hpp, have_boost=yes,have_boost=no)
+
+AC_LANG_POP
+
+dnl When the header was found, define HAVE_BOOST for the C++ compiler.
+if test "x$have_boost" = "xyes"; then
+ CXXFLAGS="-DHAVE_BOOST $CXXFLAGS"
+fi
+
+ \ No newline at end of file
diff --git a/indexlib/create.cpp b/indexlib/create.cpp
new file mode 100644
index 000000000..1020cc268
--- /dev/null
+++ b/indexlib/create.cpp
@@ -0,0 +1,116 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "create.h"
+#include "quotes.h"
+#include "path.h"
+#include "version.h"
+#include <fstream>
+#include <unistd.h>
+
+namespace {
+
+/**
+ * Reads the "info" file belonging to \a basename and reports which kind of
+ * index it describes.
+ *
+ * The file is expected to hold a marker line, a "version <major>.<minor>"
+ * line and the type name ("quotes" or "ifile"). A missing or unreadable
+ * file, or an unknown type name, yields index_type::none.
+ */
+indexlib::index_type::type type_of( const char* basename ) {
+    namespace kinds = indexlib::index_type;
+
+    std::ifstream meta( path_concat( basename, "info" ).c_str() );
+    if ( meta.fail() ) return kinds::none;
+
+    std::string marker_line, version_tag, type_name;
+    int ver_major, ver_minor;
+    char dot;
+    std::getline( meta, marker_line );
+    meta >> version_tag >> ver_major >> dot >> ver_minor >> type_name;
+    if ( meta.fail() ) return kinds::none;
+
+    if ( type_name == "quotes" ) return kinds::quotes;
+    if ( type_name == "ifile" ) return kinds::ifile;
+    return kinds::none;
+}
+}
+
+/**
+ * Creates a new index of kind \a flags under \a basename.
+ *
+ * Returns a null pointer when \a basename is null, when \a flags is neither
+ * index_type::quotes nor index_type::ifile, when an index already exists
+ * under \a basename, when a required directory cannot be created, or when
+ * the index constructor throws (the error is reported on std::cerr).
+ *
+ * If \a basename ends with '/' and is not yet a directory, the directory
+ * is created first.
+ */
+std::auto_ptr<indexlib::index> indexlib::create( const char* basename, indexlib::index_type::type flags ) {
+    using namespace indexlib::version;
+    typedef std::auto_ptr<indexlib::index> index_ptr;
+
+    if ( !basename ) return index_ptr( 0 );
+    // Reject unsupported kinds before touching the disk, so that no directory
+    // and no truncated "info" file are left behind for a request that cannot succeed.
+    if ( flags != index_type::quotes && flags != index_type::ifile ) return index_ptr( 0 );
+    if ( type_of( basename ) != indexlib::index_type::none ) return index_ptr( 0 );
+    try {
+        // Only look at the last character when there is one: an empty
+        // basename must not index before the start of the string.
+        const std::size_t len = strlen( basename );
+        if ( len != 0 && basename[ len - 1 ] == '/' && !isdir( basename ) ) {
+            if ( !indexlib::detail::mkdir_trailing( basename ) ) return index_ptr( 0 );
+        }
+        std::ofstream info( path_concat( basename, "info" ).c_str() );
+        info << marker << std::endl;
+        info << "version " << major << '.' << minor << "\n";
+        if ( flags == index_type::quotes ) {
+            info << "quotes" << std::endl;
+            return index_ptr( new quotes( basename ) );
+        }
+        // Only index_type::ifile can reach this point (see the check above).
+        info << "ifile" << std::endl;
+        return index_ptr( new ifile( basename ) );
+    } catch ( const std::exception& e ) {
+        std::cerr << "index creation failed: " << e.what() << std::endl;
+    }
+    return index_ptr( 0 );
+}
+
+/**
+ * Opens the index stored under \a basename.
+ *
+ * An existing index is opened with the implementation named in its "info"
+ * file. When none exists, a null pointer is returned if \a flags equals
+ * open_flags::fail_if_nonexistant; otherwise \a flags is converted to an
+ * index_type::type and handed to create().
+ */
+std::auto_ptr<indexlib::index> indexlib::open( const char* basename, unsigned flags ) {
+    using namespace indexlib;
+    typedef std::auto_ptr<indexlib::index> index_ptr;
+
+    const index_type::type kind = type_of( basename );
+    if ( kind == index_type::ifile ) return index_ptr( new ifile( basename ) );
+    if ( kind == index_type::quotes ) return index_ptr( new quotes( basename ) );
+    if ( kind == index_type::none ) {
+        if ( flags == open_flags::fail_if_nonexistant ) return index_ptr();
+        return create( basename, index_type::type( flags ) );
+    }
+    // Only reachable if type_of() hands back a value outside the enum.
+    logfile() << format( "%s:%s: Unexpected code reached!\n" ) % __FILE__ % __LINE__;
+    return index_ptr( 0 );
+}
+
+/**
+ * Returns true if an index is stored under \a basename, false otherwise
+ * (including when \a basename is null).
+ *
+ * type_of() yields index_type::none when there is no usable "info" file,
+ * so an index exists exactly when the reported type differs from none.
+ */
+bool indexlib::exists( const char* basename ) {
+    return basename && ( type_of( basename ) != indexlib::index_type::none );
+}
+
+/**
+ * Deletes the index stored under \a basename.
+ *
+ * The files of the detected implementation are removed through its static
+ * remove(); afterwards the "info" file is unlinked, whatever type was found.
+ * A null \a basename asserts in debug builds and is ignored otherwise.
+ */
+void indexlib::remove( const char* basename ) {
+    assert( basename );
+    if ( basename == 0 ) return;
+
+    using namespace indexlib;
+    const index_type::type kind = type_of( basename );
+    if ( kind == index_type::ifile ) {
+        ifile::remove( basename );
+    } else if ( kind == index_type::quotes ) {
+        quotes::remove( basename );
+    }
+    // index_type::none: there are no data files to remove.
+    ::unlink( path_concat( basename, "info" ).c_str() );
+}
+
+
+
diff --git a/indexlib/create.h b/indexlib/create.h
new file mode 100644
index 000000000..6a495e984
--- /dev/null
+++ b/indexlib/create.h
@@ -0,0 +1,81 @@
+#ifndef LPC_CREATE_H1118420718_INCLUDE_GUARD_
+#define LPC_CREATE_H1118420718_INCLUDE_GUARD_
+
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "index.h"
+#include <memory>
+
+
+namespace indexlib {
+ /**
+ * Kinds of index implementation. none is the value reported when no index is found.
+ */
+ namespace index_type {
+ enum type {
+ none,
+ ifile = 1,
+ quotes = 2
+ };
+ }
+ /**
+ * Construct an index from a basename.
+ *
+ * Basename can be either a directory, in which case the index will consist of a bunch of
+ * files in that directory all starting with "index" (this is the preferred usage: an index is a directory),
+ * or it can be any other partial file name, in which case this name will be the starting name of all the
+ * index files.
+ *
+ * If basename ends with '/' a directory is created if it does not exist.
+ *
+ * This will return something like "new quotes(basename)" but by using this, you do not need to include quotes.h
+ * which needs boost headers also.
+ */
+ std::auto_ptr<index> create( const char* basename, index_type::type flags = index_type::quotes );
+ /**
+ * Flags accepted by open(). create_ifile and create_quotes carry the numeric value of the
+ * matching index_type entry; fail_if_nonexistant is a separate value.
+ */
+ namespace open_flags {
+ enum type { none = 0,
+ create_ifile = index_type::ifile,
+ create_quotes = index_type::quotes,
+ fail_if_nonexistant };
+ }
+ /**
+ * Opens the index stored under basename.
+ *
+ * If no index exists there, a null pointer is returned when flags is open_flags::fail_if_nonexistant;
+ * for any other value, flags is converted to an index_type::type and passed to create().
+ */
+ std::auto_ptr<index> open( const char* basename, unsigned flags = open_flags::fail_if_nonexistant );
+
+ /**
+ * Removes the index.
+ */
+ void remove( const char* basename );
+ /**
+ * Returns true if an index with that name exists, false otherwise
+ */
+ bool exists( const char* basename );
+}
+
+
+
+#endif /* LPC_CREATE_H1118420718_INCLUDE_GUARD_ */
diff --git a/indexlib/docs/report.pdf b/indexlib/docs/report.pdf
new file mode 100644
index 000000000..f55921cde
--- /dev/null
+++ b/indexlib/docs/report.pdf
Binary files differ
diff --git a/indexlib/exception.cpp b/indexlib/exception.cpp
new file mode 100644
index 000000000..d638a985b
--- /dev/null
+++ b/indexlib/exception.cpp
@@ -0,0 +1,47 @@
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+#include "exception.h"
+#include <string.h>
+#include <errno.h>
+
+namespace {
+ /**
+  * Builds the message "error: <s>[<description of errno>]".
+  *
+  * errno is read before any other work is done: the string operations
+  * below may allocate, and library calls are allowed to change errno even
+  * when they succeed.
+  */
+ std::string do_errno_string( std::string s ) {
+     const int saved_errno = errno;
+     std::string res = "error: " + s + "[";
+     res += strerror( saved_errno );
+     res += "]";
+     return res;
+ }
+}
+
+// Builds the exception message from the caller's text plus the description
+// of the current errno value (see do_errno_string above).
+indexlib::detail::errno_error::errno_error( std::string context ):
+    indexlib::detail::exception( do_errno_string( context ) ) { }
+
diff --git a/indexlib/exception.h b/indexlib/exception.h
new file mode 100644
index 000000000..4592283f9
--- /dev/null
+++ b/indexlib/exception.h
@@ -0,0 +1,63 @@
+#ifndef LPC_EXCEPTION_H1123185561_INCLUDE_GUARD_
+#define LPC_EXCEPTION_H1123185561_INCLUDE_GUARD_
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <exception>
+#include <string>
+
+namespace indexlib { namespace detail {
+/**
+ * \brief Base class for indexlib exceptions
+ *
+ * Keeps a copy of the message passed to the constructor and returns it
+ * from what().
+ */
+class exception : public std::exception {
+    public:
+        exception( std::string m ): msg_( m ) { }
+        ~exception() throw() { }
+
+        virtual const char* what() const throw() { return msg_.c_str(); }
+    private:
+        std::string msg_;
+};
+/**
+ * Exception constructed from a caller-supplied string; the constructor is
+ * defined out of line (exception.cpp).
+ */
+class errno_error : public exception {
+    public:
+        errno_error( std::string );
+        ~errno_error() throw() { }
+};
+
+}}
+
+
+
+#endif /* LPC_EXCEPTION_H1123185561_INCLUDE_GUARD_ */
diff --git a/indexlib/format.h b/indexlib/format.h
new file mode 100644
index 000000000..5176044a8
--- /dev/null
+++ b/indexlib/format.h
@@ -0,0 +1,26 @@
+#ifndef LPC_FORMAT_H1119018900_INCLUDE_GUARD_
+#define LPC_FORMAT_H1119018900_INCLUDE_GUARD_
+
+#if defined( HAVE_BOOST ) && defined( DEBUG )
+
+#include <boost/format.hpp>
+
+typedef boost::format format;
+#else
+
+/**
+ * Stand-in for boost::format used when formatting support is compiled out.
+ *
+ * It accepts a format string and any number of "% argument" applications,
+ * ignores all of them, and prints nothing when streamed.
+ */
+struct null_format {
+    explicit null_format( const char* ) { }
+
+    // Swallows the argument and returns this object so calls can be chained.
+    template <typename Arg>
+    null_format& operator % ( const Arg& ) {
+        return *this;
+    }
+};
+
+// Streaming a null_format leaves the stream untouched.
+template <typename OutStream>
+inline OutStream& operator << ( OutStream& out, const null_format& ) {
+    return out;
+}
+
+typedef null_format format;
+
+#endif
+
+#endif /* LPC_FORMAT_H1119018900_INCLUDE_GUARD_ */
+
diff --git a/indexlib/ifile.cpp b/indexlib/ifile.cpp
new file mode 100644
index 000000000..ba5d7c3f8
--- /dev/null
+++ b/indexlib/ifile.cpp
@@ -0,0 +1,177 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "ifile.h"
+#include "logfile.h"
+#include "path.h"
+#include "result.h"
+#include <algorithm>
+#include <iterator>
+#include <set>
+#include <functional>
+#include <string.h>
+#include "format.h"
+#include "boost-compat/next_prior.hpp"
+
+
+// Opens (or creates) the four backing stores below the prefix \a base and
+// fetches the "latin-1:european" tokenizer.
+ifile::ifile( std::string base ):
+    docnames_( path_concat( base, "docnames" ) ),
+    words_( path_concat( base, "words" ) ),
+    stopwords_( path_concat( base, "stopwords" ) ),
+    files_( path_concat( base, "files" ) ),
+    tokenizer_( indexlib::detail::get_tokenizer( "latin-1:european" ) )
+{ }
+
+// Removes the on-disk stores that an ifile with the prefix \a base uses.
+void ifile::remove( std::string base ) {
+    const std::string docnames_path = path_concat( base, "docnames" );
+    stringarray::remove( docnames_path );
+
+    const std::string words_path = path_concat( base, "words" );
+    stringset::remove( words_path );
+
+    const std::string stopwords_path = path_concat( base, "stopwords" );
+    stringset::remove( stopwords_path );
+
+    const std::string files_path = path_concat( base, "files" );
+    leafdatavector::remove( files_path );
+}
+
+// Registers the document name \a doc and records, for every cleaned word of
+// \a str, that the word occurs in that document.
+void ifile::add( const char* str, const char* doc ) {
+    using namespace boost;
+    const unsigned doc_id = docnames_.add( doc );
+    files_.resize( doc_id + 1 );
+
+    const std::vector<std::string> terms = break_clean( str );
+    for ( std::vector<std::string>::size_type i = 0; i != terms.size(); ++i ) {
+        files_.add( words_.add( terms[ i ].c_str() ), doc_id );
+    }
+}
+
+// Drops the first document whose stored name equals \a doc, together with
+// all references to it. Does nothing when no such document is found.
+void ifile::remove_doc( const char* doc ) {
+    const unsigned total = ndocs();
+    for ( unsigned idx = 0; idx < total; ++idx ) {
+        if ( lookup_docname( idx ) != doc ) continue;
+        docnames_.erase( idx );
+        files_.remove_references_to( idx );
+        // TODO: remove from words_ too if that's the case
+        return;
+    }
+}
+
+// Result listing every document index from 0 to ndocs() - 1.
+std::auto_ptr<indexlib::result> ifile::everything() const {
+    const unsigned total = ndocs();
+    std::vector<unsigned> all_ids;
+    all_ids.reserve( total );
+    for ( unsigned id = 0; id < total; ++id ) all_ids.push_back( id );
+    return std::auto_ptr<indexlib::result>( new indexlib::detail::simple_result( all_ids ) );
+}
+
+namespace {
+// True for words of fewer than three characters.
+inline bool word_too_small( std::string str ) {
+    return str.length() <= 2;
+}
+}
+
+/**
+ * Returns the documents that contain every word of \a str.
+ *
+ * - An empty string matches every document.
+ * - If cleaning the query leaves no words, the result is empty.
+ * - If only words shorter than three characters remain, every document
+ *   is returned.
+ * - Otherwise the document sets of the individual words are intersected.
+ */
+std::auto_ptr<indexlib::result> ifile::search( const char* str ) const {
+    using namespace indexlib::detail;
+    using indexlib::result;
+    assert( str );
+    if ( *str == '\0' ) return everything();
+
+    std::vector<std::string> terms = break_clean( str );
+    if ( terms.empty() ) return std::auto_ptr<result>( new empty_result );
+    terms.erase( std::remove_if( terms.begin(), terms.end(), &word_too_small ), terms.end() );
+    if ( terms.empty() ) return everything();
+
+    std::set<unsigned> matches = find_word( terms.front() );
+    for ( std::vector<std::string>::size_type i = 1; i < terms.size(); ++i ) {
+        const std::set<unsigned> current = find_word( terms[ i ] );
+        // Keep only the documents present in both sets.
+        std::set<unsigned> common;
+        std::set_intersection( current.begin(), current.end(),
+                               matches.begin(), matches.end(),
+                               std::inserter( common, common.begin() ) );
+        matches.swap( common );
+    }
+    return std::auto_ptr<result>( new simple_result( std::vector<unsigned>( matches.begin(), matches.end() ) ) );
+}
+
+// Maintenance for this index type consists of recomputing the stop word set.
+void ifile::maintenance() { calc_stopwords(); }
+
+/**
+ * Rebuilds stopwords_ from scratch: every known word whose posting list has
+ * at least ndocs() / 4 entries becomes a stop word.
+ *
+ * NOTE(review): for fewer than four documents the threshold is 0, so every
+ * word qualifies -- confirm whether small indexes should be exempt.
+ */
+void ifile::calc_stopwords() {
+    const unsigned needed = ndocs() / 4;
+    stopwords_.clear();
+    for ( stringset::const_iterator word = words_.begin(), past = words_.end(); word != past; ++word ) {
+        // Fetch the posting list once; it is needed for both the log line and the test.
+        const std::size_t occurrences = files_.get( word.id() ).size();
+        logfile() << format( "%s(): \"%s\" %s\n" )
+            % __PRETTY_FUNCTION__
+            % *word
+            % occurrences;
+        if ( occurrences >= needed ) {
+            stopwords_.add( *word );
+            //files_.erase( word.id() );
+        }
+    }
+}
+
+// True when \a str is currently stored in the stop word set.
+bool ifile::is_stop_word( std::string str ) const {
+    const char* const key = str.c_str();
+    return stopwords_.count( key ) ? true : false;
+}
+
+// A word is rejected when it is longer than 32 characters or contains a decimal digit.
+bool ifile::invalid_word( std::string str ) {
+    if ( str.size() > 32 ) return true;
+    return str.find_first_of( "0123456789" ) != std::string::npos;
+}
+
+
+
+// Collects the documents recorded for every entry of words_ that lies in
+// the range words_.upper_lower() returns for \a word.
+std::set<unsigned> ifile::find_word( std::string word ) const {
+    typedef stringset::const_iterator word_iter;
+
+    std::set<unsigned> docs;
+    std::pair<word_iter, word_iter> range = words_.upper_lower( word.c_str() );
+    for ( word_iter cur = range.first; cur != range.second; ++cur ) {
+        const std::vector<unsigned> postings = files_.get( cur.id() );
+        docs.insert( postings.begin(), postings.end() );
+    }
+    return docs;
+}
+
+// Tokenizes \a complete and returns the words sorted, without duplicates,
+// without invalid words and without stop words.
+std::vector<std::string> ifile::break_clean( const char* complete ) const {
+    typedef std::vector<std::string> word_list;
+
+    word_list tokens = tokenizer_->string_to_words( complete );
+    std::sort( tokens.begin(), tokens.end() );
+
+    // Compact the kept words towards the front step by step, then cut the tail once.
+    word_list::iterator keep_end = std::unique( tokens.begin(), tokens.end() );
+    keep_end = std::remove_if( tokens.begin(), keep_end, &ifile::invalid_word );
+    keep_end = std::remove_if( tokens.begin(), keep_end, std::bind1st( std::mem_fun( &ifile::is_stop_word ), this ) );
+    tokens.erase( keep_end, tokens.end() );
+    return tokens;
+}
+
+
+
diff --git a/indexlib/ifile.h b/indexlib/ifile.h
new file mode 100644
index 000000000..db6cef90d
--- /dev/null
+++ b/indexlib/ifile.h
@@ -0,0 +1,74 @@
+#ifndef _IFILE_INCLUDE_GUARD_LPC_56465465798732
+#define _IFILE_INCLUDE_GUARD_LPC_56465465798732
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "index.h"
+#include "stringarray.h"
+#include "stringset.h"
+#include "leafdatavector.h"
+#include "tokenizer.h"
+#include <vector>
+#include <set>
+#include <string>
+
+/**
+ * indexlib::index implementation that keeps, for each word, the list of documents containing it.
+ */
+struct ifile : public indexlib::index {
+ public:
+ // Opens the backing stores "docnames", "words", "stopwords" and "files" below the given prefix.
+ ifile( std::string );
+ // Indexes the cleaned words of str under the document name doc.
+ virtual void add( const char* str, const char* doc );
+ // Returns the documents containing every word of the query; an empty query matches everything.
+ virtual std::auto_ptr<indexlib::result> search( const char* ) const;
+
+ virtual unsigned ndocs() const { return docnames_.size(); }
+ virtual std::string lookup_docname( unsigned idx ) const { return docnames_.get( idx ); }
+
+ // Removes the first document stored under the name doc, if there is one.
+ virtual void remove_doc( const char* doc );
+
+ // Recomputes the stop word set.
+ void maintenance();
+ // Removes the backing stores of the index with the given prefix.
+ static void remove( std::string );
+ private:
+ // Documents recorded for the entries of words_ selected by words_.upper_lower().
+ std::set<unsigned> find_word( std::string ) const;
+ // Tokenizes the text and returns sorted, unique words without invalid or stop words.
+ std::vector<std::string> break_clean( const char* ) const;
+ // Result listing every document index.
+ virtual std::auto_ptr<indexlib::result> everything() const;
+
+ // True for words containing a decimal digit or longer than 32 characters.
+ static bool invalid_word( std::string );
+ bool is_stop_word( std::string ) const;
+
+ // Rebuilds stopwords_ from words occurring in at least ndocs() / 4 documents.
+ void calc_stopwords();
+
+ // Data members are constructed in this order; the constructor's initializer list follows it.
+ stringarray docnames_;
+ stringset words_;
+ stringset stopwords_;
+ leafdatavector files_;
+ std::auto_ptr<indexlib::detail::tokenizer> tokenizer_;
+};
+
+#endif /* _IFILE_INCLUDE_GUARD_LPC_56465465798732 */
+
diff --git a/indexlib/index.h b/indexlib/index.h
new file mode 100644
index 000000000..26135b9f6
--- /dev/null
+++ b/indexlib/index.h
@@ -0,0 +1,114 @@
+#ifndef LPC_INDEX_H1105549284_INCLUDE_GUARD_
+#define LPC_INDEX_H1105549284_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <vector>
+#include <string>
+#include <memory>
+
+namespace indexlib {
+/**
+ * Represents a result set
+ *
+ * There are two (connected) reasons for not returning the answer set directly:
+ *
+ * - An implementation may decide to compute the set lazily, inside list()
+ * - An implementation may provide search() to handle repeated, refined searches more efficiently
+ */
+struct result {
+ public:
+ virtual ~result() { }
+ /**
+ * The answer set
+ */
+ virtual std::vector<unsigned> list() const = 0;
+ /**
+ * A new search.
+ *
+ * The reason for this function is that if the user is typing and we do repeated searches for
+ * "s", "st", "str", "stri" ... "string" we may (depending on the particular implementation)
+ * reuse the previous results for a speedup.
+ *
+ * @return null if the type does not support this or the particular search string makes it impossible to
+ * fulfill the search request.
+ */
+ virtual std::auto_ptr<result> search( const char* ) = 0;
+};
+
+/**
+ * Abstract interface of a searchable document index.
+ */
+struct index {
+ public:
+ virtual ~index() { }
+ /**
+ * Adds the document text \a str under the name \a docname.
+ */
+ virtual void add( const char* str, const char* docname ) = 0;
+ /**
+ * Convenience overload forwarding to add( const char*, const char* ).
+ *
+ * \see add( const char*, const char* );
+ */
+ virtual void add( std::string s, std::string d ) { add( s.c_str(), d.c_str() ); }
+
+ /**
+ * Removes the doc known as docname.
+ *
+ * In most implementations of index, this will probably be implemented by marking
+ * the document as deleted in some way and maintenance() will need to be called later.
+ */
+ virtual void remove_doc( const char* doc ) = 0;
+ // Convenience overload forwarding to remove_doc( const char* ).
+ void remove_doc( std::string doc ) { remove_doc( doc.c_str() ); }
+
+ /**
+ * Perform any maintenance tasks necessary.
+ *
+ * Should be called every so often, but it can take some time to perform these operations,
+ * so call them when the app is idle. The default implementation does nothing.
+ */
+ virtual void maintenance() { }
+
+ /**
+ * Returns all documents matching \a pattern.
+ */
+ virtual std::auto_ptr<result> search( const char* pattern ) const = 0;
+
+ /**
+ * Returns the number of docs indexed.
+ */
+ virtual unsigned ndocs() const = 0;
+ /**
+ * Since docs are returned by index, their names need to be looked up.
+ */
+ virtual std::string lookup_docname( unsigned ) const = 0;
+ private:
+};
+}
+
+#endif /* LPC_INDEX_H1105549284_INCLUDE_GUARD_ */
+
diff --git a/indexlib/index_slow.h b/indexlib/index_slow.h
new file mode 100644
index 000000000..ebe5e770d
--- /dev/null
+++ b/indexlib/index_slow.h
@@ -0,0 +1,71 @@
+#ifndef LPC_INDEX_SLOW_H1106061353_INCLUDE_GUARD_
+#define LPC_INDEX_SLOW_H1106061353_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "index.h"
+#include "slow.h"
+#include "boost-compat/noncopyable.hpp"
+
+/**
+ * Adapter exposing a slow object through the indexlib::index interface; every call is
+ * forwarded to impl_.
+ *
+ * NOTE(review): search() below is non-const and returns std::vector<unsigned>, while
+ * indexlib::index::search() is a const pure virtual returning std::auto_ptr<result>, and
+ * remove_doc( const char* ) is not overridden at all. As written the class therefore looks
+ * abstract and cannot be instantiated -- confirm whether this header is still in use.
+ *
+ * \see class slow which should be merged into this one.
+ */
+class index_slow : public indexlib::index , public boost::noncopyable {
+ public:
+ index_slow( std::string name ):
+ impl_( name )
+ {
+ }
+
+
+ virtual void add( const char* str, const char* doc ) {
+ impl_.add( str, doc );
+ }
+
+ // Does not override indexlib::index::search (different constness and return type).
+ virtual std::vector<unsigned> search( const char* str ) {
+ return impl_.search( str );
+ }
+
+ virtual unsigned ndocs() const {
+ return impl_.ndocs();
+ }
+ virtual std::string lookup_docname( unsigned idx ) const {
+ return impl_.lookup_docname( idx );
+ }
+ private:
+ slow impl_;
+};
+
+
+
+
+#endif /* LPC_INDEX_SLOW_H1106061353_INCLUDE_GUARD_ */
diff --git a/indexlib/indexlib-config.in b/indexlib/indexlib-config.in
new file mode 100644
index 000000000..f478bad4c
--- /dev/null
+++ b/indexlib/indexlib-config.in
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+# Print a usage line followed by the list of supported options to stdout.
+usage()
+{
+ echo "usage: $0 [OPTIONS]"
+cat << EOH
+
+options:
+ [--libs]
+ [--cflags]
+ [--version]
+ [--prefix]
+EOH
+}
+
+# Installation directories. The @...@ placeholders are presumably substituted when
+# configure generates the final script from this template -- verify against the build setup.
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+# Compiler and linker flags collected from the options given on the command line.
+flags=""
+
+# Without any argument, show the help text and leave successfully.
+if test $# -eq 0 ; then
+ usage
+ exit 0
+fi
+
+# Handle the options one at a time, in the order given.
+while test $# -gt 0
+do
+ case $1 in
+ --libs)
+ flags="$flags -L$libdir -lindex"
+ ;;
+ --cflags)
+ flags="$flags -I$includedir/index"
+ ;;
+ --version)
+ echo 0.94
+ ;;
+ --prefix)
+ echo $prefix
+ ;;
+ --help)
+ usage
+ exit 0
+ ;;
+ *)
+ echo "$0: unknown option $1"
+ echo
+ usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+# Print the collected flags on one line, but only if --libs or --cflags was requested.
+if test -n "$flags"
+then
+ echo $flags
+fi
diff --git a/indexlib/leafdata.cpp b/indexlib/leafdata.cpp
new file mode 100644
index 000000000..95af29e06
--- /dev/null
+++ b/indexlib/leafdata.cpp
@@ -0,0 +1,166 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "leafdata.h"
+#include "logfile.h"
+#include <iostream>
+#include <algorithm>
+#include "boost-compat/next_prior.hpp"
+#include "format.h"
+
+namespace {
+    // File-local pointer handed out by get_leafdata_manager(); null until
+    // set_leafdata_manager() stores something else.
+    memory_manager* current_manager = 0;
+}
+
+/** Returns the pointer most recently passed to set_leafdata_manager() (initially null). */
+memory_manager* get_leafdata_manager() {
+    return current_manager;
+}
+
+/** Remembers \a m as the pointer returned by get_leafdata_manager(). */
+void set_leafdata_manager( memory_manager* m ) {
+    current_manager = m;
+}
+
+/**
+ * Returns the reference stored at position \a idx (0-based) by decoding the
+ * entries from the start. No bounds check is performed here.
+ */
+uint32_t leaf_data::get_reference( unsigned idx ) const {
+    leafdata_iterator cursor = begin();
+    // An entry has to be dereferenced for the iterator to move past it.
+    for ( unsigned skipped = 0; skipped != idx; ++skipped ) {
+        *cursor;
+        ++cursor;
+    }
+    return *cursor;
+}
+
+/**
+ * Tells whether add_reference( \a ref ) may be called on this leaf without
+ * growing it first.
+ */
+bool leaf_data::can_add( uint32_t ref ) const {
+    // More free room than the longest encoding (marker byte + 32-bit value) needs.
+    if ( ( capacity() - usedbytes() ) > ( 1 + byte_io::byte_lenght<uint32_t>() ) ) return true;
+    // Not a single free byte left.
+    if ( capacity() == usedbytes() ) return false;
+
+    // Little room left: acceptable only if ref is already stored or can be
+    // written as a one-byte delta from the last stored reference.
+    uint32_t newest = 0;
+    iterator cursor = begin();
+    const iterator stop = end();
+    while ( cursor != stop ) {
+        assert( cursor < stop );
+        newest = *cursor;
+        if ( newest == ref ) return true;
+        ++cursor;
+    }
+    return ref > newest && ( ref - newest ) < 256;
+}
+
+/** Returns true if \a ref is one of the references stored in this leaf (linear scan). */
+bool leaf_data::has_reference( uint32_t ref ) const {
+    iterator cursor = begin();
+    const iterator stop = end();
+    while ( cursor != stop ) {
+        // Dereferencing decodes the entry and moves the iterator past it.
+        const uint32_t current = *cursor;
+        if ( current == ref ) return true;
+        ++cursor;
+    }
+    return false;
+}
+
+/**
+ * Appends \a ref to the leaf unless it is already present.
+ *
+ * Values are written biased by one. An entry is either a single non-zero
+ * byte holding the difference to the previous biased value, or a zero
+ * marker byte followed by the full 32-bit biased value.
+ * The caller must make sure can_add( ref ) holds.
+ */
+void leaf_data::add_reference( uint32_t ref ) {
+    assert( can_add( ref ) );
+    if ( has_reference( ref ) ) return;
+
+    // Walk to the end of the used area, remembering the last decoded reference.
+    iterator cursor = begin();
+    const iterator stop = end();
+    unsigned previous = 0;
+    for ( ; cursor != stop; ++cursor ) previous = *cursor;
+
+    // Switch to the biased-by-one form; an empty leaf starts from zero.
+    const uint32_t stored = ref + 1;
+    if ( usedbytes() ) ++previous;
+
+    unsigned char* target = const_cast<unsigned char*>( cursor.raw() );
+    assert( target == my_base() + usedbytes() );
+
+    if ( stored > previous && ( stored - previous ) < 256 ) {
+        // Short form: one delta byte.
+        assert( stored != previous );
+        *target = stored - previous;
+        set_usedbytes( usedbytes() + 1 );
+    } else {
+        // Long form: zero marker followed by the full value.
+        *target++ = 0;
+        byte_io::write<uint32_t>( target, stored );
+        set_usedbytes( usedbytes() + 1 + byte_io::byte_lenght<uint32_t>() );
+    }
+    assert( usedbytes() <= capacity() );
+}
+
+/**
+ * Meant to remove \a ref from this leaf; does nothing if no entry decodes
+ * to \a ref.
+ *
+ * NOTE(review): this function looks broken and should be checked against
+ * leafdata_iterator before anyone relies on it:
+ *  - leafdata_iterator::operator* moves raw() past the entry it decodes and
+ *    operator++ does not move it, so once the search loop breaks,
+ *    first.raw() already points BEHIND the matching entry.
+ *  - If the match is the last entry, first equals past and nothing happens.
+ *  - Otherwise next.raw() is presumably equal to first.raw() (assuming
+ *    boost::next is "copy, then ++"), the memmove copies a range onto itself
+ *    and set_usedbytes() drops everything AFTER the match, not the match.
+ *  - The fix-up loop then appears to have no bytes left to visit. If it did
+ *    run, it would decrement every delta byte (a delta of 1 would turn into
+ *    the zero marker) and skip one extra byte after each long entry.
+ */
+void leaf_data::remove_reference( uint32_t ref ) {
+ //logfile() << format( "leaf_data[%s]::remove_reference( %s )\n" ) % idx_ % ref;
+ // idx is counted but only used by the commented-out assert below.
+ unsigned idx = 0;
+ iterator first = begin();
+ const iterator past = end();
+ // Search for the entry; note that *first advances first.raw().
+ for ( ; first != past; ++first ) {
+ if ( *first == ref ) break;
+ ++idx;
+ }
+ if ( first != past ) {
+ //assert( get_reference( idx ) == ref );
+ iterator next = boost::next( first );
+ // Number of bytes between the current position and the end of the used area.
+ unsigned nbytes = end().raw() - first.raw();
+ std::memmove( const_cast<unsigned char*>( first.raw() ), next.raw(), nbytes );
+ set_usedbytes( usedbytes() - nbytes );
+ // Rewrite the entries from the current position up to the (new) end:
+ // non-zero bytes are decremented in place; a zero byte is taken as the
+ // marker of a long entry whose 32-bit value is decremented.
+ unsigned char* iter = const_cast<unsigned char*>( first.raw() );
+ for ( ; iter < end().raw(); ++iter) {
+ if (*iter) --*iter;
+ else {
+ ++iter;
+ byte_io::write<uint32_t>(iter,byte_io::read<uint32_t>(iter)-1);
+ iter += byte_io::byte_lenght<uint32_t>();
+ }
+ }
+ }
+}
+
+/** Counts the references stored in this leaf by decoding every entry. */
+unsigned leaf_data::nelems() const {
+    unsigned count = 0;
+    iterator cursor = begin();
+    const iterator stop = end();
+    while ( cursor != stop ) {
+        ++count;
+        *cursor;   // decoding is what moves the iterator past the entry
+        ++cursor;
+    }
+    return count;
+}
+
+
+/** Returns twice the current total block size (capacity plus data_offset header bytes). */
+unsigned leaf_data::next_byte_size() const {
+    const unsigned current_total = capacity() + data_offset;
+    return 2 * current_total;
+}
+
+/**
+ * Raises the recorded capacity to match a block of next_byte_size() bytes
+ * and zeroes the area between the used bytes and the new capacity.
+ * Storage is not allocated here; the caller reallocates before calling this.
+ */
+void leaf_data::grow() {
+    const unsigned enlarged = next_byte_size() - data_offset;
+    set_capacity( enlarged );
+    memset( my_base() + usedbytes(), 0, capacity() - usedbytes() );
+}
+
+/**
+ * Initialises the start_bytes() bytes at \a m as an empty leaf: everything
+ * is zeroed and the leading 16-bit field is set to the number of bytes that
+ * remain after the data_offset header bytes.
+ */
+void leaf_data::construct( void* m ) {
+    const unsigned total = leaf_data::start_bytes();
+    unsigned char* raw = reinterpret_cast<unsigned char*>( m );
+    memset( raw, 0, total );
+    byte_io::write<uint16_t>( raw, ( total - data_offset ) );
+}
+
+/** Writes the capacity and every stored reference, one per line, to \a out. */
+void leaf_data::print( std::ostream& out ) const {
+    //out << format( "\tsize: %8s\n" ) % used();
+    out << format( "\tcapacity: %8s\n" ) % capacity();
+    //out << format( "\tnext: %8s\n" ) % next();
+    int position = 0;
+    iterator cursor = begin();
+    const iterator stop = end();
+    while ( cursor != stop ) {
+        uint32_t ref = *cursor;
+        out << format( "\tref[ %1% ] = %2%\n" ) % position % ref;
+        ++position;
+        ++cursor;
+    }
+}
+
diff --git a/indexlib/leafdata.h b/indexlib/leafdata.h
new file mode 100644
index 000000000..e499a2e6f
--- /dev/null
+++ b/indexlib/leafdata.h
@@ -0,0 +1,147 @@
+#ifndef LPC_LEAFDATA_H1102530057_INCLUDE_GUARD_
+#define LPC_LEAFDATA_H1102530057_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <iostream>
+#include <iterator>
+#include <inttypes.h>
+#include "pointer.h"
+#include "memvector.h"
+#include "thing.h"
+
+/** Stores the memory_manager pointer later returned by get_leafdata_manager(). */
+void set_leafdata_manager( memory_manager* );
+/** Returns the pointer last passed to set_leafdata_manager(). */
+memory_manager* get_leafdata_manager();
+
+/**
+ * Single-pass decoder for the entries stored in a leaf_data block.
+ *
+ * Unlike ordinary iterators, it is operator* that consumes the bytes of the
+ * current entry and moves raw() forward; operator++ only re-arms the
+ * iterator for the next dereference. Each position must therefore be
+ * dereferenced exactly once (checked with an assert), and two iterators
+ * compare by raw position only.
+ *
+ * An entry is either one non-zero byte, added to the running value, or a
+ * zero byte followed by a full 32-bit value that replaces the running
+ * value. The value handed out is the running value minus one.
+ */
+struct leafdata_iterator : public std::iterator<STD_NAMESPACE_PREFIX input_iterator_tag,uint32_t> {
+    public:
+        /** Starts decoding at \a d with a running value of zero. */
+        explicit leafdata_iterator( const unsigned char* d ):
+            data_( d ), value_( 0 ), valid_( true ) { }
+
+        /** Decodes the entry at the current position and steps over its bytes. */
+        value_type operator *() {
+            assert( valid_ );
+            valid_ = false;
+
+            const uint32_t delta = *data_;
+            ++data_;
+            if ( delta == 0 ) {
+                // Long form: the marker byte is followed by the full value.
+                value_ = byte_io::read<uint32_t>( data_ );
+                data_ += byte_io::byte_lenght<uint32_t>();
+            } else {
+                // Short form: the byte is the distance from the previous value.
+                value_ += delta;
+            }
+            return value_ - 1;
+        }
+
+        /** Current byte position; after a dereference this is the start of the next entry. */
+        const unsigned char* raw() const { return data_; }
+
+        /** Allows the next dereference; does not move the position. */
+        leafdata_iterator& operator ++ () {
+            valid_ = true;
+            return *this;
+        }
+
+        bool operator == ( const leafdata_iterator& other ) const { return data_ == other.data_; }
+        bool operator < ( const leafdata_iterator& other ) const { return data_ < other.data_; }
+
+    private:
+        const unsigned char* data_;   // next byte to decode
+        uint32_t value_;              // running (biased) value
+        bool valid_;                  // false between operator* and operator++
+};
+
+/** Negation of leafdata_iterator::operator==. */
+inline
+bool operator != ( const leafdata_iterator& one, const leafdata_iterator& other ) {
+    return !( one == other );
+}
+
+/**
+ * A block holding a list of 32-bit references in a compact byte encoding
+ * (see leafdata_iterator for the decoding rules).
+ *
+ * The class is assembled by the START_THING / MEMBER / MY_BASE / END_THING
+ * macros from thing.h. Presumably MEMBER( type, name, offset ) declares an
+ * accessor pair for a field at the given byte offset and MY_BASE( n ) makes
+ * my_base() point n bytes into the block -- TODO confirm against thing.h.
+ */
+START_THING( leaf_data, thing< thing_manager<&get_leafdata_manager> > )
+ private:
+ // Number of data bytes available after the header (construct() sets it to
+ // start_bytes() - data_offset).
+ MEMBER( uint16_t, capacity, 0 )
+ // Number of data bytes currently holding encoded entries.
+ MEMBER( uint16_t, usedbytes, 2 )
+ friend class leaf_data_pool_traits;
+ // Size in bytes of the header made of the two 16-bit fields above.
+ static const unsigned data_offset = 4;
+ public:
+ MY_BASE( data_offset )
+ public:
+
+ typedef leafdata_iterator iterator;
+ // begin() starts at my_base(); end() lies usedbytes() bytes after it.
+ leafdata_iterator begin() const { return iterator( const_cast<unsigned char*>( my_base() ) ); }
+ leafdata_iterator end() const { return iterator( const_cast<unsigned char*>( my_base() ) + usedbytes() ); }
+
+ // Reference at position idx (0-based); no bounds check.
+ uint32_t get_reference( unsigned idx ) const;
+ // Appends a reference unless already present; requires can_add().
+ void add_reference( uint32_t );
+ void remove_reference( uint32_t );
+
+ bool has_reference( uint32_t ) const;
+ // Whether add_reference() may be called without growing the block first.
+ bool can_add( uint32_t ) const;
+
+ // Number of stored references (linear scan).
+ unsigned nelems() const;
+ // Total block size, header included, to reallocate to before grow().
+ unsigned next_byte_size() const;
+ // Updates capacity after the block has been reallocated to next_byte_size().
+ void grow();
+
+ void print( std::ostream& out ) const;
+
+ // Turns start_bytes() bytes of raw memory into an empty leaf.
+ static void construct( void* );
+ static void init( pointer<leaf_data> p ) { construct( p.raw_pointer() ); }
+ // Total size in bytes of a freshly constructed leaf, header included.
+ static unsigned start_bytes() { return 16; }
+END_THING( leaf_data )
+
+DO_POINTER_SPECS( leaf_data )
+
+
+// Alternative spellings without the underscore; both forms are used in the sources.
+typedef leaf_data leafdata;
+typedef leaf_dataptr leafdataptr;
+
+/**
+ * Static description of leaf_data blocks; used as the template argument of
+ * mempool<> (presumably the pool queries it for block sizes and free state).
+ */
+struct leaf_data_pool_traits {
+    typedef leaf_data value_type;
+    typedef leaf_dataptr pointer;
+
+    /** Smallest block size: that of a freshly constructed leaf. */
+    static unsigned min_size() { return leaf_data::start_bytes(); }
+    static unsigned max_size() { return 64 * 1024; }
+
+    static unsigned type_offset() { return 2; }
+
+    /** Total size of the block behind \a p: header plus capacity. */
+    static unsigned size_of( pointer p ) { return leafdata::data_offset + p->capacity(); }
+
+    /** A block whose capacity field is zero counts as free. */
+    static bool is_free( pointer p ) { return !p->capacity(); }
+
+    /** Zeroes the first capacity() bytes of the block, which also clears the capacity field. */
+    static void mark_free( pointer p ) { memset( p.raw_pointer(), 0, p->capacity() ); }
+
+    static void set_manager( memory_manager* m ) { set_leafdata_manager( m ); }
+
+    static void print( std::ostream& out, pointer p ) {
+        out << '[' << p << "] leafdata:\n";
+        p->print( out );
+    }
+};
+
+
+#endif /* LPC_LEAFDATA_H1102530057_INCLUDE_GUARD_ */
diff --git a/indexlib/leafdatavector.cpp b/indexlib/leafdatavector.cpp
new file mode 100644
index 000000000..56a3986c0
--- /dev/null
+++ b/indexlib/leafdatavector.cpp
@@ -0,0 +1,108 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "leafdatavector.h"
+#include "mmap_manager.h"
+#include "compressed.h"
+#include "logfile.h"
+#include "path.h"
+
+#include "format.h"
+#include <unistd.h>
+
+// Backing store for the leaf blocks: compressed_file when the build defines
+// USE_ZLIB_COMPRESSION, mmap_manager otherwise.
+#ifdef USE_ZLIB_COMPRESSION
+typedef compressed_file leafdatavector_manager;
+#else
+typedef mmap_manager leafdatavector_manager;
+#endif
+
+/**
+ * Sets up the two stores named after \a name: the leaf pool on
+ * path_concat( name, "leafs" ) and the table on path_concat( name, "table" ).
+ */
+leafdatavector::leafdatavector( std::string name ):
+ leafs_( std::auto_ptr<memory_manager>( new leafdatavector_manager( path_concat( name, "leafs" ) ) ) ),
+ table_( path_concat( name, "table" ) )
+{
+}
+
+/** Removes both stores (the "leafs" and "table" paths) that belong to \a name. */
+void leafdatavector::remove( std::string name ) {
+ leafdatavector_manager::remove( path_concat( name, "leafs" ) );
+ memvector<leaf_dataptr>::remove( path_concat( name, "table" ) );
+}
+
+/**
+ * Records that entry \a idx refers to \a what.
+ *
+ * A table slot holds 0 (no reference), a negative number -( ref + 1 ) (one
+ * reference stored inline) or a positive number (a leaf_data pointer in its
+ * uint32 form, used as soon as there is more than one reference).
+ */
+void leafdatavector::add( unsigned idx, unsigned what ) {
+    //logfile() << format( "leafdatavector::add( %s, %s )\n" ) % idx % what;
+    table_.resize( idx + 1 );
+    const int32_t slot = table_[ idx ];
+
+    if ( !slot ) {
+        // Empty slot: keep the single reference inline.
+        table_[ idx ] = -int( what + 1 );
+        return;
+    }
+
+    if ( slot < 0 ) {
+        // Second reference: move the inline one into a fresh leaf, then add the new one.
+        leafdataptr leaf = leafs_.allocate( leaf_data_pool_traits::min_size() );
+        leafdata::construct( leaf.raw_pointer() );
+        table_[ idx ] = leaf.cast_to_uint32();
+        leaf->add_reference( -slot - 1 );
+        assert( leaf->can_add( what ) );
+        leaf->add_reference( what );
+        return;
+    }
+
+    // Existing leaf: enlarge it first if the new reference does not fit.
+    leafdataptr leaf = leafdataptr::cast_from_uint32( slot );
+    if ( !leaf->can_add( what ) ) {
+        leaf = leafs_.reallocate( leaf, leaf->next_byte_size() );
+        leaf->grow();
+        table_[ idx ] = leaf.cast_to_uint32();
+    }
+    leaf->add_reference( what );
+}
+
+/**
+ * Drops \a ref from every table entry.
+ *
+ * Entries with a single inline reference are reset to 0 (empty); entries
+ * backed by a leaf forward the request to leaf_data::remove_reference().
+ */
+void leafdatavector::remove_references_to( unsigned ref ) {
+    //logfile() << format( "%s( %s )\n" ) % __PRETTY_FUNCTION__ % ref;
+    // add() stores a lone reference as -( ref + 1 ) and get() decodes it as
+    // -now - 1, so the comparison has to use the same bias. Comparing with
+    // -ref matched the wrong document and could never match document 0.
+    const int32_t inline_form = -int( ref + 1 );
+    for ( unsigned idx = 0; idx != table_.size(); ++idx ) {
+        int32_t now = table_[ idx ];
+        if ( now == inline_form ) table_[ idx ] = 0;
+        else if ( now > 0 ) leafdataptr::cast_from_uint32( now )->remove_reference( ref );
+    }
+}
+
+/**
+ * Returns all references recorded for entry \a idx; the result is empty when
+ * \a idx is beyond the table or the slot holds nothing.
+ */
+std::vector<unsigned> leafdatavector::get( unsigned idx ) const {
+    std::vector<unsigned> result;
+    if ( idx >= table_.size() ) return result;
+
+    int32_t slot = table_[ idx ];
+    if ( slot < 0 ) {
+        // One reference stored inline as -( ref + 1 ).
+        result.push_back( -slot - 1 );
+    } else if ( slot > 0 ) {
+        // Slot is a leaf pointer: decode every reference in the leaf.
+        logfile() << format( "%s( %s ) in %s\n" ) % __PRETTY_FUNCTION__ % idx % slot;
+        leafdataptr leaf = leafdataptr::cast_from_uint32( slot );
+        result.assign( leaf->begin(), leaf->end() );
+    }
+    return result;
+}
+
+
diff --git a/indexlib/leafdatavector.h b/indexlib/leafdatavector.h
new file mode 100644
index 000000000..60cdf9435
--- /dev/null
+++ b/indexlib/leafdatavector.h
@@ -0,0 +1,57 @@
+#ifndef LPC_LEAFDATAVECTOR_H1106249150_INCLUDE_GUARD_
+#define LPC_LEAFDATAVECTOR_H1106249150_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <vector>
+#include <string>
+#include "leafdata.h"
+#include "mempool.h"
+#include "memvector.h"
+
+/**
+ * Maps an index to a list of unsigned references.
+ *
+ * table_ holds one 32-bit slot per index; lists with more than one element
+ * live in leaf_data blocks allocated from leafs_ (see leafdatavector.cpp).
+ */
+struct leafdatavector {
+ public:
+ /** Sets up the stores belonging to the given name. */
+ leafdatavector( std::string );
+ /** Adds reference \a what to the list kept for \a idx. */
+ void add( unsigned idx, unsigned what );
+ /** Returns the list kept for \a idx (empty if there is none). */
+ std::vector<unsigned> get( unsigned idx ) const;
+ /** Resizes the slot table to \a n_s entries. */
+ void resize( unsigned n_s ) {
+ table_.resize( n_s );
+ }
+ void remove_references_to( unsigned ref );
+ /** Removes the stores belonging to the given name. */
+ static void remove( std::string );
+ private:
+ mempool<leaf_data_pool_traits> leafs_;
+ memvector<uint32_t> table_;
+};
+
+
+#endif /* LPC_LEAFDATAVECTOR_H1106249150_INCLUDE_GUARD_ */
diff --git a/indexlib/lockfile.cpp b/indexlib/lockfile.cpp
new file mode 100644
index 000000000..d0225cda0
--- /dev/null
+++ b/indexlib/lockfile.cpp
@@ -0,0 +1,77 @@
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "lockfile.h"
+#include "format.h"
+#include "logfile.h"
+#include <iostream>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+
+using indexlib::detail::lockfile;
+
+/** Remembers \a filename; the file system is not touched and the lock is not taken. */
+lockfile::lockfile( std::string filename ):
+ filename_( filename ),
+ locked_( false ) {
+ }
+
+/** Releases the lock if this object holds it. */
+lockfile::~lockfile() {
+    // unlock() checks locked() itself and does nothing when the lock is not held.
+    unlock();
+}
+
+/** Returns whether this object took the lock; the file on disk is not examined. */
+bool lockfile::locked() const {
+ return locked_;
+}
+
+/**
+ * Tries to take the lock by creating the lock file exclusively.
+ *
+ * \return true if this object holds the lock afterwards, false otherwise.
+ */
+bool lockfile::trylock() {
+    // Already ours: repeating open() with O_EXCL would fail on our own file
+    // and clear locked_, leaving a lock file that nobody removes any more.
+    if ( locked_ ) return true;
+
+    int fd = ::open( filename_.c_str(), O_RDWR | O_EXCL | O_CREAT, 0600 );
+    // errno is only meaningful when open() failed; capture it before any
+    // other call can overwrite it and report 0 on success.
+    const int open_errno = ( fd >= 0 ) ? 0 : errno;
+    if ( fd >= 0 ) {
+        locked_ = true;
+        // Only the existence of the file matters, not its contents.
+        close( fd );
+    }
+    logfile() << format( "trylock(%s) returning %s (fd:%s) (error:%s)\n" ) % filename_ % locked_ %fd % strerror( open_errno );
+    return locked_;
+}
+
+/** Removes the lock file if this object holds the lock; otherwise does nothing. */
+void lockfile::unlock() {
+    if ( !locked() ) return;
+    unlink( filename_.c_str() );
+    locked_ = false;
+}
+
+/** Removes the lock file regardless of who created it and clears the locked flag. */
+void lockfile::force_unlock() {
+    locked_ = false;
+    unlink( filename_.c_str() );
+}
+
diff --git a/indexlib/lockfile.h b/indexlib/lockfile.h
new file mode 100644
index 000000000..427795195
--- /dev/null
+++ b/indexlib/lockfile.h
@@ -0,0 +1,72 @@
+#ifndef LPC_LOCKFILE_H1118420718_INCLUDE_GUARD_
+#define LPC_LOCKFILE_H1118420718_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <string>
+
+namespace indexlib { namespace detail {
+
+/**
+ * Implements a simple, file based lock.
+ *
+ * Given a filename, it can be used to ensure that only one such file is in use at the time.
+ */
+class lockfile {
+ public:
+ /** Creates the object; the lock is not taken and no file is created yet. */
+ explicit lockfile( std::string name );
+
+ /** If this object holds the lock, releases it. */
+ ~lockfile();
+
+ /** Returns whether this object holds the lock. */
+ bool locked() const;
+
+ /**
+ * Tries to take the lock by exclusively creating the lock file.
+ * Returns true in case of success.
+ */
+ bool trylock();
+
+ /** Unlocks if you own the lock, otherwise it's a nop. */
+ void unlock();
+
+ /** Removes the lock file no matter who created it. */
+ void force_unlock();
+ private:
+ std::string filename_; // path of the lock file
+ bool locked_; // true while this object holds the lock
+
+};
+
+}}
+
+
+
+#endif /* LPC_LOCKFILE_H1118420718_INCLUDE_GUARD_ */
diff --git a/indexlib/logfile.cpp b/indexlib/logfile.cpp
new file mode 100644
index 000000000..49797cb5d
--- /dev/null
+++ b/indexlib/logfile.cpp
@@ -0,0 +1,54 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "logfile.h"
+#include <fstream>
+
+// Current log stream; created on first use by logfile() or by redirectlog().
+static std::ostream* logfile_ = 0;
+
+/**
+ * Returns the stream log messages are written to. If none has been set up
+ * yet, logging is first switched off with nolog(); std::cout is only a last
+ * resort should that leave no stream behind.
+ */
+std::ostream& logfile() {
+    if ( logfile_ ) return *logfile_;
+    nolog();
+    if ( logfile_ ) return *logfile_;
+    return std::cout;
+}
+
+/** Discards all further log output by redirecting the log to /dev/null. */
+void nolog() {
+ redirectlog( "/dev/null" );
+}
+
+/**
+ * Closes the current log stream, if any, and sends all further log output
+ * to the file \a fname.
+ */
+void redirectlog( std::string fname ) {
+    // The old stream is destroyed before the new file is opened, so that
+    // redirecting to the same file flushes the old contents first.
+    delete logfile_;
+    // Reset before allocating: if constructing the new stream throws,
+    // logfile_ must not keep pointing at the deleted object.
+    logfile_ = 0;
+    logfile_ = new std::ofstream( fname.c_str() );
+}
+
+
diff --git a/indexlib/logfile.h b/indexlib/logfile.h
new file mode 100644
index 000000000..09d5e78d5
--- /dev/null
+++ b/indexlib/logfile.h
@@ -0,0 +1,42 @@
+#ifndef LPC_LOGFILE_H1103643194_INCLUDE_GUARD_
+#define LPC_LOGFILE_H1103643194_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <iostream>
+#include <string>
+
+/** Discards all further log output (redirects the log to /dev/null). */
+void nolog();
+/** Sends all further log output to the named file. */
+void redirectlog( std::string );
+/** Returns the stream log messages should be written to. */
+std::ostream& logfile();
+
+#endif /* LPC_LOGFILE_H1103643194_INCLUDE_GUARD_ */
diff --git a/indexlib/main.cpp b/indexlib/main.cpp
new file mode 100644
index 000000000..eff248b47
--- /dev/null
+++ b/indexlib/main.cpp
@@ -0,0 +1,211 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "stringarray.h"
+#include "leafdata.h"
+#include "manager.h"
+#include "mmap_manager.h"
+#include "mempool.h"
+#include "compressed.h"
+#include "create.h"
+#include "tokenizer.h"
+#include <sstream>
+#include <map>
+#include <iostream>
+#include <cstdlib>
+#include <string>
+#include <fstream>
+#include <memory>
+#include <string.h>
+
+// Owning handle for an index; the index is destroyed when the handle goes out of scope.
+typedef std::auto_ptr<indexlib::index> index_smart;
+
+/** Opens the index called \a name, passing the create_quotes flag to indexlib::open(). */
+index_smart get_index( std::string name ) {
+ return indexlib::open( name.c_str(), indexlib::open_flags::create_quotes );
+}
+
+/** Reads \a in character by character until extraction fails and returns everything read. */
+std::string read_stream( std::istream& in ) {
+    std::string contents;
+    for ( char ch; in.get( ch ); ) contents.push_back( ch );
+    return contents;
+}
+/**
+ * Returns the contents of the file named \a file, or of standard input when
+ * \a file is "-". A file that cannot be opened yields an empty string.
+ */
+std::string read_string( std::string file ) {
+    if ( file != "-" ) {
+        std::ifstream in( file.c_str() );
+        return read_stream( in );
+    }
+    return read_stream( std::cin );
+}
+
+/** Prints the synopsis and the names of all registered \a commands to standard output. */
+void usage( int argc, char* argv[], const std::map<std::string, int (*)( int, char** )>& commands ) {
+    typedef std::map<std::string, int (*)( int, char** )>::const_iterator command_iter;
+
+    std::cout << argv[ 0 ] << " cmd [index]\n";
+    std::cout << "Possible Commands:\n\n";
+
+    command_iter cur = commands.begin();
+    const command_iter stop = commands.end();
+    while ( cur != stop ) {
+        std::cout << '\t' << cur->first << '\n';
+        ++cur;
+    }
+    std::cout << std::endl;
+}
+
+/**
+ * Developer helpers. argv[ 2 ] selects the function and argv[ 3 ] is its
+ * argument:
+ *  - "print.sa": dump the stringarray named by the argument
+ *  - "print.compressed": dump the compressed_file named by the argument
+ *  - "break_up": tokenize the file named by the argument, one word per line
+ *
+ * \return 0 on success, 1 on a missing argument, an unknown function or a
+ * missing tokenizer.
+ */
+int debug( int argc, char* argv[] ) {
+    using namespace indexlib;
+    using namespace indexlib::detail;
+    // main() only guarantees argc >= 3; without this check argv[ 3 ] may be
+    // the terminating null pointer, which must not be turned into a string.
+    if ( argc < 4 ) {
+        std::cerr << "Debug function and its argument are required\n";
+        return 1;
+    }
+    std::string type = argv[ 2 ];
+    std::string argument = argv[ 3 ];
+    if ( type == "print.sa" ) {
+        //nolog();
+        std::cout << "stringarray:\n";
+        stringarray sa( argument );
+        sa.print( std::cout );
+    } else if ( type == "print.compressed" ) {
+        compressed_file file( argument );
+        nolog();
+        std::cout << "compressed_file:\n";
+        file.print( std::cout );
+    } else if ( type == "break_up" ) {
+        std::auto_ptr<tokenizer> tok = get_tokenizer( "latin-1:european" );
+        if ( !tok.get() ) {
+            std::cerr << "Could not get tokenizer\n";
+            return 1;
+        }
+        nolog();
+        // Slurp the whole file, then print the words the tokenizer finds.
+        std::ostringstream whole_str;
+        whole_str << std::ifstream( argument.c_str() ).rdbuf();
+        std::vector<std::string> words = tok->string_to_words( whole_str.str().c_str() );
+        for ( std::vector<std::string>::const_iterator cur = words.begin(), past = words.end(); cur != past; ++cur ) {
+            std::cout << *cur << '\n';
+        }
+    } else {
+        std::cerr << "Unknown function\n";
+        return 1;
+    }
+    return 0;
+}
+
+/** Removes the document named argv[ 3 ] from the index named argv[ 2 ]. */
+int remove_doc( int argc, char* argv[] ) {
+    const bool has_docname = !( argc < 4 );
+    if ( !has_docname ) {
+        std::cerr << "Filename argument for remove_doc is required\n";
+        return 1;
+    }
+    index_smart idx = get_index( argv[ 2 ] );
+    idx->remove_doc( argv[ 3 ] );
+    return 0;
+}
+
+/** Opens the index named argv[ 2 ] and calls its maintenance() method. Always returns 0. */
+int maintenance( int argc, char* argv[] ) {
+ index_smart t = get_index( argv[ 2 ] );
+ t->maintenance();
+ return 0;
+}
+
+/**
+ * Adds a document to the index named argv[ 2 ]. The document is registered
+ * under argv[ 3 ]; its text is read from argv[ 4 ] when that argument is
+ * present and from argv[ 3 ] otherwise ("-" means standard input).
+ */
+int add( int argc, char* argv[] ) {
+    if ( argc < 4 ) {
+        std::cerr <<
+            "Input file argument is required\n"
+            "Name is optional (defaults to filename)\n";
+        return 1;
+    }
+    index_smart t = get_index( argv[ 2 ] );
+    const std::string input = argv[ 4 ] ? argv[ 4 ] : argv[ 3 ];
+    t->add( read_string( input ), argv[ 3 ] );
+
+    return 0;
+}
+
+/** Searches the index named argv[ 2 ] for argv[ 3 ] and prints the name of every matching document. */
+int search( int argc, char* argv[] ) {
+    if ( argc < 4 ) {
+        std::cerr << "Search string is required\n";
+        return 1;
+    }
+    index_smart t = get_index( argv[ 2 ] );
+    std::vector<unsigned> files = t->search( argv[ 3 ] )->list();
+    for ( std::vector<unsigned>::size_type i = 0; i != files.size(); ++i ) {
+        std::cout << t->lookup_docname( files[ i ] ) << std::endl;
+    }
+    return 0;
+}
+
+/** Prints the name of every document in the index named argv[ 2 ], one per line. */
+int list( int argc, char* argv[] ) {
+    index_smart t = get_index( argv[ 2 ] );
+
+    const unsigned total = t->ndocs();
+    unsigned doc = 0;
+    while ( doc != total ) {
+        std::cout << t->lookup_docname( doc ) << std::endl;
+        ++doc;
+    }
+    return 0;
+}
+
+/** Removes the index named argv[ 2 ] by calling indexlib::remove(). Always returns 0. */
+int remove( int argc, char* argv[] ) {
+    indexlib::remove( argv[ 2 ] );
+    // The function is declared to return int; flowing off the end without a
+    // return statement is undefined behaviour and gave an arbitrary exit code.
+    return 0;
+}
+
+
+/**
+ * Command-line driver: argv[ 1 ] selects the command, argv[ 2 ] names the
+ * index (or the debug function) and the remaining arguments are passed on
+ * to the command handler.
+ *
+ * \return the handler's result; 0 after printing the usage text when fewer
+ * than two arguments are given; 1 for an unknown command or when an
+ * exception escapes a handler.
+ */
+int main( int argc, char* argv[]) try {
+    //nolog();
+    typedef int ( *handler_fn )( int, char* [] );
+    typedef std::map<std::string, handler_fn> handler_map;
+
+    handler_map handlers;
+    handlers[ "debug" ] = &debug;
+    handlers[ "remove" ] = &remove;
+    handlers[ "remove_doc" ] = &remove_doc;
+    handlers[ "maintenance" ] = &maintenance;
+    handlers[ "add" ] = &add;
+    handlers[ "search" ] = &search;
+    handlers[ "list" ] = &list;
+
+    if ( argc < 3 ) {
+        usage( argc, argv, handlers );
+        return 0;
+    }
+
+    // find() instead of operator[]: looking up an unknown command must not
+    // add an empty entry to the table.
+    const handler_map::const_iterator found = handlers.find( argv[ 1 ] );
+    if ( found == handlers.end() ) {
+        std::cerr << "Unknown command: " << argv[ 1 ] << std::endl;
+        return 1;
+    }
+    return found->second( argc, argv );
+} catch ( const char* msg ) {
+    std::cerr << "Error: " << msg << std::endl;
+    return 1;
+} catch ( const std::exception& e ) {
+    std::cerr << "Std Error: " << e.what() << std::endl;
+    return 1;
+} catch ( ... ) {
+    std::cerr << "Some Unspecified error\n";
+    return 1;
+}
+
diff --git a/indexlib/manager.h b/indexlib/manager.h
new file mode 100644
index 000000000..c2672e239
--- /dev/null
+++ b/indexlib/manager.h
@@ -0,0 +1,69 @@
+#ifndef LPC_MANAGER_H1103129409_INCLUDE_GUARD_
+#define LPC_MANAGER_H1103129409_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+/**
+ * This gets memory and accesses it.
+ *
+ */
+class memory_manager {
+ public:
+ // Virtual so that implementations can be destroyed through a base pointer.
+ virtual ~memory_manager() { }
+ /**
+ * Returns a read-only pointer to the memory at position \p idx.
+ * An important interface bug is that it is not specified how far past
+ * \p idx you can safely access.
+ */
+ virtual const unsigned char* ronly_base( unsigned idx ) const = 0;
+ /**
+ * Like \ref ronly_base, except that the returned pointer may be written
+ * through. The argument is the position, as for \ref ronly_base.
+ */
+ virtual unsigned char* rw_base( unsigned ) const = 0;
+ /** Returns the current size of the block. */
+ virtual unsigned size() const = 0;
+ /** Resizes the block to the requested size.
+ * This invalidates all pointers previously returned by \ref ronly_base
+ * and \ref rw_base.
+ */
+ virtual void resize( unsigned ) = 0;
+};
+
+/**
+ * Adapter that forwards base-pointer lookups to the memory_manager returned
+ * by the compile-time supplied function \p get_parent.
+ */
+template <memory_manager* ( *get_parent )()>
+struct thing_manager {
+    public:
+
+        /** Read-only pointer to position \p idx of the parent manager. */
+        const unsigned char* ronly_base( unsigned idx ) const {
+            memory_manager* const parent = get_parent();
+            return parent->ronly_base( idx );
+        }
+
+        /** Writable pointer to position \p idx of the parent manager. */
+        unsigned char* rw_base( unsigned idx ) const {
+            memory_manager* const parent = get_parent();
+            return parent->rw_base( idx );
+        }
+};
+
+
+
+#endif /* LPC_MANAGER_H1103129409_INCLUDE_GUARD_ */
diff --git a/indexlib/match.cpp b/indexlib/match.cpp
new file mode 100644
index 000000000..a73dc5f5a
--- /dev/null
+++ b/indexlib/match.cpp
@@ -0,0 +1,79 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "match.h"
+#include "format.h"
+#include "compat.h"
+#include <iostream>
+#include <assert.h>
+
+namespace {
+    /**
+     * Sets bit number \p idx (0 = least significant) in \p u.
+     * \p idx must be smaller than the number of bits in an unsigned.
+     */
+    inline
+    void setbit( unsigned& u, unsigned idx ) {
+        // Shift an unsigned one: shifting the signed literal 1 into the top
+        // bit overflows int.
+        u |= ( 1u << idx );
+    }
+    /**
+     * Returns true when bit number \p idx (0 = least significant) is set in \p u.
+     * \p idx must be smaller than the number of bits in an unsigned.
+     */
+    inline
+    bool getbit( unsigned u, unsigned idx ) {
+        return ( u & ( 1u << idx ) ) != 0;
+    }
+}
+
+/**
+ * Pre-processes the pattern \p str.
+ *
+ * The first min( str.size(), bits-in-unsigned - 1 ) characters are encoded in
+ * the per-character bit masks (bit i of masks_[ c ] is set when character c
+ * is accepted at pattern position i); whatever follows is kept verbatim in
+ * pattern_rest_. With the caseinsensitive flag both the upper and the lower
+ * case form of each pattern character are accepted.
+ */
+indexlib::Match::Match( std::string str, unsigned flags ):
+    masks_( 256 ),
+    caseinsensitive_( flags & caseinsensitive ),
+    pattern_rest_( str, kMin( str.size(), sizeof( unsigned ) * 8 - 1 ) )
+{
+    hot_bit_ = kMin( str.size(), sizeof( unsigned ) * 8 - 1 );
+    for ( unsigned i = 0; i != hot_bit_; ++i ) {
+        // toupper()/tolower() are only defined for values representable as
+        // unsigned char (or EOF), so convert before calling them; a negative
+        // plain char would be undefined behaviour.
+        const unsigned char ch = static_cast<unsigned char>( str[ i ] );
+        if ( caseinsensitive_ ) {
+            setbit( masks_[ static_cast<unsigned char>( std::toupper( ch ) ) ], i );
+            setbit( masks_[ static_cast<unsigned char>( std::tolower( ch ) ) ], i );
+        } else {
+            setbit( masks_[ ch ], i );
+        }
+    }
+}
+
+// Nothing to release explicitly: the body is empty and the members clean up
+// after themselves.
+indexlib::Match::~Match() {
+}
+
+/**
+ * Returns true when the pattern occurs anywhere in the NUL-terminated text
+ * \p string. An empty pattern matches every text.
+ *
+ * After each consumed character, bit k of the running state is set when the
+ * last k characters of the text equal the first k characters of the pattern.
+ * Once bit hot_bit_ is set, the bit-encoded prefix has matched and the
+ * remainder of the pattern (pattern_rest_) is compared directly against the
+ * text that follows.
+ */
+bool indexlib::Match::process( const char* string ) const {
+    unsigned state = 0;
+    while ( *string ) {
+        state |= 1;
+        state &= masks_[ ( unsigned char )*string ];
+        state <<= 1;
+        ++string;
+        if ( !getbit( state, hot_bit_ ) ) continue;
+
+        // Compare the tail character by character and stop at the text's
+        // terminator. Building std::string( string, n ) as before read n
+        // bytes even when fewer were left in the text.
+        const char* text = string;
+        std::string::const_iterator rest = pattern_rest_.begin();
+        const std::string::const_iterator rest_end = pattern_rest_.end();
+        for ( ; rest != rest_end && *text; ++rest, ++text ) {
+            const unsigned char t = static_cast<unsigned char>( *text );
+            const unsigned char p = static_cast<unsigned char>( *rest );
+            // Apply the same acceptance rule the constructor encodes in the
+            // masks, so that long patterns honour the caseinsensitive flag.
+            const bool same = caseinsensitive_
+                ? ( t == static_cast<unsigned char>( std::toupper( p ) )
+                    || t == static_cast<unsigned char>( std::tolower( p ) ) )
+                : ( t == p );
+            if ( !same ) break;
+        }
+        if ( rest == rest_end ) return true;
+    }
+    return !hot_bit_;
+}
+
diff --git a/indexlib/match.h b/indexlib/match.h
new file mode 100644
index 000000000..568eb3d0d
--- /dev/null
+++ b/indexlib/match.h
@@ -0,0 +1,76 @@
+#ifndef LPC_MATCH_H1105564052_INCLUDE_GUARD_
+#define LPC_MATCH_H1105564052_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <string>
+#include <vector>
+#include <map>
+
+namespace indexlib {
+
+/**
+ * \brief A class for string matching
+ *
+ * This class represents a pattern in a pre-processed form so that searching for it in larger texts is
+ * very fast. This allows for a very fast way to ask "does this pattern appear in this text?"
+ *
+ * The implementation uses the shift-or algorithm which is very fast, but especially designed for
+ * patterns which are shorter than the number of bits in a word ( 32 bits on most architectures ). For
+ * larger patterns, the leading characters (one fewer than the number of bits in an unsigned, i.e. 31
+ * with a 32 bit unsigned) are matched using shift-or and the remaining characters are compared directly.
+ * Even for patterns larger than that, this should be a fast strategy.
+ */
+class Match {
+ public:
+ /** Bit flags accepted by the constructor. */
+ enum flags { caseinsensitive = 1 };
+ /** Construct an object to match the string \p pattern.
+ * \param pattern the text to look for
+ * \param flags   0 or a combination of the values of \ref flags
+ */
+ Match( std::string pattern, unsigned flags = 0 );
+ ~Match();
+
+ /**
+ * Returns true if the pattern appears in \p string
+ * It has no memory: every call starts matching from scratch.
+ */
+ bool process( const char* string ) const;
+ /** Convenience overload that forwards to process( const char* ). */
+ bool process( std::string str ) const { return process( str.c_str() ); }
+ private:
+ typedef std::vector<unsigned> masks_type;
+ // One bit mask per possible unsigned char value.
+ masks_type masks_;
+ // Number of leading pattern characters encoded in masks_; also the state bit
+ // that signals a complete match of those characters.
+ unsigned hot_bit_;
+ // True when the caseinsensitive flag was passed to the constructor.
+ bool caseinsensitive_;
+ // The part of the pattern that did not fit into the bit masks (may be empty).
+ std::string pattern_rest_;
+};
+}
+
+
+#endif /* LPC_MATCH_H1105564052_INCLUDE_GUARD_ */
diff --git a/indexlib/mempool.h b/indexlib/mempool.h
new file mode 100644
index 000000000..e0cfc9a3a
--- /dev/null
+++ b/indexlib/mempool.h
@@ -0,0 +1,160 @@
+#ifndef LPC_MEMPOOL_H1103129409_INCLUDE_GUARD_
+#define LPC_MEMPOOL_H1103129409_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "memreference.h"
+#include "manager.h"
+#include "memreference.h"
+#include "thing.h"
+#include <cassert>
+#include <vector>
+#include <memory>
+#include <algorithm>
+#include <iostream>
+
+/**
+ * @short implement memory management for things
+ *
+ * This class implements memory management for pools of things.
+ * It uses a simple linked list memory management algorithm and
+ * it depends on being supplied the right trait for the type held.
+ */
+template <typename Traits>
+struct mempool /* : boost::noncopyable */ {
+ public:
+ typedef Traits traits_type;
+ typedef typename traits_type::value_type data_type;
+ typedef typename traits_type::pointer data_typeptr;
+ /** Builds a pool on top of \p source and takes ownership of it. */
+ explicit mempool( std::auto_ptr<memory_manager> source );
+
+ /**
+ * Returns a memory block of size \p s.
+ * Analogous to malloc()
+ */
+ data_typeptr allocate( unsigned s );
+ /**
+ * Either makes the memory pointed to by the first argument the requested size
+ * (second argument) or allocates a new block of that size and moves the held data.
+ *
+ * Analogous to realloc()
+ * Unlike realloc(), supports neither reallocate( 0, s ) nor reallocate( p, 0 )
+ */
+ data_typeptr reallocate( data_typeptr, unsigned );
+ /** Releases the memory pointed to by \p p
+ * Analogous to free()
+ */
+ void deallocate( data_typeptr p );
+
+ /** Prints a half-readable description to \p out
+ * Mainly for debugging
+ */
+ void print( std::ostream& out ) const;
+
+ /**
+ * How big (in bytes) is the memory managed?
+ */
+ unsigned size() const { return manager_->size(); }
+ private:
+ /**
+ * Returns the order needed for a block of \p x bytes, i.e.
+ * ceil( log_2( max( x, min_size() ) ) ): the smallest order such that
+ * order_to_size( order ) >= max( x, min_size() ).
+ */
+ static unsigned order_of( unsigned x ) {
+ assert( x > 0 );
+ if ( x < traits_type::min_size() ) x = traits_type::min_size();
+ // Counting the significant bits of x - 1 yields ceil( log_2( x ) ).
+ --x;
+ unsigned res = 0;
+ while ( x ) {
+ ++res;
+ x >>= 1;
+ }
+ return res;
+ }
+ /**
+ * @returns 2^order, the size of a block of order \p order
+ */
+ static unsigned order_to_size( unsigned order ) {
+ return 1 << order;
+ }
+ /** Size of the object \p data as reported by the traits. */
+ static unsigned size_of( data_typeptr data ) {
+ return traits_type::size_of( data );
+ }
+
+ // Lower bound applied to the order of every block that is allocated or put
+ // on a free list (see its uses in mempool.tcc).
+ enum { min_order_for_free_node = 4 };
+
+ friend struct list_node_manager;
+ /**
+ * Gives free-list nodes access to the pool's memory: base-pointer lookups are
+ * forwarded to the manager of the mempool stored in parent_.
+ */
+ struct list_node_manager {
+ protected:
+ const mempool* parent_;
+ public:
+ explicit list_node_manager( const mempool* p = 0 ):parent_( p ) {}
+
+ void* rw_base( unsigned idx ) const {
+ return parent_->manager_->rw_base( idx );
+ }
+ const void* ronly_base( unsigned idx ) const {
+ return parent_->manager_->ronly_base( idx );
+ }
+ };
+
+ // Free-list node with three fields: order, next and prev. The macros come
+ // from thing.h; presumably they declare list_node and list_nodeptr and the
+ // third MEMBER argument is the field's byte offset -- TODO confirm.
+ START_THING( list_node, thing<list_node_manager> )
+ void set_parent( const mempool* p ) { this->parent_ = p; }
+ MEMBER( uint16_t, order, 0 )
+ MEMBER( uint32_t, next, 2 )
+ MEMBER( uint32_t, prev, 6 )
+ END_THING( list_node )
+
+ /** Returns the free-list node stored for the block at pool offset \p p. */
+ list_nodeptr get_node( uint32_t p ) const;
+ /**
+ * Get the free list header for a given order
+ */
+ memory_reference<uint32_t> free_list( unsigned order );
+ /** Read-only variant: returns the current value of the header. */
+ uint32_t free_list( unsigned order ) const {
+ return const_cast<mempool*>( this )->free_list( order );
+ }
+ void insert_into_list( uint32_t where, unsigned order );
+ void remove_from_list( uint32_t where, unsigned order );
+ void break_up( uint32_t where );
+ void init_memory();
+ void fill_into_list( unsigned old_size );
+ void fill_into_list( unsigned old_size, unsigned order );
+
+ bool join( data_typeptr&, unsigned order );
+ void deallocate( data_typeptr, unsigned order );
+
+ // The backing storage; owned by the pool.
+ std::auto_ptr<memory_manager> manager_;
+ // Reference to the max-order word stored at offset 0 of the pool.
+ memory_reference<uint32_t> max_order_;
+};
+
+#include "mempool.tcc"
+
+#endif /* LPC_MEMPOOL_H1103129409_INCLUDE_GUARD_ */
diff --git a/indexlib/mempool.tcc b/indexlib/mempool.tcc
new file mode 100644
index 000000000..19a5e6d44
--- /dev/null
+++ b/indexlib/mempool.tcc
@@ -0,0 +1,241 @@
+#include "format.h"
+#include <iostream>
+#include <cstring>
+#include "logfile.h"
+#include "compat.h"
+
+/* BASIC ALGORITHM AND STRUCTURE
+ *
+ * This is a memory pool manager which works by dividing its memory into
+ * blocks (all blocks have a size which is a power-of-two). Each block is either
+ * in use or in its corresponding free list.
+ *
+ * The free lists are doubly linked and there are head-pointers in
+ * the first page of the pool.
+ *
+ * POOL ORGANIZATION:
+ *
+ * FIRST PAGE
+ * max_order_: 32 bits
+ * [pseudo-order 0]: 32 bits
+ * [pseudo-order 1]: 32 bits
+ * [pseudo-order 2]: 32 bits
+ * [list order 3]: 32 bits
+ * [list order 4]: 32 bits
+ * [list order 5]: 32 bits
+ * [list order 6]: 32 bits
+ * ...
+ * [list order max_order_]: 32 bits
+ *
+ * SUBSEQUENT PAGES:
+ * nodes*
+ *
+ */
+
+/**
+ * Takes ownership of \p source and prepares it for use as a pool.
+ * An empty source is given its first page; the max-order word at offset 0 is
+ * initialised from the traits when it is still zero.
+ */
+template <typename Traits>
+mempool<Traits>::mempool( std::auto_ptr<memory_manager> source ):
+ manager_( source ),
+ max_order_( 0 )
+{
+ // max_order_ starts out bound to a null pointer; it may only be bound to
+ // real memory once the manager is known to have some.
+ if ( !manager_->size() ) init_memory();
+ max_order_.assign( memory_reference<uint32_t>( manager_->rw_base( 0 ) ) );
+ if ( !max_order_ ) {
+ max_order_ = order_of( traits_type::max_size() );
+ }
+ traits_type::set_manager( manager_.get() );
+}
+
+/**
+ * Returns a block able to hold \p size bytes (at least traits_type::min_size()).
+ *
+ * Strategy: take the head of the free list of the matching order; otherwise
+ * split the head of a bigger free list down to that order; otherwise grow the
+ * underlying memory. In the last two cases the function calls itself again to
+ * pick up the block that was just made available.
+ */
+template <typename Traits>
+typename mempool<Traits>::data_typeptr mempool<Traits>::allocate( unsigned size ) {
+ if ( size < traits_type::min_size() ) size = traits_type::min_size();
+ // Keep the stored maximum order up to date with the largest request seen.
+ max_order_ = kMax<uint32_t>( order_of( size ), max_order_ );
+ const unsigned order = kMax<unsigned>( order_of( size ), min_order_for_free_node );
+ if ( uint32_t res = free_list( order ) ) {
+ // Pop the head of the list and clear the back link of the new head.
+ free_list( order ) = get_node( res )->next();
+ if ( free_list( order ) ) get_node( free_list( order ) )->set_prev( 0 );
+ logfile() << format( "%s( %s ): (order %s) Returning %s\n" ) % __PRETTY_FUNCTION__ % size % order % res;
+ return data_typeptr::cast_from_uint32( res );
+ } else {
+ logfile() << format( "For size %s going up to %s\n") % size % max_order_;
+ for ( unsigned bigger = order + 1; bigger <= max_order_; ++bigger ) {
+ if ( uint32_t res = free_list( bigger ) ) {
+ // Halve the block until a piece of the wanted order exists; break_up()
+ // puts the lower half (still at offset res) at the head of its list.
+ while ( bigger > order ) {
+ break_up( res );
+ --bigger;
+ }
+ logfile() << format( "%s( %s ): recursing\n" ) % __PRETTY_FUNCTION__ % size;
+ return allocate( size );
+ }
+ }
+ // Nothing free anywhere: grow the memory by one block of the wanted order.
+ const unsigned old_size = manager_->size();
+ manager_->resize( manager_->size() + order_to_size( order ) );
+ // resize() invalidates previously returned pointers, so re-bind max_order_.
+ max_order_.assign( memory_reference<uint32_t>( manager_->rw_base( 0 ) ) );
+ fill_into_list( old_size, order );
+ return allocate( size );
+ }
+
+}
+
+/**
+ * Puts the memory between offset \p next_block and the end of the managed
+ * area on the free lists, using blocks of order \p order first and
+ * progressively smaller orders for what is left over. Orders below
+ * max( min_order_for_free_node, order_of( min_size() ) ) are never used, so a
+ * small remainder may stay unlisted.
+ */
+template <typename Traits>
+void mempool<Traits>::fill_into_list( unsigned next_block, unsigned order ) {
+    logfile() << format( "%s( %s, %s )\n" ) % __PRETTY_FUNCTION__ % next_block % order;
+    const unsigned pool_end = manager_->size();
+    const unsigned smallest_order =
+        kMax<unsigned>( min_order_for_free_node, order_of( traits_type::min_size() ) );
+    for ( ; next_block < pool_end && order >= smallest_order; --order ) {
+        const unsigned chunk = order_to_size( order );
+        // Carve as many blocks of the current order as still fit.
+        for ( ; ( pool_end - next_block ) >= chunk; next_block += chunk ) {
+            insert_into_list( next_block, order );
+        }
+    }
+}
+
+/**
+ * Convenience overload: fills the free lists from \p next_block onwards,
+ * starting with blocks of the current maximum order.
+ */
+template <typename Traits>
+void mempool<Traits>::fill_into_list( unsigned next_block ) {
+    const unsigned top_order = max_order_;
+    fill_into_list( next_block, top_order );
+}
+
+/**
+ * Gives a still empty manager its initial 4096 bytes (the first page, which
+ * holds the pool header).
+ */
+template <typename Traits>
+void mempool<Traits>::init_memory() {
+    const unsigned first_page_bytes = 4096;
+    manager_->resize( first_page_bytes );
+}
+
+/**
+ * Writes a debugging dump of the pool to \p out: first the header words
+ * (index 0 up to and including max_order_), then every block found when
+ * walking the memory from offset order_to_size( max_order_ ) to the end.
+ */
+template <typename Traits>
+void mempool<Traits>::print( std::ostream& out ) const {
+    const uint32_t end = manager_->size();
+    const unsigned top_order = max_order_;
+
+    out << "free lists:\n";
+    // Header slot 0 is the max-order word itself. Asking free_list() for
+    // order 0 trips its assert( order ), so print that word through
+    // max_order_ instead; it refers to the same offset and therefore prints
+    // the same value.
+    out << "\t" << 0u << ": " << top_order << '\n';
+    for ( unsigned i = 1; i <= top_order; ++i ) {
+        out << "\t" << i << ": " << free_list( i ) << '\n';
+    }
+    out << '\n';
+
+    uint32_t iterator = order_to_size( top_order );
+
+    while ( iterator < end ) {
+        data_typeptr p = data_typeptr::cast_from_uint32( iterator );
+        if ( traits_type::is_free( p ) ) {
+            // Free block: show its list node and skip the whole block.
+            out << '[' << iterator << "] free_node:\n";
+            list_nodeptr node = get_node( iterator );
+            out << "order:\t" << node->order() << '\n';
+            out << "prev:\t" << node->prev() << '\n';
+            out << "next:\t" << node->next() << '\n';
+            out << '\n';
+            iterator += order_to_size( node->order() );
+        } else {
+            // Used block: let the traits print it and advance by its size.
+            out << format( "size_of(): %s\n" ) % traits_type::size_of( p );
+            traits_type::print( out, p );
+            iterator += traits_type::size_of( p );
+        }
+    }
+}
+
+/**
+ * Returns a reference to the head pointer of the free list for \p order.
+ * The head pointers are 32 bit words stored at offset order * 4 of the pool;
+ * \p order must not be 0.
+ */
+template <typename Traits>
+memory_reference<uint32_t> mempool<Traits>::free_list( unsigned order ) {
+    assert( order );
+    const unsigned slot_offset = order * byte_io::byte_lenght<uint32_t>();
+    unsigned char* const slot = manager_->rw_base( slot_offset );
+    return memory_reference<uint32_t>( slot );
+}
+
+/**
+ * Returns the free-list node of the block at pool offset \p p (non-zero).
+ * The node lives Traits::type_offset() bytes into the block and is told
+ * which pool it belongs to before it is handed out.
+ */
+template <typename Traits>
+typename mempool<Traits>::list_nodeptr mempool<Traits>::get_node( uint32_t p ) const {
+    assert( p );
+    const uint32_t node_address = p + Traits::type_offset();
+    list_nodeptr node = list_nodeptr::cast_from_uint32( node_address );
+    node->set_parent( this );
+    return node;
+}
+
+/**
+ * Unlinks the block at offset \p where from the doubly linked free list of
+ * order \p order. The node's own next/prev fields are left untouched.
+ */
+template <typename Traits>
+void mempool<Traits>::remove_from_list( uint32_t where, unsigned order ) {
+ logfile() << format( "%s( %s, %s )\n" ) % __PRETTY_FUNCTION__ % where % order;
+ list_nodeptr node = get_node( where );
+ // Bridge the neighbours over the node being removed.
+ if ( node->next() ) get_node( node->next() )->set_prev( node->prev() );
+ if ( node->prev() ) get_node( node->prev() )->set_next( node->next() );
+ // If the node was the head, its successor becomes the new head.
+ if ( free_list( order ) == where ) free_list( order ) = node->next();
+}
+
+/**
+ * Marks the block at offset \p where as free and pushes it onto the front of
+ * the free list of order \p order.
+ */
+template <typename Traits>
+void mempool<Traits>::insert_into_list( uint32_t where, unsigned order ) {
+ logfile() << format( "%s( %s, %s )\n" ) % __PRETTY_FUNCTION__ % where % order;
+ traits_type::mark_free( data_typeptr::cast_from_uint32( where ) );
+ list_nodeptr new_node = get_node( where );
+ new_node->set_order( order );
+ // The new node becomes the head: it points at the old head and has no
+ // predecessor.
+ new_node->set_next( free_list( order ) );
+ new_node->set_prev( 0 );
+ if ( free_list( order ) ) {
+ get_node( free_list( order ) )->set_prev( where );
+ }
+ free_list( order ) = where;
+}
+
+/**
+ * Splits the free block at offset \p where into two free blocks of the next
+ * smaller order. The block must be free and of non-zero order.
+ */
+template <typename Traits>
+void mempool<Traits>::break_up( uint32_t where ) {
+ logfile() << "break_up( " << where << " )\n";
+ assert( traits_type::is_free( data_typeptr::cast_from_uint32( where ) ) );
+ const unsigned old_order = get_node( where )->order();
+ assert( old_order );
+ const unsigned new_order = old_order - 1;
+ remove_from_list( where, old_order );
+ // Insert the upper half first so that the lower half (at the original
+ // offset) ends up at the head of the list.
+ insert_into_list( where + order_to_size( new_order ), new_order );
+ insert_into_list( where, new_order );
+}
+
+/**
+ * Tries to merge the block \p node of order \p order with a free partner
+ * block of the same order.
+ *
+ * On success the merged block is inserted into the free list of order + 1,
+ * \p node is updated to refer to it and true is returned. Otherwise nothing
+ * is changed and false is returned.
+ */
+template <typename Traits>
+bool mempool<Traits>::join( data_typeptr& node, unsigned order ) {
+ logfile() << format( "%s( %s, %s )\n" ) % __PRETTY_FUNCTION__ % node.cast_to_uint32() % order;
+ const uint32_t byte_idx = node.cast_to_uint32();
+ const unsigned block_size = order_to_size( order );
+ const unsigned block_idx = byte_idx / block_size;
+ uint32_t partner;
+ // Odd blocks look for a partner below them, even blocks above them.
+ // NOTE(review): the offset applied here is block_idx (the block's index),
+ // whereas the neighbouring block of the same size would be block_size bytes
+ // away -- looks like a typo, confirm before relying on merging.
+ if ( block_idx % 2 ) {
+ partner = byte_idx - block_idx;
+ } else {
+ partner = byte_idx + block_idx;
+ }
+ if ( partner >= manager_->size() ) return false;
+ bool res = traits_type::is_free( data_typeptr::cast_from_uint32( partner ) )
+ && get_node( partner )->order() == order;
+ if ( res ) {
+ // The merged block starts at the lower of the two offsets.
+ node = ( block_idx % 2 ) ? data_typeptr::cast_from_uint32( partner ) : node;
+ // NOTE(review): byte_idx is unlinked here although the callers visible in
+ // this file pass blocks that have not necessarily been put on a free list
+ // yet -- verify.
+ remove_from_list( byte_idx, order );
+ remove_from_list( partner, order );
+ insert_into_list( node.cast_to_uint32(), order + 1 );
+ }
+ return res;
+}
+
+/**
+ * Releases the block \p data. Its order is derived from size_of( data ); the
+ * block is merged with free partners for as long as join() succeeds and the
+ * result is handed to deallocate( data, order ).
+ */
+template <typename Traits>
+void mempool<Traits>::deallocate( data_typeptr data ) {
+ logfile() << "deallocate( " << data << " )\n";
+ unsigned order = order_of( size_of( data ) );
+ // NOTE(review): a successful join() already inserts the merged block into a
+ // free list and the call below inserts it again -- confirm this is intended.
+ while ( ( order < max_order_ ) && join( data, order ) ) ++order;
+ deallocate( data, order );
+}
+
+/**
+ * Marks the (non-null) block \p data as free and puts it on the free list of
+ * order \p order. No merging is attempted here.
+ */
+template <typename Traits>
+void mempool<Traits>::deallocate( data_typeptr data, unsigned order ) {
+ logfile() << format( "%s( %s, %s )\n" ) % __PRETTY_FUNCTION__ % data.cast_to_uint32() % order;
+ assert( data );
+ // insert_into_list() marks the block free as well; the explicit call is kept.
+ traits_type::mark_free( data );
+ insert_into_list( data.cast_to_uint32(), order );
+}
+
+/**
+ * Returns a block of \p size bytes holding the contents of \p data.
+ *
+ * The contents (size_of( data ) bytes) are saved in a temporary heap buffer,
+ * the old block is possibly released, a block is obtained from allocate() and
+ * the saved bytes are copied into it.
+ */
+template <typename Traits>
+typename mempool<Traits>::data_typeptr mempool<Traits>::reallocate( data_typeptr data, unsigned size ) {
+ logfile() << format( "%s( %s, %s)\n" ) % __PRETTY_FUNCTION__ % data % size;
+ // NOTE(review): order_of( max_order_ ) is probably meant to be
+ // order_of( size ); allocate() below performs that update anyway.
+ max_order_ = kMax<uint32_t>( max_order_, order_of( max_order_ ) );
+ const unsigned original_size = size_of( data );
+ // Save the contents: releasing the block below may overwrite them.
+ // NOTE(review): the buffer is not released if anything below throws.
+ unsigned char* temporary = static_cast<unsigned char*>( operator new( original_size ) );
+ std::memmove( temporary, data.raw_pointer(), original_size );
+
+ unsigned current = order_of( original_size );
+ unsigned desired = order_of( size );
+ // NOTE(review): merging is attempted only while desired < current, i.e. when
+ // shrinking; growing in place would need the opposite comparison -- confirm.
+ while ( desired < current && join( data, current ) ) ++current;
+ // NOTE(review): when desired == current the old block is not released before
+ // a new one is allocated -- verify this does not leak pool memory.
+ if ( desired != current ) deallocate( data, current );
+ data = allocate( size );
+ // NOTE(review): copies original_size bytes even when size is smaller.
+ std::memcpy( data.raw_pointer(), temporary, original_size );
+ operator delete( temporary );
+ return data;
+}
+
diff --git a/indexlib/memreference.h b/indexlib/memreference.h
new file mode 100644
index 000000000..32c55e75c
--- /dev/null
+++ b/indexlib/memreference.h
@@ -0,0 +1,118 @@
+#ifndef LPC_MEMREFERENCE_H1108569807_INCLUDE_GUARD_
+#define LPC_MEMREFERENCE_H1108569807_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "bitio.h"
+
+/**
+ * Works like a reference to a memory location
+ * which is written and read in our byte-format.
+ */
+template <typename T>
+struct memory_reference {
+    public:
+        /** Refers to the value stored at \p d. */
+        explicit memory_reference( unsigned char* d ) : data_( d ) { }
+        /** Copies refer to the same location as the original. */
+        memory_reference( const memory_reference& other ) : data_( other.data_ ) { }
+        ~memory_reference() { }
+
+        /** Reads the referenced value. */
+        operator T () const {
+            return byte_io::read<T>( data_ );
+        }
+
+        /** Writes \p value to the referenced location. */
+        memory_reference& operator = ( const T& value ) {
+            byte_io::write<T>( data_, value );
+            return *this;
+        }
+        /**
+         * Like a C++ reference, assignment copies the value \p other refers
+         * to; the location referred to by *this does not change.
+         */
+        memory_reference& operator = ( const memory_reference& other ) {
+            const T copied = other;
+            byte_io::write<T>( data_, copied );
+            return *this;
+        }
+
+        /**
+         * This is a sneaky method to change pointers: afterwards *this refers
+         * to the location \p other refers to.
+         */
+        void assign( const memory_reference& other ) {
+            data_ = other.data_;
+        }
+    private:
+        unsigned char* data_;
+};
+
+
+// A mixed and fairly random collection of helper functions
+
+/** Adds \p v to the referenced value and stores the result back. */
+template <typename T, typename U>
+memory_reference<T> operator += ( memory_reference<T> ref, U v ) {
+    const T sum = ref + v;
+    ref = sum;
+    return ref;
+}
+
+/** Pre-increment: adds one to the referenced value. */
+template <typename T>
+memory_reference<T> operator ++( memory_reference<T> ref ) {
+    const T bumped = ref + 1;
+    ref = bumped;
+    return ref;
+}
+
+/** Pre-decrement: subtracts one from the referenced value. */
+template <typename T>
+memory_reference<T> operator --( memory_reference<T> ref ) {
+    const T lowered = ref - 1;
+    ref = lowered;
+    return ref;
+}
+
+/** Post-increment: adds one to the referenced value, returns the old value. */
+template <typename T>
+T operator ++( memory_reference<T> ref, int ) {
+    const T before = ref;
+    ref = ref + 1;
+    return before;
+}
+
+/** Post-decrement: subtracts one from the referenced value, returns the old value. */
+template <typename T>
+T operator --( memory_reference<T> ref, int ) {
+    const T before = ref;
+    ref = ref - 1;
+    return before;
+}
+
+/** Subtracts \p v from the referenced value and stores the result back. */
+template <typename T, typename U>
+memory_reference<T> operator -= ( memory_reference<T> ref, U v ) {
+    const T difference = ref - v;
+    ref = difference;
+    return ref;
+}
+
+/** Multiplies the referenced value by \p v and stores the result back. */
+template <typename T, typename U>
+memory_reference<T> operator *= ( memory_reference<T> ref, U v ) {
+    const T product = ref * v;
+    ref = product;
+    return ref;
+}
+
+
+
+#endif /* LPC_MEMREFERENCE_H1108569807_INCLUDE_GUARD_ */
diff --git a/indexlib/memvector.h b/indexlib/memvector.h
new file mode 100644
index 000000000..e251c4470
--- /dev/null
+++ b/indexlib/memvector.h
@@ -0,0 +1,224 @@
+#ifndef LPC_MEMVECTOR_H1105049836_INCLUDE_GUARD_
+#define LPC_MEMVECTOR_H1105049836_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "memreference.h"
+#include "bitio.h"
+#include "compat.h"
+#include "manager.h"
+#include "boost-compat/static_assert.hpp"
+#include "boost-compat/scoped_ptr.hpp"
+#ifdef HAVE_BOOST
+#include <boost/type_traits/is_convertible.hpp>
+#endif
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <cstring>
+#include <assert.h>
+
+template <typename> class memory_iterator;
+template <typename> class memvector;
+
+/**
+ * Random access iterator over values of type T stored back to back in the
+ * byte_io format. Advancing by one element moves the underlying pointer by
+ * byte_io::byte_lenght<T>() bytes.
+ */
+template <typename T>
+struct memory_iterator : public std::iterator<STD_NAMESPACE_PREFIX random_access_iterator_tag,T> {
+    public:
+        /** Iterator positioned at the raw address \p d. */
+        explicit memory_iterator( unsigned char* d ):
+            data_( d )
+        {
+        }
+        /** Conversion from an iterator over a type convertible to T. */
+        template <typename U>
+        memory_iterator( const memory_iterator<U>& other ):
+            data_( const_cast<unsigned char*>( other.raw() ) )
+        {
+            BOOST_STATIC_ASSERT( (boost::is_convertible<U, T>::value ) );
+        }
+
+        /** Reads the current element. */
+        T operator* () const {
+            return byte_io::read<T>( data_ );
+        }
+        /** Gives read/write access to the current element. */
+        memory_reference<T> operator* () {
+            return memory_reference<T>( data_ );
+        }
+
+        memory_iterator& operator ++() {
+            data_ += byte_io::byte_lenght<T>();
+            return *this;
+        }
+
+        memory_iterator& operator --() {
+            data_ -= byte_io::byte_lenght<T>();
+            return *this;
+        }
+
+        memory_iterator& operator += ( ptrdiff_t dif ) {
+            data_ += dif * byte_io::byte_lenght<T>();
+            return *this;
+        }
+
+        /** Distance in elements; both iterators must be a whole number of elements apart. */
+        ptrdiff_t operator - ( const memory_iterator<T>& other ) const {
+            const ptrdiff_t bytes = raw() - other.raw();
+            assert( !( bytes % byte_io::byte_lenght<T>() ) );
+            return bytes / byte_io::byte_lenght<T>();
+        }
+
+        bool operator < ( const memory_iterator<T>& other ) const {
+            const ptrdiff_t gap = this->raw() - other.raw();
+            return gap < 0;
+        }
+
+        /** The raw address the iterator currently points to. */
+        const unsigned char* raw() const { return data_; }
+    private:
+        unsigned char* data_;
+};
+
+/** Two iterators are equal when they point to the same raw address. */
+template <typename T, typename U>
+inline
+bool operator == ( const memory_iterator<T>& a, const memory_iterator<U>& b ) {
+    const unsigned char* const lhs = a.raw();
+    const unsigned char* const rhs = b.raw();
+    return lhs == rhs;
+}
+
+/** Negation of operator==. */
+template <typename T, typename U>
+inline
+bool operator != ( const memory_iterator<T>& a, const memory_iterator<U>& b ) {
+    const bool same = ( a == b );
+    return !same;
+}
+
+/** a <= b holds exactly when b is not before a. */
+template <typename T, typename U>
+inline
+bool operator <= ( const memory_iterator<T>& a, const memory_iterator<U>& b ) {
+    const bool b_first = ( b < a );
+    return !b_first;
+}
+
+
+
+/** Returns an iterator \p dif elements after \p iter. */
+template <typename T>
+inline
+memory_iterator<T> operator + ( memory_iterator<T> iter, typename memory_iterator<T>::difference_type dif ) {
+    return iter += dif;
+}
+
+/** Moves \p iter back by \p dif elements. */
+template <typename T>
+inline
+memory_iterator<T>& operator -= ( memory_iterator<T>& iter, typename memory_iterator<T>::difference_type dif ) {
+    return iter += -dif;
+}
+
+/** Returns an iterator \p dif elements before \p iter. */
+template <typename T>
+inline
+memory_iterator<T> operator - ( memory_iterator<T> iter, typename memory_iterator<T>::difference_type dif ) {
+    return iter -= dif;
+}
+
+
+/** Post-decrement: steps \p ref back and returns its previous position. */
+template <typename T>
+inline
+memory_iterator<T> operator -- ( memory_iterator<T>& ref, int ) {
+    const memory_iterator<T> before( ref );
+    --ref;
+    return before;
+}
+
+/** Post-increment: advances \p ref and returns its previous position. */
+template<typename T>
+inline
+memory_iterator<T> operator ++ ( memory_iterator<T>& ref, int ) {
+    const memory_iterator<T> before( ref );
+    ++ref;
+    return before;
+}
+
+/**
+ * A vector of T kept on disk.
+ *
+ * The interface is a subset of std::vector<T>'s interface.
+ */
+template <typename T>
+struct memvector {
+ public:
+ /** Opens the vector stored under the given name (see the constructor in memvector.tcc). */
+ memvector( std::string );
+ ~memvector();
+
+ typedef T value_type;
+ typedef unsigned size_type;
+ typedef memory_iterator<T> iterator;
+ typedef memory_iterator<const T> const_iterator;
+
+ iterator begin() { return iterator( address_of( 0 ) ); }
+ iterator end() { return iterator( address_of( size() ) ); }
+
+ const_iterator begin() const { return const_iterator( address_of( 0 ) ); }
+ const_iterator end() const { return const_iterator( address_of( size() ) ); }
+
+ /** Returns a copy of element \p idx, which must be smaller than size(). */
+ value_type operator[] ( unsigned idx ) const {
+ assert( idx < size() );
+ return byte_io::read<T>( address_of( idx ) );
+ }
+
+ /** Returns a writable reference to element \p idx, which must be smaller than size(). */
+ memory_reference<T> operator[] ( unsigned idx ) {
+ assert( idx < size() );
+ return memory_reference<T>( address_of( idx ) );
+ }
+
+ /**
+ * For debugging, nothing else
+ */
+ void print( std::ostream& ) const;
+ /** Number of elements; it is stored as a 32 bit word at offset 0 of the data. */
+ size_type size() const { return byte_io::read<uint32_t>( data_->ronly_base( 0 ) ); }
+ bool empty() const { return !size(); }
+ void resize( size_type );
+
+ /** Inserts the value before the given position. */
+ void insert( const_iterator, const value_type );
+ /** Removes the element at the given position. */
+ void erase( iterator );
+ void clear();
+ void push_back( value_type v ) { insert( end(), v ); }
+
+ /**
+ * Removes from disk
+ */
+ static void remove( std::string );
+
+ private:
+ boost::scoped_ptr<memory_manager> data_;
+ // Address of element i: the elements follow the size word at the start.
+ unsigned char* address_of( unsigned i ) {
+ return data_->rw_base(
+ byte_io::byte_lenght<unsigned>() +
+ i * byte_io::byte_lenght<T>() );
+ }
+ // Const flavour; reuses the non-const implementation.
+ const unsigned char* address_of( unsigned i ) const {
+ return const_cast<memvector*>( this )->address_of( i );
+ }
+};
+
+#include "memvector.tcc"
+
+
+#endif /* LPC_MEMVECTOR_H1105049836_INCLUDE_GUARD_ */
diff --git a/indexlib/memvector.tcc b/indexlib/memvector.tcc
new file mode 100644
index 000000000..ef45c2b5f
--- /dev/null
+++ b/indexlib/memvector.tcc
@@ -0,0 +1,80 @@
+#include "format.h"
+#include "mmap_manager.h"
+#include <cstring>
+
+#include <cstring>
+
+/**
+ * Opens the vector backed by file \a fname through an mmap_manager.
+ *
+ * When the backing store reports a size of zero, room for the element-count
+ * header is allocated and the count is initialised to 0.
+ */
+template <typename T>
+memvector<T>::memvector( std::string fname ):
+    data_( new mmap_manager( fname ) )
+{
+    if ( data_->size() ) return; // existing contents: keep them as they are
+
+    data_->resize( byte_io::byte_lenght<unsigned>() );
+    unsigned char* const header = data_->rw_base( 0 );
+    byte_io::write<unsigned>( header, 0 );
+}
+
+// Nothing is released by hand here: data_ is a boost::scoped_ptr member.
+template <typename T>
+memvector<T>::~memvector() {
+}
+
+/**
+ * Debugging aid: writes the element count and then one element per line
+ * to \a out.
+ */
+template <typename T>
+void memvector<T>::print( std::ostream& out ) const {
+    out << format( "size(): %s\n" ) % size();
+
+    const_iterator cursor = begin();
+    const_iterator stop = end();
+    while ( cursor != stop ) {
+        out << *cursor << std::endl;
+        ++cursor;
+    }
+}
+
+/**
+ * Grows the vector to \a n_s elements, assigning value_type() to every
+ * newly added slot.
+ *
+ * Unlike std::vector::resize() this never shrinks: if size() is already
+ * at least \a n_s the call returns without touching anything.
+ */
+template <typename T>
+void memvector<T>::resize( size_type n_s ) {
+ if ( size() >= n_s ) return;
+
+ data_->resize( n_s * byte_io::byte_lenght<value_type>() + byte_io::byte_lenght<unsigned>() );
+ // Taken after the backing store was resized but before the count is rewritten,
+ // so p_end is the OLD end expressed as an address in the current storage.
+ iterator p_end = end();
+ byte_io::write<unsigned>( data_->rw_base( 0 ), n_s );
+ // end() now reflects the new count; walk the new slots and initialise them.
+ // NOTE(review): operator != is called with explicit <unsigned, unsigned>
+ // template arguments -- presumably a compiler workaround; confirm it is
+ // valid for element types other than unsigned.
+ while ( operator !=<unsigned, unsigned>(p_end, end()) ) {
+ *p_end = value_type();
+ ++p_end;
+
+ }
+}
+
+/**
+ * Inserts \a v in front of the element \a where points to; \a where may be
+ * end(), in which case the value is appended.
+ */
+template<typename T>
+void memvector<T>::insert( const_iterator where, const value_type v ) {
+ assert( !( where < begin() ) );
+ assert( where <= end() );
+ // Remember the position as a byte offset from the start of the storage:
+ // the resize below can replace the storage (mmap_manager::resize() unmaps
+ // and maps again), after which the address held by `where` is stale.
+ const unsigned to_idx = where.raw() - data_->ronly_base( 0 );
+ data_->resize( ( size() + 1 ) * byte_io::byte_lenght<value_type>() + byte_io::byte_lenght<unsigned>() );
+ unsigned char* to = data_->rw_base( to_idx );
+ // make space:
+ // (the count has not been bumped yet, so end() is still the old end and
+ // exactly the bytes [to, old end) are shifted up by one element)
+ std::memmove( to + byte_io::byte_lenght<value_type>(), to, end().raw() - to );
+ byte_io::write<value_type>( to, v );
+ byte_io::write<unsigned>( data_->rw_base( 0 ), size() + 1 );
+}
+
+/**
+ * Removes the element \a where points to by sliding every later element
+ * down one slot and decrementing the stored count. The backing store
+ * itself is not shrunk.
+ */
+template <typename T>
+void memvector<T>::erase( iterator where ) {
+    assert( size() );
+    assert( !( where < begin() ) );
+    assert( where < end() );
+
+    iterator following = where;
+    ++following;
+
+    unsigned char* const dst = const_cast<unsigned char*>( where.raw() );
+    const unsigned char* const src = following.raw();
+    std::memmove( dst, src, end().raw() - src );
+
+    byte_io::write<uint32_t>( data_->rw_base( 0 ), size() - 1 );
+}
+
+/**
+ * Empties the vector: makes sure the count header exists and sets the
+ * stored element count to 0.
+ */
+template <typename T>
+void memvector<T>::clear() {
+    data_->resize( byte_io::byte_lenght<uint32_t>() );
+    unsigned char* const header = data_->rw_base( 0 );
+    byte_io::write<uint32_t>( header, 0 );
+}
+
+/**
+ * Unlinks the file \a fname. The result of unlink() is not inspected.
+ */
+template<typename T>
+void memvector<T>::remove( std::string fname ) {
+    const char* const path = fname.c_str();
+    ::unlink( path );
+}
+
+
diff --git a/indexlib/mmap_manager.cpp b/indexlib/mmap_manager.cpp
new file mode 100644
index 000000000..58ebd47e7
--- /dev/null
+++ b/indexlib/mmap_manager.cpp
@@ -0,0 +1,100 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "mmap_manager.h"
+#include "logfile.h"
+#include "exception.h"
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <cstring>
+#include "format.h"
+
+using indexlib::detail::errno_error;
+
+/**
+ * Opens \a filename read-write (creating it with mode 0644 if possible) and
+ * maps its current contents, if any.
+ *
+ * If the open with O_CREAT fails, a plain O_RDWR open is attempted before
+ * giving up.
+ *
+ * \throws errno_error if the file cannot be opened, stat'ed or mapped.
+ */
+mmap_manager::mmap_manager( std::string filename )
+    :filename_( filename ),
+    pagesize_( ( size_t )sysconf( _SC_PAGESIZE ) ),
+    base_( 0 ),
+    size_( 0 )
+{
+    fd_ = open( filename.c_str(), O_RDWR | O_CREAT, 0644 );
+    logfile() << format( "open( %s, O_RDWR) returned %s\n" ) % filename % fd_;
+    // open() reports failure with -1; descriptor 0 is a perfectly valid result.
+    if ( fd_ < 0 ) {
+        fd_ = open( filename.c_str(), O_RDWR );
+        if ( fd_ < 0 ) throw errno_error( "open()" );
+    }
+
+    // Map existing contents whichever open() call succeeded. Leaving a
+    // non-empty file unmapped would make size() report 0 and let a later
+    // resize() ftruncate the file down to a single page.
+    try {
+        struct stat st;
+        if ( fstat( fd_, &st ) == -1 ) {
+            throw errno_error( "stat()" );
+        }
+        if ( st.st_size ) map( st.st_size );
+    } catch ( ... ) {
+        // The destructor does not run for a half-constructed object, so the
+        // descriptor has to be released here before the error propagates.
+        close( fd_ );
+        throw;
+    }
+}
+
+/**
+ * Drops the mapping and closes the file descriptor.
+ */
+mmap_manager::~mmap_manager()
+{
+    try {
+        unmap();
+    } catch ( ... ) {
+        // unmap() throws if munmap() fails. An exception must not leave a
+        // destructor (it would terminate the program during stack unwinding),
+        // and there is nothing useful left to do with the mapping anyway.
+    }
+    if ( fd_ >= 0 ) close( fd_ );
+}
+
+/**
+ * Grows the file and its mapping so that at least \a ns bytes are available;
+ * the new size is rounded up to a whole number of pages and the added bytes
+ * are zeroed. Requests that do not exceed the current size are ignored, so
+ * this never shrinks.
+ *
+ * Pointers previously obtained from ronly_base()/rw_base() are invalid after
+ * a successful grow because the file is unmapped and mapped again.
+ *
+ * \throws errno_error if the file cannot be extended or remapped.
+ */
+void mmap_manager::resize( unsigned ns ) {
+    if ( size() >= ns ) return;
+    const unsigned old_size = size();
+    ns = ( ns / pagesize_ + bool( ns % pagesize_ ) ) * pagesize_;
+    // Extend the file first and check the result: touching mapped pages
+    // beyond the end of the file raises SIGBUS. Doing it before unmap() also
+    // keeps the existing mapping intact if the extension fails.
+    if ( ftruncate( fd_, ns ) == -1 ) {
+        throw errno_error( "ftruncate()" );
+    }
+    unmap();
+    map( ns );
+    logfile() << format( "Going to bzero from %s to %s)\n" ) % old_size % size();
+    memset( rw_base( old_size ), 0, size() - old_size );
+}
+
+/**
+ * Releases the current mapping, if there is one, and resets base_ and size_.
+ *
+ * \throws errno_error if munmap() fails.
+ */
+void mmap_manager::unmap() {
+    if ( base_ ) {
+        const int rc = munmap( base_, size_ );
+        if ( rc == -1 ) throw errno_error( "munmap()" ); // This should be BUG
+        base_ = 0;
+        size_ = 0;
+    }
+}
+
+/**
+ * Maps the first \a size bytes of the file read-write and shared, and
+ * records the mapping in base_ / size_.
+ *
+ * \throws errno_error if mmap() fails; base_ and size_ are left untouched
+ *         in that case.
+ */
+void mmap_manager::map( unsigned size ) {
+    // Keep the result in a local until it is known to be good: storing
+    // MAP_FAILED in base_ would look like a live mapping to unmap(), which
+    // would then call munmap() on it.
+    void* const addr = mmap( 0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd_, 0 );
+    if ( addr == MAP_FAILED ) {
+        throw errno_error( "mmap()" );
+    }
+    base_ = addr;
+    size_ = size;
+}
+
diff --git a/indexlib/mmap_manager.h b/indexlib/mmap_manager.h
new file mode 100644
index 000000000..eaed5d8de
--- /dev/null
+++ b/indexlib/mmap_manager.h
@@ -0,0 +1,72 @@
+#ifndef LPC_MMAP_MANAGER_H1103129409_INCLUDE_GUARD_
+#define LPC_MMAP_MANAGER_H1103129409_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "manager.h"
+#include <string>
+#include <cstring>
+#include <unistd.h>
+#include <assert.h>
+
+/**
+ * A memory_manager whose storage is a file mapped with mmap().
+ *
+ * The object holds the file descriptor and the mapping; both are released
+ * by the destructor. Addresses handed out by ronly_base()/rw_base() point
+ * into the current mapping and become stale when resize() remaps the file.
+ *
+ * NOTE(review): no copy constructor or assignment operator is declared here,
+ * so a copy would share fd_ and base_ with the original -- confirm whether
+ * memory_manager already forbids copying.
+ */
+class mmap_manager : public memory_manager {
+ public:
+ explicit mmap_manager( std::string );
+ ~mmap_manager();
+ // Read-only address of byte `offset` within the mapping.
+ const unsigned char* ronly_base( unsigned offset ) const {
+ assert( offset <= size() ); // allow 1-past-the-end but not more
+ return reinterpret_cast<unsigned char*>( base_ ) + offset;
+ }
+ // Writable address of byte `offset`; const because only the mapped bytes,
+ // not the manager's own members, are modified through it.
+ unsigned char* rw_base( unsigned offset ) const {
+ assert( offset <= size() ); // as above
+ return reinterpret_cast<unsigned char*>( base_ ) + offset;
+ }
+ // Number of bytes currently mapped.
+ unsigned size() const {
+ return size_;
+ }
+ void resize( unsigned );
+
+ // Unlinks the named file; the result of unlink() is not checked.
+ static void remove( std::string fname ) {
+ ::unlink( fname.c_str() );
+ }
+ private:
+ void unmap();
+ void map( unsigned );
+ std::string filename_;
+ const unsigned pagesize_; // sysconf( _SC_PAGESIZE ), used to round sizes in resize()
+ int fd_;
+ void* base_; // start of the mapping, 0 while nothing is mapped
+ unsigned size_; // length of the mapping in bytes
+};
+
+
+#endif /* LPC_MMAP_MANAGER_H1103129409_INCLUDE_GUARD_ */
diff --git a/indexlib/path.h b/indexlib/path.h
new file mode 100644
index 000000000..36e151792
--- /dev/null
+++ b/indexlib/path.h
@@ -0,0 +1,79 @@
+#ifndef LPC_PATH_H1118420718_INCLUDE_GUARD_
+#define LPC_PATH_H1118420718_INCLUDE_GUARD_
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include <string>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/**
+ * Returns whether \a p is the name of a directory.
+ *
+ * Symbolic links are followed (stat() is used). Returns false when \a p
+ * cannot be stat'ed at all.
+ */
+inline
+bool isdir( const char* p ) {
+    struct stat st;
+    if ( stat( p, &st ) != 0 ) return false;
+    // S_ISDIR compares the whole file-type field. A plain `& S_IFDIR` test
+    // is also non-zero for other file types whose type bits overlap it
+    // (block devices, for instance).
+    return S_ISDIR( st.st_mode );
+}
+
+/**
+ * std::string convenience overload of isdir( const char* ).
+ */
+inline
+bool isdir( std::string p ) {
+    const char* const raw = p.c_str();
+    return isdir( raw );
+}
+
+namespace indexlib { namespace detail {
+/**
+ * mkdir() wrapper that copes with trailing slashes.
+ *
+ * Trailing '/' characters are stripped from \a p (the first character is
+ * always kept, so "/" stays "/"), then the directory is created with mode
+ * 0755. Returns true only if mkdir() succeeded; an empty path yields false.
+ */
+inline
+bool mkdir_trailing( std::string p ) {
+    std::string::size_type keep = p.size();
+    while ( keep > 1 && p[ keep - 1 ] == '/' ) --keep;
+    p.resize( keep );
+
+    if ( p.empty() ) return false;
+
+    const int rc = ::mkdir( p.c_str(), 0755 );
+    return rc == 0;
+}
+
+}}
+/**
+ * Builds the file name for part \a ext of the index called \a base.
+ *
+ * Normally the result is base + "." + ext. When \a base names an existing
+ * directory the files live inside it instead: base + "/index." + ext.
+ */
+inline
+std::string path_concat( std::string base, std::string ext ) {
+    std::string stem = base;
+    if ( isdir( base ) ) stem += "/index";
+    stem += '.';
+    stem += ext;
+    return stem;
+}
+
+
+
+#endif /* LPC_PATH_H1118420718_INCLUDE_GUARD_ */
diff --git a/indexlib/pointer.h b/indexlib/pointer.h
new file mode 100644
index 000000000..0699be192
--- /dev/null
+++ b/indexlib/pointer.h
@@ -0,0 +1,107 @@
+#ifndef LPC_POINTER_H1103643194_INCLUDE_GUARD_
+#define LPC_POINTER_H1103643194_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <inttypes.h>
+#include <iostream>
+#include "boost-compat/static_assert.hpp"
+#ifdef HAVE_BOOST
+#include <boost/type_traits.hpp>
+#endif
+
+/**
+ * \class pointer
+ *
+ * Works with \ref thing in providing disk translucency (not transparency, but half-way there).
+ *
+ * This is a pointer-like object which is really an index into a memory block which must be dereferenced
+ * every time it is accessed. The main advantage of this is that it allows the pointer to remain valid even
+ * if the base memory block changes place.
+ *
+ * Actually, implementation-wise, thing does all this already and this is mainly an adapter.
+ *
+ * Index 0 plays the role of the null pointer: the default constructor stores 0, and both
+ * operator!() and the conversion to const volatile void* test idx_ against 0.
+ */
+template <typename Thing >
+struct pointer: private Thing {
+ private:
+ typedef Thing base_type;
+
+ // Builds a pointer from a raw index. Private; cast_from_uint32() is the public way in.
+ pointer( uint32_t i )
+ :base_type( i )
+ {
+ }
+ public:
+ // Null pointer (index 0).
+ pointer()
+ :base_type( 0 )
+ {
+ }
+ typedef Thing value_type;
+ // Converting copy from pointer<D>; compiles only when D is convertible to Thing.
+ template <typename D>
+ pointer( const pointer<D>& other )
+ :base_type( other.cast_to_uint32() )
+ {
+ typedef D derived_type;
+ BOOST_STATIC_ASSERT( (boost::is_convertible<derived_type, value_type>::value ) );
+ }
+ // The raw index held by this pointer.
+ uint32_t cast_to_uint32() const {
+ return this->idx_;
+ }
+ // Inverse of cast_to_uint32(); relies on the private uint32_t constructor.
+ static pointer cast_from_uint32( uint32_t d ) {
+ return d;
+ }
+ // "Dereferencing" yields this very object viewed as its Thing base; constness is cast away.
+ value_type& operator* () const {
+ return const_cast<value_type&>( static_cast<const value_type&>( *this ) );
+ }
+ value_type* operator -> () const {
+ return const_cast<value_type*>( static_cast<const value_type*>( this ) );
+ }
+ // True for the null pointer (index 0).
+ bool operator!() const {
+ return !this->idx_;
+ }
+
+ // Allows `if ( p )`: non-null when the index is non-zero, 0 otherwise.
+ operator const volatile void*() const {
+ return this->idx_ ? this : 0;
+ }
+ // Forwards to Thing::base().
+ void* raw_pointer() { return base_type::base(); }
+ private:
+};
+
+
+
+/**
+ * Streams a pointer as the numeric index it holds.
+ */
+template <typename T>
+std::ostream& operator << ( std::ostream& out, const pointer<T>& p ) {
+    out << p.cast_to_uint32();
+    return out;
+}
+
+
+
+#endif /* LPC_POINTER_H1103643194_INCLUDE_GUARD_ */
diff --git a/indexlib/quotes.cpp b/indexlib/quotes.cpp
new file mode 100644
index 000000000..b84ac86c4
--- /dev/null
+++ b/indexlib/quotes.cpp
@@ -0,0 +1,92 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "quotes.h"
+#include "match.h"
+#include "path.h"
+#include "result.h"
+#include "format.h"
+
+// Opens the two on-disk parts of the index: impl_ (an ifile) is given the
+// "base" path and docs_ (a stringarray) the "docs" path, both derived from
+// `name` by path_concat().
+quotes::quotes( std::string name ):
+ impl_( path_concat( name, "base" ) ),
+ docs_( path_concat( name, "docs" ) )
+{
+}
+
+/**
+ * Removes the on-disk parts of the index called \a name: the ifile under
+ * the "base" path and the stringarray under the "docs" path.
+ */
+void quotes::remove( std::string name ) {
+    const std::string base_path = path_concat( name, "base" );
+    ifile::remove( base_path );
+
+    const std::string docs_path = path_concat( name, "docs" );
+    stringarray::remove( docs_path );
+}
+
+/**
+ * Adds document \a doc with text \a str: the pair goes to the underlying
+ * ifile and the text itself is appended to docs_.
+ *
+ * A std::exception raised by either step is reported on std::cerr and
+ * not propagated.
+ */
+void quotes::add( const char * str, const char* doc ) {
+    assert( str );
+    assert( doc );
+    try {
+        impl_.add( str, doc );
+        docs_.add( str );
+    } catch ( const std::exception& failure ) {
+        std::cerr << "error in quotes::add: ";
+        std::cerr << failure.what();
+        std::cerr << "\n";
+    }
+}
+
+/**
+ * Removes document \a doc: erases the first entry of docs_ that compares
+ * equal to \a doc, then forwards the removal to the underlying ifile.
+ *
+ * NOTE(review): add() stores the document *text* in docs_, while this
+ * compares the entries against the document *name* -- confirm which of
+ * the two is intended.
+ */
+void quotes::remove_doc( const char* doc ) {
+    logfile() << format( "%s( %s )\n" ) % __PRETTY_FUNCTION__ % doc;
+
+    unsigned idx = 0;
+    while ( idx != docs_.size() ) {
+        if ( strcmp( docs_.get_cstr( idx ), doc ) == 0 ) {
+            docs_.erase( idx );
+            break;
+        }
+        ++idx;
+    }
+
+    impl_.remove_doc( doc );
+}
+
+/**
+ * Searches the index.
+ *
+ * A query that does not start with a double quote is passed straight to the
+ * underlying ifile. Otherwise the opening quote (and a closing one, if
+ * present) is stripped and the ifile is searched with the bare phrase; when
+ * the phrase contains a space, each candidate is additionally checked by
+ * running an indexlib::Match for the phrase over the text stored in docs_.
+ */
+std::auto_ptr<indexlib::result> quotes::search( const char* cstr ) const {
+    std::string phrase = cstr;
+    if ( phrase[ 0 ] != '\"' ) return impl_.search( cstr );
+
+    phrase = cstr + 1; // cut "
+    if ( phrase.size() && phrase[ phrase.size() - 1 ] == '\"' ) phrase.erase( phrase.size() - 1 );
+
+    std::auto_ptr<indexlib::result> hits = impl_.search( phrase.c_str() );
+    // a single word needs no phrase check
+    if ( phrase.find( ' ' ) == std::string::npos ) return hits;
+
+    indexlib::Match matcher( phrase );
+    std::vector<unsigned> candidates = hits->list();
+    std::vector<unsigned> confirmed;
+    confirmed.reserve( candidates.size() );
+    for ( std::vector<unsigned>::size_type i = 0; i != candidates.size(); ++i ) {
+        const unsigned id = candidates[ i ];
+        if ( matcher.process( docs_.get_cstr( id ) ) ) confirmed.push_back( id );
+    }
+    return std::auto_ptr<indexlib::result>( new indexlib::detail::simple_result( confirmed ) );
+}
+
diff --git a/indexlib/quotes.h b/indexlib/quotes.h
new file mode 100644
index 000000000..6af33e5d4
--- /dev/null
+++ b/indexlib/quotes.h
@@ -0,0 +1,61 @@
+#ifndef LPC_QUOTES_H1108078052_INCLUDE_GUARD_
+#define LPC_QUOTES_H1108078052_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "index.h"
+#include "ifile.h"
+#include "stringarray.h"
+#include <string>
+
+/**
+ * An indexlib::index that wraps an ifile and additionally keeps the text of
+ * the added documents in a stringarray, which search() uses to check
+ * quoted multi-word queries.
+ */
+struct quotes : public indexlib::index {
+ public:
+ quotes( std::string );
+ virtual void add( const char* str, const char* doc );
+ virtual void remove_doc( const char* doc );
+ virtual std::auto_ptr<indexlib::result> search( const char* ) const;
+
+ // Document bookkeeping and maintenance are delegated to the ifile.
+ virtual unsigned ndocs() const { return impl_.ndocs(); }
+ virtual std::string lookup_docname( unsigned d ) const { return impl_.lookup_docname( d ); }
+
+ virtual void maintenance() { impl_.maintenance(); }
+
+ // Removes the on-disk files of the index with the given base name.
+ static void remove( std::string base );
+ private:
+ ifile impl_; // the actual index
+ stringarray docs_; // document texts, appended by add()
+};
+
+
+
+
+#endif /* LPC_QUOTES_H1108078052_INCLUDE_GUARD_ */
diff --git a/indexlib/result.h b/indexlib/result.h
new file mode 100644
index 000000000..67ed1445f
--- /dev/null
+++ b/indexlib/result.h
@@ -0,0 +1,59 @@
+#ifndef LPC_RESULT_H1118420718_INCLUDE_GUARD_
+#define LPC_RESULT_H1118420718_INCLUDE_GUARD_
+
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "index.h"
+
+namespace indexlib {
+namespace detail {
+
+/**
+ * A result that simply holds a fixed list of document ids.
+ */
+struct simple_result : indexlib::result {
+ public:
+ simple_result( std::vector<unsigned> r ):res( r ) { }
+
+ // Returns a copy of the stored ids.
+ std::vector<unsigned> list() const { return res; }
+ // Refining the result is not implemented: always returns a null auto_ptr.
+ std::auto_ptr<result> search( const char* ) { return std::auto_ptr<result>( 0 ); }
+ private:
+ std::vector<unsigned> res;
+};
+
+/**
+ * A result without any matches.
+ */
+struct empty_result : indexlib::result {
+ public:
+ // Always an empty list.
+ std::vector<unsigned> list() const { return std::vector<unsigned>(); }
+ // Always returns a null auto_ptr.
+ std::auto_ptr<result> search( const char* ) { return std::auto_ptr<result>(); }
+};
+} //namespace detail
+} //namespace indexlib
+
+
+#endif /* LPC_RESULT_H1118420718_INCLUDE_GUARD_ */
diff --git a/indexlib/slow.cpp b/indexlib/slow.cpp
new file mode 100644
index 000000000..fd0e362e6
--- /dev/null
+++ b/indexlib/slow.cpp
@@ -0,0 +1,64 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "slow.h"
+#include "match.h"
+#include "path.h"
+
+using std::string;
+
+// Opens the two string arrays of the index: the document texts under the
+// "strings" path and the document names under the "docs" path, both derived
+// from `str` by path_concat().
+slow::slow( string str ):
+ strings_( path_concat( str, "strings" ) ),
+ docs_( path_concat( str, "docs" ) )
+{
+}
+
+/**
+ * Removes the on-disk files of the index called \a name, i.e. the
+ * "strings" and the "docs" string arrays.
+ */
+void slow::remove( string name )
+{
+    const string strings_path = path_concat( name, "strings" );
+    stringarray::remove( strings_path );
+
+    const string docs_path = path_concat( name, "docs" );
+    stringarray::remove( docs_path );
+}
+
+// Appends the document name to docs_ and its text to strings_. The indices
+// returned by the two add() calls are discarded.
+// NOTE(review): search() returns positions in strings_ while lookup_docname()
+// indexes docs_, so both arrays are presumably expected to stay in lockstep.
+void slow::add( string str, string doc ) {
+ docs_.add( doc );
+ strings_.add( str );
+}
+
+/**
+ * Runs an indexlib::Match for \a str over every stored text and returns
+ * the positions (in insertion order) of the texts it accepts.
+ */
+std::vector<unsigned> slow::search( string str ) const {
+    std::vector<unsigned> hits;
+    indexlib::Match matcher( str );
+
+    unsigned pos = 0;
+    while ( pos != strings_.size() ) {
+        if ( matcher.process( strings_.get_cstr( pos ) ) ) {
+            hits.push_back( pos );
+        }
+        ++pos;
+    }
+    return hits;
+}
+
diff --git a/indexlib/slow.h b/indexlib/slow.h
new file mode 100644
index 000000000..afa45064b
--- /dev/null
+++ b/indexlib/slow.h
@@ -0,0 +1,69 @@
+#ifndef LPC_SLOW_H1106061353_INCLUDE_GUARD_
+#define LPC_SLOW_H1106061353_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <vector>
+#include <string>
+#include "stringarray.h"
+
+/**
+ * \brief A test index
+ *
+ * This is an "index" which just saves all the documents on disk and
+ * searches over them.
+ *
+ * This was intended as a benchmark. It should \em not be used in practice.
+ * The interface is similar to \ref class index
+ */
+struct slow {
+ public:
+ slow( std::string );
+
+ // add( text, document name )
+ void add( std::string, std::string );
+ // Positions of all stored texts that match the query.
+ std::vector<unsigned> search( std::string ) const;
+ // Number of document names stored.
+ unsigned ndocs() const {
+ return docs_.size();
+ }
+ // Name of the i-th document added.
+ std::string lookup_docname( unsigned i ) const {
+ return docs_.get( i );
+ }
+
+ // Removes the on-disk files of the index with the given base name.
+ static void remove( std::string );
+
+ private:
+ stringarray strings_; // document texts
+ stringarray docs_; // document names
+};
+
+
+#endif /* LPC_SLOW_H1106061353_INCLUDE_GUARD_ */
diff --git a/indexlib/stringarray.cpp b/indexlib/stringarray.cpp
new file mode 100644
index 000000000..fb3cf7339
--- /dev/null
+++ b/indexlib/stringarray.cpp
@@ -0,0 +1,114 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "stringarray.h"
+#include "mmap_manager.h"
+#include "bitio.h"
+#include "logfile.h"
+#include "path.h"
+#include "format.h"
+
+/**
+ * Opens the string array stored under \a filebase: the character data lives
+ * in the "string-data" file and the start offsets in the "string-indeces"
+ * memvector. init_structure() is called when the data file is empty.
+ */
+stringarray::stringarray( std::string filebase ):
+    data_( new mmap_manager( path_concat( filebase, "string-data" ) ) ),
+    indeces_( path_concat( filebase, "string-indeces" ) )
+{
+    if ( data_->size() == 0 ) {
+        init_structure();
+    }
+}
+
+/**
+ * Unlinks both files belonging to the string array stored under
+ * \a filebase. The results of unlink() are not inspected.
+ */
+void stringarray::remove( std::string filebase ) {
+    const std::string data_path = path_concat( filebase, "string-data" );
+    unlink( data_path.c_str() );
+
+    const std::string index_path = path_concat( filebase, "string-indeces" );
+    unlink( index_path.c_str() );
+}
+
+// Intentionally empty: there is no explicit cleanup to perform here.
+stringarray::~stringarray() {
+}
+
+/**
+ * Appends \a str to the array and returns its index.
+ *
+ * The string is stored NUL-terminated directly behind the previous last
+ * string; its start offset is appended to indeces_. The data block is
+ * enlarged first when the new string would not fit.
+ */
+stringarray::index_type stringarray::add( std::string str ) {
+    const unsigned count = indeces_.size();
+
+    // first free byte: right after the terminator of the last stored string
+    unsigned starti = 0;
+    if ( count ) {
+        const index_type previous = indeces_[ count - 1 ];
+        starti = previous + get( count - 1 ).size() + 1;
+    }
+
+    logfile() << format( "add( %s ) at starti = %d, with count = %d\n" ) % str % starti % count;
+
+    const std::string::size_type bytes = str.size() + 1; // text plus terminator
+    if ( starti + bytes > data_->size() ) {
+        data_->resize( data_->size() + bytes );
+    }
+
+    char* const dest = reinterpret_cast<char*>( data_->rw_base( starti ) );
+    strcpy( dest, str.c_str() );
+    indeces_.push_back( starti );
+    return count;
+}
+
+// Removes string number idx. Unless it is the last one, the character data
+// behind it is moved down over it and the start offsets of the later strings
+// are reduced by the length of the removed string (terminator included).
+void stringarray::erase( index_type idx ) {
+ assert( idx < size() );
+ char* target = const_cast<char*>( get_cstr( idx ) );
+ if ( idx != size() - 1 ) {
+ const char* next = get_cstr( idx + 1 );
+ unsigned delta = strlen( target ) + 1;
+ // moves everything from the start of string idx + 1 up to the end of the data block
+ std::memmove( target, next, data_->size() - indeces_[ idx + 1 ] );
+ // Hack: Don't compare the iterators directly, it ices gcc-2.95
+ // The loop starts at idx itself, so that entry is adjusted as well; this
+ // is harmless because the entry is erased right below.
+ for ( memvector<uint32_t>::iterator first = indeces_.begin() + idx, past = indeces_.end(); first.raw() != past.raw(); ++first ) {
+ *first -= delta;
+ }
+ }
+ indeces_.erase( indeces_.begin() + idx );
+}
+
+// Forgets all strings by clearing the offset vector.
+// data_ is created as an mmap_manager in the constructor, and
+// mmap_manager::resize() returns early for any request that does not exceed
+// the current size, so resize( 0 ) does not shrink the data file; the old
+// bytes simply get overwritten by later add() calls.
+void stringarray::clear() {
+ data_->resize( 0 );
+ indeces_.clear();
+
+}
+
+/**
+ * Returns string number \a idx as a pointer into the data block, found via
+ * the start offset stored in indeces_.
+ */
+const char* stringarray::get_cstr( index_type idx ) const {
+    const uint32_t offset = indeces_[ idx ];
+    const unsigned char* const bytes = data_->ronly_base( offset );
+    return reinterpret_cast<const char*>( bytes );
+}
+
+
+// Called by the constructor when the data file is empty; currently there is
+// nothing to set up.
+void stringarray::init_structure() {
+}
+
+/**
+ * Writes every stored string, together with its index, to \a out.
+ */
+void stringarray::print( std::ostream& out ) const {
+    unsigned i = 0;
+    while ( i != indeces_.size() ) {
+        out << format( "string[ %s ] = %s\n" ) % i % get_cstr( i );
+        ++i;
+    }
+}
+
+/**
+ * Consistency check: asserts that the byte just before the start of every
+ * string except the first is a NUL, i.e. that the preceding string is
+ * terminated right there.
+ */
+void stringarray::verify() const {
+    unsigned i = 1;
+    while ( i < indeces_.size() ) {
+        assert( data_->ronly_base( indeces_[ i ] - 1 )[ 0 ] == 0 );
+        ++i;
+    }
+}
+
diff --git a/indexlib/stringarray.h b/indexlib/stringarray.h
new file mode 100644
index 000000000..0994c764f
--- /dev/null
+++ b/indexlib/stringarray.h
@@ -0,0 +1,67 @@
+#ifndef LPC_STRINGARRAY_H1104169100_INCLUDE_GUARD_
+#define LPC_STRINGARRAY_H1104169100_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <iostream>
+#include <string>
+#include "boost-compat/scoped_ptr.hpp"
+#include "manager.h"
+#include "memvector.h"
+
+// Array of C strings addressed by a numeric index. The characters live in
+// data_ and indeces_ records the offset at which each string starts.
+struct stringarray {
+ public:
+ // NOTE(review): the argument is presumably the name of the backing storage - confirm in stringarray.cpp.
+ stringarray( std::string );
+ ~stringarray();
+ typedef unsigned index_type;
+
+ // Stores a copy of the string and returns the index assigned to it.
+ index_type add( std::string );
+ // Removes the string with the given index; offsets of later strings are shifted down.
+ void erase( index_type );
+ // Removes all strings.
+ void clear();
+
+ // Pointer to the stored text of string idx (points into data_).
+ const char* get_cstr( index_type idx ) const;
+ // Same as get_cstr(), but returns an independent std::string copy.
+ std::string get( index_type idx ) const { return std::string( get_cstr( idx ) ); }
+
+ // Number of strings currently stored.
+ unsigned size() const { return indeces_.size(); }
+
+ // Dumps every string, one per line, to the stream.
+ void print( std::ostream& ) const;
+ // Asserts that every string but the first is preceded by a zero byte.
+ void verify() const;
+ // NOTE(review): presumably deletes the storage created for the given name - confirm in stringarray.cpp.
+ static void remove( std::string );
+ private:
+ void init_structure();
+
+ // Character storage holding the strings back to back.
+ boost::scoped_ptr<memory_manager> data_;
+ // indeces_[ i ] is the offset of string i inside data_.
+ memvector<uint32_t> indeces_;
+};
+
+
+#endif /* LPC_STRINGARRAY_H1104169100_INCLUDE_GUARD_ */
diff --git a/indexlib/stringset.cpp b/indexlib/stringset.cpp
new file mode 100644
index 000000000..e10885bac
--- /dev/null
+++ b/indexlib/stringset.cpp
@@ -0,0 +1,106 @@
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+#include "stringset.h"
+#include "path.h"
+#include <cstring>
+using std::strcmp;
+
+/**
+ * Opens the set stored under name (components "strings-set", "ordered-set"
+ * and "trie").
+ *
+ * When the trie is empty it is (re)built from ordered_. trie_[ c ] is set to
+ * the order of the first string whose leading byte is >= c, i.e. the number
+ * of strings whose leading byte is smaller than c. That is the same invariant
+ * add() maintains incrementally, and it is what lower_bound() relies on to
+ * delimit the bucket of a leading byte. Every one of the 256 slots is written,
+ * including those of bytes that start no string.
+ */
+stringset::stringset( std::string name ):
+    strings_( path_concat( name, "strings-set" ) ),
+    ordered_( path_concat( name, "ordered-set" ) ),
+    trie_( path_concat( name, "trie" ) )
+{
+    if ( trie_.empty() ) {
+        trie_.resize( 256 );
+        // Lowest leading byte whose slot has not been filled in yet.
+        unsigned next_byte = 0;
+        for ( unsigned i = 0; i != ordered_.size(); ++i ) {
+            const unsigned cur = static_cast<unsigned char>( strings_.get_cstr( ordered_[ i ] )[ 0 ] );
+            // String i is the first one starting with a byte >= every pending slot up to cur.
+            while ( next_byte <= cur ) {
+                trie_[ next_byte ] = i;
+                ++next_byte;
+            }
+        }
+        // Bytes above the largest leading byte: their bucket starts at the end.
+        while ( next_byte != 256 ) {
+            trie_[ next_byte ] = ordered_.size();
+            ++next_byte;
+        }
+    }
+}
+
+// Removes the three components of the set stored under name: the string
+// array ("strings-set") and the two vectors ("ordered-set" and "trie").
+void stringset::remove( std::string name ) {
+ stringarray::remove( path_concat( name, "strings-set" ) );
+ memvector<stringarray::index_type>::remove( path_concat( name, "ordered-set" ) );
+ memvector<stringarray::index_type>::remove( path_concat( name, "trie" ) );
+}
+
+/**
+ * Returns the pair ( lower_bound( str ), lower_bound( str + char( 254 ) ) ),
+ * which delimits the entries to consider for a "str*" prefix search.
+ */
+std::pair<stringset::const_iterator, stringset::const_iterator> stringset::upper_lower( const char* str ) const {
+    const const_iterator from = lower_bound( str );
+    std::string upper_key( str );
+    upper_key += char( 254 );
+    const const_iterator to = lower_bound( upper_key.c_str() );
+    return std::pair<const_iterator, const_iterator>( from, to );
+}
+
+/**
+ * Binary search for str among the entries that share its leading byte.
+ *
+ * Returns an iterator to the entry equal to str if there is one, otherwise
+ * to the position inside that bucket where str would have to be inserted.
+ *
+ * The leading byte is read as unsigned char: a plain (unsigned) cast would
+ * sign-extend bytes >= 0x80 where char is signed and index far outside the
+ * 256-entry trie_. For byte 255 there is no trie_[ 256 ], so the bucket ends
+ * at end().
+ */
+stringset::const_iterator stringset::lower_bound( const char* str ) const {
+    const unsigned lead = static_cast<unsigned char>( str[ 0 ] );
+    const_iterator top = begin() + trie_[ lead ];
+    const_iterator bottom = ( lead == 255 ) ? end() : begin() + trie_[ lead + 1 ];
+    while ( top < bottom ) {
+        const_iterator middle = top + ( bottom - top ) / 2;
+        int c = strcmp( *middle, str );
+        if ( c == 0 ) return middle;
+        if ( c > 0 ) bottom = middle;
+        else top = middle + 1;
+    }
+    return top;
+}
+
+/**
+ * Returns the position of str in sorted order, or unsigned( -1 ) when str is
+ * not a member of the set.
+ */
+unsigned stringset::order_of( const char* str ) const {
+    const const_iterator where = lower_bound( str );
+    if ( where == end() ) return unsigned( -1 );
+    if ( strcmp( *where, str ) != 0 ) return unsigned( -1 );
+    return where.order();
+}
+
+
+/**
+ * Inserts str unless it is already present and returns its id (the index of
+ * the string inside strings_) in both cases.
+ *
+ * After an insertion the bucket starts of all leading bytes greater than
+ * str[ 0 ] move up by one. The leading byte is taken as unsigned char so
+ * that bytes >= 0x80 do not sign-extend into a bogus loop start (which would
+ * touch the wrong slots or run far outside trie_).
+ */
+stringarray::index_type stringset::add( const char* str ) {
+    const_iterator where = lower_bound( str );
+    if ( where != end() && !strcmp( *where, str ) ) return where.id();
+    stringarray::index_type res = strings_.add( str );
+    // Keep ordered_ sorted: the new id goes where lower_bound() placed it.
+    ordered_.insert( ordered_.begin() + where.order(), res );
+    assert( ordered_.size() == strings_.size() );
+    const unsigned lead = static_cast<unsigned char>( str[ 0 ] );
+    for ( unsigned next = lead + 1; next != 256; ++next ) {
+        ++trie_[ next ];
+    }
+    return res;
+}
+
+/**
+ * Removes every string from the set.
+ *
+ * The per-leading-byte offsets in trie_ are reset as well. Left untouched
+ * they would still describe the old contents, and lower_bound() would hand
+ * out positions past the end of the now empty ordered_.
+ */
+void stringset::clear() {
+    strings_.clear();
+    ordered_.clear();
+    for ( unsigned i = 0; i != trie_.size(); ++i ) {
+        trie_[ i ] = stringarray::index_type( 0 );
+    }
+}
+
+
diff --git a/indexlib/stringset.h b/indexlib/stringset.h
new file mode 100644
index 000000000..0043d7a13
--- /dev/null
+++ b/indexlib/stringset.h
@@ -0,0 +1,161 @@
+#ifndef LPC_STRINGSET_H1106061353_INCLUDE_GUARD_
+#define LPC_STRINGSET_H1106061353_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include "memvector.h"
+#include "stringarray.h"
+#include <iterator>
+#include <iostream>
+#include <utility>
+
+
+/**
+ * A set of strings kept in sorted order.
+ *
+ * strings_ stores the text, ordered_ lists the ids of the strings in sorted
+ * order and trie_ is indexed by the leading byte of a string to narrow down
+ * the range lower_bound() has to search.
+ */
+struct stringset {
+    public:
+        /**
+         * Iterator over the strings in sorted order. Dereferencing yields
+         * the C string, id() its index inside strings_ and order() its
+         * position in sorted order.
+         */
+        struct const_iterator : public std::iterator<STD_NAMESPACE_PREFIX random_access_iterator_tag,const char*> {
+            const char* operator*() const {
+                return mother_->strings_.get_cstr( mother_->ordered_[ idx_ ] );
+            }
+            unsigned id() const {
+                return mother_->ordered_[ idx_ ];
+            }
+            unsigned order() const {
+                return idx_;
+            }
+            const_iterator& operator ++() {
+                ++idx_;
+                return *this;
+            }
+            const_iterator& operator --() {
+                --idx_;
+                return *this;
+            }
+
+            const_iterator& operator +=( ptrdiff_t d ) {
+                idx_ += d;
+                return *this;
+            }
+            const_iterator& operator -=( ptrdiff_t d ) {
+                idx_ -= d;
+                return *this;
+            }
+            const_iterator& operator = ( const const_iterator& other ) {
+                mother_ = other.mother_;
+                idx_ = other.idx_;
+                return *this;
+            }
+            /**
+             * Signed distance between two iterators of the same set. The
+             * operands are widened to ptrdiff_t before subtracting: done in
+             * unsigned, a negative distance would wrap and come out as a huge
+             * positive value where ptrdiff_t is wider than unsigned.
+             */
+            ptrdiff_t operator - ( const const_iterator& other ) const {
+                assert( mother_ == other.mother_ );
+                return static_cast<ptrdiff_t>( idx_ ) - static_cast<ptrdiff_t>( other.idx_ );
+            }
+            bool operator < ( const const_iterator& other ) const {
+                assert( mother_ == other.mother_ );
+                return idx_ < other.idx_;
+            }
+
+            bool operator == ( const const_iterator& other ) const {
+                return mother_ == other.mother_ && idx_ == other.idx_;
+            }
+
+            bool operator != ( const const_iterator& other ) const {
+                return !( *this == other );
+            }
+            const_iterator():
+                mother_( 0 ),
+                idx_( 0 )
+            {
+            }
+            const_iterator( const const_iterator& other ):
+                mother_( other.mother_ ),
+                idx_( other.idx_ )
+            {
+            }
+            private:
+                friend std::ostream& operator << ( std::ostream& out, const const_iterator& other );
+                friend class stringset;
+                const_iterator( const stringset* m, unsigned i ):
+                    mother_( m ),
+                    idx_( i )
+                {
+                }
+
+                // Set being iterated (null for a default-constructed iterator).
+                const stringset* mother_;
+                // Position in sorted order, i.e. an index into mother_->ordered_.
+                unsigned idx_;
+
+        };
+        friend class const_iterator;
+    public:
+        stringset( std::string );
+        /** True when str is a member of the set. */
+        bool count( const char* str ) const { return order_of( str ) != unsigned( -1 ); }
+        /** Id of str (its index inside strings_), or unsigned( -1 ) when absent. */
+        unsigned id_of( const char* str ) const {
+            unsigned order = order_of( str );
+            return order == unsigned( -1 ) ?
+                unsigned( -1 ):
+                ordered_[ order ];
+        }
+        /** Position of str in sorted order, or unsigned( -1 ) when absent. */
+        unsigned order_of( const char* ) const;
+        /** Inserts the string unless already present; returns its id either way. */
+        unsigned add( std::string str ) { return add( str.c_str() ); }
+        unsigned add( const char* );
+
+        void clear();
+
+        /**
+         * Returns std::make_pair( lower_bound( word ), lower_bound( word + char( 254 ) ) )
+         * which makes it easy to implement word* searches
+         */
+        std::pair<const_iterator, const_iterator> upper_lower( const char* ) const;
+
+        const_iterator begin() const { return const_iterator( this, 0 ); }
+        const_iterator end() const { return const_iterator( this, ordered_.size() ); }
+        bool empty() const { return !ordered_.size(); }
+        unsigned size() const { return ordered_.size(); }
+
+        /** Removes the storage of the set with the given name. */
+        static void remove( std::string );
+        /**
+         * Iterator to the entry equal to the argument, or to the place where
+         * it would be inserted among the entries sharing its leading byte.
+         */
+        const_iterator lower_bound( const char* ) const;
+    private:
+        // Text of the strings, addressed by id.
+        stringarray strings_;
+        // Ids of the strings, in sorted order.
+        memvector<stringarray::index_type> ordered_;
+        // Indexed by leading byte: where the strings starting with that byte begin in ordered_.
+        memvector<stringarray::index_type> trie_;
+};
+
+// Returns a copy of a advanced by d positions.
+inline
+stringset::const_iterator operator + ( stringset::const_iterator a, stringset::const_iterator::difference_type d ) {
+    a += d;
+    return a;
+}
+
+// Prints the iterator as "[ position ]", where position is its place in sorted order.
+inline
+std::ostream& operator << ( std::ostream& out, const stringset::const_iterator& other ) {
+    out << "[ " << other.idx_ << " ]";
+    return out;
+}
+
+
+#endif /* LPC_STRINGSET_H1106061353_INCLUDE_GUARD_ */
diff --git a/indexlib/tests/Makefile.am b/indexlib/tests/Makefile.am
new file mode 100644
index 000000000..468c69c64
--- /dev/null
+++ b/indexlib/tests/Makefile.am
@@ -0,0 +1,9 @@
+# The unit-test driver is only built when the enable_indexlib_unittests
+# conditional is true; otherwise TESTDRIVER is empty and nothing is built here.
+if enable_indexlib_unittests
+TESTDRIVER = testdriver
+else
+TESTDRIVER =
+endif
+noinst_PROGRAMS = $(TESTDRIVER)
+# One source file per test suite, plus the driver itself.
+testdriver_SOURCES = testdriver.cpp create-test.cpp ifile-test.cpp leafdatavector-test.cpp match-test.cpp mempool-test.cpp memvector-test.cpp slow-test.cpp stringarray-test.cpp stringset-test.cpp tokenizer-test.cpp
+# -I.. lets the tests include the indexlib headers from the parent directory.
+testdriver_CXXFLAGS = -I.. $(all_includes)
+testdriver_LDADD = ../libindex.la -lboost_unit_test_framework -lz
diff --git a/indexlib/tests/configure.in.in b/indexlib/tests/configure.in.in
new file mode 100644
index 000000000..5b1811045
--- /dev/null
+++ b/indexlib/tests/configure.in.in
@@ -0,0 +1,8 @@
+dnl --enable-indexlib-unittests=yes|no (default: no) sets indexlib_unittests,
+dnl which drives the enable_indexlib_unittests automake conditional.
+dnl Any other value is rejected with an error.
+AC_ARG_ENABLE(indexlib-unittests,
+ [ --enable-indexlib-unittests Enables indexlib's unittests (used for debugging only, needs boost::unit_test)],
+ [case "${enableval}" in
+ yes) indexlib_unittests=true ;;
+ no) indexlib_unittests=false;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-indexlib-unittests) ;;
+ esac],[indexlib_unittests=false])
+ AM_CONDITIONAL(enable_indexlib_unittests, test x$indexlib_unittests = xtrue)
diff --git a/indexlib/tests/create-test.cpp b/indexlib/tests/create-test.cpp
new file mode 100644
index 000000000..beeb7d5f9
--- /dev/null
+++ b/indexlib/tests/create-test.cpp
@@ -0,0 +1,28 @@
+#include <boost/test/unit_test.hpp>
+#include "create.h"
+#include "index.h"
+
+using namespace ::boost::unit_test;
+
+// Test suite for indexlib::create().
+namespace create_test {
+
+// Name of the scratch index used by this suite.
+// NOTE(review): the run of trailing slashes looks deliberate (path handling) - confirm.
+const char* fname = "create-test-delete-me/////";
+
+// Removes the scratch index.
+void cleanup() {
+ indexlib::remove( fname );
+}
+
+// Creating an index under fname must yield a non-null pointer.
+void simple() {
+ cleanup();
+ std::auto_ptr<indexlib::index> ptr = indexlib::create( fname );
+ BOOST_CHECK( ptr.get() );
+}
+
+// Builds the "Create tests" suite from the test cases above.
+test_suite* get_suite() {
+ test_suite* test = BOOST_TEST_SUITE( "Create tests" );
+ test->add( BOOST_TEST_CASE( &simple ) );
+ return test;
+}
+
+}
+
diff --git a/indexlib/tests/ifile-test.cpp b/indexlib/tests/ifile-test.cpp
new file mode 100644
index 000000000..9b9f92832
--- /dev/null
+++ b/indexlib/tests/ifile-test.cpp
@@ -0,0 +1,156 @@
+#include <boost/test/unit_test.hpp>
+#include "ifile.h"
+#include <string>
+#include <stdarg.h>
+
+using namespace ::boost::unit_test;
+// Test suite for ifile: adding documents, searching them and removing them.
+namespace ifile_test {
+//using indexlib::detail::ifile;
+const char* fname = "ifile-test-delete-me";
+
+// Removes the scratch index used by this suite.
+void cleanup() {
+    ifile::remove( fname );
+}
+
+/**
+ * Searches ifi for str and checks that the documents found are exactly the
+ * names passed as extra arguments, in that order.
+ *
+ * The list of names must be terminated by a null pointer of type
+ * const char*. va_end() is reached on every path, whatever the outcome.
+ */
+inline
+bool check_results( const ifile& ifi, const char* str, ... ) {
+    std::vector<unsigned> res = ifi.search( str )->list();
+    unsigned i = 0;
+    bool matches = true;
+
+    va_list args;
+    va_start( args, str );
+    const char* s;
+    while ( ( s = va_arg( args, const char* ) ) != 0 ) {
+        // More names expected than results found, or a name that differs.
+        if ( i == res.size() || std::string( s ) != ifi.lookup_docname( res[ i++ ] ) ) {
+            matches = false;
+            break;
+        }
+    }
+    va_end( args );
+    // Every result must have been matched by a name as well.
+    return matches && i == res.size();
+}
+
+
+// Number of documents a search for str returns.
+inline
+unsigned count_results( const ifile& ifi, const char* str ) {
+    return ifi.search( str )->list().size();
+}
+
+// Two single-word documents: each word finds its own document only.
+void simple() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "this", "doc" );
+    BOOST_CHECK_EQUAL( ifi.search( "this" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "this" )->list()[ 0 ], 0 );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "this" )->list()[ 0 ] ), "doc" );
+    ifi.add( "that", "doc2" );
+    BOOST_CHECK_EQUAL( ifi.search( "this" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "this" )->list()[ 0 ], 0 );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "this" )->list()[ 0 ] ), "doc" );
+
+    BOOST_CHECK_EQUAL( ifi.search( "that" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "that" )->list()[ 0 ], 1 );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "that" )->list()[ 0 ] ), "doc2" );
+}
+
+// ndocs() counts the documents added, and lookup_docname() returns their names in insertion order.
+void ndocs() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "one", "one" );
+    ifi.add( "one", "two" );
+    BOOST_CHECK_EQUAL( ifi.ndocs(), 2 );
+
+    ifi.add( "one", "three" );
+    ifi.add( "one", "four" );
+
+    BOOST_CHECK_EQUAL( ifi.ndocs(), 4 );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( 0 ), std::string( "one" ) );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( 1 ), std::string( "two" ) );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( 2 ), std::string( "three" ) );
+    BOOST_CHECK_EQUAL( ifi.lookup_docname( 3 ), std::string( "four" ) );
+}
+
+// A word in the middle of a space-separated text is found.
+void space() {
+    cleanup();
+    ifile ifi( fname );
+
+    ifi.add( "one two three", "doc" );
+    BOOST_CHECK_EQUAL( ifi.search( "two" )->list().size(), 1 );
+}
+
+// Words made of, or containing, digits. Not registered in get_suite() at present.
+void numbers() {
+    cleanup();
+    ifile ifi( fname );
+
+    ifi.add( "one 123 123456789 four444 five", "doc" );
+    BOOST_CHECK_EQUAL( ifi.search( "123" )->list().size(), 1 );
+    BOOST_CHECK_EQUAL( ifi.search( "123456789" )->list().size(), 1 );
+    BOOST_CHECK_EQUAL( ifi.search( "four444" )->list().size(), 1 );
+    BOOST_CHECK_EQUAL( ifi.search( "five" )->list().size(), 1 );
+}
+
+// Searching for a prefix of a word finds the documents containing the word.
+void partial() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "longword", "doc_0" );
+
+    BOOST_CHECK_EQUAL( ifi.search( "l" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "long" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "longword" )->list().size(), 1u );
+
+    BOOST_CHECK_EQUAL( ifi.search( "longword" )->list().size(), 1u );
+
+    ifi.add( "longnord", "doc_1" );
+    BOOST_CHECK_EQUAL( ifi.search( "l" )->list().size(), 2u );
+    BOOST_CHECK_EQUAL( ifi.search( "long" )->list().size(), 2u );
+    BOOST_CHECK_EQUAL( ifi.search( "longw" )->list().size(), 1u );
+    BOOST_CHECK_EQUAL( ifi.search( "longn" )->list().size(), 1u );
+}
+
+// Multi-word queries over several documents.
+void several() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "one two three four", "0" );
+    ifi.add( "two three four", "1" );
+    ifi.add( "something else", "2" );
+    ifi.add( "something two", "3" );
+    ifi.add( "two something four", "4" );
+    ifi.add( "else something", "5" );
+    ifi.add( "else four", "6" );
+
+    BOOST_CHECK_EQUAL( count_results( ifi, "one" ), 1u );
+    BOOST_CHECK_EQUAL( count_results( ifi, "one two three four" ), 1u );
+    BOOST_CHECK_EQUAL( count_results( ifi, "two three four" ), 2u );
+
+    BOOST_CHECK_EQUAL( count_results( ifi, "one two" ), 1u );
+    BOOST_CHECK_EQUAL( count_results( ifi, "one" ), 1u );
+
+    BOOST_CHECK_EQUAL( count_results( ifi, "something else" ), 2u );
+    BOOST_CHECK_EQUAL( count_results( ifi, "something two" ), 2u );
+}
+
+// After remove_doc(), the removed document no longer shows up in results.
+void remove_doc() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "one two three four", "0" );
+    ifi.add( "two three four", "1" );
+    ifi.add( "three four five", "2" );
+    ifi.remove_doc( "1" );
+
+    // Typed terminator for check_results(): a bare NULL may be passed through
+    // "..." as an integer, which va_arg( args, const char* ) must not read.
+    const char* const end_of_names = 0;
+
+    BOOST_CHECK( check_results( ifi, "one", "0", end_of_names ) );
+    BOOST_CHECK( check_results( ifi, "two", "0", end_of_names ) );
+    BOOST_CHECK( check_results( ifi, "three", "0", "2", end_of_names ) );
+    BOOST_CHECK_EQUAL( count_results( ifi, "four" ), 0u );
+}
+
+// Builds the "Ifile tests" suite from the test cases above.
+test_suite* get_suite() {
+    test_suite* test = BOOST_TEST_SUITE( "Ifile tests" );
+    test->add( BOOST_TEST_CASE( &simple ) );
+    test->add( BOOST_TEST_CASE( &ndocs ) );
+    test->add( BOOST_TEST_CASE( &space ) );
+    //test->add( BOOST_TEST_CASE( &numbers ) );
+    test->add( BOOST_TEST_CASE( &partial ) );
+    test->add( BOOST_TEST_CASE( &several ) );
+    // The test case is remove_doc(); a plain "remove" names the C library function.
+    test->add( BOOST_TEST_CASE( &remove_doc ) );
+    return test;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/large-scale/do-test.zsh b/indexlib/tests/large-scale/do-test.zsh
new file mode 100755
index 000000000..b8d47b45d
--- /dev/null
+++ b/indexlib/tests/large-scale/do-test.zsh
@@ -0,0 +1,55 @@
+#!/usr/bin/env zsh
+
+# Large-scale test: generate.py turns a seed text into documents and word
+# lists, the documents are indexed with indexlibadmin, and every word list is
+# searched for and compared with the result its generated script computes.
+#
+# Usage: do-test.zsh [inputfile]
+
+# The seed text is the first argument; without one the default below is used.
+inputfile=${1:-ulyss12.txt}
+
+indexlibadmin=../../indexlibadmin
+index=index
+
+rm -rf index
+mkdir index
+
+# Stop early when there is no usable seed file instead of generating garbage.
+if [[ -z $inputfile || ! -r $inputfile ]]; then
+    cat 1>&2 <<END
+This test needs a large input file as a seed.
+
+You might consider using http://www.gutenberg.org/ as a starting point to get a file.
+
+Pass the input file as the first argument, or edit this script ($0) to set the default.
+END
+    exit 1
+fi
+
+rm -rf output
+mkdir output/
+
+rm -rf tmp
+mkdir tmp/
+
+# Writes output/text_*, output/words_*.list and output/words_*.script
+python generate.py < $inputfile
+
+$indexlibadmin remove $index
+for t in output/text_*; do
+    $indexlibadmin add $index $t
+done
+
+
+for w in output/words_*.list; do
+    $indexlibadmin search $index "`cat $w`" >tmp/got 2>/dev/null
+    # The matching script leaves the expected result in tmp/expected
+    source output/`basename $w list`script
+    if ! diff -q tmp/got tmp/expected; then
+        cat <<END
+Pattern $w was wrong!
+
+Diff:
+END
+        diff -u tmp/got tmp/expected
+        echo "End of Diff."
+        exit 1
+    fi
+done
+
+rm -f tmp/got tmp/expected tmp/pat
+rmdir tmp
+
diff --git a/indexlib/tests/large-scale/generate.py b/indexlib/tests/large-scale/generate.py
new file mode 100644
index 000000000..3a66df3be
--- /dev/null
+++ b/indexlib/tests/large-scale/generate.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+import random
+import re
+
+def init_chain(infile):
+ chain = {}
+ last = ('','')
+ for line in infile:
+ for word in line.split():
+ if not chain.has_key(last):
+ chain[last]=[]
+ chain[last].append(word)
+ last=(last[1],word)
+ chain[last]=None
+ return chain
+
+def output(chain,length,outputfile):
+ last = ('','')
+ start=2000
+ for i in range(length+start):
+ if chain[last] is None:
+ break
+ word = random.choice(chain[last])
+ last=(last[1],word)
+ if i > start:
+ outputfile.write(word)
+ outputfile.write(' ')
+ outputfile.write("\n")
+
+def get_words(chain,nwords,outputfile,scriptfile):
+ scriptfile.write("(for f in output/text_*; echo $f) > tmp/so_far\n")
+ for i in range(nwords):
+ word='1'
+ while re.compile("\d").search(word):
+ word=random.choice(random.choice(chain.keys()))
+ word=re.sub(r'\W','',word)
+ outputfile.write(word+"\n")
+ scriptfile.write("grep -i -E -e '(\W|^)%s' -l output/text_* >tmp/part_%s\n" % (word,word))
+ scriptfile.write("perl -e '($file1, $file2) = @ARGV; open F2, $file2; while (<F2>) {$h2{$_}++}; open F1, $file1; while (<F1>) {if ($h2{$_}) {print $_; $h2{$_} = 0;}}' tmp/part_%s tmp/so_far >tmp/so_far_\n" % word) # From scriptome
+ scriptfile.write("mv tmp/so_far_ tmp/so_far\n")
+ scriptfile.write("rm tmp/part_%s\n" % word)
+ scriptfile.write("mv tmp/so_far tmp/expected\n")
+
+
+chain=init_chain(file("/dev/stdin"))
+for i in range(10000):
+ output(chain,2000,file("output/text_"+str(i+1),'w'))
+
+
+for i in range(1000):
+ get_words(chain,random.randint(1,5),file("output/words_%s.list"%str(i+1),'w'),file("output/words_%s.script"%str(i+1),'w'))
diff --git a/indexlib/tests/leafdatavector-test.cpp b/indexlib/tests/leafdatavector-test.cpp
new file mode 100644
index 000000000..8763327e6
--- /dev/null
+++ b/indexlib/tests/leafdatavector-test.cpp
@@ -0,0 +1,129 @@
+#include <boost/test/unit_test.hpp>
+#include <map>
+#include "leafdatavector.h"
+
+using namespace ::boost::unit_test;
+// Test suite for leafdatavector: a list of unsigned values per numeric key.
+namespace leafdatavector_test {
+
+const char* fname = "leafdatavector-test-delete-me";
+
+// Removes the scratch storage used by this suite.
+void cleanup() {
+    leafdatavector::remove( fname );
+}
+
+// Values added under a key can be read back.
+void simple() {
+    cleanup();
+    leafdatavector f( fname );
+    f.add( 0, 1 );
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 1u );
+    BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 1u );
+    f.add( 0, 2 );
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+}
+
+// Data written by one object is visible to a later object opened on the same name.
+void persistent() {
+    cleanup();
+    {
+        leafdatavector f( fname );
+        f.add( 0, 1 );
+    }
+    {
+        leafdatavector f( fname );
+        BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 1u );
+    }
+}
+
+// Interleaved additions to several keys end up under the right key.
+void complicated() {
+    cleanup();
+    leafdatavector f( fname );
+
+    f.add( 0, 1 );
+    f.add( 0, 3 );
+    f.add( 1, 3 );
+    f.add( 0, 2 );
+    f.add( 0, 4 );
+    f.add( 1, 8 );
+    f.add( 2, 4 );
+    f.add( 1, 5 );
+    f.add( 2, 5 );
+    f.add( 0, 5 );
+    f.add( 0, 9 );
+
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 6u );
+    BOOST_CHECK_EQUAL( f.get( 1 ).size(), 3u );
+    BOOST_CHECK_EQUAL( f.get( 2 ).size(), 2u );
+    std::vector<unsigned> one = f.get( 1 );
+    // Sorted before comparing, so the check does not depend on storage order.
+    std::sort( one.begin(), one.end() );
+    BOOST_CHECK_EQUAL( one.size(), 3u );
+    BOOST_CHECK_EQUAL( one[ 0 ], 3u );
+    BOOST_CHECK_EQUAL( one[ 1 ], 5u );
+    BOOST_CHECK_EQUAL( one[ 2 ], 8u );
+}
+
+// Adding a value that is already stored under the key does not duplicate it.
+void unique() {
+    cleanup();
+    leafdatavector f( fname );
+
+    f.add( 0, 1 );
+    f.add( 0, 1 );
+    f.add( 0, 1 );
+
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 1u );
+
+    f.add( 0, 4 );
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+
+    f.add( 0, 1 );
+    f.add( 0, 4 );
+
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+
+}
+
+// 32 values for each of 259 keys; every key must report as many values as were added.
+// Plain unsigned is used: the uint typedef is not standard C++.
+void large() {
+    cleanup();
+    leafdatavector f( fname );
+    std::map<unsigned, unsigned> counts;
+
+    for ( unsigned i = 0; i != 32; ++i ) {
+        for ( unsigned j = 0; j != 256 + 3; ++j ) {
+            unsigned ref = i * ( j + 51 ) / 13 + i % 75 + j + 3;
+            f.add( j, ref );
+            ++counts[ j ];
+        }
+    }
+    for ( std::map<unsigned,unsigned>::const_iterator first = counts.begin(), past = counts.end();
+            first != past; ++first ) {
+        BOOST_CHECK_EQUAL( first->second, f.get( first->first ).size() );
+    }
+
+}
+
+// The value 0 can be stored and read back like any other value.
+void one_zero() {
+    cleanup();
+    leafdatavector f( fname );
+
+    f.add( 0, 0 );
+    f.add( 0, 1 );
+    f.add( 0, 3 );
+
+    BOOST_CHECK_EQUAL( f.get( 0 ).size(), 3u );
+    BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 0u );
+    BOOST_CHECK_EQUAL( f.get( 0 )[ 1 ], 1u );
+    BOOST_CHECK_EQUAL( f.get( 0 )[ 2 ], 3u );
+
+}
+
+
+// Builds the "leafdatavector tests" suite from the test cases above.
+test_suite* get_suite() {
+    test_suite* test = BOOST_TEST_SUITE( "leafdatavector tests" );
+    test->add( BOOST_TEST_CASE( &simple ) );
+    test->add( BOOST_TEST_CASE( &persistent ) );
+    test->add( BOOST_TEST_CASE( &complicated ) );
+    test->add( BOOST_TEST_CASE( &unique ) );
+    test->add( BOOST_TEST_CASE( &large ) );
+    test->add( BOOST_TEST_CASE( &one_zero ) );
+    return test;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/match-test.cpp b/indexlib/tests/match-test.cpp
new file mode 100644
index 000000000..16b8a8e95
--- /dev/null
+++ b/indexlib/tests/match-test.cpp
@@ -0,0 +1,99 @@
+#include <boost/test/unit_test.hpp>
+#include "match.h"
+
+using namespace ::boost::unit_test;
+// Test suite for indexlib::Match, a substring matcher built from a pattern.
+namespace match_test {
+using indexlib::Match;
+
+// Nothing to clean up: these tests create no files.
+void cleanup() {
+}
+
+// The pattern matches anywhere inside the text, but only as a contiguous run.
+void simple() {
+ cleanup();
+ Match m( "pat" );
+ BOOST_CHECK_EQUAL( m.process( "not here" ), false );
+ BOOST_CHECK_EQUAL( m.process( "p a t" ), false );
+ BOOST_CHECK_EQUAL( m.process( "pa t" ), false );
+
+
+ BOOST_CHECK_EQUAL( m.process( "pat" ), true );
+ BOOST_CHECK_EQUAL( m.process( "pattern" ), true );
+ BOOST_CHECK_EQUAL( m.process( " pat " ), true );
+ BOOST_CHECK_EQUAL( m.process( "zpat patx ipato " ), true );
+}
+
+// A non-empty pattern does not match empty text; an empty pattern matches anything.
+void empty() {
+ cleanup();
+ {
+ Match m( "pat" );
+ BOOST_CHECK( !m.process( "" ) );
+ }
+ {
+ Match m( "" );
+ BOOST_CHECK( m.process( "" ) );
+ BOOST_CHECK( m.process( "string" ) );
+ }
+}
+
+
+// Pattern and text may be given as std::string.
+void string() {
+ cleanup();
+ Match m( std::string( "pat" ) );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "not here" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "here pattern" ) ), true );
+}
+
+// With the caseinsensitive flag cleared, upper-case text does not match a lower-case pattern.
+void casesensitive() {
+ cleanup();
+ Match m( std::string( "pat" ), ~Match::caseinsensitive );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "PAT" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "aPATa" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pAt" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pattern" ) ), true );
+}
+
+// With the caseinsensitive flag set, case differences are ignored.
+void caseinsensitive() {
+ cleanup();
+ Match m( std::string( "pat" ), Match::caseinsensitive );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "PAT" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "aPATa" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pAt" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pattern" ) ), true );
+}
+
+
+// Patterns of 34 and of exactly 32 characters.
+// NOTE(review): presumably chosen around a 32-character limit inside Match - confirm in match.h.
+void verylarge() {
+ cleanup();
+ Match m( std::string( "pat0123456789012345678901234567890" ) );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "pat0123456789012345678901234567890" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat0123456789012345678901234567890" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat0123456789012345678901234567890xxxxxxxx" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x6789012345678901234567890xxxxxxxx" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x678901234567890123456789xxxxxxxxx" ) ), false );
+
+ m = Match( std::string( "12345678901234567890123456789012" ) );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat012345678901234567890123456789012xxxxxxxxx" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat012345678901234567890123456789012" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x678901234567890123456789xxxxxxxxx" ) ), false );
+}
+
+
+
+
+// Builds the "Match tests" suite from the test cases above.
+test_suite* get_suite() {
+ test_suite* test = BOOST_TEST_SUITE( "Match tests" );
+ test->add( BOOST_TEST_CASE( &simple ) );
+ test->add( BOOST_TEST_CASE( &empty ) );
+ test->add( BOOST_TEST_CASE( &string ) );
+ test->add( BOOST_TEST_CASE( &casesensitive ) );
+ test->add( BOOST_TEST_CASE( &caseinsensitive ) );
+ test->add( BOOST_TEST_CASE( &verylarge ) );
+ return test;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/mempool-test.cpp b/indexlib/tests/mempool-test.cpp
new file mode 100644
index 000000000..a0895243c
--- /dev/null
+++ b/indexlib/tests/mempool-test.cpp
@@ -0,0 +1,53 @@
+#include <boost/test/unit_test.hpp>
+#include "mempool.h"
+#include "leafdata.h"
+
+using namespace ::boost::unit_test;
+// Test suite for mempool, here instantiated with leaf_data_pool_traits over an mmap_manager.
+namespace mempool_test {
+const char* fname = "mempool-test-delete-me";
+
+// Deletes the file backing the pool.
+void cleanup() {
+    ::unlink( fname );
+}
+
+// Blocks handed back to the pool are reused: allocating the same number of
+// blocks again after freeing them must not change pool.size().
+void deallocate() {
+    cleanup();
+    mempool<leaf_data_pool_traits> pool( std::auto_ptr<memory_manager>( new mmap_manager( fname ) ) );
+
+    std::vector<leafdataptr> pointers;
+    for ( int i = 0; i != 32; ++i ) {
+        pointers.push_back( pool.allocate( 16 ) );
+        leafdata::init( pointers.back() );
+    }
+    const unsigned size = pool.size();
+
+    // Indexed with the vector's own size type to avoid a signed/unsigned comparison.
+    for ( std::vector<leafdataptr>::size_type i = 0; i != pointers.size(); ++i ) {
+        pool.deallocate(pointers.at(i));
+    }
+
+    for ( int i = 0; i != 32; ++i ) {
+        pointers.push_back( pool.allocate( 16 ) );
+        leafdata::init( pointers.back() );
+    }
+    BOOST_CHECK_EQUAL( size, pool.size() );
+}
+
+// Allocations of 4095 bytes up to 32 KiB must simply succeed; nothing is checked beyond that.
+void large() {
+    cleanup();
+    mempool<leaf_data_pool_traits> pool( std::auto_ptr<memory_manager>( new mmap_manager( fname ) ) );
+
+    pool.allocate( 4095 );
+    pool.allocate( 4097 );
+    pool.allocate( 4096*2 );
+    pool.allocate( 4096*4 );
+    pool.allocate( 4096*8 );
+}
+
+// Builds the "Mempool Tests" suite from the test cases above.
+test_suite* get_suite() {
+    test_suite* test = BOOST_TEST_SUITE( "Mempool Tests" );
+    test->add( BOOST_TEST_CASE( &deallocate ) );
+    test->add( BOOST_TEST_CASE( &large ) );
+    return test;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/memvector-test.cpp b/indexlib/tests/memvector-test.cpp
new file mode 100644
index 000000000..60023a4d1
--- /dev/null
+++ b/indexlib/tests/memvector-test.cpp
@@ -0,0 +1,258 @@
+#include <boost/test/unit_test.hpp>
+#include <boost/format.hpp>
+#include <iostream>
+using namespace ::boost::unit_test;
+
+#include <unistd.h>
+#include "memvector.h"
+
+namespace memvector_test {
+
+const char* fname = "test.vector-delete-me";
+void cleanup() { // Calls memvector's static remove() on fname so each test starts without data stored by a previous one.
+ memvector<uint32_t>::remove( fname );
+}
+void test_size() { // size() must equal the number of push_back() calls.
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 3 );
+ test.push_back( 4 );
+ BOOST_CHECK_EQUAL( test.size(), 4u );
+}
+
+template <typename T> // T is the element type; get_suite() instantiates this for uint32_t, uint16_t and uint8_t
+void test_put_recover() { // Stores 20 computed values and checks operator[] returns each of them.
+ cleanup();
+ memvector<T> test( fname );
+ for ( int i = 0; i != 20; ++i ) {
+ test.push_back( T( i*13 + i*i*45 + 23 ) ); // converted to T before storing, so narrow types store the truncated value
+ }
+ for ( int i = 0; i != 20; ++i ) {
+ BOOST_CHECK_EQUAL( test[ i ], T( i*13 + i*i*45 + 23 ) ); // expected value goes through the same conversion to T
+ }
+}
+
+void resize() { // resize() must change size() to the requested element count.
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.resize( 50 ); // grow from 1 element to 50; the new elements' values are not checked
+ BOOST_CHECK_EQUAL( test.size(), 50u );
+}
+
+
+void test_persistent() { // Data written by one memvector object must be readable by a second object opened on the same name.
+ cleanup();
+ { // first object: write five values, then let it go out of scope
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 3 );
+ test.push_back( 4 );
+ test.push_back( 5 );
+ }
+ { // second object: same name, must see the five values
+ memvector<uint32_t> test( fname );
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ for ( unsigned i = 0; i != test.size(); ++i )
+ BOOST_CHECK_EQUAL( test[ i ], i + 1 ); // element i was stored as i + 1
+ }
+}
+
+void test_insert() { // insert() in the middle must shift later elements up and grow size() by one.
+ cleanup();
+ memvector<uint16_t> test( fname );
+ test.push_back( 12 );
+ test.push_back( 12 );
+ test.push_back( 12 );
+ test.push_back( 12 );
+
+ test.insert( test.begin() + 2, 13 ); // new value goes to index 2
+
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ BOOST_CHECK_EQUAL( test[ 0 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 13u );
+ BOOST_CHECK_EQUAL( test[ 3 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 4 ], 12u );
+}
+
+void test_iterator() { // Reads and writes through memvector::iterator and checks the writes are visible through operator[] and begin().
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 1u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 2u );
+
+ BOOST_CHECK_EQUAL( *test.begin(), 1u );
+ BOOST_CHECK_EQUAL( *( test.begin() + 1 ), 2u );
+
+ memvector<unsigned>::iterator iter = test.begin();
+
+ BOOST_CHECK_EQUAL( *iter, 1u );
+
+ BOOST_CHECK( test.begin() == iter );
+
+ *iter= 3; // write through the iterator to element 0
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 3u );
+ BOOST_CHECK_EQUAL( *iter, 3u );
+ BOOST_CHECK_EQUAL( *test.begin(), 3u );
+
+ ++iter; // now at element 1
+
+ BOOST_CHECK_EQUAL( *iter, 2u );
+
+ *iter = 5;
+
+ BOOST_CHECK_EQUAL( *iter, 5u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 5u );
+
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() ); // end() - begin() must track size() as the vector grows
+ test.push_back( 5 );
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() );
+ test.push_back( 5 );
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() );
+}
+
+void test_iteration() { // Walks an iterator forwards and backwards (++, +=, --) and checks it against begin()/end().
+ cleanup();
+ memvector<unsigned> test( fname );
+
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 5 );
+ test.push_back( 3 );
+
+ memvector<unsigned>::iterator iter = test.begin(); // mutable iterator: the test assigns through it below, which a const_iterator should not allow
+
+ BOOST_CHECK( iter == test.begin() );
+ BOOST_CHECK( iter != test.end() );
+
+ BOOST_CHECK_EQUAL( *iter, 1u );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, 2u );
+ iter += 2; // from index 1 to index 3
+ BOOST_CHECK_EQUAL( *iter, 3u );
+ *iter = 7;
+ BOOST_CHECK_EQUAL( *iter, 7u );
+ --iter; // back to index 2
+ BOOST_CHECK_EQUAL( *iter, 5u );
+ BOOST_CHECK( iter != test.end() );
+ iter += 2; // index 4 == size(), i.e. end()
+ BOOST_CHECK( iter == test.end() );
+}
+
+void test_sort() { // Runs std::min_element, std::max_element and std::sort over memvector iterators.
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 10 );
+ test.push_back( 0 );
+ test.push_back( 14 );
+ test.push_back( 8 );
+ test.push_back( 12 );
+ test.push_back( 5 );
+ test.push_back( 4 );
+ test.push_back( 3 );
+
+
+ BOOST_CHECK_EQUAL( *std::min_element( test.begin(), test.end() ), 0u ); // unsigned literals: elements are unsigned, as in the other tests of this file
+ BOOST_CHECK( std::min_element( test.begin(), test.end() ) == test.begin() + 1 ); // the 0 was pushed second
+ BOOST_CHECK_EQUAL( *std::max_element( test.begin(), test.end() ), 14u );
+ BOOST_CHECK( std::max_element( test.begin(), test.end() ) == test.begin() + 2 ); // the 14 was pushed third
+
+ std::sort( test.begin(), test.end() );
+ BOOST_CHECK_EQUAL( test[ 0 ], 0u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 3u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 4u );
+ BOOST_CHECK_EQUAL( test[ 3 ], 5u );
+ BOOST_CHECK_EQUAL( test[ 4 ], 8u );
+ BOOST_CHECK_EQUAL( test[ 5 ], 10u );
+ BOOST_CHECK_EQUAL( test[ 6 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 7 ], 14u );
+}
+
+void remove() { // After memvector::remove( fname ), a vector opened on the same name must be empty.
+ {
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 1 );
+ BOOST_CHECK_EQUAL( test.size(), 1u ); // unsigned literal, matching the size() checks elsewhere in this file
+ }
+ memvector<unsigned>::remove( fname ); // called only after the first object has gone out of scope
+ memvector<unsigned> test( fname );
+ BOOST_CHECK_EQUAL( test.size(), 0u );
+}
+
+void assign() { // Assigning through operator[] must overwrite the stored element.
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test[ 0 ] = 3;
+ BOOST_CHECK_EQUAL( test[ 0 ], 3u );
+}
+
+void erase() { // erase() must drop exactly one element, close the gap and shrink size(); covers a middle and a last element.
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test.push_back( 4 );
+ test.push_back( 8 );
+ test.push_back( 16 );
+ test.push_back( 32 );
+
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ test.erase( test.begin() + 1 ); // removes the 4
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 2u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 8u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 16u );
+ BOOST_CHECK_EQUAL( test[ 3 ], 32u );
+ BOOST_CHECK_EQUAL( test.size(), 4u );
+
+ test.erase( test.begin() + 3 ); // removes the last element (32)
+ BOOST_CHECK_EQUAL( test[ 0 ], 2u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 8u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 16u );
+ BOOST_CHECK_EQUAL( test.size(), 3u );
+
+}
+
+void clear() { // clear() must leave the vector with size() == 0.
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test.push_back( 4 );
+ test.push_back( 8 );
+ test.push_back( 16 );
+ test.push_back( 32 );
+
+ test.clear();
+
+
+ BOOST_CHECK_EQUAL( test.size(), 0u );
+}
+
+test_suite* get_suite() { // Builds the "Memvector tests" suite from the test functions of this namespace.
+ test_suite* test = BOOST_TEST_SUITE( "Memvector tests" );
+ test->add( BOOST_TEST_CASE( &test_size ) );
+ test->add( BOOST_TEST_CASE( &test_put_recover<uint32_t> ) ); // one test case per element width
+ test->add( BOOST_TEST_CASE( &test_put_recover<uint16_t> ) );
+ test->add( BOOST_TEST_CASE( &test_put_recover<uint8_t> ) );
+ test->add( BOOST_TEST_CASE( &resize ) );
+ test->add( BOOST_TEST_CASE( &test_persistent ) );
+ test->add( BOOST_TEST_CASE( &remove ) );
+ test->add( BOOST_TEST_CASE( &assign ) );
+ test->add( BOOST_TEST_CASE( &erase ) );
+ test->add( BOOST_TEST_CASE( &clear ) );
+ return test; // NOTE(review): test_insert, test_iterator, test_iteration and test_sort are defined in this file but never added here; confirm whether that is intentional
+
+}
+
+} // namespace
+
+
diff --git a/indexlib/tests/run-tests.zsh b/indexlib/tests/run-tests.zsh
new file mode 100755
index 000000000..46a10c7b4
--- /dev/null
+++ b/indexlib/tests/run-tests.zsh
@@ -0,0 +1,52 @@
+#!/usr/bin/env zsh
+
+index=delete-me
+files=(
+ one ' On October 11th 2005, the KDE Project released KOffice 1.4.2. KOffice is a free light-weight yet feature rich office solution that integrates with KDE, supports the OASIS OpenDocument file format as does OpenOffice.org 2 and provides filters for other office suites such as Microsoft Office. Read the KOffice 1.4.2 Release Notes.'
+ two 'KDE is a powerful Free Software graphical desktop environment for Linux and Unix workstations. It combines ease of use, contemporary functionality, and outstanding graphical design with the technological superiority of the Unix operating system. More... '
+ three 'The YaKuake Package for Debian sarge and sid.
+ Yet Another Kuake aka YaKuake VERSION 2.6
+ http://www.kde-look.org/content/show.php?content=29153
+
+ have fun!
+
+ Thx OldKid for compile on debian amd64.
+
+
+'
+ numbers '123456789'
+)
+expected=( \
+ kde "onetwothree"
+ noshow "Empty results"
+ poWeRFuL 'two'
+ 'kde BUT debian' "onetwo"
+ debian 'three'
+ '12345678' 'numbers'
+ )
+driver=./indexlibadmin
+unittests=./testdriver
+
+$unittests
+
+echo "Running tests on the command line..."
+
+mkdir $index
+for name data in $files; do
+ $driver add $index $name - <<<$data
+done
+
+for q res in $expected ; do # $expected holds (query, expected output) pairs
+ $driver search $index $q | tr -d '\n' | read got # result lines joined into a single word
+ if test "$res" != "$got"; then # quoted: zsh drops an unquoted empty $got, which broke the test expression and hid the failure
+ echo "ERROR in test '$q'"
+ echo "EXPECTED:"
+ echo -$res-
+ echo "GOT:"
+ echo -$got-
+ fi
+done
+
+rm -rf $index
+
+echo "done."
diff --git a/indexlib/tests/slow-test.cpp b/indexlib/tests/slow-test.cpp
new file mode 100644
index 000000000..05b687913
--- /dev/null
+++ b/indexlib/tests/slow-test.cpp
@@ -0,0 +1,13 @@
+#include "slow.h"
+
+#include <boost/test/unit_test.hpp>
+
+using namespace ::boost::unit_test;
+namespace slow_test {
+const char* fname = "slow.test-delete-me";
+
+void cleanup() { // Calls slow::remove on fname; nothing in this file calls it, and the namespace defines no tests.
+ slow::remove( fname );
+}
+
+}
diff --git a/indexlib/tests/stringarray-test.cpp b/indexlib/tests/stringarray-test.cpp
new file mode 100644
index 000000000..d0f5ecefa
--- /dev/null
+++ b/indexlib/tests/stringarray-test.cpp
@@ -0,0 +1,104 @@
+#include <boost/test/unit_test.hpp>
+using namespace ::boost::unit_test;
+
+#include <unistd.h>
+#include "stringarray.h"
+
+namespace stringarray_test {
+
+const char* fname = "test.stringarray-delete-me";
+void cleanup() { // Calls stringarray::remove on fname; the tests below call this as their last step rather than their first.
+ stringarray::remove( fname );
+}
+
+void test_size() { // Adds the same string four times; the size assertion is commented out, so this only checks that add() completes.
+ stringarray test( fname );
+ test.add( "one" );
+ test.add( "one" );
+ test.add( "one" );
+ test.add( "one" );
+ //BOOST_CHECK_EQUAL( test.size(), 4 );
+ cleanup(); // NOTE(review): runs while `test` is still alive, and is skipped if anything above throws
+}
+
+void test_put_recover() { // add() must return consecutive indices starting at 0, and get() must return the string stored at each index.
+ stringarray test( fname );
+ BOOST_CHECK_EQUAL( test.add( "one" ), 0 );
+ BOOST_CHECK_EQUAL( test.add( "two" ), 1 );
+ BOOST_CHECK_EQUAL( test.add( "three" ), 2 );
+ BOOST_CHECK_EQUAL( test.add( "four" ), 3 );
+
+ BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+ BOOST_CHECK_EQUAL( test.get( 1 ), "two" );
+ BOOST_CHECK_EQUAL( test.get( 2 ), "three" );
+ BOOST_CHECK_EQUAL( test.get( 3 ), "four" );
+
+ cleanup();
+}
+
+void test_persistent() { // Strings added through one stringarray object must be readable through a second object opened on the same name.
+ { // first object: store four strings, then let it go out of scope
+ stringarray test( fname );
+ BOOST_CHECK_EQUAL( test.add( "one" ), 0 );
+ BOOST_CHECK_EQUAL( test.add( "two" ), 1 );
+ BOOST_CHECK_EQUAL( test.add( "three" ), 2 );
+ BOOST_CHECK_EQUAL( test.add( "four" ), 3 );
+ }
+ { // second object: same name, must return the same strings
+ stringarray test( fname );
+
+ //BOOST_CHECK_EQUAL( test.size(), 4 );
+ BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+ BOOST_CHECK_EQUAL( test.get( 1 ), "two" );
+ BOOST_CHECK_EQUAL( test.get( 2 ), "three" );
+ BOOST_CHECK_EQUAL( test.get( 3 ), "four" );
+
+ }
+ cleanup();
+}
+
+void cstr() { // get_cstr() must return a C string equal to the string stored at that index.
+ stringarray test( fname );
+
+ test.add( "one" );
+ test.add( "two" );
+ test.add( "three" );
+ test.add( "four" );
+
+ BOOST_CHECK( !strcmp( test.get_cstr( 0 ), "one" ) ); // NOTE(review): strcmp is used without a direct <cstring>/<string.h> include in this file
+ BOOST_CHECK( strcmp( test.get_cstr( 0 ), "not one" ) ); // negative check: a different string must compare unequal
+ BOOST_CHECK( !strcmp( test.get_cstr( 1 ), "two" ) );
+ BOOST_CHECK( !strcmp( test.get_cstr( 2 ), "three" ) );
+ BOOST_CHECK( !strcmp( test.get_cstr( 3 ), "four" ) );
+
+ cleanup();
+}
+
+void erase() { // erase( 1 ) must remove the second string, move the later ones down and reduce size() to 3.
+ stringarray test( fname );
+
+ test.add( "one" );
+ test.add( "two" );
+ test.add( "three" );
+ test.add( "four" );
+
+ test.erase( 1 ); // removes "two"
+ BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+ BOOST_CHECK_EQUAL( test.get( 1 ), "three" );
+ BOOST_CHECK_EQUAL( test.size(), 3u );
+ cleanup();
+}
+
+
+test_suite* get_suite() { // Builds the stringarray suite from the test functions of this namespace.
+ test_suite* test = BOOST_TEST_SUITE( "Stringarray tests" ); // previously "Memvector tests", the same name the memvector suite uses
+ test->add( BOOST_TEST_CASE( &test_size ) );
+ test->add( BOOST_TEST_CASE( &test_put_recover ) );
+ test->add( BOOST_TEST_CASE( &test_persistent ) );
+ test->add( BOOST_TEST_CASE( &cstr ) );
+ test->add( BOOST_TEST_CASE( &erase ) );
+ return test;
+
+}
+
+} //namespace
diff --git a/indexlib/tests/stringset-test.cpp b/indexlib/tests/stringset-test.cpp
new file mode 100644
index 000000000..56d326950
--- /dev/null
+++ b/indexlib/tests/stringset-test.cpp
@@ -0,0 +1,194 @@
+#include <boost/test/unit_test.hpp>
+#include "stringset.h"
+
+using namespace ::boost::unit_test;
+namespace stringset_test {
+
+const char* fname = "stringset-test-delete-me";
+void cleanup() { // Calls stringset::remove on fname; every test below calls this first so it starts from an empty set.
+ stringset::remove( fname );
+}
+
+void simple() { // count() must be non-zero for strings that were added and zero for strings that were not.
+ cleanup();
+ stringset set( fname );
+ set.add( "string1" );
+ set.add( "string2" );
+
+ BOOST_CHECK( set.count( "string1" ) );
+ BOOST_CHECK( set.count( "string2" ) );
+
+ BOOST_CHECK( !set.count( "string3" ) ); // differs from the stored strings only in its last character
+ BOOST_CHECK( !set.count( "other" ) );
+}
+
+void empty() { // A set opened right after cleanup() must report empty().
+ cleanup();
+ stringset set( fname );
+ BOOST_CHECK( set.empty() );
+}
+
+
+void persistent() { // Strings added through one stringset object must be found through a second object opened on the same name.
+ cleanup();
+ { // first object: add three strings and check them immediately
+ stringset set( fname );
+ set.add( "string" );
+ set.add( "victor" );
+ set.add( "file" );
+
+ BOOST_CHECK( set.count( "string" ) );
+ BOOST_CHECK( set.count( "victor" ) );
+ BOOST_CHECK( set.count( "file" ) );
+ }
+ { // second object: same name, nothing added, must still find all three
+ stringset set( fname );
+ BOOST_CHECK( set.count( "string" ) );
+ BOOST_CHECK( set.count( "victor" ) );
+ BOOST_CHECK( set.count( "file" ) );
+ }
+}
+
+void iterator() { // With one element, begin() must dereference to it and a single ++ must reach end().
+ cleanup();
+ stringset set( fname );
+ set.add( "string" );
+
+ stringset::const_iterator iter = set.begin();
+
+ BOOST_CHECK_EQUAL( std::string( "string" ), *iter );
+ BOOST_CHECK_EQUAL( set.begin(), iter );
+ BOOST_CHECK( !( set.end() == iter ) ); // written with == and !, so operator!= is not exercised here
+ ++iter;
+ BOOST_CHECK_EQUAL( set.end(), iter );
+}
+
+void order() { // Iteration must yield the strings in sorted order, not in insertion order.
+ cleanup();
+ stringset set( fname );
+
+ set.add( "two" ); // inserted out of order on purpose
+ set.add( "wlast" );
+ set.add( "one" );
+
+ stringset::const_iterator iter = set.begin();
+
+ BOOST_CHECK_EQUAL( *iter, std::string( "one" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, std::string( "two" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, std::string( "wlast" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( iter, set.end() );
+}
+
+void order_of() { // order_of() must return a string's position in sorted order, so it changes when an earlier-sorting string is added.
+ cleanup();
+ stringset set( fname );
+ set.add( "one" );
+ BOOST_CHECK_EQUAL( set.order_of( "one" ), 0 );
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), unsigned( -1 ) ); // unsigned( -1 ) is the expected result for a string not in the set
+ set.add( "two" );
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), 1 );
+ set.add( "before" ); // sorts ahead of both existing strings
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), 2 );
+ BOOST_CHECK_EQUAL( set.order_of( "one" ), 1 );
+ BOOST_CHECK_EQUAL( set.order_of( "before" ), 0 );
+}
+
+void id_of() { // id_of() must follow insertion order and stay unchanged when later strings sort ahead of earlier ones.
+ cleanup();
+ stringset set( fname );
+ set.add( "one" );
+ BOOST_CHECK_EQUAL( set.id_of( "one" ), 0 );
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), unsigned( -1 ) ); // unsigned( -1 ) is the expected result for a string not in the set
+ set.add( "two" );
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), 1 );
+ set.add( "before" ); // sorts first, but was added third
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), 1 );
+ BOOST_CHECK_EQUAL( set.id_of( "one" ), 0 );
+ BOOST_CHECK_EQUAL( set.id_of( "before" ), 2 );
+}
+
+void add_return() { // add() must return 0, 1, 2 for three successive new strings, whatever their sort order.
+ cleanup();
+ stringset set( fname );
+ BOOST_CHECK_EQUAL( set.add( "one" ), 0 );
+ BOOST_CHECK_EQUAL( set.add( "two" ), 1 );
+ BOOST_CHECK_EQUAL( set.add( "before" ), 2 );
+}
+
+void lower() { // lower_bound( key ) must point at the first stored string that does not sort before key.
+ cleanup();
+ stringset set( fname );
+ set.add( "aab" );
+ set.add( "aac" );
+ set.add( "aba" );
+ set.add( "abc" );
+ set.add( "acc" );
+
+ BOOST_CHECK_EQUAL( std::string( *set.lower_bound( "ab" ) ), "aba" ); // "ab" is not stored; "aba" is the next string after it
+ BOOST_CHECK_EQUAL( std::string( *set.lower_bound( "abz" ) ), "acc" ); // "abz" falls between "abc" and "acc"
+}
+
+void lower_upper() { // upper_lower( prefix ) must return an iterator pair delimiting the stored strings that start with prefix.
+ cleanup();
+ stringset set( fname );
+ set.add( "aab" );
+ set.add( "aac" );
+ set.add( "aba" );
+ set.add( "abc" );
+ set.add( "acc" );
+
+ std::pair<stringset::const_iterator,stringset::const_iterator> limits;
+ stringset::const_iterator& upper = limits.first; // despite the name, this is the start of the range in the checks below
+ stringset::const_iterator& lower = limits.second; // and this is the one-past-the-end position
+
+
+ limits = set.upper_lower( "ab" ); // the references above follow every reassignment of limits
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 2u ); // "aba" and "abc"
+ BOOST_CHECK_EQUAL( std::string( *upper ), "aba" );
+ ++upper;
+ BOOST_CHECK_EQUAL( std::string( *upper ), "abc" );
+ ++upper;
+ BOOST_CHECK( upper == lower );
+
+ limits = set.upper_lower( "abc" );
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 1u ); // only "abc" itself
+ BOOST_CHECK_EQUAL( std::string( *upper ), "abc" );
+
+ limits = set.upper_lower( "abz" );
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 0u ); // nothing starts with "abz": empty range
+}
+
+void clear() { // clear() must leave the set with size() == 0.
+ cleanup();
+ stringset set( fname );
+ set.add( "string1" );
+ set.add( "string2" );
+ set.add( "one" );
+ set.add( "two" );
+ set.add( "three" );
+
+ set.clear();
+ BOOST_CHECK_EQUAL( set.size(), 0u );
+}
+
+test_suite* get_suite() { // Builds the "Stringset tests" suite; every test function of this namespace is registered.
+ test_suite* suite = BOOST_TEST_SUITE( "Stringset tests" );
+ suite->add( BOOST_TEST_CASE( &simple ) );
+ suite->add( BOOST_TEST_CASE( &empty ) );
+ suite->add( BOOST_TEST_CASE( &persistent ) );
+ suite->add( BOOST_TEST_CASE( &iterator ) );
+ suite->add( BOOST_TEST_CASE( &order ) );
+ suite->add( BOOST_TEST_CASE( &order_of ) );
+ suite->add( BOOST_TEST_CASE( &id_of ) );
+ suite->add( BOOST_TEST_CASE( &add_return ) );
+ suite->add( BOOST_TEST_CASE( &lower ) );
+ suite->add( BOOST_TEST_CASE( &lower_upper ) );
+ suite->add( BOOST_TEST_CASE( &clear ) );
+ return suite;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/testdriver.cpp b/indexlib/tests/testdriver.cpp
new file mode 100644
index 000000000..db11e0366
--- /dev/null
+++ b/indexlib/tests/testdriver.cpp
@@ -0,0 +1,61 @@
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+#include <boost/test/unit_test.hpp>
+#include "logfile.h"
+using namespace ::boost::unit_test;
+
+namespace memvector_test { test_suite* get_suite(); } // each test file exposes get_suite() in its own namespace; declared here rather than through headers
+namespace stringarray_test { test_suite* get_suite(); }
+namespace match_test { test_suite* get_suite(); }
+namespace stringset_test { test_suite* get_suite(); }
+namespace leafdatavector_test { test_suite* get_suite(); }
+namespace ifile_test { test_suite* get_suite(); }
+namespace mempool_test { test_suite* get_suite(); }
+namespace indexlib { namespace tests { namespace tokenizer_test { test_suite* get_suite(); } } } // tokenizer-test.cpp defines its suite inside indexlib::tests, so the declaration must match
+namespace create_test { test_suite* get_suite(); }
+
+test_suite* init_unit_test_suite( int argc, char* argv[] ) // Boost.Test entry point: returns the master suite; argc and argv are not used
+{
+ redirectlog( "unittest.log" );
+ test_suite* test = BOOST_TEST_SUITE( "Master test suite" );
+
+ test->add( memvector_test::get_suite() );
+ test->add( stringarray_test::get_suite() );
+ test->add( match_test::get_suite() );
+ test->add( stringset_test::get_suite() );
+ test->add( leafdatavector_test::get_suite() );
+ test->add( ifile_test::get_suite() );
+ test->add( mempool_test::get_suite() );
+ test->add( indexlib::tests::tokenizer_test::get_suite() ); // fully qualified: a global ::tokenizer_test::get_suite is never defined
+ test->add( create_test::get_suite() );
+
+ return test;
+}
+
diff --git a/indexlib/tests/tokenizer-test.cpp b/indexlib/tests/tokenizer-test.cpp
new file mode 100644
index 000000000..372859d90
--- /dev/null
+++ b/indexlib/tests/tokenizer-test.cpp
@@ -0,0 +1,69 @@
+#include <boost/test/unit_test.hpp>
+#include "tokenizer.h"
+#include <cassert>
+
+using namespace ::boost::unit_test;
+namespace indexlib { namespace tests { namespace tokenizer_test {
+
+using indexlib::detail::tokenizer;
+using indexlib::detail::get_tokenizer;
+
+void simple() { // Punctuation must separate words, letters must be upper-cased, and accented Latin-1 letters must fold to their base letter.
+ std::auto_ptr<tokenizer> tok = get_tokenizer( "latin-1:european" ); // named tok so the local no longer hides the tokenizer type
+ assert(tok.get());
+ std::vector<std::string> tokens = tok->string_to_words( "one ,as, ''#`:ThReE, \xe1\xe0\xe2\xe7\xe9" ); // last word given as Latin-1 byte escapes so it does not depend on the source file encoding
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "AS" );
+ expected.push_back( "THREE" );
+ expected.push_back( "AAACE" ); // 0xE1, 0xE0, 0xE2 map to 'A', 0xE7 to 'C', 0xE9 to 'E' in the latin-1 table
+ std::sort( tokens.begin(), tokens.end() ); // compare as sorted lists: token order is not part of this check
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ for ( std::vector<std::string>::size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) { // unsigned index, bounded by both sizes
+ BOOST_CHECK_EQUAL( expected[ i ], tokens[ i ] );
+ }
+}
+
+void with_newlines() { // Newlines must act as word separators.
+ std::auto_ptr<tokenizer> tok = get_tokenizer( "latin-1:european" ); // named tok so the local no longer hides the tokenizer type
+ assert(tok.get());
+ std::vector<std::string> tokens = tok->string_to_words( "one\ntwo\nthree" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "TWO" );
+ expected.push_back( "THREE" );
+ std::sort( tokens.begin(), tokens.end() ); // compare as sorted lists: token order is not part of this check
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ for ( std::vector<std::string>::size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) { // unsigned index, bounded by both sizes
+ BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
+void with_numbers() { // Digit sequences must be kept as words, including a leading zero.
+ std::auto_ptr<tokenizer> tok = get_tokenizer( "latin-1:european" ); // named tok so the local no longer hides the tokenizer type
+ assert(tok.get());
+ std::vector<std::string> tokens = tok->string_to_words( "one 012 123 four" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "012" );
+ expected.push_back( "123" );
+ expected.push_back( "FOUR" );
+ std::sort( tokens.begin(), tokens.end() ); // compare as sorted lists: token order is not part of this check
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ for ( std::vector<std::string>::size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) { // unsigned index, bounded by both sizes
+ BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
+test_suite* get_suite() { // Builds the "Tokenizer tests" suite from the three test functions of this namespace.
+ test_suite* suite = BOOST_TEST_SUITE( "Tokenizer tests" );
+ suite->add( BOOST_TEST_CASE( &simple ) );
+ suite->add( BOOST_TEST_CASE( &with_newlines ) );
+ suite->add( BOOST_TEST_CASE( &with_numbers ) );
+ return suite;
+}
+
+}}} //namespaces
diff --git a/indexlib/thing.h b/indexlib/thing.h
new file mode 100644
index 000000000..26afb07b9
--- /dev/null
+++ b/indexlib/thing.h
@@ -0,0 +1,168 @@
+#ifndef LPC_THING_H1103643194_INCLUDE_GUARD_
+#define LPC_THING_H1103643194_INCLUDE_GUARD_
+
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+
+
+#include <inttypes.h>
+#include "bitio.h"
+#include "pointer.h"
+
+/**
+ * \class thing
+ *
+ * This is perhaps one of the most important classes in the system.
+ *
+ * Ideally one would like to have something like:
+ *
+ * struct_on_disk X {
+ * int32_t a;
+ * int32_t b;
+ * };
+ *
+ * X var;
+ *
+ * And var would be accessed in our exact format. Since I want to control the exact format
+ * to be able to use the same index even in different architectures, it's not really possible.
+ *
+ * So we would do:
+ *
+ * START_THING( X, simple_accessor )
+ * MEMBER( int32_t, a, 0 )
+ * MEMBER( int32_t, b, 4 )
+ * END_THING( X )
+ *
+ * This base class provides the machinery for this.
+ */
+template <typename accessor> // accessor must provide rw_base( idx ) and ronly_base( idx ), which base() calls below
+struct thing : protected accessor { // inheriting (rather than holding a member) allows the empty base optimization
+ protected:
+ thing( uint32_t idx, const accessor& access = accessor() ): // only derived classes (and their friends) construct a thing from an index
+ accessor( access ),
+ idx_( idx )
+ {
+ }
+
+ unsigned char* base() { // writable byte pointer for this object, obtained from the accessor for idx_
+ return reinterpret_cast<unsigned char*>( accessor::rw_base( idx_ ) );
+ }
+ const unsigned char* base() const { // read-only counterpart, using the accessor's ronly_base
+ return reinterpret_cast<const unsigned char*>( accessor::ronly_base( idx_ ) );
+ }
+ public:
+ ~thing() { }
+ thing( const thing& other ): // copies the accessor and the index
+ accessor( static_cast<const accessor&>( other ) ),
+ idx_( other.idx_ )
+ {
+ }
+
+ thing& operator = ( const thing& other ) { // assigns the accessor part, then the index
+ accessor::operator=( other );
+ idx_ = other.idx_;
+ return *this;
+ }
+ protected:
+ uint32_t idx_; // value handed to the accessor to locate this object's bytes
+};
+
+template <void * ( *get_base )()> // get_base: function returning the start of the memory area, fixed at compile time
+struct simple_accessor { // Accessor that treats idx as a byte offset from get_base(); it has no data members.
+ public:
+ void* rw_base( unsigned idx ) const { // address idx bytes past get_base(), writable
+ return reinterpret_cast<unsigned char*>( get_base() ) + idx;
+ }
+ const void* ronly_base( unsigned idx ) const { // same address, read-only
+ return reinterpret_cast<const unsigned char*>( get_base() ) + idx;
+ }
+};
+
+
+#define START_THING( name, base ) /* opens class `name` derived from `base`; must be closed with END_THING( name ) */ \
+ class name : public base { \
+ friend class pointer<name>; /* the constructors below are protected, so pointer<name> is made a friend */ \
+ protected: \
+ name ( const base& b ) \
+ :base( b ) \
+ { \
+ } \
+ \
+ name ( uint32_t i ) \
+ :base( i ) \
+ { \
+ } \
+ public:
+
+#define MEMBER( type, name, idx ) /* generates getter name() and setter set_name() for a `type` stored idx bytes past base() */ \
+ type name() const { \
+ const unsigned char* data = this->base() + idx; \
+ return byte_io::read<type>( data ); \
+ } \
+ \
+ void set_ ## name ( const type & n_ ## name ) { \
+ unsigned char* data = this->base() + idx; \
+ byte_io::write<type>( data, n_ ## name ); \
+ }
+
+#define MY_BASE( idx ) /* adds private my_base() helpers returning base() + idx; the last line's backslash extends the macro onto the following blank line */ \
+ private: \
+ unsigned char* my_base() { return base() + idx; } \
+ const unsigned char* my_base() const { return base() + idx; } \
+
+
+#define END_THING( name ) /* closes the class opened by START_THING and declares the typedef `name`ptr for ::pointer<name> */ \
+ }; \
+ \
+ typedef ::pointer< name > name ## ptr;
+
+#define DO_POINTER_SPECS( name ) /* specializes byte_io read/write/byte_lenght_struct so a pointer<name> is stored as a uint32_t */ \
+ namespace byte_io { \
+ template<> \
+ inline \
+ pointer<name> read< pointer<name> >( const unsigned char* in ) \
+ { \
+ return pointer< name >::cast_from_uint32( read<uint32_t>( in ) ); \
+ }\
+ template<> \
+ inline \
+ void write< pointer<name> >( unsigned char* out, pointer<name> p ) { \
+ write<uint32_t>( out, p.cast_to_uint32() ); \
+ } \
+ template<> \
+ struct byte_lenght_struct< pointer <name> > { \
+ static const unsigned value = byte_lenght_struct<uint32_t>::value; \
+ }; \
+ } // namespace
+
+
+
+
+#endif /* LPC_THING_H1103643194_INCLUDE_GUARD_ */
diff --git a/indexlib/tokenizer.cpp b/indexlib/tokenizer.cpp
new file mode 100644
index 000000000..b069c7482
--- /dev/null
+++ b/indexlib/tokenizer.cpp
@@ -0,0 +1,300 @@
+#include "tokenizer.h"
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <cassert>
+
+using std::string;
+using std::vector;
+
+namespace {
+vector<string> split( const char* str, const char delim ) { // Cuts str at every run of delim and returns the non-empty pieces in order.
+ assert( str );
+ vector<string> words;
+ for ( const char* cursor = str; ; ) {
+ while ( *cursor == delim ) ++cursor; // skip a run of separators (leading, between words, or trailing)
+ if ( !*cursor ) break; // reached the terminator: no further word
+ const char* const first = cursor; // here *first is neither delim nor the terminator
+ do { ++cursor; } while ( *cursor && *cursor != delim );
+ words.push_back( string( first, cursor ) );
+ }
+ return words;
+}
+
+class latin1_tokenizer : public indexlib::detail::tokenizer { // Tokenizer returned for "latin-1:european": maps every byte through a table, then splits on the separator.
+ private:
+ static const char stop = 46; // . (every separator byte is rewritten to this value and then used as the split delimiter)
+ static void normalize( char& c ) { // Replaces c in place by its table entry: an upper-case letter, a digit, or stop.
+ static const char result[] = { // static: built once instead of on every call (normalize runs once per input character)
+ stop, // [ 0 ]
+ stop, // [ 1 ]
+ stop, // [ 2 ]
+ stop, // [ 3 ]
+ stop, // [ 4 ]
+ stop, // [ 5 ]
+ stop, // [ 6 ]
+ stop, // [ 7 ]
+ stop, // ^H [ 8 ]
+ stop, // \t [ 9 ]
+ stop, // \n [ 10 ]
+ stop, // [ 11 ]
+ stop, // [ 12 ]
+ stop, // \r [ 13 ]
+ stop, // [ 14 ]
+ stop, // [ 15 ]
+ stop, // [ 16 ]
+ stop, // [ 17 ]
+ stop, // [ 18 ]
+ stop, // [ 19 ]
+ stop, // [ 20 ]
+ stop, // [ 21 ]
+ stop, // [ 22 ]
+ stop, // [ 23 ]
+ stop, // [ 24 ]
+ stop, // [ 25 ]
+ stop, // [ 26 ]
+ stop, // [ 27 ]
+ stop, // [ 28 ]
+ stop, // [ 29 ]
+ stop, // [ 30 ]
+ stop, // [ 31 ]
+ stop, // space [ 32 ]
+ stop, // ! [ 33 ]
+ stop, // " [ 34 ]
+ stop, // # [ 35 ]
+ stop, // $ [ 36 ]
+ stop, // % [ 37 ]
+ stop, // & [ 38 ]
+ stop, // ' [ 39 ]
+ stop, // ( [ 40 ]
+ stop, // ) [ 41 ]
+ stop, // * [ 42 ]
+ stop, // + [ 43 ]
+ stop, // , [ 44 ]
+ stop, // - [ 45 ]
+ stop, // . [ 46 ]
+ stop, // / [ 47 ]
+ '0', // 0 [ 48 ]
+ '1', // 1 [ 49 ]
+ '2', // 2 [ 50 ]
+ '3', // 3 [ 51 ]
+ '4', // 4 [ 52 ]
+ '5', // 5 [ 53 ]
+ '6', // 6 [ 54 ]
+ '7', // 7 [ 55 ]
+ '8', // 8 [ 56 ]
+ '9', // 9 [ 57 ]
+ stop, // : [ 58 ]
+ stop, // ; [ 59 ]
+ stop, // < [ 60 ]
+ stop, // = [ 61 ]
+ stop, // > [ 62 ]
+ stop, // ? [ 63 ]
+ stop, // @ [ 64 ]
+ 'A', // A [ 65 ]
+ 'B', // B [ 66 ]
+ 'C', // C [ 67 ]
+ 'D', // D [ 68 ]
+ 'E', // E [ 69 ]
+ 'F', // F [ 70 ]
+ 'G', // G [ 71 ]
+ 'H', // H [ 72 ]
+ 'I', // I [ 73 ]
+ 'J', // J [ 74 ]
+ 'K', // K [ 75 ]
+ 'L', // L [ 76 ]
+ 'M', // M [ 77 ]
+ 'N', // N [ 78 ]
+ 'O', // O [ 79 ]
+ 'P', // P [ 80 ]
+ 'Q', // Q [ 81 ]
+ 'R', // R [ 82 ]
+ 'S', // S [ 83 ]
+ 'T', // T [ 84 ]
+ 'U', // U [ 85 ]
+ 'V', // V [ 86 ]
+ 'W', // W [ 87 ]
+ 'X', // X [ 88 ]
+ 'Y', // Y [ 89 ]
+ 'Z', // Z [ 90 ]
+ stop, // [ [ 91 ]
+ stop, // \ [ 92 ]
+ stop, // ] [ 93 ]
+ stop, // ^ [ 94 ]
+ stop, // _ [ 95 ]
+ stop, // ` [ 96 ]
+ 'A', // a [ 97 ]
+ 'B', // b [ 98 ]
+ 'C', // c [ 99 ]
+ 'D', // d [ 100 ]
+ 'E', // e [ 101 ]
+ 'F', // f [ 102 ]
+ 'G', // g [ 103 ]
+ 'H', // h [ 104 ]
+ 'I', // i [ 105 ]
+ 'J', // j [ 106 ]
+ 'K', // k [ 107 ]
+ 'L', // l [ 108 ]
+ 'M', // m [ 109 ]
+ 'N', // n [ 110 ]
+ 'O', // o [ 111 ]
+ 'P', // p [ 112 ]
+ 'Q', // q [ 113 ]
+ 'R', // r [ 114 ]
+ 'S', // s [ 115 ]
+ 'T', // t [ 116 ]
+ 'U', // u [ 117 ]
+ 'V', // v [ 118 ]
+ 'W', // w [ 119 ]
+ 'X', // x [ 120 ]
+ 'Y', // y [ 121 ]
+ 'Z', // z [ 122 ]
+ stop, // { [ 123 ]
+ stop, // | [ 124 ]
+ stop, // } [ 125 ]
+ stop, // ~ [ 126 ]
+ stop, // DEL [ 127 ]
+ stop, // [ 128 ]
+ stop, // [ 129 ]
+ stop, // [ 130 ]
+ stop, // [ 131 ]
+ stop, // [ 132 ]
+ stop, // [ 133 ]
+ stop, // [ 134 ]
+ stop, // [ 135 ]
+ stop, // [ 136 ]
+ stop, // [ 137 ]
+ stop, // [ 138 ]
+ stop, // [ 139 ]
+ stop, // [ 140 ]
+ stop, // [ 141 ]
+ stop, // [ 142 ]
+ stop, // [ 143 ]
+ stop, // [ 144 ]
+ stop, // [ 145 ]
+ stop, // [ 146 ]
+ stop, // [ 147 ]
+ stop, // [ 148 ]
+ stop, // [ 149 ]
+ stop, // [ 150 ]
+ stop, // [ 151 ]
+ stop, // [ 152 ]
+ stop, // [ 153 ]
+ stop, // [ 154 ]
+ stop, // [ 155 ]
+ stop, // [ 156 ]
+ stop, // [ 157 ]
+ stop, // [ 158 ]
+ stop, // [ 159 ]
+ stop, // [ 160 ]
+ stop, // [ 161 ]
+ stop, // [ 162 ]
+ stop, // [ 163 ]
+ stop, // [ 164 ]
+ stop, // [ 165 ]
+ stop, // [ 166 ]
+ stop, // [ 167 ]
+ stop, // [ 168 ]
+ stop, // [ 169 ]
+ stop, // [ 170 ]
+ stop, // [ 171 ]
+ stop, // [ 172 ]
+ stop, // [ 173 ]
+ stop, // [ 174 ]
+ stop, // [ 175 ]
+ stop, // [ 176 ]
+ stop, // [ 177 ]
+ stop, // [ 178 ]
+ stop, // [ 179 ]
+ stop, // [ 180 ]
+ stop, // [ 181 ]
+ stop, // [ 182 ]
+ stop, // [ 183 ]
+ stop, // [ 184 ]
+ stop, // [ 185 ]
+ stop, // [ 186 ]
+ stop, // [ 187 ]
+ stop, // [ 188 ]
+ stop, // [ 189 ]
+ stop, // [ 190 ]
+ stop, // [ 191 ]
+ 'A', // [ 192 ]
+ 'A', // [ 193 ]
+ 'A', // [ 194 ]
+ 'A', // [ 195 ]
+ 'A', // [ 196 ]
+ 'A', // [ 197 ]
+ 'A', // [ 198 ]
+ 'C', // [ 199 ]
+ 'E', // [ 200 ]
+ 'E', // [ 201 ]
+ 'E', // [ 202 ]
+ 'E', // [ 203 ]
+ 'I', // [ 204 ]
+ 'I', // [ 205 ]
+ 'I', // [ 206 ]
+ 'I', // [ 207 ]
+ 'D', // capital Eth [ 208 ]
+ 'N', // [ 209 ]
+ 'O', // [ 210 ]
+ 'O', // [ 211 ]
+ 'O', // [ 212 ]
+ 'O', // [ 213 ]
+ 'O', // [ 214 ]
+ 'X', // multiplication sign [ 215 ]
+ 'O', // [ 216 ]
+ 'U', // [ 217 ]
+ 'U', // [ 218 ]
+ 'U', // [ 219 ]
+ 'U', // [ 220 ]
+ 'Y', // [ 221 ]
+ 'T', // capital Thorn [ 222 ]
+ 'S', // sharp s [ 223 ]
+ 'A', // [ 224 ]
+ 'A', // [ 225 ]
+ 'A', // [ 226 ]
+ 'A', // [ 227 ]
+ 'A', // [ 228 ]
+ 'A', // [ 229 ]
+ 'A', // [ 230 ]
+ 'C', // [ 231 ]
+ 'E', // [ 232 ]
+ 'E', // [ 233 ]
+ 'E', // [ 234 ]
+ 'E', // [ 235 ]
+ 'I', // [ 236 ]
+ 'I', // [ 237 ]
+ 'I', // [ 238 ]
+ 'I', // [ 239 ]
+ stop, // small eth [ 240 ] NOTE(review): its capital form [ 208 ] maps to 'D'; confirm this asymmetry is intended
+ 'N', // [ 241 ]
+ 'O', // [ 242 ]
+ 'O', // [ 243 ]
+ 'O', // [ 244 ]
+ 'O', // [ 245 ]
+ 'O', // [ 246 ]
+ stop, // division sign [ 247 ]
+ 'O', // [ 248 ]
+ 'U', // [ 249 ]
+ 'U', // [ 250 ]
+ 'U', // [ 251 ]
+ 'U', // [ 252 ]
+ 'Y', // [ 253 ]
+ 'T', // small thorn [ 254 ]
+ 'Y' // [ 255 ]
+ };
+ c = result[ static_cast<unsigned char>( c ) ]; // cast first: a plain char may be negative for bytes >= 128
+ }
+ std::vector<std::string> do_string_to_words( const char* str ) { // Normalizes a copy of str byte by byte, then splits it at the separator.
+ string complete = str;
+ std::for_each( complete.begin(), complete.end(), normalize );
+ return split( complete.c_str(), stop );
+ }
+};
+}
+
+
+std::auto_ptr<indexlib::detail::tokenizer> indexlib::detail::get_tokenizer( std::string name ) { // Returns a new tokenizer for name, or a null auto_ptr when the name is not recognised.
+ indexlib::detail::tokenizer* const created = ( name == "latin-1:european" ) ? new latin1_tokenizer : 0; // the only supported name
+ return std::auto_ptr<indexlib::detail::tokenizer>( created );
+}
diff --git a/indexlib/tokenizer.h b/indexlib/tokenizer.h
new file mode 100644
index 000000000..2494f2973
--- /dev/null
+++ b/indexlib/tokenizer.h
@@ -0,0 +1,28 @@
+#ifndef LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_
+#define LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_
+
+#include <vector>
+#include <string>
+#include <memory>
+#include <assert.h>
+
+namespace indexlib { namespace detail {
+
+class tokenizer { // Abstract interface for turning a C string into a list of words; concrete tokenizers override do_string_to_words().
+ public:
+ virtual ~tokenizer() { } // virtual, so deleting through a tokenizer pointer (e.g. via auto_ptr) destroys the derived object
+ std::vector<std::string> string_to_words( const char* str ) { // public entry point: checks str, then forwards to the implementation
+ assert( str ); // a null pointer is a caller error
+ return do_string_to_words( str );
+ }
+
+ private:
+ virtual std::vector<std::string> do_string_to_words( const char* ) = 0; // implemented by each concrete tokenizer; the wrapper above has already asserted a non-null argument
+};
+
+std::auto_ptr<tokenizer> get_tokenizer( std::string );
+}}
+
+
+
+#endif /* LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_ */
diff --git a/indexlib/version.h b/indexlib/version.h
new file mode 100644
index 000000000..4279fc18c
--- /dev/null
+++ b/indexlib/version.h
@@ -0,0 +1,14 @@
+#ifndef LPC_VERSION_H1118420718_INCLUDE_GUARD_
+#define LPC_VERSION_H1118420718_INCLUDE_GUARD_
+
+namespace indexlib {
+namespace version { // Compile-time constants identifying this indexlib version.
+
+const unsigned major = 0; // major version number
+const unsigned minor = 94; // minor version number
+const char* const marker = "indexlib directory, see http://luispedro.org/software/index"; // NOTE(review): presumably written into index directories to identify them; confirm against the code that uses it
+
+}}
+
+
+#endif /* LPC_VERSION_H1118420718_INCLUDE_GUARD_ */