summary refs log tree commit diff stats
path: root/indexlib/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'indexlib/tokenizer.h')
-rw-r--r-- indexlib/tokenizer.h 28
1 files changed, 28 insertions, 0 deletions
diff --git a/indexlib/tokenizer.h b/indexlib/tokenizer.h
new file mode 100644
index 000000000..2494f2973
--- /dev/null
+++ b/indexlib/tokenizer.h
@@ -0,0 +1,28 @@
+#ifndef LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_
+#define LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_
+
+#include <vector>
+#include <string>
+#include <memory>
+#include <assert.h>
+
+namespace indexlib { namespace detail {
+
+class tokenizer { // Abstract interface that turns a C string into a vector of strings ("words").
+ public:
+ virtual ~tokenizer() { } // Virtual so derived tokenizers are destroyed correctly through a tokenizer pointer.
+ std::vector<std::string> string_to_words( const char* str ) { // Public, non-virtual entry point; returns whatever do_string_to_words() yields for str.
+ assert( str ); // Precondition: str must be non-null. Only enforced when assertions are enabled (NDEBUG not defined).
+ return do_string_to_words( str ); // Delegate to the implementation supplied by the derived class.
+ }
+
+ private:
+ virtual std::vector<std::string> do_string_to_words( const char* ) = 0; // Pure virtual hook: derived classes implement the actual splitting; reached only via string_to_words().
+};
+
+std::auto_ptr<tokenizer> get_tokenizer( std::string ); // Returns an owning pointer to a tokenizer; the string presumably selects which one (confirm in the definition). NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17.
+}}
+
+
+
+#endif /* LPC_TOKENIZER_H1118429480_INCLUDE_GUARD_ */