Index: third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_tokenizer1.c |
diff --git a/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_tokenizer1.c b/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_tokenizer1.c |
deleted file mode 100644 |
index fe4f9eb4b56200d1e34f2ed163f399702786609d..0000000000000000000000000000000000000000 |
--- a/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_tokenizer1.c |
+++ /dev/null |
@@ -1,233 +0,0 @@ |
-/* |
-** 2006 Oct 10 |
-** |
-** The author disclaims copyright to this source code. In place of |
-** a legal notice, here is a blessing: |
-** |
-** May you do good and not evil. |
-** May you find forgiveness for yourself and forgive others. |
-** May you share freely, never taking more than you give. |
-** |
-****************************************************************************** |
-** |
-** Implementation of the "simple" full-text-search tokenizer. |
-*/ |
- |
-/* |
-** The code in this file is only compiled if: |
-** |
-** * The FTS2 module is being built as an extension |
-** (in which case SQLITE_CORE is not defined), or |
-** |
-** * The FTS2 module is being built into the core of |
-** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). |
-*/ |
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) |
- |
- |
-#include <assert.h> |
-#include <stdlib.h> |
-#include <stdio.h> |
-#include <string.h> |
- |
-#include "sqlite3.h" |
-#include "sqlite3ext.h" |
-SQLITE_EXTENSION_INIT3 |
-#include "fts2_tokenizer.h" |
- |
-typedef struct simple_tokenizer { |
- sqlite3_tokenizer base; |
- char delim[128]; /* flag ASCII delimiters */ |
-} simple_tokenizer; |
- |
-typedef struct simple_tokenizer_cursor { |
- sqlite3_tokenizer_cursor base; |
- const char *pInput; /* input we are tokenizing */ |
- int nBytes; /* size of the input */ |
- int iOffset; /* current position in pInput */ |
- int iToken; /* index of next token to be returned */ |
- char *pToken; /* storage for current token */ |
- int nTokenAllocated; /* space allocated to zToken buffer */ |
-} simple_tokenizer_cursor; |
- |
- |
-/* Forward declaration */ |
-static const sqlite3_tokenizer_module simpleTokenizerModule; |
- |
-static int simpleDelim(simple_tokenizer *t, unsigned char c){ |
- return c<0x80 && t->delim[c]; |
-} |
- |
-/* |
-** Create a new tokenizer instance. |
-*/ |
-static int simpleCreate( |
- int argc, const char * const *argv, |
- sqlite3_tokenizer **ppTokenizer |
-){ |
- simple_tokenizer *t; |
- |
- t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); |
- if( t==NULL ) return SQLITE_NOMEM; |
- memset(t, 0, sizeof(*t)); |
- |
- /* TODO(shess) Delimiters need to remain the same from run to run, |
- ** else we need to reindex. One solution would be a meta-table to |
- ** track such information in the database, then we'd only want this |
- ** information on the initial create. |
- */ |
- if( argc>1 ){ |
- int i, n = strlen(argv[1]); |
- for(i=0; i<n; i++){ |
- unsigned char ch = argv[1][i]; |
- /* We explicitly don't support UTF-8 delimiters for now. */ |
- if( ch>=0x80 ){ |
- sqlite3_free(t); |
- return SQLITE_ERROR; |
- } |
- t->delim[ch] = 1; |
- } |
- } else { |
- /* Mark non-alphanumeric ASCII characters as delimiters */ |
- int i; |
- for(i=1; i<0x80; i++){ |
- t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') || |
- (i>='a' && i<='z')); |
- } |
- } |
- |
- *ppTokenizer = &t->base; |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Destroy a tokenizer |
-*/ |
-static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ |
- sqlite3_free(pTokenizer); |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Prepare to begin tokenizing a particular string. The input |
-** string to be tokenized is pInput[0..nBytes-1]. A cursor |
-** used to incrementally tokenize this string is returned in |
-** *ppCursor. |
-*/ |
-static int simpleOpen( |
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
- const char *pInput, int nBytes, /* String to be tokenized */ |
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
-){ |
- simple_tokenizer_cursor *c; |
- |
- c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); |
- if( c==NULL ) return SQLITE_NOMEM; |
- |
- c->pInput = pInput; |
- if( pInput==0 ){ |
- c->nBytes = 0; |
- }else if( nBytes<0 ){ |
- c->nBytes = (int)strlen(pInput); |
- }else{ |
- c->nBytes = nBytes; |
- } |
- c->iOffset = 0; /* start tokenizing at the beginning */ |
- c->iToken = 0; |
- c->pToken = NULL; /* no space allocated, yet. */ |
- c->nTokenAllocated = 0; |
- |
- *ppCursor = &c->base; |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Close a tokenization cursor previously opened by a call to |
-** simpleOpen() above. |
-*/ |
-static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ |
- simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
- sqlite3_free(c->pToken); |
- sqlite3_free(c); |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Extract the next token from a tokenization cursor. The cursor must |
-** have been opened by a prior call to simpleOpen(). |
-*/ |
-static int simpleNext( |
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
- const char **ppToken, /* OUT: *ppToken is the token text */ |
- int *pnBytes, /* OUT: Number of bytes in token */ |
- int *piStartOffset, /* OUT: Starting offset of token */ |
- int *piEndOffset, /* OUT: Ending offset of token */ |
- int *piPosition /* OUT: Position integer of token */ |
-){ |
- simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
- simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; |
- unsigned char *p = (unsigned char *)c->pInput; |
- |
- while( c->iOffset<c->nBytes ){ |
- int iStartOffset; |
- |
- /* Scan past delimiter characters */ |
- while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ |
- c->iOffset++; |
- } |
- |
- /* Count non-delimiter characters. */ |
- iStartOffset = c->iOffset; |
- while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ |
- c->iOffset++; |
- } |
- |
- if( c->iOffset>iStartOffset ){ |
- int i, n = c->iOffset-iStartOffset; |
- if( n>c->nTokenAllocated ){ |
- c->nTokenAllocated = n+20; |
- c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated); |
- if( c->pToken==NULL ) return SQLITE_NOMEM; |
- } |
- for(i=0; i<n; i++){ |
- /* TODO(shess) This needs expansion to handle UTF-8 |
- ** case-insensitivity. |
- */ |
- unsigned char ch = p[iStartOffset+i]; |
- c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch; |
- } |
- *ppToken = c->pToken; |
- *pnBytes = n; |
- *piStartOffset = iStartOffset; |
- *piEndOffset = c->iOffset; |
- *piPosition = c->iToken++; |
- |
- return SQLITE_OK; |
- } |
- } |
- return SQLITE_DONE; |
-} |
- |
-/* |
-** The set of routines that implement the simple tokenizer |
-*/ |
-static const sqlite3_tokenizer_module simpleTokenizerModule = { |
- 0, |
- simpleCreate, |
- simpleDestroy, |
- simpleOpen, |
- simpleClose, |
- simpleNext, |
-}; |
- |
-/* |
-** Allocate a new simple tokenizer. Return a pointer to the new |
-** tokenizer in *ppModule |
-*/ |
-void sqlite3Fts2SimpleTokenizerModule( |
- sqlite3_tokenizer_module const**ppModule |
-){ |
- *ppModule = &simpleTokenizerModule; |
-} |
- |
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ |