Index: third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_icu.c |
diff --git a/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_icu.c b/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_icu.c |
deleted file mode 100644 |
index 2670301f5198f3712b2afb9b87d5a45e80059201..0000000000000000000000000000000000000000 |
--- a/third_party/sqlite/sqlite-src-3080704/ext/fts2/fts2_icu.c |
+++ /dev/null |
@@ -1,260 +0,0 @@ |
-/* |
-** 2007 June 22 |
-** |
-** The author disclaims copyright to this source code. In place of |
-** a legal notice, here is a blessing: |
-** |
-** May you do good and not evil. |
-** May you find forgiveness for yourself and forgive others. |
-** May you share freely, never taking more than you give. |
-** |
-************************************************************************* |
-** This file implements a tokenizer for fts2 based on the ICU library. |
-** |
-** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $ |
-*/ |
- |
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) |
-#ifdef SQLITE_ENABLE_ICU |
- |
-#include <assert.h> |
-#include <string.h> |
-#include "fts2_tokenizer.h" |
- |
-#include <unicode/ubrk.h> |
-#include <unicode/ucol.h> |
-#include <unicode/ustring.h> |
-#include <unicode/utf16.h> |
- |
-typedef struct IcuTokenizer IcuTokenizer; |
-typedef struct IcuCursor IcuCursor; |
- |
-struct IcuTokenizer { |
- sqlite3_tokenizer base; |
- char *zLocale; |
-}; |
- |
-struct IcuCursor { |
- sqlite3_tokenizer_cursor base; |
- |
- UBreakIterator *pIter; /* ICU break-iterator object */ |
- int nChar; /* Number of UChar elements in pInput */ |
- UChar *aChar; /* Copy of input using utf-16 encoding */ |
- int *aOffset; /* Offsets of each character in utf-8 input */ |
- |
- int nBuffer; |
- char *zBuffer; |
- |
- int iToken; |
-}; |
- |
-/* |
-** Create a new tokenizer instance. |
-*/ |
-static int icuCreate( |
- int argc, /* Number of entries in argv[] */ |
- const char * const *argv, /* Tokenizer creation arguments */ |
- sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ |
-){ |
- IcuTokenizer *p; |
- int n = 0; |
- |
- if( argc>0 ){ |
- n = strlen(argv[0])+1; |
- } |
- p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); |
- if( !p ){ |
- return SQLITE_NOMEM; |
- } |
- memset(p, 0, sizeof(IcuTokenizer)); |
- |
- if( n ){ |
- p->zLocale = (char *)&p[1]; |
- memcpy(p->zLocale, argv[0], n); |
- } |
- |
- *ppTokenizer = (sqlite3_tokenizer *)p; |
- |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Destroy a tokenizer |
-*/ |
-static int icuDestroy(sqlite3_tokenizer *pTokenizer){ |
- IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
- sqlite3_free(p); |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Prepare to begin tokenizing a particular string. The input |
-** string to be tokenized is pInput[0..nBytes-1]. A cursor |
-** used to incrementally tokenize this string is returned in |
-** *ppCursor. |
-*/ |
-static int icuOpen( |
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
- const char *zInput, /* Input string */ |
- int nInput, /* Length of zInput in bytes */ |
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
-){ |
- IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
- IcuCursor *pCsr; |
- |
- const int32_t opt = U_FOLD_CASE_DEFAULT; |
- UErrorCode status = U_ZERO_ERROR; |
- int nChar; |
- |
- UChar32 c; |
- int iInput = 0; |
- int iOut = 0; |
- |
- *ppCursor = 0; |
- |
- if( nInput<0 ){ |
- nInput = strlen(zInput); |
- } |
- nChar = nInput+1; |
- pCsr = (IcuCursor *)sqlite3_malloc( |
- sizeof(IcuCursor) + /* IcuCursor */ |
- ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ |
- (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ |
- ); |
- if( !pCsr ){ |
- return SQLITE_NOMEM; |
- } |
- memset(pCsr, 0, sizeof(IcuCursor)); |
- pCsr->aChar = (UChar *)&pCsr[1]; |
- pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; |
- |
- pCsr->aOffset[iOut] = iInput; |
- U8_NEXT(zInput, iInput, nInput, c); |
- while( c>0 ){ |
- int isError = 0; |
- c = u_foldCase(c, opt); |
- U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); |
- if( isError ){ |
- sqlite3_free(pCsr); |
- return SQLITE_ERROR; |
- } |
- pCsr->aOffset[iOut] = iInput; |
- |
- if( iInput<nInput ){ |
- U8_NEXT(zInput, iInput, nInput, c); |
- }else{ |
- c = 0; |
- } |
- } |
- |
- pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); |
- if( !U_SUCCESS(status) ){ |
- sqlite3_free(pCsr); |
- return SQLITE_ERROR; |
- } |
- pCsr->nChar = iOut; |
- |
- ubrk_first(pCsr->pIter); |
- *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Close a tokenization cursor previously opened by a call to icuOpen(). |
-*/ |
-static int icuClose(sqlite3_tokenizer_cursor *pCursor){ |
- IcuCursor *pCsr = (IcuCursor *)pCursor; |
- ubrk_close(pCsr->pIter); |
- sqlite3_free(pCsr->zBuffer); |
- sqlite3_free(pCsr); |
- return SQLITE_OK; |
-} |
- |
-/* |
-** Extract the next token from a tokenization cursor. |
-*/ |
-static int icuNext( |
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
- const char **ppToken, /* OUT: *ppToken is the token text */ |
- int *pnBytes, /* OUT: Number of bytes in token */ |
- int *piStartOffset, /* OUT: Starting offset of token */ |
- int *piEndOffset, /* OUT: Ending offset of token */ |
- int *piPosition /* OUT: Position integer of token */ |
-){ |
- IcuCursor *pCsr = (IcuCursor *)pCursor; |
- |
- int iStart = 0; |
- int iEnd = 0; |
- int nByte = 0; |
- |
- while( iStart==iEnd ){ |
- UChar32 c; |
- |
- iStart = ubrk_current(pCsr->pIter); |
- iEnd = ubrk_next(pCsr->pIter); |
- if( iEnd==UBRK_DONE ){ |
- return SQLITE_DONE; |
- } |
- |
- while( iStart<iEnd ){ |
- int iWhite = iStart; |
- U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); |
- if( u_isspace(c) ){ |
- iStart = iWhite; |
- }else{ |
- break; |
- } |
- } |
- assert(iStart<=iEnd); |
- } |
- |
- do { |
- UErrorCode status = U_ZERO_ERROR; |
- if( nByte ){ |
- char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); |
- if( !zNew ){ |
- return SQLITE_NOMEM; |
- } |
- pCsr->zBuffer = zNew; |
- pCsr->nBuffer = nByte; |
- } |
- |
- u_strToUTF8( |
- pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ |
- &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ |
- &status /* Output success/failure */ |
- ); |
- } while( nByte>pCsr->nBuffer ); |
- |
- *ppToken = pCsr->zBuffer; |
- *pnBytes = nByte; |
- *piStartOffset = pCsr->aOffset[iStart]; |
- *piEndOffset = pCsr->aOffset[iEnd]; |
- *piPosition = pCsr->iToken++; |
- |
- return SQLITE_OK; |
-} |
- |
-/* |
-** The set of routines that implement the simple tokenizer |
-*/ |
-static const sqlite3_tokenizer_module icuTokenizerModule = { |
- 0, /* iVersion */ |
- icuCreate, /* xCreate */ |
- icuDestroy, /* xCreate */ |
- icuOpen, /* xOpen */ |
- icuClose, /* xClose */ |
- icuNext, /* xNext */ |
-}; |
- |
-/* |
-** Set *ppModule to point at the implementation of the ICU tokenizer. |
-*/ |
-void sqlite3Fts2IcuTokenizerModule( |
- sqlite3_tokenizer_module const**ppModule |
-){ |
- *ppModule = &icuTokenizerModule; |
-} |
- |
-#endif /* defined(SQLITE_ENABLE_ICU) */ |
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ |