| OLD | NEW |
| 1 /* | 1 /* |
| 2 ** 2007 June 22 | 2 ** 2007 June 22 |
| 3 ** | 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
| 6 ** | 6 ** |
| 7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
| 10 ** | 10 ** |
| 11 ************************************************************************* | 11 ************************************************************************* |
| 12 ** This file implements a tokenizer for fts3 based on the ICU library. | 12 ** This file implements a tokenizer for fts3 based on the ICU library. |
| 13 ** | |
| 14 ** $Id: fts3_icu.c,v 1.3 2008/09/01 18:34:20 danielk1977 Exp $ | |
| 15 */ | 13 */ |
| 16 | 14 #include "fts3Int.h" |
| 17 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 18 #ifdef SQLITE_ENABLE_ICU | 16 #ifdef SQLITE_ENABLE_ICU |
| 19 | 17 |
| 20 #include <assert.h> | 18 #include <assert.h> |
| 21 #include <string.h> | 19 #include <string.h> |
| 22 #include "fts3_tokenizer.h" | 20 #include "fts3_tokenizer.h" |
| 23 | 21 |
| 24 #include <unicode/ubrk.h> | 22 #include <unicode/ubrk.h> |
| 25 #include <unicode/ucol.h> | 23 #include <unicode/ucol.h> |
| 26 #include <unicode/ustring.h> | 24 #include <unicode/ustring.h> |
| (...skipping 78 matching lines...) | (...skipping 78 matching lines...) |
| 105 const int32_t opt = U_FOLD_CASE_DEFAULT; | 103 const int32_t opt = U_FOLD_CASE_DEFAULT; |
| 106 UErrorCode status = U_ZERO_ERROR; | 104 UErrorCode status = U_ZERO_ERROR; |
| 107 int nChar; | 105 int nChar; |
| 108 | 106 |
| 109 UChar32 c; | 107 UChar32 c; |
| 110 int iInput = 0; | 108 int iInput = 0; |
| 111 int iOut = 0; | 109 int iOut = 0; |
| 112 | 110 |
| 113 *ppCursor = 0; | 111 *ppCursor = 0; |
| 114 | 112 |
| 115 if( nInput<0 ){ | 113 if( zInput==0 ){ |
| 114 nInput = 0; |
| 115 zInput = ""; |
| 116 }else if( nInput<0 ){ |
| 116 nInput = strlen(zInput); | 117 nInput = strlen(zInput); |
| 117 } | 118 } |
| 118 nChar = nInput+1; | 119 nChar = nInput+1; |
| 119 pCsr = (IcuCursor *)sqlite3_malloc( | 120 pCsr = (IcuCursor *)sqlite3_malloc( |
| 120 sizeof(IcuCursor) + /* IcuCursor */ | 121 sizeof(IcuCursor) + /* IcuCursor */ |
| 121 (nChar+1) * sizeof(int) + /* IcuCursor.aOffset[] */ | 122 ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ |
| 122 nChar * sizeof(UChar) /* IcuCursor.aChar[] */ | 123 (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ |
| 123 ); | 124 ); |
| 124 if( !pCsr ){ | 125 if( !pCsr ){ |
| 125 return SQLITE_NOMEM; | 126 return SQLITE_NOMEM; |
| 126 } | 127 } |
| 127 memset(pCsr, 0, sizeof(IcuCursor)); | 128 memset(pCsr, 0, sizeof(IcuCursor)); |
| 128 pCsr->aOffset = (int *)&pCsr[1]; | 129 pCsr->aChar = (UChar *)&pCsr[1]; |
| 129 pCsr->aChar = (UChar *)&pCsr->aOffset[nChar+1]; | 130 pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; |
| 130 | 131 |
| 131 pCsr->aOffset[iOut] = iInput; | 132 pCsr->aOffset[iOut] = iInput; |
| 132 U8_NEXT(zInput, iInput, nInput, c); | 133 U8_NEXT(zInput, iInput, nInput, c); |
| 133 while( c>0 ){ | 134 while( c>0 ){ |
| 134 int isError = 0; | 135 int isError = 0; |
| 135 c = u_foldCase(c, opt); | 136 c = u_foldCase(c, opt); |
| 136 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); | 137 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); |
| 137 if( isError ){ | 138 if( isError ){ |
| 138 sqlite3_free(pCsr); | 139 sqlite3_free(pCsr); |
| 139 return SQLITE_ERROR; | 140 return SQLITE_ERROR; |
| (...skipping 111 matching lines...) | (...skipping 111 matching lines...) |
| 251 ** Set *ppModule to point at the implementation of the ICU tokenizer. | 252 ** Set *ppModule to point at the implementation of the ICU tokenizer. |
| 252 */ | 253 */ |
| 253 void sqlite3Fts3IcuTokenizerModule( | 254 void sqlite3Fts3IcuTokenizerModule( |
| 254 sqlite3_tokenizer_module const**ppModule | 255 sqlite3_tokenizer_module const**ppModule |
| 255 ){ | 256 ){ |
| 256 *ppModule = &icuTokenizerModule; | 257 *ppModule = &icuTokenizerModule; |
| 257 } | 258 } |
| 258 | 259 |
| 259 #endif /* defined(SQLITE_ENABLE_ICU) */ | 260 #endif /* defined(SQLITE_ENABLE_ICU) */ |
| 260 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 261 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| OLD | NEW |