Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(430)

Side by Side Diff: third_party/sqlite/src/ext/fts5/fts5_tokenize.c

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2014 May 31 2 ** 2014 May 31
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 ** Delete a "ascii" tokenizer. 55 ** Delete a "ascii" tokenizer.
56 */ 56 */
57 static void fts5AsciiDelete(Fts5Tokenizer *p){ 57 static void fts5AsciiDelete(Fts5Tokenizer *p){
58 sqlite3_free(p); 58 sqlite3_free(p);
59 } 59 }
60 60
61 /* 61 /*
62 ** Create an "ascii" tokenizer. 62 ** Create an "ascii" tokenizer.
63 */ 63 */
64 static int fts5AsciiCreate( 64 static int fts5AsciiCreate(
65 void *pCtx, 65 void *pUnused,
66 const char **azArg, int nArg, 66 const char **azArg, int nArg,
67 Fts5Tokenizer **ppOut 67 Fts5Tokenizer **ppOut
68 ){ 68 ){
69 int rc = SQLITE_OK; 69 int rc = SQLITE_OK;
70 AsciiTokenizer *p = 0; 70 AsciiTokenizer *p = 0;
71 UNUSED_PARAM(pUnused);
71 if( nArg%2 ){ 72 if( nArg%2 ){
72 rc = SQLITE_ERROR; 73 rc = SQLITE_ERROR;
73 }else{ 74 }else{
74 p = sqlite3_malloc(sizeof(AsciiTokenizer)); 75 p = sqlite3_malloc(sizeof(AsciiTokenizer));
75 if( p==0 ){ 76 if( p==0 ){
76 rc = SQLITE_NOMEM; 77 rc = SQLITE_NOMEM;
77 }else{ 78 }else{
78 int i; 79 int i;
79 memset(p, 0, sizeof(AsciiTokenizer)); 80 memset(p, 0, sizeof(AsciiTokenizer));
80 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); 81 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
(...skipping 28 matching lines...) Expand all
109 aOut[i] = c; 110 aOut[i] = c;
110 } 111 }
111 } 112 }
112 113
113 /* 114 /*
114 ** Tokenize some text using the ascii tokenizer. 115 ** Tokenize some text using the ascii tokenizer.
115 */ 116 */
116 static int fts5AsciiTokenize( 117 static int fts5AsciiTokenize(
117 Fts5Tokenizer *pTokenizer, 118 Fts5Tokenizer *pTokenizer,
118 void *pCtx, 119 void *pCtx,
119 int flags, 120 int iUnused,
120 const char *pText, int nText, 121 const char *pText, int nText,
121 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) 122 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
122 ){ 123 ){
123 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; 124 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
124 int rc = SQLITE_OK; 125 int rc = SQLITE_OK;
125 int ie; 126 int ie;
126 int is = 0; 127 int is = 0;
127 128
128 char aFold[64]; 129 char aFold[64];
129 int nFold = sizeof(aFold); 130 int nFold = sizeof(aFold);
130 char *pFold = aFold; 131 char *pFold = aFold;
131 unsigned char *a = p->aTokenChar; 132 unsigned char *a = p->aTokenChar;
132 133
134 UNUSED_PARAM(iUnused);
135
133 while( is<nText && rc==SQLITE_OK ){ 136 while( is<nText && rc==SQLITE_OK ){
134 int nByte; 137 int nByte;
135 138
136 /* Skip any leading divider characters. */ 139 /* Skip any leading divider characters. */
137 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ 140 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
138 is++; 141 is++;
139 } 142 }
140 if( is==nText ) break; 143 if( is==nText ) break;
141 144
142 /* Count the token characters */ 145 /* Count the token characters */
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 sqlite3_free(p->aFold); 319 sqlite3_free(p->aFold);
317 sqlite3_free(p); 320 sqlite3_free(p);
318 } 321 }
319 return; 322 return;
320 } 323 }
321 324
322 /* 325 /*
323 ** Create a "unicode61" tokenizer. 326 ** Create a "unicode61" tokenizer.
324 */ 327 */
325 static int fts5UnicodeCreate( 328 static int fts5UnicodeCreate(
326 void *pCtx, 329 void *pUnused,
327 const char **azArg, int nArg, 330 const char **azArg, int nArg,
328 Fts5Tokenizer **ppOut 331 Fts5Tokenizer **ppOut
329 ){ 332 ){
330 int rc = SQLITE_OK; /* Return code */ 333 int rc = SQLITE_OK; /* Return code */
331 Unicode61Tokenizer *p = 0; /* New tokenizer object */ 334 Unicode61Tokenizer *p = 0; /* New tokenizer object */
332 335
336 UNUSED_PARAM(pUnused);
337
333 if( nArg%2 ){ 338 if( nArg%2 ){
334 rc = SQLITE_ERROR; 339 rc = SQLITE_ERROR;
335 }else{ 340 }else{
336 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); 341 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
337 if( p ){ 342 if( p ){
338 int i; 343 int i;
339 memset(p, 0, sizeof(Unicode61Tokenizer)); 344 memset(p, 0, sizeof(Unicode61Tokenizer));
340 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); 345 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
341 p->bRemoveDiacritic = 1; 346 p->bRemoveDiacritic = 1;
342 p->nFold = 64; 347 p->nFold = 64;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 ** character (not a separator). 384 ** character (not a separator).
380 */ 385 */
381 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ 386 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
382 assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); 387 assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
383 return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); 388 return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
384 } 389 }
385 390
386 static int fts5UnicodeTokenize( 391 static int fts5UnicodeTokenize(
387 Fts5Tokenizer *pTokenizer, 392 Fts5Tokenizer *pTokenizer,
388 void *pCtx, 393 void *pCtx,
389 int flags, 394 int iUnused,
390 const char *pText, int nText, 395 const char *pText, int nText,
391 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) 396 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
392 ){ 397 ){
393 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; 398 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
394 int rc = SQLITE_OK; 399 int rc = SQLITE_OK;
395 unsigned char *a = p->aTokenChar; 400 unsigned char *a = p->aTokenChar;
396 401
397 unsigned char *zTerm = (unsigned char*)&pText[nText]; 402 unsigned char *zTerm = (unsigned char*)&pText[nText];
398 unsigned char *zCsr = (unsigned char *)pText; 403 unsigned char *zCsr = (unsigned char *)pText;
399 404
400 /* Output buffer */ 405 /* Output buffer */
401 char *aFold = p->aFold; 406 char *aFold = p->aFold;
402 int nFold = p->nFold; 407 int nFold = p->nFold;
403 const char *pEnd = &aFold[nFold-6]; 408 const char *pEnd = &aFold[nFold-6];
404 409
410 UNUSED_PARAM(iUnused);
411
405 /* Each iteration of this loop gobbles up a contiguous run of separators, 412 /* Each iteration of this loop gobbles up a contiguous run of separators,
406 ** then the next token. */ 413 ** then the next token. */
407 while( rc==SQLITE_OK ){ 414 while( rc==SQLITE_OK ){
408 int iCode; /* non-ASCII codepoint read from input */ 415 int iCode; /* non-ASCII codepoint read from input */
409 char *zOut = aFold; 416 char *zOut = aFold;
410 int is; 417 int is;
411 int ie; 418 int ie;
412 419
413 /* Skip any separator characters. */ 420 /* Skip any separator characters. */
414 while( 1 ){ 421 while( 1 ){
(...skipping 798 matching lines...) Expand 10 before | Expand all | Expand 10 after
1213 fts5_tokenizer x; 1220 fts5_tokenizer x;
1214 } aBuiltin[] = { 1221 } aBuiltin[] = {
1215 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, 1222 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
1216 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, 1223 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
1217 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, 1224 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
1218 }; 1225 };
1219 1226
1220 int rc = SQLITE_OK; /* Return code */ 1227 int rc = SQLITE_OK; /* Return code */
1221 int i; /* To iterate through builtin functions */ 1228 int i; /* To iterate through builtin functions */
1222 1229
1223 for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ 1230 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
1224 rc = pApi->xCreateTokenizer(pApi, 1231 rc = pApi->xCreateTokenizer(pApi,
1225 aBuiltin[i].zName, 1232 aBuiltin[i].zName,
1226 (void*)pApi, 1233 (void*)pApi,
1227 &aBuiltin[i].x, 1234 &aBuiltin[i].x,
1228 0 1235 0
1229 ); 1236 );
1230 } 1237 }
1231 1238
1232 return rc; 1239 return rc;
1233 } 1240 }
1234 1241
1235 1242
OLDNEW
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5_test_tok.c ('k') | third_party/sqlite/src/ext/fts5/fts5_unicode2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698