OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2014 Jun 09 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 ** This is an SQLite module implementing full-text search. |
| 14 */ |
| 15 |
| 16 |
| 17 #include "fts5Int.h" |
| 18 |
/* Default values for the run-time configuration parameters. Function
** sqlite3Fts5ConfigLoad() installs these before reading any overrides
** from the %_config table. */
#define FTS5_DEFAULT_PAGE_SIZE 4050
#define FTS5_DEFAULT_AUTOMERGE 4
#define FTS5_DEFAULT_USERMERGE 4
#define FTS5_DEFAULT_CRISISMERGE 16
#define FTS5_DEFAULT_HASHSIZE (1024*1024)

/* Maximum allowed page size. sqlite3Fts5ConfigSetValue() rejects any
** "pgsz" value larger than this. */
#define FTS5_MAX_PAGE_SIZE (128*1024)
| 27 |
/*
** Return 1 if character x is a space (' '), or 0 otherwise. No other
** character is classified as white-space by this function.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
| 31 |
/*
** Return 1 if character x is one of the four characters that may open a
** quoted string - double-quote, single-quote, '[' or back-tick - or 0
** otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
| 35 |
/*
** Argument pIn is either NULL or points into a nul-terminated string.
**
** If pIn is NULL, return NULL. Otherwise, return a pointer to the first
** character at or after *pIn that is not a white-space character (as
** defined by fts5_iswhitespace()). This may be pIn itself, or the
** nul-terminator.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* advance past white-space */
  }
  return z;
}
| 48 |
/*
** Argument pIn points into a nul-terminated string (it must not be NULL).
**
** Skip over the run of "bareword" characters (see sqlite3Fts5IsBareword())
** that begins at *pIn and return a pointer to the first character that
** follows it. If *pIn is not itself a bareword character, so that the run
** is empty, return NULL instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z = pIn;
  while( sqlite3Fts5IsBareword(*z) ){
    z++;
  }
  return (z==pIn) ? 0 : z;
}
| 60 |
/*
** Return 1 if character a is an ASCII decimal digit ('0'..'9'), or 0
** otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  return a<='9';
}
| 64 |
| 65 |
| 66 |
/*
** Argument pIn points to the first character of what is expected to be an
** SQL literal within a nul-terminated string. The following forms are
** recognized:
**
**   + NULL (any case),
**   + a blob literal:   x'...' or X'...' with an even number of hex digits,
**   + a string literal: '...' in which '' stands for an embedded quote,
**   + a number: an optional sign, digits and an optional fractional part.
**     Exponents ('E' notation) are not handled by this function.
**
** If a literal is recognized, return a pointer to the character immediately
** following it. Otherwise, return NULL. This function never reads past the
** nul-terminator of the string.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the two bytes of the "x'" prefix plus the hex
        ** digits, so it is even iff the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        if( *p=='\0' ){
          /* Hit the end of the string before finding the closing quote.
          ** This test is made before p is advanced, so that input
          ** consisting of a lone "'" does not cause a read past the
          ** nul-terminator. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;       /* That was the closing quote */
          /* Otherwise p points at the 2nd byte of an escaped quote ('') */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by at least one digit. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
| 129 |
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function searches for the corresponding close-quote character within
** the string and dequotes the string in place, adding a new nul-terminator
** byte after the dequoted text. Within the quoted text, two consecutive
** close-quote characters are an escape for a single one.
**
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. If the close-quote is not found,
** everything following the open-quote is treated as the quoted text and
** the value returned is the byte offset of the original nul-terminator.
*/
static int fts5Dequote(char *z){
  char q;
  int iIn = 1;
  int iOut = 0;
  q = z[0];

  /* Set stack variable q to the close-quote character */
  assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  if( q=='[' ) q = ']';

  /* The loop ends at the nul-terminator if the input has no close-quote,
  ** so the condition must not be wrapped in an always-true assertion. */
  while( z[iIn] ){
    if( z[iIn]==q ){
      if( z[iIn+1]!=q ){
        /* Character iIn was the close quote. */
        iIn++;
        break;
      }else{
        /* Character iIn and iIn+1 form an escaped quote character. Skip
        ** the input cursor past both and copy a single quote character
        ** to the output buffer. */
        iIn += 2;
        z[iOut++] = q;
      }
    }else{
      z[iOut++] = z[iIn++];
    }
  }

  /* iOut<iIn always holds, so this never writes beyond the input. */
  z[iOut] = '\0';
  return iIn;
}
| 174 |
/*
** Strip SQL-style quoting from string z, in place. If the first character
** of z is not one of the four recognized open-quote characters, z is left
** untouched. z must not begin with a white-space character.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted - nothing to do */
      break;
  }
}
| 197 |
| 198 |
/*
** One entry of a table mapping option names to integer values. Tables
** searched by fts5ConfigSetEnum() are terminated by an entry with
** zName==0.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of enumeration value */
  int eVal;                       /* Integer value associated with zName */
};
| 204 |
| 205 static int fts5ConfigSetEnum( |
| 206 const Fts5Enum *aEnum, |
| 207 const char *zEnum, |
| 208 int *peVal |
| 209 ){ |
| 210 int nEnum = (int)strlen(zEnum); |
| 211 int i; |
| 212 int iVal = -1; |
| 213 |
| 214 for(i=0; aEnum[i].zName; i++){ |
| 215 if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ |
| 216 if( iVal>=0 ) return SQLITE_ERROR; |
| 217 iVal = aEnum[i].eVal; |
| 218 } |
| 219 } |
| 220 |
| 221 *peVal = iVal; |
| 222 return iVal<0 ? SQLITE_ERROR : SQLITE_OK; |
| 223 } |
| 224 |
| 225 /* |
| 226 ** Parse a "special" CREATE VIRTUAL TABLE directive and update |
| 227 ** configuration object pConfig as appropriate. |
| 228 ** |
| 229 ** If successful, object pConfig is updated and SQLITE_OK returned. If |
| 230 ** an error occurs, an SQLite error code is returned and an error message |
| 231 ** may be left in *pzErr. It is the responsibility of the caller to |
| 232 ** eventually free any such error message using sqlite3_free(). |
| 233 */ |
| 234 static int fts5ConfigParseSpecial( |
| 235 Fts5Global *pGlobal, |
| 236 Fts5Config *pConfig, /* Configuration object to update */ |
| 237 const char *zCmd, /* Special command to parse */ |
| 238 const char *zArg, /* Argument to parse */ |
| 239 char **pzErr /* OUT: Error message */ |
| 240 ){ |
| 241 int rc = SQLITE_OK; |
| 242 int nCmd = (int)strlen(zCmd); |
| 243 if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ |
| 244 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; |
| 245 const char *p; |
| 246 int bFirst = 1; |
| 247 if( pConfig->aPrefix==0 ){ |
| 248 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); |
| 249 if( rc ) return rc; |
| 250 } |
| 251 |
| 252 p = zArg; |
| 253 while( 1 ){ |
| 254 int nPre = 0; |
| 255 |
| 256 while( p[0]==' ' ) p++; |
| 257 if( bFirst==0 && p[0]==',' ){ |
| 258 p++; |
| 259 while( p[0]==' ' ) p++; |
| 260 }else if( p[0]=='\0' ){ |
| 261 break; |
| 262 } |
| 263 if( p[0]<'0' || p[0]>'9' ){ |
| 264 *pzErr = sqlite3_mprintf("malformed prefix=... directive"); |
| 265 rc = SQLITE_ERROR; |
| 266 break; |
| 267 } |
| 268 |
| 269 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){ |
| 270 *pzErr = sqlite3_mprintf( |
| 271 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES |
| 272 ); |
| 273 rc = SQLITE_ERROR; |
| 274 break; |
| 275 } |
| 276 |
| 277 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ |
| 278 nPre = nPre*10 + (p[0] - '0'); |
| 279 p++; |
| 280 } |
| 281 |
| 282 if( nPre<=0 || nPre>=1000 ){ |
| 283 *pzErr = sqlite3_mprintf("prefix length out of range (max 999)"); |
| 284 rc = SQLITE_ERROR; |
| 285 break; |
| 286 } |
| 287 |
| 288 pConfig->aPrefix[pConfig->nPrefix] = nPre; |
| 289 pConfig->nPrefix++; |
| 290 bFirst = 0; |
| 291 } |
| 292 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES ); |
| 293 return rc; |
| 294 } |
| 295 |
| 296 if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ |
| 297 const char *p = (const char*)zArg; |
| 298 int nArg = (int)strlen(zArg) + 1; |
| 299 char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); |
| 300 char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); |
| 301 char *pSpace = pDel; |
| 302 |
| 303 if( azArg && pSpace ){ |
| 304 if( pConfig->pTok ){ |
| 305 *pzErr = sqlite3_mprintf("multiple tokenize=... directives"); |
| 306 rc = SQLITE_ERROR; |
| 307 }else{ |
| 308 for(nArg=0; p && *p; nArg++){ |
| 309 const char *p2 = fts5ConfigSkipWhitespace(p); |
| 310 if( *p2=='\'' ){ |
| 311 p = fts5ConfigSkipLiteral(p2); |
| 312 }else{ |
| 313 p = fts5ConfigSkipBareword(p2); |
| 314 } |
| 315 if( p ){ |
| 316 memcpy(pSpace, p2, p-p2); |
| 317 azArg[nArg] = pSpace; |
| 318 sqlite3Fts5Dequote(pSpace); |
| 319 pSpace += (p - p2) + 1; |
| 320 p = fts5ConfigSkipWhitespace(p); |
| 321 } |
| 322 } |
| 323 if( p==0 ){ |
| 324 *pzErr = sqlite3_mprintf("parse error in tokenize directive"); |
| 325 rc = SQLITE_ERROR; |
| 326 }else{ |
| 327 rc = sqlite3Fts5GetTokenizer(pGlobal, |
| 328 (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi, |
| 329 pzErr |
| 330 ); |
| 331 } |
| 332 } |
| 333 } |
| 334 |
| 335 sqlite3_free(azArg); |
| 336 sqlite3_free(pDel); |
| 337 return rc; |
| 338 } |
| 339 |
| 340 if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ |
| 341 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ |
| 342 *pzErr = sqlite3_mprintf("multiple content=... directives"); |
| 343 rc = SQLITE_ERROR; |
| 344 }else{ |
| 345 if( zArg[0] ){ |
| 346 pConfig->eContent = FTS5_CONTENT_EXTERNAL; |
| 347 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); |
| 348 }else{ |
| 349 pConfig->eContent = FTS5_CONTENT_NONE; |
| 350 } |
| 351 } |
| 352 return rc; |
| 353 } |
| 354 |
| 355 if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ |
| 356 if( pConfig->zContentRowid ){ |
| 357 *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); |
| 358 rc = SQLITE_ERROR; |
| 359 }else{ |
| 360 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); |
| 361 } |
| 362 return rc; |
| 363 } |
| 364 |
| 365 if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){ |
| 366 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ |
| 367 *pzErr = sqlite3_mprintf("malformed columnsize=... directive"); |
| 368 rc = SQLITE_ERROR; |
| 369 }else{ |
| 370 pConfig->bColumnsize = (zArg[0]=='1'); |
| 371 } |
| 372 return rc; |
| 373 } |
| 374 |
| 375 if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){ |
| 376 const Fts5Enum aDetail[] = { |
| 377 { "none", FTS5_DETAIL_NONE }, |
| 378 { "full", FTS5_DETAIL_FULL }, |
| 379 { "columns", FTS5_DETAIL_COLUMNS }, |
| 380 { 0, 0 } |
| 381 }; |
| 382 |
| 383 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ |
| 384 *pzErr = sqlite3_mprintf("malformed detail=... directive"); |
| 385 } |
| 386 return rc; |
| 387 } |
| 388 |
| 389 *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); |
| 390 return SQLITE_ERROR; |
| 391 } |
| 392 |
| 393 /* |
| 394 ** Allocate an instance of the default tokenizer ("simple") at |
| 395 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error |
| 396 ** code if an error occurs. |
| 397 */ |
| 398 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ |
| 399 assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); |
| 400 return sqlite3Fts5GetTokenizer( |
| 401 pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0 |
| 402 ); |
| 403 } |
| 404 |
| 405 /* |
| 406 ** Gobble up the first bareword or quoted word from the input buffer zIn. |
| 407 ** Return a pointer to the character immediately following the last in |
| 408 ** the gobbled word if successful, or a NULL pointer otherwise (failed |
| 409 ** to find close-quote character). |
| 410 ** |
| 411 ** Before returning, set pzOut to point to a new buffer containing a |
| 412 ** nul-terminated, dequoted copy of the gobbled word. If the word was |
| 413 ** quoted, *pbQuoted is also set to 1 before returning. |
| 414 ** |
| 415 ** If *pRc is other than SQLITE_OK when this function is called, it is |
| 416 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this |
| 417 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* |
| 418 ** set if a parse error (failed to find close quote) occurs. |
| 419 */ |
| 420 static const char *fts5ConfigGobbleWord( |
| 421 int *pRc, /* IN/OUT: Error code */ |
| 422 const char *zIn, /* Buffer to gobble string/bareword from */ |
| 423 char **pzOut, /* OUT: malloc'd buffer containing str/bw */ |
| 424 int *pbQuoted /* OUT: Set to true if dequoting required */ |
| 425 ){ |
| 426 const char *zRet = 0; |
| 427 |
| 428 int nIn = (int)strlen(zIn); |
| 429 char *zOut = sqlite3_malloc(nIn+1); |
| 430 |
| 431 assert( *pRc==SQLITE_OK ); |
| 432 *pbQuoted = 0; |
| 433 *pzOut = 0; |
| 434 |
| 435 if( zOut==0 ){ |
| 436 *pRc = SQLITE_NOMEM; |
| 437 }else{ |
| 438 memcpy(zOut, zIn, nIn+1); |
| 439 if( fts5_isopenquote(zOut[0]) ){ |
| 440 int ii = fts5Dequote(zOut); |
| 441 zRet = &zIn[ii]; |
| 442 *pbQuoted = 1; |
| 443 }else{ |
| 444 zRet = fts5ConfigSkipBareword(zIn); |
| 445 if( zRet ){ |
| 446 zOut[zRet-zIn] = '\0'; |
| 447 } |
| 448 } |
| 449 } |
| 450 |
| 451 if( zRet==0 ){ |
| 452 sqlite3_free(zOut); |
| 453 }else{ |
| 454 *pzOut = zOut; |
| 455 } |
| 456 |
| 457 return zRet; |
| 458 } |
| 459 |
| 460 static int fts5ConfigParseColumn( |
| 461 Fts5Config *p, |
| 462 char *zCol, |
| 463 char *zArg, |
| 464 char **pzErr |
| 465 ){ |
| 466 int rc = SQLITE_OK; |
| 467 if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) |
| 468 || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) |
| 469 ){ |
| 470 *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); |
| 471 rc = SQLITE_ERROR; |
| 472 }else if( zArg ){ |
| 473 if( 0==sqlite3_stricmp(zArg, "unindexed") ){ |
| 474 p->abUnindexed[p->nCol] = 1; |
| 475 }else{ |
| 476 *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); |
| 477 rc = SQLITE_ERROR; |
| 478 } |
| 479 } |
| 480 |
| 481 p->azCol[p->nCol++] = zCol; |
| 482 return rc; |
| 483 } |
| 484 |
| 485 /* |
| 486 ** Populate the Fts5Config.zContentExprlist string. |
| 487 */ |
| 488 static int fts5ConfigMakeExprlist(Fts5Config *p){ |
| 489 int i; |
| 490 int rc = SQLITE_OK; |
| 491 Fts5Buffer buf = {0, 0, 0}; |
| 492 |
| 493 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); |
| 494 if( p->eContent!=FTS5_CONTENT_NONE ){ |
| 495 for(i=0; i<p->nCol; i++){ |
| 496 if( p->eContent==FTS5_CONTENT_EXTERNAL ){ |
| 497 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); |
| 498 }else{ |
| 499 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); |
| 500 } |
| 501 } |
| 502 } |
| 503 |
| 504 assert( p->zContentExprlist==0 ); |
| 505 p->zContentExprlist = (char*)buf.p; |
| 506 return rc; |
| 507 } |
| 508 |
| 509 /* |
| 510 ** Arguments nArg/azArg contain the string arguments passed to the xCreate |
| 511 ** or xConnect method of the virtual table. This function attempts to |
| 512 ** allocate an instance of Fts5Config containing the results of parsing |
| 513 ** those arguments. |
| 514 ** |
| 515 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the |
| 516 ** new Fts5Config object. If an error occurs, an SQLite error code is |
| 517 ** returned, *ppOut is set to NULL and an error message may be left in |
| 518 ** *pzErr. It is the responsibility of the caller to eventually free any |
| 519 ** such error message using sqlite3_free(). |
| 520 */ |
| 521 int sqlite3Fts5ConfigParse( |
| 522 Fts5Global *pGlobal, |
| 523 sqlite3 *db, |
| 524 int nArg, /* Number of arguments */ |
| 525 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ |
| 526 Fts5Config **ppOut, /* OUT: Results of parse */ |
| 527 char **pzErr /* OUT: Error message */ |
| 528 ){ |
| 529 int rc = SQLITE_OK; /* Return code */ |
| 530 Fts5Config *pRet; /* New object to return */ |
| 531 int i; |
| 532 int nByte; |
| 533 |
| 534 *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); |
| 535 if( pRet==0 ) return SQLITE_NOMEM; |
| 536 memset(pRet, 0, sizeof(Fts5Config)); |
| 537 pRet->db = db; |
| 538 pRet->iCookie = -1; |
| 539 |
| 540 nByte = nArg * (sizeof(char*) + sizeof(u8)); |
| 541 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); |
| 542 pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; |
| 543 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); |
| 544 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); |
| 545 pRet->bColumnsize = 1; |
| 546 pRet->eDetail = FTS5_DETAIL_FULL; |
| 547 #ifdef SQLITE_DEBUG |
| 548 pRet->bPrefixIndex = 1; |
| 549 #endif |
| 550 if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ |
| 551 *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); |
| 552 rc = SQLITE_ERROR; |
| 553 } |
| 554 |
| 555 for(i=3; rc==SQLITE_OK && i<nArg; i++){ |
| 556 const char *zOrig = azArg[i]; |
| 557 const char *z; |
| 558 char *zOne = 0; |
| 559 char *zTwo = 0; |
| 560 int bOption = 0; |
| 561 int bMustBeCol = 0; |
| 562 |
| 563 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); |
| 564 z = fts5ConfigSkipWhitespace(z); |
| 565 if( z && *z=='=' ){ |
| 566 bOption = 1; |
| 567 z++; |
| 568 if( bMustBeCol ) z = 0; |
| 569 } |
| 570 z = fts5ConfigSkipWhitespace(z); |
| 571 if( z && z[0] ){ |
| 572 int bDummy; |
| 573 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); |
| 574 if( z && z[0] ) z = 0; |
| 575 } |
| 576 |
| 577 if( rc==SQLITE_OK ){ |
| 578 if( z==0 ){ |
| 579 *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); |
| 580 rc = SQLITE_ERROR; |
| 581 }else{ |
| 582 if( bOption ){ |
| 583 rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr); |
| 584 }else{ |
| 585 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); |
| 586 zOne = 0; |
| 587 } |
| 588 } |
| 589 } |
| 590 |
| 591 sqlite3_free(zOne); |
| 592 sqlite3_free(zTwo); |
| 593 } |
| 594 |
| 595 /* If a tokenizer= option was successfully parsed, the tokenizer has |
| 596 ** already been allocated. Otherwise, allocate an instance of the default |
| 597 ** tokenizer (unicode61) now. */ |
| 598 if( rc==SQLITE_OK && pRet->pTok==0 ){ |
| 599 rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); |
| 600 } |
| 601 |
| 602 /* If no zContent option was specified, fill in the default values. */ |
| 603 if( rc==SQLITE_OK && pRet->zContent==0 ){ |
| 604 const char *zTail = 0; |
| 605 assert( pRet->eContent==FTS5_CONTENT_NORMAL |
| 606 || pRet->eContent==FTS5_CONTENT_NONE |
| 607 ); |
| 608 if( pRet->eContent==FTS5_CONTENT_NORMAL ){ |
| 609 zTail = "content"; |
| 610 }else if( pRet->bColumnsize ){ |
| 611 zTail = "docsize"; |
| 612 } |
| 613 |
| 614 if( zTail ){ |
| 615 pRet->zContent = sqlite3Fts5Mprintf( |
| 616 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail |
| 617 ); |
| 618 } |
| 619 } |
| 620 |
| 621 if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ |
| 622 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); |
| 623 } |
| 624 |
| 625 /* Formulate the zContentExprlist text */ |
| 626 if( rc==SQLITE_OK ){ |
| 627 rc = fts5ConfigMakeExprlist(pRet); |
| 628 } |
| 629 |
| 630 if( rc!=SQLITE_OK ){ |
| 631 sqlite3Fts5ConfigFree(pRet); |
| 632 *ppOut = 0; |
| 633 } |
| 634 return rc; |
| 635 } |
| 636 |
| 637 /* |
| 638 ** Free the configuration object passed as the only argument. |
| 639 */ |
| 640 void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ |
| 641 if( pConfig ){ |
| 642 int i; |
| 643 if( pConfig->pTok ){ |
| 644 pConfig->pTokApi->xDelete(pConfig->pTok); |
| 645 } |
| 646 sqlite3_free(pConfig->zDb); |
| 647 sqlite3_free(pConfig->zName); |
| 648 for(i=0; i<pConfig->nCol; i++){ |
| 649 sqlite3_free(pConfig->azCol[i]); |
| 650 } |
| 651 sqlite3_free(pConfig->azCol); |
| 652 sqlite3_free(pConfig->aPrefix); |
| 653 sqlite3_free(pConfig->zRank); |
| 654 sqlite3_free(pConfig->zRankArgs); |
| 655 sqlite3_free(pConfig->zContent); |
| 656 sqlite3_free(pConfig->zContentRowid); |
| 657 sqlite3_free(pConfig->zContentExprlist); |
| 658 sqlite3_free(pConfig); |
| 659 } |
| 660 } |
| 661 |
| 662 /* |
| 663 ** Call sqlite3_declare_vtab() based on the contents of the configuration |
| 664 ** object passed as the only argument. Return SQLITE_OK if successful, or |
| 665 ** an SQLite error code if an error occurs. |
| 666 */ |
| 667 int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ |
| 668 int i; |
| 669 int rc = SQLITE_OK; |
| 670 char *zSql; |
| 671 |
| 672 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); |
| 673 for(i=0; zSql && i<pConfig->nCol; i++){ |
| 674 const char *zSep = (i==0?"":", "); |
| 675 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); |
| 676 } |
| 677 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", |
| 678 zSql, pConfig->zName, FTS5_RANK_NAME |
| 679 ); |
| 680 |
| 681 assert( zSql || rc==SQLITE_NOMEM ); |
| 682 if( zSql ){ |
| 683 rc = sqlite3_declare_vtab(pConfig->db, zSql); |
| 684 sqlite3_free(zSql); |
| 685 } |
| 686 |
| 687 return rc; |
| 688 } |
| 689 |
| 690 /* |
| 691 ** Tokenize the text passed via the second and third arguments. |
| 692 ** |
| 693 ** The callback is invoked once for each token in the input text. The |
| 694 ** arguments passed to it are, in order: |
| 695 ** |
| 696 ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() |
| 697 ** const char *pToken // Pointer to buffer containing token |
| 698 ** int nToken // Size of token in bytes |
| 699 ** int iStart // Byte offset of start of token within input text |
| 700 ** int iEnd // Byte offset of end of token within input text |
| 701 ** int iPos // Position of token in input (first token is 0) |
| 702 ** |
| 703 ** If the callback returns a non-zero value the tokenization is abandoned |
| 704 ** and no further callbacks are issued. |
| 705 ** |
| 706 ** This function returns SQLITE_OK if successful or an SQLite error code |
| 707 ** if an error occurs. If the tokenization was abandoned early because |
| 708 ** the callback returned SQLITE_DONE, this is not an error and this function |
| 709 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early |
| 710 ** because the callback returned another non-zero value, it is assumed |
| 711 ** to be an SQLite error code and returned to the caller. |
| 712 */ |
| 713 int sqlite3Fts5Tokenize( |
| 714 Fts5Config *pConfig, /* FTS5 Configuration object */ |
| 715 int flags, /* FTS5_TOKENIZE_* flags */ |
| 716 const char *pText, int nText, /* Text to tokenize */ |
| 717 void *pCtx, /* Context passed to xToken() */ |
| 718 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
| 719 ){ |
| 720 if( pText==0 ) return SQLITE_OK; |
| 721 return pConfig->pTokApi->xTokenize( |
| 722 pConfig->pTok, pCtx, flags, pText, nText, xToken |
| 723 ); |
| 724 } |
| 725 |
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. White-space is permitted around each literal.
**
** If the input has this form, return a pointer to the ')'. Otherwise,
** return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);
    if( z==0 ) return 0;          /* Not a literal */
    if( *z==')' ) return z;       /* End of list */
    if( *z!=',' ) return 0;       /* Junk following a literal */
    z++;
  }
}
| 749 |
| 750 /* |
| 751 ** Parameter zIn contains a rank() function specification. The format of |
| 752 ** this is: |
| 753 ** |
| 754 ** + Bareword (function name) |
| 755 ** + Open parenthesis - "(" |
| 756 ** + Zero or more SQL literals in a comma separated list |
| 757 ** + Close parenthesis - ")" |
| 758 */ |
| 759 int sqlite3Fts5ConfigParseRank( |
| 760 const char *zIn, /* Input string */ |
| 761 char **pzRank, /* OUT: Rank function name */ |
| 762 char **pzRankArgs /* OUT: Rank function arguments */ |
| 763 ){ |
| 764 const char *p = zIn; |
| 765 const char *pRank; |
| 766 char *zRank = 0; |
| 767 char *zRankArgs = 0; |
| 768 int rc = SQLITE_OK; |
| 769 |
| 770 *pzRank = 0; |
| 771 *pzRankArgs = 0; |
| 772 |
| 773 if( p==0 ){ |
| 774 rc = SQLITE_ERROR; |
| 775 }else{ |
| 776 p = fts5ConfigSkipWhitespace(p); |
| 777 pRank = p; |
| 778 p = fts5ConfigSkipBareword(p); |
| 779 |
| 780 if( p ){ |
| 781 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); |
| 782 if( zRank ) memcpy(zRank, pRank, p-pRank); |
| 783 }else{ |
| 784 rc = SQLITE_ERROR; |
| 785 } |
| 786 |
| 787 if( rc==SQLITE_OK ){ |
| 788 p = fts5ConfigSkipWhitespace(p); |
| 789 if( *p!='(' ) rc = SQLITE_ERROR; |
| 790 p++; |
| 791 } |
| 792 if( rc==SQLITE_OK ){ |
| 793 const char *pArgs; |
| 794 p = fts5ConfigSkipWhitespace(p); |
| 795 pArgs = p; |
| 796 if( *p!=')' ){ |
| 797 p = fts5ConfigSkipArgs(p); |
| 798 if( p==0 ){ |
| 799 rc = SQLITE_ERROR; |
| 800 }else{ |
| 801 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); |
| 802 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); |
| 803 } |
| 804 } |
| 805 } |
| 806 } |
| 807 |
| 808 if( rc!=SQLITE_OK ){ |
| 809 sqlite3_free(zRank); |
| 810 assert( zRankArgs==0 ); |
| 811 }else{ |
| 812 *pzRank = zRank; |
| 813 *pzRankArgs = zRankArgs; |
| 814 } |
| 815 return rc; |
| 816 } |
| 817 |
| 818 int sqlite3Fts5ConfigSetValue( |
| 819 Fts5Config *pConfig, |
| 820 const char *zKey, |
| 821 sqlite3_value *pVal, |
| 822 int *pbBadkey |
| 823 ){ |
| 824 int rc = SQLITE_OK; |
| 825 |
| 826 if( 0==sqlite3_stricmp(zKey, "pgsz") ){ |
| 827 int pgsz = 0; |
| 828 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 829 pgsz = sqlite3_value_int(pVal); |
| 830 } |
| 831 if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){ |
| 832 *pbBadkey = 1; |
| 833 }else{ |
| 834 pConfig->pgsz = pgsz; |
| 835 } |
| 836 } |
| 837 |
| 838 else if( 0==sqlite3_stricmp(zKey, "hashsize") ){ |
| 839 int nHashSize = -1; |
| 840 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 841 nHashSize = sqlite3_value_int(pVal); |
| 842 } |
| 843 if( nHashSize<=0 ){ |
| 844 *pbBadkey = 1; |
| 845 }else{ |
| 846 pConfig->nHashSize = nHashSize; |
| 847 } |
| 848 } |
| 849 |
| 850 else if( 0==sqlite3_stricmp(zKey, "automerge") ){ |
| 851 int nAutomerge = -1; |
| 852 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 853 nAutomerge = sqlite3_value_int(pVal); |
| 854 } |
| 855 if( nAutomerge<0 || nAutomerge>64 ){ |
| 856 *pbBadkey = 1; |
| 857 }else{ |
| 858 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
| 859 pConfig->nAutomerge = nAutomerge; |
| 860 } |
| 861 } |
| 862 |
| 863 else if( 0==sqlite3_stricmp(zKey, "usermerge") ){ |
| 864 int nUsermerge = -1; |
| 865 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 866 nUsermerge = sqlite3_value_int(pVal); |
| 867 } |
| 868 if( nUsermerge<2 || nUsermerge>16 ){ |
| 869 *pbBadkey = 1; |
| 870 }else{ |
| 871 pConfig->nUsermerge = nUsermerge; |
| 872 } |
| 873 } |
| 874 |
| 875 else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){ |
| 876 int nCrisisMerge = -1; |
| 877 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 878 nCrisisMerge = sqlite3_value_int(pVal); |
| 879 } |
| 880 if( nCrisisMerge<0 ){ |
| 881 *pbBadkey = 1; |
| 882 }else{ |
| 883 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
| 884 pConfig->nCrisisMerge = nCrisisMerge; |
| 885 } |
| 886 } |
| 887 |
| 888 else if( 0==sqlite3_stricmp(zKey, "rank") ){ |
| 889 const char *zIn = (const char*)sqlite3_value_text(pVal); |
| 890 char *zRank; |
| 891 char *zRankArgs; |
| 892 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); |
| 893 if( rc==SQLITE_OK ){ |
| 894 sqlite3_free(pConfig->zRank); |
| 895 sqlite3_free(pConfig->zRankArgs); |
| 896 pConfig->zRank = zRank; |
| 897 pConfig->zRankArgs = zRankArgs; |
| 898 }else if( rc==SQLITE_ERROR ){ |
| 899 rc = SQLITE_OK; |
| 900 *pbBadkey = 1; |
| 901 } |
| 902 }else{ |
| 903 *pbBadkey = 1; |
| 904 } |
| 905 return rc; |
| 906 } |
| 907 |
| 908 /* |
| 909 ** Load the contents of the %_config table into memory. |
| 910 */ |
| 911 int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ |
| 912 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; |
| 913 char *zSql; |
| 914 sqlite3_stmt *p = 0; |
| 915 int rc = SQLITE_OK; |
| 916 int iVersion = 0; |
| 917 |
| 918 /* Set default values */ |
| 919 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; |
| 920 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
| 921 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE; |
| 922 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
| 923 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE; |
| 924 |
| 925 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); |
| 926 if( zSql ){ |
| 927 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); |
| 928 sqlite3_free(zSql); |
| 929 } |
| 930 |
| 931 assert( rc==SQLITE_OK || p==0 ); |
| 932 if( rc==SQLITE_OK ){ |
| 933 while( SQLITE_ROW==sqlite3_step(p) ){ |
| 934 const char *zK = (const char*)sqlite3_column_text(p, 0); |
| 935 sqlite3_value *pVal = sqlite3_column_value(p, 1); |
| 936 if( 0==sqlite3_stricmp(zK, "version") ){ |
| 937 iVersion = sqlite3_value_int(pVal); |
| 938 }else{ |
| 939 int bDummy = 0; |
| 940 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); |
| 941 } |
| 942 } |
| 943 rc = sqlite3_finalize(p); |
| 944 } |
| 945 |
| 946 if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ |
| 947 rc = SQLITE_ERROR; |
| 948 if( pConfig->pzErrmsg ){ |
| 949 assert( 0==*pConfig->pzErrmsg ); |
| 950 *pConfig->pzErrmsg = sqlite3_mprintf( |
| 951 "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", |
| 952 iVersion, FTS5_CURRENT_VERSION |
| 953 ); |
| 954 } |
| 955 } |
| 956 |
| 957 if( rc==SQLITE_OK ){ |
| 958 pConfig->iCookie = iCookie; |
| 959 } |
| 960 return rc; |
| 961 } |
OLD | NEW |