OLD | NEW |
(Empty) | |
| 1 /************** Begin file fts5.c ********************************************/ |
| 2 |
| 3 |
| 4 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) |
| 5 |
| 6 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) /* Not an SQLITE_DEBUG build: */ |
| 7 # define NDEBUG 1 /* ...define NDEBUG so that assert() is disabled */ |
| 8 #endif |
| 9 #if defined(NDEBUG) && defined(SQLITE_DEBUG) /* SQLITE_DEBUG build: */ |
| 10 # undef NDEBUG /* ...make sure NDEBUG is not left defined */ |
| 11 #endif |
| 12 |
| 13 /* |
| 14 ** 2014 May 31 |
| 15 ** |
| 16 ** The author disclaims copyright to this source code. In place of |
| 17 ** a legal notice, here is a blessing: |
| 18 ** |
| 19 ** May you do good and not evil. |
| 20 ** May you find forgiveness for yourself and forgive others. |
| 21 ** May you share freely, never taking more than you give. |
| 22 ** |
| 23 ****************************************************************************** |
| 24 ** |
| 25 ** Interfaces to extend FTS5. Using the interfaces defined in this file, |
| 26 ** FTS5 may be extended with: |
| 27 ** |
| 28 ** * custom tokenizers, and |
| 29 ** * custom auxiliary functions. |
| 30 */ |
| 31 |
| 32 |
| 33 #ifndef _FTS5_H |
| 34 #define _FTS5_H |
| 35 |
| 36 /* #include "sqlite3.h" */ |
| 37 |
| 38 #if 0 |
| 39 extern "C" { |
| 40 #endif |
| 41 |
| 42 /************************************************************************* |
| 43 ** CUSTOM AUXILIARY FUNCTIONS |
| 44 ** |
| 45 ** Virtual table implementations may overload SQL functions by implementing |
| 46 ** the sqlite3_module.xFindFunction() method. |
| 47 */ |
| 48 |
| 49 typedef struct Fts5ExtensionApi Fts5ExtensionApi; /* Method table; struct defined below */ |
| 50 typedef struct Fts5Context Fts5Context; /* Handle passed as first arg to the API methods */ |
| 51 typedef struct Fts5PhraseIter Fts5PhraseIter; /* Iterator for xPhraseFirst()/xPhraseNext() */ |
| 52 |
| 53 typedef void (*fts5_extension_function)( /* Signature of an auxiliary function */ |
| 54 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 55 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 56 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 57 int nVal, /* Number of values in apVal[] array */ |
| 58 sqlite3_value **apVal /* Array of trailing arguments */ |
| 59 ); |
| 60 |
| 61 struct Fts5PhraseIter { /* Iterator state; applications must not modify it directly */ |
| 62 const unsigned char *a; /* Used only via the xPhraseFirst*()/xPhraseNext*() methods */ |
| 63 const unsigned char *b; /* Used only via the xPhraseFirst*()/xPhraseNext*() methods */ |
| 64 }; |
| 65 |
| 66 /* |
| 67 ** EXTENSION API FUNCTIONS |
| 68 ** |
| 69 ** xUserData(pFts): |
| 70 ** Return a copy of the context pointer the extension function was |
| 71 ** registered with. |
| 72 ** |
| 73 ** xColumnTotalSize(pFts, iCol, pnToken): |
| 74 ** If parameter iCol is less than zero, set output variable *pnToken |
| 75 ** to the total number of tokens in the FTS5 table. Or, if iCol is |
| 76 ** non-negative but less than the number of columns in the table, set |
| 77 ** *pnToken to the total number of tokens in column iCol, considering |
| 78 ** all rows in the FTS5 table. |
| 79 ** |
| 80 ** If parameter iCol is greater than or equal to the number of columns |
| 81 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. |
| 82 ** an OOM condition or IO error), an appropriate SQLite error code is |
| 83 ** returned. |
| 84 ** |
| 85 ** xColumnCount(pFts): |
| 86 ** Return the number of columns in the table. |
| 87 ** |
| 88 ** xColumnSize(pFts, iCol, pnToken): |
| 89 ** If parameter iCol is less than zero, set output variable *pnToken |
| 90 ** to the total number of tokens in the current row. Or, if iCol is |
| 91 ** non-negative but less than the number of columns in the table, set |
| 92 ** *pnToken to the number of tokens in column iCol of the current row. |
| 93 ** |
| 94 ** If parameter iCol is greater than or equal to the number of columns |
| 95 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. |
| 96 ** an OOM condition or IO error), an appropriate SQLite error code is |
| 97 ** returned. |
| 98 ** |
| 99 ** This function may be quite inefficient if used with an FTS5 table |
| 100 ** created with the "columnsize=0" option. |
| 101 ** |
| 102 ** xColumnText: |
| 103 ** This function attempts to retrieve the text of column iCol of the |
| 104 ** current document. If successful, (*pz) is set to point to a buffer |
| 105 ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes |
| 106 ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, |
| 107 ** if an error occurs, an SQLite error code is returned and the final values |
| 108 ** of (*pz) and (*pn) are undefined. |
| 109 ** |
| 110 ** xPhraseCount: |
| 111 ** Returns the number of phrases in the current query expression. |
| 112 ** |
| 113 ** xPhraseSize: |
| 114 ** Returns the number of tokens in phrase iPhrase of the query. Phrases |
| 115 ** are numbered starting from zero. |
| 116 ** |
| 117 ** xInstCount: |
| 118 ** Set *pnInst to the total number of occurrences of all phrases within |
| 119 ** the query within the current row. Return SQLITE_OK if successful, or |
| 120 ** an error code (e.g. SQLITE_NOMEM) if an error occurs. |
| 121 ** |
| 122 ** This API can be quite slow if used with an FTS5 table created with the |
| 123 ** "detail=none" or "detail=column" option. If the FTS5 table is created |
| 124 ** with either "detail=none" or "detail=column" and "content=" option |
| 125 ** (i.e. if it is a contentless table), then this API always returns 0. |
| 126 ** |
| 127 ** xInst: |
| 128 ** Query for the details of phrase match iIdx within the current row. |
| 129 ** Phrase matches are numbered starting from zero, so the iIdx argument |
| 130 ** should be greater than or equal to zero and smaller than the value |
| 131 ** output by xInstCount(). |
| 132 ** |
| 133 ** Usually, output parameter *piPhrase is set to the phrase number, *piCol |
| 134 ** to the column in which it occurs and *piOff the token offset of the |
| 135 ** first token of the phrase. The exception is if the table was created |
| 136 ** with the offsets=0 option specified. In this case *piOff is always |
| 137 ** set to -1. |
| 138 ** |
| 139 ** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM) |
| 140 ** if an error occurs. |
| 141 ** |
| 142 ** This API can be quite slow if used with an FTS5 table created with the |
| 143 ** "detail=none" or "detail=column" option. |
| 144 ** |
| 145 ** xRowid: |
| 146 ** Returns the rowid of the current row. |
| 147 ** |
| 148 ** xTokenize: |
| 149 ** Tokenize text using the tokenizer belonging to the FTS5 table. |
| 150 ** |
| 151 ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): |
| 152 ** This API function is used to query the FTS table for phrase iPhrase |
| 153 ** of the current query. Specifically, a query equivalent to: |
| 154 ** |
| 155 ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid |
| 156 ** |
| 157 ** with $p set to a phrase equivalent to the phrase iPhrase of the |
| 158 ** current query is executed. Any column filter that applies to |
| 159 ** phrase iPhrase of the current query is included in $p. For each |
| 160 ** row visited, the callback function passed as the fourth argument |
| 161 ** is invoked. The context and API objects passed to the callback |
| 162 ** function may be used to access the properties of each matched row. |
| 163 ** Invoking Api.xUserData() returns a copy of the pointer passed as |
| 164 ** the third argument (pUserData). |
| 165 ** |
| 166 ** If the callback function returns any value other than SQLITE_OK, the |
| 167 ** query is abandoned and the xQueryPhrase function returns immediately. |
| 168 ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. |
| 169 ** Otherwise, the error code is propagated upwards. |
| 170 ** |
| 171 ** If the query runs to completion without incident, SQLITE_OK is returned. |
| 172 ** Or, if some error occurs before the query completes or is aborted by |
| 173 ** the callback, an SQLite error code is returned. |
| 174 ** |
| 175 ** |
| 176 ** xSetAuxdata(pFts5, pAux, xDelete) |
| 177 ** |
| 178 ** Save the pointer passed as the second argument as the extension |
| 179 ** function's "auxiliary data". The pointer may then be retrieved by the |
| 180 ** current or any future invocation of the same fts5 extension function |
| 181 ** made as part of the same MATCH query using the xGetAuxdata() API. |
| 182 ** |
| 183 ** Each extension function is allocated a single auxiliary data slot for |
| 184 ** each FTS query (MATCH expression). If the extension function is invoked |
| 185 ** more than once for a single FTS query, then all invocations share a |
| 186 ** single auxiliary data context. |
| 187 ** |
| 188 ** If there is already an auxiliary data pointer when this function is |
| 189 ** invoked, then it is replaced by the new pointer. If an xDelete callback |
| 190 ** was specified along with the original pointer, it is invoked at this |
| 191 ** point. |
| 192 ** |
| 193 ** The xDelete callback, if one is specified, is also invoked on the |
| 194 ** auxiliary data pointer after the FTS5 query has finished. |
| 195 ** |
| 196 ** If an error (e.g. an OOM condition) occurs within this function, |
| 197 ** the auxiliary data is set to NULL and an error code returned. If the |
| 198 ** xDelete parameter was not NULL, it is invoked on the auxiliary data |
| 199 ** pointer before returning. |
| 200 ** |
| 201 ** |
| 202 ** xGetAuxdata(pFts5, bClear) |
| 203 ** |
| 204 ** Returns the current auxiliary data pointer for the fts5 extension |
| 205 ** function. See the xSetAuxdata() method for details. |
| 206 ** |
| 207 ** If the bClear argument is non-zero, then the auxiliary data is cleared |
| 208 ** (set to NULL) before this function returns. In this case the xDelete, |
| 209 ** if any, is not invoked. |
| 210 ** |
| 211 ** |
| 212 ** xRowCount(pFts5, pnRow) |
| 213 ** |
| 214 ** This function is used to retrieve the total number of rows in the table. |
| 215 ** In other words, the same value that would be returned by: |
| 216 ** |
| 217 ** SELECT count(*) FROM ftstable; |
| 218 ** |
| 219 ** xPhraseFirst() |
| 220 ** This function is used, along with type Fts5PhraseIter and the xPhraseNext |
| 221 ** method, to iterate through all instances of a single query phrase within |
| 222 ** the current row. This is the same information as is accessible via the |
| 223 ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient |
| 224 ** to use, this API may be faster under some circumstances. To iterate |
| 225 ** through instances of phrase iPhrase, use the following code: |
| 226 ** |
| 227 ** Fts5PhraseIter iter; |
| 228 ** int iCol, iOff; |
| 229 ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); |
| 230 ** iCol>=0; |
| 231 ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) |
| 232 ** ){ |
| 233 ** // An instance of phrase iPhrase at offset iOff of column iCol |
| 234 ** } |
| 235 ** |
| 236 ** The Fts5PhraseIter structure is defined above. Applications should not |
| 237 ** modify this structure directly - it should only be used as shown above |
| 238 ** with the xPhraseFirst() and xPhraseNext() API methods (and by |
| 239 ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). |
| 240 ** |
| 241 ** This API can be quite slow if used with an FTS5 table created with the |
| 242 ** "detail=none" or "detail=column" option. If the FTS5 table is created |
| 243 ** with either "detail=none" or "detail=column" and "content=" option |
| 244 ** (i.e. if it is a contentless table), then this API always iterates |
| 245 ** through an empty set (all calls to xPhraseFirst() set iCol to -1). |
| 246 ** |
| 247 ** xPhraseNext() |
| 248 ** See xPhraseFirst above. |
| 249 ** |
| 250 ** xPhraseFirstColumn() |
| 251 ** This function and xPhraseNextColumn() are similar to the xPhraseFirst() |
| 252 ** and xPhraseNext() APIs described above. The difference is that instead |
| 253 ** of iterating through all instances of a phrase in the current row, these |
| 254 ** APIs are used to iterate through the set of columns in the current row |
| 255 ** that contain one or more instances of a specified phrase. For example: |
| 256 ** |
| 257 ** Fts5PhraseIter iter; |
| 258 ** int iCol; |
| 259 ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); |
| 260 ** iCol>=0; |
| 261 ** pApi->xPhraseNextColumn(pFts, &iter, &iCol) |
| 262 ** ){ |
| 263 ** // Column iCol contains at least one instance of phrase iPhrase |
| 264 ** } |
| 265 ** |
| 266 ** This API can be quite slow if used with an FTS5 table created with the |
| 267 ** "detail=none" option. If the FTS5 table is created with both the |
| 268 ** "detail=none" and "content=" options (i.e. if it is a contentless table), |
| 269 ** then this API always iterates through an empty set (all calls to |
| 270 ** xPhraseFirstColumn() set iCol to -1). |
| 271 ** |
| 272 ** The information accessed using this API and its companion |
| 273 ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext |
| 274 ** (or xInst/xInstCount). The chief advantage of this API is that it is |
| 275 ** significantly more efficient than those alternatives when used with |
| 276 ** "detail=column" tables. |
| 277 ** |
| 278 ** xPhraseNextColumn() |
| 279 ** See xPhraseFirstColumn above. |
| 280 */ |
| 281 struct Fts5ExtensionApi { |
| 282 int iVersion; /* Currently always set to 3 */ |
| 283 |
| 284 void *(*xUserData)(Fts5Context*); /* Context pointer the function was registered with */ |
| 285 |
| 286 int (*xColumnCount)(Fts5Context*); /* Number of columns in the table */ |
| 287 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); /* Total rows in the table */ |
| 288 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); /* Tokens over all rows */ |
| 289 |
| 290 int (*xTokenize)(Fts5Context*, /* Tokenize text with the table's tokenizer */ |
| 291 const char *pText, int nText, /* Text to tokenize */ |
| 292 void *pCtx, /* Context passed to xToken() */ |
| 293 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
| 294 ); |
| 295 |
| 296 int (*xPhraseCount)(Fts5Context*); /* Number of phrases in the current query */ |
| 297 int (*xPhraseSize)(Fts5Context*, int iPhrase); /* Number of tokens in phrase iPhrase */ |
| 298 |
| 299 int (*xInstCount)(Fts5Context*, int *pnInst); /* Phrase occurrences in the current row */ |
| 300 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); |
| 301 |
| 302 sqlite3_int64 (*xRowid)(Fts5Context*); /* Rowid of the current row */ |
| 303 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); /* utf-8 text, size in bytes */ |
| 304 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); /* Tokens in the current row */ |
| 305 |
| 306 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, |
| 307 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) /* xCallback, invoked for each row visited */ |
| 308 ); |
| 309 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); |
| 310 void *(*xGetAuxdata)(Fts5Context*, int bClear); |
| 311 |
| 312 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); /* last two: piCol, piOff */ |
| 313 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); |
| 314 |
| 315 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); /* last: piCol */ |
| 316 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); |
| 317 }; |
| 318 |
| 319 /* |
| 320 ** CUSTOM AUXILIARY FUNCTIONS |
| 321 *************************************************************************/ |
| 322 |
| 323 /************************************************************************* |
| 324 ** CUSTOM TOKENIZERS |
| 325 ** |
| 326 ** Applications may also register custom tokenizer types. A tokenizer |
| 327 ** is registered by providing fts5 with a populated instance of the |
| 328 ** following structure. All structure methods must be defined, setting |
| 329 ** any member of the fts5_tokenizer struct to NULL leads to undefined |
| 330 ** behaviour. The structure methods are expected to function as follows: |
| 331 ** |
| 332 ** xCreate: |
| 333 ** This function is used to allocate and initialize a tokenizer instance. |
| 334 ** A tokenizer instance is required to actually tokenize text. |
| 335 ** |
| 336 ** The first argument passed to this function is a copy of the (void*) |
| 337 ** pointer provided by the application when the fts5_tokenizer object |
| 338 ** was registered with FTS5 (the third argument to xCreateTokenizer()). |
| 339 ** The second and third arguments are an array of nul-terminated strings |
| 340 ** containing the tokenizer arguments, if any, specified following the |
| 341 ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used |
| 342 ** to create the FTS5 table. |
| 343 ** |
| 344 ** The final argument is an output variable. If successful, (*ppOut) |
| 345 ** should be set to point to the new tokenizer handle and SQLITE_OK |
| 346 ** returned. If an error occurs, some value other than SQLITE_OK should |
| 347 ** be returned. In this case, fts5 assumes that the final value of *ppOut |
| 348 ** is undefined. |
| 349 ** |
| 350 ** xDelete: |
| 351 ** This function is invoked to delete a tokenizer handle previously |
| 352 ** allocated using xCreate(). Fts5 guarantees that this function will |
| 353 ** be invoked exactly once for each successful call to xCreate(). |
| 354 ** |
| 355 ** xTokenize: |
| 356 ** This function is expected to tokenize the nText byte string indicated |
| 357 ** by argument pText. pText may or may not be nul-terminated. The first |
| 358 ** argument passed to this function is a pointer to an Fts5Tokenizer object |
| 359 ** returned by an earlier call to xCreate(). |
| 360 ** |
| 361 ** The third argument indicates the reason that FTS5 is requesting |
| 362 ** tokenization of the supplied text. This is always one of the following |
| 363 ** four values: |
| 364 ** |
| 365 ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into |
| 366 ** or removed from the FTS table. The tokenizer is being invoked to |
| 367 ** determine the set of tokens to add to (or delete from) the |
| 368 ** FTS index. |
| 369 ** |
| 370 ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed |
| 371 ** against the FTS index. The tokenizer is being called to tokenize |
| 372 ** a bareword or quoted string specified as part of the query. |
| 373 ** |
| 374 ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as |
| 375 ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is |
| 376 ** followed by a "*" character, indicating that the last token |
| 377 ** returned by the tokenizer will be treated as a token prefix. |
| 378 ** |
| 379 ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to |
| 380 ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary |
| 381 ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same |
| 382 ** on a columnsize=0 database. |
| 383 ** </ul> |
| 384 ** |
| 385 ** For each token in the input string, the supplied callback xToken() must |
| 386 ** be invoked. The first argument to it should be a copy of the pointer |
| 387 ** passed as the second argument to xTokenize(). The third and fourth |
| 388 ** arguments are a pointer to a buffer containing the token text, and the |
| 389 ** size of the token in bytes. The fifth and sixth arguments are the byte |
| 390 ** offsets of the first byte of and first byte immediately following the |
| 391 ** text from which the token is derived within the input. |
| 392 ** |
| 393 ** The second argument passed to the xToken() callback ("tflags") should |
| 394 ** normally be set to 0. The exception is if the tokenizer supports |
| 395 ** synonyms. In this case see the discussion below for details. |
| 396 ** |
| 397 ** FTS5 assumes the xToken() callback is invoked for each token in the |
| 398 ** order that they occur within the input text. |
| 399 ** |
| 400 ** If an xToken() callback returns any value other than SQLITE_OK, then |
| 401 ** the tokenization should be abandoned and the xTokenize() method should |
| 402 ** immediately return a copy of the xToken() return value. Or, if the |
| 403 ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, |
| 404 ** if an error occurs with the xTokenize() implementation itself, it |
| 405 ** may abandon the tokenization and return any error code other than |
| 406 ** SQLITE_OK or SQLITE_DONE. |
| 407 ** |
| 408 ** SYNONYM SUPPORT |
| 409 ** |
| 410 ** Custom tokenizers may also support synonyms. Consider a case in which a |
| 411 ** user wishes to query for a phrase such as "first place". Using the |
| 412 ** built-in tokenizers, the FTS5 query 'first + place' will match instances |
| 413 ** of "first place" within the document set, but not alternative forms |
| 414 ** such as "1st place". In some applications, it would be better to match |
| 415 ** all instances of "first place" or "1st place" regardless of which form |
| 416 ** the user specified in the MATCH query text. |
| 417 ** |
| 418 ** There are several ways to approach this in FTS5: |
| 419 ** |
| 420 ** <ol><li> By mapping all synonyms to a single token. In this case, using |
| 421 ** the above example, this means that the tokenizer returns the |
| 422 ** same token for inputs "first" and "1st". Say that token is in |
| 423 ** fact "first", so that when the user inserts the document "I won |
| 424 ** 1st place" entries are added to the index for tokens "i", "won", |
| 425 ** "first" and "place". If the user then queries for '1st + place', |
| 426 ** the tokenizer substitutes "first" for "1st" and the query works |
| 427 ** as expected. |
| 428 ** |
| 429 ** <li> By querying the index for all synonyms of each query term separately. |
| 430 ** In this case, when tokenizing query text, the tokenizer may |
| 431 ** provide multiple synonyms for a single term within the document. |
| 432 ** FTS5 then queries the index for each synonym individually. For |
| 433 ** example, faced with the query: |
| 434 ** |
| 435 ** <codeblock> |
| 436 ** ... MATCH 'first place'</codeblock> |
| 437 ** |
| 438 ** the tokenizer offers both "1st" and "first" as synonyms for the |
| 439 ** first token in the MATCH query and FTS5 effectively runs a query |
| 440 ** similar to: |
| 441 ** |
| 442 ** <codeblock> |
| 443 ** ... MATCH '(first OR 1st) place'</codeblock> |
| 444 ** |
| 445 ** except that, for the purposes of auxiliary functions, the query |
| 446 ** still appears to contain just two phrases - "(first OR 1st)" |
| 447 ** being treated as a single phrase. |
| 448 ** |
| 449 ** <li> By adding multiple synonyms for a single term to the FTS index. |
| 450 ** Using this method, when tokenizing document text, the tokenizer |
| 451 ** provides multiple synonyms for each token. So that when a |
| 452 ** document such as "I won first place" is tokenized, entries are |
| 453 ** added to the FTS index for "i", "won", "first", "1st" and |
| 454 ** "place". |
| 455 ** |
| 456 ** This way, even if the tokenizer does not provide synonyms |
| 457 ** when tokenizing query text (it should not - to do so would be |
| 458 ** inefficient), it doesn't matter if the user queries for |
| 459 ** 'first + place' or '1st + place', as there are entries in the |
| 460 ** FTS index corresponding to both forms of the first token. |
| 461 ** </ol> |
| 462 ** |
| 463 ** Whether it is parsing document or query text, any call to xToken that |
| 464 ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit |
| 465 ** is considered to supply a synonym for the previous token. For example, |
| 466 ** when parsing the document "I won first place", a tokenizer that supports |
| 467 ** synonyms would call xToken() 5 times, as follows: |
| 468 ** |
| 469 ** <codeblock> |
| 470 ** xToken(pCtx, 0, "i", 1, 0, 1); |
| 471 ** xToken(pCtx, 0, "won", 3, 2, 5); |
| 472 ** xToken(pCtx, 0, "first", 5, 6, 11); |
| 473 ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); |
| 474 ** xToken(pCtx, 0, "place", 5, 12, 17); |
| 475 **</codeblock> |
| 476 ** |
| 477 ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time |
| 478 ** xToken() is called. Multiple synonyms may be specified for a single token |
| 479 ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. |
| 480 ** There is no limit to the number of synonyms that may be provided for a |
| 481 ** single token. |
| 482 ** |
| 483 ** In many cases, method (1) above is the best approach. It does not add |
| 484 ** extra data to the FTS index or require FTS5 to query for multiple terms, |
| 485 ** so it is efficient in terms of disk space and query speed. However, it |
| 486 ** does not support prefix queries very well. If, as suggested above, the |
| 487 ** token "first" is substituted for "1st" by the tokenizer, then the query: |
| 488 ** |
| 489 ** <codeblock> |
| 490 ** ... MATCH '1s*'</codeblock> |
| 491 ** |
| 492 ** will not match documents that contain the token "1st" (as the tokenizer |
| 493 ** will probably not map "1s" to any prefix of "first"). |
| 494 ** |
| 495 ** For full prefix support, method (3) may be preferred. In this case, |
| 496 ** because the index contains entries for both "first" and "1st", prefix |
| 497 ** queries such as 'fi*' or '1s*' will match correctly. However, because |
| 498 ** extra entries are added to the FTS index, this method uses more space |
| 499 ** within the database. |
| 500 ** |
| 501 ** Method (2) offers a midpoint between (1) and (3). Using this method, |
| 502 ** a query such as '1s*' will match documents that contain the literal |
| 503 ** token "1st", but not "first" (assuming the tokenizer is not able to |
| 504 ** provide synonyms for prefixes). However, a non-prefix query like '1st' |
| 505 ** will match against "1st" and "first". This method does not require |
| 506 ** extra disk space, as no extra entries are added to the FTS index. |
| 507 ** On the other hand, it may require more CPU cycles to run MATCH queries, |
| 508 ** as separate queries of the FTS index are required for each synonym. |
| 509 ** |
| 510 ** When using methods (2) or (3), it is important that the tokenizer only |
| 511 ** provide synonyms when tokenizing document text (method (3)) or query |
| 512 ** text (method (2)), not both. Doing so will not cause any errors, but is |
| 513 ** inefficient. |
| 514 */ |
| 515 typedef struct Fts5Tokenizer Fts5Tokenizer; /* Tokenizer instance handle created by xCreate() */ |
| 516 typedef struct fts5_tokenizer fts5_tokenizer; |
| 517 struct fts5_tokenizer { |
| 518 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); /* Allocate an instance */ |
| 519 void (*xDelete)(Fts5Tokenizer*); /* Delete a handle allocated by xCreate() */ |
| 520 int (*xTokenize)(Fts5Tokenizer*, /* Tokenize pText, invoking xToken() once per token */ |
| 521 void *pCtx, /* Passed through as first argument to xToken() */ |
| 522 int flags, /* Mask of FTS5_TOKENIZE_* flags */ |
| 523 const char *pText, int nText, /* Input text (may not be nul-terminated) and its size in bytes */ |
| 524 int (*xToken)( |
| 525 void *pCtx, /* Copy of 2nd argument to xTokenize() */ |
| 526 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 527 const char *pToken, /* Pointer to buffer containing token */ |
| 528 int nToken, /* Size of token in bytes */ |
| 529 int iStart, /* Byte offset of token within input text */ |
| 530 int iEnd /* Byte offset of end of token within input text */ |
| 531 ) |
| 532 ); |
| 533 }; |
| 534 |
| 535 /* Flags that may be passed as the third argument to xTokenize() */ |
| 536 #define FTS5_TOKENIZE_QUERY 0x0001 |
| 537 #define FTS5_TOKENIZE_PREFIX 0x0002 |
| 538 #define FTS5_TOKENIZE_DOCUMENT 0x0004 |
| 539 #define FTS5_TOKENIZE_AUX 0x0008 |
| 540 |
| 541 /* Flags that may be passed by the tokenizer implementation back to FTS5 |
| 542 ** as the second argument ("tflags") to the supplied xToken callback. */ |
| 543 #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ |
| 544 |
| 545 /* |
| 546 ** END OF CUSTOM TOKENIZERS |
| 547 *************************************************************************/ |
| 548 |
| 549 /************************************************************************* |
| 550 ** FTS5 EXTENSION REGISTRATION API |
| 551 */ |
| 552 typedef struct fts5_api fts5_api; |
| 553 struct fts5_api { |
| 554 int iVersion; /* Currently always set to 2 */ |
| 555 |
| 556 /* Create a new tokenizer */ |
| 557 int (*xCreateTokenizer)( |
| 558 fts5_api *pApi, |
| 559 const char *zName, |
| 560 void *pContext, /* Copy is passed as first argument to fts5_tokenizer.xCreate() */ |
| 561 fts5_tokenizer *pTokenizer, |
| 562 void (*xDestroy)(void*) |
| 563 ); |
| 564 |
| 565 /* Find an existing tokenizer */ |
| 566 int (*xFindTokenizer)( |
| 567 fts5_api *pApi, |
| 568 const char *zName, |
| 569 void **ppContext, /* NOTE(review): presumably an output parameter - confirm */ |
| 570 fts5_tokenizer *pTokenizer /* NOTE(review): presumably an output parameter - confirm */ |
| 571 ); |
| 572 |
| 573 /* Create a new auxiliary function */ |
| 574 int (*xCreateFunction)( |
| 575 fts5_api *pApi, |
| 576 const char *zName, |
| 577 void *pContext, /* Copy is returned by Fts5ExtensionApi.xUserData() */ |
| 578 fts5_extension_function xFunction, |
| 579 void (*xDestroy)(void*) |
| 580 ); |
| 581 }; |
| 582 |
| 583 /* |
| 584 ** END OF REGISTRATION API |
| 585 *************************************************************************/ |
| 586 |
| 587 #if 0 |
| 588 } /* end of the 'extern "C"' block */ |
| 589 #endif |
| 590 |
| 591 #endif /* _FTS5_H */ |
| 592 |
| 593 /* |
| 594 ** 2014 May 31 |
| 595 ** |
| 596 ** The author disclaims copyright to this source code. In place of |
| 597 ** a legal notice, here is a blessing: |
| 598 ** |
| 599 ** May you do good and not evil. |
| 600 ** May you find forgiveness for yourself and forgive others. |
| 601 ** May you share freely, never taking more than you give. |
| 602 ** |
| 603 ****************************************************************************** |
| 604 ** |
| 605 */ |
| 606 #ifndef _FTS5INT_H |
| 607 #define _FTS5INT_H |
| 608 |
| 609 /* #include "fts5.h" */ |
| 610 /* #include "sqlite3ext.h" */ |
| 611 SQLITE_EXTENSION_INIT1 |
| 612 |
| 613 /* #include <string.h> */ |
| 614 /* #include <assert.h> */ |
| 615 |
| 616 #ifndef SQLITE_AMALGAMATION /* Only when SQLITE_AMALGAMATION is not defined */ |
| 617 |
| 618 typedef unsigned char u8; |
| 619 typedef unsigned int u32; |
| 620 typedef unsigned short u16; |
| 621 typedef short i16; |
| 622 typedef sqlite3_int64 i64; |
| 623 typedef sqlite3_uint64 u64; |
| 624 |
| 625 #define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0]))) /* Element count; x must be an array, not a pointer */ |
| 626 |
| 627 #define testcase(x) /* Expands to nothing here */ |
| 628 #define ALWAYS(x) 1 /* Constant 1; x is not evaluated */ |
| 629 #define NEVER(x) 0 /* Constant 0; x is not evaluated */ |
| 630 |
| 631 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) /* Evaluates one argument twice */ |
| 632 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) /* Evaluates one argument twice */ |
| 633 |
| 634 /* |
| 635 ** Constants for the largest and smallest possible 64-bit signed integers. |
| 636 */ |
| 637 # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) |
| 638 # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) |
| 639 |
| 640 #endif /* !SQLITE_AMALGAMATION */ |
| 641 |
| 642 /* Truncate very long tokens to this many bytes. Hard limit is |
| 643 ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset |
| 644 ** field that occurs at the start of each leaf page (see fts5_index.c). */ |
| 645 #define FTS5_MAX_TOKEN_SIZE 32768 |
| 646 |
| 647 /* |
| 648 ** Maximum number of prefix indexes on a single FTS5 table. This must be |
| 649 ** less than 32. If it is set to anything larger than that, an #error |
| 650 ** directive in fts5_index.c will cause the build to fail. |
| 651 */ |
| 652 #define FTS5_MAX_PREFIX_INDEXES 31 |
| 653 |
| 654 #define FTS5_DEFAULT_NEARDIST 10 /* NOTE(review): presumably the default NEAR() distance - confirm */ |
| 655 #define FTS5_DEFAULT_RANK "bm25" /* NOTE(review): presumably the default rank function name - confirm */ |
| 656 |
| 657 /* Name of rank and rowid columns */ |
| 658 #define FTS5_RANK_NAME "rank" |
| 659 #define FTS5_ROWID_NAME "rowid" |
| 660 |
| 661 #ifdef SQLITE_DEBUG |
| 662 # define FTS5_CORRUPT sqlite3Fts5Corrupt() /* Debug builds: value comes from a function call */ |
| 663 static int sqlite3Fts5Corrupt(void); /* Declared here; definition is not in this section */ |
| 664 #else |
| 665 # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB /* Other builds: plain constant */ |
| 666 #endif |
| 667 |
| 668 /* |
| 669 ** The assert_nc() macro is similar to the assert() macro, except that it |
| 670 ** is used for assert() conditions that are true only if it can be |
| 671 ** guaranteed that the database is not corrupt. |
| 672 */ |
| 673 #ifdef SQLITE_DEBUG |
| 674 SQLITE_API extern int sqlite3_fts5_may_be_corrupt; |
| 675 # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) /* Passes whenever the flag is non-zero */ |
| 676 #else |
| 677 # define assert_nc(x) assert(x) |
| 678 #endif |
| 679 |
| 680 /* Mark a function parameter as unused, to suppress nuisance compiler |
| 681 ** warnings. Each macro is defined only if not already defined. */ |
| 682 #ifndef UNUSED_PARAM |
| 683 # define UNUSED_PARAM(X) (void)(X) |
| 684 #endif |
| 685 |
| 686 #ifndef UNUSED_PARAM2 |
| 687 # define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y) /* As UNUSED_PARAM, for two parameters */ |
| 688 #endif |
| 689 |
| 690 typedef struct Fts5Global Fts5Global; /* Struct definition is not in this section */ |
| 691 typedef struct Fts5Colset Fts5Colset; |
| 692 |
| 693 /* If a NEAR() clump or phrase may only match a specific set of columns, |
| 694 ** then an object of the following type is used to record the set of columns. |
| 695 ** Each entry in the aiCol[] array is a column that may be matched. |
| 696 ** |
| 697 ** This object is used by fts5_expr.c and fts5_index.c. |
| 698 */ |
| 699 struct Fts5Colset { |
| 700 int nCol; /* NOTE(review): presumably the number of entries in aiCol[] - confirm */ |
| 701 int aiCol[1]; /* Columns that may be matched. NOTE(review): declared size 1; presumably over-allocated - confirm */ |
| 702 }; |
| 703 |
| 704 |
| 705 |
| 706 /************************************************************************** |
| 707 ** Interface to code in fts5_config.c. fts5_config.c contains code |
| 708 ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. |
| 709 */ |
| 710 |
| 711 typedef struct Fts5Config Fts5Config; |
| 712 |
| 713 /* |
| 714 ** An instance of the following structure encodes all information that can |
| 715 ** be gleaned from the CREATE VIRTUAL TABLE statement. |
| 716 ** |
| 717 ** And all information loaded from the %_config table. |
| 718 ** |
| 719 ** nAutomerge: |
| 720 ** The minimum number of segments that an auto-merge operation should |
| 721 ** attempt to merge together. A value of 1 sets the object to use the |
| 722 ** compile time default. Zero disables auto-merge altogether. |
| 723 ** |
| 724 ** zContent: |
| 725 ** |
| 726 ** zContentRowid: |
| 727 ** The value of the content_rowid= option, if one was specified. Or |
| 728 ** the string "rowid" otherwise. This text is not quoted - if it is |
| 729 ** used as part of an SQL statement it needs to be quoted appropriately. |
| 730 ** |
| 731 ** zContentExprlist: |
| 732 ** |
| 733 ** pzErrmsg: |
| 734 ** This exists in order to allow the fts5_index.c module to return a |
| 735 ** decent error message if it encounters a file-format version it does |
| 736 ** not understand. |
| 737 ** |
| 738 ** bColumnsize: |
| 739 ** True if the %_docsize table is created. |
| 740 ** |
| 741 ** bPrefixIndex: |
| 742 ** This is only used for debugging. If set to false, any prefix indexes |
| 743 ** are ignored. This value is configured using: |
| 744 ** |
| 745 ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); |
| 746 ** |
| 747 */ |
| 748 struct Fts5Config { |
| 749 sqlite3 *db; /* Database handle */ |
| 750 char *zDb; /* Database holding FTS index (e.g. "main") */ |
| 751 char *zName; /* Name of FTS index */ |
| 752 int nCol; /* Number of columns */ |
| 753 char **azCol; /* Column names */ |
| 754 u8 *abUnindexed; /* True for unindexed columns */ |
| 755 int nPrefix; /* Number of prefix indexes */ |
| 756 int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ |
| 757 int eContent; /* An FTS5_CONTENT_XXX value */ |
| 758 char *zContent; /* content table */ |
| 759 char *zContentRowid; /* "content_rowid=" option value */ |
| 760 int bColumnsize; /* "columnsize=" option value (dflt==1) */ |
| 761 int eDetail; /* FTS5_DETAIL_XXX value */ |
| 762 char *zContentExprlist; /* NOTE(review): undocumented here - confirm purpose */ |
| 763 Fts5Tokenizer *pTok; /* Tokenizer; presumably from sqlite3Fts5GetTokenizer() */ |
| 764 fts5_tokenizer *pTokApi; /* Tokenizer API; presumably from the same call */ |
| 765 |
| 766 /* Values loaded from the %_config table */ |
| 767 int iCookie; /* Incremented when %_config is modified */ |
| 768 int pgsz; /* Approximate page size used in %_data */ |
| 769 int nAutomerge; /* 'automerge' setting */ |
| 770 int nCrisisMerge; /* Maximum allowed segments per level */ |
| 771 int nUsermerge; /* 'usermerge' setting */ |
| 772 int nHashSize; /* Bytes of memory for in-memory hash */ |
| 773 char *zRank; /* Name of rank function */ |
| 774 char *zRankArgs; /* Arguments to rank function */ |
| 775 |
| 776 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ |
| 777 char **pzErrmsg; |
| 778 |
| 779 #ifdef SQLITE_DEBUG |
| 780 int bPrefixIndex; /* True to use prefix-indexes */ |
| 781 #endif |
| 782 }; |
| 783 |
| 784 /* Current expected value of %_config table 'version' field */ |
| 785 #define FTS5_CURRENT_VERSION 4 |
| 786 |
| 787 #define FTS5_CONTENT_NORMAL 0 |
| 788 #define FTS5_CONTENT_NONE 1 |
| 789 #define FTS5_CONTENT_EXTERNAL 2 |
| 790 |
| 791 #define FTS5_DETAIL_FULL 0 |
| 792 #define FTS5_DETAIL_NONE 1 |
| 793 #define FTS5_DETAIL_COLUMNS 2 |
| 794 |
| 795 |
| 796 |
| 797 static int sqlite3Fts5ConfigParse( |
| 798 Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** |
| 799 ); |
| 800 static void sqlite3Fts5ConfigFree(Fts5Config*); |
| 801 |
| 802 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); |
| 803 |
| 804 static int sqlite3Fts5Tokenize( |
| 805 Fts5Config *pConfig, /* FTS5 Configuration object */ |
| 806 int flags, /* FTS5_TOKENIZE_* flags */ |
| 807 const char *pText, int nText, /* Text to tokenize */ |
| 808 void *pCtx, /* Context passed to xToken() */ |
| 809 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
| 810 ); |
| 811 |
| 812 static void sqlite3Fts5Dequote(char *z); |
| 813 |
| 814 /* Load the contents of the %_config table */ |
| 815 static int sqlite3Fts5ConfigLoad(Fts5Config*, int); |
| 816 |
| 817 /* Set the value of a single config attribute */ |
| 818 static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, i
nt*); |
| 819 |
| 820 static int sqlite3Fts5ConfigParseRank(const char*, char**, char**); |
| 821 |
| 822 /* |
| 823 ** End of interface to code in fts5_config.c. |
| 824 **************************************************************************/ |
| 825 |
| 826 /************************************************************************** |
| 827 ** Interface to code in fts5_buffer.c. |
| 828 */ |
| 829 |
| 830 /* |
| 831 ** Buffer object for the incremental building of string data. |
| 832 */ |
| 833 typedef struct Fts5Buffer Fts5Buffer; |
| 834 struct Fts5Buffer { |
| 835 u8 *p; /* Pointer to the buffer's bytes */ |
| 836 int n; /* Current size; fts5BufferGrow() checks n+nn against nSpace */ |
| 837 int nSpace; /* Capacity that fts5BufferGrow() compares n+nn against */ |
| 838 }; |
| 839 |
| 840 static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32); |
| 841 static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); |
| 842 static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*); |
| 843 static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); |
| 844 static void sqlite3Fts5BufferFree(Fts5Buffer*); |
| 845 static void sqlite3Fts5BufferZero(Fts5Buffer*); |
| 846 static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); |
| 847 static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); |
| 848 |
| 849 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); |
| 850 |
| 851 #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) |
| 852 #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) |
| 853 #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) |
| 854 #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) |
| 855 #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) |
| 856 /* 0 if pBuf has room for nn more bytes, else sqlite3Fts5BufferSize() result */ |
| 857 #define fts5BufferGrow(pRc,pBuf,nn) ( \ |
| 858 (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \ |
| 859 sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \ |
| 860 ) |
| 861 |
| 862 /* Write and decode big-endian 32-bit integer values */ |
| 863 static void sqlite3Fts5Put32(u8*, int); |
| 864 static int sqlite3Fts5Get32(const u8*); |
| 865 /* Split a packed 64-bit position, (iCol<<32) + iOff, into column / offset. */ |
| 866 #define FTS5_POS2COLUMN(iPos) (int)((iPos) >> 32) |
| 867 #define FTS5_POS2OFFSET(iPos) (int)((iPos) & 0xFFFFFFFF) |
| 868 /* Iterator state for walking the positions in a position-list buffer. */ |
| 869 typedef struct Fts5PoslistReader Fts5PoslistReader; |
| 870 struct Fts5PoslistReader { |
| 871 /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */ |
| 872 const u8 *a; /* Position list to iterate through */ |
| 873 int n; /* Size of buffer at a[] in bytes */ |
| 874 int i; /* Current offset in a[] */ |
| 875 |
| 876 u8 bFlag; /* For client use (any custom purpose) */ |
| 877 |
| 878 /* Output variables */ |
| 879 u8 bEof; /* Set to true at EOF */ |
| 880 i64 iPos; /* (iCol<<32) + iPos */ |
| 881 }; |
| 882 static int sqlite3Fts5PoslistReaderInit( |
| 883 const u8 *a, int n, /* Poslist buffer to iterate through */ |
| 884 Fts5PoslistReader *pIter /* Iterator object to initialize */ |
| 885 ); |
| 886 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); |
| 887 |
| 888 typedef struct Fts5PoslistWriter Fts5PoslistWriter; |
| 889 struct Fts5PoslistWriter { |
| 890 i64 iPrev; /* Presumably the last position appended - TODO confirm */ |
| 891 }; |
| 892 static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); |
| 893 static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64); |
| 894 |
| 895 static int sqlite3Fts5PoslistNext64( |
| 896 const u8 *a, int n, /* Buffer containing poslist */ |
| 897 int *pi, /* IN/OUT: Offset within a[] */ |
| 898 i64 *piOff /* IN/OUT: Current offset */ |
| 899 ); |
| 900 |
| 901 /* Malloc utility */ |
| 902 static void *sqlite3Fts5MallocZero(int *pRc, int nByte); |
| 903 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); |
| 904 |
| 905 /* Character set tests (like isspace(), isalpha() etc.) */ |
| 906 static int sqlite3Fts5IsBareword(char t); |
| 907 |
| 908 |
| 909 /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ |
| 910 typedef struct Fts5Termset Fts5Termset; |
| 911 static int sqlite3Fts5TermsetNew(Fts5Termset**); |
| 912 static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPre
sent); |
| 913 static void sqlite3Fts5TermsetFree(Fts5Termset*); |
| 914 |
| 915 /* |
| 916 ** End of interface to code in fts5_buffer.c. |
| 917 **************************************************************************/ |
| 918 |
| 919 /************************************************************************** |
| 920 ** Interface to code in fts5_index.c. fts5_index.c contains code |
| 921 ** to access the data stored in the %_data table. |
| 922 */ |
| 923 |
| 924 typedef struct Fts5Index Fts5Index; |
| 925 typedef struct Fts5IndexIter Fts5IndexIter; |
| 926 |
| 927 struct Fts5IndexIter { |
| 928 i64 iRowid; /* Presumably the rowid of the current entry - TODO confirm */ |
| 929 const u8 *pData; /* Presumably data for the current entry - TODO confirm */ |
| 930 int nData; /* Presumably the size of pData[] in bytes - TODO confirm */ |
| 931 u8 bEof; /* Value reported by the sqlite3Fts5IterEof() macro */ |
| 932 }; |
| 933 |
| 934 #define sqlite3Fts5IterEof(x) ((x)->bEof) |
| 935 |
| 936 /* |
| 937 ** Values used as part of the flags argument passed to IndexQuery(). |
| 938 */ |
| 939 #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ |
| 940 #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ |
| 941 #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ |
| 942 #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ |
| 943 |
| 944 /* The following are used internally by the fts5_index.c module. They are |
| 945 ** defined here only to make it easier to avoid clashes with the flags |
| 946 ** above. */ |
| 947 #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 |
| 948 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 |
| 949 |
| 950 /* |
| 951 ** Create/destroy an Fts5Index object. |
| 952 */ |
| 953 static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, c
har**); |
| 954 static int sqlite3Fts5IndexClose(Fts5Index *p); |
| 955 |
| 956 /* |
| 957 ** Return a simple checksum value based on the arguments. |
| 958 */ |
| 959 static u64 sqlite3Fts5IndexEntryCksum( |
| 960 i64 iRowid, |
| 961 int iCol, |
| 962 int iPos, |
| 963 int iIdx, |
| 964 const char *pTerm, |
| 965 int nTerm |
| 966 ); |
| 967 |
| 968 /* |
| 969 ** Argument p points to a buffer containing utf-8 text that is nByte bytes in |
| 970 ** size. Return the number of bytes in the nChar character prefix of the |
| 971 ** buffer, or 0 if there are fewer than nChar characters in total. |
| 972 */ |
| 973 static int sqlite3Fts5IndexCharlenToBytelen( |
| 974 const char *p, |
| 975 int nByte, |
| 976 int nChar |
| 977 ); |
| 978 |
| 979 /* |
| 980 ** Open a new iterator to iterate through all rowids that match the |
| 981 ** specified token or token prefix. |
| 982 */ |
| 983 static int sqlite3Fts5IndexQuery( |
| 984 Fts5Index *p, /* FTS index to query */ |
| 985 const char *pToken, int nToken, /* Token (or prefix) to query for */ |
| 986 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ |
| 987 Fts5Colset *pColset, /* Match these columns only */ |
| 988 Fts5IndexIter **ppIter /* OUT: New iterator object */ |
| 989 ); |
| 990 |
| 991 /* |
| 992 ** The various operations on open token or token prefix iterators opened |
| 993 ** using sqlite3Fts5IndexQuery(). |
| 994 */ |
| 995 static int sqlite3Fts5IterNext(Fts5IndexIter*); |
| 996 static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); |
| 997 |
| 998 /* |
| 999 ** Close an iterator opened by sqlite3Fts5IndexQuery(). |
| 1000 */ |
| 1001 static void sqlite3Fts5IterClose(Fts5IndexIter*); |
| 1002 |
| 1003 /* |
| 1004 ** This interface is used by the fts5vocab module. |
| 1005 */ |
| 1006 static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); |
| 1007 static int sqlite3Fts5IterNextScan(Fts5IndexIter*); |
| 1008 |
| 1009 |
| 1010 /* |
| 1011 ** Insert or remove data to or from the index. Each time a document is |
| 1012 ** added to or removed from the index, this function is called one or more |
| 1013 ** times. |
| 1014 ** |
| 1015 ** For an insert, it must be called once for each token in the new document. |
| 1016 ** If the operation is a delete, it must be called (at least) once for each |
| 1017 ** unique token in the document with an iCol value less than zero. The iPos |
| 1018 ** argument is ignored for a delete. |
| 1019 */ |
| 1020 static int sqlite3Fts5IndexWrite( |
| 1021 Fts5Index *p, /* Index to write to */ |
| 1022 int iCol, /* Column token appears in (-ve -> delete) */ |
| 1023 int iPos, /* Position of token within column */ |
| 1024 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 1025 ); |
| 1026 |
| 1027 /* |
| 1028 ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to |
| 1029 ** document iDocid. |
| 1030 */ |
| 1031 static int sqlite3Fts5IndexBeginWrite( |
| 1032 Fts5Index *p, /* Index to write to */ |
| 1033 int bDelete, /* True if current operation is a delete */ |
| 1034 i64 iDocid /* Docid to add or remove data from */ |
| 1035 ); |
| 1036 |
| 1037 /* |
| 1038 ** Flush any data stored in the in-memory hash tables to the database. |
| 1039 ** If the bCommit flag is true, also close any open blob handles. |
| 1040 */ |
| 1041 static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit); |
| 1042 |
| 1043 /* |
| 1044 ** Discard any data stored in the in-memory hash tables. Do not write it |
| 1045 ** to the database. Additionally, assume that the contents of the %_data |
| 1046 ** table may have changed on disk. So any in-memory caches of %_data |
| 1047 ** records must be invalidated. |
| 1048 */ |
| 1049 static int sqlite3Fts5IndexRollback(Fts5Index *p); |
| 1050 |
| 1051 /* |
| 1052 ** Get or set the "averages" values. |
| 1053 */ |
| 1054 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); |
| 1055 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); |
| 1056 |
| 1057 /* |
| 1058 ** Functions called by the storage module as part of integrity-check. |
| 1059 */ |
| 1060 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); |
| 1061 |
| 1062 /* |
| 1063 ** Called during virtual module initialization to register UDF |
| 1064 ** fts5_decode() with SQLite |
| 1065 */ |
| 1066 static int sqlite3Fts5IndexInit(sqlite3*); |
| 1067 |
| 1068 static int sqlite3Fts5IndexSetCookie(Fts5Index*, int); |
| 1069 |
| 1070 /* |
| 1071 ** Return the total number of entries read from the %_data table by |
| 1072 ** this connection since it was created. |
| 1073 */ |
| 1074 static int sqlite3Fts5IndexReads(Fts5Index *p); |
| 1075 |
| 1076 static int sqlite3Fts5IndexReinit(Fts5Index *p); |
| 1077 static int sqlite3Fts5IndexOptimize(Fts5Index *p); |
| 1078 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); |
| 1079 static int sqlite3Fts5IndexReset(Fts5Index *p); |
| 1080 |
| 1081 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); |
| 1082 |
| 1083 /* |
| 1084 ** End of interface to code in fts5_index.c. |
| 1085 **************************************************************************/ |
| 1086 |
| 1087 /************************************************************************** |
| 1088 ** Interface to code in fts5_varint.c. |
| 1089 */ |
| 1090 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); |
| 1091 static int sqlite3Fts5GetVarintLen(u32 iVal); |
| 1092 static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); |
| 1093 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); |
| 1094 |
| 1095 #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) |
| 1096 #define fts5GetVarint sqlite3Fts5GetVarint |
| 1097 /* Load the varint at a[iOff] into nVal, advancing iOff; 1-byte fast path. */ |
| 1098 #define fts5FastGetVarint32(a, iOff, nVal) { \ |
| 1099 nVal = (a)[iOff++]; \ |
| 1100 if( nVal & 0x80 ){ \ |
| 1101 iOff--; \ |
| 1102 iOff += fts5GetVarint32(&(a)[iOff], nVal); \ |
| 1103 } \ |
| 1104 } |
| 1105 |
| 1106 |
| 1107 /* |
| 1108 ** End of interface to code in fts5_varint.c. |
| 1109 **************************************************************************/ |
| 1110 |
| 1111 |
| 1112 /************************************************************************** |
| 1113 ** Interface to code in fts5.c. |
| 1114 */ |
| 1115 |
| 1116 static int sqlite3Fts5GetTokenizer( |
| 1117 Fts5Global*, |
| 1118 const char **azArg, |
| 1119 int nArg, |
| 1120 Fts5Tokenizer**, |
| 1121 fts5_tokenizer**, |
| 1122 char **pzErr |
| 1123 ); |
| 1124 |
| 1125 static Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, Fts5Config **); |
| 1126 |
| 1127 /* |
| 1128 ** End of interface to code in fts5.c. |
| 1129 **************************************************************************/ |
| 1130 |
| 1131 /************************************************************************** |
| 1132 ** Interface to code in fts5_hash.c. |
| 1133 */ |
| 1134 typedef struct Fts5Hash Fts5Hash; |
| 1135 |
| 1136 /* |
| 1137 ** Create a hash table, free a hash table. |
| 1138 */ |
| 1139 static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); |
| 1140 static void sqlite3Fts5HashFree(Fts5Hash*); |
| 1141 |
| 1142 static int sqlite3Fts5HashWrite( |
| 1143 Fts5Hash*, |
| 1144 i64 iRowid, /* Rowid for this entry */ |
| 1145 int iCol, /* Column token appears in (-ve -> delete) */ |
| 1146 int iPos, /* Position of token within column */ |
| 1147 char bByte, |
| 1148 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 1149 ); |
| 1150 |
| 1151 /* |
| 1152 ** Empty (but do not delete) a hash table. |
| 1153 */ |
| 1154 static void sqlite3Fts5HashClear(Fts5Hash*); |
| 1155 |
| 1156 static int sqlite3Fts5HashQuery( |
| 1157 Fts5Hash*, /* Hash table to query */ |
| 1158 const char *pTerm, int nTerm, /* Query term */ |
| 1159 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ |
| 1160 int *pnDoclist /* OUT: Size of doclist in bytes */ |
| 1161 ); |
| 1162 |
| 1163 static int sqlite3Fts5HashScanInit( |
| 1164 Fts5Hash*, /* Hash table to query */ |
| 1165 const char *pTerm, int nTerm /* Query prefix */ |
| 1166 ); |
| 1167 static void sqlite3Fts5HashScanNext(Fts5Hash*); |
| 1168 static int sqlite3Fts5HashScanEof(Fts5Hash*); |
| 1169 static void sqlite3Fts5HashScanEntry(Fts5Hash *, |
| 1170 const char **pzTerm, /* OUT: term (nul-terminated) */ |
| 1171 const u8 **ppDoclist, /* OUT: pointer to doclist */ |
| 1172 int *pnDoclist /* OUT: size of doclist in bytes */ |
| 1173 ); |
| 1174 |
| 1175 |
| 1176 /* |
| 1177 ** End of interface to code in fts5_hash.c. |
| 1178 **************************************************************************/ |
| 1179 |
| 1180 /************************************************************************** |
| 1181 ** Interface to code in fts5_storage.c. fts5_storage.c contains |
| 1182 ** code to access the data stored in the %_content and %_docsize tables. |
| 1183 */ |
| 1184 |
| 1185 #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ |
| 1186 #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ |
| 1187 #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */ |
| 1188 |
| 1189 typedef struct Fts5Storage Fts5Storage; |
| 1190 |
| 1191 static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, c
har**); |
| 1192 static int sqlite3Fts5StorageClose(Fts5Storage *p); |
| 1193 static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); |
| 1194 |
| 1195 static int sqlite3Fts5DropAll(Fts5Config*); |
| 1196 static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, ch
ar **); |
| 1197 |
| 1198 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**); |
| 1199 static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*
); |
| 1200 static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); |
| 1201 |
| 1202 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p); |
| 1203 |
| 1204 static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, cha
r**); |
| 1205 static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stm
t*); |
| 1206 |
| 1207 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); |
| 1208 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); |
| 1209 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); |
| 1210 |
| 1211 static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit); |
| 1212 static int sqlite3Fts5StorageRollback(Fts5Storage *p); |
| 1213 |
| 1214 static int sqlite3Fts5StorageConfigValue( |
| 1215 Fts5Storage *p, const char*, sqlite3_value*, int |
| 1216 ); |
| 1217 |
| 1218 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); |
| 1219 static int sqlite3Fts5StorageRebuild(Fts5Storage *p); |
| 1220 static int sqlite3Fts5StorageOptimize(Fts5Storage *p); |
| 1221 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); |
| 1222 static int sqlite3Fts5StorageReset(Fts5Storage *p); |
| 1223 |
| 1224 /* |
| 1225 ** End of interface to code in fts5_storage.c. |
| 1226 **************************************************************************/ |
| 1227 |
| 1228 |
| 1229 /************************************************************************** |
| 1230 ** Interface to code in fts5_expr.c. |
| 1231 */ |
| 1232 typedef struct Fts5Expr Fts5Expr; |
| 1233 typedef struct Fts5ExprNode Fts5ExprNode; |
| 1234 typedef struct Fts5Parse Fts5Parse; |
| 1235 typedef struct Fts5Token Fts5Token; |
| 1236 typedef struct Fts5ExprPhrase Fts5ExprPhrase; |
| 1237 typedef struct Fts5ExprNearset Fts5ExprNearset; |
| 1238 |
| 1239 struct Fts5Token { |
| 1240 const char *p; /* Token text (not nul-terminated) */ |
| 1241 int n; /* Size of buffer p in bytes */ |
| 1242 }; |
| 1243 |
| 1244 /* Parse a MATCH expression. */ |
| 1245 static int sqlite3Fts5ExprNew( |
| 1246 Fts5Config *pConfig, |
| 1247 const char *zExpr, |
| 1248 Fts5Expr **ppNew, |
| 1249 char **pzErr |
| 1250 ); |
| 1251 |
| 1252 /* |
| 1253 ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc); |
| 1254 ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); |
| 1255 ** rc = sqlite3Fts5ExprNext(pExpr, iMax) |
| 1256 ** ){ |
| 1257 ** // The document with rowid iRowid matches the expression! |
| 1258 ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); |
| 1259 ** } |
| 1260 */ |
| 1261 static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc)
; |
| 1262 static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); |
| 1263 static int sqlite3Fts5ExprEof(Fts5Expr*); |
| 1264 static i64 sqlite3Fts5ExprRowid(Fts5Expr*); |
| 1265 |
| 1266 static void sqlite3Fts5ExprFree(Fts5Expr*); |
| 1267 |
| 1268 /* Called during startup to register a UDF with SQLite */ |
| 1269 static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); |
| 1270 |
| 1271 static int sqlite3Fts5ExprPhraseCount(Fts5Expr*); |
| 1272 static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); |
| 1273 static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); |
| 1274 |
| 1275 typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; |
| 1276 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); |
| 1277 static int sqlite3Fts5ExprPopulatePoslists( |
| 1278 Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int |
| 1279 ); |
| 1280 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); |
| 1281 |
| 1282 static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); |
| 1283 |
| 1284 static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); |
| 1285 |
| 1286 /******************************************* |
| 1287 ** The fts5_expr.c API above this point is used by the other hand-written |
| 1288 ** C code in this module. The interfaces below this point are called by |
| 1289 ** the parser code in fts5parse.y. */ |
| 1290 |
| 1291 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); |
| 1292 |
| 1293 static Fts5ExprNode *sqlite3Fts5ParseNode( |
| 1294 Fts5Parse *pParse, |
| 1295 int eType, |
| 1296 Fts5ExprNode *pLeft, |
| 1297 Fts5ExprNode *pRight, |
| 1298 Fts5ExprNearset *pNear |
| 1299 ); |
| 1300 |
| 1301 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( |
| 1302 Fts5Parse *pParse, |
| 1303 Fts5ExprNode *pLeft, |
| 1304 Fts5ExprNode *pRight |
| 1305 ); |
| 1306 |
| 1307 static Fts5ExprPhrase *sqlite3Fts5ParseTerm( |
| 1308 Fts5Parse *pParse, |
| 1309 Fts5ExprPhrase *pPhrase, |
| 1310 Fts5Token *pToken, |
| 1311 int bPrefix |
| 1312 ); |
| 1313 |
| 1314 static Fts5ExprNearset *sqlite3Fts5ParseNearset( |
| 1315 Fts5Parse*, |
| 1316 Fts5ExprNearset*, |
| 1317 Fts5ExprPhrase* |
| 1318 ); |
| 1319 |
| 1320 static Fts5Colset *sqlite3Fts5ParseColset( |
| 1321 Fts5Parse*, |
| 1322 Fts5Colset*, |
| 1323 Fts5Token * |
| 1324 ); |
| 1325 |
| 1326 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); |
| 1327 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); |
| 1328 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); |
| 1329 |
| 1330 static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*
); |
| 1331 static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5Colset*)
; |
| 1332 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*); |
| 1333 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); |
| 1334 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); |
| 1335 |
| 1336 /* |
| 1337 ** End of interface to code in fts5_expr.c. |
| 1338 **************************************************************************/ |
| 1339 |
| 1340 |
| 1341 |
| 1342 /************************************************************************** |
| 1343 ** Interface to code in fts5_aux.c. |
| 1344 */ |
| 1345 |
| 1346 static int sqlite3Fts5AuxInit(fts5_api*); |
| 1347 /* |
| 1348 ** End of interface to code in fts5_aux.c. |
| 1349 **************************************************************************/ |
| 1350 |
| 1351 /************************************************************************** |
| 1352 ** Interface to code in fts5_tokenizer.c. |
| 1353 */ |
| 1354 |
| 1355 static int sqlite3Fts5TokenizerInit(fts5_api*); |
| 1356 /* |
| 1357 ** End of interface to code in fts5_tokenizer.c. |
| 1358 **************************************************************************/ |
| 1359 |
| 1360 /************************************************************************** |
| 1361 ** Interface to code in fts5_vocab.c. |
| 1362 */ |
| 1363 |
| 1364 static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); |
| 1365 |
| 1366 /* |
| 1367 ** End of interface to code in fts5_vocab.c. |
| 1368 **************************************************************************/ |
| 1369 |
| 1370 |
| 1371 /************************************************************************** |
| 1372 ** Interface to automatically generated code in fts5_unicode2.c. |
| 1373 */ |
| 1374 static int sqlite3Fts5UnicodeIsalnum(int c); |
| 1375 static int sqlite3Fts5UnicodeIsdiacritic(int c); |
| 1376 static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); |
| 1377 /* |
| 1378 ** End of interface to code in fts5_unicode2.c. |
| 1379 **************************************************************************/ |
| 1380 |
| 1381 #endif |
| 1382 |
| 1383 #define FTS5_OR 1 |
| 1384 #define FTS5_AND 2 |
| 1385 #define FTS5_NOT 3 |
| 1386 #define FTS5_TERM 4 |
| 1387 #define FTS5_COLON 5 |
| 1388 #define FTS5_LP 6 |
| 1389 #define FTS5_RP 7 |
| 1390 #define FTS5_MINUS 8 |
| 1391 #define FTS5_LCP 9 |
| 1392 #define FTS5_RCP 10 |
| 1393 #define FTS5_STRING 11 |
| 1394 #define FTS5_COMMA 12 |
| 1395 #define FTS5_PLUS 13 |
| 1396 #define FTS5_STAR 14 |
| 1397 |
| 1398 /* |
| 1399 ** 2000-05-29 |
| 1400 ** |
| 1401 ** The author disclaims copyright to this source code. In place of |
| 1402 ** a legal notice, here is a blessing: |
| 1403 ** |
| 1404 ** May you do good and not evil. |
| 1405 ** May you find forgiveness for yourself and forgive others. |
| 1406 ** May you share freely, never taking more than you give. |
| 1407 ** |
| 1408 ************************************************************************* |
| 1409 ** Driver template for the LEMON parser generator. |
| 1410 ** |
| 1411 ** The "lemon" program processes an LALR(1) input grammar file, then uses |
| 1412 ** this template to construct a parser. The "lemon" program inserts text |
| 1413 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the |
| 1414 ** interstitial "-" characters) contained in this template is changed into |
| 1415 ** the value of the %name directive from the grammar. Otherwise, the content |
| 1416 ** of this template is copied straight through into the generated parser |
| 1417 ** source file. |
| 1418 ** |
| 1419 ** The following is the concatenation of all %include directives from the |
| 1420 ** input grammar file: |
| 1421 */ |
| 1422 /* #include <stdio.h> */ |
| 1423 /************ Begin %include sections from the grammar ************************/ |
| 1424 |
| 1425 /* #include "fts5Int.h" */ |
| 1426 /* #include "fts5parse.h" */ |
| 1427 |
| 1428 /* |
| 1429 ** Disable all error recovery processing in the parser push-down |
| 1430 ** automaton. |
| 1431 */ |
| 1432 #define fts5YYNOERRORRECOVERY 1 |
| 1433 |
| 1434 /* |
| 1435 ** Make fts5yytestcase() the same as testcase() |
| 1436 */ |
| 1437 #define fts5yytestcase(X) testcase(X) |
| 1438 |
| 1439 /* |
| 1440 ** Indicate that sqlite3ParserFree() will never be called with a null |
| 1441 ** pointer. |
| 1442 */ |
| 1443 #define fts5YYPARSEFREENOTNULL 1 |
| 1444 |
| 1445 /* |
| 1446 ** Alternative datatype for the argument to the malloc() routine passed |
| 1447 ** into sqlite3ParserAlloc(). The default is size_t. |
| 1448 */ |
| 1449 #define fts5YYMALLOCARGTYPE u64 |
| 1450 |
| 1451 /**************** End of %include directives **********************************/ |
| 1452 /* These constants specify the various numeric values for terminal symbols |
| 1453 ** in a format understandable to "makeheaders". This section is blank unless |
| 1454 ** "lemon" is run with the "-m" command-line option. |
| 1455 ***************** Begin makeheaders token definitions *************************/ |
| 1456 /**************** End makeheaders token definitions ***************************/ |
| 1457 |
| 1458 /* The next section is a series of control #defines that determine |
| 1459 ** various aspects of the generated parser. |
| 1460 ** fts5YYCODETYPE is the data type used to store the integer codes |
| 1461 ** that represent terminal and non-terminal symbols. |
| 1462 ** "unsigned char" is used if there are fewer than |
| 1463 ** 256 symbols. Larger types otherwise. |
| 1464 ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used
for |
| 1465 ** any terminal or nonterminal symbol. |
| 1466 ** fts5YYFALLBACK If defined, this indicates that one or more tokens |
| 1467 ** (also known as: "terminal symbols") have fall-back |
| 1468 ** values which should be used if the original symbol |
| 1469 ** would not parse. This permits keywords to sometimes |
| 1470 ** be used as identifiers, for example. |
| 1471 ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers |
| 1472 ** that indicate what to do in response to the next |
| 1473 ** token. |
| 1474 ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type fo
r terminal |
| 1475 ** symbols. Background: A "minor type" is a semantic |
| 1476 ** value associated with a terminal or non-terminal |
| 1477 ** symbols. For example, for an "ID" terminal symbol, |
| 1478 ** the minor type might be the name of the identifier. |
| 1479 ** Each non-terminal can have a different minor type. |
| 1480 ** Terminal symbols all have the same minor type, though. |
| 1481 ** This macros defines the minor type for terminal |
| 1482 ** symbols. |
| 1483 ** fts5YYMINORTYPE is the data type used for all minor types. |
| 1484 ** This is typically a union of many types, one of |
| 1485 ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in
the union |
| 1486 ** for terminal symbols is called "fts5yy0". |
| 1487 ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If |
| 1488 ** zero the stack is dynamically sized using realloc() |
| 1489 ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extr
a_argument |
| 1490 ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argu
ment |
| 1491 ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypP
arser |
| 1492 ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yy
pParser |
| 1493 ** fts5YYERRORSYMBOL is the code number of the error symbol. If not |
| 1494 ** defined, then do no error processing. |
| 1495 ** fts5YYNSTATE the combined number of states. |
| 1496 ** fts5YYNRULE the number of rules in the grammar |
| 1497 ** fts5YY_MAX_SHIFT Maximum value for shift actions |
| 1498 ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions |
| 1499 ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions |
| 1500 ** fts5YY_MIN_REDUCE Maximum value for reduce actions |
| 1501 ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error |
| 1502 ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept |
| 1503 ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op |
| 1504 */ |
| 1505 #ifndef INTERFACE |
| 1506 # define INTERFACE 1 |
| 1507 #endif |
| 1508 /************* Begin control #defines *****************************************/ |
| 1509 #define fts5YYCODETYPE unsigned char |
| 1510 #define fts5YYNOCODE 28 |
| 1511 #define fts5YYACTIONTYPE unsigned char |
| 1512 #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token |
| 1513 typedef union { |
| 1514 int fts5yyinit; |
| 1515 sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; |
| 1516 int fts5yy4; |
| 1517 Fts5Colset* fts5yy11; |
| 1518 Fts5ExprNode* fts5yy24; |
| 1519 Fts5ExprNearset* fts5yy46; |
| 1520 Fts5ExprPhrase* fts5yy53; |
| 1521 } fts5YYMINORTYPE; |
| 1522 #ifndef fts5YYSTACKDEPTH |
| 1523 #define fts5YYSTACKDEPTH 100 |
| 1524 #endif |
| 1525 #define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse; |
| 1526 #define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse |
| 1527 #define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse = fts5yypParser->pParse |
| 1528 #define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse = pParse |
| 1529 #define fts5YYNSTATE 29 |
| 1530 #define fts5YYNRULE 26 |
| 1531 #define fts5YY_MAX_SHIFT 28 |
| 1532 #define fts5YY_MIN_SHIFTREDUCE 45 |
| 1533 #define fts5YY_MAX_SHIFTREDUCE 70 |
| 1534 #define fts5YY_MIN_REDUCE 71 |
| 1535 #define fts5YY_MAX_REDUCE 96 |
| 1536 #define fts5YY_ERROR_ACTION 97 |
| 1537 #define fts5YY_ACCEPT_ACTION 98 |
| 1538 #define fts5YY_NO_ACTION 99 |
| 1539 /************* End control #defines *******************************************/ |
| 1540 |
/* Make fts5yytestcase() a no-op unless it has already been defined.
**
** A grammar may define fts5yytestcase() in its %include section as a macro
** that helps verify code coverage.  That is useful for testing, but for
** production code the macro should be turned off.
*/
#ifndef fts5yytestcase
# define fts5yytestcase(X)
#endif
| 1552 |
| 1553 |
| 1554 /* Next are the tables used to determine what action to take based on the |
| 1555 ** current state and lookahead token. These tables are used to implement |
| 1556 ** functions that take a state number and lookahead value and return an |
| 1557 ** action integer. |
| 1558 ** |
| 1559 ** Suppose the action integer is N. Then the action is determined as |
| 1560 ** follows |
| 1561 ** |
| 1562 ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahea
d |
| 1563 ** token onto the stack and goto state N. |
| 1564 ** |
| 1565 ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then |
| 1566 ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTRED
UCE. |
| 1567 ** |
| 1568 ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE |
| 1569 ** and fts5YY_MAX_REDUCE |
| 1570 ** |
| 1571 ** N == fts5YY_ERROR_ACTION A syntax error has occurred. |
| 1572 ** |
| 1573 ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. |
| 1574 ** |
| 1575 ** N == fts5YY_NO_ACTION No such action. Denotes unused |
| 1576 ** slots in the fts5yy_action[] table. |
| 1577 ** |
| 1578 ** The action table is constructed as a single large table named fts5yy_action[]
. |
| 1579 ** Given state S and lookahead X, the action is computed as either: |
| 1580 ** |
| 1581 ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] |
| 1582 ** (B) N = fts5yy_default[S] |
| 1583 ** |
| 1584 ** The (A) formula is preferred. The B formula is used instead if: |
| 1585 ** (1) The fts5yy_shift_ofst[S]+X value is out of range, or |
| 1586 ** (2) fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X, or |
| 1587 ** (3) fts5yy_shift_ofst[S] equal fts5YY_SHIFT_USE_DFLT. |
| 1588 ** (Implementation note: fts5YY_SHIFT_USE_DFLT is chosen so that |
| 1589 ** fts5YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. |
| 1590 ** Hence only tests (1) and (2) need to be evaluated.) |
| 1591 ** |
| 1592 ** The formulas above are for computing the action when the lookahead is |
| 1593 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after |
| 1594 ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of |
| 1595 ** the fts5yy_shift_ofst[] array and fts5YY_REDUCE_USE_DFLT is used in place of |
| 1596 ** fts5YY_SHIFT_USE_DFLT. |
| 1597 ** |
| 1598 ** The following are the tables generated in this section: |
| 1599 ** |
| 1600 ** fts5yy_action[] A single table containing all actions. |
| 1601 ** fts5yy_lookahead[] A table containing the lookahead for each entry in |
| 1602 ** fts5yy_action. Used to detect hash collisions. |
| 1603 ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for |
| 1604 ** shifting terminals. |
| 1605 ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for |
| 1606 ** shifting non-terminals after a reduce. |
| 1607 ** fts5yy_default[] Default action for each state. |
| 1608 ** |
| 1609 *********** Begin parsing tables **********************************************/ |
| 1610 #define fts5YY_ACTTAB_COUNT (85) |
| 1611 static const fts5YYACTIONTYPE fts5yy_action[] = { |
| 1612 /* 0 */ 98, 16, 51, 5, 53, 27, 83, 7, 26, 15, |
| 1613 /* 10 */ 51, 5, 53, 27, 13, 69, 26, 48, 51, 5, |
| 1614 /* 20 */ 53, 27, 19, 11, 26, 9, 20, 51, 5, 53, |
| 1615 /* 30 */ 27, 13, 22, 26, 28, 51, 5, 53, 27, 68, |
| 1616 /* 40 */ 1, 26, 19, 11, 17, 9, 52, 10, 53, 27, |
| 1617 /* 50 */ 23, 24, 26, 54, 3, 4, 2, 26, 6, 21, |
| 1618 /* 60 */ 49, 71, 3, 4, 2, 7, 56, 59, 55, 59, |
| 1619 /* 70 */ 4, 2, 12, 69, 58, 60, 18, 67, 62, 69, |
| 1620 /* 80 */ 25, 66, 8, 14, 2, |
| 1621 }; |
| 1622 static const fts5YYCODETYPE fts5yy_lookahead[] = { |
| 1623 /* 0 */ 16, 17, 18, 19, 20, 21, 5, 6, 24, 17, |
| 1624 /* 10 */ 18, 19, 20, 21, 11, 14, 24, 17, 18, 19, |
| 1625 /* 20 */ 20, 21, 8, 9, 24, 11, 17, 18, 19, 20, |
| 1626 /* 30 */ 21, 11, 12, 24, 17, 18, 19, 20, 21, 26, |
| 1627 /* 40 */ 6, 24, 8, 9, 22, 11, 18, 11, 20, 21, |
| 1628 /* 50 */ 24, 25, 24, 20, 1, 2, 3, 24, 23, 24, |
| 1629 /* 60 */ 7, 0, 1, 2, 3, 6, 10, 11, 10, 11, |
| 1630 /* 70 */ 2, 3, 9, 14, 11, 11, 22, 26, 7, 14, |
| 1631 /* 80 */ 13, 11, 5, 11, 3, |
| 1632 }; |
| 1633 #define fts5YY_SHIFT_USE_DFLT (85) |
| 1634 #define fts5YY_SHIFT_COUNT (28) |
| 1635 #define fts5YY_SHIFT_MIN (0) |
| 1636 #define fts5YY_SHIFT_MAX (81) |
| 1637 static const unsigned char fts5yy_shift_ofst[] = { |
| 1638 /* 0 */ 34, 34, 34, 34, 34, 14, 20, 3, 36, 1, |
| 1639 /* 10 */ 59, 64, 64, 65, 65, 53, 61, 56, 58, 63, |
| 1640 /* 20 */ 68, 67, 70, 67, 71, 72, 67, 77, 81, |
| 1641 }; |
| 1642 #define fts5YY_REDUCE_USE_DFLT (-17) |
| 1643 #define fts5YY_REDUCE_COUNT (14) |
| 1644 #define fts5YY_REDUCE_MIN (-16) |
| 1645 #define fts5YY_REDUCE_MAX (54) |
| 1646 static const signed char fts5yy_reduce_ofst[] = { |
| 1647 /* 0 */ -16, -8, 0, 9, 17, 28, 26, 35, 33, 13, |
| 1648 /* 10 */ 13, 22, 54, 13, 51, |
| 1649 }; |
| 1650 static const fts5YYACTIONTYPE fts5yy_default[] = { |
| 1651 /* 0 */ 97, 97, 97, 97, 97, 76, 91, 97, 97, 96, |
| 1652 /* 10 */ 96, 97, 97, 96, 96, 97, 97, 97, 97, 97, |
| 1653 /* 20 */ 73, 89, 97, 90, 97, 97, 87, 97, 72, |
| 1654 }; |
| 1655 /********** End of lemon-generated parsing tables *****************************/ |
| 1656 |
/* Map from tokens (terminal symbols) to their fallback tokens.
** When the grammar contains a construct such as:
**
**      %fallback ID X Y Z.
**
** ID becomes the fallback token for X, Y, and Z.  Whenever one of the
** tokens X, Y, or Z is input to the parser but does not parse, the type of
** the token is changed to ID and the parse is retried before an error is
** thrown.
**
** This can be used, for example, to let some keywords in a language revert
** to identifiers when the keyword does not apply in the context where it
** appears.
**
** The table is only compiled when fts5YYFALLBACK is defined, and it has no
** entries here.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
| 1675 |
| 1676 /* The following structure represents a single element of the |
| 1677 ** parser's stack. Information stored includes: |
| 1678 ** |
| 1679 ** + The state number for the parser at this level of the stack. |
| 1680 ** |
| 1681 ** + The value of the token stored at this level of the stack. |
| 1682 ** (In other words, the "major" token.) |
| 1683 ** |
| 1684 ** + The semantic value stored at this level of the stack. This is |
| 1685 ** the information used by the action routines in the grammar. |
| 1686 ** It is sometimes called the "minor" token. |
| 1687 ** |
| 1688 ** After the "shift" half of a SHIFTREDUCE action, the stateno field |
| 1689 ** actually contains the reduce action for the second half of the |
| 1690 ** SHIFTREDUCE. |
| 1691 */ |
| 1692 struct fts5yyStackEntry { |
| 1693 fts5YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUC
E */ |
| 1694 fts5YYCODETYPE major; /* The major token value. This is the code |
| 1695 ** number for the token at this stack level */ |
| 1696 fts5YYMINORTYPE minor; /* The user-supplied minor token value. This |
| 1697 ** is the value of the token */ |
| 1698 }; |
| 1699 typedef struct fts5yyStackEntry fts5yyStackEntry; |
| 1700 |
| 1701 /* The state of the parser is completely contained in an instance of |
| 1702 ** the following structure */ |
| 1703 struct fts5yyParser { |
| 1704 fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack *
/ |
| 1705 #ifdef fts5YYTRACKMAXSTACKDEPTH |
| 1706 int fts5yyhwm; /* High-water mark of the stack */ |
| 1707 #endif |
| 1708 #ifndef fts5YYNOERRORRECOVERY |
| 1709 int fts5yyerrcnt; /* Shifts left before out of the error */ |
| 1710 #endif |
| 1711 sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument *
/ |
| 1712 #if fts5YYSTACKDEPTH<=0 |
| 1713 int fts5yystksz; /* Current side of the stack */ |
| 1714 fts5yyStackEntry *fts5yystack; /* The parser's stack */ |
| 1715 fts5yyStackEntry fts5yystk0; /* First stack entry */ |
| 1716 #else |
| 1717 fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH]; /* The parser's stack */ |
| 1718 #endif |
| 1719 }; |
| 1720 typedef struct fts5yyParser fts5yyParser; |
| 1721 |
#ifndef NDEBUG
/* #include <stdio.h> */
static FILE *fts5yyTraceFILE = 0;     /* Trace output stream, 0 when off */
static char *fts5yyTracePrompt = 0;   /* Prefix of each trace line, 0 when off */

/*
** Enable or disable parser tracing.
**
** Inputs:
** <ul>
** <li> TraceFILE: a FILE* to which trace output should be written.
** <li> zTracePrompt: a prefix string written at the beginning of every
**      line of trace output.
** </ul>
**
** Tracing is on only when both arguments are non-NULL.  If either one is
** NULL, both saved values are reset to 0 and tracing is turned off.
**
** Outputs:
** None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
| 1753 |
#ifndef NDEBUG
/* Printable names of every terminal and nonterminal, indexed by symbol
** code.  The trace output uses them when reporting shifts and pops. */
static const char *const fts5yyTokenName[] = {
  /*  0 */ "$",
  /*  1 */ "OR",
  /*  2 */ "AND",
  /*  3 */ "NOT",
  /*  4 */ "TERM",
  /*  5 */ "COLON",
  /*  6 */ "LP",
  /*  7 */ "RP",
  /*  8 */ "MINUS",
  /*  9 */ "LCP",
  /* 10 */ "RCP",
  /* 11 */ "STRING",
  /* 12 */ "COMMA",
  /* 13 */ "PLUS",
  /* 14 */ "STAR",
  /* 15 */ "error",
  /* 16 */ "input",
  /* 17 */ "expr",
  /* 18 */ "cnearset",
  /* 19 */ "exprlist",
  /* 20 */ "nearset",
  /* 21 */ "colset",
  /* 22 */ "colsetlist",
  /* 23 */ "nearphrases",
  /* 24 */ "phrase",
  /* 25 */ "neardist_opt",
  /* 26 */ "star_opt",
};
#endif /* NDEBUG */
| 1767 |
#ifndef NDEBUG
/* Text of every grammar rule, indexed by rule number.  Needed for tracing
** reduce actions. */
static const char *const fts5yyRuleName[] = {
  "input ::= expr",                                     /*  0 */
  "expr ::= expr AND expr",                             /*  1 */
  "expr ::= expr OR expr",                              /*  2 */
  "expr ::= expr NOT expr",                             /*  3 */
  "expr ::= LP expr RP",                                /*  4 */
  "expr ::= exprlist",                                  /*  5 */
  "exprlist ::= cnearset",                              /*  6 */
  "exprlist ::= exprlist cnearset",                     /*  7 */
  "cnearset ::= nearset",                               /*  8 */
  "cnearset ::= colset COLON nearset",                  /*  9 */
  "colset ::= MINUS LCP colsetlist RCP",                /* 10 */
  "colset ::= LCP colsetlist RCP",                      /* 11 */
  "colset ::= STRING",                                  /* 12 */
  "colset ::= MINUS STRING",                            /* 13 */
  "colsetlist ::= colsetlist STRING",                   /* 14 */
  "colsetlist ::= STRING",                              /* 15 */
  "nearset ::= phrase",                                 /* 16 */
  "nearset ::= STRING LP nearphrases neardist_opt RP",  /* 17 */
  "nearphrases ::= phrase",                             /* 18 */
  "nearphrases ::= nearphrases phrase",                 /* 19 */
  "neardist_opt ::=",                                   /* 20 */
  "neardist_opt ::= COMMA STRING",                      /* 21 */
  "phrase ::= phrase PLUS STRING star_opt",             /* 22 */
  "phrase ::= STRING star_opt",                         /* 23 */
  "star_opt ::= STAR",                                  /* 24 */
  "star_opt ::=",                                       /* 25 */
};
#endif /* NDEBUG */
| 1800 |
| 1801 |
| 1802 #if fts5YYSTACKDEPTH<=0 |
| 1803 /* |
| 1804 ** Try to increase the size of the parser stack. Return the number |
| 1805 ** of errors. Return 0 on success. |
| 1806 */ |
| 1807 static int fts5yyGrowStack(fts5yyParser *p){ |
| 1808 int newSize; |
| 1809 int idx; |
| 1810 fts5yyStackEntry *pNew; |
| 1811 |
| 1812 newSize = p->fts5yystksz*2 + 100; |
| 1813 idx = p->fts5yytos ? (int)(p->fts5yytos - p->fts5yystack) : 0; |
| 1814 if( p->fts5yystack==&p->fts5yystk0 ){ |
| 1815 pNew = malloc(newSize*sizeof(pNew[0])); |
| 1816 if( pNew ) pNew[0] = p->fts5yystk0; |
| 1817 }else{ |
| 1818 pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0])); |
| 1819 } |
| 1820 if( pNew ){ |
| 1821 p->fts5yystack = pNew; |
| 1822 p->fts5yytos = &p->fts5yystack[idx]; |
| 1823 #ifndef NDEBUG |
| 1824 if( fts5yyTraceFILE ){ |
| 1825 fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n", |
| 1826 fts5yyTracePrompt, p->fts5yystksz, newSize); |
| 1827 } |
| 1828 #endif |
| 1829 p->fts5yystksz = newSize; |
| 1830 } |
| 1831 return pNew==0; |
| 1832 } |
| 1833 #endif |
| 1834 |
/* Argument type of the memory allocator that is passed to
** sqlite3Fts5ParserAlloc() below.  A grammar can pick a different type by
** putting an appropriate #define in its %include section; size_t is used
** only when no such #define exists.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
| 1843 |
| 1844 /* Initialize a new parser that has already been allocated. |
| 1845 */ |
| 1846 static void sqlite3Fts5ParserInit(void *fts5yypParser){ |
| 1847 fts5yyParser *pParser = (fts5yyParser*)fts5yypParser; |
| 1848 #ifdef fts5YYTRACKMAXSTACKDEPTH |
| 1849 pParser->fts5yyhwm = 0; |
| 1850 #endif |
| 1851 #if fts5YYSTACKDEPTH<=0 |
| 1852 pParser->fts5yytos = NULL; |
| 1853 pParser->fts5yystack = NULL; |
| 1854 pParser->fts5yystksz = 0; |
| 1855 if( fts5yyGrowStack(pParser) ){ |
| 1856 pParser->fts5yystack = &pParser->fts5yystk0; |
| 1857 pParser->fts5yystksz = 1; |
| 1858 } |
| 1859 #endif |
| 1860 #ifndef fts5YYNOERRORRECOVERY |
| 1861 pParser->fts5yyerrcnt = -1; |
| 1862 #endif |
| 1863 pParser->fts5yytos = pParser->fts5yystack; |
| 1864 pParser->fts5yystack[0].stateno = 0; |
| 1865 pParser->fts5yystack[0].major = 0; |
| 1866 } |
| 1867 |
| 1868 #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK |
| 1869 /* |
| 1870 ** This function allocates a new parser. |
| 1871 ** The only argument is a pointer to a function which works like |
| 1872 ** malloc. |
| 1873 ** |
| 1874 ** Inputs: |
| 1875 ** A pointer to the function used to allocate memory. |
| 1876 ** |
| 1877 ** Outputs: |
| 1878 ** A pointer to a parser. This pointer is used in subsequent calls |
| 1879 ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. |
| 1880 */ |
| 1881 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE)){ |
| 1882 fts5yyParser *pParser; |
| 1883 pParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyPars
er) ); |
| 1884 if( pParser ) sqlite3Fts5ParserInit(pParser); |
| 1885 return pParser; |
| 1886 } |
| 1887 #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ |
| 1888 |
| 1889 |
| 1890 /* The following function deletes the "minor type" or semantic value |
| 1891 ** associated with a symbol. The symbol can be either a terminal |
| 1892 ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is |
| 1893 ** a pointer to the value to be deleted. The code used to do the |
| 1894 ** deletions is derived from the %destructor and/or %token_destructor |
| 1895 ** directives of the input grammar. |
| 1896 */ |
| 1897 static void fts5yy_destructor( |
| 1898 fts5yyParser *fts5yypParser, /* The parser */ |
| 1899 fts5YYCODETYPE fts5yymajor, /* Type code for object to destroy */ |
| 1900 fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */ |
| 1901 ){ |
| 1902 sqlite3Fts5ParserARG_FETCH; |
| 1903 switch( fts5yymajor ){ |
| 1904 /* Here is inserted the actions which take place when a |
| 1905 ** terminal or non-terminal is destroyed. This can happen |
| 1906 ** when the symbol is popped from the stack during a |
| 1907 ** reduce or during error processing or when a parser is |
| 1908 ** being destroyed before it is finished parsing. |
| 1909 ** |
| 1910 ** Note: during a reduce, the only symbols destroyed are those |
| 1911 ** which appear on the RHS of the rule, but which are *not* used |
| 1912 ** inside the C code. |
| 1913 */ |
| 1914 /********* Begin destructor definitions ***************************************/ |
| 1915 case 16: /* input */ |
| 1916 { |
| 1917 (void)pParse; |
| 1918 } |
| 1919 break; |
| 1920 case 17: /* expr */ |
| 1921 case 18: /* cnearset */ |
| 1922 case 19: /* exprlist */ |
| 1923 { |
| 1924 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); |
| 1925 } |
| 1926 break; |
| 1927 case 20: /* nearset */ |
| 1928 case 23: /* nearphrases */ |
| 1929 { |
| 1930 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); |
| 1931 } |
| 1932 break; |
| 1933 case 21: /* colset */ |
| 1934 case 22: /* colsetlist */ |
| 1935 { |
| 1936 sqlite3_free((fts5yypminor->fts5yy11)); |
| 1937 } |
| 1938 break; |
| 1939 case 24: /* phrase */ |
| 1940 { |
| 1941 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); |
| 1942 } |
| 1943 break; |
| 1944 /********* End destructor definitions *****************************************/ |
| 1945 default: break; /* If no destructor action specified: do nothing */ |
| 1946 } |
| 1947 } |
| 1948 |
| 1949 /* |
| 1950 ** Pop the parser's stack once. |
| 1951 ** |
| 1952 ** If there is a destructor routine associated with the token which |
| 1953 ** is popped from the stack, then call it. |
| 1954 */ |
| 1955 static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ |
| 1956 fts5yyStackEntry *fts5yytos; |
| 1957 assert( pParser->fts5yytos!=0 ); |
| 1958 assert( pParser->fts5yytos > pParser->fts5yystack ); |
| 1959 fts5yytos = pParser->fts5yytos--; |
| 1960 #ifndef NDEBUG |
| 1961 if( fts5yyTraceFILE ){ |
| 1962 fprintf(fts5yyTraceFILE,"%sPopping %s\n", |
| 1963 fts5yyTracePrompt, |
| 1964 fts5yyTokenName[fts5yytos->major]); |
| 1965 } |
| 1966 #endif |
| 1967 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); |
| 1968 } |
| 1969 |
| 1970 /* |
| 1971 ** Clear all secondary memory allocations from the parser |
| 1972 */ |
| 1973 static void sqlite3Fts5ParserFinalize(void *p){ |
| 1974 fts5yyParser *pParser = (fts5yyParser*)p; |
| 1975 while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pPars
er); |
| 1976 #if fts5YYSTACKDEPTH<=0 |
| 1977 if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack); |
| 1978 #endif |
| 1979 } |
| 1980 |
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser and release its memory.  Destructors run for every
** stack element (via sqlite3Fts5ParserFinalize) before freeProc is called
** on the parser itself.
**
** A NULL parser pointer is ignored unless the fts5YYPARSEFREENEVERNULL
** macro exists (for example because it is defined in a %include section
** of the input grammar), in which case the pointer is assumed to never be
** NULL and the check is compiled out.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( p==0 ){
    return;
  }
#endif
  sqlite3Fts5ParserFinalize(p);
  (*freeProc)(p);
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
| 2001 |
/*
** Report the peak stack depth recorded for a parser (its fts5yyhwm field).
** Only available when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
| 2011 |
| 2012 /* |
| 2013 ** Find the appropriate action for a parser given the terminal |
| 2014 ** look-ahead token iLookAhead. |
| 2015 */ |
| 2016 static unsigned int fts5yy_find_shift_action( |
| 2017 fts5yyParser *pParser, /* The parser */ |
| 2018 fts5YYCODETYPE iLookAhead /* The look-ahead token */ |
| 2019 ){ |
| 2020 int i; |
| 2021 int stateno = pParser->fts5yytos->stateno; |
| 2022 |
| 2023 if( stateno>=fts5YY_MIN_REDUCE ) return stateno; |
| 2024 assert( stateno <= fts5YY_SHIFT_COUNT ); |
| 2025 do{ |
| 2026 i = fts5yy_shift_ofst[stateno]; |
| 2027 assert( iLookAhead!=fts5YYNOCODE ); |
| 2028 i += iLookAhead; |
| 2029 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){ |
| 2030 #ifdef fts5YYFALLBACK |
| 2031 fts5YYCODETYPE iFallback; /* Fallback token */ |
| 2032 if( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) |
| 2033 && (iFallback = fts5yyFallback[iLookAhead])!=0 ){ |
| 2034 #ifndef NDEBUG |
| 2035 if( fts5yyTraceFILE ){ |
| 2036 fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n", |
| 2037 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFa
llback]); |
| 2038 } |
| 2039 #endif |
| 2040 assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate
*/ |
| 2041 iLookAhead = iFallback; |
| 2042 continue; |
| 2043 } |
| 2044 #endif |
| 2045 #ifdef fts5YYWILDCARD |
| 2046 { |
| 2047 int j = i - iLookAhead + fts5YYWILDCARD; |
| 2048 if( |
| 2049 #if fts5YY_SHIFT_MIN+fts5YYWILDCARD<0 |
| 2050 j>=0 && |
| 2051 #endif |
| 2052 #if fts5YY_SHIFT_MAX+fts5YYWILDCARD>=fts5YY_ACTTAB_COUNT |
| 2053 j<fts5YY_ACTTAB_COUNT && |
| 2054 #endif |
| 2055 fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 |
| 2056 ){ |
| 2057 #ifndef NDEBUG |
| 2058 if( fts5yyTraceFILE ){ |
| 2059 fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n", |
| 2060 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], |
| 2061 fts5yyTokenName[fts5YYWILDCARD]); |
| 2062 } |
| 2063 #endif /* NDEBUG */ |
| 2064 return fts5yy_action[j]; |
| 2065 } |
| 2066 } |
| 2067 #endif /* fts5YYWILDCARD */ |
| 2068 return fts5yy_default[stateno]; |
| 2069 }else{ |
| 2070 return fts5yy_action[i]; |
| 2071 } |
| 2072 }while(1); |
| 2073 } |
| 2074 |
| 2075 /* |
| 2076 ** Find the appropriate action for a parser given the non-terminal |
| 2077 ** look-ahead token iLookAhead. |
| 2078 */ |
| 2079 static int fts5yy_find_reduce_action( |
| 2080 int stateno, /* Current state number */ |
| 2081 fts5YYCODETYPE iLookAhead /* The look-ahead token */ |
| 2082 ){ |
| 2083 int i; |
| 2084 #ifdef fts5YYERRORSYMBOL |
| 2085 if( stateno>fts5YY_REDUCE_COUNT ){ |
| 2086 return fts5yy_default[stateno]; |
| 2087 } |
| 2088 #else |
| 2089 assert( stateno<=fts5YY_REDUCE_COUNT ); |
| 2090 #endif |
| 2091 i = fts5yy_reduce_ofst[stateno]; |
| 2092 assert( i!=fts5YY_REDUCE_USE_DFLT ); |
| 2093 assert( iLookAhead!=fts5YYNOCODE ); |
| 2094 i += iLookAhead; |
| 2095 #ifdef fts5YYERRORSYMBOL |
| 2096 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){ |
| 2097 return fts5yy_default[stateno]; |
| 2098 } |
| 2099 #else |
| 2100 assert( i>=0 && i<fts5YY_ACTTAB_COUNT ); |
| 2101 assert( fts5yy_lookahead[i]==iLookAhead ); |
| 2102 #endif |
| 2103 return fts5yy_action[i]; |
| 2104 } |
| 2105 |
| 2106 /* |
| 2107 ** The following routine is called if the stack overflows. |
| ** |
| ** Every entry above the base of the parser stack is popped with |
| ** fts5yy_pop_parser_stack(), after which the %stack_overflow code runs. |
| ** That code reports "fts5: parser stack overflow" through |
| ** sqlite3Fts5ParseError(). |
| 2108 */ |
| 2109 static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){ |
| 2110 sqlite3Fts5ParserARG_FETCH; |
| 2111 #ifndef NDEBUG |
| 2112 if( fts5yyTraceFILE ){ |
| 2113 fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt); |
| 2114 } |
| 2115 #endif |
| 2116 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parse
r_stack(fts5yypParser); |
| 2117 /* Here code is inserted which will execute if the parser |
| 2118 ** stack ever overflows */ |
| 2119 /******** Begin %stack_overflow code ******************************************/ |
| 2120 |
| 2121 sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow"); |
| 2122 /******** End %stack_overflow code ********************************************/ |
| 2123 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument
var */ |
| 2124 } |
| 2125 |
| 2126 /* |
| 2127 ** Print tracing information for a SHIFT action |
| ** |
| ** Nothing is printed unless fts5yyTraceFILE is non-NULL. The name of the |
| ** token on top of the stack is always printed; the destination state is |
| ** printed only when fts5yyNewState is less than fts5YYNSTATE. When NDEBUG |
| ** is defined, fts5yyTraceShift(X,Y) is a macro that expands to nothing. |
| 2128 */ |
| 2129 #ifndef NDEBUG |
| 2130 static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState){ |
| 2131 if( fts5yyTraceFILE ){ |
| 2132 if( fts5yyNewState<fts5YYNSTATE ){ |
| 2133 fprintf(fts5yyTraceFILE,"%sShift '%s', go to state %d\n", |
| 2134 fts5yyTracePrompt,fts5yyTokenName[fts5yypParser->fts5yytos->major], |
| 2135 fts5yyNewState); |
| 2136 }else{ |
| 2137 fprintf(fts5yyTraceFILE,"%sShift '%s'\n", |
| 2138 fts5yyTracePrompt,fts5yyTokenName[fts5yypParser->fts5yytos->major]); |
| 2139 } |
| 2140 } |
| 2141 } |
| 2142 #else |
| 2143 # define fts5yyTraceShift(X,Y) |
| 2144 #endif |
| 2145 |
| 2146 /* |
| 2147 ** Perform a shift action. |
| ** |
| ** The top-of-stack pointer is advanced by one entry. If that runs past the |
| ** end of the stack (fixed-size build), or past the allocated size and |
| ** fts5yyGrowStack() returns non-zero (growable build), the advance is |
| ** undone, fts5yyStackOverflow() is called and nothing is shifted. |
| ** Otherwise the new entry receives the state, major token and minor token. |
| ** A state number above fts5YY_MAX_SHIFT is first adjusted by |
| ** fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE before being stored. |
| 2148 */ |
| 2149 static void fts5yy_shift( |
| 2150 fts5yyParser *fts5yypParser, /* The parser to be shifted */ |
| 2151 int fts5yyNewState, /* The new state to shift in */ |
| 2152 int fts5yyMajor, /* The major token to shift in */ |
| 2153 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor /* The minor token to shift
in */ |
| 2154 ){ |
| 2155 fts5yyStackEntry *fts5yytos; |
| 2156 fts5yypParser->fts5yytos++; |
| 2157 #ifdef fts5YYTRACKMAXSTACKDEPTH |
| 2158 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser
->fts5yyhwm ){ |
| 2159 fts5yypParser->fts5yyhwm++; |
| 2160 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yyp
Parser->fts5yystack) ); |
| 2161 } |
| 2162 #endif |
| 2163 #if fts5YYSTACKDEPTH>0 |
| 2164 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5YYSTACKDEPTH] ){ |
| 2165 fts5yypParser->fts5yytos--; |
| 2166 fts5yyStackOverflow(fts5yypParser); |
| 2167 return; |
| 2168 } |
| 2169 #else |
| 2170 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5y
ystksz] ){ |
| 2171 if( fts5yyGrowStack(fts5yypParser) ){ |
| 2172 fts5yypParser->fts5yytos--; |
| 2173 fts5yyStackOverflow(fts5yypParser); |
| 2174 return; |
| 2175 } |
| 2176 } |
| 2177 #endif |
| 2178 if( fts5yyNewState > fts5YY_MAX_SHIFT ){ |
| 2179 fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE; |
| 2180 } |
| 2181 fts5yytos = fts5yypParser->fts5yytos; /* re-read: the stack may have been grown above */ |
| 2182 fts5yytos->stateno = (fts5YYACTIONTYPE)fts5yyNewState; |
| 2183 fts5yytos->major = (fts5YYCODETYPE)fts5yyMajor; |
| 2184 fts5yytos->minor.fts5yy0 = fts5yyMinor; |
| 2185 fts5yyTraceShift(fts5yypParser, fts5yyNewState); |
| 2186 } |
| 2187 |
| 2188 /* The following table contains information about every rule that |
| 2189 ** is used during the reduce. |
| ** |
| ** The table is indexed by rule number (the same number used by the |
| ** "case" labels in fts5yy_reduce()). For each rule it records the code of |
| ** the left-hand-side symbol and the number of right-hand-side symbols. |
| 2190 */ |
| 2191 static const struct { |
| 2192 fts5YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ |
| 2193 unsigned char nrhs; /* Number of right-hand side symbols in the rule */ |
| 2194 } fts5yyRuleInfo[] = { |
| 2195 { 16, 1 }, /* (0) input ::= expr */ |
| 2196 { 17, 3 }, /* (1) expr ::= expr AND expr */ |
| 2197 { 17, 3 }, /* (2) expr ::= expr OR expr */ |
| 2198 { 17, 3 }, /* (3) expr ::= expr NOT expr */ |
| 2199 { 17, 3 }, /* (4) expr ::= LP expr RP */ |
| 2200 { 17, 1 }, /* (5) expr ::= exprlist */ |
| 2201 { 19, 1 }, /* (6) exprlist ::= cnearset */ |
| 2202 { 19, 2 }, /* (7) exprlist ::= exprlist cnearset */ |
| 2203 { 18, 1 }, /* (8) cnearset ::= nearset */ |
| 2204 { 18, 3 }, /* (9) cnearset ::= colset COLON nearset */ |
| 2205 { 21, 4 }, /* (10) colset ::= MINUS LCP colsetlist RCP */ |
| 2206 { 21, 3 }, /* (11) colset ::= LCP colsetlist RCP */ |
| 2207 { 21, 1 }, /* (12) colset ::= STRING */ |
| 2208 { 21, 2 }, /* (13) colset ::= MINUS STRING */ |
| 2209 { 22, 2 }, /* (14) colsetlist ::= colsetlist STRING */ |
| 2210 { 22, 1 }, /* (15) colsetlist ::= STRING */ |
| 2211 { 20, 1 }, /* (16) nearset ::= phrase */ |
| 2212 { 20, 5 }, /* (17) nearset ::= STRING LP nearphrases neardist_opt RP */ |
| 2213 { 23, 1 }, /* (18) nearphrases ::= phrase */ |
| 2214 { 23, 2 }, /* (19) nearphrases ::= nearphrases phrase */ |
| 2215 { 25, 0 }, /* (20) neardist_opt ::= */ |
| 2216 { 25, 2 }, /* (21) neardist_opt ::= COMMA STRING */ |
| 2217 { 24, 4 }, /* (22) phrase ::= phrase PLUS STRING star_opt */ |
| 2218 { 24, 2 }, /* (23) phrase ::= STRING star_opt */ |
| 2219 { 26, 1 }, /* (24) star_opt ::= STAR */ |
| 2220 { 26, 0 }, /* (25) star_opt ::= */ |
| 2221 }; |
| 2222 |
| 2223 static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */ |
| 2224 |
| 2225 /* |
| 2226 ** Perform a reduce action and the shift that must immediately |
| 2227 ** follow the reduce. |
| ** |
| ** For a rule with an empty right-hand side the function first makes sure |
| ** the stack can hold one more entry, calling fts5yyStackOverflow() and |
| ** returning if it cannot. The rule's action code then runs. Afterwards the |
| ** nrhs right-hand-side entries are replaced by a single entry for the |
| ** left-hand-side symbol, whose state comes from |
| ** fts5yy_find_reduce_action(). If that lookup returns a value greater than |
| ** fts5YY_MAX_SHIFTREDUCE, the right-hand side is popped and |
| ** fts5yy_accept() is called instead. |
| 2228 */ |
| 2229 static void fts5yy_reduce( |
| 2230 fts5yyParser *fts5yypParser, /* The parser */ |
| 2231 unsigned int fts5yyruleno /* Number of the rule by which to reduce */ |
| 2232 ){ |
| 2233 int fts5yygoto; /* The next state */ |
| 2234 int fts5yyact; /* The next action */ |
| 2235 fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */ |
| 2236 int fts5yysize; /* Amount to pop the stack */ |
| 2237 sqlite3Fts5ParserARG_FETCH; |
| 2238 fts5yymsp = fts5yypParser->fts5yytos; |
| 2239 #ifndef NDEBUG |
| 2240 if( fts5yyTraceFILE && fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yy
RuleName[0])) ){ |
| 2241 fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs; |
| 2242 fprintf(fts5yyTraceFILE, "%sReduce [%s], go to state %d.\n", fts5yyTraceProm
pt, |
| 2243 fts5yyRuleName[fts5yyruleno], fts5yymsp[-fts5yysize].stateno); |
| 2244 } |
| 2245 #endif /* NDEBUG */ |
| 2246 |
| 2247 /* Check that the stack is large enough to grow by a single entry |
| 2248 ** if the RHS of the rule is empty. This ensures that there is room |
| 2249 ** enough on the stack to push the LHS value */ |
| 2250 if( fts5yyRuleInfo[fts5yyruleno].nrhs==0 ){ |
| 2251 #ifdef fts5YYTRACKMAXSTACKDEPTH |
| 2252 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypPars
er->fts5yyhwm ){ |
| 2253 fts5yypParser->fts5yyhwm++; |
| 2254 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5y
ypParser->fts5yystack)); |
| 2255 } |
| 2256 #endif |
| 2257 #if fts5YYSTACKDEPTH>0 |
| 2258 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1
] ){ |
| 2259 fts5yyStackOverflow(fts5yypParser); |
| 2260 return; |
| 2261 } |
| 2262 #else |
| 2263 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts
5yystksz-1] ){ |
| 2264 if( fts5yyGrowStack(fts5yypParser) ){ |
| 2265 fts5yyStackOverflow(fts5yypParser); |
| 2266 return; |
| 2267 } |
| 2268 fts5yymsp = fts5yypParser->fts5yytos; /* re-read after the stack was grown */ |
| 2269 } |
| 2270 #endif |
| 2271 } |
| 2272 |
| 2273 switch( fts5yyruleno ){ |
| 2274 /* Beginning here are the reduction cases. A typical example |
| 2275 ** follows: |
| 2276 ** case 0: |
| 2277 ** #line <lineno> <grammarfile> |
| 2278 ** { ... } // User supplied code |
| 2279 ** #line <lineno> <thisfile> |
| 2280 ** break; |
| ** |
| ** Within each case fts5yymsp[0] is the last right-hand-side symbol and |
| ** fts5yymsp[1-nrhs] is the first, which is also the slot that receives |
| ** the left-hand-side value. |
| 2281 */ |
| 2282 /********** Begin reduce actions **********************************************/ |
| 2283 fts5YYMINORTYPE fts5yylhsminor; |
| 2284 case 0: /* input ::= expr */ |
| 2285 { sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); } |
| 2286 break; |
| 2287 case 1: /* expr ::= expr AND expr */ |
| 2288 { |
| 2289 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2]
.minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); |
| 2290 } |
| 2291 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2292 break; |
| 2293 case 2: /* expr ::= expr OR expr */ |
| 2294 { |
| 2295 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].
minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); |
| 2296 } |
| 2297 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2298 break; |
| 2299 case 3: /* expr ::= expr NOT expr */ |
| 2300 { |
| 2301 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2]
.minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0); |
| 2302 } |
| 2303 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2304 break; |
| 2305 case 4: /* expr ::= LP expr RP */ |
| 2306 {fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;} |
| 2307 break; |
| 2308 case 5: /* expr ::= exprlist */ |
| 2309 case 6: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==6); |
| 2310 {fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;} |
| 2311 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2312 break; |
| 2313 case 7: /* exprlist ::= exprlist cnearset */ |
| 2314 { |
| 2315 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].mi
nor.fts5yy24, fts5yymsp[0].minor.fts5yy24); |
| 2316 } |
| 2317 fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2318 break; |
| 2319 case 8: /* cnearset ::= nearset */ |
| 2320 { |
| 2321 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5
yymsp[0].minor.fts5yy46); |
| 2322 } |
| 2323 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2324 break; |
| 2325 case 9: /* cnearset ::= colset COLON nearset */ |
| 2326 { |
| 2327 sqlite3Fts5ParseSetColset(pParse, fts5yymsp[0].minor.fts5yy46, fts5yymsp[-2].m
inor.fts5yy11); |
| 2328 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5
yymsp[0].minor.fts5yy46); |
| 2329 } |
| 2330 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24; |
| 2331 break; |
| 2332 case 10: /* colset ::= MINUS LCP colsetlist RCP */ |
| 2333 { |
| 2334 fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yyms
p[-1].minor.fts5yy11); |
| 2335 } |
| 2336 break; |
| 2337 case 11: /* colset ::= LCP colsetlist RCP */ |
| 2338 { fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; } |
| 2339 break; |
| 2340 case 12: /* colset ::= STRING */ |
| 2341 { |
| 2342 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].mino
r.fts5yy0); |
| 2343 } |
| 2344 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; |
| 2345 break; |
| 2346 case 13: /* colset ::= MINUS STRING */ |
| 2347 { |
| 2348 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0]
.minor.fts5yy0); |
| 2349 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[
-1].minor.fts5yy11); |
| 2350 } |
| 2351 break; |
| 2352 case 14: /* colsetlist ::= colsetlist STRING */ |
| 2353 { |
| 2354 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.f
ts5yy11, &fts5yymsp[0].minor.fts5yy0); } |
| 2355 fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11; |
| 2356 break; |
| 2357 case 15: /* colsetlist ::= STRING */ |
| 2358 { |
| 2359 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].mino
r.fts5yy0); |
| 2360 } |
| 2361 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11; |
| 2362 break; |
| 2363 case 16: /* nearset ::= phrase */ |
| 2364 { fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].mino
r.fts5yy53); } |
| 2365 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; |
| 2366 break; |
| 2367 case 17: /* nearset ::= STRING LP nearphrases neardist_opt RP */ |
| 2368 { |
| 2369 sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0); |
| 2370 sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-
1].minor.fts5yy0); |
| 2371 fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46; |
| 2372 } |
| 2373 fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46; |
| 2374 break; |
| 2375 case 18: /* nearphrases ::= phrase */ |
| 2376 { |
| 2377 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].mino
r.fts5yy53); |
| 2378 } |
| 2379 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46; |
| 2380 break; |
| 2381 case 19: /* nearphrases ::= nearphrases phrase */ |
| 2382 { |
| 2383 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.
fts5yy46, fts5yymsp[0].minor.fts5yy53); |
| 2384 } |
| 2385 fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46; |
| 2386 break; |
| 2387 case 20: /* neardist_opt ::= */ |
| 2388 { fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; } |
| 2389 break; |
| 2390 case 21: /* neardist_opt ::= COMMA STRING */ |
| 2391 { fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; } |
| 2392 break; |
| 2393 case 22: /* phrase ::= phrase PLUS STRING star_opt */ |
| 2394 { |
| 2395 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts
5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4); |
| 2396 } |
| 2397 fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53; |
| 2398 break; |
| 2399 case 23: /* phrase ::= STRING star_opt */ |
| 2400 { |
| 2401 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor
.fts5yy0, fts5yymsp[0].minor.fts5yy4); |
| 2402 } |
| 2403 fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53; |
| 2404 break; |
| 2405 case 24: /* star_opt ::= STAR */ |
| 2406 { fts5yymsp[0].minor.fts5yy4 = 1; } |
| 2407 break; |
| 2408 case 25: /* star_opt ::= */ |
| 2409 { fts5yymsp[1].minor.fts5yy4 = 0; } |
| 2410 break; |
| 2411 default: |
| 2412 break; |
| 2413 /********** End reduce actions ************************************************/ |
| 2414 }; |
| 2415 assert( fts5yyruleno<sizeof(fts5yyRuleInfo)/sizeof(fts5yyRuleInfo[0]) ); /* NOTE(review): fts5yyRuleInfo[fts5yyruleno] has already been read above, before this check */ |
| 2416 fts5yygoto = fts5yyRuleInfo[fts5yyruleno].lhs; |
| 2417 fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs; |
| 2418 fts5yyact = fts5yy_find_reduce_action(fts5yymsp[-fts5yysize].stateno,(fts5YYCO
DETYPE)fts5yygoto); |
| 2419 if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){ |
| 2420 if( fts5yyact>fts5YY_MAX_SHIFT ){ |
| 2421 fts5yyact += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE; |
| 2422 } |
| 2423 fts5yymsp -= fts5yysize-1; /* net stack change: pop nrhs entries, push one */ |
| 2424 fts5yypParser->fts5yytos = fts5yymsp; |
| 2425 fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact; |
| 2426 fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto; |
| 2427 fts5yyTraceShift(fts5yypParser, fts5yyact); |
| 2428 }else{ |
| 2429 assert( fts5yyact == fts5YY_ACCEPT_ACTION ); |
| 2430 fts5yypParser->fts5yytos -= fts5yysize; |
| 2431 fts5yy_accept(fts5yypParser); |
| 2432 } |
| 2433 } |
| 2434 |
| 2435 /* |
| 2436 ** The following code executes when the parse fails |
| ** |
| ** All entries above the base of the parser stack are popped with |
| ** fts5yy_pop_parser_stack(). The %parse_failure section is empty here, so |
| ** nothing else happens. The function is compiled only when |
| ** fts5YYNOERRORRECOVERY is not defined. |
| 2437 */ |
| 2438 #ifndef fts5YYNOERRORRECOVERY |
| 2439 static void fts5yy_parse_failed( |
| 2440 fts5yyParser *fts5yypParser /* The parser */ |
| 2441 ){ |
| 2442 sqlite3Fts5ParserARG_FETCH; |
| 2443 #ifndef NDEBUG |
| 2444 if( fts5yyTraceFILE ){ |
| 2445 fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt); |
| 2446 } |
| 2447 #endif |
| 2448 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser
_stack(fts5yypParser); |
| 2449 /* Here code is inserted which will be executed whenever the |
| 2450 ** parser fails */ |
| 2451 /************ Begin %parse_failure code ***************************************/ |
| 2452 /************ End %parse_failure code *****************************************/ |
| 2453 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v
ariable */ |
| 2454 } |
| 2455 #endif /* fts5YYNOERRORRECOVERY */ |
| 2456 |
| 2457 /* |
| 2458 ** The following code executes when a syntax error first occurs. |
| ** |
| ** The %syntax_error code reports the error through |
| ** sqlite3Fts5ParseError(), quoting the first FTS5TOKEN.n bytes at |
| ** FTS5TOKEN.p, where FTS5TOKEN is the minor value of the offending token. |
| ** The major token code is not used. |
| 2459 */ |
| 2460 static void fts5yy_syntax_error( |
| 2461 fts5yyParser *fts5yypParser, /* The parser */ |
| 2462 int fts5yymajor, /* The major type of the error token */ |
| 2463 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The minor type of the er
ror token */ |
| 2464 ){ |
| 2465 sqlite3Fts5ParserARG_FETCH; |
| 2466 #define FTS5TOKEN fts5yyminor |
| 2467 /************ Begin %syntax_error code ****************************************/ |
| 2468 |
| 2469 UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */ |
| 2470 sqlite3Fts5ParseError( |
| 2471 pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p |
| 2472 ); |
| 2473 /************ End %syntax_error code ******************************************/ |
| 2474 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v
ariable */ |
| 2475 } |
| 2476 |
| 2477 /* |
| 2478 ** The following is executed when the parser accepts |
| ** |
| ** The error counter is reset to -1 (when error recovery is compiled in) |
| ** and the stack is asserted to be back at its base entry. The |
| ** %parse_accept section is empty here, so no further action is taken. |
| 2479 */ |
| 2480 static void fts5yy_accept( |
| 2481 fts5yyParser *fts5yypParser /* The parser */ |
| 2482 ){ |
| 2483 sqlite3Fts5ParserARG_FETCH; |
| 2484 #ifndef NDEBUG |
| 2485 if( fts5yyTraceFILE ){ |
| 2486 fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt); |
| 2487 } |
| 2488 #endif |
| 2489 #ifndef fts5YYNOERRORRECOVERY |
| 2490 fts5yypParser->fts5yyerrcnt = -1; |
| 2491 #endif |
| 2492 assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack ); |
| 2493 /* Here code is inserted which will be executed whenever the |
| 2494 ** parser accepts */ |
| 2495 /*********** Begin %parse_accept code *****************************************/ |
| 2496 /*********** End %parse_accept code *******************************************/ |
| 2497 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v
ariable */ |
| 2498 } |
| 2499 |
| 2500 /* The main parser program. |
| 2501 ** The first argument is a pointer to a structure obtained from |
| 2502 ** "sqlite3Fts5ParserAlloc" which describes the current state of the parser. |
| 2503 ** The second argument is the major token number. The third is |
| 2504 ** the minor token. The fourth optional argument is whatever the |
| 2505 ** user wants (and specified in the grammar) and is available for |
| 2506 ** use by the action routines. |
| 2507 ** |
| 2508 ** Inputs: |
| 2509 ** <ul> |
| 2510 ** <li> A pointer to the parser (an opaque structure.) |
| 2511 ** <li> The major token number. |
| 2512 ** <li> The minor token number. |
| 2513 ** <li> An optional argument of a grammar-specified type. |
| 2514 ** </ul> |
| 2515 ** |
| 2516 ** Outputs: |
| 2517 ** None. |
| ** |
| ** For the supplied token the function repeatedly looks up an action with |
| ** fts5yy_find_shift_action() and either shifts the token, performs a |
| ** reduce, or runs the error handling selected at compile time by |
| ** fts5YYERRORSYMBOL / fts5YYNOERRORRECOVERY. The loop ends once the token |
| ** has been consumed (fts5yymajor is set to fts5YYNOCODE) or the parser |
| ** stack is back at its base entry. |
| 2518 */ |
| 2519 static void sqlite3Fts5Parser( |
| 2520 void *fts5yyp, /* The parser */ |
| 2521 int fts5yymajor, /* The major token code number */ |
| 2522 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The value for the token */ |
| 2523 sqlite3Fts5ParserARG_PDECL /* Optional %extra_argument parameter
*/ |
| 2524 ){ |
| 2525 fts5YYMINORTYPE fts5yyminorunion; |
| 2526 unsigned int fts5yyact; /* The parser action. */ |
| 2527 #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY) |
| 2528 int fts5yyendofinput; /* True if we are at the end of input */ |
| 2529 #endif |
| 2530 #ifdef fts5YYERRORSYMBOL |
| 2531 int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */ |
| 2532 #endif |
| 2533 fts5yyParser *fts5yypParser; /* The parser */ |
| 2534 |
| 2535 fts5yypParser = (fts5yyParser*)fts5yyp; |
| 2536 assert( fts5yypParser->fts5yytos!=0 ); |
| 2537 #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY) |
| 2538 fts5yyendofinput = (fts5yymajor==0); |
| 2539 #endif |
| 2540 sqlite3Fts5ParserARG_STORE; |
| 2541 |
| 2542 #ifndef NDEBUG |
| 2543 if( fts5yyTraceFILE ){ |
| 2544 fprintf(fts5yyTraceFILE,"%sInput '%s'\n",fts5yyTracePrompt,fts5yyTokenName[f
ts5yymajor]); |
| 2545 } |
| 2546 #endif |
| 2547 |
| 2548 do{ |
| 2549 fts5yyact = fts5yy_find_shift_action(fts5yypParser,(fts5YYCODETYPE)fts5yymaj
or); |
| 2550 if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){ |
| 2551 fts5yy_shift(fts5yypParser,fts5yyact,fts5yymajor,fts5yyminor); |
| 2552 #ifndef fts5YYNOERRORRECOVERY |
| 2553 fts5yypParser->fts5yyerrcnt--; |
| 2554 #endif |
| 2555 fts5yymajor = fts5YYNOCODE; /* token consumed: ends the loop */ |
| 2556 }else if( fts5yyact <= fts5YY_MAX_REDUCE ){ |
| 2557 fts5yy_reduce(fts5yypParser,fts5yyact-fts5YY_MIN_REDUCE); |
| 2558 }else{ |
| 2559 assert( fts5yyact == fts5YY_ERROR_ACTION ); |
| 2560 fts5yyminorunion.fts5yy0 = fts5yyminor; |
| 2561 #ifdef fts5YYERRORSYMBOL |
| 2562 int fts5yymx; |
| 2563 #endif |
| 2564 #ifndef NDEBUG |
| 2565 if( fts5yyTraceFILE ){ |
| 2566 fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt); |
| 2567 } |
| 2568 #endif |
| 2569 #ifdef fts5YYERRORSYMBOL |
| 2570 /* A syntax error has occurred. |
| 2571 ** The response to an error depends upon whether or not the |
| 2572 ** grammar defines an error token "ERROR". |
| 2573 ** |
| 2574 ** This is what we do if the grammar does define ERROR: |
| 2575 ** |
| 2576 ** * Call the %syntax_error function. |
| 2577 ** |
| 2578 ** * Begin popping the stack until we enter a state where |
| 2579 ** it is legal to shift the error symbol, then shift |
| 2580 ** the error symbol. |
| 2581 ** |
| 2582 ** * Set the error count to three. |
| 2583 ** |
| 2584 ** * Begin accepting and shifting new tokens. No new error |
| 2585 ** processing will occur until three tokens have been |
| 2586 ** shifted successfully. |
| 2587 ** |
| 2588 */ |
| 2589 if( fts5yypParser->fts5yyerrcnt<0 ){ |
| 2590 fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor); |
| 2591 } |
| 2592 fts5yymx = fts5yypParser->fts5yytos->major; |
| 2593 if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){ |
| 2594 #ifndef NDEBUG |
| 2595 if( fts5yyTraceFILE ){ |
| 2596 fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n", |
| 2597 fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]); |
| 2598 } |
| 2599 #endif |
| 2600 fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yymin
orunion); |
| 2601 fts5yymajor = fts5YYNOCODE; |
| 2602 }else{ |
| 2603 while( fts5yypParser->fts5yytos >= fts5yypParser->fts5yystack |
| 2604 && fts5yymx != fts5YYERRORSYMBOL |
| 2605 && (fts5yyact = fts5yy_find_reduce_action( |
| 2606 fts5yypParser->fts5yytos->stateno, |
| 2607 fts5YYERRORSYMBOL)) >= fts5YY_MIN_REDUCE |
| 2608 ){ |
| 2609 fts5yy_pop_parser_stack(fts5yypParser); |
| 2610 } |
| 2611 if( fts5yypParser->fts5yytos < fts5yypParser->fts5yystack || fts5yymajor
==0 ){ |
| 2612 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yymin
orunion); |
| 2613 fts5yy_parse_failed(fts5yypParser); |
| 2614 #ifndef fts5YYNOERRORRECOVERY |
| 2615 fts5yypParser->fts5yyerrcnt = -1; |
| 2616 #endif |
| 2617 fts5yymajor = fts5YYNOCODE; |
| 2618 }else if( fts5yymx!=fts5YYERRORSYMBOL ){ |
| 2619 fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor); |
| 2620 } |
| 2621 } |
| 2622 fts5yypParser->fts5yyerrcnt = 3; |
| 2623 fts5yyerrorhit = 1; |
| 2624 #elif defined(fts5YYNOERRORRECOVERY) |
| 2625 /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to |
| 2626 ** do any kind of error recovery. Instead, simply invoke the syntax |
| 2627 ** error routine and continue going as if nothing had happened. |
| 2628 ** |
| 2629 ** Applications can set this macro (for example inside %include) if |
| 2630 ** they intend to abandon the parse upon the first syntax error seen. |
| 2631 */ |
| 2632 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); |
| 2633 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorun
ion); |
| 2634 fts5yymajor = fts5YYNOCODE; |
| 2635 |
| 2636 #else /* fts5YYERRORSYMBOL is not defined */ |
| 2637 /* This is what we do if the grammar does not define ERROR: |
| 2638 ** |
| 2639 ** * Report an error message, and throw away the input token. |
| 2640 ** |
| 2641 ** * If the input token is $, then fail the parse. |
| 2642 ** |
| 2643 ** As before, subsequent error messages are suppressed until |
| 2644 ** three input tokens have been successfully shifted. |
| 2645 */ |
| 2646 if( fts5yypParser->fts5yyerrcnt<=0 ){ |
| 2647 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor); |
| 2648 } |
| 2649 fts5yypParser->fts5yyerrcnt = 3; |
| 2650 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorun
ion); |
| 2651 if( fts5yyendofinput ){ |
| 2652 fts5yy_parse_failed(fts5yypParser); |
| 2653 #ifndef fts5YYNOERRORRECOVERY |
| 2654 fts5yypParser->fts5yyerrcnt = -1; |
| 2655 #endif |
| 2656 } |
| 2657 fts5yymajor = fts5YYNOCODE; |
| 2658 #endif |
| 2659 } |
| 2660 }while( fts5yymajor!=fts5YYNOCODE && fts5yypParser->fts5yytos>fts5yypParser->f
ts5yystack ); |
| 2661 #ifndef NDEBUG |
| 2662 if( fts5yyTraceFILE ){ |
| 2663 fts5yyStackEntry *i; |
| 2664 char cDiv = '['; |
| 2665 fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt); |
| 2666 for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){ |
| 2667 fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]); |
| 2668 cDiv = ' '; |
| 2669 } |
| 2670 fprintf(fts5yyTraceFILE,"]\n"); |
| 2671 } |
| 2672 #endif |
| 2673 return; |
| 2674 } |
| 2675 |
| 2676 /* |
| 2677 ** 2014 May 31 |
| 2678 ** |
| 2679 ** The author disclaims copyright to this source code. In place of |
| 2680 ** a legal notice, here is a blessing: |
| 2681 ** |
| 2682 ** May you do good and not evil. |
| 2683 ** May you find forgiveness for yourself and forgive others. |
| 2684 ** May you share freely, never taking more than you give. |
| 2685 ** |
| 2686 ****************************************************************************** |
| 2687 */ |
| 2688 |
| 2689 |
| 2690 /* #include "fts5Int.h" */ |
| 2691 #include <math.h> /* amalgamator: keep */ |
| 2692 |
| 2693 /* |
| 2694 ** Object used to iterate through all "coalesced phrase instances" in |
| 2695 ** a single column of the current row. If the phrase instances in the |
| 2696 ** column being considered do not overlap, this object simply iterates |
| 2697 ** through them. Or, if they do overlap (share one or more tokens in |
| 2698 ** common), each set of overlapping instances is treated as a single |
| 2699 ** match. See documentation for the highlight() auxiliary function for |
| 2700 ** details. |
| 2701 ** |
| 2702 ** Usage is: |
| 2703 ** |
| 2704 ** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter); |
| 2705 ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter)); |
| 2706 ** rc = fts5CInstIterNext(&iter) |
| 2707 ** ){ |
| 2708 ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); |
| 2709 ** } |
| 2710 ** |
| ** fts5CInstIterNext() leaves iStart and iEnd set to -1 when it finds no |
| ** further instance in the column. |
| 2711 */ |
| 2712 typedef struct CInstIter CInstIter; |
| 2713 struct CInstIter { |
| 2714 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ |
| 2715 Fts5Context *pFts; /* First arg to pass to pApi functions */ |
| 2716 int iCol; /* Column to search */ |
| 2717 int iInst; /* Next phrase instance index */ |
| 2718 int nInst; /* Total number of phrase instances */ |
| 2719 |
| 2720 /* Output variables */ |
| 2721 int iStart; /* First token in coalesced phrase instance */ |
| 2722 int iEnd; /* Last token in coalesced phrase instance */ |
| 2723 }; |
| 2724 |
| 2725 /* |
| 2726 ** Advance the iterator to the next coalesced phrase instance. Return |
| 2727 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
| 2728 */ |
| 2729 static int fts5CInstIterNext(CInstIter *pIter){ |
| 2730 int rc = SQLITE_OK; |
| 2731 pIter->iStart = -1; |
| 2732 pIter->iEnd = -1; |
| 2733 |
| 2734 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ |
| 2735 int ip; int ic; int io; |
| 2736 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); |
| 2737 if( rc==SQLITE_OK ){ |
| 2738 if( ic==pIter->iCol ){ |
| 2739 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); |
| 2740 if( pIter->iStart<0 ){ |
| 2741 pIter->iStart = io; |
| 2742 pIter->iEnd = iEnd; |
| 2743 }else if( io<=pIter->iEnd ){ |
| 2744 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; |
| 2745 }else{ |
| 2746 break; |
| 2747 } |
| 2748 } |
| 2749 pIter->iInst++; |
| 2750 } |
| 2751 } |
| 2752 |
| 2753 return rc; |
| 2754 } |
| 2755 |
| 2756 /* |
| 2757 ** Initialize the iterator object indicated by the final parameter to |
| 2758 ** iterate through coalesced phrase instances in column iCol. |
| 2759 */ |
| 2760 static int fts5CInstIterInit( |
| 2761 const Fts5ExtensionApi *pApi, |
| 2762 Fts5Context *pFts, |
| 2763 int iCol, |
| 2764 CInstIter *pIter |
| 2765 ){ |
| 2766 int rc; |
| 2767 |
| 2768 memset(pIter, 0, sizeof(CInstIter)); |
| 2769 pIter->pApi = pApi; |
| 2770 pIter->pFts = pFts; |
| 2771 pIter->iCol = iCol; |
| 2772 rc = pApi->xInstCount(pFts, &pIter->nInst); |
| 2773 |
| 2774 if( rc==SQLITE_OK ){ |
| 2775 rc = fts5CInstIterNext(pIter); |
| 2776 } |
| 2777 |
| 2778 return rc; |
| 2779 } |
| 2780 |
| 2781 |
| 2782 |
| 2783 /************************************************************************* |
| 2784 ** Start of highlight() implementation. |
| ** |
| ** A HighlightContext is the state shared between fts5HighlightFunction() |
| ** and the fts5HighlightCb() tokenizer callback while the output string is |
| ** being assembled in zOut. |
| 2785 */ |
| 2786 typedef struct HighlightContext HighlightContext; |
| 2787 struct HighlightContext { |
| 2788 CInstIter iter; /* Coalesced Instance Iterator */ |
| 2789 int iPos; /* Index of the next token; colocated tokens are not counted */ |
| 2790 int iRangeStart; /* First token to include */ |
| 2791 int iRangeEnd; /* If non-zero, last token to include */ |
| 2792 const char *zOpen; /* Opening highlight */ |
| 2793 const char *zClose; /* Closing highlight */ |
| 2794 const char *zIn; /* Input text */ |
| 2795 int nIn; /* Size of input text in bytes */ |
| 2796 int iOff; /* Offset in zIn[] of the first byte not yet copied to zOut */ |
| 2797 char *zOut; /* Output value, built with sqlite3_mprintf() */ |
| 2798 }; |
| 2799 |
| 2800 /* |
| 2801 ** Append text to the HighlightContext output string - p->zOut. Argument |
| 2802 ** z points to a buffer containing n bytes of text to append. If n is |
| 2803 ** negative, everything up until the first '\0' is appended to the output. |
| 2804 ** |
| 2805 ** If *pRc is set to any value other than SQLITE_OK when this function is |
| 2806 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, |
| 2807 ** *pRc is set to an error code before returning. |
| 2808 */ |
| 2809 static void fts5HighlightAppend( |
| 2810 int *pRc, |
| 2811 HighlightContext *p, |
| 2812 const char *z, int n |
| 2813 ){ |
| 2814 if( *pRc==SQLITE_OK ){ |
| 2815 if( n<0 ) n = (int)strlen(z); |
| 2816 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); |
| 2817 if( p->zOut==0 ) *pRc = SQLITE_NOMEM; |
| 2818 } |
| 2819 } |
| 2820 |
| 2821 /* |
| 2822 ** Tokenizer callback used by implementation of highlight() function. |
| ** |
| ** pContext is the HighlightContext being filled in. Colocated tokens are |
| ** ignored; every other token is assigned the next token index. When a |
| ** range is set (iRangeEnd>0), tokens outside iRangeStart..iRangeEnd are |
| ** skipped. At the first token of the current coalesced instance the |
| ** pending input text and the opening marker are appended; at its last |
| ** token the text up to the end of the token and the closing marker are |
| ** appended and the instance iterator is advanced. |
| ** |
| ** Returns SQLITE_OK, or an error code from appending to the output or |
| ** from fts5CInstIterNext(). |
| 2823 */ |
| 2824 static int fts5HighlightCb( |
| 2825 void *pContext, /* Pointer to HighlightContext object */ |
| 2826 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 2827 const char *pToken, /* Buffer containing token */ |
| 2828 int nToken, /* Size of token in bytes */ |
| 2829 int iStartOff, /* Start offset of token */ |
| 2830 int iEndOff /* End offset of token */ |
| 2831 ){ |
| 2832 HighlightContext *p = (HighlightContext*)pContext; |
| 2833 int rc = SQLITE_OK; |
| 2834 int iPos; |
| 2835 |
| 2836 UNUSED_PARAM2(pToken, nToken); |
| 2837 |
| 2838 if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; |
| 2839 iPos = p->iPos++; /* index of this token */ |
| 2840 |
| 2841 if( p->iRangeEnd>0 ){ |
| 2842 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; |
| 2843 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; /* output starts at the first token of the range */ |
| 2844 } |
| 2845 |
| 2846 if( iPos==p->iter.iStart ){ |
| 2847 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); |
| 2848 fts5HighlightAppend(&rc, p, p->zOpen, -1); |
| 2849 p->iOff = iStartOff; |
| 2850 } |
| 2851 |
| 2852 if( iPos==p->iter.iEnd ){ |
| 2853 if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ |
| 2854 fts5HighlightAppend(&rc, p, p->zOpen, -1); /* instance began before the range, so no opening marker has been written yet */ |
| 2855 } |
| 2856 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
| 2857 fts5HighlightAppend(&rc, p, p->zClose, -1); |
| 2858 p->iOff = iEndOff; |
| 2859 if( rc==SQLITE_OK ){ |
| 2860 rc = fts5CInstIterNext(&p->iter); |
| 2861 } |
| 2862 } |
| 2863 |
| 2864 if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ |
| 2865 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
| 2866 p->iOff = iEndOff; |
| 2867 if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){ |
| 2868 fts5HighlightAppend(&rc, p, p->zClose, -1); /* range ends inside an instance: close the highlight here */ |
| 2869 } |
| 2870 } |
| 2871 |
| 2872 return rc; |
| 2873 } |
| 2874 |
| 2875 /* |
| 2876 ** Implementation of highlight() function. |
| 2877 */ |
| 2878 static void fts5HighlightFunction( |
| 2879 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 2880 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 2881 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 2882 int nVal, /* Number of values in apVal[] array */ |
| 2883 sqlite3_value **apVal /* Array of trailing arguments */ |
| 2884 ){ |
| 2885 HighlightContext ctx; |
| 2886 int rc; |
| 2887 int iCol; |
| 2888 |
| 2889 if( nVal!=3 ){ |
| 2890 const char *zErr = "wrong number of arguments to function highlight()"; |
| 2891 sqlite3_result_error(pCtx, zErr, -1); |
| 2892 return; |
| 2893 } |
| 2894 |
| 2895 iCol = sqlite3_value_int(apVal[0]); |
| 2896 memset(&ctx, 0, sizeof(HighlightContext)); |
| 2897 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
| 2898 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
| 2899 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); |
| 2900 |
| 2901 if( ctx.zIn ){ |
| 2902 if( rc==SQLITE_OK ){ |
| 2903 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); |
| 2904 } |
| 2905 |
| 2906 if( rc==SQLITE_OK ){ |
| 2907 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
| 2908 } |
| 2909 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
| 2910 |
| 2911 if( rc==SQLITE_OK ){ |
| 2912 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
| 2913 } |
| 2914 sqlite3_free(ctx.zOut); |
| 2915 } |
| 2916 if( rc!=SQLITE_OK ){ |
| 2917 sqlite3_result_error_code(pCtx, rc); |
| 2918 } |
| 2919 } |
| 2920 /* |
| 2921 ** End of highlight() implementation. |
| 2922 **************************************************************************/ |
| 2923 |
| 2924 /* |
| 2925 ** Context object passed to the fts5SentenceFinderCb() function. |
| 2926 */ |
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Position of the next non-colocated token */
  int nFirstAlloc;                /* Number of slots allocated for aFirst[] */
  int nFirst;                     /* Number of slots of aFirst[] in use */
  int *aFirst;                    /* Token positions that begin a sentence */
  const char *zDoc;               /* Text of the document being tokenized */
};
| 2935 |
| 2936 /* |
| 2937 ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if |
| 2938 ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an |
| 2939 ** error occurs. |
| 2940 */ |
| 2941 static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ |
| 2942 if( p->nFirstAlloc==p->nFirst ){ |
| 2943 int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; |
| 2944 int *aNew; |
| 2945 |
| 2946 aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int)); |
| 2947 if( aNew==0 ) return SQLITE_NOMEM; |
| 2948 p->aFirst = aNew; |
| 2949 p->nFirstAlloc = nNew; |
| 2950 } |
| 2951 p->aFirst[p->nFirst++] = iAdd; |
| 2952 return SQLITE_OK; |
| 2953 } |
| 2954 |
| 2955 /* |
| 2956 ** This function is an xTokenize() callback used by the auxiliary snippet() |
| 2957 ** function. Its job is to identify tokens that are the first in a sentence. |
| 2958 ** For each such token, an entry is added to the SFinder.aFirst[] array. |
| 2959 */ |
| 2960 static int fts5SentenceFinderCb( |
| 2961 void *pContext, /* Pointer to HighlightContext object */ |
| 2962 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 2963 const char *pToken, /* Buffer containing token */ |
| 2964 int nToken, /* Size of token in bytes */ |
| 2965 int iStartOff, /* Start offset of token */ |
| 2966 int iEndOff /* End offset of token */ |
| 2967 ){ |
| 2968 int rc = SQLITE_OK; |
| 2969 |
| 2970 UNUSED_PARAM2(pToken, nToken); |
| 2971 UNUSED_PARAM(iEndOff); |
| 2972 |
| 2973 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ |
| 2974 Fts5SFinder *p = (Fts5SFinder*)pContext; |
| 2975 if( p->iPos>0 ){ |
| 2976 int i; |
| 2977 char c = 0; |
| 2978 for(i=iStartOff-1; i>=0; i--){ |
| 2979 c = p->zDoc[i]; |
| 2980 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; |
| 2981 } |
| 2982 if( i!=iStartOff-1 && (c=='.' || c==':') ){ |
| 2983 rc = fts5SentenceFinderAdd(p, p->iPos); |
| 2984 } |
| 2985 }else{ |
| 2986 rc = fts5SentenceFinderAdd(p, 0); |
| 2987 } |
| 2988 p->iPos++; |
| 2989 } |
| 2990 return rc; |
| 2991 } |
| 2992 |
| 2993 static int fts5SnippetScore( |
| 2994 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 2995 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 2996 int nDocsize, /* Size of column in tokens */ |
| 2997 unsigned char *aSeen, /* Array with one element per query phrase */ |
| 2998 int iCol, /* Column to score */ |
| 2999 int iPos, /* Starting offset to score */ |
| 3000 int nToken, /* Max tokens per snippet */ |
| 3001 int *pnScore, /* OUT: Score */ |
| 3002 int *piPos /* OUT: Adjusted offset */ |
| 3003 ){ |
| 3004 int rc; |
| 3005 int i; |
| 3006 int ip = 0; |
| 3007 int ic = 0; |
| 3008 int iOff = 0; |
| 3009 int iFirst = -1; |
| 3010 int nInst; |
| 3011 int nScore = 0; |
| 3012 int iLast = 0; |
| 3013 |
| 3014 rc = pApi->xInstCount(pFts, &nInst); |
| 3015 for(i=0; i<nInst && rc==SQLITE_OK; i++){ |
| 3016 rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); |
| 3017 if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){ |
| 3018 nScore += (aSeen[ip] ? 1 : 1000); |
| 3019 aSeen[ip] = 1; |
| 3020 if( iFirst<0 ) iFirst = iOff; |
| 3021 iLast = iOff + pApi->xPhraseSize(pFts, ip); |
| 3022 } |
| 3023 } |
| 3024 |
| 3025 *pnScore = nScore; |
| 3026 if( piPos ){ |
| 3027 int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; |
| 3028 if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; |
| 3029 if( iAdj<0 ) iAdj = 0; |
| 3030 *piPos = iAdj; |
| 3031 } |
| 3032 |
| 3033 return rc; |
| 3034 } |
| 3035 |
| 3036 /* |
| 3037 ** Implementation of snippet() function. |
| 3038 */ |
| 3039 static void fts5SnippetFunction( |
| 3040 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 3041 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 3042 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 3043 int nVal, /* Number of values in apVal[] array */ |
| 3044 sqlite3_value **apVal /* Array of trailing arguments */ |
| 3045 ){ |
| 3046 HighlightContext ctx; |
| 3047 int rc = SQLITE_OK; /* Return code */ |
| 3048 int iCol; /* 1st argument to snippet() */ |
| 3049 const char *zEllips; /* 4th argument to snippet() */ |
| 3050 int nToken; /* 5th argument to snippet() */ |
| 3051 int nInst = 0; /* Number of instance matches this row */ |
| 3052 int i; /* Used to iterate through instances */ |
| 3053 int nPhrase; /* Number of phrases in query */ |
| 3054 unsigned char *aSeen; /* Array of "seen instance" flags */ |
| 3055 int iBestCol; /* Column containing best snippet */ |
| 3056 int iBestStart = 0; /* First token of best snippet */ |
| 3057 int nBestScore = 0; /* Score of best snippet */ |
| 3058 int nColSize = 0; /* Total size of iBestCol in tokens */ |
| 3059 Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ |
| 3060 int nCol; |
| 3061 |
| 3062 if( nVal!=5 ){ |
| 3063 const char *zErr = "wrong number of arguments to function snippet()"; |
| 3064 sqlite3_result_error(pCtx, zErr, -1); |
| 3065 return; |
| 3066 } |
| 3067 |
| 3068 nCol = pApi->xColumnCount(pFts); |
| 3069 memset(&ctx, 0, sizeof(HighlightContext)); |
| 3070 iCol = sqlite3_value_int(apVal[0]); |
| 3071 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
| 3072 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
| 3073 zEllips = (const char*)sqlite3_value_text(apVal[3]); |
| 3074 nToken = sqlite3_value_int(apVal[4]); |
| 3075 |
| 3076 iBestCol = (iCol>=0 ? iCol : 0); |
| 3077 nPhrase = pApi->xPhraseCount(pFts); |
| 3078 aSeen = sqlite3_malloc(nPhrase); |
| 3079 if( aSeen==0 ){ |
| 3080 rc = SQLITE_NOMEM; |
| 3081 } |
| 3082 if( rc==SQLITE_OK ){ |
| 3083 rc = pApi->xInstCount(pFts, &nInst); |
| 3084 } |
| 3085 |
| 3086 memset(&sFinder, 0, sizeof(Fts5SFinder)); |
| 3087 for(i=0; i<nCol; i++){ |
| 3088 if( iCol<0 || iCol==i ){ |
| 3089 int nDoc; |
| 3090 int nDocsize; |
| 3091 int ii; |
| 3092 sFinder.iPos = 0; |
| 3093 sFinder.nFirst = 0; |
| 3094 rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); |
| 3095 if( rc!=SQLITE_OK ) break; |
| 3096 rc = pApi->xTokenize(pFts, |
| 3097 sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb |
| 3098 ); |
| 3099 if( rc!=SQLITE_OK ) break; |
| 3100 rc = pApi->xColumnSize(pFts, i, &nDocsize); |
| 3101 if( rc!=SQLITE_OK ) break; |
| 3102 |
| 3103 for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){ |
| 3104 int ip, ic, io; |
| 3105 int iAdj; |
| 3106 int nScore; |
| 3107 int jj; |
| 3108 |
| 3109 rc = pApi->xInst(pFts, ii, &ip, &ic, &io); |
| 3110 if( ic!=i || rc!=SQLITE_OK ) continue; |
| 3111 memset(aSeen, 0, nPhrase); |
| 3112 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
| 3113 io, nToken, &nScore, &iAdj |
| 3114 ); |
| 3115 if( rc==SQLITE_OK && nScore>nBestScore ){ |
| 3116 nBestScore = nScore; |
| 3117 iBestCol = i; |
| 3118 iBestStart = iAdj; |
| 3119 nColSize = nDocsize; |
| 3120 } |
| 3121 |
| 3122 if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ |
| 3123 for(jj=0; jj<(sFinder.nFirst-1); jj++){ |
| 3124 if( sFinder.aFirst[jj+1]>io ) break; |
| 3125 } |
| 3126 |
| 3127 if( sFinder.aFirst[jj]<io ){ |
| 3128 memset(aSeen, 0, nPhrase); |
| 3129 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
| 3130 sFinder.aFirst[jj], nToken, &nScore, 0 |
| 3131 ); |
| 3132 |
| 3133 nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); |
| 3134 if( rc==SQLITE_OK && nScore>nBestScore ){ |
| 3135 nBestScore = nScore; |
| 3136 iBestCol = i; |
| 3137 iBestStart = sFinder.aFirst[jj]; |
| 3138 nColSize = nDocsize; |
| 3139 } |
| 3140 } |
| 3141 } |
| 3142 } |
| 3143 } |
| 3144 } |
| 3145 |
| 3146 if( rc==SQLITE_OK ){ |
| 3147 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); |
| 3148 } |
| 3149 if( rc==SQLITE_OK && nColSize==0 ){ |
| 3150 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); |
| 3151 } |
| 3152 if( ctx.zIn ){ |
| 3153 if( rc==SQLITE_OK ){ |
| 3154 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); |
| 3155 } |
| 3156 |
| 3157 ctx.iRangeStart = iBestStart; |
| 3158 ctx.iRangeEnd = iBestStart + nToken - 1; |
| 3159 |
| 3160 if( iBestStart>0 ){ |
| 3161 fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
| 3162 } |
| 3163 |
| 3164 /* Advance iterator ctx.iter so that it points to the first coalesced |
| 3165 ** phrase instance at or following position iBestStart. */ |
| 3166 while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){ |
| 3167 rc = fts5CInstIterNext(&ctx.iter); |
| 3168 } |
| 3169 |
| 3170 if( rc==SQLITE_OK ){ |
| 3171 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
| 3172 } |
| 3173 if( ctx.iRangeEnd>=(nColSize-1) ){ |
| 3174 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
| 3175 }else{ |
| 3176 fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
| 3177 } |
| 3178 } |
| 3179 if( rc==SQLITE_OK ){ |
| 3180 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
| 3181 }else{ |
| 3182 sqlite3_result_error_code(pCtx, rc); |
| 3183 } |
| 3184 sqlite3_free(ctx.zOut); |
| 3185 sqlite3_free(aSeen); |
| 3186 sqlite3_free(sFinder.aFirst); |
| 3187 } |
| 3188 |
| 3189 /************************************************************************/ |
| 3190 |
| 3191 /* |
| 3192 ** The first time the bm25() function is called for a query, an instance |
| 3193 ** of the following structure is allocated and populated. |
| 3194 */ |
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of entries in aIDF[] and aFreq[] */
  double avgdl;                   /* Total token count divided by row count */
  double *aIDF;                   /* Inverse document frequency, per phrase */
  double *aFreq;                  /* Per-row scratch: weighted phrase counts */
};
| 3202 |
| 3203 /* |
| 3204 ** Callback used by fts5Bm25GetData() to count the number of rows in the |
| 3205 ** table matched by each individual phrase within the query. |
| 3206 */ |
| 3207 static int fts5CountCb( |
| 3208 const Fts5ExtensionApi *pApi, |
| 3209 Fts5Context *pFts, |
| 3210 void *pUserData /* Pointer to sqlite3_int64 variable */ |
| 3211 ){ |
| 3212 sqlite3_int64 *pn = (sqlite3_int64*)pUserData; |
| 3213 UNUSED_PARAM2(pApi, pFts); |
| 3214 (*pn)++; |
| 3215 return SQLITE_OK; |
| 3216 } |
| 3217 |
| 3218 /* |
| 3219 ** Set *ppData to point to the Fts5Bm25Data object for the current query. |
| 3220 ** If the object has not already been allocated, allocate and populate it |
| 3221 ** now. |
| 3222 */ |
| 3223 static int fts5Bm25GetData( |
| 3224 const Fts5ExtensionApi *pApi, |
| 3225 Fts5Context *pFts, |
| 3226 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ |
| 3227 ){ |
| 3228 int rc = SQLITE_OK; /* Return code */ |
| 3229 Fts5Bm25Data *p; /* Object to return */ |
| 3230 |
| 3231 p = pApi->xGetAuxdata(pFts, 0); |
| 3232 if( p==0 ){ |
| 3233 int nPhrase; /* Number of phrases in query */ |
| 3234 sqlite3_int64 nRow = 0; /* Number of rows in table */ |
| 3235 sqlite3_int64 nToken = 0; /* Number of tokens in table */ |
| 3236 int nByte; /* Bytes of space to allocate */ |
| 3237 int i; |
| 3238 |
| 3239 /* Allocate the Fts5Bm25Data object */ |
| 3240 nPhrase = pApi->xPhraseCount(pFts); |
| 3241 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); |
| 3242 p = (Fts5Bm25Data*)sqlite3_malloc(nByte); |
| 3243 if( p==0 ){ |
| 3244 rc = SQLITE_NOMEM; |
| 3245 }else{ |
| 3246 memset(p, 0, nByte); |
| 3247 p->nPhrase = nPhrase; |
| 3248 p->aIDF = (double*)&p[1]; |
| 3249 p->aFreq = &p->aIDF[nPhrase]; |
| 3250 } |
| 3251 |
| 3252 /* Calculate the average document length for this FTS5 table */ |
| 3253 if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); |
| 3254 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); |
| 3255 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; |
| 3256 |
| 3257 /* Calculate an IDF for each phrase in the query */ |
| 3258 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ |
| 3259 sqlite3_int64 nHit = 0; |
| 3260 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); |
| 3261 if( rc==SQLITE_OK ){ |
| 3262 /* Calculate the IDF (Inverse Document Frequency) for phrase i. |
| 3263 ** This is done using the standard BM25 formula as found on wikipedia: |
| 3264 ** |
| 3265 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) |
| 3266 ** |
| 3267 ** where "N" is the total number of documents in the set and nHit |
| 3268 ** is the number that contain at least one instance of the phrase |
| 3269 ** under consideration. |
| 3270 ** |
| 3271 ** The problem with this is that if (N < 2*nHit), the IDF is |
| 3272 ** negative. Which is undesirable. So the mimimum allowable IDF is |
| 3273 ** (1e-6) - roughly the same as a term that appears in just over |
| 3274 ** half of set of 5,000,000 documents. */ |
| 3275 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); |
| 3276 if( idf<=0.0 ) idf = 1e-6; |
| 3277 p->aIDF[i] = idf; |
| 3278 } |
| 3279 } |
| 3280 |
| 3281 if( rc!=SQLITE_OK ){ |
| 3282 sqlite3_free(p); |
| 3283 }else{ |
| 3284 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); |
| 3285 } |
| 3286 if( rc!=SQLITE_OK ) p = 0; |
| 3287 } |
| 3288 *ppData = p; |
| 3289 return rc; |
| 3290 } |
| 3291 |
| 3292 /* |
| 3293 ** Implementation of bm25() function. |
| 3294 */ |
| 3295 static void fts5Bm25Function( |
| 3296 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 3297 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 3298 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 3299 int nVal, /* Number of values in apVal[] array */ |
| 3300 sqlite3_value **apVal /* Array of trailing arguments */ |
| 3301 ){ |
| 3302 const double k1 = 1.2; /* Constant "k1" from BM25 formula */ |
| 3303 const double b = 0.75; /* Constant "b" from BM25 formula */ |
| 3304 int rc = SQLITE_OK; /* Error code */ |
| 3305 double score = 0.0; /* SQL function return value */ |
| 3306 Fts5Bm25Data *pData; /* Values allocated/calculated once only */ |
| 3307 int i; /* Iterator variable */ |
| 3308 int nInst = 0; /* Value returned by xInstCount() */ |
| 3309 double D = 0.0; /* Total number of tokens in row */ |
| 3310 double *aFreq = 0; /* Array of phrase freq. for current row */ |
| 3311 |
| 3312 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) |
| 3313 ** for each phrase in the query for the current row. */ |
| 3314 rc = fts5Bm25GetData(pApi, pFts, &pData); |
| 3315 if( rc==SQLITE_OK ){ |
| 3316 aFreq = pData->aFreq; |
| 3317 memset(aFreq, 0, sizeof(double) * pData->nPhrase); |
| 3318 rc = pApi->xInstCount(pFts, &nInst); |
| 3319 } |
| 3320 for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
| 3321 int ip; int ic; int io; |
| 3322 rc = pApi->xInst(pFts, i, &ip, &ic, &io); |
| 3323 if( rc==SQLITE_OK ){ |
| 3324 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; |
| 3325 aFreq[ip] += w; |
| 3326 } |
| 3327 } |
| 3328 |
| 3329 /* Figure out the total size of the current row in tokens. */ |
| 3330 if( rc==SQLITE_OK ){ |
| 3331 int nTok; |
| 3332 rc = pApi->xColumnSize(pFts, -1, &nTok); |
| 3333 D = (double)nTok; |
| 3334 } |
| 3335 |
| 3336 /* Determine the BM25 score for the current row. */ |
| 3337 for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){ |
| 3338 score += pData->aIDF[i] * ( |
| 3339 ( aFreq[i] * (k1 + 1.0) ) / |
| 3340 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) |
| 3341 ); |
| 3342 } |
| 3343 |
| 3344 /* If no error has occurred, return the calculated score. Otherwise, |
| 3345 ** throw an SQL exception. */ |
| 3346 if( rc==SQLITE_OK ){ |
| 3347 sqlite3_result_double(pCtx, -1.0 * score); |
| 3348 }else{ |
| 3349 sqlite3_result_error_code(pCtx, rc); |
| 3350 } |
| 3351 } |
| 3352 |
| 3353 static int sqlite3Fts5AuxInit(fts5_api *pApi){ |
| 3354 struct Builtin { |
| 3355 const char *zFunc; /* Function name (nul-terminated) */ |
| 3356 void *pUserData; /* User-data pointer */ |
| 3357 fts5_extension_function xFunc;/* Callback function */ |
| 3358 void (*xDestroy)(void*); /* Destructor function */ |
| 3359 } aBuiltin [] = { |
| 3360 { "snippet", 0, fts5SnippetFunction, 0 }, |
| 3361 { "highlight", 0, fts5HighlightFunction, 0 }, |
| 3362 { "bm25", 0, fts5Bm25Function, 0 }, |
| 3363 }; |
| 3364 int rc = SQLITE_OK; /* Return code */ |
| 3365 int i; /* To iterate through builtin functions */ |
| 3366 |
| 3367 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ |
| 3368 rc = pApi->xCreateFunction(pApi, |
| 3369 aBuiltin[i].zFunc, |
| 3370 aBuiltin[i].pUserData, |
| 3371 aBuiltin[i].xFunc, |
| 3372 aBuiltin[i].xDestroy |
| 3373 ); |
| 3374 } |
| 3375 |
| 3376 return rc; |
| 3377 } |
| 3378 |
| 3379 |
| 3380 |
| 3381 /* |
| 3382 ** 2014 May 31 |
| 3383 ** |
| 3384 ** The author disclaims copyright to this source code. In place of |
| 3385 ** a legal notice, here is a blessing: |
| 3386 ** |
| 3387 ** May you do good and not evil. |
| 3388 ** May you find forgiveness for yourself and forgive others. |
| 3389 ** May you share freely, never taking more than you give. |
| 3390 ** |
| 3391 ****************************************************************************** |
| 3392 */ |
| 3393 |
| 3394 |
| 3395 |
| 3396 /* #include "fts5Int.h" */ |
| 3397 |
| 3398 static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ |
| 3399 if( (u32)pBuf->nSpace<nByte ){ |
| 3400 u32 nNew = pBuf->nSpace ? pBuf->nSpace : 64; |
| 3401 u8 *pNew; |
| 3402 while( nNew<nByte ){ |
| 3403 nNew = nNew * 2; |
| 3404 } |
| 3405 pNew = sqlite3_realloc(pBuf->p, nNew); |
| 3406 if( pNew==0 ){ |
| 3407 *pRc = SQLITE_NOMEM; |
| 3408 return 1; |
| 3409 }else{ |
| 3410 pBuf->nSpace = nNew; |
| 3411 pBuf->p = pNew; |
| 3412 } |
| 3413 } |
| 3414 return 0; |
| 3415 } |
| 3416 |
| 3417 |
| 3418 /* |
| 3419 ** Encode value iVal as an SQLite varint and append it to the buffer object |
| 3420 ** pBuf. If an OOM error occurs, set the error code in p. |
| 3421 */ |
| 3422 static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ |
| 3423 if( fts5BufferGrow(pRc, pBuf, 9) ) return; |
| 3424 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); |
| 3425 } |
| 3426 |
| 3427 static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ |
| 3428 aBuf[0] = (iVal>>24) & 0x00FF; |
| 3429 aBuf[1] = (iVal>>16) & 0x00FF; |
| 3430 aBuf[2] = (iVal>> 8) & 0x00FF; |
| 3431 aBuf[3] = (iVal>> 0) & 0x00FF; |
| 3432 } |
| 3433 |
| 3434 static int sqlite3Fts5Get32(const u8 *aBuf){ |
| 3435 return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3]; |
| 3436 } |
| 3437 |
| 3438 /* |
| 3439 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set |
| 3440 ** the error code in p. If an error has already occurred when this function |
| 3441 ** is called, it is a no-op. |
| 3442 */ |
| 3443 static void sqlite3Fts5BufferAppendBlob( |
| 3444 int *pRc, |
| 3445 Fts5Buffer *pBuf, |
| 3446 u32 nData, |
| 3447 const u8 *pData |
| 3448 ){ |
| 3449 assert_nc( *pRc || nData>=0 ); |
| 3450 if( fts5BufferGrow(pRc, pBuf, nData) ) return; |
| 3451 memcpy(&pBuf->p[pBuf->n], pData, nData); |
| 3452 pBuf->n += nData; |
| 3453 } |
| 3454 |
| 3455 /* |
| 3456 ** Append the nul-terminated string zStr to the buffer pBuf. This function |
| 3457 ** ensures that the byte following the buffer data is set to 0x00, even |
| 3458 ** though this byte is not included in the pBuf->n count. |
| 3459 */ |
| 3460 static void sqlite3Fts5BufferAppendString( |
| 3461 int *pRc, |
| 3462 Fts5Buffer *pBuf, |
| 3463 const char *zStr |
| 3464 ){ |
| 3465 int nStr = (int)strlen(zStr); |
| 3466 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); |
| 3467 pBuf->n--; |
| 3468 } |
| 3469 |
| 3470 /* |
| 3471 ** Argument zFmt is a printf() style format string. This function performs |
| 3472 ** the printf() style processing, then appends the results to buffer pBuf. |
| 3473 ** |
| 3474 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte |
| 3475 ** following the buffer data is set to 0x00, even though this byte is not |
| 3476 ** included in the pBuf->n count. |
| 3477 */ |
| 3478 static void sqlite3Fts5BufferAppendPrintf( |
| 3479 int *pRc, |
| 3480 Fts5Buffer *pBuf, |
| 3481 char *zFmt, ... |
| 3482 ){ |
| 3483 if( *pRc==SQLITE_OK ){ |
| 3484 char *zTmp; |
| 3485 va_list ap; |
| 3486 va_start(ap, zFmt); |
| 3487 zTmp = sqlite3_vmprintf(zFmt, ap); |
| 3488 va_end(ap); |
| 3489 |
| 3490 if( zTmp==0 ){ |
| 3491 *pRc = SQLITE_NOMEM; |
| 3492 }else{ |
| 3493 sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); |
| 3494 sqlite3_free(zTmp); |
| 3495 } |
| 3496 } |
| 3497 } |
| 3498 |
| 3499 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ |
| 3500 char *zRet = 0; |
| 3501 if( *pRc==SQLITE_OK ){ |
| 3502 va_list ap; |
| 3503 va_start(ap, zFmt); |
| 3504 zRet = sqlite3_vmprintf(zFmt, ap); |
| 3505 va_end(ap); |
| 3506 if( zRet==0 ){ |
| 3507 *pRc = SQLITE_NOMEM; |
| 3508 } |
| 3509 } |
| 3510 return zRet; |
| 3511 } |
| 3512 |
| 3513 |
| 3514 /* |
| 3515 ** Free any buffer allocated by pBuf. Zero the structure before returning. |
| 3516 */ |
| 3517 static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ |
| 3518 sqlite3_free(pBuf->p); |
| 3519 memset(pBuf, 0, sizeof(Fts5Buffer)); |
| 3520 } |
| 3521 |
| 3522 /* |
| 3523 ** Zero the contents of the buffer object. But do not free the associated |
| 3524 ** memory allocation. |
| 3525 */ |
| 3526 static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ |
| 3527 pBuf->n = 0; |
| 3528 } |
| 3529 |
| 3530 /* |
| 3531 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave |
| 3532 ** the error code in *pRc. If an error has already occurred when this function |
| 3533 ** is called, it is a no-op. |
| 3534 */ |
| 3535 static void sqlite3Fts5BufferSet( |
| 3536 int *pRc, |
| 3537 Fts5Buffer *pBuf, |
| 3538 int nData, |
| 3539 const u8 *pData |
| 3540 ){ |
| 3541 pBuf->n = 0; |
| 3542 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); |
| 3543 } |
| 3544 |
| 3545 static int sqlite3Fts5PoslistNext64( |
| 3546 const u8 *a, int n, /* Buffer containing poslist */ |
| 3547 int *pi, /* IN/OUT: Offset within a[] */ |
| 3548 i64 *piOff /* IN/OUT: Current offset */ |
| 3549 ){ |
| 3550 int i = *pi; |
| 3551 if( i>=n ){ |
| 3552 /* EOF */ |
| 3553 *piOff = -1; |
| 3554 return 1; |
| 3555 }else{ |
| 3556 i64 iOff = *piOff; |
| 3557 int iVal; |
| 3558 fts5FastGetVarint32(a, i, iVal); |
| 3559 if( iVal==1 ){ |
| 3560 fts5FastGetVarint32(a, i, iVal); |
| 3561 iOff = ((i64)iVal) << 32; |
| 3562 fts5FastGetVarint32(a, i, iVal); |
| 3563 } |
| 3564 *piOff = iOff + (iVal-2); |
| 3565 *pi = i; |
| 3566 return 0; |
| 3567 } |
| 3568 } |
| 3569 |
| 3570 |
| 3571 /* |
| 3572 ** Advance the iterator object passed as the only argument. Return true |
| 3573 ** if the iterator reaches EOF, or false otherwise. |
| 3574 */ |
| 3575 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ |
| 3576 if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ |
| 3577 pIter->bEof = 1; |
| 3578 } |
| 3579 return pIter->bEof; |
| 3580 } |
| 3581 |
| 3582 static int sqlite3Fts5PoslistReaderInit( |
| 3583 const u8 *a, int n, /* Poslist buffer to iterate through */ |
| 3584 Fts5PoslistReader *pIter /* Iterator object to initialize */ |
| 3585 ){ |
| 3586 memset(pIter, 0, sizeof(*pIter)); |
| 3587 pIter->a = a; |
| 3588 pIter->n = n; |
| 3589 sqlite3Fts5PoslistReaderNext(pIter); |
| 3590 return pIter->bEof; |
| 3591 } |
| 3592 |
| 3593 /* |
| 3594 ** Append position iPos to the position list being accumulated in buffer |
| 3595 ** pBuf, which must already be large enough to hold the new data. |
| 3596 ** The previous position written to this list is *piPrev. *piPrev is set |
| 3597 ** to iPos before returning. |
| 3598 */ |
| 3599 static void sqlite3Fts5PoslistSafeAppend( |
| 3600 Fts5Buffer *pBuf, |
| 3601 i64 *piPrev, |
| 3602 i64 iPos |
| 3603 ){ |
| 3604 static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; |
| 3605 if( (iPos & colmask) != (*piPrev & colmask) ){ |
| 3606 pBuf->p[pBuf->n++] = 1; |
| 3607 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); |
| 3608 *piPrev = (iPos & colmask); |
| 3609 } |
| 3610 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); |
| 3611 *piPrev = iPos; |
| 3612 } |
| 3613 |
| 3614 static int sqlite3Fts5PoslistWriterAppend( |
| 3615 Fts5Buffer *pBuf, |
| 3616 Fts5PoslistWriter *pWriter, |
| 3617 i64 iPos |
| 3618 ){ |
| 3619 int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ |
| 3620 if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc; |
| 3621 sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); |
| 3622 return SQLITE_OK; |
| 3623 } |
| 3624 |
| 3625 static void *sqlite3Fts5MallocZero(int *pRc, int nByte){ |
| 3626 void *pRet = 0; |
| 3627 if( *pRc==SQLITE_OK ){ |
| 3628 pRet = sqlite3_malloc(nByte); |
| 3629 if( pRet==0 && nByte>0 ){ |
| 3630 *pRc = SQLITE_NOMEM; |
| 3631 }else{ |
| 3632 memset(pRet, 0, nByte); |
| 3633 } |
| 3634 } |
| 3635 return pRet; |
| 3636 } |
| 3637 |
| 3638 /* |
| 3639 ** Return a nul-terminated copy of the string indicated by pIn. If nIn |
| 3640 ** is non-negative, then it is the length of the string in bytes. Otherwise, |
| 3641 ** the length of the string is determined using strlen(). |
| 3642 ** |
| 3643 ** It is the responsibility of the caller to eventually free the returned |
| 3644 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. |
| 3645 */ |
| 3646 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ |
| 3647 char *zRet = 0; |
| 3648 if( *pRc==SQLITE_OK ){ |
| 3649 if( nIn<0 ){ |
| 3650 nIn = (int)strlen(pIn); |
| 3651 } |
| 3652 zRet = (char*)sqlite3_malloc(nIn+1); |
| 3653 if( zRet ){ |
| 3654 memcpy(zRet, pIn, nIn); |
| 3655 zRet[nIn] = '\0'; |
| 3656 }else{ |
| 3657 *pRc = SQLITE_NOMEM; |
| 3658 } |
| 3659 } |
| 3660 return zRet; |
| 3661 } |
| 3662 |
| 3663 |
| 3664 /* |
| 3665 ** Return true if character 't' may be part of an FTS5 bareword, or false |
| 3666 ** otherwise. Characters that may be part of barewords: |
| 3667 ** |
| 3668 ** * All non-ASCII characters, |
| 3669 ** * The 52 upper and lower case ASCII characters, and |
| 3670 ** * The 10 integer ASCII characters. |
| 3671 ** * The underscore character "_" (0x5F). |
| 3672 ** * The unicode "substitute" character (0x1A). |
| 3673 */ |
/*
** Return non-zero if byte t may appear in an FTS5 bareword, zero if not.
** Any byte with the high bit set qualifies; for 7-bit values the answer
** comes from the lookup table below.
**
** The table is static const so that it is built once at compile time
** rather than re-initialized on the stack on every call.
*/
static int sqlite3Fts5IsBareword(char t){
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  /* The high-bit test short-circuits, so the table is only ever indexed
  ** with values in the range 0..127. */
  return (t & 0x80) || aBareword[(int)t];
}
| 3688 |
| 3689 |
| 3690 /************************************************************************* |
| 3691 */ |
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term bytes (not nul-terminated) */
  int nTerm;                      /* Size of pTerm[] in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash bucket */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash buckets, each a linked list */
};
| 3703 |
| 3704 static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ |
| 3705 int rc = SQLITE_OK; |
| 3706 *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); |
| 3707 return rc; |
| 3708 } |
| 3709 |
| 3710 static int sqlite3Fts5TermsetAdd( |
| 3711 Fts5Termset *p, |
| 3712 int iIdx, |
| 3713 const char *pTerm, int nTerm, |
| 3714 int *pbPresent |
| 3715 ){ |
| 3716 int rc = SQLITE_OK; |
| 3717 *pbPresent = 0; |
| 3718 if( p ){ |
| 3719 int i; |
| 3720 u32 hash = 13; |
| 3721 Fts5TermsetEntry *pEntry; |
| 3722 |
| 3723 /* Calculate a hash value for this term. This is the same hash checksum |
| 3724 ** used by the fts5_hash.c module. This is not important for correct |
| 3725 ** operation of the module, but is necessary to ensure that some tests |
| 3726 ** designed to produce hash table collisions really do work. */ |
| 3727 for(i=nTerm-1; i>=0; i--){ |
| 3728 hash = (hash << 3) ^ hash ^ pTerm[i]; |
| 3729 } |
| 3730 hash = (hash << 3) ^ hash ^ iIdx; |
| 3731 hash = hash % ArraySize(p->apHash); |
| 3732 |
| 3733 for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ |
| 3734 if( pEntry->iIdx==iIdx |
| 3735 && pEntry->nTerm==nTerm |
| 3736 && memcmp(pEntry->pTerm, pTerm, nTerm)==0 |
| 3737 ){ |
| 3738 *pbPresent = 1; |
| 3739 break; |
| 3740 } |
| 3741 } |
| 3742 |
| 3743 if( pEntry==0 ){ |
| 3744 pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); |
| 3745 if( pEntry ){ |
| 3746 pEntry->pTerm = (char*)&pEntry[1]; |
| 3747 pEntry->nTerm = nTerm; |
| 3748 pEntry->iIdx = iIdx; |
| 3749 memcpy(pEntry->pTerm, pTerm, nTerm); |
| 3750 pEntry->pNext = p->apHash[hash]; |
| 3751 p->apHash[hash] = pEntry; |
| 3752 } |
| 3753 } |
| 3754 } |
| 3755 |
| 3756 return rc; |
| 3757 } |
| 3758 |
| 3759 static void sqlite3Fts5TermsetFree(Fts5Termset *p){ |
| 3760 if( p ){ |
| 3761 u32 i; |
| 3762 for(i=0; i<ArraySize(p->apHash); i++){ |
| 3763 Fts5TermsetEntry *pEntry = p->apHash[i]; |
| 3764 while( pEntry ){ |
| 3765 Fts5TermsetEntry *pDel = pEntry; |
| 3766 pEntry = pEntry->pNext; |
| 3767 sqlite3_free(pDel); |
| 3768 } |
| 3769 } |
| 3770 sqlite3_free(p); |
| 3771 } |
| 3772 } |
| 3773 |
| 3774 /* |
| 3775 ** 2014 Jun 09 |
| 3776 ** |
| 3777 ** The author disclaims copyright to this source code. In place of |
| 3778 ** a legal notice, here is a blessing: |
| 3779 ** |
| 3780 ** May you do good and not evil. |
| 3781 ** May you find forgiveness for yourself and forgive others. |
| 3782 ** May you share freely, never taking more than you give. |
| 3783 ** |
| 3784 ****************************************************************************** |
| 3785 ** |
| 3786 ** This is an SQLite module implementing full-text search. |
| 3787 */ |
| 3788 |
| 3789 |
| 3790 /* #include "fts5Int.h" */ |
| 3791 |
/* Built-in default values. NOTE(review): judging by the names these are
** the defaults for the page size, the automerge/usermerge/crisismerge
** thresholds and the hash size - confirm at the sites that use them. */
#define FTS5_DEFAULT_PAGE_SIZE     4050
#define FTS5_DEFAULT_AUTOMERGE     4
#define FTS5_DEFAULT_USERMERGE     4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE      (1024*1024)

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE         (128*1024)
| 3800 |
/* Return 1 if x is the space character (0x20), 0 for anything else. */
static int fts5_iswhitespace(char x){
  if( x==' ' ){
    return 1;
  }
  return 0;
}
| 3804 |
/* Return 1 if x is one of the characters " ' [ or `, 0 otherwise. */
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
| 3808 |
| 3809 /* |
| 3810 ** Argument pIn points to a character that is part of a nul-terminated |
| 3811 ** string. Return a pointer to the first character following *pIn in |
| 3812 ** the string that is not a white-space character. |
| 3813 */ |
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *pScan;

  /* A NULL input is passed straight through. */
  if( pIn==0 ){
    return pIn;
  }
  for(pScan=pIn; fts5_iswhitespace(*pScan); pScan++){
    /* skip */
  }
  return pScan;
}
| 3821 |
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Skip over the run of "bareword" characters (those for which
** sqlite3Fts5IsBareword() returns true) that starts at *pIn and return
** a pointer to the first character following the run.
**
** If *pIn is not a bareword character - i.e. the run is empty - NULL
** is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *zEnd = pIn;
  while( sqlite3Fts5IsBareword(*zEnd) ){
    zEnd++;
  }
  return (zEnd==pIn) ? 0 : zEnd;
}
| 3833 |
/*
** Return 1 if character a is an ASCII decimal digit ('0'..'9'), or 0
** otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  return a<='9';
}
| 3837 |
| 3838 |
| 3839 |
/*
** Argument pIn points to the first character of what is expected to be
** an SQL literal embedded in a nul-terminated string. The forms accepted
** are:
**
**   * NULL (in any case),
**   * a blob literal: x'...' or X'...' enclosing an even number of
**     hexadecimal digits,
**   * a string literal in single quotes, where '' inside the string
**     stands for one embedded quote,
**   * a number: an optional sign, zero or more digits and an optional
**     fractional part (a '.' followed by at least one digit). Exponents
**     are not recognized. Because the sign alone advances the cursor, a
**     lone '+' or '-' is accepted by this rule.
**
** If a literal is found, a pointer to the character immediately following
** it is returned. Otherwise NULL is returned.
**
** This function never reads beyond the nul-terminator of the string.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* p is at offset (2 + number-of-hex-digits) from pIn, so an even
        ** offset means an even number of hex digits. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        /* Test for the terminator BEFORE advancing. Otherwise an input
        ** consisting of just the opening quote would step the cursor past
        ** the end of the string. */
        if( *p==0 ){
          p = 0;                      /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;       /* That was the closing quote */
          /* Otherwise this is an escaped quote (''). Fall through to
          ** skip its second character. */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by one or more digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
| 3902 |
/*
** The first character of the nul-terminated string z is guaranteed to be
** an open-quote character (see function fts5_isopenquote()). The matching
** close-quote is the same character, except that '[' is closed by ']'.
**
** The string is dequoted in place: the open-quote is dropped, each doubled
** close-quote character inside the string is reduced to a single one, and
** a new nul-terminator is written where the output ends.
**
** The value returned is the byte offset, within the original content of
** z, of the character immediately following the close-quote. If the end
** of the string is reached without finding a close-quote, the loop simply
** stops and the offset of the original nul-terminator is returned. The
** ALWAYS() wrapper on the loop condition indicates that callers are
** expected to pass only properly terminated strings.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next byte of input to examine */
  int iWrite = 0;                 /* Next byte of output to write */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  while( ALWAYS(z[iRead]) ){
    if( z[iRead]!=cClose ){
      /* Ordinary character. Copy it through. */
      z[iWrite++] = z[iRead++];
      continue;
    }
    if( z[iRead+1]==cClose ){
      /* A doubled quote. Emit one quote character, consume two. */
      z[iWrite++] = cClose;
      iRead += 2;
      continue;
    }
    /* A lone quote character - this is the close-quote. */
    iRead++;
    break;
  }

  z[iWrite] = '\0';
  return iRead;
}
| 3947 |
/*
** If nul-terminated string z begins with one of the four quote characters
** recognized by this module, strip the quotes in place using fts5Dequote().
** If it begins with any other character, this function is a no-op.
**
** The first character of z must not be a white-space character.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  if( fts5_isopenquote(z[0]) ){
    fts5Dequote(z);
  }
}
| 3970 |
| 3971 |
/*
** One entry of a keyword -> integer lookup table, as consumed by
** fts5ConfigSetEnum(). A table is an array of these terminated by an
** entry with zName==0.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Keyword text */
  int eVal;                       /* Value the keyword maps to */
};
| 3977 |
| 3978 static int fts5ConfigSetEnum( |
| 3979 const Fts5Enum *aEnum, |
| 3980 const char *zEnum, |
| 3981 int *peVal |
| 3982 ){ |
| 3983 int nEnum = (int)strlen(zEnum); |
| 3984 int i; |
| 3985 int iVal = -1; |
| 3986 |
| 3987 for(i=0; aEnum[i].zName; i++){ |
| 3988 if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ |
| 3989 if( iVal>=0 ) return SQLITE_ERROR; |
| 3990 iVal = aEnum[i].eVal; |
| 3991 } |
| 3992 } |
| 3993 |
| 3994 *peVal = iVal; |
| 3995 return iVal<0 ? SQLITE_ERROR : SQLITE_OK; |
| 3996 } |
| 3997 |
| 3998 /* |
| 3999 ** Parse a "special" CREATE VIRTUAL TABLE directive and update |
| 4000 ** configuration object pConfig as appropriate. |
| 4001 ** |
| 4002 ** If successful, object pConfig is updated and SQLITE_OK returned. If |
| 4003 ** an error occurs, an SQLite error code is returned and an error message |
| 4004 ** may be left in *pzErr. It is the responsibility of the caller to |
| 4005 ** eventually free any such error message using sqlite3_free(). |
| 4006 */ |
| 4007 static int fts5ConfigParseSpecial( |
| 4008 Fts5Global *pGlobal, |
| 4009 Fts5Config *pConfig, /* Configuration object to update */ |
| 4010 const char *zCmd, /* Special command to parse */ |
| 4011 const char *zArg, /* Argument to parse */ |
| 4012 char **pzErr /* OUT: Error message */ |
| 4013 ){ |
| 4014 int rc = SQLITE_OK; |
| 4015 int nCmd = (int)strlen(zCmd); |
| 4016 if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ |
| 4017 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; |
| 4018 const char *p; |
| 4019 int bFirst = 1; |
| 4020 if( pConfig->aPrefix==0 ){ |
| 4021 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); |
| 4022 if( rc ) return rc; |
| 4023 } |
| 4024 |
| 4025 p = zArg; |
| 4026 while( 1 ){ |
| 4027 int nPre = 0; |
| 4028 |
| 4029 while( p[0]==' ' ) p++; |
| 4030 if( bFirst==0 && p[0]==',' ){ |
| 4031 p++; |
| 4032 while( p[0]==' ' ) p++; |
| 4033 }else if( p[0]=='\0' ){ |
| 4034 break; |
| 4035 } |
| 4036 if( p[0]<'0' || p[0]>'9' ){ |
| 4037 *pzErr = sqlite3_mprintf("malformed prefix=... directive"); |
| 4038 rc = SQLITE_ERROR; |
| 4039 break; |
| 4040 } |
| 4041 |
| 4042 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){ |
| 4043 *pzErr = sqlite3_mprintf( |
| 4044 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES |
| 4045 ); |
| 4046 rc = SQLITE_ERROR; |
| 4047 break; |
| 4048 } |
| 4049 |
| 4050 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ |
| 4051 nPre = nPre*10 + (p[0] - '0'); |
| 4052 p++; |
| 4053 } |
| 4054 |
| 4055 if( nPre<=0 || nPre>=1000 ){ |
| 4056 *pzErr = sqlite3_mprintf("prefix length out of range (max 999)"); |
| 4057 rc = SQLITE_ERROR; |
| 4058 break; |
| 4059 } |
| 4060 |
| 4061 pConfig->aPrefix[pConfig->nPrefix] = nPre; |
| 4062 pConfig->nPrefix++; |
| 4063 bFirst = 0; |
| 4064 } |
| 4065 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES ); |
| 4066 return rc; |
| 4067 } |
| 4068 |
| 4069 if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ |
| 4070 const char *p = (const char*)zArg; |
| 4071 int nArg = (int)strlen(zArg) + 1; |
| 4072 char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); |
| 4073 char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); |
| 4074 char *pSpace = pDel; |
| 4075 |
| 4076 if( azArg && pSpace ){ |
| 4077 if( pConfig->pTok ){ |
| 4078 *pzErr = sqlite3_mprintf("multiple tokenize=... directives"); |
| 4079 rc = SQLITE_ERROR; |
| 4080 }else{ |
| 4081 for(nArg=0; p && *p; nArg++){ |
| 4082 const char *p2 = fts5ConfigSkipWhitespace(p); |
| 4083 if( *p2=='\'' ){ |
| 4084 p = fts5ConfigSkipLiteral(p2); |
| 4085 }else{ |
| 4086 p = fts5ConfigSkipBareword(p2); |
| 4087 } |
| 4088 if( p ){ |
| 4089 memcpy(pSpace, p2, p-p2); |
| 4090 azArg[nArg] = pSpace; |
| 4091 sqlite3Fts5Dequote(pSpace); |
| 4092 pSpace += (p - p2) + 1; |
| 4093 p = fts5ConfigSkipWhitespace(p); |
| 4094 } |
| 4095 } |
| 4096 if( p==0 ){ |
| 4097 *pzErr = sqlite3_mprintf("parse error in tokenize directive"); |
| 4098 rc = SQLITE_ERROR; |
| 4099 }else{ |
| 4100 rc = sqlite3Fts5GetTokenizer(pGlobal, |
| 4101 (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi, |
| 4102 pzErr |
| 4103 ); |
| 4104 } |
| 4105 } |
| 4106 } |
| 4107 |
| 4108 sqlite3_free(azArg); |
| 4109 sqlite3_free(pDel); |
| 4110 return rc; |
| 4111 } |
| 4112 |
| 4113 if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ |
| 4114 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ |
| 4115 *pzErr = sqlite3_mprintf("multiple content=... directives"); |
| 4116 rc = SQLITE_ERROR; |
| 4117 }else{ |
| 4118 if( zArg[0] ){ |
| 4119 pConfig->eContent = FTS5_CONTENT_EXTERNAL; |
| 4120 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); |
| 4121 }else{ |
| 4122 pConfig->eContent = FTS5_CONTENT_NONE; |
| 4123 } |
| 4124 } |
| 4125 return rc; |
| 4126 } |
| 4127 |
| 4128 if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ |
| 4129 if( pConfig->zContentRowid ){ |
| 4130 *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); |
| 4131 rc = SQLITE_ERROR; |
| 4132 }else{ |
| 4133 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); |
| 4134 } |
| 4135 return rc; |
| 4136 } |
| 4137 |
| 4138 if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){ |
| 4139 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ |
| 4140 *pzErr = sqlite3_mprintf("malformed columnsize=... directive"); |
| 4141 rc = SQLITE_ERROR; |
| 4142 }else{ |
| 4143 pConfig->bColumnsize = (zArg[0]=='1'); |
| 4144 } |
| 4145 return rc; |
| 4146 } |
| 4147 |
| 4148 if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){ |
| 4149 const Fts5Enum aDetail[] = { |
| 4150 { "none", FTS5_DETAIL_NONE }, |
| 4151 { "full", FTS5_DETAIL_FULL }, |
| 4152 { "columns", FTS5_DETAIL_COLUMNS }, |
| 4153 { 0, 0 } |
| 4154 }; |
| 4155 |
| 4156 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ |
| 4157 *pzErr = sqlite3_mprintf("malformed detail=... directive"); |
| 4158 } |
| 4159 return rc; |
| 4160 } |
| 4161 |
| 4162 *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); |
| 4163 return SQLITE_ERROR; |
| 4164 } |
| 4165 |
| 4166 /* |
| 4167 ** Allocate an instance of the default tokenizer ("simple") at |
| 4168 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error |
| 4169 ** code if an error occurs. |
| 4170 */ |
| 4171 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ |
| 4172 assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); |
| 4173 return sqlite3Fts5GetTokenizer( |
| 4174 pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0 |
| 4175 ); |
| 4176 } |
| 4177 |
| 4178 /* |
| 4179 ** Gobble up the first bareword or quoted word from the input buffer zIn. |
| 4180 ** Return a pointer to the character immediately following the last in |
| 4181 ** the gobbled word if successful, or a NULL pointer otherwise (failed |
| 4182 ** to find close-quote character). |
| 4183 ** |
| 4184 ** Before returning, set pzOut to point to a new buffer containing a |
| 4185 ** nul-terminated, dequoted copy of the gobbled word. If the word was |
| 4186 ** quoted, *pbQuoted is also set to 1 before returning. |
| 4187 ** |
| 4188 ** If *pRc is other than SQLITE_OK when this function is called, it is |
| 4189 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this |
| 4190 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* |
| 4191 ** set if a parse error (failed to find close quote) occurs. |
| 4192 */ |
| 4193 static const char *fts5ConfigGobbleWord( |
| 4194 int *pRc, /* IN/OUT: Error code */ |
| 4195 const char *zIn, /* Buffer to gobble string/bareword from */ |
| 4196 char **pzOut, /* OUT: malloc'd buffer containing str/bw */ |
| 4197 int *pbQuoted /* OUT: Set to true if dequoting required */ |
| 4198 ){ |
| 4199 const char *zRet = 0; |
| 4200 |
| 4201 int nIn = (int)strlen(zIn); |
| 4202 char *zOut = sqlite3_malloc(nIn+1); |
| 4203 |
| 4204 assert( *pRc==SQLITE_OK ); |
| 4205 *pbQuoted = 0; |
| 4206 *pzOut = 0; |
| 4207 |
| 4208 if( zOut==0 ){ |
| 4209 *pRc = SQLITE_NOMEM; |
| 4210 }else{ |
| 4211 memcpy(zOut, zIn, nIn+1); |
| 4212 if( fts5_isopenquote(zOut[0]) ){ |
| 4213 int ii = fts5Dequote(zOut); |
| 4214 zRet = &zIn[ii]; |
| 4215 *pbQuoted = 1; |
| 4216 }else{ |
| 4217 zRet = fts5ConfigSkipBareword(zIn); |
| 4218 if( zRet ){ |
| 4219 zOut[zRet-zIn] = '\0'; |
| 4220 } |
| 4221 } |
| 4222 } |
| 4223 |
| 4224 if( zRet==0 ){ |
| 4225 sqlite3_free(zOut); |
| 4226 }else{ |
| 4227 *pzOut = zOut; |
| 4228 } |
| 4229 |
| 4230 return zRet; |
| 4231 } |
| 4232 |
| 4233 static int fts5ConfigParseColumn( |
| 4234 Fts5Config *p, |
| 4235 char *zCol, |
| 4236 char *zArg, |
| 4237 char **pzErr |
| 4238 ){ |
| 4239 int rc = SQLITE_OK; |
| 4240 if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) |
| 4241 || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) |
| 4242 ){ |
| 4243 *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); |
| 4244 rc = SQLITE_ERROR; |
| 4245 }else if( zArg ){ |
| 4246 if( 0==sqlite3_stricmp(zArg, "unindexed") ){ |
| 4247 p->abUnindexed[p->nCol] = 1; |
| 4248 }else{ |
| 4249 *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); |
| 4250 rc = SQLITE_ERROR; |
| 4251 } |
| 4252 } |
| 4253 |
| 4254 p->azCol[p->nCol++] = zCol; |
| 4255 return rc; |
| 4256 } |
| 4257 |
| 4258 /* |
| 4259 ** Populate the Fts5Config.zContentExprlist string. |
| 4260 */ |
| 4261 static int fts5ConfigMakeExprlist(Fts5Config *p){ |
| 4262 int i; |
| 4263 int rc = SQLITE_OK; |
| 4264 Fts5Buffer buf = {0, 0, 0}; |
| 4265 |
| 4266 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); |
| 4267 if( p->eContent!=FTS5_CONTENT_NONE ){ |
| 4268 for(i=0; i<p->nCol; i++){ |
| 4269 if( p->eContent==FTS5_CONTENT_EXTERNAL ){ |
| 4270 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); |
| 4271 }else{ |
| 4272 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); |
| 4273 } |
| 4274 } |
| 4275 } |
| 4276 |
| 4277 assert( p->zContentExprlist==0 ); |
| 4278 p->zContentExprlist = (char*)buf.p; |
| 4279 return rc; |
| 4280 } |
| 4281 |
| 4282 /* |
| 4283 ** Arguments nArg/azArg contain the string arguments passed to the xCreate |
| 4284 ** or xConnect method of the virtual table. This function attempts to |
| 4285 ** allocate an instance of Fts5Config containing the results of parsing |
| 4286 ** those arguments. |
| 4287 ** |
| 4288 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the |
| 4289 ** new Fts5Config object. If an error occurs, an SQLite error code is |
| 4290 ** returned, *ppOut is set to NULL and an error message may be left in |
| 4291 ** *pzErr. It is the responsibility of the caller to eventually free any |
| 4292 ** such error message using sqlite3_free(). |
| 4293 */ |
| 4294 static int sqlite3Fts5ConfigParse( |
| 4295 Fts5Global *pGlobal, |
| 4296 sqlite3 *db, |
| 4297 int nArg, /* Number of arguments */ |
| 4298 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ |
| 4299 Fts5Config **ppOut, /* OUT: Results of parse */ |
| 4300 char **pzErr /* OUT: Error message */ |
| 4301 ){ |
| 4302 int rc = SQLITE_OK; /* Return code */ |
| 4303 Fts5Config *pRet; /* New object to return */ |
| 4304 int i; |
| 4305 int nByte; |
| 4306 |
| 4307 *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); |
| 4308 if( pRet==0 ) return SQLITE_NOMEM; |
| 4309 memset(pRet, 0, sizeof(Fts5Config)); |
| 4310 pRet->db = db; |
| 4311 pRet->iCookie = -1; |
| 4312 |
| 4313 nByte = nArg * (sizeof(char*) + sizeof(u8)); |
| 4314 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); |
| 4315 pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; |
| 4316 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); |
| 4317 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); |
| 4318 pRet->bColumnsize = 1; |
| 4319 pRet->eDetail = FTS5_DETAIL_FULL; |
| 4320 #ifdef SQLITE_DEBUG |
| 4321 pRet->bPrefixIndex = 1; |
| 4322 #endif |
| 4323 if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ |
| 4324 *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); |
| 4325 rc = SQLITE_ERROR; |
| 4326 } |
| 4327 |
| 4328 for(i=3; rc==SQLITE_OK && i<nArg; i++){ |
| 4329 const char *zOrig = azArg[i]; |
| 4330 const char *z; |
| 4331 char *zOne = 0; |
| 4332 char *zTwo = 0; |
| 4333 int bOption = 0; |
| 4334 int bMustBeCol = 0; |
| 4335 |
| 4336 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); |
| 4337 z = fts5ConfigSkipWhitespace(z); |
| 4338 if( z && *z=='=' ){ |
| 4339 bOption = 1; |
| 4340 z++; |
| 4341 if( bMustBeCol ) z = 0; |
| 4342 } |
| 4343 z = fts5ConfigSkipWhitespace(z); |
| 4344 if( z && z[0] ){ |
| 4345 int bDummy; |
| 4346 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); |
| 4347 if( z && z[0] ) z = 0; |
| 4348 } |
| 4349 |
| 4350 if( rc==SQLITE_OK ){ |
| 4351 if( z==0 ){ |
| 4352 *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); |
| 4353 rc = SQLITE_ERROR; |
| 4354 }else{ |
| 4355 if( bOption ){ |
| 4356 rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr); |
| 4357 }else{ |
| 4358 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); |
| 4359 zOne = 0; |
| 4360 } |
| 4361 } |
| 4362 } |
| 4363 |
| 4364 sqlite3_free(zOne); |
| 4365 sqlite3_free(zTwo); |
| 4366 } |
| 4367 |
| 4368 /* If a tokenizer= option was successfully parsed, the tokenizer has |
| 4369 ** already been allocated. Otherwise, allocate an instance of the default |
| 4370 ** tokenizer (unicode61) now. */ |
| 4371 if( rc==SQLITE_OK && pRet->pTok==0 ){ |
| 4372 rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); |
| 4373 } |
| 4374 |
| 4375 /* If no zContent option was specified, fill in the default values. */ |
| 4376 if( rc==SQLITE_OK && pRet->zContent==0 ){ |
| 4377 const char *zTail = 0; |
| 4378 assert( pRet->eContent==FTS5_CONTENT_NORMAL |
| 4379 || pRet->eContent==FTS5_CONTENT_NONE |
| 4380 ); |
| 4381 if( pRet->eContent==FTS5_CONTENT_NORMAL ){ |
| 4382 zTail = "content"; |
| 4383 }else if( pRet->bColumnsize ){ |
| 4384 zTail = "docsize"; |
| 4385 } |
| 4386 |
| 4387 if( zTail ){ |
| 4388 pRet->zContent = sqlite3Fts5Mprintf( |
| 4389 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail |
| 4390 ); |
| 4391 } |
| 4392 } |
| 4393 |
| 4394 if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ |
| 4395 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); |
| 4396 } |
| 4397 |
| 4398 /* Formulate the zContentExprlist text */ |
| 4399 if( rc==SQLITE_OK ){ |
| 4400 rc = fts5ConfigMakeExprlist(pRet); |
| 4401 } |
| 4402 |
| 4403 if( rc!=SQLITE_OK ){ |
| 4404 sqlite3Fts5ConfigFree(pRet); |
| 4405 *ppOut = 0; |
| 4406 } |
| 4407 return rc; |
| 4408 } |
| 4409 |
| 4410 /* |
| 4411 ** Free the configuration object passed as the only argument. |
| 4412 */ |
| 4413 static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ |
| 4414 if( pConfig ){ |
| 4415 int i; |
| 4416 if( pConfig->pTok ){ |
| 4417 pConfig->pTokApi->xDelete(pConfig->pTok); |
| 4418 } |
| 4419 sqlite3_free(pConfig->zDb); |
| 4420 sqlite3_free(pConfig->zName); |
| 4421 for(i=0; i<pConfig->nCol; i++){ |
| 4422 sqlite3_free(pConfig->azCol[i]); |
| 4423 } |
| 4424 sqlite3_free(pConfig->azCol); |
| 4425 sqlite3_free(pConfig->aPrefix); |
| 4426 sqlite3_free(pConfig->zRank); |
| 4427 sqlite3_free(pConfig->zRankArgs); |
| 4428 sqlite3_free(pConfig->zContent); |
| 4429 sqlite3_free(pConfig->zContentRowid); |
| 4430 sqlite3_free(pConfig->zContentExprlist); |
| 4431 sqlite3_free(pConfig); |
| 4432 } |
| 4433 } |
| 4434 |
| 4435 /* |
| 4436 ** Call sqlite3_declare_vtab() based on the contents of the configuration |
| 4437 ** object passed as the only argument. Return SQLITE_OK if successful, or |
| 4438 ** an SQLite error code if an error occurs. |
| 4439 */ |
| 4440 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ |
| 4441 int i; |
| 4442 int rc = SQLITE_OK; |
| 4443 char *zSql; |
| 4444 |
| 4445 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); |
| 4446 for(i=0; zSql && i<pConfig->nCol; i++){ |
| 4447 const char *zSep = (i==0?"":", "); |
| 4448 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); |
| 4449 } |
| 4450 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", |
| 4451 zSql, pConfig->zName, FTS5_RANK_NAME |
| 4452 ); |
| 4453 |
| 4454 assert( zSql || rc==SQLITE_NOMEM ); |
| 4455 if( zSql ){ |
| 4456 rc = sqlite3_declare_vtab(pConfig->db, zSql); |
| 4457 sqlite3_free(zSql); |
| 4458 } |
| 4459 |
| 4460 return rc; |
| 4461 } |
| 4462 |
| 4463 /* |
| 4464 ** Tokenize the text passed via the second and third arguments. |
| 4465 ** |
| 4466 ** The callback is invoked once for each token in the input text. The |
| 4467 ** arguments passed to it are, in order: |
| 4468 ** |
| 4469 ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() |
| 4470 ** const char *pToken // Pointer to buffer containing token |
| 4471 ** int nToken // Size of token in bytes |
| 4472 ** int iStart // Byte offset of start of token within input text |
| 4473 ** int iEnd // Byte offset of end of token within input text |
| 4474 ** int iPos // Position of token in input (first token is 0) |
| 4475 ** |
| 4476 ** If the callback returns a non-zero value the tokenization is abandoned |
| 4477 ** and no further callbacks are issued. |
| 4478 ** |
| 4479 ** This function returns SQLITE_OK if successful or an SQLite error code |
| 4480 ** if an error occurs. If the tokenization was abandoned early because |
| 4481 ** the callback returned SQLITE_DONE, this is not an error and this function |
| 4482 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early |
| 4483 ** because the callback returned another non-zero value, it is assumed |
| 4484 ** to be an SQLite error code and returned to the caller. |
| 4485 */ |
| 4486 static int sqlite3Fts5Tokenize( |
| 4487 Fts5Config *pConfig, /* FTS5 Configuration object */ |
| 4488 int flags, /* FTS5_TOKENIZE_* flags */ |
| 4489 const char *pText, int nText, /* Text to tokenize */ |
| 4490 void *pCtx, /* Context passed to xToken() */ |
| 4491 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
| 4492 ){ |
| 4493 if( pText==0 ) return SQLITE_OK; |
| 4494 return pConfig->pTokApi->xTokenize( |
| 4495 pConfig->pTok, pCtx, flags, pText, nText, xToken |
| 4496 ); |
| 4497 } |
| 4498 |
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. White-space is permitted around each literal. If it
** actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);

    if( z==0 ) return 0;          /* Not a literal */
    if( *z==')' ) return z;       /* End of list */
    if( *z!=',' ) return 0;       /* Junk following a literal */
    z++;
  }
}
| 4522 |
| 4523 /* |
| 4524 ** Parameter zIn contains a rank() function specification. The format of |
| 4525 ** this is: |
| 4526 ** |
| 4527 ** + Bareword (function name) |
| 4528 ** + Open parenthesis - "(" |
| 4529 ** + Zero or more SQL literals in a comma separated list |
| 4530 ** + Close parenthesis - ")" |
| 4531 */ |
| 4532 static int sqlite3Fts5ConfigParseRank( |
| 4533 const char *zIn, /* Input string */ |
| 4534 char **pzRank, /* OUT: Rank function name */ |
| 4535 char **pzRankArgs /* OUT: Rank function arguments */ |
| 4536 ){ |
| 4537 const char *p = zIn; |
| 4538 const char *pRank; |
| 4539 char *zRank = 0; |
| 4540 char *zRankArgs = 0; |
| 4541 int rc = SQLITE_OK; |
| 4542 |
| 4543 *pzRank = 0; |
| 4544 *pzRankArgs = 0; |
| 4545 |
| 4546 if( p==0 ){ |
| 4547 rc = SQLITE_ERROR; |
| 4548 }else{ |
| 4549 p = fts5ConfigSkipWhitespace(p); |
| 4550 pRank = p; |
| 4551 p = fts5ConfigSkipBareword(p); |
| 4552 |
| 4553 if( p ){ |
| 4554 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); |
| 4555 if( zRank ) memcpy(zRank, pRank, p-pRank); |
| 4556 }else{ |
| 4557 rc = SQLITE_ERROR; |
| 4558 } |
| 4559 |
| 4560 if( rc==SQLITE_OK ){ |
| 4561 p = fts5ConfigSkipWhitespace(p); |
| 4562 if( *p!='(' ) rc = SQLITE_ERROR; |
| 4563 p++; |
| 4564 } |
| 4565 if( rc==SQLITE_OK ){ |
| 4566 const char *pArgs; |
| 4567 p = fts5ConfigSkipWhitespace(p); |
| 4568 pArgs = p; |
| 4569 if( *p!=')' ){ |
| 4570 p = fts5ConfigSkipArgs(p); |
| 4571 if( p==0 ){ |
| 4572 rc = SQLITE_ERROR; |
| 4573 }else{ |
| 4574 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); |
| 4575 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); |
| 4576 } |
| 4577 } |
| 4578 } |
| 4579 } |
| 4580 |
| 4581 if( rc!=SQLITE_OK ){ |
| 4582 sqlite3_free(zRank); |
| 4583 assert( zRankArgs==0 ); |
| 4584 }else{ |
| 4585 *pzRank = zRank; |
| 4586 *pzRankArgs = zRankArgs; |
| 4587 } |
| 4588 return rc; |
| 4589 } |
| 4590 |
| 4591 static int sqlite3Fts5ConfigSetValue( |
| 4592 Fts5Config *pConfig, |
| 4593 const char *zKey, |
| 4594 sqlite3_value *pVal, |
| 4595 int *pbBadkey |
| 4596 ){ |
| 4597 int rc = SQLITE_OK; |
| 4598 |
| 4599 if( 0==sqlite3_stricmp(zKey, "pgsz") ){ |
| 4600 int pgsz = 0; |
| 4601 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 4602 pgsz = sqlite3_value_int(pVal); |
| 4603 } |
| 4604 if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){ |
| 4605 *pbBadkey = 1; |
| 4606 }else{ |
| 4607 pConfig->pgsz = pgsz; |
| 4608 } |
| 4609 } |
| 4610 |
| 4611 else if( 0==sqlite3_stricmp(zKey, "hashsize") ){ |
| 4612 int nHashSize = -1; |
| 4613 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 4614 nHashSize = sqlite3_value_int(pVal); |
| 4615 } |
| 4616 if( nHashSize<=0 ){ |
| 4617 *pbBadkey = 1; |
| 4618 }else{ |
| 4619 pConfig->nHashSize = nHashSize; |
| 4620 } |
| 4621 } |
| 4622 |
| 4623 else if( 0==sqlite3_stricmp(zKey, "automerge") ){ |
| 4624 int nAutomerge = -1; |
| 4625 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 4626 nAutomerge = sqlite3_value_int(pVal); |
| 4627 } |
| 4628 if( nAutomerge<0 || nAutomerge>64 ){ |
| 4629 *pbBadkey = 1; |
| 4630 }else{ |
| 4631 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
| 4632 pConfig->nAutomerge = nAutomerge; |
| 4633 } |
| 4634 } |
| 4635 |
| 4636 else if( 0==sqlite3_stricmp(zKey, "usermerge") ){ |
| 4637 int nUsermerge = -1; |
| 4638 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 4639 nUsermerge = sqlite3_value_int(pVal); |
| 4640 } |
| 4641 if( nUsermerge<2 || nUsermerge>16 ){ |
| 4642 *pbBadkey = 1; |
| 4643 }else{ |
| 4644 pConfig->nUsermerge = nUsermerge; |
| 4645 } |
| 4646 } |
| 4647 |
| 4648 else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){ |
| 4649 int nCrisisMerge = -1; |
| 4650 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
| 4651 nCrisisMerge = sqlite3_value_int(pVal); |
| 4652 } |
| 4653 if( nCrisisMerge<0 ){ |
| 4654 *pbBadkey = 1; |
| 4655 }else{ |
| 4656 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
| 4657 pConfig->nCrisisMerge = nCrisisMerge; |
| 4658 } |
| 4659 } |
| 4660 |
| 4661 else if( 0==sqlite3_stricmp(zKey, "rank") ){ |
| 4662 const char *zIn = (const char*)sqlite3_value_text(pVal); |
| 4663 char *zRank; |
| 4664 char *zRankArgs; |
| 4665 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); |
| 4666 if( rc==SQLITE_OK ){ |
| 4667 sqlite3_free(pConfig->zRank); |
| 4668 sqlite3_free(pConfig->zRankArgs); |
| 4669 pConfig->zRank = zRank; |
| 4670 pConfig->zRankArgs = zRankArgs; |
| 4671 }else if( rc==SQLITE_ERROR ){ |
| 4672 rc = SQLITE_OK; |
| 4673 *pbBadkey = 1; |
| 4674 } |
| 4675 }else{ |
| 4676 *pbBadkey = 1; |
| 4677 } |
| 4678 return rc; |
| 4679 } |
| 4680 |
| 4681 /* |
| 4682 ** Load the contents of the %_config table into memory. |
| 4683 */ |
| 4684 static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ |
| 4685 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; |
| 4686 char *zSql; |
| 4687 sqlite3_stmt *p = 0; |
| 4688 int rc = SQLITE_OK; |
| 4689 int iVersion = 0; |
| 4690 |
| 4691 /* Set default values */ |
| 4692 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; |
| 4693 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
| 4694 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE; |
| 4695 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
| 4696 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE; |
| 4697 |
| 4698 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); |
| 4699 if( zSql ){ |
| 4700 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); |
| 4701 sqlite3_free(zSql); |
| 4702 } |
| 4703 |
| 4704 assert( rc==SQLITE_OK || p==0 ); |
| 4705 if( rc==SQLITE_OK ){ |
| 4706 while( SQLITE_ROW==sqlite3_step(p) ){ |
| 4707 const char *zK = (const char*)sqlite3_column_text(p, 0); |
| 4708 sqlite3_value *pVal = sqlite3_column_value(p, 1); |
| 4709 if( 0==sqlite3_stricmp(zK, "version") ){ |
| 4710 iVersion = sqlite3_value_int(pVal); |
| 4711 }else{ |
| 4712 int bDummy = 0; |
| 4713 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); |
| 4714 } |
| 4715 } |
| 4716 rc = sqlite3_finalize(p); |
| 4717 } |
| 4718 |
| 4719 if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ |
| 4720 rc = SQLITE_ERROR; |
| 4721 if( pConfig->pzErrmsg ){ |
| 4722 assert( 0==*pConfig->pzErrmsg ); |
| 4723 *pConfig->pzErrmsg = sqlite3_mprintf( |
| 4724 "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", |
| 4725 iVersion, FTS5_CURRENT_VERSION |
| 4726 ); |
| 4727 } |
| 4728 } |
| 4729 |
| 4730 if( rc==SQLITE_OK ){ |
| 4731 pConfig->iCookie = iCookie; |
| 4732 } |
| 4733 return rc; |
| 4734 } |
| 4735 |
| 4736 /* |
| 4737 ** 2014 May 31 |
| 4738 ** |
| 4739 ** The author disclaims copyright to this source code. In place of |
| 4740 ** a legal notice, here is a blessing: |
| 4741 ** |
| 4742 ** May you do good and not evil. |
| 4743 ** May you find forgiveness for yourself and forgive others. |
| 4744 ** May you share freely, never taking more than you give. |
| 4745 ** |
| 4746 ****************************************************************************** |
| 4747 ** |
| 4748 */ |
| 4749 |
| 4750 |
| 4751 |
| 4752 /* #include "fts5Int.h" */ |
| 4753 /* #include "fts5parse.h" */ |
| 4754 |
| 4755 /* |
| 4756 ** All token types in the generated fts5parse.h file are greater than 0, |
| 4756 ** which leaves 0 free to serve as a distinct token code here. |
| 4757 */ |
| 4758 #define FTS5_EOF 0 |
| 4759 |
| 4760 #define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) /* == 0x7fffffffffffffff, assuming i64 is a 64-bit signed type */ |
| 4761 |
| 4762 typedef struct Fts5ExprTerm Fts5ExprTerm; |
| 4763 |
| 4764 /* |
| 4765 ** Functions generated by lemon from fts5parse.y. |
| 4766 */ |
| 4767 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); |
| 4768 static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); |
| 4769 static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); |
| 4770 #ifndef NDEBUG /* The trace hook is only declared when NDEBUG is not defined */ |
| 4771 /* #include <stdio.h> */ |
| 4772 static void sqlite3Fts5ParserTrace(FILE*, char*); |
| 4773 #endif |
| 4774 |
| 4775 |
| 4776 struct Fts5Expr { /* An expression: a tree of Fts5ExprNode plus an array of its phrases */ |
| 4777 Fts5Index *pIndex; /* NOTE(review): presumably the index the expression is evaluated against - confirm */ |
| 4778 Fts5Config *pConfig; /* NOTE(review): presumably the configuration of the owning table - confirm */ |
| 4779 Fts5ExprNode *pRoot; /* NOTE(review): presumably the root node of the expression tree - confirm */ |
| 4780 int bDesc; /* Iterate in descending rowid order */ |
| 4781 int nPhrase; /* Number of phrases in expression */ |
| 4782 Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ |
| 4783 }; |
| 4784 |
| 4785 /* |
| 4786 ** eType: |
| 4787 ** Expression node type. Always one of: |
| 4788 ** |
| 4789 ** FTS5_AND (nChild, apChild valid) |
| 4790 ** FTS5_OR (nChild, apChild valid) |
| 4791 ** FTS5_NOT (nChild, apChild valid) |
| 4792 ** FTS5_STRING (pNear valid) |
| 4793 ** FTS5_TERM (pNear valid) |
| 4794 */ |
| 4795 struct Fts5ExprNode { |
| 4796 int eType; /* Node type */ |
| 4797 int bEof; /* True at EOF */ |
| 4798 int bNomatch; /* True if entry is not a match */ |
| 4799 |
| 4800 /* Next method for this node. Invoked via the fts5ExprNodeNext() macro. */ |
| 4801 int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); |
| 4802 |
| 4803 i64 iRowid; /* Current rowid */ |
| 4804 Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ |
| 4805 |
| 4806 /* Child nodes. For a NOT node, this array always contains 2 entries. For |
| 4807 ** AND or OR nodes, it contains 2 or more entries. */ |
| 4808 int nChild; /* Number of child nodes */ |
| 4809 Fts5ExprNode *apChild[1]; /* Array of child nodes. NOTE(review): declared with 1 entry; presumably allocated with room for nChild - confirm at the allocation site */ |
| 4810 }; |
| 4811 |
/*
** True if the eType of node p is FTS5_TERM or FTS5_STRING - the two node
** types for which Fts5ExprNode.pNear is valid. The argument is evaluated
** twice, so it must not have side effects.
*/
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
** Argument (b), the node, is evaluated twice.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
| 4819 |
/*
** An instance of the following structure represents a single search term
** or term prefix.
**
** Synonyms of a term are chained together through the pSynonym pointer.
** Code in this file visits a term and all of its synonyms by starting at
** the term itself and following pSynonym until it is NULL.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};
| 4830 |
/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
**
** aTerm[] is declared with a single element but holds nTerm entries;
** presumably the object is allocated with extra trailing space - confirm
** against the code that allocates phrases.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};
| 4841 |
/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
**
** apPhrase[] is declared with a single element but holds nPhrase entries.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
  int nPhrase;                    /* Number of entries in aPhrase[] array */
  Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
};
| 4852 |
| 4853 |
/*
** Parse context. One of these is zeroed and then shared between the
** tokenizer and the lemon-generated parser while an expression is parsed.
*/
struct Fts5Parse {
  Fts5Config *pConfig;            /* Configuration of the table being queried */
  char *zErr;                     /* Error message from sqlite3_vmprintf() */
  int rc;                         /* SQLITE_OK, or code of first error seen */
  int nPhrase;                    /* Size of apPhrase array */
  Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
  Fts5ExprNode *pExpr;            /* Result of a successful parse */
};
| 4865 |
| 4866 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ |
| 4867 va_list ap; |
| 4868 va_start(ap, zFmt); |
| 4869 if( pParse->rc==SQLITE_OK ){ |
| 4870 pParse->zErr = sqlite3_vmprintf(zFmt, ap); |
| 4871 pParse->rc = SQLITE_ERROR; |
| 4872 } |
| 4873 va_end(ap); |
| 4874 } |
| 4875 |
/*
** Return 1 if character t is one of the four characters that the query
** tokenizer treats as whitespace (space, tab, newline, carriage return),
** or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
| 4879 |
| 4880 /* |
| 4881 ** Read the first token from the nul-terminated string at *pz. |
| 4882 */ |
| 4883 static int fts5ExprGetToken( |
| 4884 Fts5Parse *pParse, |
| 4885 const char **pz, /* IN/OUT: Pointer into buffer */ |
| 4886 Fts5Token *pToken |
| 4887 ){ |
| 4888 const char *z = *pz; |
| 4889 int tok; |
| 4890 |
| 4891 /* Skip past any whitespace */ |
| 4892 while( fts5ExprIsspace(*z) ) z++; |
| 4893 |
| 4894 pToken->p = z; |
| 4895 pToken->n = 1; |
| 4896 switch( *z ){ |
| 4897 case '(': tok = FTS5_LP; break; |
| 4898 case ')': tok = FTS5_RP; break; |
| 4899 case '{': tok = FTS5_LCP; break; |
| 4900 case '}': tok = FTS5_RCP; break; |
| 4901 case ':': tok = FTS5_COLON; break; |
| 4902 case ',': tok = FTS5_COMMA; break; |
| 4903 case '+': tok = FTS5_PLUS; break; |
| 4904 case '*': tok = FTS5_STAR; break; |
| 4905 case '-': tok = FTS5_MINUS; break; |
| 4906 case '\0': tok = FTS5_EOF; break; |
| 4907 |
| 4908 case '"': { |
| 4909 const char *z2; |
| 4910 tok = FTS5_STRING; |
| 4911 |
| 4912 for(z2=&z[1]; 1; z2++){ |
| 4913 if( z2[0]=='"' ){ |
| 4914 z2++; |
| 4915 if( z2[0]!='"' ) break; |
| 4916 } |
| 4917 if( z2[0]=='\0' ){ |
| 4918 sqlite3Fts5ParseError(pParse, "unterminated string"); |
| 4919 return FTS5_EOF; |
| 4920 } |
| 4921 } |
| 4922 pToken->n = (z2 - z); |
| 4923 break; |
| 4924 } |
| 4925 |
| 4926 default: { |
| 4927 const char *z2; |
| 4928 if( sqlite3Fts5IsBareword(z[0])==0 ){ |
| 4929 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); |
| 4930 return FTS5_EOF; |
| 4931 } |
| 4932 tok = FTS5_STRING; |
| 4933 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); |
| 4934 pToken->n = (z2 - z); |
| 4935 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; |
| 4936 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; |
| 4937 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; |
| 4938 break; |
| 4939 } |
| 4940 } |
| 4941 |
| 4942 *pz = &pToken->p[pToken->n]; |
| 4943 return tok; |
| 4944 } |
| 4945 |
| 4946 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } |
| 4947 static void fts5ParseFree(void *p){ sqlite3_free(p); } |
| 4948 |
| 4949 static int sqlite3Fts5ExprNew( |
| 4950 Fts5Config *pConfig, /* FTS5 Configuration */ |
| 4951 const char *zExpr, /* Expression text */ |
| 4952 Fts5Expr **ppNew, |
| 4953 char **pzErr |
| 4954 ){ |
| 4955 Fts5Parse sParse; |
| 4956 Fts5Token token; |
| 4957 const char *z = zExpr; |
| 4958 int t; /* Next token type */ |
| 4959 void *pEngine; |
| 4960 Fts5Expr *pNew; |
| 4961 |
| 4962 *ppNew = 0; |
| 4963 *pzErr = 0; |
| 4964 memset(&sParse, 0, sizeof(sParse)); |
| 4965 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); |
| 4966 if( pEngine==0 ){ return SQLITE_NOMEM; } |
| 4967 sParse.pConfig = pConfig; |
| 4968 |
| 4969 do { |
| 4970 t = fts5ExprGetToken(&sParse, &z, &token); |
| 4971 sqlite3Fts5Parser(pEngine, t, token, &sParse); |
| 4972 }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); |
| 4973 sqlite3Fts5ParserFree(pEngine, fts5ParseFree); |
| 4974 |
| 4975 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); |
| 4976 if( sParse.rc==SQLITE_OK ){ |
| 4977 *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); |
| 4978 if( pNew==0 ){ |
| 4979 sParse.rc = SQLITE_NOMEM; |
| 4980 sqlite3Fts5ParseNodeFree(sParse.pExpr); |
| 4981 }else{ |
| 4982 if( !sParse.pExpr ){ |
| 4983 const int nByte = sizeof(Fts5ExprNode); |
| 4984 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte); |
| 4985 if( pNew->pRoot ){ |
| 4986 pNew->pRoot->bEof = 1; |
| 4987 } |
| 4988 }else{ |
| 4989 pNew->pRoot = sParse.pExpr; |
| 4990 } |
| 4991 pNew->pIndex = 0; |
| 4992 pNew->pConfig = pConfig; |
| 4993 pNew->apExprPhrase = sParse.apPhrase; |
| 4994 pNew->nPhrase = sParse.nPhrase; |
| 4995 sParse.apPhrase = 0; |
| 4996 } |
| 4997 }else{ |
| 4998 sqlite3Fts5ParseNodeFree(sParse.pExpr); |
| 4999 } |
| 5000 |
| 5001 sqlite3_free(sParse.apPhrase); |
| 5002 *pzErr = sParse.zErr; |
| 5003 return sParse.rc; |
| 5004 } |
| 5005 |
| 5006 /* |
| 5007 ** Free the expression node object passed as the only argument. |
| 5008 */ |
| 5009 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ |
| 5010 if( p ){ |
| 5011 int i; |
| 5012 for(i=0; i<p->nChild; i++){ |
| 5013 sqlite3Fts5ParseNodeFree(p->apChild[i]); |
| 5014 } |
| 5015 sqlite3Fts5ParseNearsetFree(p->pNear); |
| 5016 sqlite3_free(p); |
| 5017 } |
| 5018 } |
| 5019 |
| 5020 /* |
| 5021 ** Free the expression object passed as the only argument. |
| 5022 */ |
| 5023 static void sqlite3Fts5ExprFree(Fts5Expr *p){ |
| 5024 if( p ){ |
| 5025 sqlite3Fts5ParseNodeFree(p->pRoot); |
| 5026 sqlite3_free(p->apExprPhrase); |
| 5027 sqlite3_free(p); |
| 5028 } |
| 5029 } |
| 5030 |
| 5031 /* |
| 5032 ** Argument pTerm must be a synonym iterator. Return the current rowid |
| 5033 ** that it points to. |
| 5034 */ |
| 5035 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ |
| 5036 i64 iRet = 0; |
| 5037 int bRetValid = 0; |
| 5038 Fts5ExprTerm *p; |
| 5039 |
| 5040 assert( pTerm->pSynonym ); |
| 5041 assert( bDesc==0 || bDesc==1 ); |
| 5042 for(p=pTerm; p; p=p->pSynonym){ |
| 5043 if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
| 5044 i64 iRowid = p->pIter->iRowid; |
| 5045 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ |
| 5046 iRet = iRowid; |
| 5047 bRetValid = 1; |
| 5048 } |
| 5049 } |
| 5050 } |
| 5051 |
| 5052 if( pbEof && bRetValid==0 ) *pbEof = 1; |
| 5053 return iRet; |
| 5054 } |
| 5055 |
| 5056 /* |
| 5057 ** Argument pTerm must be a synonym iterator. |
| 5058 */ |
| 5059 static int fts5ExprSynonymList( |
| 5060 Fts5ExprTerm *pTerm, |
| 5061 i64 iRowid, |
| 5062 Fts5Buffer *pBuf, /* Use this buffer for space if required */ |
| 5063 u8 **pa, int *pn |
| 5064 ){ |
| 5065 Fts5PoslistReader aStatic[4]; |
| 5066 Fts5PoslistReader *aIter = aStatic; |
| 5067 int nIter = 0; |
| 5068 int nAlloc = 4; |
| 5069 int rc = SQLITE_OK; |
| 5070 Fts5ExprTerm *p; |
| 5071 |
| 5072 assert( pTerm->pSynonym ); |
| 5073 for(p=pTerm; p; p=p->pSynonym){ |
| 5074 Fts5IndexIter *pIter = p->pIter; |
| 5075 if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){ |
| 5076 if( pIter->nData==0 ) continue; |
| 5077 if( nIter==nAlloc ){ |
| 5078 int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; |
| 5079 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); |
| 5080 if( aNew==0 ){ |
| 5081 rc = SQLITE_NOMEM; |
| 5082 goto synonym_poslist_out; |
| 5083 } |
| 5084 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); |
| 5085 nAlloc = nAlloc*2; |
| 5086 if( aIter!=aStatic ) sqlite3_free(aIter); |
| 5087 aIter = aNew; |
| 5088 } |
| 5089 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); |
| 5090 assert( aIter[nIter].bEof==0 ); |
| 5091 nIter++; |
| 5092 } |
| 5093 } |
| 5094 |
| 5095 if( nIter==1 ){ |
| 5096 *pa = (u8*)aIter[0].a; |
| 5097 *pn = aIter[0].n; |
| 5098 }else{ |
| 5099 Fts5PoslistWriter writer = {0}; |
| 5100 i64 iPrev = -1; |
| 5101 fts5BufferZero(pBuf); |
| 5102 while( 1 ){ |
| 5103 int i; |
| 5104 i64 iMin = FTS5_LARGEST_INT64; |
| 5105 for(i=0; i<nIter; i++){ |
| 5106 if( aIter[i].bEof==0 ){ |
| 5107 if( aIter[i].iPos==iPrev ){ |
| 5108 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; |
| 5109 } |
| 5110 if( aIter[i].iPos<iMin ){ |
| 5111 iMin = aIter[i].iPos; |
| 5112 } |
| 5113 } |
| 5114 } |
| 5115 if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; |
| 5116 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); |
| 5117 iPrev = iMin; |
| 5118 } |
| 5119 if( rc==SQLITE_OK ){ |
| 5120 *pa = pBuf->p; |
| 5121 *pn = pBuf->n; |
| 5122 } |
| 5123 } |
| 5124 |
| 5125 synonym_poslist_out: |
| 5126 if( aIter!=aStatic ) sqlite3_free(aIter); |
| 5127 return rc; |
| 5128 } |
| 5129 |
| 5130 |
/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if no error occurs, or an SQLite error code
** otherwise. It is not considered an error if the current rowid is
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int i;
  int rc = SQLITE_OK;
  
  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>ArraySize(aStatic) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }
  /* Zero every reader so that the cleanup code below sees bFlag==0 for
  ** any entry that is never initialized. */
  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    int n = 0;
    int bFlag = 0;
    u8 *a = 0;
    if( pTerm->pSynonym ){
      /* A term with synonyms: build a combined position list.
      ** NOTE(review): on error only "a" is freed here; whether buf.p can
      ** still hold an allocation at that point depends on the buffer
      ** helpers, which are not visible from this function - confirm. */
      Fts5Buffer buf = {0, 0, 0};
      rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
      if( rc ){
        sqlite3_free(a);
        goto ismatch_out;
      }
      /* bFlag records that the list lives in buf and must be freed below */
      if( a==buf.p ) bFlag = 1;
    }else{
      a = (u8*)pTerm->pIter->pData;
      n = pTerm->pIter->nData;
    }
    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    aIter[i].bFlag = (u8)bFlag;
    if( aIter[i].bEof ) goto ismatch_out;
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;
    /* Advance the readers until reader i points at position iPos+i for
    ** every term i, i.e. until the terms occupy consecutive positions. */
    do {
      bMatch = 1;
      for(i=0; i<pPhrase->nTerm; i++){
        Fts5PoslistReader *pPos = &aIter[i];
        i64 iAdj = iPos + i;
        if( pPos->iPos!=iAdj ){
          bMatch = 0;
          while( pPos->iPos<iAdj ){
            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
        }
      }
    }while( bMatch==0 );

    /* Append position iPos to the output */
    rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
    if( rc!=SQLITE_OK ) goto ismatch_out;

    /* Step every reader forward. The search ends when any one of them
    ** reaches the end of its list. */
    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
  /* The row is a match if at least one position was written. */
  *pbMatch = (pPhrase->poslist.n>0);
  for(i=0; i<pPhrase->nTerm; i++){
    if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
  }
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}
| 5222 |
/*
** A position-list reader that exposes both the current position and the
** one that follows it. Initialized by fts5LookaheadReaderInit() and
** advanced by fts5LookaheadReaderNext().
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

/* Value stored in iLookahead (and subsequently iPos) once the end of the
** position list has been reached. */
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
| 5233 |
| 5234 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ |
| 5235 p->iPos = p->iLookahead; |
| 5236 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ |
| 5237 p->iLookahead = FTS5_LOOKAHEAD_EOF; |
| 5238 } |
| 5239 return (p->iPos==FTS5_LOOKAHEAD_EOF); |
| 5240 } |
| 5241 |
| 5242 static int fts5LookaheadReaderInit( |
| 5243 const u8 *a, int n, /* Buffer to read position list from */ |
| 5244 Fts5LookaheadReader *p /* Iterator object to initialize */ |
| 5245 ){ |
| 5246 memset(p, 0, sizeof(Fts5LookaheadReader)); |
| 5247 p->a = a; |
| 5248 p->n = n; |
| 5249 fts5LookaheadReaderNext(p); |
| 5250 return fts5LookaheadReaderNext(p); |
| 5251 } |
| 5252 |
/*
** Per-phrase state used by fts5ExprNearIsMatch(): a reader over the
** phrase's existing position list, and a writer that rewrites the filtered
** list into the buffer that pOut points to.
*/
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};
| 5259 |
/*
** The near-set object passed as the first argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Non-zero is returned if it
** does, or zero otherwise.
**
** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) 
** occurs within this function (*pRc) is set accordingly before returning.
** The return value is undefined in both these cases.
** 
** If no error occurs and non-zero (a match) is returned, the position-list
** of each phrase object is edited to contain only those entries that
** meet the constraint before returning.
*/
static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
  Fts5NearTrimmer aStatic[4];
  Fts5NearTrimmer *a = aStatic;
  Fts5ExprPhrase **apPhrase = pNear->apPhrase;

  int i;
  int rc = *pRc;
  int bMatch;

  assert( pNear->nPhrase>1 );

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pNear->nPhrase>ArraySize(aStatic) ){
    int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
    a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
  }else{
    memset(aStatic, 0, sizeof(aStatic));
  }
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return 0;
  }

  /* Initialize a lookahead iterator for each phrase. After passing the
  ** buffer and buffer size to the lookahead-reader init function, zero
  ** the phrase poslist buffer. The new poslist for the phrase (containing
  ** the same entries as the original with some entries removed on account 
  ** of the NEAR constraint) is written over the original even as it is
  ** being read. This is safe as the entries for the new poslist are a
  ** subset of the old, so it is not possible for data yet to be read to
  ** be overwritten.  */
  for(i=0; i<pNear->nPhrase; i++){
    Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
    fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
    pPoslist->n = 0;
    a[i].pOut = pPoslist;
  }

  while( 1 ){
    int iAdv;
    i64 iMin;
    i64 iMax;

    /* This block advances the phrase iterators until they point to a set of
    ** entries that together comprise a match.  For each phrase the current
    ** position must lie in the range iMin..iMax, where iMin is iMax less
    ** the number of terms in that phrase less the NEAR parameter. */
    iMax = a[0].reader.iPos;
    do {
      bMatch = 1;
      for(i=0; i<pNear->nPhrase; i++){
        Fts5LookaheadReader *pPos = &a[i].reader;
        iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
        if( pPos->iPos<iMin || pPos->iPos>iMax ){
          bMatch = 0;
          while( pPos->iPos<iMin ){
            if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iMax ) iMax = pPos->iPos;
        }
      }
    }while( bMatch==0 );

    /* Add an entry to each output position list.  A position equal to the
    ** one most recently written to the same list is not written again.
    ** The return code of the append is not checked here. */
    for(i=0; i<pNear->nPhrase; i++){
      i64 iPos = a[i].reader.iPos;
      Fts5PoslistWriter *pWriter = &a[i].writer;
      if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
        sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
      }
    }

    /* Advance whichever reader has the smallest lookahead position. If
    ** that reader is then at EOF, the search is over. */
    iAdv = 0;
    iMin = a[0].reader.iLookahead;
    for(i=0; i<pNear->nPhrase; i++){
      if( a[i].reader.iLookahead < iMin ){
        iMin = a[i].reader.iLookahead;
        iAdv = i;
      }
    }
    if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
  }

  ismatch_out: {
    /* It is a match if anything was written to the first output list. */
    int bRet = a[0].pOut->n>0;
    *pRc = rc;
    if( a!=aStatic ) sqlite3_free(a);
    return bRet;
  }
}
| 5366 |
| 5367 /* |
| 5368 ** Advance iterator pIter until it points to a value equal to or laster |
| 5369 ** than the initial value of *piLast. If this means the iterator points |
| 5370 ** to a value laster than *piLast, update *piLast to the new lastest value. |
| 5371 ** |
| 5372 ** If the iterator reaches EOF, set *pbEof to true before returning. If |
| 5373 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc |
| 5374 ** are set, return a non-zero value. Otherwise, return zero. |
| 5375 */ |
| 5376 static int fts5ExprAdvanceto( |
| 5377 Fts5IndexIter *pIter, /* Iterator to advance */ |
| 5378 int bDesc, /* True if iterator is "rowid DESC" */ |
| 5379 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
| 5380 int *pRc, /* OUT: Error code */ |
| 5381 int *pbEof /* OUT: Set to true if EOF */ |
| 5382 ){ |
| 5383 i64 iLast = *piLast; |
| 5384 i64 iRowid; |
| 5385 |
| 5386 iRowid = pIter->iRowid; |
| 5387 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
| 5388 int rc = sqlite3Fts5IterNextFrom(pIter, iLast); |
| 5389 if( rc || sqlite3Fts5IterEof(pIter) ){ |
| 5390 *pRc = rc; |
| 5391 *pbEof = 1; |
| 5392 return 1; |
| 5393 } |
| 5394 iRowid = pIter->iRowid; |
| 5395 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); |
| 5396 } |
| 5397 *piLast = iRowid; |
| 5398 |
| 5399 return 0; |
| 5400 } |
| 5401 |
| 5402 static int fts5ExprSynonymAdvanceto( |
| 5403 Fts5ExprTerm *pTerm, /* Term iterator to advance */ |
| 5404 int bDesc, /* True if iterator is "rowid DESC" */ |
| 5405 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
| 5406 int *pRc /* OUT: Error code */ |
| 5407 ){ |
| 5408 int rc = SQLITE_OK; |
| 5409 i64 iLast = *piLast; |
| 5410 Fts5ExprTerm *p; |
| 5411 int bEof = 0; |
| 5412 |
| 5413 for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ |
| 5414 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 5415 i64 iRowid = p->pIter->iRowid; |
| 5416 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
| 5417 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); |
| 5418 } |
| 5419 } |
| 5420 } |
| 5421 |
| 5422 if( rc!=SQLITE_OK ){ |
| 5423 *pRc = rc; |
| 5424 bEof = 1; |
| 5425 }else{ |
| 5426 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); |
| 5427 } |
| 5428 return bEof; |
| 5429 } |
| 5430 |
| 5431 |
| 5432 static int fts5ExprNearTest( |
| 5433 int *pRc, |
| 5434 Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
| 5435 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ |
| 5436 ){ |
| 5437 Fts5ExprNearset *pNear = pNode->pNear; |
| 5438 int rc = *pRc; |
| 5439 |
| 5440 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
| 5441 Fts5ExprTerm *pTerm; |
| 5442 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; |
| 5443 pPhrase->poslist.n = 0; |
| 5444 for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
| 5445 Fts5IndexIter *pIter = pTerm->pIter; |
| 5446 if( sqlite3Fts5IterEof(pIter)==0 ){ |
| 5447 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ |
| 5448 pPhrase->poslist.n = 1; |
| 5449 } |
| 5450 } |
| 5451 } |
| 5452 return pPhrase->poslist.n; |
| 5453 }else{ |
| 5454 int i; |
| 5455 |
| 5456 /* Check that each phrase in the nearset matches the current row. |
| 5457 ** Populate the pPhrase->poslist buffers at the same time. If any |
| 5458 ** phrase is not a match, break out of the loop early. */ |
| 5459 for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ |
| 5460 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 5461 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ |
| 5462 int bMatch = 0; |
| 5463 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); |
| 5464 if( bMatch==0 ) break; |
| 5465 }else{ |
| 5466 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
| 5467 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); |
| 5468 } |
| 5469 } |
| 5470 |
| 5471 *pRc = rc; |
| 5472 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ |
| 5473 return 1; |
| 5474 } |
| 5475 return 0; |
| 5476 } |
| 5477 } |
| 5478 |
| 5479 |
| 5480 /* |
| 5481 ** Initialize all term iterators in the pNear object. If any term is found |
| 5482 ** to match no documents at all, return immediately without initializing any |
| 5483 ** further iterators. |
| 5484 ** |
| 5485 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5486 ** SQLITE_OK. It is not considered an error if some term matches zero |
| 5487 ** documents. |
| 5488 */ |
| 5489 static int fts5ExprNearInitAll( |
| 5490 Fts5Expr *pExpr, |
| 5491 Fts5ExprNode *pNode |
| 5492 ){ |
| 5493 Fts5ExprNearset *pNear = pNode->pNear; |
| 5494 int i; |
| 5495 |
| 5496 assert( pNode->bNomatch==0 ); |
| 5497 for(i=0; i<pNear->nPhrase; i++){ |
| 5498 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 5499 if( pPhrase->nTerm==0 ){ |
| 5500 pNode->bEof = 1; |
| 5501 return SQLITE_OK; |
| 5502 }else{ |
| 5503 int j; |
| 5504 for(j=0; j<pPhrase->nTerm; j++){ |
| 5505 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; |
| 5506 Fts5ExprTerm *p; |
| 5507 int bHit = 0; |
| 5508 |
| 5509 for(p=pTerm; p; p=p->pSynonym){ |
| 5510 int rc; |
| 5511 if( p->pIter ){ |
| 5512 sqlite3Fts5IterClose(p->pIter); |
| 5513 p->pIter = 0; |
| 5514 } |
| 5515 rc = sqlite3Fts5IndexQuery( |
| 5516 pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), |
| 5517 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | |
| 5518 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), |
| 5519 pNear->pColset, |
| 5520 &p->pIter |
| 5521 ); |
| 5522 assert( (rc==SQLITE_OK)==(p->pIter!=0) ); |
| 5523 if( rc!=SQLITE_OK ) return rc; |
| 5524 if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
| 5525 bHit = 1; |
| 5526 } |
| 5527 } |
| 5528 |
| 5529 if( bHit==0 ){ |
| 5530 pNode->bEof = 1; |
| 5531 return SQLITE_OK; |
| 5532 } |
| 5533 } |
| 5534 } |
| 5535 } |
| 5536 |
| 5537 pNode->bEof = 0; |
| 5538 return SQLITE_OK; |
| 5539 } |
| 5540 |
| 5541 /* |
| 5542 ** If pExpr is an ASC iterator, this function returns a value with the |
| 5543 ** same sign as: |
| 5544 ** |
| 5545 ** (iLhs - iRhs) |
| 5546 ** |
| 5547 ** Otherwise, if this is a DESC iterator, the opposite is returned: |
| 5548 ** |
| 5549 ** (iRhs - iLhs) |
| 5550 */ |
| 5551 static int fts5RowidCmp( |
| 5552 Fts5Expr *pExpr, |
| 5553 i64 iLhs, |
| 5554 i64 iRhs |
| 5555 ){ |
| 5556 assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); |
| 5557 if( pExpr->bDesc==0 ){ |
| 5558 if( iLhs<iRhs ) return -1; |
| 5559 return (iLhs > iRhs); |
| 5560 }else{ |
| 5561 if( iLhs>iRhs ) return -1; |
| 5562 return (iLhs < iRhs); |
| 5563 } |
| 5564 } |
| 5565 |
| 5566 static void fts5ExprSetEof(Fts5ExprNode *pNode){ |
| 5567 int i; |
| 5568 pNode->bEof = 1; |
| 5569 pNode->bNomatch = 0; |
| 5570 for(i=0; i<pNode->nChild; i++){ |
| 5571 fts5ExprSetEof(pNode->apChild[i]); |
| 5572 } |
| 5573 } |
| 5574 |
| 5575 static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ |
| 5576 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ |
| 5577 Fts5ExprNearset *pNear = pNode->pNear; |
| 5578 int i; |
| 5579 for(i=0; i<pNear->nPhrase; i++){ |
| 5580 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 5581 pPhrase->poslist.n = 0; |
| 5582 } |
| 5583 }else{ |
| 5584 int i; |
| 5585 for(i=0; i<pNode->nChild; i++){ |
| 5586 fts5ExprNodeZeroPoslist(pNode->apChild[i]); |
| 5587 } |
| 5588 } |
| 5589 } |
| 5590 |
| 5591 |
| 5592 |
| 5593 /* |
| 5594 ** Compare the values currently indicated by the two nodes as follows: |
| 5595 ** |
| 5596 ** res = (*p1) - (*p2) |
| 5597 ** |
| 5598 ** Nodes that point to values that come later in the iteration order are |
| 5599 ** considered to be larger. Nodes at EOF are the largest of all. |
| 5600 ** |
| 5601 ** This means that if the iteration order is ASC, then numerically larger |
| 5602 ** rowids are considered larger. Or if it is the default DESC, numerically |
| 5603 ** smaller rowids are larger. |
| 5604 */ |
| 5605 static int fts5NodeCompare( |
| 5606 Fts5Expr *pExpr, |
| 5607 Fts5ExprNode *p1, |
| 5608 Fts5ExprNode *p2 |
| 5609 ){ |
| 5610 if( p2->bEof ) return -1; |
| 5611 if( p1->bEof ) return +1; |
| 5612 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); |
| 5613 } |
| 5614 |
/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, pNode->bEof is set to true 
** before returning.
**
** Once all iterators agree, pNode->iRowid is set to the common rowid and
** pNode->bNomatch is set according to whether fts5ExprNearTest() reports
** that the row is really a match.
**
** SQLITE_OK is returned if no error occurs, or an SQLite error code
** otherwise. It is not considered an error if an iterator reaches
** EOF.
*/
static int fts5ExprNodeTest_STRING(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
  const int bDesc = pExpr->bDesc;

  /* Check that this node should not be FTS5_TERM */
  assert( pNear->nPhrase>1 
       || pNear->apPhrase[0]->nTerm>1 
       || pNear->apPhrase[0]->aTerm[0].pSynonym
  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  if( pLeft->aTerm[0].pSynonym ){
    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
  }else{
    iLast = pLeft->aTerm[0].pIter->iRowid;
  }

  /* Repeat until a full pass over every term of every phrase finds them
  ** all at rowid iLast. Advancing a term may move iLast forward, which is
  ** why another pass is then required. */
  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
        if( pTerm->pSynonym ){
          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
            pNode->bNomatch = 0;
            pNode->bEof = 1;
            return rc;
          }
        }else{
          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          if( pIter->iRowid==iLast || pIter->bEof ) continue;
          bMatch = 0;
          /* On EOF or error this sets pNode->bEof directly. */
          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
            return rc;
          }
        }
      }
    }
  }while( bMatch==0 );

  pNode->iRowid = iLast;
  /* bNomatch is only set if the near-test failed without an error. */
  pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
  assert( pNode->bEof==0 || pNode->bNomatch==0 );

  return rc;
}
| 5687 |
| 5688 /* |
| 5689 ** Advance the first term iterator in the first phrase of pNear. Set output |
| 5690 ** variable *pbEof to true if it reaches EOF or if an error occurs. |
| 5691 ** |
| 5692 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 5693 ** occurs. |
| 5694 */ |
| 5695 static int fts5ExprNodeNext_STRING( |
| 5696 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 5697 Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ |
| 5698 int bFromValid, |
| 5699 i64 iFrom |
| 5700 ){ |
| 5701 Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; |
| 5702 int rc = SQLITE_OK; |
| 5703 |
| 5704 pNode->bNomatch = 0; |
| 5705 if( pTerm->pSynonym ){ |
| 5706 int bEof = 1; |
| 5707 Fts5ExprTerm *p; |
| 5708 |
| 5709 /* Find the firstest rowid any synonym points to. */ |
| 5710 i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); |
| 5711 |
| 5712 /* Advance each iterator that currently points to iRowid. Or, if iFrom |
| 5713 ** is valid - each iterator that points to a rowid before iFrom. */ |
| 5714 for(p=pTerm; p; p=p->pSynonym){ |
| 5715 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 5716 i64 ii = p->pIter->iRowid; |
| 5717 if( ii==iRowid |
| 5718 || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) |
| 5719 ){ |
| 5720 if( bFromValid ){ |
| 5721 rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); |
| 5722 }else{ |
| 5723 rc = sqlite3Fts5IterNext(p->pIter); |
| 5724 } |
| 5725 if( rc!=SQLITE_OK ) break; |
| 5726 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 5727 bEof = 0; |
| 5728 } |
| 5729 }else{ |
| 5730 bEof = 0; |
| 5731 } |
| 5732 } |
| 5733 } |
| 5734 |
| 5735 /* Set the EOF flag if either all synonym iterators are at EOF or an |
| 5736 ** error has occurred. */ |
| 5737 pNode->bEof = (rc || bEof); |
| 5738 }else{ |
| 5739 Fts5IndexIter *pIter = pTerm->pIter; |
| 5740 |
| 5741 assert( Fts5NodeIsString(pNode) ); |
| 5742 if( bFromValid ){ |
| 5743 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
| 5744 }else{ |
| 5745 rc = sqlite3Fts5IterNext(pIter); |
| 5746 } |
| 5747 |
| 5748 pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); |
| 5749 } |
| 5750 |
| 5751 if( pNode->bEof==0 ){ |
| 5752 assert( rc==SQLITE_OK ); |
| 5753 rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
| 5754 } |
| 5755 |
| 5756 return rc; |
| 5757 } |
| 5758 |
| 5759 |
| 5760 static int fts5ExprNodeTest_TERM( |
| 5761 Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
| 5762 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ |
| 5763 ){ |
| 5764 /* As this "NEAR" object is actually a single phrase that consists |
| 5765 ** of a single term only, grab pointers into the poslist managed by the |
| 5766 ** fts5_index.c iterator object. This is much faster than synthesizing |
| 5767 ** a new poslist the way we have to for more complicated phrase or NEAR |
| 5768 ** expressions. */ |
| 5769 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; |
| 5770 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
| 5771 |
| 5772 assert( pNode->eType==FTS5_TERM ); |
| 5773 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); |
| 5774 assert( pPhrase->aTerm[0].pSynonym==0 ); |
| 5775 |
| 5776 pPhrase->poslist.n = pIter->nData; |
| 5777 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 5778 pPhrase->poslist.p = (u8*)pIter->pData; |
| 5779 } |
| 5780 pNode->iRowid = pIter->iRowid; |
| 5781 pNode->bNomatch = (pPhrase->poslist.n==0); |
| 5782 return SQLITE_OK; |
| 5783 } |
| 5784 |
| 5785 /* |
| 5786 ** xNext() method for a node of type FTS5_TERM. |
| 5787 */ |
| 5788 static int fts5ExprNodeNext_TERM( |
| 5789 Fts5Expr *pExpr, |
| 5790 Fts5ExprNode *pNode, |
| 5791 int bFromValid, |
| 5792 i64 iFrom |
| 5793 ){ |
| 5794 int rc; |
| 5795 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; |
| 5796 |
| 5797 assert( pNode->bEof==0 ); |
| 5798 if( bFromValid ){ |
| 5799 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
| 5800 }else{ |
| 5801 rc = sqlite3Fts5IterNext(pIter); |
| 5802 } |
| 5803 if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ |
| 5804 rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
| 5805 }else{ |
| 5806 pNode->bEof = 1; |
| 5807 pNode->bNomatch = 0; |
| 5808 } |
| 5809 return rc; |
| 5810 } |
| 5811 |
| 5812 static void fts5ExprNodeTest_OR( |
| 5813 Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
| 5814 Fts5ExprNode *pNode /* Expression node to test */ |
| 5815 ){ |
| 5816 Fts5ExprNode *pNext = pNode->apChild[0]; |
| 5817 int i; |
| 5818 |
| 5819 for(i=1; i<pNode->nChild; i++){ |
| 5820 Fts5ExprNode *pChild = pNode->apChild[i]; |
| 5821 int cmp = fts5NodeCompare(pExpr, pNext, pChild); |
| 5822 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ |
| 5823 pNext = pChild; |
| 5824 } |
| 5825 } |
| 5826 pNode->iRowid = pNext->iRowid; |
| 5827 pNode->bEof = pNext->bEof; |
| 5828 pNode->bNomatch = pNext->bNomatch; |
| 5829 } |
| 5830 |
| 5831 static int fts5ExprNodeNext_OR( |
| 5832 Fts5Expr *pExpr, |
| 5833 Fts5ExprNode *pNode, |
| 5834 int bFromValid, |
| 5835 i64 iFrom |
| 5836 ){ |
| 5837 int i; |
| 5838 i64 iLast = pNode->iRowid; |
| 5839 |
| 5840 for(i=0; i<pNode->nChild; i++){ |
| 5841 Fts5ExprNode *p1 = pNode->apChild[i]; |
| 5842 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); |
| 5843 if( p1->bEof==0 ){ |
| 5844 if( (p1->iRowid==iLast) |
| 5845 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) |
| 5846 ){ |
| 5847 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); |
| 5848 if( rc!=SQLITE_OK ) return rc; |
| 5849 } |
| 5850 } |
| 5851 } |
| 5852 |
| 5853 fts5ExprNodeTest_OR(pExpr, pNode); |
| 5854 return SQLITE_OK; |
| 5855 } |
| 5856 |
| 5857 /* |
| 5858 ** Argument pNode is an FTS5_AND node. |
| 5859 */ |
| 5860 static int fts5ExprNodeTest_AND( |
| 5861 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 5862 Fts5ExprNode *pAnd /* FTS5_AND node to advance */ |
| 5863 ){ |
| 5864 int iChild; |
| 5865 i64 iLast = pAnd->iRowid; |
| 5866 int rc = SQLITE_OK; |
| 5867 int bMatch; |
| 5868 |
| 5869 assert( pAnd->bEof==0 ); |
| 5870 do { |
| 5871 pAnd->bNomatch = 0; |
| 5872 bMatch = 1; |
| 5873 for(iChild=0; iChild<pAnd->nChild; iChild++){ |
| 5874 Fts5ExprNode *pChild = pAnd->apChild[iChild]; |
| 5875 int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); |
| 5876 if( cmp>0 ){ |
| 5877 /* Advance pChild until it points to iLast or laster */ |
| 5878 rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); |
| 5879 if( rc!=SQLITE_OK ) return rc; |
| 5880 } |
| 5881 |
| 5882 /* If the child node is now at EOF, so is the parent AND node. Otherwise, |
| 5883 ** the child node is guaranteed to have advanced at least as far as |
| 5884 ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the |
| 5885 ** new lastest rowid seen so far. */ |
| 5886 assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); |
| 5887 if( pChild->bEof ){ |
| 5888 fts5ExprSetEof(pAnd); |
| 5889 bMatch = 1; |
| 5890 break; |
| 5891 }else if( iLast!=pChild->iRowid ){ |
| 5892 bMatch = 0; |
| 5893 iLast = pChild->iRowid; |
| 5894 } |
| 5895 |
| 5896 if( pChild->bNomatch ){ |
| 5897 pAnd->bNomatch = 1; |
| 5898 } |
| 5899 } |
| 5900 }while( bMatch==0 ); |
| 5901 |
| 5902 if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ |
| 5903 fts5ExprNodeZeroPoslist(pAnd); |
| 5904 } |
| 5905 pAnd->iRowid = iLast; |
| 5906 return SQLITE_OK; |
| 5907 } |
| 5908 |
| 5909 static int fts5ExprNodeNext_AND( |
| 5910 Fts5Expr *pExpr, |
| 5911 Fts5ExprNode *pNode, |
| 5912 int bFromValid, |
| 5913 i64 iFrom |
| 5914 ){ |
| 5915 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
| 5916 if( rc==SQLITE_OK ){ |
| 5917 rc = fts5ExprNodeTest_AND(pExpr, pNode); |
| 5918 } |
| 5919 return rc; |
| 5920 } |
| 5921 |
| 5922 static int fts5ExprNodeTest_NOT( |
| 5923 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 5924 Fts5ExprNode *pNode /* FTS5_NOT node to advance */ |
| 5925 ){ |
| 5926 int rc = SQLITE_OK; |
| 5927 Fts5ExprNode *p1 = pNode->apChild[0]; |
| 5928 Fts5ExprNode *p2 = pNode->apChild[1]; |
| 5929 assert( pNode->nChild==2 ); |
| 5930 |
| 5931 while( rc==SQLITE_OK && p1->bEof==0 ){ |
| 5932 int cmp = fts5NodeCompare(pExpr, p1, p2); |
| 5933 if( cmp>0 ){ |
| 5934 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); |
| 5935 cmp = fts5NodeCompare(pExpr, p1, p2); |
| 5936 } |
| 5937 assert( rc!=SQLITE_OK || cmp<=0 ); |
| 5938 if( cmp || p2->bNomatch ) break; |
| 5939 rc = fts5ExprNodeNext(pExpr, p1, 0, 0); |
| 5940 } |
| 5941 pNode->bEof = p1->bEof; |
| 5942 pNode->bNomatch = p1->bNomatch; |
| 5943 pNode->iRowid = p1->iRowid; |
| 5944 if( p1->bEof ){ |
| 5945 fts5ExprNodeZeroPoslist(p2); |
| 5946 } |
| 5947 return rc; |
| 5948 } |
| 5949 |
| 5950 static int fts5ExprNodeNext_NOT( |
| 5951 Fts5Expr *pExpr, |
| 5952 Fts5ExprNode *pNode, |
| 5953 int bFromValid, |
| 5954 i64 iFrom |
| 5955 ){ |
| 5956 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
| 5957 if( rc==SQLITE_OK ){ |
| 5958 rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
| 5959 } |
| 5960 return rc; |
| 5961 } |
| 5962 |
| 5963 /* |
| 5964 ** If pNode currently points to a match, this function returns SQLITE_OK |
| 5965 ** without modifying it. Otherwise, pNode is advanced until it does point |
| 5966 ** to a match or EOF is reached. |
| 5967 */ |
| 5968 static int fts5ExprNodeTest( |
| 5969 Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
| 5970 Fts5ExprNode *pNode /* Expression node to test */ |
| 5971 ){ |
| 5972 int rc = SQLITE_OK; |
| 5973 if( pNode->bEof==0 ){ |
| 5974 switch( pNode->eType ){ |
| 5975 |
| 5976 case FTS5_STRING: { |
| 5977 rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
| 5978 break; |
| 5979 } |
| 5980 |
| 5981 case FTS5_TERM: { |
| 5982 rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
| 5983 break; |
| 5984 } |
| 5985 |
| 5986 case FTS5_AND: { |
| 5987 rc = fts5ExprNodeTest_AND(pExpr, pNode); |
| 5988 break; |
| 5989 } |
| 5990 |
| 5991 case FTS5_OR: { |
| 5992 fts5ExprNodeTest_OR(pExpr, pNode); |
| 5993 break; |
| 5994 } |
| 5995 |
| 5996 default: assert( pNode->eType==FTS5_NOT ); { |
| 5997 rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
| 5998 break; |
| 5999 } |
| 6000 } |
| 6001 } |
| 6002 return rc; |
| 6003 } |
| 6004 |
| 6005 |
| 6006 /* |
| 6007 ** Set node pNode, which is part of expression pExpr, to point to the first |
| 6008 ** match. If there are no matches, set the Node.bEof flag to indicate EOF. |
| 6009 ** |
| 6010 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
| 6011 ** It is not an error if there are no matches. |
| 6012 */ |
| 6013 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ |
| 6014 int rc = SQLITE_OK; |
| 6015 pNode->bEof = 0; |
| 6016 pNode->bNomatch = 0; |
| 6017 |
| 6018 if( Fts5NodeIsString(pNode) ){ |
| 6019 /* Initialize all term iterators in the NEAR object. */ |
| 6020 rc = fts5ExprNearInitAll(pExpr, pNode); |
| 6021 }else if( pNode->xNext==0 ){ |
| 6022 pNode->bEof = 1; |
| 6023 }else{ |
| 6024 int i; |
| 6025 int nEof = 0; |
| 6026 for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){ |
| 6027 Fts5ExprNode *pChild = pNode->apChild[i]; |
| 6028 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); |
| 6029 assert( pChild->bEof==0 || pChild->bEof==1 ); |
| 6030 nEof += pChild->bEof; |
| 6031 } |
| 6032 pNode->iRowid = pNode->apChild[0]->iRowid; |
| 6033 |
| 6034 switch( pNode->eType ){ |
| 6035 case FTS5_AND: |
| 6036 if( nEof>0 ) fts5ExprSetEof(pNode); |
| 6037 break; |
| 6038 |
| 6039 case FTS5_OR: |
| 6040 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); |
| 6041 break; |
| 6042 |
| 6043 default: |
| 6044 assert( pNode->eType==FTS5_NOT ); |
| 6045 pNode->bEof = pNode->apChild[0]->bEof; |
| 6046 break; |
| 6047 } |
| 6048 } |
| 6049 |
| 6050 if( rc==SQLITE_OK ){ |
| 6051 rc = fts5ExprNodeTest(pExpr, pNode); |
| 6052 } |
| 6053 return rc; |
| 6054 } |
| 6055 |
| 6056 |
| 6057 /* |
| 6058 ** Begin iterating through the set of documents in index pIdx matched by |
| 6059 ** the MATCH expression passed as the first argument. If the "bDesc" |
| 6060 ** parameter is passed a non-zero value, iteration is in descending rowid |
| 6061 ** order. Or, if it is zero, in ascending order. |
| 6062 ** |
| 6063 ** If iterating in ascending rowid order (bDesc==0), the first document |
| 6064 ** visited is that with the smallest rowid that is larger than or equal |
| 6065 ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), |
| 6066 ** then the first document visited must have a rowid smaller than or |
| 6067 ** equal to iFirst. |
| 6068 ** |
| 6069 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
| 6070 ** is not considered an error if the query does not match any documents. |
| 6071 */ |
| 6072 static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bD
esc){ |
| 6073 Fts5ExprNode *pRoot = p->pRoot; |
| 6074 int rc; /* Return code */ |
| 6075 |
| 6076 p->pIndex = pIdx; |
| 6077 p->bDesc = bDesc; |
| 6078 rc = fts5ExprNodeFirst(p, pRoot); |
| 6079 |
| 6080 /* If not at EOF but the current rowid occurs earlier than iFirst in |
| 6081 ** the iteration order, move to document iFirst or later. */ |
| 6082 if( rc==SQLITE_OK |
| 6083 && 0==pRoot->bEof |
| 6084 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 |
| 6085 ){ |
| 6086 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); |
| 6087 } |
| 6088 |
| 6089 /* If the iterator is not at a real match, skip forward until it is. */ |
| 6090 while( pRoot->bNomatch ){ |
| 6091 assert( pRoot->bEof==0 && rc==SQLITE_OK ); |
| 6092 rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
| 6093 } |
| 6094 return rc; |
| 6095 } |
| 6096 |
| 6097 /* |
| 6098 ** Move to the next document |
| 6099 ** |
| 6100 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
| 6101 ** is not considered an error if the query does not match any documents. |
| 6102 */ |
| 6103 static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ |
| 6104 int rc; |
| 6105 Fts5ExprNode *pRoot = p->pRoot; |
| 6106 assert( pRoot->bEof==0 && pRoot->bNomatch==0 ); |
| 6107 do { |
| 6108 rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
| 6109 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) ); |
| 6110 }while( pRoot->bNomatch ); |
| 6111 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ |
| 6112 pRoot->bEof = 1; |
| 6113 } |
| 6114 return rc; |
| 6115 } |
| 6116 |
| 6117 static int sqlite3Fts5ExprEof(Fts5Expr *p){ |
| 6118 return p->pRoot->bEof; |
| 6119 } |
| 6120 |
| 6121 static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ |
| 6122 return p->pRoot->iRowid; |
| 6123 } |
| 6124 |
| 6125 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ |
| 6126 int rc = SQLITE_OK; |
| 6127 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); |
| 6128 return rc; |
| 6129 } |
| 6130 |
| 6131 /* |
| 6132 ** Free the phrase object passed as the only argument. |
| 6133 */ |
| 6134 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ |
| 6135 if( pPhrase ){ |
| 6136 int i; |
| 6137 for(i=0; i<pPhrase->nTerm; i++){ |
| 6138 Fts5ExprTerm *pSyn; |
| 6139 Fts5ExprTerm *pNext; |
| 6140 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; |
| 6141 sqlite3_free(pTerm->zTerm); |
| 6142 sqlite3Fts5IterClose(pTerm->pIter); |
| 6143 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ |
| 6144 pNext = pSyn->pSynonym; |
| 6145 sqlite3Fts5IterClose(pSyn->pIter); |
| 6146 fts5BufferFree((Fts5Buffer*)&pSyn[1]); |
| 6147 sqlite3_free(pSyn); |
| 6148 } |
| 6149 } |
| 6150 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); |
| 6151 sqlite3_free(pPhrase); |
| 6152 } |
| 6153 } |
| 6154 |
| 6155 /* |
| 6156 ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated |
| 6157 ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is |
| 6158 ** appended to it and the results returned. |
| 6159 ** |
| 6160 ** If an OOM error occurs, both the pNear and pPhrase objects are freed and |
| 6161 ** NULL returned. |
| 6162 */ |
| 6163 static Fts5ExprNearset *sqlite3Fts5ParseNearset( |
| 6164 Fts5Parse *pParse, /* Parse context */ |
| 6165 Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ |
| 6166 Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ |
| 6167 ){ |
| 6168 const int SZALLOC = 8; |
| 6169 Fts5ExprNearset *pRet = 0; |
| 6170 |
| 6171 if( pParse->rc==SQLITE_OK ){ |
| 6172 if( pPhrase==0 ){ |
| 6173 return pNear; |
| 6174 } |
| 6175 if( pNear==0 ){ |
| 6176 int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); |
| 6177 pRet = sqlite3_malloc(nByte); |
| 6178 if( pRet==0 ){ |
| 6179 pParse->rc = SQLITE_NOMEM; |
| 6180 }else{ |
| 6181 memset(pRet, 0, nByte); |
| 6182 } |
| 6183 }else if( (pNear->nPhrase % SZALLOC)==0 ){ |
| 6184 int nNew = pNear->nPhrase + SZALLOC; |
| 6185 int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); |
| 6186 |
| 6187 pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); |
| 6188 if( pRet==0 ){ |
| 6189 pParse->rc = SQLITE_NOMEM; |
| 6190 } |
| 6191 }else{ |
| 6192 pRet = pNear; |
| 6193 } |
| 6194 } |
| 6195 |
| 6196 if( pRet==0 ){ |
| 6197 assert( pParse->rc!=SQLITE_OK ); |
| 6198 sqlite3Fts5ParseNearsetFree(pNear); |
| 6199 sqlite3Fts5ParsePhraseFree(pPhrase); |
| 6200 }else{ |
| 6201 if( pRet->nPhrase>0 ){ |
| 6202 Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; |
| 6203 assert( pLast==pParse->apPhrase[pParse->nPhrase-2] ); |
| 6204 if( pPhrase->nTerm==0 ){ |
| 6205 fts5ExprPhraseFree(pPhrase); |
| 6206 pRet->nPhrase--; |
| 6207 pParse->nPhrase--; |
| 6208 pPhrase = pLast; |
| 6209 }else if( pLast->nTerm==0 ){ |
| 6210 fts5ExprPhraseFree(pLast); |
| 6211 pParse->apPhrase[pParse->nPhrase-2] = pPhrase; |
| 6212 pParse->nPhrase--; |
| 6213 pRet->nPhrase--; |
| 6214 } |
| 6215 } |
| 6216 pRet->apPhrase[pRet->nPhrase++] = pPhrase; |
| 6217 } |
| 6218 return pRet; |
| 6219 } |
| 6220 |
| 6221 typedef struct TokenCtx TokenCtx; |
| 6222 struct TokenCtx { |
| 6223 Fts5ExprPhrase *pPhrase; |
| 6224 int rc; |
| 6225 }; |
| 6226 |
| 6227 /* |
| 6228 ** Callback for tokenizing terms used by ParseTerm(). |
| 6229 */ |
| 6230 static int fts5ParseTokenize( |
| 6231 void *pContext, /* Pointer to Fts5InsertCtx object */ |
| 6232 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 6233 const char *pToken, /* Buffer containing token */ |
| 6234 int nToken, /* Size of token in bytes */ |
| 6235 int iUnused1, /* Start offset of token */ |
| 6236 int iUnused2 /* End offset of token */ |
| 6237 ){ |
| 6238 int rc = SQLITE_OK; |
| 6239 const int SZALLOC = 8; |
| 6240 TokenCtx *pCtx = (TokenCtx*)pContext; |
| 6241 Fts5ExprPhrase *pPhrase = pCtx->pPhrase; |
| 6242 |
| 6243 UNUSED_PARAM2(iUnused1, iUnused2); |
| 6244 |
| 6245 /* If an error has already occurred, this is a no-op */ |
| 6246 if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; |
| 6247 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 6248 |
| 6249 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ |
| 6250 Fts5ExprTerm *pSyn; |
| 6251 int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; |
| 6252 pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); |
| 6253 if( pSyn==0 ){ |
| 6254 rc = SQLITE_NOMEM; |
| 6255 }else{ |
| 6256 memset(pSyn, 0, nByte); |
| 6257 pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); |
| 6258 memcpy(pSyn->zTerm, pToken, nToken); |
| 6259 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; |
| 6260 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; |
| 6261 } |
| 6262 }else{ |
| 6263 Fts5ExprTerm *pTerm; |
| 6264 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ |
| 6265 Fts5ExprPhrase *pNew; |
| 6266 int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); |
| 6267 |
| 6268 pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, |
| 6269 sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew |
| 6270 ); |
| 6271 if( pNew==0 ){ |
| 6272 rc = SQLITE_NOMEM; |
| 6273 }else{ |
| 6274 if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); |
| 6275 pCtx->pPhrase = pPhrase = pNew; |
| 6276 pNew->nTerm = nNew - SZALLOC; |
| 6277 } |
| 6278 } |
| 6279 |
| 6280 if( rc==SQLITE_OK ){ |
| 6281 pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; |
| 6282 memset(pTerm, 0, sizeof(Fts5ExprTerm)); |
| 6283 pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); |
| 6284 } |
| 6285 } |
| 6286 |
| 6287 pCtx->rc = rc; |
| 6288 return rc; |
| 6289 } |
| 6290 |
| 6291 |
| 6292 /* |
| 6293 ** Free the phrase object passed as the only argument. |
| 6294 */ |
| 6295 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ |
| 6296 fts5ExprPhraseFree(pPhrase); |
| 6297 } |
| 6298 |
| 6299 /* |
| 6300 ** Free the phrase object passed as the second argument. |
| 6301 */ |
| 6302 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ |
| 6303 if( pNear ){ |
| 6304 int i; |
| 6305 for(i=0; i<pNear->nPhrase; i++){ |
| 6306 fts5ExprPhraseFree(pNear->apPhrase[i]); |
| 6307 } |
| 6308 sqlite3_free(pNear->pColset); |
| 6309 sqlite3_free(pNear); |
| 6310 } |
| 6311 } |
| 6312 |
| 6313 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ |
| 6314 assert( pParse->pExpr==0 ); |
| 6315 pParse->pExpr = p; |
| 6316 } |
| 6317 |
| 6318 /* |
| 6319 ** This function is called by the parser to process a string token. The |
| 6320 ** string may or may not be quoted. In any case it is tokenized and a |
| 6321 ** phrase object consisting of all tokens returned. |
| 6322 */ |
| 6323 static Fts5ExprPhrase *sqlite3Fts5ParseTerm( |
| 6324 Fts5Parse *pParse, /* Parse context */ |
| 6325 Fts5ExprPhrase *pAppend, /* Phrase to append to */ |
| 6326 Fts5Token *pToken, /* String to tokenize */ |
| 6327 int bPrefix /* True if there is a trailing "*" */ |
| 6328 ){ |
| 6329 Fts5Config *pConfig = pParse->pConfig; |
| 6330 TokenCtx sCtx; /* Context object passed to callback */ |
| 6331 int rc; /* Tokenize return code */ |
| 6332 char *z = 0; |
| 6333 |
| 6334 memset(&sCtx, 0, sizeof(TokenCtx)); |
| 6335 sCtx.pPhrase = pAppend; |
| 6336 |
| 6337 rc = fts5ParseStringFromToken(pToken, &z); |
| 6338 if( rc==SQLITE_OK ){ |
| 6339 int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0); |
| 6340 int n; |
| 6341 sqlite3Fts5Dequote(z); |
| 6342 n = (int)strlen(z); |
| 6343 rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); |
| 6344 } |
| 6345 sqlite3_free(z); |
| 6346 if( rc || (rc = sCtx.rc) ){ |
| 6347 pParse->rc = rc; |
| 6348 fts5ExprPhraseFree(sCtx.pPhrase); |
| 6349 sCtx.pPhrase = 0; |
| 6350 }else{ |
| 6351 |
| 6352 if( pAppend==0 ){ |
| 6353 if( (pParse->nPhrase % 8)==0 ){ |
| 6354 int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); |
| 6355 Fts5ExprPhrase **apNew; |
| 6356 apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); |
| 6357 if( apNew==0 ){ |
| 6358 pParse->rc = SQLITE_NOMEM; |
| 6359 fts5ExprPhraseFree(sCtx.pPhrase); |
| 6360 return 0; |
| 6361 } |
| 6362 pParse->apPhrase = apNew; |
| 6363 } |
| 6364 pParse->nPhrase++; |
| 6365 } |
| 6366 |
| 6367 if( sCtx.pPhrase==0 ){ |
| 6368 /* This happens when parsing a token or quoted phrase that contains |
| 6369 ** no token characters at all. (e.g ... MATCH '""'). */ |
| 6370 sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase)); |
| 6371 }else if( sCtx.pPhrase->nTerm ){ |
| 6372 sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; |
| 6373 } |
| 6374 pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; |
| 6375 } |
| 6376 |
| 6377 return sCtx.pPhrase; |
| 6378 } |
| 6379 |
| 6380 /* |
| 6381 ** Create a new FTS5 expression by cloning phrase iPhrase of the |
| 6382 ** expression passed as the second argument. |
| 6383 */ |
| 6384 static int sqlite3Fts5ExprClonePhrase( |
| 6385 Fts5Expr *pExpr, |
| 6386 int iPhrase, |
| 6387 Fts5Expr **ppNew |
| 6388 ){ |
| 6389 int rc = SQLITE_OK; /* Return code */ |
| 6390 Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ |
| 6391 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ |
| 6392 TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ |
| 6393 |
| 6394 pOrig = pExpr->apExprPhrase[iPhrase]; |
| 6395 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); |
| 6396 if( rc==SQLITE_OK ){ |
| 6397 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, |
| 6398 sizeof(Fts5ExprPhrase*)); |
| 6399 } |
| 6400 if( rc==SQLITE_OK ){ |
| 6401 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, |
| 6402 sizeof(Fts5ExprNode)); |
| 6403 } |
| 6404 if( rc==SQLITE_OK ){ |
| 6405 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, |
| 6406 sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); |
| 6407 } |
| 6408 if( rc==SQLITE_OK ){ |
| 6409 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; |
| 6410 if( pColsetOrig ){ |
| 6411 int nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int); |
| 6412 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); |
| 6413 if( pColset ){ |
| 6414 memcpy(pColset, pColsetOrig, nByte); |
| 6415 } |
| 6416 pNew->pRoot->pNear->pColset = pColset; |
| 6417 } |
| 6418 } |
| 6419 |
| 6420 if( pOrig->nTerm ){ |
| 6421 int i; /* Used to iterate through phrase terms */ |
| 6422 for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ |
| 6423 int tflags = 0; |
| 6424 Fts5ExprTerm *p; |
| 6425 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ |
| 6426 const char *zTerm = p->zTerm; |
| 6427 rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), |
| 6428 0, 0); |
| 6429 tflags = FTS5_TOKEN_COLOCATED; |
| 6430 } |
| 6431 if( rc==SQLITE_OK ){ |
| 6432 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; |
| 6433 } |
| 6434 } |
| 6435 }else{ |
| 6436 /* This happens when parsing a token or quoted phrase that contains |
| 6437 ** no token characters at all. (e.g ... MATCH '""'). */ |
| 6438 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); |
| 6439 } |
| 6440 |
| 6441 if( rc==SQLITE_OK ){ |
| 6442 /* All the allocations succeeded. Put the expression object together. */ |
| 6443 pNew->pIndex = pExpr->pIndex; |
| 6444 pNew->pConfig = pExpr->pConfig; |
| 6445 pNew->nPhrase = 1; |
| 6446 pNew->apExprPhrase[0] = sCtx.pPhrase; |
| 6447 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; |
| 6448 pNew->pRoot->pNear->nPhrase = 1; |
| 6449 sCtx.pPhrase->pNode = pNew->pRoot; |
| 6450 |
| 6451 if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ |
| 6452 pNew->pRoot->eType = FTS5_TERM; |
| 6453 pNew->pRoot->xNext = fts5ExprNodeNext_TERM; |
| 6454 }else{ |
| 6455 pNew->pRoot->eType = FTS5_STRING; |
| 6456 pNew->pRoot->xNext = fts5ExprNodeNext_STRING; |
| 6457 } |
| 6458 }else{ |
| 6459 sqlite3Fts5ExprFree(pNew); |
| 6460 fts5ExprPhraseFree(sCtx.pPhrase); |
| 6461 pNew = 0; |
| 6462 } |
| 6463 |
| 6464 *ppNew = pNew; |
| 6465 return rc; |
| 6466 } |
| 6467 |
| 6468 |
| 6469 /* |
| 6470 ** Token pTok has appeared in a MATCH expression where the NEAR operator |
| 6471 ** is expected. If token pTok does not contain "NEAR", store an error |
| 6472 ** in the pParse object. |
| 6473 */ |
| 6474 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ |
| 6475 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ |
| 6476 sqlite3Fts5ParseError( |
| 6477 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p |
| 6478 ); |
| 6479 } |
| 6480 } |
| 6481 |
| 6482 static void sqlite3Fts5ParseSetDistance( |
| 6483 Fts5Parse *pParse, |
| 6484 Fts5ExprNearset *pNear, |
| 6485 Fts5Token *p |
| 6486 ){ |
| 6487 if( pNear ){ |
| 6488 int nNear = 0; |
| 6489 int i; |
| 6490 if( p->n ){ |
| 6491 for(i=0; i<p->n; i++){ |
| 6492 char c = (char)p->p[i]; |
| 6493 if( c<'0' || c>'9' ){ |
| 6494 sqlite3Fts5ParseError( |
| 6495 pParse, "expected integer, got \"%.*s\"", p->n, p->p |
| 6496 ); |
| 6497 return; |
| 6498 } |
| 6499 nNear = nNear * 10 + (p->p[i] - '0'); |
| 6500 } |
| 6501 }else{ |
| 6502 nNear = FTS5_DEFAULT_NEARDIST; |
| 6503 } |
| 6504 pNear->nNear = nNear; |
| 6505 } |
| 6506 } |
| 6507 |
| 6508 /* |
| 6509 ** The second argument passed to this function may be NULL, or it may be |
| 6510 ** an existing Fts5Colset object. This function returns a pointer to |
| 6511 ** a new colset object containing the contents of (p) with new value column |
| 6512 ** number iCol appended. |
| 6513 ** |
| 6514 ** If an OOM error occurs, store an error code in pParse and return NULL. |
| 6515 ** The old colset object (if any) is not freed in this case. |
| 6516 */ |
| 6517 static Fts5Colset *fts5ParseColset( |
| 6518 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
| 6519 Fts5Colset *p, /* Existing colset object */ |
| 6520 int iCol /* New column to add to colset object */ |
| 6521 ){ |
| 6522 int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ |
| 6523 Fts5Colset *pNew; /* New colset object to return */ |
| 6524 |
| 6525 assert( pParse->rc==SQLITE_OK ); |
| 6526 assert( iCol>=0 && iCol<pParse->pConfig->nCol ); |
| 6527 |
| 6528 pNew = sqlite3_realloc(p, sizeof(Fts5Colset) + sizeof(int)*nCol); |
| 6529 if( pNew==0 ){ |
| 6530 pParse->rc = SQLITE_NOMEM; |
| 6531 }else{ |
| 6532 int *aiCol = pNew->aiCol; |
| 6533 int i, j; |
| 6534 for(i=0; i<nCol; i++){ |
| 6535 if( aiCol[i]==iCol ) return pNew; |
| 6536 if( aiCol[i]>iCol ) break; |
| 6537 } |
| 6538 for(j=nCol; j>i; j--){ |
| 6539 aiCol[j] = aiCol[j-1]; |
| 6540 } |
| 6541 aiCol[i] = iCol; |
| 6542 pNew->nCol = nCol+1; |
| 6543 |
| 6544 #ifndef NDEBUG |
| 6545 /* Check that the array is in order and contains no duplicate entries. */ |
| 6546 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); |
| 6547 #endif |
| 6548 } |
| 6549 |
| 6550 return pNew; |
| 6551 } |
| 6552 |
| 6553 /* |
| 6554 ** Allocate and return an Fts5Colset object specifying the inverse of |
| 6555 ** the colset passed as the second argument. Free the colset passed |
| 6556 ** as the second argument before returning. |
| 6557 */ |
| 6558 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p
){ |
| 6559 Fts5Colset *pRet; |
| 6560 int nCol = pParse->pConfig->nCol; |
| 6561 |
| 6562 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, |
| 6563 sizeof(Fts5Colset) + sizeof(int)*nCol |
| 6564 ); |
| 6565 if( pRet ){ |
| 6566 int i; |
| 6567 int iOld = 0; |
| 6568 for(i=0; i<nCol; i++){ |
| 6569 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ |
| 6570 pRet->aiCol[pRet->nCol++] = i; |
| 6571 }else{ |
| 6572 iOld++; |
| 6573 } |
| 6574 } |
| 6575 } |
| 6576 |
| 6577 sqlite3_free(p); |
| 6578 return pRet; |
| 6579 } |
| 6580 |
| 6581 static Fts5Colset *sqlite3Fts5ParseColset( |
| 6582 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
| 6583 Fts5Colset *pColset, /* Existing colset object */ |
| 6584 Fts5Token *p |
| 6585 ){ |
| 6586 Fts5Colset *pRet = 0; |
| 6587 int iCol; |
| 6588 char *z; /* Dequoted copy of token p */ |
| 6589 |
| 6590 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); |
| 6591 if( pParse->rc==SQLITE_OK ){ |
| 6592 Fts5Config *pConfig = pParse->pConfig; |
| 6593 sqlite3Fts5Dequote(z); |
| 6594 for(iCol=0; iCol<pConfig->nCol; iCol++){ |
| 6595 if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; |
| 6596 } |
| 6597 if( iCol==pConfig->nCol ){ |
| 6598 sqlite3Fts5ParseError(pParse, "no such column: %s", z); |
| 6599 }else{ |
| 6600 pRet = fts5ParseColset(pParse, pColset, iCol); |
| 6601 } |
| 6602 sqlite3_free(z); |
| 6603 } |
| 6604 |
| 6605 if( pRet==0 ){ |
| 6606 assert( pParse->rc!=SQLITE_OK ); |
| 6607 sqlite3_free(pColset); |
| 6608 } |
| 6609 |
| 6610 return pRet; |
| 6611 } |
| 6612 |
| 6613 static void sqlite3Fts5ParseSetColset( |
| 6614 Fts5Parse *pParse, |
| 6615 Fts5ExprNearset *pNear, |
| 6616 Fts5Colset *pColset |
| 6617 ){ |
| 6618 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 6619 pParse->rc = SQLITE_ERROR; |
| 6620 pParse->zErr = sqlite3_mprintf( |
| 6621 "fts5: column queries are not supported (detail=none)" |
| 6622 ); |
| 6623 sqlite3_free(pColset); |
| 6624 return; |
| 6625 } |
| 6626 |
| 6627 if( pNear ){ |
| 6628 pNear->pColset = pColset; |
| 6629 }else{ |
| 6630 sqlite3_free(pColset); |
| 6631 } |
| 6632 } |
| 6633 |
| 6634 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ |
| 6635 switch( pNode->eType ){ |
| 6636 case FTS5_STRING: { |
| 6637 Fts5ExprNearset *pNear = pNode->pNear; |
| 6638 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 |
| 6639 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 |
| 6640 ){ |
| 6641 pNode->eType = FTS5_TERM; |
| 6642 pNode->xNext = fts5ExprNodeNext_TERM; |
| 6643 }else{ |
| 6644 pNode->xNext = fts5ExprNodeNext_STRING; |
| 6645 } |
| 6646 break; |
| 6647 }; |
| 6648 |
| 6649 case FTS5_OR: { |
| 6650 pNode->xNext = fts5ExprNodeNext_OR; |
| 6651 break; |
| 6652 }; |
| 6653 |
| 6654 case FTS5_AND: { |
| 6655 pNode->xNext = fts5ExprNodeNext_AND; |
| 6656 break; |
| 6657 }; |
| 6658 |
| 6659 default: assert( pNode->eType==FTS5_NOT ); { |
| 6660 pNode->xNext = fts5ExprNodeNext_NOT; |
| 6661 break; |
| 6662 }; |
| 6663 } |
| 6664 } |
| 6665 |
| 6666 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ |
| 6667 if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ |
| 6668 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; |
| 6669 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); |
| 6670 p->nChild += pSub->nChild; |
| 6671 sqlite3_free(pSub); |
| 6672 }else{ |
| 6673 p->apChild[p->nChild++] = pSub; |
| 6674 } |
| 6675 } |
| 6676 |
| 6677 /* |
| 6678 ** Allocate and return a new expression object. If anything goes wrong (i.e. |
| 6679 ** OOM error), leave an error code in pParse and return NULL. |
| 6680 */ |
| 6681 static Fts5ExprNode *sqlite3Fts5ParseNode( |
| 6682 Fts5Parse *pParse, /* Parse context */ |
| 6683 int eType, /* FTS5_STRING, AND, OR or NOT */ |
| 6684 Fts5ExprNode *pLeft, /* Left hand child expression */ |
| 6685 Fts5ExprNode *pRight, /* Right hand child expression */ |
| 6686 Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ |
| 6687 ){ |
| 6688 Fts5ExprNode *pRet = 0; |
| 6689 |
| 6690 if( pParse->rc==SQLITE_OK ){ |
| 6691 int nChild = 0; /* Number of children of returned node */ |
| 6692 int nByte; /* Bytes of space to allocate for this node */ |
| 6693 |
| 6694 assert( (eType!=FTS5_STRING && !pNear) |
| 6695 || (eType==FTS5_STRING && !pLeft && !pRight) |
| 6696 ); |
| 6697 if( eType==FTS5_STRING && pNear==0 ) return 0; |
| 6698 if( eType!=FTS5_STRING && pLeft==0 ) return pRight; |
| 6699 if( eType!=FTS5_STRING && pRight==0 ) return pLeft; |
| 6700 |
| 6701 if( eType==FTS5_NOT ){ |
| 6702 nChild = 2; |
| 6703 }else if( eType==FTS5_AND || eType==FTS5_OR ){ |
| 6704 nChild = 2; |
| 6705 if( pLeft->eType==eType ) nChild += pLeft->nChild-1; |
| 6706 if( pRight->eType==eType ) nChild += pRight->nChild-1; |
| 6707 } |
| 6708 |
| 6709 nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1); |
| 6710 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); |
| 6711 |
| 6712 if( pRet ){ |
| 6713 pRet->eType = eType; |
| 6714 pRet->pNear = pNear; |
| 6715 fts5ExprAssignXNext(pRet); |
| 6716 if( eType==FTS5_STRING ){ |
| 6717 int iPhrase; |
| 6718 for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ |
| 6719 pNear->apPhrase[iPhrase]->pNode = pRet; |
| 6720 if( pNear->apPhrase[iPhrase]->nTerm==0 ){ |
| 6721 pRet->xNext = 0; |
| 6722 pRet->eType = FTS5_EOF; |
| 6723 } |
| 6724 } |
| 6725 |
| 6726 if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL |
| 6727 && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm>1) |
| 6728 ){ |
| 6729 assert( pParse->rc==SQLITE_OK ); |
| 6730 pParse->rc = SQLITE_ERROR; |
| 6731 assert( pParse->zErr==0 ); |
| 6732 pParse->zErr = sqlite3_mprintf( |
| 6733 "fts5: %s queries are not supported (detail!=full)", |
| 6734 pNear->nPhrase==1 ? "phrase": "NEAR" |
| 6735 ); |
| 6736 sqlite3_free(pRet); |
| 6737 pRet = 0; |
| 6738 } |
| 6739 |
| 6740 }else{ |
| 6741 fts5ExprAddChildren(pRet, pLeft); |
| 6742 fts5ExprAddChildren(pRet, pRight); |
| 6743 } |
| 6744 } |
| 6745 } |
| 6746 |
| 6747 if( pRet==0 ){ |
| 6748 assert( pParse->rc!=SQLITE_OK ); |
| 6749 sqlite3Fts5ParseNodeFree(pLeft); |
| 6750 sqlite3Fts5ParseNodeFree(pRight); |
| 6751 sqlite3Fts5ParseNearsetFree(pNear); |
| 6752 } |
| 6753 return pRet; |
| 6754 } |
| 6755 |
| 6756 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( |
| 6757 Fts5Parse *pParse, /* Parse context */ |
| 6758 Fts5ExprNode *pLeft, /* Left hand child expression */ |
| 6759 Fts5ExprNode *pRight /* Right hand child expression */ |
| 6760 ){ |
| 6761 Fts5ExprNode *pRet = 0; |
| 6762 Fts5ExprNode *pPrev; |
| 6763 |
| 6764 if( pParse->rc ){ |
| 6765 sqlite3Fts5ParseNodeFree(pLeft); |
| 6766 sqlite3Fts5ParseNodeFree(pRight); |
| 6767 }else{ |
| 6768 |
| 6769 assert( pLeft->eType==FTS5_STRING |
| 6770 || pLeft->eType==FTS5_TERM |
| 6771 || pLeft->eType==FTS5_EOF |
| 6772 || pLeft->eType==FTS5_AND |
| 6773 ); |
| 6774 assert( pRight->eType==FTS5_STRING |
| 6775 || pRight->eType==FTS5_TERM |
| 6776 || pRight->eType==FTS5_EOF |
| 6777 ); |
| 6778 |
| 6779 if( pLeft->eType==FTS5_AND ){ |
| 6780 pPrev = pLeft->apChild[pLeft->nChild-1]; |
| 6781 }else{ |
| 6782 pPrev = pLeft; |
| 6783 } |
| 6784 assert( pPrev->eType==FTS5_STRING |
| 6785 || pPrev->eType==FTS5_TERM |
| 6786 || pPrev->eType==FTS5_EOF |
| 6787 ); |
| 6788 |
| 6789 if( pRight->eType==FTS5_EOF ){ |
| 6790 assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] ); |
| 6791 sqlite3Fts5ParseNodeFree(pRight); |
| 6792 pRet = pLeft; |
| 6793 pParse->nPhrase--; |
| 6794 } |
| 6795 else if( pPrev->eType==FTS5_EOF ){ |
| 6796 Fts5ExprPhrase **ap; |
| 6797 |
| 6798 if( pPrev==pLeft ){ |
| 6799 pRet = pRight; |
| 6800 }else{ |
| 6801 pLeft->apChild[pLeft->nChild-1] = pRight; |
| 6802 pRet = pLeft; |
| 6803 } |
| 6804 |
| 6805 ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; |
| 6806 assert( ap[0]==pPrev->pNear->apPhrase[0] ); |
| 6807 memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); |
| 6808 pParse->nPhrase--; |
| 6809 |
| 6810 sqlite3Fts5ParseNodeFree(pPrev); |
| 6811 } |
| 6812 else{ |
| 6813 pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0); |
| 6814 } |
| 6815 } |
| 6816 |
| 6817 return pRet; |
| 6818 } |
| 6819 |
| 6820 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ |
| 6821 int nByte = 0; |
| 6822 Fts5ExprTerm *p; |
| 6823 char *zQuoted; |
| 6824 |
| 6825 /* Determine the maximum amount of space required. */ |
| 6826 for(p=pTerm; p; p=p->pSynonym){ |
| 6827 nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; |
| 6828 } |
| 6829 zQuoted = sqlite3_malloc(nByte); |
| 6830 |
| 6831 if( zQuoted ){ |
| 6832 int i = 0; |
| 6833 for(p=pTerm; p; p=p->pSynonym){ |
| 6834 char *zIn = p->zTerm; |
| 6835 zQuoted[i++] = '"'; |
| 6836 while( *zIn ){ |
| 6837 if( *zIn=='"' ) zQuoted[i++] = '"'; |
| 6838 zQuoted[i++] = *zIn++; |
| 6839 } |
| 6840 zQuoted[i++] = '"'; |
| 6841 if( p->pSynonym ) zQuoted[i++] = '|'; |
| 6842 } |
| 6843 if( pTerm->bPrefix ){ |
| 6844 zQuoted[i++] = ' '; |
| 6845 zQuoted[i++] = '*'; |
| 6846 } |
| 6847 zQuoted[i++] = '\0'; |
| 6848 } |
| 6849 return zQuoted; |
| 6850 } |
| 6851 |
/*
** Format the printf-style arguments (zFmt, ...) and append the result to
** string zApp, which may be NULL. Return a pointer to the combined string.
**
** zApp is always freed by this function. The returned buffer must
** eventually be freed by the caller using sqlite3_free(). NULL is returned
** if formatting the new text, or joining it to zApp, returns NULL.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zTail;                    /* Newly formatted text */
  char *zResult;                  /* Value to return */
  va_list ap;

  va_start(ap, zFmt);
  zTail = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zResult = zTail;
  if( zApp!=0 && zTail!=0 ){
    zResult = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }
  sqlite3_free(zApp);
  return zResult;
}
| 6866 |
| 6867 /* |
| 6868 ** Compose a tcl-readable representation of expression pExpr. Return a |
| 6869 ** pointer to a buffer containing that representation. It is the |
| 6870 ** responsibility of the caller to at some point free the buffer using |
| 6871 ** sqlite3_free(). |
| 6872 */ |
| 6873 static char *fts5ExprPrintTcl( |
| 6874 Fts5Config *pConfig, |
| 6875 const char *zNearsetCmd, |
| 6876 Fts5ExprNode *pExpr |
| 6877 ){ |
| 6878 char *zRet = 0; |
| 6879 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
| 6880 Fts5ExprNearset *pNear = pExpr->pNear; |
| 6881 int i; |
| 6882 int iTerm; |
| 6883 |
| 6884 zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); |
| 6885 if( zRet==0 ) return 0; |
| 6886 if( pNear->pColset ){ |
| 6887 int *aiCol = pNear->pColset->aiCol; |
| 6888 int nCol = pNear->pColset->nCol; |
| 6889 if( nCol==1 ){ |
| 6890 zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); |
| 6891 }else{ |
| 6892 zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); |
| 6893 for(i=1; i<pNear->pColset->nCol; i++){ |
| 6894 zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); |
| 6895 } |
| 6896 zRet = fts5PrintfAppend(zRet, "} "); |
| 6897 } |
| 6898 if( zRet==0 ) return 0; |
| 6899 } |
| 6900 |
| 6901 if( pNear->nPhrase>1 ){ |
| 6902 zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); |
| 6903 if( zRet==0 ) return 0; |
| 6904 } |
| 6905 |
| 6906 zRet = fts5PrintfAppend(zRet, "--"); |
| 6907 if( zRet==0 ) return 0; |
| 6908 |
| 6909 for(i=0; i<pNear->nPhrase; i++){ |
| 6910 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 6911 |
| 6912 zRet = fts5PrintfAppend(zRet, " {"); |
| 6913 for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ |
| 6914 char *zTerm = pPhrase->aTerm[iTerm].zTerm; |
| 6915 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); |
| 6916 if( pPhrase->aTerm[iTerm].bPrefix ){ |
| 6917 zRet = fts5PrintfAppend(zRet, "*"); |
| 6918 } |
| 6919 } |
| 6920 |
| 6921 if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); |
| 6922 if( zRet==0 ) return 0; |
| 6923 } |
| 6924 |
| 6925 }else{ |
| 6926 char const *zOp = 0; |
| 6927 int i; |
| 6928 switch( pExpr->eType ){ |
| 6929 case FTS5_AND: zOp = "AND"; break; |
| 6930 case FTS5_NOT: zOp = "NOT"; break; |
| 6931 default: |
| 6932 assert( pExpr->eType==FTS5_OR ); |
| 6933 zOp = "OR"; |
| 6934 break; |
| 6935 } |
| 6936 |
| 6937 zRet = sqlite3_mprintf("%s", zOp); |
| 6938 for(i=0; zRet && i<pExpr->nChild; i++){ |
| 6939 char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); |
| 6940 if( !z ){ |
| 6941 sqlite3_free(zRet); |
| 6942 zRet = 0; |
| 6943 }else{ |
| 6944 zRet = fts5PrintfAppend(zRet, " [%z]", z); |
| 6945 } |
| 6946 } |
| 6947 } |
| 6948 |
| 6949 return zRet; |
| 6950 } |
| 6951 |
| 6952 static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ |
| 6953 char *zRet = 0; |
| 6954 if( pExpr->eType==0 ){ |
| 6955 return sqlite3_mprintf("\"\""); |
| 6956 }else |
| 6957 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
| 6958 Fts5ExprNearset *pNear = pExpr->pNear; |
| 6959 int i; |
| 6960 int iTerm; |
| 6961 |
| 6962 if( pNear->pColset ){ |
| 6963 int iCol = pNear->pColset->aiCol[0]; |
| 6964 zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]); |
| 6965 if( zRet==0 ) return 0; |
| 6966 } |
| 6967 |
| 6968 if( pNear->nPhrase>1 ){ |
| 6969 zRet = fts5PrintfAppend(zRet, "NEAR("); |
| 6970 if( zRet==0 ) return 0; |
| 6971 } |
| 6972 |
| 6973 for(i=0; i<pNear->nPhrase; i++){ |
| 6974 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 6975 if( i!=0 ){ |
| 6976 zRet = fts5PrintfAppend(zRet, " "); |
| 6977 if( zRet==0 ) return 0; |
| 6978 } |
| 6979 for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ |
| 6980 char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); |
| 6981 if( zTerm ){ |
| 6982 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); |
| 6983 sqlite3_free(zTerm); |
| 6984 } |
| 6985 if( zTerm==0 || zRet==0 ){ |
| 6986 sqlite3_free(zRet); |
| 6987 return 0; |
| 6988 } |
| 6989 } |
| 6990 } |
| 6991 |
| 6992 if( pNear->nPhrase>1 ){ |
| 6993 zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); |
| 6994 if( zRet==0 ) return 0; |
| 6995 } |
| 6996 |
| 6997 }else{ |
| 6998 char const *zOp = 0; |
| 6999 int i; |
| 7000 |
| 7001 switch( pExpr->eType ){ |
| 7002 case FTS5_AND: zOp = " AND "; break; |
| 7003 case FTS5_NOT: zOp = " NOT "; break; |
| 7004 default: |
| 7005 assert( pExpr->eType==FTS5_OR ); |
| 7006 zOp = " OR "; |
| 7007 break; |
| 7008 } |
| 7009 |
| 7010 for(i=0; i<pExpr->nChild; i++){ |
| 7011 char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); |
| 7012 if( z==0 ){ |
| 7013 sqlite3_free(zRet); |
| 7014 zRet = 0; |
| 7015 }else{ |
| 7016 int e = pExpr->apChild[i]->eType; |
| 7017 int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF); |
| 7018 zRet = fts5PrintfAppend(zRet, "%s%s%z%s", |
| 7019 (i==0 ? "" : zOp), |
| 7020 (b?"(":""), z, (b?")":"") |
| 7021 ); |
| 7022 } |
| 7023 if( zRet==0 ) break; |
| 7024 } |
| 7025 } |
| 7026 |
| 7027 return zRet; |
| 7028 } |
| 7029 |
| 7030 /* |
| 7031 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) |
| 7032 ** and fts5_expr_tcl() (bTcl!=0). |
| 7033 */ |
| 7034 static void fts5ExprFunction( |
| 7035 sqlite3_context *pCtx, /* Function call context */ |
| 7036 int nArg, /* Number of args */ |
| 7037 sqlite3_value **apVal, /* Function arguments */ |
| 7038 int bTcl |
| 7039 ){ |
| 7040 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); |
| 7041 sqlite3 *db = sqlite3_context_db_handle(pCtx); |
| 7042 const char *zExpr = 0; |
| 7043 char *zErr = 0; |
| 7044 Fts5Expr *pExpr = 0; |
| 7045 int rc; |
| 7046 int i; |
| 7047 |
| 7048 const char **azConfig; /* Array of arguments for Fts5Config */ |
| 7049 const char *zNearsetCmd = "nearset"; |
| 7050 int nConfig; /* Size of azConfig[] */ |
| 7051 Fts5Config *pConfig = 0; |
| 7052 int iArg = 1; |
| 7053 |
| 7054 if( nArg<1 ){ |
| 7055 zErr = sqlite3_mprintf("wrong number of arguments to function %s", |
| 7056 bTcl ? "fts5_expr_tcl" : "fts5_expr" |
| 7057 ); |
| 7058 sqlite3_result_error(pCtx, zErr, -1); |
| 7059 sqlite3_free(zErr); |
| 7060 return; |
| 7061 } |
| 7062 |
| 7063 if( bTcl && nArg>1 ){ |
| 7064 zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); |
| 7065 iArg = 2; |
| 7066 } |
| 7067 |
| 7068 nConfig = 3 + (nArg-iArg); |
| 7069 azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); |
| 7070 if( azConfig==0 ){ |
| 7071 sqlite3_result_error_nomem(pCtx); |
| 7072 return; |
| 7073 } |
| 7074 azConfig[0] = 0; |
| 7075 azConfig[1] = "main"; |
| 7076 azConfig[2] = "tbl"; |
| 7077 for(i=3; iArg<nArg; iArg++){ |
| 7078 azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]); |
| 7079 } |
| 7080 |
| 7081 zExpr = (const char*)sqlite3_value_text(apVal[0]); |
| 7082 |
| 7083 rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); |
| 7084 if( rc==SQLITE_OK ){ |
| 7085 rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); |
| 7086 } |
| 7087 if( rc==SQLITE_OK ){ |
| 7088 char *zText; |
| 7089 if( pExpr->pRoot->xNext==0 ){ |
| 7090 zText = sqlite3_mprintf(""); |
| 7091 }else if( bTcl ){ |
| 7092 zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); |
| 7093 }else{ |
| 7094 zText = fts5ExprPrint(pConfig, pExpr->pRoot); |
| 7095 } |
| 7096 if( zText==0 ){ |
| 7097 rc = SQLITE_NOMEM; |
| 7098 }else{ |
| 7099 sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); |
| 7100 sqlite3_free(zText); |
| 7101 } |
| 7102 } |
| 7103 |
| 7104 if( rc!=SQLITE_OK ){ |
| 7105 if( zErr ){ |
| 7106 sqlite3_result_error(pCtx, zErr, -1); |
| 7107 sqlite3_free(zErr); |
| 7108 }else{ |
| 7109 sqlite3_result_error_code(pCtx, rc); |
| 7110 } |
| 7111 } |
| 7112 sqlite3_free((void *)azConfig); |
| 7113 sqlite3Fts5ConfigFree(pConfig); |
| 7114 sqlite3Fts5ExprFree(pExpr); |
| 7115 } |
| 7116 |
| 7117 static void fts5ExprFunctionHr( |
| 7118 sqlite3_context *pCtx, /* Function call context */ |
| 7119 int nArg, /* Number of args */ |
| 7120 sqlite3_value **apVal /* Function arguments */ |
| 7121 ){ |
| 7122 fts5ExprFunction(pCtx, nArg, apVal, 0); |
| 7123 } |
| 7124 static void fts5ExprFunctionTcl( |
| 7125 sqlite3_context *pCtx, /* Function call context */ |
| 7126 int nArg, /* Number of args */ |
| 7127 sqlite3_value **apVal /* Function arguments */ |
| 7128 ){ |
| 7129 fts5ExprFunction(pCtx, nArg, apVal, 1); |
| 7130 } |
| 7131 |
| 7132 /* |
| 7133 ** The implementation of an SQLite user-defined-function that accepts a |
| 7134 ** single integer as an argument. If the integer is an alpha-numeric |
| 7135 ** unicode code point, 1 is returned. Otherwise 0. |
| 7136 */ |
| 7137 static void fts5ExprIsAlnum( |
| 7138 sqlite3_context *pCtx, /* Function call context */ |
| 7139 int nArg, /* Number of args */ |
| 7140 sqlite3_value **apVal /* Function arguments */ |
| 7141 ){ |
| 7142 int iCode; |
| 7143 if( nArg!=1 ){ |
| 7144 sqlite3_result_error(pCtx, |
| 7145 "wrong number of arguments to function fts5_isalnum", -1 |
| 7146 ); |
| 7147 return; |
| 7148 } |
| 7149 iCode = sqlite3_value_int(apVal[0]); |
| 7150 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode)); |
| 7151 } |
| 7152 |
| 7153 static void fts5ExprFold( |
| 7154 sqlite3_context *pCtx, /* Function call context */ |
| 7155 int nArg, /* Number of args */ |
| 7156 sqlite3_value **apVal /* Function arguments */ |
| 7157 ){ |
| 7158 if( nArg!=1 && nArg!=2 ){ |
| 7159 sqlite3_result_error(pCtx, |
| 7160 "wrong number of arguments to function fts5_fold", -1 |
| 7161 ); |
| 7162 }else{ |
| 7163 int iCode; |
| 7164 int bRemoveDiacritics = 0; |
| 7165 iCode = sqlite3_value_int(apVal[0]); |
| 7166 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); |
| 7167 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); |
| 7168 } |
| 7169 } |
| 7170 |
| 7171 /* |
| 7172 ** This is called during initialization to register the fts5_expr() scalar |
| 7173 ** UDF with the SQLite handle passed as the only argument. |
| 7174 */ |
| 7175 static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ |
| 7176 struct Fts5ExprFunc { |
| 7177 const char *z; |
| 7178 void (*x)(sqlite3_context*,int,sqlite3_value**); |
| 7179 } aFunc[] = { |
| 7180 { "fts5_expr", fts5ExprFunctionHr }, |
| 7181 { "fts5_expr_tcl", fts5ExprFunctionTcl }, |
| 7182 { "fts5_isalnum", fts5ExprIsAlnum }, |
| 7183 { "fts5_fold", fts5ExprFold }, |
| 7184 }; |
| 7185 int i; |
| 7186 int rc = SQLITE_OK; |
| 7187 void *pCtx = (void*)pGlobal; |
| 7188 |
| 7189 for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){ |
| 7190 struct Fts5ExprFunc *p = &aFunc[i]; |
| 7191 rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); |
| 7192 } |
| 7193 |
| 7194 /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */ |
| 7195 #ifndef NDEBUG |
| 7196 (void)sqlite3Fts5ParserTrace; |
| 7197 #endif |
| 7198 |
| 7199 return rc; |
| 7200 } |
| 7201 |
| 7202 /* |
| 7203 ** Return the number of phrases in expression pExpr. |
| 7204 */ |
| 7205 static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ |
| 7206 return (pExpr ? pExpr->nPhrase : 0); |
| 7207 } |
| 7208 |
| 7209 /* |
| 7210 ** Return the number of terms in the iPhrase'th phrase in pExpr. |
| 7211 */ |
| 7212 static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ |
| 7213 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; |
| 7214 return pExpr->apExprPhrase[iPhrase]->nTerm; |
| 7215 } |
| 7216 |
| 7217 /* |
| 7218 ** This function is used to access the current position list for phrase |
| 7219 ** iPhrase. |
| 7220 */ |
| 7221 static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ |
| 7222 int nRet; |
| 7223 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
| 7224 Fts5ExprNode *pNode = pPhrase->pNode; |
| 7225 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ |
| 7226 *pa = pPhrase->poslist.p; |
| 7227 nRet = pPhrase->poslist.n; |
| 7228 }else{ |
| 7229 *pa = 0; |
| 7230 nRet = 0; |
| 7231 } |
| 7232 return nRet; |
| 7233 } |
| 7234 |
| 7235 struct Fts5PoslistPopulator { |
| 7236 Fts5PoslistWriter writer; |
| 7237 int bOk; /* True if ok to populate */ |
| 7238 int bMiss; |
| 7239 }; |
| 7240 |
| 7241 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int b
Live){ |
| 7242 Fts5PoslistPopulator *pRet; |
| 7243 pRet = sqlite3_malloc(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
| 7244 if( pRet ){ |
| 7245 int i; |
| 7246 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
| 7247 for(i=0; i<pExpr->nPhrase; i++){ |
| 7248 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; |
| 7249 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
| 7250 assert( pExpr->apExprPhrase[i]->nTerm==1 ); |
| 7251 if( bLive && |
| 7252 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) |
| 7253 ){ |
| 7254 pRet[i].bMiss = 1; |
| 7255 }else{ |
| 7256 pBuf->n = 0; |
| 7257 } |
| 7258 } |
| 7259 } |
| 7260 return pRet; |
| 7261 } |
| 7262 |
| 7263 struct Fts5ExprCtx { |
| 7264 Fts5Expr *pExpr; |
| 7265 Fts5PoslistPopulator *aPopulator; |
| 7266 i64 iOff; |
| 7267 }; |
| 7268 typedef struct Fts5ExprCtx Fts5ExprCtx; |
| 7269 |
| 7270 /* |
| 7271 ** TODO: Make this more efficient! |
| 7272 */ |
| 7273 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ |
| 7274 int i; |
| 7275 for(i=0; i<pColset->nCol; i++){ |
| 7276 if( pColset->aiCol[i]==iCol ) return 1; |
| 7277 } |
| 7278 return 0; |
| 7279 } |
| 7280 |
| 7281 static int fts5ExprPopulatePoslistsCb( |
| 7282 void *pCtx, /* Copy of 2nd argument to xTokenize() */ |
| 7283 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 7284 const char *pToken, /* Pointer to buffer containing token */ |
| 7285 int nToken, /* Size of token in bytes */ |
| 7286 int iUnused1, /* Byte offset of token within input text */ |
| 7287 int iUnused2 /* Byte offset of end of token within input text */ |
| 7288 ){ |
| 7289 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; |
| 7290 Fts5Expr *pExpr = p->pExpr; |
| 7291 int i; |
| 7292 |
| 7293 UNUSED_PARAM2(iUnused1, iUnused2); |
| 7294 |
| 7295 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 7296 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; |
| 7297 for(i=0; i<pExpr->nPhrase; i++){ |
| 7298 Fts5ExprTerm *pTerm; |
| 7299 if( p->aPopulator[i].bOk==0 ) continue; |
| 7300 for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
| 7301 int nTerm = (int)strlen(pTerm->zTerm); |
| 7302 if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) |
| 7303 && memcmp(pTerm->zTerm, pToken, nTerm)==0 |
| 7304 ){ |
| 7305 int rc = sqlite3Fts5PoslistWriterAppend( |
| 7306 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff |
| 7307 ); |
| 7308 if( rc ) return rc; |
| 7309 break; |
| 7310 } |
| 7311 } |
| 7312 } |
| 7313 return SQLITE_OK; |
| 7314 } |
| 7315 |
| 7316 static int sqlite3Fts5ExprPopulatePoslists( |
| 7317 Fts5Config *pConfig, |
| 7318 Fts5Expr *pExpr, |
| 7319 Fts5PoslistPopulator *aPopulator, |
| 7320 int iCol, |
| 7321 const char *z, int n |
| 7322 ){ |
| 7323 int i; |
| 7324 Fts5ExprCtx sCtx; |
| 7325 sCtx.pExpr = pExpr; |
| 7326 sCtx.aPopulator = aPopulator; |
| 7327 sCtx.iOff = (((i64)iCol) << 32) - 1; |
| 7328 |
| 7329 for(i=0; i<pExpr->nPhrase; i++){ |
| 7330 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
| 7331 Fts5Colset *pColset = pNode->pNear->pColset; |
| 7332 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) |
| 7333 || aPopulator[i].bMiss |
| 7334 ){ |
| 7335 aPopulator[i].bOk = 0; |
| 7336 }else{ |
| 7337 aPopulator[i].bOk = 1; |
| 7338 } |
| 7339 } |
| 7340 |
| 7341 return sqlite3Fts5Tokenize(pConfig, |
| 7342 FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb |
| 7343 ); |
| 7344 } |
| 7345 |
| 7346 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ |
| 7347 if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){ |
| 7348 pNode->pNear->apPhrase[0]->poslist.n = 0; |
| 7349 }else{ |
| 7350 int i; |
| 7351 for(i=0; i<pNode->nChild; i++){ |
| 7352 fts5ExprClearPoslists(pNode->apChild[i]); |
| 7353 } |
| 7354 } |
| 7355 } |
| 7356 |
| 7357 static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ |
| 7358 pNode->iRowid = iRowid; |
| 7359 pNode->bEof = 0; |
| 7360 switch( pNode->eType ){ |
| 7361 case FTS5_TERM: |
| 7362 case FTS5_STRING: |
| 7363 return (pNode->pNear->apPhrase[0]->poslist.n>0); |
| 7364 |
| 7365 case FTS5_AND: { |
| 7366 int i; |
| 7367 for(i=0; i<pNode->nChild; i++){ |
| 7368 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ |
| 7369 fts5ExprClearPoslists(pNode); |
| 7370 return 0; |
| 7371 } |
| 7372 } |
| 7373 break; |
| 7374 } |
| 7375 |
| 7376 case FTS5_OR: { |
| 7377 int i; |
| 7378 int bRet = 0; |
| 7379 for(i=0; i<pNode->nChild; i++){ |
| 7380 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ |
| 7381 bRet = 1; |
| 7382 } |
| 7383 } |
| 7384 return bRet; |
| 7385 } |
| 7386 |
| 7387 default: { |
| 7388 assert( pNode->eType==FTS5_NOT ); |
| 7389 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) |
| 7390 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) |
| 7391 ){ |
| 7392 fts5ExprClearPoslists(pNode); |
| 7393 return 0; |
| 7394 } |
| 7395 break; |
| 7396 } |
| 7397 } |
| 7398 return 1; |
| 7399 } |
| 7400 |
| 7401 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ |
| 7402 fts5ExprCheckPoslists(pExpr->pRoot, iRowid); |
| 7403 } |
| 7404 |
| 7405 /* |
| 7406 ** This function is only called for detail=columns tables. |
| 7407 */ |
| 7408 static int sqlite3Fts5ExprPhraseCollist( |
| 7409 Fts5Expr *pExpr, |
| 7410 int iPhrase, |
| 7411 const u8 **ppCollist, |
| 7412 int *pnCollist |
| 7413 ){ |
| 7414 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
| 7415 Fts5ExprNode *pNode = pPhrase->pNode; |
| 7416 int rc = SQLITE_OK; |
| 7417 |
| 7418 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); |
| 7419 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 7420 |
| 7421 if( pNode->bEof==0 |
| 7422 && pNode->iRowid==pExpr->pRoot->iRowid |
| 7423 && pPhrase->poslist.n>0 |
| 7424 ){ |
| 7425 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; |
| 7426 if( pTerm->pSynonym ){ |
| 7427 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; |
| 7428 rc = fts5ExprSynonymList( |
| 7429 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist |
| 7430 ); |
| 7431 }else{ |
| 7432 *ppCollist = pPhrase->aTerm[0].pIter->pData; |
| 7433 *pnCollist = pPhrase->aTerm[0].pIter->nData; |
| 7434 } |
| 7435 }else{ |
| 7436 *ppCollist = 0; |
| 7437 *pnCollist = 0; |
| 7438 } |
| 7439 |
| 7440 return rc; |
| 7441 } |
| 7442 |
| 7443 |
| 7444 /* |
| 7445 ** 2014 August 11 |
| 7446 ** |
| 7447 ** The author disclaims copyright to this source code. In place of |
| 7448 ** a legal notice, here is a blessing: |
| 7449 ** |
| 7450 ** May you do good and not evil. |
| 7451 ** May you find forgiveness for yourself and forgive others. |
| 7452 ** May you share freely, never taking more than you give. |
| 7453 ** |
| 7454 ****************************************************************************** |
| 7455 ** |
| 7456 */ |
| 7457 |
| 7458 |
| 7459 |
| 7460 /* #include "fts5Int.h" */ |
| 7461 |
/* Forward declaration. The structure itself is defined below. */
typedef struct Fts5HashEntry Fts5HashEntry;
| 7463 |
| 7464 /* |
| 7465 ** This file contains the implementation of an in-memory hash table used |
| 7466 ** to accumuluate "term -> doclist" content before it is flused to a level-0 |
| 7467 ** segment. |
| 7468 */ |
| 7469 |
| 7470 |
| 7471 struct Fts5Hash { |
| 7472 int eDetail; /* Copy of Fts5Config.eDetail */ |
| 7473 int *pnByte; /* Pointer to bytes counter */ |
| 7474 int nEntry; /* Number of entries currently in hash */ |
| 7475 int nSlot; /* Size of aSlot[] array */ |
| 7476 Fts5HashEntry *pScan; /* Current ordered scan item */ |
| 7477 Fts5HashEntry **aSlot; /* Array of hash slots */ |
| 7478 }; |
| 7479 |
| 7480 /* |
| 7481 ** Each entry in the hash table is represented by an object of the |
| 7482 ** following type. Each object, its key (zKey[]) and its current data |
| 7483 ** are stored in a single memory allocation. The position list data |
| 7484 ** immediately follows the key data in memory. |
| 7485 ** |
| 7486 ** The data that follows the key is in a similar, but not identical format |
| 7487 ** to the doclist data stored in the database. It is: |
| 7488 ** |
| 7489 ** * Rowid, as a varint |
| 7490 ** * Position list, without 0x00 terminator. |
| 7491 ** * Size of previous position list and rowid, as a 4 byte |
| 7492 ** big-endian integer. |
| 7493 ** |
| 7494 ** iRowidOff: |
| 7495 ** Offset of last rowid written to data area. Relative to first byte of |
| 7496 ** structure. |
| 7497 ** |
| 7498 ** nData: |
| 7499 ** Bytes of data written since iRowidOff. |
| 7500 */ |
| 7501 struct Fts5HashEntry { |
| 7502 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ |
| 7503 Fts5HashEntry *pScanNext; /* Next entry in sorted order */ |
| 7504 |
| 7505 int nAlloc; /* Total size of allocation */ |
| 7506 int iSzPoslist; /* Offset of space for 4-byte poslist size */ |
| 7507 int nData; /* Total bytes of data (incl. structure) */ |
| 7508 int nKey; /* Length of zKey[] in bytes */ |
| 7509 u8 bDel; /* Set delete-flag @ iSzPoslist */ |
| 7510 u8 bContent; /* Set content-flag (detail=none mode) */ |
| 7511 i16 iCol; /* Column of last value written */ |
| 7512 int iPos; /* Position of last value written */ |
| 7513 i64 iRowid; /* Rowid of last value written */ |
| 7514 char zKey[8]; /* Nul-terminated entry key */ |
| 7515 }; |
| 7516 |
/*
** Size in bytes of an Fts5HashEntry structure, less the size of the
** zKey[] array declared as its final member.
*/
#define FTS5_HASHENTRYSIZE \
    (sizeof(Fts5HashEntry) - sizeof(((Fts5HashEntry*)0)->zKey))
| 7521 |
| 7522 |
| 7523 |
| 7524 /* |
| 7525 ** Allocate a new hash table. |
| 7526 */ |
| 7527 static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte
){ |
| 7528 int rc = SQLITE_OK; |
| 7529 Fts5Hash *pNew; |
| 7530 |
| 7531 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); |
| 7532 if( pNew==0 ){ |
| 7533 rc = SQLITE_NOMEM; |
| 7534 }else{ |
| 7535 int nByte; |
| 7536 memset(pNew, 0, sizeof(Fts5Hash)); |
| 7537 pNew->pnByte = pnByte; |
| 7538 pNew->eDetail = pConfig->eDetail; |
| 7539 |
| 7540 pNew->nSlot = 1024; |
| 7541 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; |
| 7542 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); |
| 7543 if( pNew->aSlot==0 ){ |
| 7544 sqlite3_free(pNew); |
| 7545 *ppNew = 0; |
| 7546 rc = SQLITE_NOMEM; |
| 7547 }else{ |
| 7548 memset(pNew->aSlot, 0, nByte); |
| 7549 } |
| 7550 } |
| 7551 return rc; |
| 7552 } |
| 7553 |
| 7554 /* |
| 7555 ** Free a hash table object. |
| 7556 */ |
| 7557 static void sqlite3Fts5HashFree(Fts5Hash *pHash){ |
| 7558 if( pHash ){ |
| 7559 sqlite3Fts5HashClear(pHash); |
| 7560 sqlite3_free(pHash->aSlot); |
| 7561 sqlite3_free(pHash); |
| 7562 } |
| 7563 } |
| 7564 |
| 7565 /* |
| 7566 ** Empty (but do not delete) a hash table. |
| 7567 */ |
| 7568 static void sqlite3Fts5HashClear(Fts5Hash *pHash){ |
| 7569 int i; |
| 7570 for(i=0; i<pHash->nSlot; i++){ |
| 7571 Fts5HashEntry *pNext; |
| 7572 Fts5HashEntry *pSlot; |
| 7573 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ |
| 7574 pNext = pSlot->pHashNext; |
| 7575 sqlite3_free(pSlot); |
| 7576 } |
| 7577 } |
| 7578 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); |
| 7579 pHash->nEntry = 0; |
| 7580 } |
| 7581 |
| 7582 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ |
| 7583 int i; |
| 7584 unsigned int h = 13; |
| 7585 for(i=n-1; i>=0; i--){ |
| 7586 h = (h << 3) ^ h ^ p[i]; |
| 7587 } |
| 7588 return (h % nSlot); |
| 7589 } |
| 7590 |
| 7591 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ |
| 7592 int i; |
| 7593 unsigned int h = 13; |
| 7594 for(i=n-1; i>=0; i--){ |
| 7595 h = (h << 3) ^ h ^ p[i]; |
| 7596 } |
| 7597 h = (h << 3) ^ h ^ b; |
| 7598 return (h % nSlot); |
| 7599 } |
| 7600 |
| 7601 /* |
| 7602 ** Resize the hash table by doubling the number of slots. |
| 7603 */ |
| 7604 static int fts5HashResize(Fts5Hash *pHash){ |
| 7605 int nNew = pHash->nSlot*2; |
| 7606 int i; |
| 7607 Fts5HashEntry **apNew; |
| 7608 Fts5HashEntry **apOld = pHash->aSlot; |
| 7609 |
| 7610 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*)); |
| 7611 if( !apNew ) return SQLITE_NOMEM; |
| 7612 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); |
| 7613 |
| 7614 for(i=0; i<pHash->nSlot; i++){ |
| 7615 while( apOld[i] ){ |
| 7616 int iHash; |
| 7617 Fts5HashEntry *p = apOld[i]; |
| 7618 apOld[i] = p->pHashNext; |
| 7619 iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey)); |
| 7620 p->pHashNext = apNew[iHash]; |
| 7621 apNew[iHash] = p; |
| 7622 } |
| 7623 } |
| 7624 |
| 7625 sqlite3_free(apOld); |
| 7626 pHash->nSlot = nNew; |
| 7627 pHash->aSlot = apNew; |
| 7628 return SQLITE_OK; |
| 7629 } |
| 7630 |
| 7631 static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){ |
| 7632 if( p->iSzPoslist ){ |
| 7633 u8 *pPtr = (u8*)p; |
| 7634 if( pHash->eDetail==FTS5_DETAIL_NONE ){ |
| 7635 assert( p->nData==p->iSzPoslist ); |
| 7636 if( p->bDel ){ |
| 7637 pPtr[p->nData++] = 0x00; |
| 7638 if( p->bContent ){ |
| 7639 pPtr[p->nData++] = 0x00; |
| 7640 } |
| 7641 } |
| 7642 }else{ |
| 7643 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ |
| 7644 int nPos = nSz*2 + p->bDel; /* Value of nPos field */ |
| 7645 |
| 7646 assert( p->bDel==0 || p->bDel==1 ); |
| 7647 if( nPos<=127 ){ |
| 7648 pPtr[p->iSzPoslist] = (u8)nPos; |
| 7649 }else{ |
| 7650 int nByte = sqlite3Fts5GetVarintLen((u32)nPos); |
| 7651 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); |
| 7652 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); |
| 7653 p->nData += (nByte-1); |
| 7654 } |
| 7655 } |
| 7656 |
| 7657 p->iSzPoslist = 0; |
| 7658 p->bDel = 0; |
| 7659 p->bContent = 0; |
| 7660 } |
| 7661 } |
| 7662 |
| 7663 /* |
| 7664 ** Add an entry to the in-memory hash table. The key is the concatenation |
| 7665 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). |
| 7666 ** |
| 7667 ** (bByte || pToken) -> (iRowid,iCol,iPos) |
| 7668 ** |
| 7669 ** Or, if iCol is negative, then the value is a delete marker. |
| 7670 */ |
| 7671 static int sqlite3Fts5HashWrite( |
| 7672 Fts5Hash *pHash, |
| 7673 i64 iRowid, /* Rowid for this entry */ |
| 7674 int iCol, /* Column token appears in (-ve -> delete) */ |
| 7675 int iPos, /* Position of token within column */ |
| 7676 char bByte, /* First byte of token */ |
| 7677 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 7678 ){ |
| 7679 unsigned int iHash; |
| 7680 Fts5HashEntry *p; |
| 7681 u8 *pPtr; |
| 7682 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ |
| 7683 int bNew; /* If non-delete entry should be written */ |
| 7684 |
| 7685 bNew = (pHash->eDetail==FTS5_DETAIL_FULL); |
| 7686 |
| 7687 /* Attempt to locate an existing hash entry */ |
| 7688 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); |
| 7689 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ |
| 7690 if( p->zKey[0]==bByte |
| 7691 && p->nKey==nToken |
| 7692 && memcmp(&p->zKey[1], pToken, nToken)==0 |
| 7693 ){ |
| 7694 break; |
| 7695 } |
| 7696 } |
| 7697 |
| 7698 /* If an existing hash entry cannot be found, create a new one. */ |
| 7699 if( p==0 ){ |
| 7700 /* Figure out how much space to allocate */ |
| 7701 int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64; |
| 7702 if( nByte<128 ) nByte = 128; |
| 7703 |
| 7704 /* Grow the Fts5Hash.aSlot[] array if necessary. */ |
| 7705 if( (pHash->nEntry*2)>=pHash->nSlot ){ |
| 7706 int rc = fts5HashResize(pHash); |
| 7707 if( rc!=SQLITE_OK ) return rc; |
| 7708 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); |
| 7709 } |
| 7710 |
| 7711 /* Allocate new Fts5HashEntry and add it to the hash table. */ |
| 7712 p = (Fts5HashEntry*)sqlite3_malloc(nByte); |
| 7713 if( !p ) return SQLITE_NOMEM; |
| 7714 memset(p, 0, FTS5_HASHENTRYSIZE); |
| 7715 p->nAlloc = nByte; |
| 7716 p->zKey[0] = bByte; |
| 7717 memcpy(&p->zKey[1], pToken, nToken); |
| 7718 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) ); |
| 7719 p->nKey = nToken; |
| 7720 p->zKey[nToken+1] = '\0'; |
| 7721 p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; |
| 7722 p->pHashNext = pHash->aSlot[iHash]; |
| 7723 pHash->aSlot[iHash] = p; |
| 7724 pHash->nEntry++; |
| 7725 |
| 7726 /* Add the first rowid field to the hash-entry */ |
| 7727 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); |
| 7728 p->iRowid = iRowid; |
| 7729 |
| 7730 p->iSzPoslist = p->nData; |
| 7731 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ |
| 7732 p->nData += 1; |
| 7733 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); |
| 7734 } |
| 7735 |
| 7736 nIncr += p->nData; |
| 7737 }else{ |
| 7738 |
| 7739 /* Appending to an existing hash-entry. Check that there is enough |
| 7740 ** space to append the largest possible new entry. Worst case scenario |
| 7741 ** is: |
| 7742 ** |
| 7743 ** + 9 bytes for a new rowid, |
| 7744 ** + 4 byte reserved for the "poslist size" varint. |
| 7745 ** + 1 byte for a "new column" byte, |
| 7746 ** + 3 bytes for a new column number (16-bit max) as a varint, |
| 7747 ** + 5 bytes for the new position offset (32-bit max). |
| 7748 */ |
| 7749 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ |
| 7750 int nNew = p->nAlloc * 2; |
| 7751 Fts5HashEntry *pNew; |
| 7752 Fts5HashEntry **pp; |
| 7753 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); |
| 7754 if( pNew==0 ) return SQLITE_NOMEM; |
| 7755 pNew->nAlloc = nNew; |
| 7756 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); |
| 7757 *pp = pNew; |
| 7758 p = pNew; |
| 7759 } |
| 7760 nIncr -= p->nData; |
| 7761 } |
| 7762 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) ); |
| 7763 |
| 7764 pPtr = (u8*)p; |
| 7765 |
| 7766 /* If this is a new rowid, append the 4-byte size field for the previous |
| 7767 ** entry, and the new rowid for this entry. */ |
| 7768 if( iRowid!=p->iRowid ){ |
| 7769 fts5HashAddPoslistSize(pHash, p); |
| 7770 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); |
| 7771 p->iRowid = iRowid; |
| 7772 bNew = 1; |
| 7773 p->iSzPoslist = p->nData; |
| 7774 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ |
| 7775 p->nData += 1; |
| 7776 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); |
| 7777 p->iPos = 0; |
| 7778 } |
| 7779 } |
| 7780 |
| 7781 if( iCol>=0 ){ |
| 7782 if( pHash->eDetail==FTS5_DETAIL_NONE ){ |
| 7783 p->bContent = 1; |
| 7784 }else{ |
| 7785 /* Append a new column value, if necessary */ |
| 7786 assert( iCol>=p->iCol ); |
| 7787 if( iCol!=p->iCol ){ |
| 7788 if( pHash->eDetail==FTS5_DETAIL_FULL ){ |
| 7789 pPtr[p->nData++] = 0x01; |
| 7790 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); |
| 7791 p->iCol = (i16)iCol; |
| 7792 p->iPos = 0; |
| 7793 }else{ |
| 7794 bNew = 1; |
| 7795 p->iCol = (i16)(iPos = iCol); |
| 7796 } |
| 7797 } |
| 7798 |
| 7799 /* Append the new position offset, if necessary */ |
| 7800 if( bNew ){ |
| 7801 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); |
| 7802 p->iPos = iPos; |
| 7803 } |
| 7804 } |
| 7805 }else{ |
| 7806 /* This is a delete. Set the delete flag. */ |
| 7807 p->bDel = 1; |
| 7808 } |
| 7809 |
| 7810 nIncr += p->nData; |
| 7811 *pHash->pnByte += nIncr; |
| 7812 return SQLITE_OK; |
| 7813 } |
| 7814 |
| 7815 |
| 7816 /* |
| 7817 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, |
| 7818 ** each sorted in key order. This function merges the two lists into a |
| 7819 ** single list and returns a pointer to its first element. |
| 7820 */ |
| 7821 static Fts5HashEntry *fts5HashEntryMerge( |
| 7822 Fts5HashEntry *pLeft, |
| 7823 Fts5HashEntry *pRight |
| 7824 ){ |
| 7825 Fts5HashEntry *p1 = pLeft; |
| 7826 Fts5HashEntry *p2 = pRight; |
| 7827 Fts5HashEntry *pRet = 0; |
| 7828 Fts5HashEntry **ppOut = &pRet; |
| 7829 |
| 7830 while( p1 || p2 ){ |
| 7831 if( p1==0 ){ |
| 7832 *ppOut = p2; |
| 7833 p2 = 0; |
| 7834 }else if( p2==0 ){ |
| 7835 *ppOut = p1; |
| 7836 p1 = 0; |
| 7837 }else{ |
| 7838 int i = 0; |
| 7839 while( p1->zKey[i]==p2->zKey[i] ) i++; |
| 7840 |
| 7841 if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ |
| 7842 /* p2 is smaller */ |
| 7843 *ppOut = p2; |
| 7844 ppOut = &p2->pScanNext; |
| 7845 p2 = p2->pScanNext; |
| 7846 }else{ |
| 7847 /* p1 is smaller */ |
| 7848 *ppOut = p1; |
| 7849 ppOut = &p1->pScanNext; |
| 7850 p1 = p1->pScanNext; |
| 7851 } |
| 7852 *ppOut = 0; |
| 7853 } |
| 7854 } |
| 7855 |
| 7856 return pRet; |
| 7857 } |
| 7858 |
| 7859 /* |
| 7860 ** Extract all tokens from hash table iHash and link them into a list |
| 7861 ** in sorted order. The hash table is cleared before returning. It is |
| 7862 ** the responsibility of the caller to free the elements of the returned |
| 7863 ** list. |
| 7864 */ |
| 7865 static int fts5HashEntrySort( |
| 7866 Fts5Hash *pHash, |
| 7867 const char *pTerm, int nTerm, /* Query prefix, if any */ |
| 7868 Fts5HashEntry **ppSorted |
| 7869 ){ |
| 7870 const int nMergeSlot = 32; |
| 7871 Fts5HashEntry **ap; |
| 7872 Fts5HashEntry *pList; |
| 7873 int iSlot; |
| 7874 int i; |
| 7875 |
| 7876 *ppSorted = 0; |
| 7877 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); |
| 7878 if( !ap ) return SQLITE_NOMEM; |
| 7879 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); |
| 7880 |
| 7881 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ |
| 7882 Fts5HashEntry *pIter; |
| 7883 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ |
| 7884 if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){ |
| 7885 Fts5HashEntry *pEntry = pIter; |
| 7886 pEntry->pScanNext = 0; |
| 7887 for(i=0; ap[i]; i++){ |
| 7888 pEntry = fts5HashEntryMerge(pEntry, ap[i]); |
| 7889 ap[i] = 0; |
| 7890 } |
| 7891 ap[i] = pEntry; |
| 7892 } |
| 7893 } |
| 7894 } |
| 7895 |
| 7896 pList = 0; |
| 7897 for(i=0; i<nMergeSlot; i++){ |
| 7898 pList = fts5HashEntryMerge(pList, ap[i]); |
| 7899 } |
| 7900 |
| 7901 pHash->nEntry = 0; |
| 7902 sqlite3_free(ap); |
| 7903 *ppSorted = pList; |
| 7904 return SQLITE_OK; |
| 7905 } |
| 7906 |
| 7907 /* |
| 7908 ** Query the hash table for a doclist associated with term pTerm/nTerm. |
| 7909 */ |
| 7910 static int sqlite3Fts5HashQuery( |
| 7911 Fts5Hash *pHash, /* Hash table to query */ |
| 7912 const char *pTerm, int nTerm, /* Query term */ |
| 7913 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ |
| 7914 int *pnDoclist /* OUT: Size of doclist in bytes */ |
| 7915 ){ |
| 7916 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); |
| 7917 Fts5HashEntry *p; |
| 7918 |
| 7919 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ |
| 7920 if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; |
| 7921 } |
| 7922 |
| 7923 if( p ){ |
| 7924 fts5HashAddPoslistSize(pHash, p); |
| 7925 *ppDoclist = (const u8*)&p->zKey[nTerm+1]; |
| 7926 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); |
| 7927 }else{ |
| 7928 *ppDoclist = 0; |
| 7929 *pnDoclist = 0; |
| 7930 } |
| 7931 |
| 7932 return SQLITE_OK; |
| 7933 } |
| 7934 |
| 7935 static int sqlite3Fts5HashScanInit( |
| 7936 Fts5Hash *p, /* Hash table to query */ |
| 7937 const char *pTerm, int nTerm /* Query prefix */ |
| 7938 ){ |
| 7939 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); |
| 7940 } |
| 7941 |
| 7942 static void sqlite3Fts5HashScanNext(Fts5Hash *p){ |
| 7943 assert( !sqlite3Fts5HashScanEof(p) ); |
| 7944 p->pScan = p->pScan->pScanNext; |
| 7945 } |
| 7946 |
| 7947 static int sqlite3Fts5HashScanEof(Fts5Hash *p){ |
| 7948 return (p->pScan==0); |
| 7949 } |
| 7950 |
| 7951 static void sqlite3Fts5HashScanEntry( |
| 7952 Fts5Hash *pHash, |
| 7953 const char **pzTerm, /* OUT: term (nul-terminated) */ |
| 7954 const u8 **ppDoclist, /* OUT: pointer to doclist */ |
| 7955 int *pnDoclist /* OUT: size of doclist in bytes */ |
| 7956 ){ |
| 7957 Fts5HashEntry *p; |
| 7958 if( (p = pHash->pScan) ){ |
| 7959 int nTerm = (int)strlen(p->zKey); |
| 7960 fts5HashAddPoslistSize(pHash, p); |
| 7961 *pzTerm = p->zKey; |
| 7962 *ppDoclist = (const u8*)&p->zKey[nTerm+1]; |
| 7963 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); |
| 7964 }else{ |
| 7965 *pzTerm = 0; |
| 7966 *ppDoclist = 0; |
| 7967 *pnDoclist = 0; |
| 7968 } |
| 7969 } |
| 7970 |
| 7971 |
| 7972 /* |
| 7973 ** 2014 May 31 |
| 7974 ** |
| 7975 ** The author disclaims copyright to this source code. In place of |
| 7976 ** a legal notice, here is a blessing: |
| 7977 ** |
| 7978 ** May you do good and not evil. |
| 7979 ** May you find forgiveness for yourself and forgive others. |
| 7980 ** May you share freely, never taking more than you give. |
| 7981 ** |
| 7982 ****************************************************************************** |
| 7983 ** |
| 7984 ** Low level access to the FTS index stored in the database file. The |
| 7985 ** routines in this file implement all read and write access to the |
| 7986 ** %_data table. Other parts of the system access this functionality via |
| 7987 ** the interface defined in fts5Int.h. |
| 7988 */ |
| 7989 |
| 7990 |
| 7991 /* #include "fts5Int.h" */ |
| 7992 |
| 7993 /* |
| 7994 ** Overview: |
| 7995 ** |
| 7996 ** The %_data table contains all the FTS indexes for an FTS5 virtual table. |
| 7997 ** As well as the main term index, there may be up to 31 prefix indexes. |
| 7998 ** The format is similar to FTS3/4, except that: |
| 7999 ** |
| 8000 ** * all segment b-tree leaf data is stored in fixed size page records |
| 8001 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is |
| 8002 ** taken to ensure it is possible to iterate in either direction through |
| 8003 ** the entries in a doclist, or to seek to a specific entry within a |
| 8004 ** doclist, without loading it into memory. |
| 8005 ** |
| 8006 ** * large doclists that span many pages have associated "doclist index" |
| 8007 ** records that contain a copy of the first rowid on each page spanned by |
| 8008 ** the doclist. This is used to speed up seek operations, and merges of |
| 8009 ** large doclists with very small doclists. |
| 8010 ** |
| 8011 ** * extra fields in the "structure record" record the state of ongoing |
| 8012 ** incremental merge operations. |
| 8013 ** |
| 8014 */ |
| 8015 |
| 8016 |
/* Number of leaf pages per optimize step */
#define FTS5_OPT_WORK_UNIT  1000

/* Number of leaf pages in unit of work */
#define FTS5_WORK_UNIT      64

/* Add dlidx if this many empty pages */
#define FTS5_MIN_DLIDX_SIZE 4

#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
| 8027 |
| 8028 /* |
| 8029 ** Details: |
| 8030 ** |
| 8031 ** The %_data table managed by this module, |
| 8032 ** |
| 8033 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); |
| 8034 ** |
| 8035 ** , contains the following 5 types of records. See the comments surrounding |
| 8036 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are |
| 8037 ** assigned to each of them. |
| 8038 ** |
| 8039 ** 1. Structure Records: |
| 8040 ** |
| 8041 ** The set of segments that make up an index - the index structure - are |
| 8042 ** recorded in a single record within the %_data table. The record consists |
| 8043 ** of a single 32-bit configuration cookie value followed by a list of |
| 8044 ** SQLite varints. If the FTS table features more than one index (because |
| 8045 ** there are one or more prefix indexes), it is guaranteed that all share |
| 8046 ** the same cookie value. |
| 8047 ** |
| 8048 ** Immediately following the configuration cookie, the record begins with |
| 8049 ** three varints: |
| 8050 ** |
| 8051 ** + number of levels, |
| 8052 ** + total number of segments on all levels, |
| 8053 ** + value of write counter. |
| 8054 ** |
| 8055 ** Then, for each level from 0 to nMax: |
| 8056 ** |
| 8057 ** + number of input segments in ongoing merge. |
| 8058 ** + total number of segments in level. |
| 8059 ** + for each segment from oldest to newest: |
| 8060 ** + segment id (always > 0) |
| 8061 ** + first leaf page number (often 1, always greater than 0) |
| 8062 ** + final leaf page number |
| 8063 ** |
| 8064 ** 2. The Averages Record: |
| 8065 ** |
| 8066 ** A single record within the %_data table. The data is a list of varints. |
| 8067 ** The first value is the number of rows in the index. Then, for each column |
| 8068 ** from left to right, the total number of tokens in the column for all |
| 8069 ** rows of the table. |
| 8070 ** |
| 8071 ** 3. Segment leaves: |
| 8072 ** |
| 8073 ** TERM/DOCLIST FORMAT: |
| 8074 ** |
| 8075 ** Most of each segment leaf is taken up by term/doclist data. The |
| 8076 ** general format of term/doclist, starting with the first term |
| 8077 ** on the leaf page, is: |
| 8078 ** |
| 8079 ** varint : size of first term |
| 8080 ** blob: first term data |
| 8081 ** doclist: first doclist |
| 8082 ** zero-or-more { |
| 8083 ** varint: number of bytes in common with previous term |
| 8084 ** varint: number of bytes of new term data (nNew) |
| 8085 ** blob: nNew bytes of new term data |
| 8086 ** doclist: next doclist |
| 8087 ** } |
| 8088 ** |
| 8089 ** doclist format: |
| 8090 ** |
| 8091 ** varint: first rowid |
| 8092 ** poslist: first poslist |
| 8093 ** zero-or-more { |
| 8094 ** varint: rowid delta (always > 0) |
| 8095 ** poslist: next poslist |
| 8096 ** } |
| 8097 ** |
| 8098 ** poslist format: |
| 8099 ** |
| 8100 ** varint: size of poslist in bytes multiplied by 2, not including |
| 8101 ** this field. Plus 1 if this entry carries the "delete" flag. |
| 8102 ** collist: collist for column 0 |
| 8103 ** zero-or-more { |
| 8104 ** 0x01 byte |
| 8105 ** varint: column number (I) |
| 8106 ** collist: collist for column I |
| 8107 ** } |
| 8108 ** |
| 8109 ** collist format: |
| 8110 ** |
| 8111 ** varint: first offset + 2 |
| 8112 ** zero-or-more { |
| 8113 ** varint: offset delta + 2 |
| 8114 ** } |
| 8115 ** |
| 8116 ** PAGE FORMAT |
| 8117 ** |
| 8118 ** Each leaf page begins with a 4-byte header containing 2 16-bit |
| 8119 ** unsigned integer fields in big-endian format. They are: |
| 8120 ** |
| 8121 ** * The byte offset of the first rowid on the page, if it exists |
| 8122 ** and occurs before the first term (otherwise 0). |
| 8123 ** |
| 8124 ** * The byte offset of the start of the page footer. If the page |
| 8125 ** footer is 0 bytes in size, then this field is the same as the |
| 8126 ** size of the leaf page in bytes. |
| 8127 ** |
| 8128 ** The page footer consists of a single varint for each term located |
| 8129 ** on the page. Each varint is the byte offset of the current term |
| 8130 ** within the page, delta-compressed against the previous value. In |
| 8131 ** other words, the first varint in the footer is the byte offset of |
| 8132 ** the first term, the second is the byte offset of the second less that |
| 8133 ** of the first, and so on. |
| 8134 ** |
| 8135 ** The term/doclist format described above is accurate if the entire |
| 8136 ** term/doclist data fits on a single leaf page. If this is not the case, |
| 8137 ** the format is changed in two ways: |
| 8138 ** |
| 8139 ** + if the first rowid on a page occurs before the first term, it |
| 8140 ** is stored as a literal value: |
| 8141 ** |
| 8142 ** varint: first rowid |
| 8143 ** |
| 8144 ** + the first term on each page is stored in the same way as the |
| 8145 ** very first term of the segment: |
| 8146 ** |
| 8147 ** varint : size of first term |
| 8148 ** blob: first term data |
| 8149 ** |
| 8150 ** 5. Segment doclist indexes: |
| 8151 ** |
| 8152 ** Doclist indexes are themselves b-trees, however they usually consist of |
| 8153 ** a single leaf record only. The format of each doclist index leaf page |
| 8154 ** is: |
| 8155 ** |
| 8156 ** * Flags byte. Bits are: |
| 8157 ** 0x01: Clear if leaf is also the root page, otherwise set. |
| 8158 ** |
| 8159 ** * Page number of fts index leaf page. As a varint. |
| 8160 ** |
| 8161 ** * First rowid on page indicated by previous field. As a varint. |
| 8162 ** |
| 8163 ** * A list of varints, one for each subsequent termless page. A |
| 8164 ** positive delta if the termless page contains at least one rowid, |
| 8165 ** or an 0x00 byte otherwise. |
| 8166 ** |
| 8167 ** Internal doclist index nodes are: |
| 8168 ** |
| 8169 ** * Flags byte. Bits are: |
| 8170 ** 0x01: Clear for root page, otherwise set. |
| 8171 ** |
| 8172 ** * Page number of first child page. As a varint. |
| 8173 ** |
| 8174 ** * Copy of first rowid on page indicated by previous field. As a varint. |
| 8175 ** |
| 8176 ** * A list of delta-encoded varints - the first rowid on each subsequent |
| 8177 ** child page. |
| 8178 ** |
| 8179 */ |
| 8180 |
/*
** Fixed rowids within the %_data table: one for the averages record and
** one for the structure record.
*/
#define FTS5_AVERAGES_ROWID     1
#define FTS5_STRUCTURE_ROWID   10
| 8186 |
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** A rowid is built from four fields. From most to least significant:
**
**     segment id     (FTS5_DATA_ID_B bits)
**     dlidx flag     (FTS5_DATA_DLI_B bits)
**     dlidx height   (FTS5_DATA_HEIGHT_B bits)
**     page number    (FTS5_DATA_PAGE_B bits)
*/
#define FTS5_DATA_ID_B     16     /* Bits used for the segment id */
#define FTS5_DATA_DLI_B     1     /* Bits used for the doclist-index flag */
#define FTS5_DATA_HEIGHT_B  5     /* Bits used for the dlidx tree height */
#define FTS5_DATA_PAGE_B   31     /* Bits used for the page number */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height zero). */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)

/* Rowid of doclist-index page pgno at the given height (dlidx flag set). */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
| 8212 |
/* Upper limit on the number of segments a single index may contain. */
#define FTS5_MAX_SEGMENT 2000
| 8217 |
#ifdef SQLITE_DEBUG
/*
** Debug-build helper that simply returns SQLITE_CORRUPT_VTAB.
** NOTE(review): presumably exists so that a breakpoint can be set where
** corruption is detected - confirm against the callers.
**
** Declared with an explicit (void) parameter list so that this is a
** proper prototype rather than an old-style unprototyped declarator.
*/
static int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
| 8221 |
| 8222 |
/*
** Blobs read from the %_data table are padded with FTS5_DATA_ZERO_PADDING
** zero bytes. The padding lets the record decoders run without
** overreading the buffer even if the record is corrupt.
*/
#define FTS5_DATA_ZERO_PADDING  8
#define FTS5_DATA_PADDING      20
| 8230 |
| 8231 typedef struct Fts5Data Fts5Data; |
| 8232 typedef struct Fts5DlidxIter Fts5DlidxIter; |
| 8233 typedef struct Fts5DlidxLvl Fts5DlidxLvl; |
| 8234 typedef struct Fts5DlidxWriter Fts5DlidxWriter; |
| 8235 typedef struct Fts5Iter Fts5Iter; |
| 8236 typedef struct Fts5PageWriter Fts5PageWriter; |
| 8237 typedef struct Fts5SegIter Fts5SegIter; |
| 8238 typedef struct Fts5DoclistIter Fts5DoclistIter; |
| 8239 typedef struct Fts5SegWriter Fts5SegWriter; |
| 8240 typedef struct Fts5Structure Fts5Structure; |
| 8241 typedef struct Fts5StructureLevel Fts5StructureLevel; |
| 8242 typedef struct Fts5StructureSegment Fts5StructureSegment; |
| 8243 |
| 8244 struct Fts5Data { |
| 8245 u8 *p; /* Pointer to buffer containing record */ |
| 8246 int nn; /* Size of record in bytes */ |
| 8247 int szLeaf; /* Size of leaf without page-index */ |
| 8248 }; |
| 8249 |
| 8250 /* |
| 8251 ** One object per %_data table. |
| 8252 */ |
| 8253 struct Fts5Index { |
| 8254 Fts5Config *pConfig; /* Virtual table configuration */ |
| 8255 char *zDataTbl; /* Name of %_data table */ |
| 8256 int nWorkUnit; /* Leaf pages in a "unit" of work */ |
| 8257 |
| 8258 /* |
| 8259 ** Variables related to the accumulation of tokens and doclists within the |
| 8260 ** in-memory hash tables before they are flushed to disk. |
| 8261 */ |
| 8262 Fts5Hash *pHash; /* Hash table for in-memory data */ |
| 8263 int nPendingData; /* Current bytes of pending data */ |
| 8264 i64 iWriteRowid; /* Rowid for current doc being written */ |
| 8265 int bDelete; /* Current write is a delete */ |
| 8266 |
| 8267 /* Error state. */ |
| 8268 int rc; /* Current error code */ |
| 8269 |
| 8270 /* State used by the fts5DataXXX() functions. */ |
| 8271 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ |
| 8272 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ |
| 8273 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ |
| 8274 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ |
| 8275 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */ |
| 8276 sqlite3_stmt *pIdxSelect; |
| 8277 int nRead; /* Total number of blocks read */ |
| 8278 |
| 8279 sqlite3_stmt *pDataVersion; |
| 8280 i64 iStructVersion; /* data_version when pStruct read */ |
| 8281 Fts5Structure *pStruct; /* Current db structure (or NULL) */ |
| 8282 }; |
| 8283 |
| 8284 struct Fts5DoclistIter { |
| 8285 u8 *aEof; /* Pointer to 1 byte past end of doclist */ |
| 8286 |
| 8287 /* Output variables. aPoslist==0 at EOF */ |
| 8288 i64 iRowid; |
| 8289 u8 *aPoslist; |
| 8290 int nPoslist; |
| 8291 int nSize; |
| 8292 }; |
| 8293 |
| 8294 /* |
| 8295 ** The contents of the "structure" record for each index are represented |
| 8296 ** using an Fts5Structure record in memory. Which uses instances of the |
| 8297 ** other Fts5StructureXXX types as components. |
| 8298 */ |
| 8299 struct Fts5StructureSegment { |
| 8300 int iSegid; /* Segment id */ |
| 8301 int pgnoFirst; /* First leaf page number in segment */ |
| 8302 int pgnoLast; /* Last leaf page number in segment */ |
| 8303 }; |
| 8304 struct Fts5StructureLevel { |
| 8305 int nMerge; /* Number of segments in incr-merge */ |
| 8306 int nSeg; /* Total number of segments on level */ |
| 8307 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ |
| 8308 }; |
| 8309 struct Fts5Structure { |
| 8310 int nRef; /* Object reference count */ |
| 8311 u64 nWriteCounter; /* Total leaves written to level 0 */ |
| 8312 int nSegment; /* Total segments in this structure */ |
| 8313 int nLevel; /* Number of levels in this index */ |
| 8314 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */ |
| 8315 }; |
| 8316 |
| 8317 /* |
| 8318 ** An object of type Fts5SegWriter is used to write to segments. |
| 8319 */ |
| 8320 struct Fts5PageWriter { |
| 8321 int pgno; /* Page number for this page */ |
| 8322 int iPrevPgidx; /* Previous value written into pgidx */ |
| 8323 Fts5Buffer buf; /* Buffer containing leaf data */ |
| 8324 Fts5Buffer pgidx; /* Buffer containing page-index */ |
| 8325 Fts5Buffer term; /* Buffer containing previous term on page */ |
| 8326 }; |
| 8327 struct Fts5DlidxWriter { |
| 8328 int pgno; /* Page number for this page */ |
| 8329 int bPrevValid; /* True if iPrev is valid */ |
| 8330 i64 iPrev; /* Previous rowid value written to page */ |
| 8331 Fts5Buffer buf; /* Buffer containing page data */ |
| 8332 }; |
| 8333 struct Fts5SegWriter { |
| 8334 int iSegid; /* Segid to write to */ |
| 8335 Fts5PageWriter writer; /* PageWriter object */ |
| 8336 i64 iPrevRowid; /* Previous rowid written to current leaf */ |
| 8337 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ |
| 8338 u8 bFirstRowidInPage; /* True if next rowid is first in page */ |
| 8339 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ |
| 8340 u8 bFirstTermInPage; /* True if next term will be first in leaf */ |
| 8341 int nLeafWritten; /* Number of leaf pages written */ |
| 8342 int nEmpty; /* Number of contiguous term-less nodes */ |
| 8343 |
| 8344 int nDlidx; /* Allocated size of aDlidx[] array */ |
| 8345 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ |
| 8346 |
| 8347 /* Values to insert into the %_idx table */ |
| 8348 Fts5Buffer btterm; /* Next term to insert into %_idx table */ |
| 8349 int iBtPage; /* Page number corresponding to btterm */ |
| 8350 }; |
| 8351 |
| 8352 typedef struct Fts5CResult Fts5CResult; |
| 8353 struct Fts5CResult { |
| 8354 u16 iFirst; /* aSeg[] index of firstest iterator */ |
| 8355 u8 bTermEq; /* True if the terms are equal */ |
| 8356 }; |
| 8357 |
| 8358 /* |
| 8359 ** Object for iterating through a single segment, visiting each term/rowid |
| 8360 ** pair in the segment. |
| 8361 ** |
| 8362 ** pSeg: |
| 8363 ** The segment to iterate through. |
| 8364 ** |
| 8365 ** iLeafPgno: |
| 8366 ** Current leaf page number within segment. |
| 8367 ** |
| 8368 ** iLeafOffset: |
| 8369 ** Byte offset within the current leaf that is the first byte of the |
| 8370 ** position list data (one byte past the position-list size field). |
| 8371 ** rowid field of the current entry. Usually this is the size field of the |
| 8372 ** position list data. The exception is if the rowid for the current entry |
| 8373 ** is the last thing on the leaf page. |
| 8374 ** |
| 8375 ** pLeaf: |
| 8376 ** Buffer containing current leaf page data. Set to NULL at EOF. |
| 8377 ** |
| 8378 ** iTermLeafPgno, iTermLeafOffset: |
| 8379 ** Leaf page number containing the last term read from the segment. And |
| 8380 ** the offset immediately following the term data. |
| 8381 ** |
| 8382 ** flags: |
| 8383 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: |
| 8384 ** |
| 8385 ** FTS5_SEGITER_ONETERM: |
| 8386 ** If set, set the iterator to point to EOF after the current doclist |
| 8387 ** has been exhausted. Do not proceed to the next term in the segment. |
| 8388 ** |
| 8389 ** FTS5_SEGITER_REVERSE: |
| 8390 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If |
| 8391 ** it is set, iterate through rowid in descending order instead of the |
| 8392 ** default ascending order. |
| 8393 ** |
| 8394 ** iRowidOffset/nRowidOffset/aRowidOffset: |
| 8395 ** These are used if the FTS5_SEGITER_REVERSE flag is set. |
| 8396 ** |
| 8397 ** For each rowid on the page corresponding to the current term, the |
| 8398 ** corresponding aRowidOffset[] entry is set to the byte offset of the |
| 8399 ** start of the "position-list-size" field within the page. |
| 8400 ** |
| 8401 ** iTermIdx: |
| 8402 ** Index of current term on iTermLeafPgno. |
| 8403 */ |
| 8404 struct Fts5SegIter { |
| 8405 Fts5StructureSegment *pSeg; /* Segment to iterate through */ |
| 8406 int flags; /* Mask of configuration flags */ |
| 8407 int iLeafPgno; /* Current leaf page number */ |
| 8408 Fts5Data *pLeaf; /* Current leaf data */ |
| 8409 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ |
| 8410 int iLeafOffset; /* Byte offset within current leaf */ |
| 8411 |
| 8412 /* Next method */ |
| 8413 void (*xNext)(Fts5Index*, Fts5SegIter*, int*); |
| 8414 |
| 8415 /* The page and offset from which the current term was read. The offset |
| 8416 ** is the offset of the first rowid in the current doclist. */ |
| 8417 int iTermLeafPgno; |
| 8418 int iTermLeafOffset; |
| 8419 |
| 8420 int iPgidxOff; /* Next offset in pgidx */ |
| 8421 int iEndofDoclist; |
| 8422 |
| 8423 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ |
| 8424 int iRowidOffset; /* Current entry in aRowidOffset[] */ |
| 8425 int nRowidOffset; /* Allocated size of aRowidOffset[] array */ |
| 8426 int *aRowidOffset; /* Array of offset to rowid fields */ |
| 8427 |
| 8428 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ |
| 8429 |
| 8430 /* Variables populated based on current entry. */ |
| 8431 Fts5Buffer term; /* Current term */ |
| 8432 i64 iRowid; /* Current rowid */ |
| 8433 int nPos; /* Number of bytes in current position list */ |
| 8434 u8 bDel; /* True if the delete flag is set */ |
| 8435 }; |
| 8436 |
/*
** Sanity check for an Fts5Data structure (x) that contains a leaf page:
** szLeaf must equal either the full record size or the 16-bit value
** stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bits for the Fts5SegIter.flags mask. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Each of the following takes a pointer to an Fts5Data structure that
** contains a leaf page.
**
** fts5LeafIsTermless(x):
**   True if the leaf contains no terms, false if it contains at least
**   one term.
**
** fts5LeafTermOff(x, i):
**   The 16-bit value read from offset (szLeaf + i*2) of the page.
**
** fts5LeafFirstRowidOff(x):
**   The 16-bit value read from the very start of the page.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
| 8458 |
| 8459 /* |
| 8460 ** Object for iterating through the merged results of one or more segments, |
| 8461 ** visiting each term/rowid pair in the merged data. |
| 8462 ** |
| 8463 ** nSeg is always a power of two greater than or equal to the number of |
| 8464 ** segments that this object is merging data from. Both the aSeg[] and |
| 8465 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded |
| 8466 ** with zeroed objects - these are handled as if they were iterators opened |
| 8467 ** on empty segments. Although aSeg[] is declared with a single element, it must remain the last member so that the object can be over-allocated to hold nSeg entries (NOTE(review): the allocator is outside this part of the file - confirm). |
| 8468 ** |
| 8469 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an |
| 8470 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the |
| 8471 ** comparison in this context is the index of the iterator that currently |
| 8472 ** points to the smaller term/rowid combination. Iterators at EOF are |
| 8473 ** considered to be greater than all other iterators. |
| 8474 ** |
| 8475 ** aFirst[1] contains the index in aSeg[] of the iterator that points to |
| 8476 ** the smallest key overall. aFirst[0] is unused. |
| 8477 ** |
| 8478 ** poslist: |
| 8479 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. |
| 8480 ** There is no way to tell if this is populated or not. |
| 8481 */ |
| 8482 struct Fts5Iter { |
| 8483 Fts5IndexIter base; /* Base class containing output vars. Must be first */ |
| 8484 |
| 8485 Fts5Index *pIndex; /* Index that owns this iterator */ |
| 8486 Fts5Structure *pStruct; /* Database structure for this iterator */ |
| 8487 Fts5Buffer poslist; /* Buffer containing current poslist */ |
| 8488 Fts5Colset *pColset; /* Restrict matches to these columns */ |
| 8489 |
| 8490 /* Invoked to set output variables. */ |
| 8491 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); |
| 8492 |
| 8493 int nSeg; /* Size of aSeg[] array (always a power of two) */ |
| 8494 int bRev; /* True to iterate in reverse order */ |
| 8495 u8 bSkipEmpty; /* True to skip deleted entries */ |
| 8496 |
| 8497 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1]. NOTE(review): presumably the first rowid, in iteration order, of any iterator other than the current winner - confirm against users */ |
| 8498 Fts5CResult *aFirst; /* Current merge state (see above) */ |
| 8499 Fts5SegIter aSeg[1]; /* Array of segment iterators (nSeg entries) */ |
| 8500 }; |
| 8501 |
| 8502 |
| 8503 /* |
| 8504 ** An instance of the following type is used to iterate through the contents |
| 8505 ** of a doclist-index record. |
| 8506 ** |
| 8507 ** pData: |
| 8508 ** Record containing the doclist-index data. |
| 8509 ** |
| 8510 ** bEof: |
| 8511 ** Set to true once iterator has reached EOF. |
| 8512 ** |
| 8513 ** iOff: |
| 8514 ** Set to the current offset within record pData. Zero means that the level has not yet been positioned on its first entry. |
| 8515 */ |
| 8516 struct Fts5DlidxLvl { |
| 8517 Fts5Data *pData; /* Data for current page of this level */ |
| 8518 int iOff; /* Current offset into pData */ |
| 8519 int bEof; /* At EOF already */ |
| 8520 int iFirstOff; /* Used by reverse iterators: offset just past the first pgno/rowid pair */ |
| 8521 |
| 8522 /* Output variables */ |
| 8523 int iLeafPgno; /* Page number of current leaf page */ |
| 8524 i64 iRowid; /* First rowid on leaf iLeafPgno */ |
| 8525 }; |
| 8526 struct Fts5DlidxIter { |
| 8527 int nLvl; /* Number of valid entries in aLvl[] */ |
| 8528 int iSegid; /* Segment id, used to build doclist-index rowids */ |
| 8529 Fts5DlidxLvl aLvl[1]; /* One entry per level; over-allocated to nLvl entries */ |
| 8530 }; |
| 8531 |
| 8532 static void fts5PutU16(u8 *aOut, u16 iVal){ |
| 8533 aOut[0] = (iVal>>8); |
| 8534 aOut[1] = (iVal&0xFF); |
| 8535 } |
| 8536 |
| 8537 static u16 fts5GetU16(const u8 *aIn){ |
| 8538 return ((u16)aIn[0] << 8) + aIn[1]; |
| 8539 } |
| 8540 |
| 8541 /* |
| 8542 ** Allocate and return a buffer at least nByte bytes in size. |
| 8543 ** |
| 8544 ** If an OOM error is encountered, return NULL and set the error code in |
| 8545 ** the Fts5Index handle passed as the first argument. |
| 8546 */ |
| 8547 static void *fts5IdxMalloc(Fts5Index *p, int nByte){ |
| 8548 return sqlite3Fts5MallocZero(&p->rc, nByte); |
| 8549 } |
| 8550 |
/*
** Compare buffer pLeft against the blob pRight/nRight. The bytes that
** both sides have in common are compared with memcmp(). If they are
** identical, the shorter side is the lesser.
**
** Returns a negative value if pLeft is smaller than pRight, zero if the
** two are equal, or a positive value if pRight is smaller than pLeft:
**
**     res = *pLeft - *pRight
**
** Only compiled in SQLITE_DEBUG builds.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nShared = MIN(pLeft->n, nRight);
  int cmp = memcmp(pLeft->p, pRight, nShared);
  if( cmp!=0 ) return cmp;
  return pLeft->n - nRight;
}
#endif
| 8569 |
| 8570 /* |
| 8571 ** Compare the contents of the two buffers using memcmp(). If one buffer |
| 8572 ** is a prefix of the other, it is considered the lesser. |
| 8573 ** |
| 8574 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or |
| 8575 ** +ve if pRight is smaller than pLeft. In other words: |
| 8576 ** |
| 8577 ** res = *pLeft - *pRight |
| 8578 */ |
| 8579 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ |
| 8580 int nCmp = MIN(pLeft->n, pRight->n); |
| 8581 int res = memcmp(pLeft->p, pRight->p, nCmp); |
| 8582 return (res==0 ? (pLeft->n - pRight->n) : res); |
| 8583 } |
| 8584 |
| 8585 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ |
| 8586 int ret; |
| 8587 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); |
| 8588 return ret; |
| 8589 } |
| 8590 |
| 8591 /* |
| 8592 ** Close the read-only blob handle, if it is open. |
| 8593 */ |
| 8594 static void fts5CloseReader(Fts5Index *p){ |
| 8595 if( p->pReader ){ |
| 8596 sqlite3_blob *pReader = p->pReader; |
| 8597 p->pReader = 0; |
| 8598 sqlite3_blob_close(pReader); |
| 8599 } |
| 8600 } |
| 8601 |
| 8602 |
| 8603 /* |
| 8604 ** Retrieve a record from the %_data table. |
| 8605 ** |
| 8606 ** If an error occurs, NULL is returned and an error left in the |
| 8607 ** Fts5Index object. |
| 8608 */ |
| 8609 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ |
| 8610 Fts5Data *pRet = 0; |
| 8611 if( p->rc==SQLITE_OK ){ |
| 8612 int rc = SQLITE_OK; |
| 8613 |
| 8614 if( p->pReader ){ |
| 8615 /* This call may return SQLITE_ABORT if there has been a savepoint |
| 8616 ** rollback since it was last used. In this case a new blob handle |
| 8617 ** is required. */ |
| 8618 sqlite3_blob *pBlob = p->pReader; |
| 8619 p->pReader = 0; |
| 8620 rc = sqlite3_blob_reopen(pBlob, iRowid); |
| 8621 assert( p->pReader==0 ); |
| 8622 p->pReader = pBlob; |
| 8623 if( rc!=SQLITE_OK ){ |
| 8624 fts5CloseReader(p); |
| 8625 } |
| 8626 if( rc==SQLITE_ABORT ) rc = SQLITE_OK; |
| 8627 } |
| 8628 |
| 8629 /* If the blob handle is not open at this point, open it and seek |
| 8630 ** to the requested entry. */ |
| 8631 if( p->pReader==0 && rc==SQLITE_OK ){ |
| 8632 Fts5Config *pConfig = p->pConfig; |
| 8633 rc = sqlite3_blob_open(pConfig->db, |
| 8634 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader |
| 8635 ); |
| 8636 } |
| 8637 |
| 8638 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls |
| 8639 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. |
| 8640 ** All the reasons those functions might return SQLITE_ERROR - missing |
| 8641 ** table, missing row, non-blob/text in block column - indicate |
| 8642 ** backing store corruption. */ |
| 8643 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; |
| 8644 |
| 8645 if( rc==SQLITE_OK ){ |
| 8646 u8 *aOut = 0; /* Read blob data into this buffer */ |
| 8647 int nByte = sqlite3_blob_bytes(p->pReader); |
| 8648 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; |
| 8649 pRet = (Fts5Data*)sqlite3_malloc(nAlloc); |
| 8650 if( pRet ){ |
| 8651 pRet->nn = nByte; |
| 8652 aOut = pRet->p = (u8*)&pRet[1]; |
| 8653 }else{ |
| 8654 rc = SQLITE_NOMEM; |
| 8655 } |
| 8656 |
| 8657 if( rc==SQLITE_OK ){ |
| 8658 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); |
| 8659 } |
| 8660 if( rc!=SQLITE_OK ){ |
| 8661 sqlite3_free(pRet); |
| 8662 pRet = 0; |
| 8663 }else{ |
| 8664 /* TODO1: Fix this */ |
| 8665 pRet->szLeaf = fts5GetU16(&pRet->p[2]); |
| 8666 } |
| 8667 } |
| 8668 p->rc = rc; |
| 8669 p->nRead++; |
| 8670 } |
| 8671 |
| 8672 assert( (pRet==0)==(p->rc!=SQLITE_OK) ); |
| 8673 return pRet; |
| 8674 } |
| 8675 |
| 8676 /* |
| 8677 ** Release a reference to data record returned by an earlier call to |
| 8678 ** fts5DataRead(). |
| 8679 */ |
| 8680 static void fts5DataRelease(Fts5Data *pData){ |
| 8681 sqlite3_free(pData); |
| 8682 } |
| 8683 |
| 8684 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ |
| 8685 Fts5Data *pRet = fts5DataRead(p, iRowid); |
| 8686 if( pRet ){ |
| 8687 if( pRet->szLeaf>pRet->nn ){ |
| 8688 p->rc = FTS5_CORRUPT; |
| 8689 fts5DataRelease(pRet); |
| 8690 pRet = 0; |
| 8691 } |
| 8692 } |
| 8693 return pRet; |
| 8694 } |
| 8695 |
| 8696 static int fts5IndexPrepareStmt( |
| 8697 Fts5Index *p, |
| 8698 sqlite3_stmt **ppStmt, |
| 8699 char *zSql |
| 8700 ){ |
| 8701 if( p->rc==SQLITE_OK ){ |
| 8702 if( zSql ){ |
| 8703 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0); |
| 8704 }else{ |
| 8705 p->rc = SQLITE_NOMEM; |
| 8706 } |
| 8707 } |
| 8708 sqlite3_free(zSql); |
| 8709 return p->rc; |
| 8710 } |
| 8711 |
| 8712 |
| 8713 /* |
| 8714 ** INSERT OR REPLACE a record into the %_data table. |
| 8715 */ |
| 8716 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ |
| 8717 if( p->rc!=SQLITE_OK ) return; |
| 8718 |
| 8719 if( p->pWriter==0 ){ |
| 8720 Fts5Config *pConfig = p->pConfig; |
| 8721 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( |
| 8722 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", |
| 8723 pConfig->zDb, pConfig->zName |
| 8724 )); |
| 8725 if( p->rc ) return; |
| 8726 } |
| 8727 |
| 8728 sqlite3_bind_int64(p->pWriter, 1, iRowid); |
| 8729 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); |
| 8730 sqlite3_step(p->pWriter); |
| 8731 p->rc = sqlite3_reset(p->pWriter); |
| 8732 } |
| 8733 |
| 8734 /* |
| 8735 ** Execute the following SQL: |
| 8736 ** |
| 8737 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast |
| 8738 */ |
| 8739 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ |
| 8740 if( p->rc!=SQLITE_OK ) return; |
| 8741 |
| 8742 if( p->pDeleter==0 ){ |
| 8743 int rc; |
| 8744 Fts5Config *pConfig = p->pConfig; |
| 8745 char *zSql = sqlite3_mprintf( |
| 8746 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", |
| 8747 pConfig->zDb, pConfig->zName |
| 8748 ); |
| 8749 if( zSql==0 ){ |
| 8750 rc = SQLITE_NOMEM; |
| 8751 }else{ |
| 8752 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); |
| 8753 sqlite3_free(zSql); |
| 8754 } |
| 8755 if( rc!=SQLITE_OK ){ |
| 8756 p->rc = rc; |
| 8757 return; |
| 8758 } |
| 8759 } |
| 8760 |
| 8761 sqlite3_bind_int64(p->pDeleter, 1, iFirst); |
| 8762 sqlite3_bind_int64(p->pDeleter, 2, iLast); |
| 8763 sqlite3_step(p->pDeleter); |
| 8764 p->rc = sqlite3_reset(p->pDeleter); |
| 8765 } |
| 8766 |
| 8767 /* |
| 8768 ** Remove all records associated with segment iSegid. |
| 8769 */ |
| 8770 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ |
| 8771 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); |
| 8772 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; |
| 8773 fts5DataDelete(p, iFirst, iLast); |
| 8774 if( p->pIdxDeleter==0 ){ |
| 8775 Fts5Config *pConfig = p->pConfig; |
| 8776 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( |
| 8777 "DELETE FROM '%q'.'%q_idx' WHERE segid=?", |
| 8778 pConfig->zDb, pConfig->zName |
| 8779 )); |
| 8780 } |
| 8781 if( p->rc==SQLITE_OK ){ |
| 8782 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); |
| 8783 sqlite3_step(p->pIdxDeleter); |
| 8784 p->rc = sqlite3_reset(p->pIdxDeleter); |
| 8785 } |
| 8786 } |
| 8787 |
| 8788 /* |
| 8789 ** Release a reference to an Fts5Structure object returned by an earlier |
| 8790 ** call to fts5StructureRead() or fts5StructureDecode(). |
| 8791 */ |
| 8792 static void fts5StructureRelease(Fts5Structure *pStruct){ |
| 8793 if( pStruct && 0>=(--pStruct->nRef) ){ |
| 8794 int i; |
| 8795 assert( pStruct->nRef==0 ); |
| 8796 for(i=0; i<pStruct->nLevel; i++){ |
| 8797 sqlite3_free(pStruct->aLevel[i].aSeg); |
| 8798 } |
| 8799 sqlite3_free(pStruct); |
| 8800 } |
| 8801 } |
| 8802 |
| 8803 static void fts5StructureRef(Fts5Structure *pStruct){ |
| 8804 pStruct->nRef++; |
| 8805 } |
| 8806 |
| 8807 /* |
| 8808 ** Deserialize and return the structure record currently stored in serialized |
| 8809 ** form within buffer pData/nData. |
| 8810 ** |
| 8811 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array |
| 8812 ** are over-allocated by one slot. This allows the structure contents |
| 8813 ** to be more easily edited. |
| 8814 ** |
| 8815 ** If an error occurs, *ppOut is set to NULL and an SQLite error code |
| 8816 ** returned. Otherwise, *ppOut is set to point to the new object and |
| 8817 ** SQLITE_OK returned. |
| 8818 */ |
| 8819 static int fts5StructureDecode( |
| 8820 const u8 *pData, /* Buffer containing serialized structure */ |
| 8821 int nData, /* Size of buffer pData in bytes */ |
| 8822 int *piCookie, /* Configuration cookie value */ |
| 8823 Fts5Structure **ppOut /* OUT: Deserialized object */ |
| 8824 ){ |
| 8825 int rc = SQLITE_OK; |
| 8826 int i = 0; |
| 8827 int iLvl; |
| 8828 int nLevel = 0; |
| 8829 int nSegment = 0; |
| 8830 int nByte; /* Bytes of space to allocate at pRet */ |
| 8831 Fts5Structure *pRet = 0; /* Structure object to return */ |
| 8832 |
| 8833 /* Grab the cookie value */ |
| 8834 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); |
| 8835 i = 4; |
| 8836 |
| 8837 /* Read the total number of levels and segments from the start of the |
| 8838 ** structure record. */ |
| 8839 i += fts5GetVarint32(&pData[i], nLevel); |
| 8840 i += fts5GetVarint32(&pData[i], nSegment); |
| 8841 nByte = ( |
| 8842 sizeof(Fts5Structure) + /* Main structure */ |
| 8843 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */ |
| 8844 ); |
| 8845 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); |
| 8846 |
| 8847 if( pRet ){ |
| 8848 pRet->nRef = 1; |
| 8849 pRet->nLevel = nLevel; |
| 8850 pRet->nSegment = nSegment; |
| 8851 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); |
| 8852 |
| 8853 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ |
| 8854 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; |
| 8855 int nTotal = 0; |
| 8856 int iSeg; |
| 8857 |
| 8858 if( i>=nData ){ |
| 8859 rc = FTS5_CORRUPT; |
| 8860 }else{ |
| 8861 i += fts5GetVarint32(&pData[i], pLvl->nMerge); |
| 8862 i += fts5GetVarint32(&pData[i], nTotal); |
| 8863 assert( nTotal>=pLvl->nMerge ); |
| 8864 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, |
| 8865 nTotal * sizeof(Fts5StructureSegment) |
| 8866 ); |
| 8867 } |
| 8868 |
| 8869 if( rc==SQLITE_OK ){ |
| 8870 pLvl->nSeg = nTotal; |
| 8871 for(iSeg=0; iSeg<nTotal; iSeg++){ |
| 8872 if( i>=nData ){ |
| 8873 rc = FTS5_CORRUPT; |
| 8874 break; |
| 8875 } |
| 8876 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); |
| 8877 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); |
| 8878 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); |
| 8879 } |
| 8880 } |
| 8881 } |
| 8882 if( rc!=SQLITE_OK ){ |
| 8883 fts5StructureRelease(pRet); |
| 8884 pRet = 0; |
| 8885 } |
| 8886 } |
| 8887 |
| 8888 *ppOut = pRet; |
| 8889 return rc; |
| 8890 } |
| 8891 |
| 8892 /* |
| 8893 ** |
| 8894 */ |
| 8895 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ |
| 8896 if( *pRc==SQLITE_OK ){ |
| 8897 Fts5Structure *pStruct = *ppStruct; |
| 8898 int nLevel = pStruct->nLevel; |
| 8899 int nByte = ( |
| 8900 sizeof(Fts5Structure) + /* Main structure */ |
| 8901 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ |
| 8902 ); |
| 8903 |
| 8904 pStruct = sqlite3_realloc(pStruct, nByte); |
| 8905 if( pStruct ){ |
| 8906 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); |
| 8907 pStruct->nLevel++; |
| 8908 *ppStruct = pStruct; |
| 8909 }else{ |
| 8910 *pRc = SQLITE_NOMEM; |
| 8911 } |
| 8912 } |
| 8913 } |
| 8914 |
| 8915 /* |
| 8916 ** Extend level iLvl so that there is room for at least nExtra more |
| 8917 ** segments. |
| 8918 */ |
| 8919 static void fts5StructureExtendLevel( |
| 8920 int *pRc, |
| 8921 Fts5Structure *pStruct, |
| 8922 int iLvl, |
| 8923 int nExtra, |
| 8924 int bInsert |
| 8925 ){ |
| 8926 if( *pRc==SQLITE_OK ){ |
| 8927 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 8928 Fts5StructureSegment *aNew; |
| 8929 int nByte; |
| 8930 |
| 8931 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); |
| 8932 aNew = sqlite3_realloc(pLvl->aSeg, nByte); |
| 8933 if( aNew ){ |
| 8934 if( bInsert==0 ){ |
| 8935 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); |
| 8936 }else{ |
| 8937 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); |
| 8938 memmove(&aNew[nExtra], aNew, nMove); |
| 8939 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); |
| 8940 } |
| 8941 pLvl->aSeg = aNew; |
| 8942 }else{ |
| 8943 *pRc = SQLITE_NOMEM; |
| 8944 } |
| 8945 } |
| 8946 } |
| 8947 |
| 8948 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ |
| 8949 Fts5Structure *pRet = 0; |
| 8950 Fts5Config *pConfig = p->pConfig; |
| 8951 int iCookie; /* Configuration cookie */ |
| 8952 Fts5Data *pData; |
| 8953 |
| 8954 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); |
| 8955 if( p->rc==SQLITE_OK ){ |
| 8956 /* TODO: Do we need this if the leaf-index is appended? Probably... */ |
| 8957 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); |
| 8958 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); |
| 8959 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ |
| 8960 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); |
| 8961 } |
| 8962 fts5DataRelease(pData); |
| 8963 if( p->rc!=SQLITE_OK ){ |
| 8964 fts5StructureRelease(pRet); |
| 8965 pRet = 0; |
| 8966 } |
| 8967 } |
| 8968 |
| 8969 return pRet; |
| 8970 } |
| 8971 |
| 8972 static i64 fts5IndexDataVersion(Fts5Index *p){ |
| 8973 i64 iVersion = 0; |
| 8974 |
| 8975 if( p->rc==SQLITE_OK ){ |
| 8976 if( p->pDataVersion==0 ){ |
| 8977 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, |
| 8978 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) |
| 8979 ); |
| 8980 if( p->rc ) return 0; |
| 8981 } |
| 8982 |
| 8983 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ |
| 8984 iVersion = sqlite3_column_int64(p->pDataVersion, 0); |
| 8985 } |
| 8986 p->rc = sqlite3_reset(p->pDataVersion); |
| 8987 } |
| 8988 |
| 8989 return iVersion; |
| 8990 } |
| 8991 |
| 8992 /* |
| 8993 ** Read, deserialize and return the structure record. |
| 8994 ** |
| 8995 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array |
| 8996 ** are over-allocated as described for function fts5StructureDecode() |
| 8997 ** above. |
| 8998 ** |
| 8999 ** If an error occurs, NULL is returned and an error code left in the |
| 9000 ** Fts5Index handle. If an error has already occurred when this function |
| 9001 ** is called, it is a no-op. |
| 9002 */ |
| 9003 static Fts5Structure *fts5StructureRead(Fts5Index *p){ |
| 9004 |
| 9005 if( p->pStruct==0 ){ |
| 9006 p->iStructVersion = fts5IndexDataVersion(p); |
| 9007 if( p->rc==SQLITE_OK ){ |
| 9008 p->pStruct = fts5StructureReadUncached(p); |
| 9009 } |
| 9010 } |
| 9011 |
| 9012 #if 0 |
| 9013 else{ |
| 9014 Fts5Structure *pTest = fts5StructureReadUncached(p); |
| 9015 if( pTest ){ |
| 9016 int i, j; |
| 9017 assert_nc( p->pStruct->nSegment==pTest->nSegment ); |
| 9018 assert_nc( p->pStruct->nLevel==pTest->nLevel ); |
| 9019 for(i=0; i<pTest->nLevel; i++){ |
| 9020 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge ); |
| 9021 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg ); |
| 9022 for(j=0; j<pTest->aLevel[i].nSeg; j++){ |
| 9023 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; |
| 9024 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; |
| 9025 assert_nc( p1->iSegid==p2->iSegid ); |
| 9026 assert_nc( p1->pgnoFirst==p2->pgnoFirst ); |
| 9027 assert_nc( p1->pgnoLast==p2->pgnoLast ); |
| 9028 } |
| 9029 } |
| 9030 fts5StructureRelease(pTest); |
| 9031 } |
| 9032 } |
| 9033 #endif |
| 9034 |
| 9035 if( p->rc!=SQLITE_OK ) return 0; |
| 9036 assert( p->iStructVersion!=0 ); |
| 9037 assert( p->pStruct!=0 ); |
| 9038 fts5StructureRef(p->pStruct); |
| 9039 return p->pStruct; |
| 9040 } |
| 9041 |
| 9042 static void fts5StructureInvalidate(Fts5Index *p){ |
| 9043 if( p->pStruct ){ |
| 9044 fts5StructureRelease(p->pStruct); |
| 9045 p->pStruct = 0; |
| 9046 } |
| 9047 } |
| 9048 |
/*
** Return the total number of segments in index structure pStruct - the
** sum of the nSeg values of all of its levels. Zero is returned if
** pStruct is NULL. This function is only ever used as part of assert()
** conditions, and so is only compiled in SQLITE_DEBUG builds.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int iLvl;                       /* Used to iterate through levels */

  if( pStruct==0 ) return 0;
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    nTotal += pStruct->aLevel[iLvl].nSeg;
  }
  return nTotal;
}
#endif
| 9066 |
/*
** Append the nBlob byte blob pBlob to buffer pBuf. The buffer is not
** grown - the caller must already have ensured that it is large enough.
** This is checked only by an assert().
**
** Each macro argument is parenthesized where it is used, so that an
** expression such as "n<<1" passed as nBlob is not regrouped by the
** surrounding operators. Arguments are evaluated more than once, so they
** must be free of side effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {           \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );                 \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));               \
  (pBuf)->n += (nBlob);                                          \
}

/*
** Append varint iVal to buffer pBuf. As above, the buffer is not grown.
** That there was enough space is checked by an assert(), and only after
** the value has been written.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                     \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                               \
}
| 9077 |
| 9078 |
| 9079 /* |
| 9080 ** Serialize and store the "structure" record. |
| 9081 ** |
| 9082 ** If an error occurs, leave an error code in the Fts5Index object. If an |
| 9083 ** error has already occurred, this function is a no-op. |
| 9084 */ |
| 9085 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ |
| 9086 if( p->rc==SQLITE_OK ){ |
| 9087 Fts5Buffer buf; /* Buffer to serialize record into */ |
| 9088 int iLvl; /* Used to iterate through levels */ |
| 9089 int iCookie; /* Cookie value to store */ |
| 9090 |
| 9091 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); |
| 9092 memset(&buf, 0, sizeof(Fts5Buffer)); |
| 9093 |
| 9094 /* Append the current configuration cookie */ |
| 9095 iCookie = p->pConfig->iCookie; |
| 9096 if( iCookie<0 ) iCookie = 0; |
| 9097 |
| 9098 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ |
| 9099 sqlite3Fts5Put32(buf.p, iCookie); |
| 9100 buf.n = 4; |
| 9101 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); |
| 9102 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); |
| 9103 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); |
| 9104 } |
| 9105 |
| 9106 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 9107 int iSeg; /* Used to iterate through segments */ |
| 9108 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 9109 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); |
| 9110 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); |
| 9111 assert( pLvl->nMerge<=pLvl->nSeg ); |
| 9112 |
| 9113 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ |
| 9114 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); |
| 9115 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); |
| 9116 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); |
| 9117 } |
| 9118 } |
| 9119 |
| 9120 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); |
| 9121 fts5BufferFree(&buf); |
| 9122 } |
| 9123 } |
| 9124 |
/*
** Debugging aid. When the "#if 0" below is enabled, fts5PrintStructure()
** writes a caption followed by a text rendering of pStruct (produced by
** fts5DebugStructure()) to stdout. In normal builds it is a macro that
** expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  Fts5Buffer dump;
  int rc = SQLITE_OK;

  memset(&dump, 0, sizeof(dump));
  fts5DebugStructure(&rc, &dump, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, dump.p);
  fflush(stdout);
  fts5BufferFree(&dump);
}
#else
# define fts5PrintStructure(x,y)
#endif
| 9139 |
| 9140 static int fts5SegmentSize(Fts5StructureSegment *pSeg){ |
| 9141 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; |
| 9142 } |
| 9143 |
| 9144 /* |
| 9145 ** Return a copy of index structure pStruct. Except, promote as many |
| 9146 ** segments as possible to level iPromote. If an OOM occurs, NULL is |
| 9147 ** returned. |
| 9148 */ |
| 9149 static void fts5StructurePromoteTo( |
| 9150 Fts5Index *p, |
| 9151 int iPromote, |
| 9152 int szPromote, |
| 9153 Fts5Structure *pStruct |
| 9154 ){ |
| 9155 int il, is; |
| 9156 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; |
| 9157 |
| 9158 if( pOut->nMerge==0 ){ |
| 9159 for(il=iPromote+1; il<pStruct->nLevel; il++){ |
| 9160 Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; |
| 9161 if( pLvl->nMerge ) return; |
| 9162 for(is=pLvl->nSeg-1; is>=0; is--){ |
| 9163 int sz = fts5SegmentSize(&pLvl->aSeg[is]); |
| 9164 if( sz>szPromote ) return; |
| 9165 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); |
| 9166 if( p->rc ) return; |
| 9167 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); |
| 9168 pOut->nSeg++; |
| 9169 pLvl->nSeg--; |
| 9170 } |
| 9171 } |
| 9172 } |
| 9173 } |
| 9174 |
| 9175 /* |
| 9176 ** A new segment has just been written to level iLvl of index structure |
| 9177 ** pStruct. This function determines if any segments should be promoted |
| 9178 ** as a result. Segments are promoted in two scenarios: |
| 9179 ** |
| 9180 ** a) If the segment just written is smaller than one or more segments |
| 9181 ** within the previous populated level, it is promoted to the previous |
| 9182 ** populated level. |
| 9183 ** |
| 9184 ** b) If the segment just written is larger than the newest segment on |
| 9185 ** the next populated level, then that segment, and any other adjacent |
| 9186 ** segments that are also smaller than the one just written, are |
| 9187 ** promoted. |
| 9188 ** |
| 9189 ** If one or more segments are promoted, the structure object is updated |
| 9190 ** to reflect this. |
| 9191 */ |
| 9192 static void fts5StructurePromote( |
| 9193 Fts5Index *p, /* FTS5 backend object */ |
| 9194 int iLvl, /* Index level just updated */ |
| 9195 Fts5Structure *pStruct /* Index structure */ |
| 9196 ){ |
| 9197 if( p->rc==SQLITE_OK ){ |
| 9198 int iTst; |
| 9199 int iPromote = -1; |
| 9200 int szPromote = 0; /* Promote anything this size or smaller */ |
| 9201 Fts5StructureSegment *pSeg; /* Segment just written */ |
| 9202 int szSeg; /* Size of segment just written */ |
| 9203 int nSeg = pStruct->aLevel[iLvl].nSeg; |
| 9204 |
| 9205 if( nSeg==0 ) return; |
| 9206 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; |
| 9207 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); |
| 9208 |
| 9209 /* Check for condition (a) */ |
| 9210 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); |
| 9211 if( iTst>=0 ){ |
| 9212 int i; |
| 9213 int szMax = 0; |
| 9214 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; |
| 9215 assert( pTst->nMerge==0 ); |
| 9216 for(i=0; i<pTst->nSeg; i++){ |
| 9217 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; |
| 9218 if( sz>szMax ) szMax = sz; |
| 9219 } |
| 9220 if( szMax>=szSeg ){ |
| 9221 /* Condition (a) is true. Promote the newest segment on level |
| 9222 ** iLvl to level iTst. */ |
| 9223 iPromote = iTst; |
| 9224 szPromote = szMax; |
| 9225 } |
| 9226 } |
| 9227 |
| 9228 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() |
| 9229 ** is a no-op if it is not. */ |
| 9230 if( iPromote<0 ){ |
| 9231 iPromote = iLvl; |
| 9232 szPromote = szSeg; |
| 9233 } |
| 9234 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); |
| 9235 } |
| 9236 } |
| 9237 |
| 9238 |
| 9239 /* |
| 9240 ** Advance the iterator passed as the only argument. If the end of the |
| 9241 ** doclist-index page is reached, return non-zero. |
| 9242 */ |
| 9243 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ |
| 9244 Fts5Data *pData = pLvl->pData; |
| 9245 |
| 9246 if( pLvl->iOff==0 ){ |
| 9247 assert( pLvl->bEof==0 ); |
| 9248 pLvl->iOff = 1; |
| 9249 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); |
| 9250 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); |
| 9251 pLvl->iFirstOff = pLvl->iOff; |
| 9252 }else{ |
| 9253 int iOff; |
| 9254 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ |
| 9255 if( pData->p[iOff] ) break; |
| 9256 } |
| 9257 |
| 9258 if( iOff<pData->nn ){ |
| 9259 i64 iVal; |
| 9260 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; |
| 9261 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); |
| 9262 pLvl->iRowid += iVal; |
| 9263 pLvl->iOff = iOff; |
| 9264 }else{ |
| 9265 pLvl->bEof = 1; |
| 9266 } |
| 9267 } |
| 9268 |
| 9269 return pLvl->bEof; |
| 9270 } |
| 9271 |
| 9272 /* |
| 9273 ** Advance the iterator passed as the only argument. |
| 9274 */ |
| 9275 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ |
| 9276 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
| 9277 |
| 9278 assert( iLvl<pIter->nLvl ); |
| 9279 if( fts5DlidxLvlNext(pLvl) ){ |
| 9280 if( (iLvl+1) < pIter->nLvl ){ |
| 9281 fts5DlidxIterNextR(p, pIter, iLvl+1); |
| 9282 if( pLvl[1].bEof==0 ){ |
| 9283 fts5DataRelease(pLvl->pData); |
| 9284 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 9285 pLvl->pData = fts5DataRead(p, |
| 9286 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
| 9287 ); |
| 9288 if( pLvl->pData ) fts5DlidxLvlNext(pLvl); |
| 9289 } |
| 9290 } |
| 9291 } |
| 9292 |
| 9293 return pIter->aLvl[0].bEof; |
| 9294 } |
| 9295 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 9296 return fts5DlidxIterNextR(p, pIter, 0); |
| 9297 } |
| 9298 |
| 9299 /* |
| 9300 ** The iterator passed as the first argument has the following fields set |
| 9301 ** as follows. This function sets up the rest of the iterator so that it |
| 9302 ** points to the first rowid in the doclist-index. |
| 9303 ** |
| 9304 ** pData: |
| 9305 ** pointer to doclist-index record, |
| 9306 ** |
| 9307 ** When this function is called pIter->iLeafPgno is the page number the |
| 9308 ** doclist is associated with (the one featuring the term). |
| 9309 */ |
| 9310 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ |
| 9311 int i; |
| 9312 for(i=0; i<pIter->nLvl; i++){ |
| 9313 fts5DlidxLvlNext(&pIter->aLvl[i]); |
| 9314 } |
| 9315 return pIter->aLvl[0].bEof; |
| 9316 } |
| 9317 |
| 9318 |
| 9319 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 9320 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; |
| 9321 } |
| 9322 |
| 9323 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 9324 int i; |
| 9325 |
| 9326 /* Advance each level to the last entry on the last page */ |
| 9327 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ |
| 9328 Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; |
| 9329 while( fts5DlidxLvlNext(pLvl)==0 ); |
| 9330 pLvl->bEof = 0; |
| 9331 |
| 9332 if( i>0 ){ |
| 9333 Fts5DlidxLvl *pChild = &pLvl[-1]; |
| 9334 fts5DataRelease(pChild->pData); |
| 9335 memset(pChild, 0, sizeof(Fts5DlidxLvl)); |
| 9336 pChild->pData = fts5DataRead(p, |
| 9337 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) |
| 9338 ); |
| 9339 } |
| 9340 } |
| 9341 } |
| 9342 |
| 9343 /* |
| 9344 ** Move the iterator passed as the only argument to the previous entry. If the current offset is at or before iFirstOff there is no previous entry: bEof is set and non-zero returned. Otherwise iRowid, iLeafPgno and iOff are wound back by one entry and the (still zero) bEof flag is returned. The iterator must not already be at EOF when this function is called. |
| 9345 */ |
| 9346 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ |
| 9347 int iOff = pLvl->iOff; |
| 9348 |
| 9349 assert( pLvl->bEof==0 ); |
| 9350 if( iOff<=pLvl->iFirstOff ){ |
| 9351 pLvl->bEof = 1; |
| 9352 }else{ |
| 9353 u8 *a = pLvl->pData->p; |
| 9354 i64 iVal; |
| 9355 int iLimit; |
| 9356 int ii; |
| 9357 int nZero = 0; /* Number of 0x00 varints skipped while moving backwards */ |
| 9358 |
| 9359 /* Currently iOff points to the first byte of a varint. This block |
| 9360 ** decrements iOff until it points to the first byte of the previous |
| 9361 ** varint. Taking care not to read any memory locations that occur |
| 9362 ** before the buffer in memory. */ |
| 9363 iLimit = (iOff>9 ? iOff-9 : 0); /* Step back at most 9 bytes, and never before offset 0 */ |
| 9364 for(iOff--; iOff>iLimit; iOff--){ |
| 9365 if( (a[iOff-1] & 0x80)==0 ) break; /* Byte before iOff ends an earlier varint */ |
| 9366 } |
| 9367 |
| 9368 fts5GetVarint(&a[iOff], (u64*)&iVal); |
| 9369 pLvl->iRowid -= iVal; /* Undo the delta that fts5DlidxLvlNext() added */ |
| 9370 pLvl->iLeafPgno--; |
| 9371 |
| 9372 /* Skip backwards past any 0x00 varints. */ |
| 9373 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ |
| 9374 nZero++; |
| 9375 } |
| 9376 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ |
| 9377 /* The byte immediately before the last 0x00 byte has the 0x80 bit |
| 9378 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 |
| 9379 ** bytes before a[ii]. */ |
| 9380 int bZero = 0; /* True if last 0x00 counts */ |
| 9381 if( (ii-8)>=pLvl->iFirstOff ){ |
| 9382 int j; |
| 9383 for(j=1; j<=8 && (a[ii-j] & 0x80); j++); |
| 9384 bZero = (j>8); |
| 9385 } |
| 9386 if( bZero==0 ) nZero--; /* That 0x00 is the final byte of a multi-byte varint, not an entry */ |
| 9387 } |
| 9388 pLvl->iLeafPgno -= nZero; /* Each skipped 0x00 entry accounts for one leaf page */ |
| 9389 pLvl->iOff = iOff - nZero; |
| 9390 } |
| 9391 |
| 9392 return pLvl->bEof; |
| 9393 } |
| 9394 |
| 9395 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ /* Step level iLvl of pIter back one entry, recursing into level iLvl+1 when this level runs out. Returns aLvl[0].bEof. */ |
| 9396 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
| 9397 |
| 9398 assert( iLvl<pIter->nLvl ); |
| 9399 if( fts5DlidxLvlPrev(pLvl) ){ /* non-zero return: this level is now at EOF */ |
| 9400 if( (iLvl+1) < pIter->nLvl ){ /* only if a higher level exists */ |
| 9401 fts5DlidxIterPrevR(p, pIter, iLvl+1); /* move the higher level back first */ |
| 9402 if( pLvl[1].bEof==0 ){ |
| 9403 fts5DataRelease(pLvl->pData); |
| 9404 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 9405 pLvl->pData = fts5DataRead(p, /* reload this level using the page number the higher level now holds */ |
| 9406 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
| 9407 ); |
| 9408 if( pLvl->pData ){ |
| 9409 while( fts5DlidxLvlNext(pLvl)==0 ); /* advance until fts5DlidxLvlNext() returns non-zero */ |
| 9410 pLvl->bEof = 0; /* then clear the flag so the level is treated as positioned on an entry */ |
| 9411 } |
| 9412 } |
| 9413 } |
| 9414 } |
| 9415 |
| 9416 return pIter->aLvl[0].bEof; |
| 9417 } |
| 9418 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ /* Step the doclist-index iterator back one entry, starting at level 0. Returns the value of fts5DlidxIterPrevR(). */ |
| 9419 return fts5DlidxIterPrevR(p, pIter, 0); |
| 9420 } |
| 9421 |
| 9422 /* |
| 9423 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). Passing a NULL pIter is a no-op. |
| 9424 */ |
| 9425 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ |
| 9426 if( pIter ){ |
| 9427 int i; |
| 9428 for(i=0; i<pIter->nLvl; i++){ /* release the page data held by each level */ |
| 9429 fts5DataRelease(pIter->aLvl[i].pData); |
| 9430 } |
| 9431 sqlite3_free(pIter); /* then the iterator allocation itself */ |
| 9432 } |
| 9433 } |
| 9434 |
| 9435 static Fts5DlidxIter *fts5DlidxIterInit( /* Allocate and position a doclist-index iterator. Returns NULL if p->rc is (or becomes) an error. */ |
| 9436 Fts5Index *p, /* Fts5 Backend to iterate within */ |
| 9437 int bRev, /* Zero: position on first entry. Non-zero: position on last entry */ |
| 9438 int iSegid, /* Segment id */ |
| 9439 int iLeafPg /* Leaf page number to load dlidx for */ |
| 9440 ){ |
| 9441 Fts5DlidxIter *pIter = 0; |
| 9442 int i; |
| 9443 int bDone = 0; |
| 9444 |
| 9445 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ /* one iteration per dlidx level, starting at level 0 */ |
| 9446 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); /* presumably Fts5DlidxIter already embeds one Fts5DlidxLvl - confirm against the struct */ |
| 9447 Fts5DlidxIter *pNew; |
| 9448 |
| 9449 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); |
| 9450 if( pNew==0 ){ |
| 9451 p->rc = SQLITE_NOMEM; /* pIter still holds the old allocation; it is freed below */ |
| 9452 }else{ |
| 9453 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); |
| 9454 Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; |
| 9455 pIter = pNew; |
| 9456 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 9457 pLvl->pData = fts5DataRead(p, iRowid); |
| 9458 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ /* low bit of the first byte clear: stop adding levels */ |
| 9459 bDone = 1; |
| 9460 } |
| 9461 pIter->nLvl = i+1; |
| 9462 } |
| 9463 } |
| 9464 |
| 9465 if( p->rc==SQLITE_OK ){ |
| 9466 pIter->iSegid = iSegid; |
| 9467 if( bRev==0 ){ |
| 9468 fts5DlidxIterFirst(pIter); |
| 9469 }else{ |
| 9470 fts5DlidxIterLast(p, pIter); |
| 9471 } |
| 9472 } |
| 9473 |
| 9474 if( p->rc!=SQLITE_OK ){ /* covers errors from the loop and from fts5DlidxIterLast() */ |
| 9475 fts5DlidxIterFree(pIter); |
| 9476 pIter = 0; |
| 9477 } |
| 9478 |
| 9479 return pIter; |
| 9480 } |
| 9481 |
| 9482 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ /* Return the iRowid value of level 0 of the iterator. */ |
| 9483 return pIter->aLvl[0].iRowid; |
| 9484 } |
| 9485 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ /* Return the iLeafPgno value of level 0 of the iterator. */ |
| 9486 return pIter->aLvl[0].iLeafPgno; |
| 9487 } |
| 9488 |
| 9489 /* |
| 9490 ** Load the next leaf page into the segment iterator. The current leaf is released and iLeafPgno is incremented. On return pIter->pLeaf is 0 if there is no further page (or the read produced no page). |
| 9491 */ |
| 9492 static void fts5SegIterNextPage( |
| 9493 Fts5Index *p, /* FTS5 backend object */ |
| 9494 Fts5SegIter *pIter /* Iterator to advance to next page */ |
| 9495 ){ |
| 9496 Fts5Data *pLeaf; |
| 9497 Fts5StructureSegment *pSeg = pIter->pSeg; |
| 9498 fts5DataRelease(pIter->pLeaf); |
| 9499 pIter->iLeafPgno++; |
| 9500 if( pIter->pNextLeaf ){ /* a page already held in pNextLeaf is used instead of reading */ |
| 9501 pIter->pLeaf = pIter->pNextLeaf; |
| 9502 pIter->pNextLeaf = 0; |
| 9503 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ |
| 9504 pIter->pLeaf = fts5LeafRead(p, |
| 9505 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) |
| 9506 ); |
| 9507 }else{ |
| 9508 pIter->pLeaf = 0; /* past the last page of the segment */ |
| 9509 } |
| 9510 pLeaf = pIter->pLeaf; |
| 9511 |
| 9512 if( pLeaf ){ |
| 9513 pIter->iPgidxOff = pLeaf->szLeaf; |
| 9514 if( fts5LeafIsTermless(pLeaf) ){ |
| 9515 pIter->iEndofDoclist = pLeaf->nn+1; /* nn+1 is the value used throughout this code for "no doclist end on this page" */ |
| 9516 }else{ |
| 9517 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], /* first varint at offset szLeaf becomes iEndofDoclist */ |
| 9518 pIter->iEndofDoclist |
| 9519 ); |
| 9520 } |
| 9521 } |
| 9522 } |
| 9523 |
| 9524 /* |
| 9525 ** Argument p points to a buffer containing a varint to be interpreted as a |
| 9526 ** position list size field. Read the varint and return the number of bytes |
| 9527 ** read. Before returning, set *pnSz to the number of bytes in the position |
| 9528 ** list (the varint value divided by 2), and *pbDel to true if the delete flag (the low bit of the varint value) is set, or false otherwise. |
| 9529 */ |
| 9530 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ |
| 9531 int nSz; |
| 9532 int n = 0; |
| 9533 fts5FastGetVarint32(p, n, nSz); /* n is used as the read offset and is returned as the byte count */ |
| 9534 assert_nc( nSz>=0 ); |
| 9535 *pnSz = nSz/2; |
| 9536 *pbDel = nSz & 0x0001; |
| 9537 return n; |
| 9538 } |
| 9539 |
| 9540 /* |
| 9541 ** Fts5SegIter.iLeafOffset currently points to the first byte of a |
| 9542 ** position-list size field. Read the value of the field and store it |
| 9543 ** in the following variables: |
| 9544 ** |
| 9545 ** Fts5SegIter.nPos |
| 9546 ** Fts5SegIter.bDel |
| 9547 ** |
| 9548 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the |
| 9549 ** position list content (if any). This function is a no-op if p->rc is already set. |
| 9550 */ |
| 9551 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ |
| 9552 if( p->rc==SQLITE_OK ){ |
| 9553 int iOff = pIter->iLeafOffset; /* Offset to read at */ |
| 9554 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 9555 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ /* detail=none: no size varint, only optional 0x00 marker bytes */ |
| 9556 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); |
| 9557 pIter->bDel = 0; |
| 9558 pIter->nPos = 1; |
| 9559 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ /* one 0x00 byte: set bDel and consume it */ |
| 9560 pIter->bDel = 1; |
| 9561 iOff++; |
| 9562 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ /* a second 0x00 byte: nPos stays 1, consume it */ |
| 9563 pIter->nPos = 1; |
| 9564 iOff++; |
| 9565 }else{ |
| 9566 pIter->nPos = 0; |
| 9567 } |
| 9568 } |
| 9569 }else{ |
| 9570 int nSz; |
| 9571 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); |
| 9572 pIter->bDel = (nSz & 0x0001); /* low bit is the delete flag */ |
| 9573 pIter->nPos = nSz>>1; /* remaining bits are the position-list size */ |
| 9574 assert_nc( pIter->nPos>=0 ); |
| 9575 } |
| 9576 pIter->iLeafOffset = iOff; |
| 9577 } |
| 9578 } |
| 9579 |
| 9580 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ /* Read the varint at iLeafOffset into pIter->iRowid and advance iLeafOffset past it, moving to the next page first if the offset is at or past szLeaf. */ |
| 9581 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
| 9582 int iOff = pIter->iLeafOffset; |
| 9583 |
| 9584 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 9585 if( iOff>=pIter->pLeaf->szLeaf ){ |
| 9586 fts5SegIterNextPage(p, pIter); |
| 9587 if( pIter->pLeaf==0 ){ |
| 9588 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; /* no following page and no earlier error: report corruption */ |
| 9589 return; |
| 9590 } |
| 9591 iOff = 4; /* presumably the first byte after the page header - confirm against the leaf format */ |
| 9592 a = pIter->pLeaf->p; |
| 9593 } |
| 9594 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
| 9595 pIter->iLeafOffset = iOff; |
| 9596 } |
| 9597 |
| 9598 /* |
| 9599 ** Fts5SegIter.iLeafOffset currently points to the first byte of the |
| 9600 ** "nSuffix" field of a term. Function parameter nKeep contains the value |
| 9601 ** of the "nPrefix" field (if there was one - it is passed 0 if this is |
| 9602 ** the first term in the segment). |
| 9603 ** |
| 9604 ** This function populates: |
| 9605 ** |
| 9606 ** Fts5SegIter.term |
| 9607 ** Fts5SegIter.iRowid |
| 9608 ** |
| 9609 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of |
| 9610 ** the first position list. The position list belonging to document |
| 9611 ** (Fts5SegIter.iRowid). If the term data is inconsistent, p->rc is set to FTS5_CORRUPT and the iterator is left unchanged. |
| 9612 */ |
| 9613 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ |
| 9614 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
| 9615 int iOff = pIter->iLeafOffset; /* Offset to read at */ |
| 9616 int nNew; /* Bytes of new data */ |
| 9617 |
| 9618 iOff += fts5GetVarint32(&a[iOff], nNew); |
| 9619 if( iOff+nNew>pIter->pLeaf->nn || nKeep>pIter->term.n ){ /* suffix must fit in the page; prefix cannot be longer than the current term, or the new term would include stale buffer bytes */ |
| 9620 p->rc = FTS5_CORRUPT; |
| 9621 return; |
| 9622 } |
| 9623 pIter->term.n = nKeep; /* keep the first nKeep bytes of the previous term */ |
| 9624 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
| 9625 iOff += nNew; |
| 9626 pIter->iTermLeafOffset = iOff; |
| 9627 pIter->iTermLeafPgno = pIter->iLeafPgno; |
| 9628 pIter->iLeafOffset = iOff; |
| 9629 |
| 9630 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ /* no further page-index entry: doclist does not end on this page */ |
| 9631 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 9632 }else{ |
| 9633 int nExtra; |
| 9634 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); |
| 9635 pIter->iEndofDoclist += nExtra; /* the page-index value is added to the previous iEndofDoclist */ |
| 9636 } |
| 9637 |
| 9638 fts5SegIterLoadRowid(p, pIter); |
| 9639 } |
| 9640 |
| 9641 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); |
| 9642 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); |
| 9643 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); |
| 9644 |
| 9645 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ /* Choose the pIter->xNext implementation from the iterator flags and the configured detail mode. */ |
| 9646 if( pIter->flags & FTS5_SEGITER_REVERSE ){ /* the reverse flag is tested first, before the detail mode */ |
| 9647 pIter->xNext = fts5SegIterNext_Reverse; |
| 9648 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 9649 pIter->xNext = fts5SegIterNext_None; |
| 9650 }else{ |
| 9651 pIter->xNext = fts5SegIterNext; |
| 9652 } |
| 9653 } |
| 9654 |
| 9655 /* |
| 9656 ** Initialize the iterator object pIter to iterate through the entries in |
| 9657 ** segment pSeg. The iterator is left pointing to the first entry when |
| 9658 ** this function returns. |
| 9659 ** |
| 9660 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 9661 ** an error has already occurred when this function is called, it is a no-op. If no leaf page could be loaded and no error was raised, the iterator is left with pLeaf==0 (EOF). |
| 9662 */ |
| 9663 static void fts5SegIterInit( |
| 9664 Fts5Index *p, /* FTS index object */ |
| 9665 Fts5StructureSegment *pSeg, /* Description of segment */ |
| 9666 Fts5SegIter *pIter /* Object to populate */ |
| 9667 ){ |
| 9668 if( pSeg->pgnoFirst==0 ){ |
| 9669 /* This happens if the segment is being used as an input to an incremental |
| 9670 ** merge and all data has already been "trimmed". See function |
| 9671 ** fts5TrimSegments() for details. In this case leave the iterator empty. |
| 9672 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is |
| 9673 ** at EOF already. */ |
| 9674 assert( pIter->pLeaf==0 ); |
| 9675 return; |
| 9676 } |
| 9677 |
| 9678 if( p->rc==SQLITE_OK ){ |
| 9679 memset(pIter, 0, sizeof(*pIter)); |
| 9680 fts5SegIterSetNext(p, pIter); |
| 9681 pIter->pSeg = pSeg; |
| 9682 pIter->iLeafPgno = pSeg->pgnoFirst-1; /* fts5SegIterNextPage() increments before loading */ |
| 9683 fts5SegIterNextPage(p, pIter); |
| 9684 } |
| 9685 |
| 9686 if( p->rc==SQLITE_OK && pIter->pLeaf ){ /* pLeaf can be 0 without an error when pgnoFirst>pgnoLast; do not dereference it */ |
| 9687 pIter->iLeafOffset = 4; |
| 9688 assert_nc( pIter->pLeaf->nn>4 ); |
| 9689 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); |
| 9690 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; |
| 9691 fts5SegIterLoadTerm(p, pIter, 0); |
| 9692 fts5SegIterLoadNPos(p, pIter); |
| 9693 } |
| 9694 } |
| 9695 |
| 9696 /* |
| 9697 ** This function is only ever called on iterators created by calls to |
| 9698 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. |
| 9699 ** |
| 9700 ** The iterator is in an unusual state when this function is called: the |
| 9701 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of |
| 9702 ** the position-list size field for the first relevant rowid on the page. |
| 9703 ** Fts5SegIter.rowid is set, but nPos and bDel are not. |
| 9704 ** |
| 9705 ** This function advances the iterator so that it points to the last |
| 9706 ** relevant rowid on the page and, if necessary, initializes the |
| 9707 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator |
| 9708 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first |
| 9709 ** byte of the position list content associated with said rowid. |
| 9710 */ |
| 9711 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ |
| 9712 int eDetail = p->pConfig->eDetail; |
| 9713 int n = pIter->pLeaf->szLeaf; |
| 9714 int i = pIter->iLeafOffset; |
| 9715 u8 *a = pIter->pLeaf->p; |
| 9716 int iRowidOffset = 0; |
| 9717 |
| 9718 if( n>pIter->iEndofDoclist ){ /* scan no further than the end of the doclist */ |
| 9719 n = pIter->iEndofDoclist; |
| 9720 } |
| 9721 |
| 9722 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 9723 while( 1 ){ |
| 9724 i64 iDelta = 0; |
| 9725 |
| 9726 if( eDetail==FTS5_DETAIL_NONE ){ |
| 9727 /* todo */ |
| 9728 if( i<n && a[i]==0 ){ /* skip up to two 0x00 marker bytes, as fts5SegIterLoadNPos() does */ |
| 9729 i++; |
| 9730 if( i<n && a[i]==0 ) i++; |
| 9731 } |
| 9732 }else{ |
| 9733 int nPos; |
| 9734 int bDummy; |
| 9735 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); /* skip the size field ... */ |
| 9736 i += nPos; /* ... and the position list itself */ |
| 9737 } |
| 9738 if( i>=n ) break; /* the current entry is the last one in range */ |
| 9739 i += fts5GetVarint(&a[i], (u64*)&iDelta); |
| 9740 pIter->iRowid += iDelta; |
| 9741 |
| 9742 /* If necessary, grow the pIter->aRowidOffset[] array. */ |
| 9743 if( iRowidOffset>=pIter->nRowidOffset ){ |
| 9744 int nNew = pIter->nRowidOffset + 8; |
| 9745 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); |
| 9746 if( aNew==0 ){ |
| 9747 p->rc = SQLITE_NOMEM; /* the existing aRowidOffset allocation is left in place */ |
| 9748 break; |
| 9749 } |
| 9750 pIter->aRowidOffset = aNew; |
| 9751 pIter->nRowidOffset = nNew; |
| 9752 } |
| 9753 |
| 9754 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; /* remember where the entry being left starts */ |
| 9755 pIter->iLeafOffset = i; |
| 9756 } |
| 9757 pIter->iRowidOffset = iRowidOffset; |
| 9758 fts5SegIterLoadNPos(p, pIter); /* a no-op if p->rc was set above */ |
| 9759 } |
| 9760 |
| 9761 /* |
| 9762 ** Release the current leaf, then step backwards one page at a time, no further than the term's own page (iTermLeafPgno), until a page with a usable rowid offset is found. If one is found, load its first rowid and call fts5SegIterReverseInitPage(); otherwise leave pIter->pLeaf==0. |
| 9763 */ |
| 9764 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ |
| 9765 assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
| 9766 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 9767 |
| 9768 fts5DataRelease(pIter->pLeaf); |
| 9769 pIter->pLeaf = 0; |
| 9770 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ |
| 9771 Fts5Data *pNew; |
| 9772 pIter->iLeafPgno--; |
| 9773 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( |
| 9774 pIter->pSeg->iSegid, pIter->iLeafPgno |
| 9775 )); |
| 9776 if( pNew ){ |
| 9777 /* iTermLeafOffset may be equal to szLeaf if the term is the last |
| 9778 ** thing on the page - i.e. the first rowid is on the following page. |
| 9779 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ |
| 9780 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ |
| 9781 assert( pIter->pLeaf==0 ); |
| 9782 if( pIter->iTermLeafOffset<pNew->szLeaf ){ |
| 9783 pIter->pLeaf = pNew; |
| 9784 pIter->iLeafOffset = pIter->iTermLeafOffset; |
| 9785 } |
| 9786 }else{ |
| 9787 int iRowidOff; |
| 9788 iRowidOff = fts5LeafFirstRowidOff(pNew); |
| 9789 if( iRowidOff ){ /* zero means this page is not adopted; the loop tries the page before it */ |
| 9790 pIter->pLeaf = pNew; |
| 9791 pIter->iLeafOffset = iRowidOff; |
| 9792 } |
| 9793 } |
| 9794 |
| 9795 if( pIter->pLeaf ){ |
| 9796 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; |
| 9797 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); |
| 9798 break; |
| 9799 }else{ |
| 9800 fts5DataRelease(pNew); /* page not adopted by the iterator: drop it */ |
| 9801 } |
| 9802 } |
| 9803 } |
| 9804 |
| 9805 if( pIter->pLeaf ){ |
| 9806 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 9807 fts5SegIterReverseInitPage(p, pIter); |
| 9808 } |
| 9809 } |
| 9810 |
| 9811 /* |
| 9812 ** Return true if the iterator passed as the second argument currently |
| 9813 ** points to a delete marker. A delete marker is an entry with a 0 byte |
| 9814 ** position-list. Returns false if p->rc is set or the selected segment iterator has no leaf. |
| 9815 */ |
| 9816 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ |
| 9817 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; /* the segment iterator selected by aFirst[1].iFirst */ |
| 9818 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); |
| 9819 } |
| 9820 |
| 9821 /* |
| 9822 ** Advance iterator pIter to the next entry. |
| 9823 ** |
| 9824 ** This version of fts5SegIterNext() is only used by reverse iterators. It steps to the previous rowid recorded in aRowidOffset[], or to an earlier page once those are used up. |
| 9825 */ |
| 9826 static void fts5SegIterNext_Reverse( |
| 9827 Fts5Index *p, /* FTS5 backend object */ |
| 9828 Fts5SegIter *pIter, /* Iterator to advance */ |
| 9829 int *pbUnused /* Unused */ |
| 9830 ){ |
| 9831 assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
| 9832 assert( pIter->pNextLeaf==0 ); |
| 9833 UNUSED_PARAM(pbUnused); |
| 9834 |
| 9835 if( pIter->iRowidOffset>0 ){ /* recorded offsets remain for this page */ |
| 9836 u8 *a = pIter->pLeaf->p; |
| 9837 int iOff; |
| 9838 i64 iDelta; |
| 9839 |
| 9840 pIter->iRowidOffset--; |
| 9841 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; |
| 9842 fts5SegIterLoadNPos(p, pIter); |
| 9843 iOff = pIter->iLeafOffset; |
| 9844 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ |
| 9845 iOff += pIter->nPos; /* skip the position list to reach the following rowid delta */ |
| 9846 } |
| 9847 fts5GetVarint(&a[iOff], (u64*)&iDelta); |
| 9848 pIter->iRowid -= iDelta; /* undo the delta that led from this entry to the one just left */ |
| 9849 }else{ |
| 9850 fts5SegIterReverseNewPage(p, pIter); |
| 9851 } |
| 9852 } |
| 9853 |
| 9854 /* |
| 9855 ** Advance iterator pIter to the next entry. |
| 9856 ** |
| 9857 ** This version of fts5SegIterNext() is only used if detail=none and the |
| 9858 ** iterator is not a reverse direction iterator. At EOF the leaf is released and pIter->pLeaf is set to 0. |
| 9859 */ |
| 9860 static void fts5SegIterNext_None( |
| 9861 Fts5Index *p, /* FTS5 backend object */ |
| 9862 Fts5SegIter *pIter, /* Iterator to advance */ |
| 9863 int *pbNewTerm /* OUT: Set for new term */ |
| 9864 ){ |
| 9865 int iOff; |
| 9866 |
| 9867 assert( p->rc==SQLITE_OK ); |
| 9868 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); |
| 9869 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 9870 |
| 9871 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 9872 iOff = pIter->iLeafOffset; |
| 9873 |
| 9874 /* Next entry is on the next page */ |
| 9875 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ |
| 9876 fts5SegIterNextPage(p, pIter); |
| 9877 if( p->rc || pIter->pLeaf==0 ) return; |
| 9878 pIter->iRowid = 0; /* the delta read below is added to this zeroed value */ |
| 9879 iOff = 4; |
| 9880 } |
| 9881 |
| 9882 if( iOff<pIter->iEndofDoclist ){ |
| 9883 /* Next entry is on the current page */ |
| 9884 i64 iDelta; |
| 9885 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); |
| 9886 pIter->iLeafOffset = iOff; |
| 9887 pIter->iRowid += iDelta; |
| 9888 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ /* end of doclist: move to the next term unless restricted to one term */ |
| 9889 if( pIter->pSeg ){ |
| 9890 int nKeep = 0; |
| 9891 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ /* the first term on a page has no prefix-length field */ |
| 9892 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); |
| 9893 } |
| 9894 pIter->iLeafOffset = iOff; |
| 9895 fts5SegIterLoadTerm(p, pIter, nKeep); |
| 9896 }else{ /* pSeg==0: the next term comes from the p->pHash scan rather than a segment page */ |
| 9897 const u8 *pList = 0; |
| 9898 const char *zTerm = 0; |
| 9899 int nList; |
| 9900 sqlite3Fts5HashScanNext(p->pHash); |
| 9901 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
| 9902 if( pList==0 ) goto next_none_eof; |
| 9903 pIter->pLeaf->p = (u8*)pList; /* the leaf object is re-pointed at the hash entry's doclist */ |
| 9904 pIter->pLeaf->nn = nList; |
| 9905 pIter->pLeaf->szLeaf = nList; |
| 9906 pIter->iEndofDoclist = nList; |
| 9907 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); |
| 9908 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
| 9909 } |
| 9910 |
| 9911 if( pbNewTerm ) *pbNewTerm = 1; |
| 9912 }else{ |
| 9913 goto next_none_eof; |
| 9914 } |
| 9915 |
| 9916 fts5SegIterLoadNPos(p, pIter); |
| 9917 |
| 9918 return; |
| 9919 next_none_eof: |
| 9920 fts5DataRelease(pIter->pLeaf); |
| 9921 pIter->pLeaf = 0; |
| 9922 } |
| 9923 |
| 9924 |
| 9925 /* |
| 9926 ** Advance iterator pIter to the next entry. |
| 9927 ** |
| 9928 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It |
| 9929 ** is not considered an error if the iterator reaches EOF. If an error has |
| 9930 ** already occurred when this function is called, it is a no-op. |
| 9931 */ |
| 9932 static void fts5SegIterNext( |
| 9933 Fts5Index *p, /* FTS5 backend object */ |
| 9934 Fts5SegIter *pIter, /* Iterator to advance */ |
| 9935 int *pbNewTerm /* OUT: Set for new term */ |
| 9936 ){ |
| 9937 Fts5Data *pLeaf = pIter->pLeaf; |
| 9938 int iOff; |
| 9939 int bNewTerm = 0; |
| 9940 int nKeep = 0; |
| 9941 u8 *a; |
| 9942 int n; |
| 9943 |
| 9944 assert( pbNewTerm==0 || *pbNewTerm==0 ); |
| 9945 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 9946 |
| 9947 /* Search for the end of the position list within the current page. */ |
| 9948 a = pLeaf->p; |
| 9949 n = pLeaf->szLeaf; |
| 9950 |
| 9951 ASSERT_SZLEAF_OK(pLeaf); |
| 9952 iOff = pIter->iLeafOffset + pIter->nPos; |
| 9953 |
| 9954 if( iOff<n ){ |
| 9955 /* The next entry is on the current page. */ |
| 9956 assert_nc( iOff<=pIter->iEndofDoclist ); |
| 9957 if( iOff>=pIter->iEndofDoclist ){ /* doclist finished: what follows is a new term */ |
| 9958 bNewTerm = 1; |
| 9959 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ /* the first term on a page has no prefix-length field */ |
| 9960 iOff += fts5GetVarint32(&a[iOff], nKeep); |
| 9961 } |
| 9962 }else{ |
| 9963 u64 iDelta; |
| 9964 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); |
| 9965 pIter->iRowid += iDelta; |
| 9966 assert_nc( iDelta>0 ); |
| 9967 } |
| 9968 pIter->iLeafOffset = iOff; |
| 9969 |
| 9970 }else if( pIter->pSeg==0 ){ /* no segment: entries come from the p->pHash scan */ |
| 9971 const u8 *pList = 0; |
| 9972 const char *zTerm = 0; |
| 9973 int nList = 0; |
| 9974 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); |
| 9975 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ |
| 9976 sqlite3Fts5HashScanNext(p->pHash); |
| 9977 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
| 9978 } |
| 9979 if( pList==0 ){ /* one-term iterator, or the scan returned no list: EOF */ |
| 9980 fts5DataRelease(pIter->pLeaf); |
| 9981 pIter->pLeaf = 0; |
| 9982 }else{ |
| 9983 pIter->pLeaf->p = (u8*)pList; |
| 9984 pIter->pLeaf->nn = nList; |
| 9985 pIter->pLeaf->szLeaf = nList; |
| 9986 pIter->iEndofDoclist = nList+1; |
| 9987 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), |
| 9988 (u8*)zTerm); |
| 9989 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
| 9990 *pbNewTerm = 1; /* reached only when ONETERM is clear, where the assert above requires pbNewTerm!=0 */ |
| 9991 } |
| 9992 }else{ |
| 9993 iOff = 0; |
| 9994 /* Next entry is not on the current page */ |
| 9995 while( iOff==0 ){ /* keep loading pages until one yields a rowid or a term offset */ |
| 9996 fts5SegIterNextPage(p, pIter); |
| 9997 pLeaf = pIter->pLeaf; |
| 9998 if( pLeaf==0 ) break; |
| 9999 ASSERT_SZLEAF_OK(pLeaf); |
| 10000 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ /* the page has a rowid belonging to the current doclist */ |
| 10001 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); |
| 10002 pIter->iLeafOffset = iOff; |
| 10003 |
| 10004 if( pLeaf->nn>pLeaf->szLeaf ){ |
| 10005 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
| 10006 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist |
| 10007 ); |
| 10008 } |
| 10009 } |
| 10010 else if( pLeaf->nn>pLeaf->szLeaf ){ /* no usable rowid, but a page index follows: the page starts a new term */ |
| 10011 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
| 10012 &pLeaf->p[pLeaf->szLeaf], iOff |
| 10013 ); |
| 10014 pIter->iLeafOffset = iOff; |
| 10015 pIter->iEndofDoclist = iOff; |
| 10016 bNewTerm = 1; |
| 10017 } |
| 10018 assert_nc( iOff<pLeaf->szLeaf ); |
| 10019 if( iOff>pLeaf->szLeaf ){ |
| 10020 p->rc = FTS5_CORRUPT; |
| 10021 return; |
| 10022 } |
| 10023 } |
| 10024 } |
| 10025 |
| 10026 /* Check if the iterator is now at EOF. If so, return early. */ |
| 10027 if( pIter->pLeaf ){ |
| 10028 if( bNewTerm ){ |
| 10029 if( pIter->flags & FTS5_SEGITER_ONETERM ){ /* a one-term iterator stops at the next term */ |
| 10030 fts5DataRelease(pIter->pLeaf); |
| 10031 pIter->pLeaf = 0; |
| 10032 }else{ |
| 10033 fts5SegIterLoadTerm(p, pIter, nKeep); |
| 10034 fts5SegIterLoadNPos(p, pIter); |
| 10035 if( pbNewTerm ) *pbNewTerm = 1; |
| 10036 } |
| 10037 }else{ |
| 10038 /* The following could be done by calling fts5SegIterLoadNPos(). But |
| 10039 ** this block is particularly performance critical, so equivalent |
| 10040 ** code is inlined. |
| 10041 ** |
| 10042 ** Later: Switched back to fts5SegIterLoadNPos() because it supports |
| 10043 ** detail=none mode. Not ideal. |
| 10044 ** NOTE(review): the code below is the inlined form, so the "Later" remark above looks stale - confirm. */ |
| 10045 int nSz; |
| 10046 assert( p->rc==SQLITE_OK ); |
| 10047 assert( pIter->iLeafOffset<=pIter->pLeaf->nn ); |
| 10048 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); |
| 10049 pIter->bDel = (nSz & 0x0001); |
| 10050 pIter->nPos = nSz>>1; |
| 10051 assert_nc( pIter->nPos>=0 ); |
| 10052 } |
| 10053 } |
| 10054 } |
| 10055 |
| 10056 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } /* Swap a and b, both of type T. Expands to a brace block that declares a local named tmp; each argument is evaluated twice. */ |
| 10057 |
| 10058 #define fts5IndexSkipVarint(a, iOff) { /* Advance iOff past bytes of a[] until a byte with the 0x80 bit clear has been consumed, or 9 bytes have been consumed */ \ |
| 10059 int iEnd = iOff+9; \ |
| 10060 while( (a[iOff++] & 0x80) && iOff<iEnd ); \ |
| 10061 } |
| 10062 |
| 10063 /* |
| 10064 ** Iterator pIter currently points to the first rowid in a doclist. This |
| 10065 ** function sets the iterator up so that it iterates in reverse order through |
| 10066 ** the doclist. |
| 10067 */ |
| 10068 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ |
| 10069 Fts5DlidxIter *pDlidx = pIter->pDlidx; |
| 10070 Fts5Data *pLast = 0; |
| 10071 int pgnoLast = 0; |
| 10072 |
| 10073 if( pDlidx ){ /* with a doclist-index, the page to start from is the one the dlidx iterator reports */ |
| 10074 int iSegid = pIter->pSeg->iSegid; |
| 10075 pgnoLast = fts5DlidxIterPgno(pDlidx); |
| 10076 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); |
| 10077 }else{ |
| 10078 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
| 10079 |
| 10080 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of |
| 10081 ** position-list content for the current rowid. Back it up so that it |
| 10082 ** points to the start of the position-list size field. */ |
| 10083 int iPoslist; |
| 10084 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ |
| 10085 iPoslist = pIter->iTermLeafOffset; |
| 10086 }else{ |
| 10087 iPoslist = 4; |
| 10088 } |
| 10089 fts5IndexSkipVarint(pLeaf->p, iPoslist); /* step over the rowid varint that precedes the size field */ |
| 10090 pIter->iLeafOffset = iPoslist; |
| 10091 |
| 10092 /* If this condition is true then the largest rowid for the current |
| 10093 ** term may not be stored on the current page. So search forward to |
| 10094 ** see where said rowid really is. */ |
| 10095 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ |
| 10096 int pgno; |
| 10097 Fts5StructureSegment *pSeg = pIter->pSeg; |
| 10098 |
| 10099 /* The last rowid in the doclist may not be on the current page. Search |
| 10100 ** forward to find the page containing the last rowid. */ |
| 10101 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ |
| 10102 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); |
| 10103 Fts5Data *pNew = fts5DataRead(p, iAbs); |
| 10104 if( pNew ){ |
| 10105 int iRowid, bTermless; |
| 10106 iRowid = fts5LeafFirstRowidOff(pNew); |
| 10107 bTermless = fts5LeafIsTermless(pNew); |
| 10108 if( iRowid ){ /* page has a rowid: it becomes the candidate last page */ |
| 10109 SWAPVAL(Fts5Data*, pNew, pLast); |
| 10110 pgnoLast = pgno; |
| 10111 } |
| 10112 fts5DataRelease(pNew); /* after the swap this releases the previous candidate; otherwise the page just read */ |
| 10113 if( bTermless==0 ) break; /* a page that is not termless ends the search */ |
| 10114 } |
| 10115 } |
| 10116 } |
| 10117 } |
| 10118 |
| 10119 /* If pLast is NULL at this point, then the last rowid for this doclist |
| 10120 ** lies on the page currently indicated by the iterator. In this case |
| 10121 ** pIter->iLeafOffset is already set to point to the position-list size |
| 10122 ** field associated with the first relevant rowid on the page. |
| 10123 ** |
| 10124 ** Or, if pLast is non-NULL, then it is the page that contains the last |
| 10125 ** rowid. In this case configure the iterator so that it points to the |
| 10126 ** first rowid on this page. |
| 10127 */ |
| 10128 if( pLast ){ |
| 10129 int iOff; |
| 10130 fts5DataRelease(pIter->pLeaf); |
| 10131 pIter->pLeaf = pLast; |
| 10132 pIter->iLeafPgno = pgnoLast; |
| 10133 iOff = fts5LeafFirstRowidOff(pLast); |
| 10134 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); |
| 10135 pIter->iLeafOffset = iOff; |
| 10136 |
| 10137 if( fts5LeafIsTermless(pLast) ){ |
| 10138 pIter->iEndofDoclist = pLast->nn+1; |
| 10139 }else{ |
| 10140 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); /* the doclist ends where the page's first term begins */ |
| 10141 } |
| 10142 |
| 10143 } |
| 10144 |
| 10145 fts5SegIterReverseInitPage(p, pIter); |
| 10146 } |
| 10147 |
| 10148 /* |
| 10149 ** Iterator pIter currently points to the first rowid of a doclist. |
| 10150 ** There is a doclist-index associated with the final term on the current |
| 10151 ** page. If the current term is the last term on the page, load the |
| 10152 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). |
| 10153 */ |
| 10154 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ |
| 10155 int iSeg = pIter->pSeg->iSegid; |
| 10156 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); /* non-zero makes fts5DlidxIterInit() start on the last entry */ |
| 10157 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
| 10158 |
| 10159 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 10160 assert( pIter->pDlidx==0 ); |
| 10161 |
| 10162 /* Check if the current doclist ends on this page. If it does, return |
| 10163 ** early without loading the doclist-index (as it belongs to a different |
| 10164 ** term). */ |
| 10165 if( pIter->iTermLeafPgno==pIter->iLeafPgno |
| 10166 && pIter->iEndofDoclist<pLeaf->szLeaf |
| 10167 ){ |
| 10168 return; |
| 10169 } |
| 10170 |
| 10171 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); |
| 10172 } |
| 10173 |
| 10174 /* |
| 10175 ** The iterator object passed as the second argument currently contains |
| 10176 ** no valid values except for the Fts5SegIter.pLeaf member variable. This |
| 10177 ** function searches the leaf page for a term matching (pTerm/nTerm). |
| 10178 ** |
| 10179 ** If the specified term is found on the page, then the iterator is left |
| 10180 ** pointing to it. If argument bGe is zero and the term is not found, |
| 10181 ** the iterator is left pointing at EOF. |
| 10182 ** |
| 10183 ** If bGe is non-zero and the specified term is not found, then the |
| 10184 ** iterator is left pointing to the smallest term in the segment that |
| 10185 ** is larger than the specified term, even if this term is not on the |
| 10186 ** current page. |
| 10187 */ |
| 10188 static void fts5LeafSeek( |
| 10189 Fts5Index *p, /* Leave any error code here */ |
| 10190 int bGe, /* True for a >= search */ |
| 10191 Fts5SegIter *pIter, /* Iterator to seek */ |
| 10192 const u8 *pTerm, int nTerm /* Term to search for */ |
| 10193 ){ |
| 10194 int iOff; |
| 10195 const u8 *a = pIter->pLeaf->p; |
| 10196 int szLeaf = pIter->pLeaf->szLeaf; |
| 10197 int n = pIter->pLeaf->nn; |
| 10198 |
| 10199 int nMatch = 0; /* leading bytes of pTerm known to match the term being examined */ |
| 10200 int nKeep = 0; /* prefix length of the term being examined */ |
| 10201 int nNew = 0; /* suffix length of the term being examined */ |
| 10202 int iTermOff; |
| 10203 int iPgidx; /* Current offset in pgidx */ |
| 10204 int bEndOfPage = 0; |
| 10205 |
| 10206 assert( p->rc==SQLITE_OK ); |
| 10207 |
| 10208 iPgidx = szLeaf; |
| 10209 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); /* first page-index entry: offset of the first term */ |
| 10210 iOff = iTermOff; |
| 10211 if( iOff>n ){ |
| 10212 p->rc = FTS5_CORRUPT; |
| 10213 return; |
| 10214 } |
| 10215 |
| 10216 while( 1 ){ |
| 10217 |
| 10218 /* Figure out how many new bytes are in this term */ |
| 10219 fts5FastGetVarint32(a, iOff, nNew); |
| 10220 if( nKeep<nMatch ){ /* this term shares fewer bytes with its predecessor than pTerm matched: stop searching */ |
| 10221 goto search_failed; |
| 10222 } |
| 10223 |
| 10224 assert( nKeep>=nMatch ); |
| 10225 if( nKeep==nMatch ){ |
| 10226 int nCmp; |
| 10227 int i; |
| 10228 nCmp = MIN(nNew, nTerm-nMatch); |
| 10229 for(i=0; i<nCmp; i++){ |
| 10230 if( a[iOff+i]!=pTerm[nMatch+i] ) break; |
| 10231 } |
| 10232 nMatch += i; |
| 10233 |
| 10234 if( nTerm==nMatch ){ /* all of pTerm matched */ |
| 10235 if( i==nNew ){ /* and the page term has no bytes left: exact match */ |
| 10236 goto search_success; |
| 10237 }else{ |
| 10238 goto search_failed; |
| 10239 } |
| 10240 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ /* page term is already larger than pTerm */ |
| 10241 goto search_failed; |
| 10242 } |
| 10243 } |
| 10244 |
| 10245 if( iPgidx>=n ){ /* page index exhausted: no more terms on this page */ |
| 10246 bEndOfPage = 1; |
| 10247 break; |
| 10248 } |
| 10249 |
| 10250 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); /* nKeep is used here as a temporary for the page-index delta */ |
| 10251 iTermOff += nKeep; |
| 10252 iOff = iTermOff; |
| 10253 |
| 10254 if( iOff>=n ){ |
| 10255 p->rc = FTS5_CORRUPT; |
| 10256 return; |
| 10257 } |
| 10258 |
| 10259 /* Read the nKeep field of the next term. */ |
| 10260 fts5FastGetVarint32(a, iOff, nKeep); |
| 10261 } |
| 10262 |
| 10263 search_failed: |
| 10264 if( bGe==0 ){ /* exact-match search: leave the iterator at EOF */ |
| 10265 fts5DataRelease(pIter->pLeaf); |
| 10266 pIter->pLeaf = 0; |
| 10267 return; |
| 10268 }else if( bEndOfPage ){ /* >= search ran off the page: use the first term of a following page */ |
| 10269 do { |
| 10270 fts5SegIterNextPage(p, pIter); |
| 10271 if( pIter->pLeaf==0 ) return; |
| 10272 a = pIter->pLeaf->p; |
| 10273 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ |
| 10274 iPgidx = pIter->pLeaf->szLeaf; |
| 10275 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff); |
| 10276 if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ |
| 10277 p->rc = FTS5_CORRUPT; /* NOTE(review): the loop does not break here; it continues with the following page - confirm this is intended */ |
| 10278 }else{ |
| 10279 nKeep = 0; |
| 10280 iTermOff = iOff; |
| 10281 n = pIter->pLeaf->nn; |
| 10282 iOff += fts5GetVarint32(&a[iOff], nNew); |
| 10283 break; |
| 10284 } |
| 10285 } |
| 10286 }while( 1 ); |
| 10287 } |
| 10288 |
| 10289 search_success: |
| 10290 |
| 10291 pIter->iLeafOffset = iOff + nNew; /* first byte after the term's suffix */ |
| 10292 pIter->iTermLeafOffset = pIter->iLeafOffset; |
| 10293 pIter->iTermLeafPgno = pIter->iLeafPgno; |
| 10294 |
| 10295 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); /* the nKeep-byte prefix is copied from pTerm */ |
| 10296 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
| 10297 |
| 10298 if( iPgidx>=n ){ |
| 10299 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 10300 }else{ |
| 10301 int nExtra; |
| 10302 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); |
| 10303 pIter->iEndofDoclist = iTermOff + nExtra; |
| 10304 } |
| 10305 pIter->iPgidxOff = iPgidx; |
| 10306 |
| 10307 fts5SegIterLoadRowid(p, pIter); |
| 10308 fts5SegIterLoadNPos(p, pIter); |
| 10309 } |
| 10310 |
| 10311 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ /* Return p->pIdxSelect, preparing it on first use. The statement selects pgno from the '%q_idx' table for the largest term <= the bound term within the bound segid. */ |
| 10312 if( p->pIdxSelect==0 ){ |
| 10313 Fts5Config *pConfig = p->pConfig; |
| 10314 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( |
| 10315 "SELECT pgno FROM '%q'.'%q_idx' WHERE " |
| 10316 "segid=? AND term<=? ORDER BY term DESC LIMIT 1", |
| 10317 pConfig->zDb, pConfig->zName |
| 10318 )); |
| 10319 } |
| 10320 return p->pIdxSelect; /* NOTE(review): may still be 0 if preparation failed; the visible caller checks p->rc after calling */ |
| 10321 } |
| 10322 |
| 10323 /* |
| 10324 ** Initialize the object pIter to point to term pTerm/nTerm within segment |
| 10325 ** pSeg. If there is no such term in the index, the iterator is set to EOF. |
| 10326 ** |
| 10327 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 10328 ** an error has already occurred when this function is called, it is a no-op. |
| 10329 */ |
| 10330 static void fts5SegIterSeekInit( |
| 10331 Fts5Index *p, /* FTS5 backend */ |
| 10332 const u8 *pTerm, int nTerm, /* Term to seek to */ |
| 10333 int flags, /* Mask of FTS5INDEX_XXX flags */ |
| 10334 Fts5StructureSegment *pSeg, /* Description of segment */ |
| 10335 Fts5SegIter *pIter /* Object to populate */ |
| 10336 ){ |
| 10337 int iPg = 1; |
| 10338 int bGe = (flags & FTS5INDEX_QUERY_SCAN); |
| 10339 int bDlidx = 0; /* True if there is a doclist-index */ |
| 10340 sqlite3_stmt *pIdxSelect = 0; |
| 10341 |
| 10342 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); |
| 10343 assert( pTerm && nTerm ); |
| 10344 memset(pIter, 0, sizeof(*pIter)); |
| 10345 pIter->pSeg = pSeg; |
| 10346 |
| 10347 /* This block sets stack variable iPg to the leaf page number that may |
| 10348 ** contain term (pTerm/nTerm), if it is present in the segment. */ |
| 10349 pIdxSelect = fts5IdxSelectStmt(p); |
| 10350 if( p->rc ) return; |
| 10351 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); |
| 10352 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); |
| 10353 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ |
| 10354 i64 val = sqlite3_column_int(pIdxSelect, 0); |
| 10355 iPg = (int)(val>>1); |
| 10356 bDlidx = (val & 0x0001); |
| 10357 } |
| 10358 p->rc = sqlite3_reset(pIdxSelect); |
| 10359 |
| 10360 if( iPg<pSeg->pgnoFirst ){ |
| 10361 iPg = pSeg->pgnoFirst; |
| 10362 bDlidx = 0; |
| 10363 } |
| 10364 |
| 10365 pIter->iLeafPgno = iPg - 1; |
| 10366 fts5SegIterNextPage(p, pIter); |
| 10367 |
| 10368 if( pIter->pLeaf ){ |
| 10369 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); |
| 10370 } |
| 10371 |
| 10372 if( p->rc==SQLITE_OK && bGe==0 ){ |
| 10373 pIter->flags |= FTS5_SEGITER_ONETERM; |
| 10374 if( pIter->pLeaf ){ |
| 10375 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 10376 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 10377 } |
| 10378 if( bDlidx ){ |
| 10379 fts5SegIterLoadDlidx(p, pIter); |
| 10380 } |
| 10381 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 10382 fts5SegIterReverse(p, pIter); |
| 10383 } |
| 10384 } |
| 10385 } |
| 10386 |
| 10387 fts5SegIterSetNext(p, pIter); |
| 10388 |
| 10389 /* Either: |
| 10390 ** |
| 10391 ** 1) an error has occurred, or |
| 10392 ** 2) the iterator points to EOF, or |
| 10393 ** 3) the iterator points to an entry with term (pTerm/nTerm), or |
| 10394 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points |
| 10395 ** to an entry with a term greater than or equal to (pTerm/nTerm). |
| 10396 */ |
| 10397 assert( p->rc!=SQLITE_OK /* 1 */ |
| 10398 || pIter->pLeaf==0 /* 2 */ |
| 10399 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */ |
| 10400 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */ |
| 10401 ); |
| 10402 } |
| 10403 |
| 10404 /* |
| 10405 ** Initialize the object pIter to point to term pTerm/nTerm within the |
| 10406 ** in-memory hash table. If there is no such term in the hash-table, the |
| 10407 ** iterator is set to EOF. |
| 10408 ** |
| 10409 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 10410 ** an error has already occurred when this function is called, it is a no-op. |
| 10411 */ |
| 10412 static void fts5SegIterHashInit( |
| 10413 Fts5Index *p, /* FTS5 backend */ |
| 10414 const u8 *pTerm, int nTerm, /* Term to seek to */ |
| 10415 int flags, /* Mask of FTS5INDEX_XXX flags */ |
| 10416 Fts5SegIter *pIter /* Object to populate */ |
| 10417 ){ |
| 10418 const u8 *pList = 0; |
| 10419 int nList = 0; |
| 10420 const u8 *z = 0; |
| 10421 int n = 0; |
| 10422 |
| 10423 assert( p->pHash ); |
| 10424 assert( p->rc==SQLITE_OK ); |
| 10425 |
| 10426 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ |
| 10427 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); |
| 10428 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); |
| 10429 n = (z ? (int)strlen((const char*)z) : 0); |
| 10430 }else{ |
| 10431 pIter->flags |= FTS5_SEGITER_ONETERM; |
| 10432 sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); |
| 10433 z = pTerm; |
| 10434 n = nTerm; |
| 10435 } |
| 10436 |
| 10437 if( pList ){ |
| 10438 Fts5Data *pLeaf; |
| 10439 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); |
| 10440 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); |
| 10441 if( pLeaf==0 ) return; |
| 10442 pLeaf->p = (u8*)pList; |
| 10443 pLeaf->nn = pLeaf->szLeaf = nList; |
| 10444 pIter->pLeaf = pLeaf; |
| 10445 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); |
| 10446 pIter->iEndofDoclist = pLeaf->nn; |
| 10447 |
| 10448 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 10449 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 10450 fts5SegIterReverseInitPage(p, pIter); |
| 10451 }else{ |
| 10452 fts5SegIterLoadNPos(p, pIter); |
| 10453 } |
| 10454 } |
| 10455 |
| 10456 fts5SegIterSetNext(p, pIter); |
| 10457 } |
| 10458 |
| 10459 /* |
| 10460 ** Zero the iterator passed as the only argument. |
| 10461 */ |
| 10462 static void fts5SegIterClear(Fts5SegIter *pIter){ |
| 10463 fts5BufferFree(&pIter->term); |
| 10464 fts5DataRelease(pIter->pLeaf); |
| 10465 fts5DataRelease(pIter->pNextLeaf); |
| 10466 fts5DlidxIterFree(pIter->pDlidx); |
| 10467 sqlite3_free(pIter->aRowidOffset); |
| 10468 memset(pIter, 0, sizeof(Fts5SegIter)); |
| 10469 } |
| 10470 |
| 10471 #ifdef SQLITE_DEBUG |
| 10472 |
| 10473 /* |
| 10474 ** This function is used as part of the big assert() procedure implemented by |
| 10475 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored |
| 10476 ** in *pRes is the correct result of comparing the current positions of the |
| 10477 ** two iterators. |
| 10478 */ |
| 10479 static void fts5AssertComparisonResult( |
| 10480 Fts5Iter *pIter, |
| 10481 Fts5SegIter *p1, |
| 10482 Fts5SegIter *p2, |
| 10483 Fts5CResult *pRes |
| 10484 ){ |
| 10485 int i1 = p1 - pIter->aSeg; |
| 10486 int i2 = p2 - pIter->aSeg; |
| 10487 |
| 10488 if( p1->pLeaf || p2->pLeaf ){ |
| 10489 if( p1->pLeaf==0 ){ |
| 10490 assert( pRes->iFirst==i2 ); |
| 10491 }else if( p2->pLeaf==0 ){ |
| 10492 assert( pRes->iFirst==i1 ); |
| 10493 }else{ |
| 10494 int nMin = MIN(p1->term.n, p2->term.n); |
| 10495 int res = memcmp(p1->term.p, p2->term.p, nMin); |
| 10496 if( res==0 ) res = p1->term.n - p2->term.n; |
| 10497 |
| 10498 if( res==0 ){ |
| 10499 assert( pRes->bTermEq==1 ); |
| 10500 assert( p1->iRowid!=p2->iRowid ); |
| 10501 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; |
| 10502 }else{ |
| 10503 assert( pRes->bTermEq==0 ); |
| 10504 } |
| 10505 |
| 10506 if( res<0 ){ |
| 10507 assert( pRes->iFirst==i1 ); |
| 10508 }else{ |
| 10509 assert( pRes->iFirst==i2 ); |
| 10510 } |
| 10511 } |
| 10512 } |
| 10513 } |
| 10514 |
| 10515 /* |
| 10516 ** This function is a no-op unless SQLITE_DEBUG is defined when this module |
| 10517 ** is compiled. In that case, this function is essentially an assert() |
| 10518 ** statement used to verify that the contents of the pIter->aFirst[] array |
| 10519 ** are correct. |
| 10520 */ |
| 10521 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ |
| 10522 if( p->rc==SQLITE_OK ){ |
| 10523 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 10524 int i; |
| 10525 |
| 10526 assert( (pFirst->pLeaf==0)==pIter->base.bEof ); |
| 10527 |
| 10528 /* Check that pIter->iSwitchRowid is set correctly. */ |
| 10529 for(i=0; i<pIter->nSeg; i++){ |
| 10530 Fts5SegIter *p1 = &pIter->aSeg[i]; |
| 10531 assert( p1==pFirst |
| 10532 || p1->pLeaf==0 |
| 10533 || fts5BufferCompare(&pFirst->term, &p1->term) |
| 10534 || p1->iRowid==pIter->iSwitchRowid |
| 10535 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev |
| 10536 ); |
| 10537 } |
| 10538 |
| 10539 for(i=0; i<pIter->nSeg; i+=2){ |
| 10540 Fts5SegIter *p1 = &pIter->aSeg[i]; |
| 10541 Fts5SegIter *p2 = &pIter->aSeg[i+1]; |
| 10542 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; |
| 10543 fts5AssertComparisonResult(pIter, p1, p2, pRes); |
| 10544 } |
| 10545 |
| 10546 for(i=1; i<(pIter->nSeg / 2); i+=2){ |
| 10547 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; |
| 10548 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; |
| 10549 Fts5CResult *pRes = &pIter->aFirst[i]; |
| 10550 fts5AssertComparisonResult(pIter, p1, p2, pRes); |
| 10551 } |
| 10552 } |
| 10553 } |
| 10554 #else |
| 10555 # define fts5AssertMultiIterSetup(x,y) |
| 10556 #endif |
| 10557 |
| 10558 /* |
| 10559 ** Do the comparison necessary to populate pIter->aFirst[iOut]. |
| 10560 ** |
| 10561 ** If the returned value is non-zero, then it is the index of an entry |
| 10562 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing |
| 10563 ** to a key that is a duplicate of another, higher priority, |
| 10564 ** segment-iterator in the pSeg->aSeg[] array. |
| 10565 */ |
| 10566 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ |
| 10567 int i1; /* Index of left-hand Fts5SegIter */ |
| 10568 int i2; /* Index of right-hand Fts5SegIter */ |
| 10569 int iRes; |
| 10570 Fts5SegIter *p1; /* Left-hand Fts5SegIter */ |
| 10571 Fts5SegIter *p2; /* Right-hand Fts5SegIter */ |
| 10572 Fts5CResult *pRes = &pIter->aFirst[iOut]; |
| 10573 |
| 10574 assert( iOut<pIter->nSeg && iOut>0 ); |
| 10575 assert( pIter->bRev==0 || pIter->bRev==1 ); |
| 10576 |
| 10577 if( iOut>=(pIter->nSeg/2) ){ |
| 10578 i1 = (iOut - pIter->nSeg/2) * 2; |
| 10579 i2 = i1 + 1; |
| 10580 }else{ |
| 10581 i1 = pIter->aFirst[iOut*2].iFirst; |
| 10582 i2 = pIter->aFirst[iOut*2+1].iFirst; |
| 10583 } |
| 10584 p1 = &pIter->aSeg[i1]; |
| 10585 p2 = &pIter->aSeg[i2]; |
| 10586 |
| 10587 pRes->bTermEq = 0; |
| 10588 if( p1->pLeaf==0 ){ /* If p1 is at EOF */ |
| 10589 iRes = i2; |
| 10590 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ |
| 10591 iRes = i1; |
| 10592 }else{ |
| 10593 int res = fts5BufferCompare(&p1->term, &p2->term); |
| 10594 if( res==0 ){ |
| 10595 assert( i2>i1 ); |
| 10596 assert( i2!=0 ); |
| 10597 pRes->bTermEq = 1; |
| 10598 if( p1->iRowid==p2->iRowid ){ |
| 10599 p1->bDel = p2->bDel; |
| 10600 return i2; |
| 10601 } |
| 10602 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; |
| 10603 } |
| 10604 assert( res!=0 ); |
| 10605 if( res<0 ){ |
| 10606 iRes = i1; |
| 10607 }else{ |
| 10608 iRes = i2; |
| 10609 } |
| 10610 } |
| 10611 |
| 10612 pRes->iFirst = (u16)iRes; |
| 10613 return 0; |
| 10614 } |
| 10615 |
| 10616 /* |
| 10617 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. |
| 10618 ** It is an error if leaf iLeafPgno does not exist or contains no rowids. |
| 10619 */ |
| 10620 static void fts5SegIterGotoPage( |
| 10621 Fts5Index *p, /* FTS5 backend object */ |
| 10622 Fts5SegIter *pIter, /* Iterator to advance */ |
| 10623 int iLeafPgno |
| 10624 ){ |
| 10625 assert( iLeafPgno>pIter->iLeafPgno ); |
| 10626 |
| 10627 if( iLeafPgno>pIter->pSeg->pgnoLast ){ |
| 10628 p->rc = FTS5_CORRUPT; |
| 10629 }else{ |
| 10630 fts5DataRelease(pIter->pNextLeaf); |
| 10631 pIter->pNextLeaf = 0; |
| 10632 pIter->iLeafPgno = iLeafPgno-1; |
| 10633 fts5SegIterNextPage(p, pIter); |
| 10634 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); |
| 10635 |
| 10636 if( p->rc==SQLITE_OK ){ |
| 10637 int iOff; |
| 10638 u8 *a = pIter->pLeaf->p; |
| 10639 int n = pIter->pLeaf->szLeaf; |
| 10640 |
| 10641 iOff = fts5LeafFirstRowidOff(pIter->pLeaf); |
| 10642 if( iOff<4 || iOff>=n ){ |
| 10643 p->rc = FTS5_CORRUPT; |
| 10644 }else{ |
| 10645 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
| 10646 pIter->iLeafOffset = iOff; |
| 10647 fts5SegIterLoadNPos(p, pIter); |
| 10648 } |
| 10649 } |
| 10650 } |
| 10651 } |
| 10652 |
| 10653 /* |
| 10654 ** Advance the iterator passed as the second argument until it is at or |
| 10655 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is |
| 10656 ** always advanced at least once. |
| 10657 */ |
| 10658 static void fts5SegIterNextFrom( |
| 10659 Fts5Index *p, /* FTS5 backend object */ |
| 10660 Fts5SegIter *pIter, /* Iterator to advance */ |
| 10661 i64 iMatch /* Advance iterator at least this far */ |
| 10662 ){ |
| 10663 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); |
| 10664 Fts5DlidxIter *pDlidx = pIter->pDlidx; |
| 10665 int iLeafPgno = pIter->iLeafPgno; |
| 10666 int bMove = 1; |
| 10667 |
| 10668 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 10669 assert( pIter->pDlidx ); |
| 10670 assert( pIter->pLeaf ); |
| 10671 |
| 10672 if( bRev==0 ){ |
| 10673 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ |
| 10674 iLeafPgno = fts5DlidxIterPgno(pDlidx); |
| 10675 fts5DlidxIterNext(p, pDlidx); |
| 10676 } |
| 10677 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); |
| 10678 if( iLeafPgno>pIter->iLeafPgno ){ |
| 10679 fts5SegIterGotoPage(p, pIter, iLeafPgno); |
| 10680 bMove = 0; |
| 10681 } |
| 10682 }else{ |
| 10683 assert( pIter->pNextLeaf==0 ); |
| 10684 assert( iMatch<pIter->iRowid ); |
| 10685 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ |
| 10686 fts5DlidxIterPrev(p, pDlidx); |
| 10687 } |
| 10688 iLeafPgno = fts5DlidxIterPgno(pDlidx); |
| 10689 |
| 10690 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); |
| 10691 |
| 10692 if( iLeafPgno<pIter->iLeafPgno ){ |
| 10693 pIter->iLeafPgno = iLeafPgno+1; |
| 10694 fts5SegIterReverseNewPage(p, pIter); |
| 10695 bMove = 0; |
| 10696 } |
| 10697 } |
| 10698 |
| 10699 do{ |
| 10700 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); |
| 10701 if( pIter->pLeaf==0 ) break; |
| 10702 if( bRev==0 && pIter->iRowid>=iMatch ) break; |
| 10703 if( bRev!=0 && pIter->iRowid<=iMatch ) break; |
| 10704 bMove = 1; |
| 10705 }while( p->rc==SQLITE_OK ); |
| 10706 } |
| 10707 |
| 10708 |
| 10709 /* |
| 10710 ** Free the iterator object passed as the second argument. |
| 10711 */ |
| 10712 static void fts5MultiIterFree(Fts5Iter *pIter){ |
| 10713 if( pIter ){ |
| 10714 int i; |
| 10715 for(i=0; i<pIter->nSeg; i++){ |
| 10716 fts5SegIterClear(&pIter->aSeg[i]); |
| 10717 } |
| 10718 fts5StructureRelease(pIter->pStruct); |
| 10719 fts5BufferFree(&pIter->poslist); |
| 10720 sqlite3_free(pIter); |
| 10721 } |
| 10722 } |
| 10723 |
| 10724 static void fts5MultiIterAdvanced( |
| 10725 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 10726 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
| 10727 int iChanged, /* Index of sub-iterator just advanced */ |
| 10728 int iMinset /* Minimum entry in aFirst[] to set */ |
| 10729 ){ |
| 10730 int i; |
| 10731 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ |
| 10732 int iEq; |
| 10733 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ |
| 10734 Fts5SegIter *pSeg = &pIter->aSeg[iEq]; |
| 10735 assert( p->rc==SQLITE_OK ); |
| 10736 pSeg->xNext(p, pSeg, 0); |
| 10737 i = pIter->nSeg + iEq; |
| 10738 } |
| 10739 } |
| 10740 } |
| 10741 |
| 10742 /* |
| 10743 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still |
| 10744 ** points to the same term though - just a different rowid. This function |
| 10745 ** attempts to update the contents of the pIter->aFirst[] accordingly. |
| 10746 ** If it does so successfully, 0 is returned. Otherwise 1. |
| 10747 ** |
| 10748 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() |
| 10749 ** on the iterator instead. That function does the same as this one, except |
| 10750 ** that it deals with more complicated cases as well. |
| 10751 */ |
| 10752 static int fts5MultiIterAdvanceRowid( |
| 10753 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
| 10754 int iChanged, /* Index of sub-iterator just advanced */ |
| 10755 Fts5SegIter **ppFirst |
| 10756 ){ |
| 10757 Fts5SegIter *pNew = &pIter->aSeg[iChanged]; |
| 10758 |
| 10759 if( pNew->iRowid==pIter->iSwitchRowid |
| 10760 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev |
| 10761 ){ |
| 10762 int i; |
| 10763 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; |
| 10764 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64; |
| 10765 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ |
| 10766 Fts5CResult *pRes = &pIter->aFirst[i]; |
| 10767 |
| 10768 assert( pNew->pLeaf ); |
| 10769 assert( pRes->bTermEq==0 || pOther->pLeaf ); |
| 10770 |
| 10771 if( pRes->bTermEq ){ |
| 10772 if( pNew->iRowid==pOther->iRowid ){ |
| 10773 return 1; |
| 10774 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ |
| 10775 pIter->iSwitchRowid = pOther->iRowid; |
| 10776 pNew = pOther; |
| 10777 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ |
| 10778 pIter->iSwitchRowid = pOther->iRowid; |
| 10779 } |
| 10780 } |
| 10781 pRes->iFirst = (u16)(pNew - pIter->aSeg); |
| 10782 if( i==1 ) break; |
| 10783 |
| 10784 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; |
| 10785 } |
| 10786 } |
| 10787 |
| 10788 *ppFirst = pNew; |
| 10789 return 0; |
| 10790 } |
| 10791 |
| 10792 /* |
| 10793 ** Set the pIter->bEof variable based on the state of the sub-iterators. |
| 10794 */ |
| 10795 static void fts5MultiIterSetEof(Fts5Iter *pIter){ |
| 10796 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 10797 pIter->base.bEof = pSeg->pLeaf==0; |
| 10798 pIter->iSwitchRowid = pSeg->iRowid; |
| 10799 } |
| 10800 |
| 10801 /* |
| 10802 ** Move the iterator to the next entry. |
| 10803 ** |
| 10804 ** If an error occurs, an error code is left in Fts5Index.rc. It is not |
| 10805 ** considered an error if the iterator reaches EOF, or if it is already at |
| 10806 ** EOF when this function is called. |
| 10807 */ |
| 10808 static void fts5MultiIterNext( |
| 10809 Fts5Index *p, |
| 10810 Fts5Iter *pIter, |
| 10811 int bFrom, /* True if argument iFrom is valid */ |
| 10812 i64 iFrom /* Advance at least as far as this */ |
| 10813 ){ |
| 10814 int bUseFrom = bFrom; |
| 10815 assert( pIter->base.bEof==0 ); |
| 10816 while( p->rc==SQLITE_OK ){ |
| 10817 int iFirst = pIter->aFirst[1].iFirst; |
| 10818 int bNewTerm = 0; |
| 10819 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
| 10820 assert( p->rc==SQLITE_OK ); |
| 10821 if( bUseFrom && pSeg->pDlidx ){ |
| 10822 fts5SegIterNextFrom(p, pSeg, iFrom); |
| 10823 }else{ |
| 10824 pSeg->xNext(p, pSeg, &bNewTerm); |
| 10825 } |
| 10826 |
| 10827 if( pSeg->pLeaf==0 || bNewTerm |
| 10828 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
| 10829 ){ |
| 10830 fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
| 10831 fts5MultiIterSetEof(pIter); |
| 10832 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; |
| 10833 if( pSeg->pLeaf==0 ) return; |
| 10834 } |
| 10835 |
| 10836 fts5AssertMultiIterSetup(p, pIter); |
| 10837 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); |
| 10838 if( pIter->bSkipEmpty==0 || pSeg->nPos ){ |
| 10839 pIter->xSetOutputs(pIter, pSeg); |
| 10840 return; |
| 10841 } |
| 10842 bUseFrom = 0; |
| 10843 } |
| 10844 } |
| 10845 |
| 10846 static void fts5MultiIterNext2( |
| 10847 Fts5Index *p, |
| 10848 Fts5Iter *pIter, |
| 10849 int *pbNewTerm /* OUT: True if *might* be new term */ |
| 10850 ){ |
| 10851 assert( pIter->bSkipEmpty ); |
| 10852 if( p->rc==SQLITE_OK ){ |
| 10853 do { |
| 10854 int iFirst = pIter->aFirst[1].iFirst; |
| 10855 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
| 10856 int bNewTerm = 0; |
| 10857 |
| 10858 assert( p->rc==SQLITE_OK ); |
| 10859 pSeg->xNext(p, pSeg, &bNewTerm); |
| 10860 if( pSeg->pLeaf==0 || bNewTerm |
| 10861 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
| 10862 ){ |
| 10863 fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
| 10864 fts5MultiIterSetEof(pIter); |
| 10865 *pbNewTerm = 1; |
| 10866 }else{ |
| 10867 *pbNewTerm = 0; |
| 10868 } |
| 10869 fts5AssertMultiIterSetup(p, pIter); |
| 10870 |
| 10871 }while( fts5MultiIterIsEmpty(p, pIter) ); |
| 10872 } |
| 10873 } |
| 10874 |
| 10875 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ |
| 10876 UNUSED_PARAM2(pUnused1, pUnused2); |
| 10877 } |
| 10878 |
| 10879 static Fts5Iter *fts5MultiIterAlloc( |
| 10880 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 10881 int nSeg |
| 10882 ){ |
| 10883 Fts5Iter *pNew; |
| 10884 int nSlot; /* Power of two >= nSeg */ |
| 10885 |
| 10886 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); |
| 10887 pNew = fts5IdxMalloc(p, |
| 10888 sizeof(Fts5Iter) + /* pNew */ |
| 10889 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ |
| 10890 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ |
| 10891 ); |
| 10892 if( pNew ){ |
| 10893 pNew->nSeg = nSlot; |
| 10894 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; |
| 10895 pNew->pIndex = p; |
| 10896 pNew->xSetOutputs = fts5IterSetOutputs_Noop; |
| 10897 } |
| 10898 return pNew; |
| 10899 } |
| 10900 |
| 10901 static void fts5PoslistCallback( |
| 10902 Fts5Index *pUnused, |
| 10903 void *pContext, |
| 10904 const u8 *pChunk, int nChunk |
| 10905 ){ |
| 10906 UNUSED_PARAM(pUnused); |
| 10907 assert_nc( nChunk>=0 ); |
| 10908 if( nChunk>0 ){ |
| 10909 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); |
| 10910 } |
| 10911 } |
| 10912 |
/*
** Context object passed to fts5PoslistFilterCallback(). Field eState
** carries the filter state from one chunk to the next:
**
**   0: The column currently being read is not part of pColset. Its data
**      is not copied.
**   1: The column currently being read is part of pColset. Its data is
**      copied to pBuf.
**   2: The previous chunk ended immediately after an 0x01 byte. The
**      column number is the first varint of the next chunk.
*/
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int eState;                     /* See above */
};
| 10919 |
/*
** Context object passed to fts5PoslistOffsetsCallback(). The input is a
** list of delta-encoded values, each stored with 2 added. iRead and iWrite
** hold the running absolute values on the input and output sides.
*/
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
struct PoslistOffsetsCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int iRead;                      /* Most recent value decoded from input */
  int iWrite;                     /* Most recent value written to pBuf */
};
| 10927 |
| 10928 /* |
| 10929 ** TODO: Make this more efficient! |
| 10930 */ |
| 10931 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ |
| 10932 int i; |
| 10933 for(i=0; i<pColset->nCol; i++){ |
| 10934 if( pColset->aiCol[i]==iCol ) return 1; |
| 10935 } |
| 10936 return 0; |
| 10937 } |
| 10938 |
| 10939 static void fts5PoslistOffsetsCallback( |
| 10940 Fts5Index *pUnused, |
| 10941 void *pContext, |
| 10942 const u8 *pChunk, int nChunk |
| 10943 ){ |
| 10944 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; |
| 10945 UNUSED_PARAM(pUnused); |
| 10946 assert_nc( nChunk>=0 ); |
| 10947 if( nChunk>0 ){ |
| 10948 int i = 0; |
| 10949 while( i<nChunk ){ |
| 10950 int iVal; |
| 10951 i += fts5GetVarint32(&pChunk[i], iVal); |
| 10952 iVal += pCtx->iRead - 2; |
| 10953 pCtx->iRead = iVal; |
| 10954 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ |
| 10955 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); |
| 10956 pCtx->iWrite = iVal; |
| 10957 } |
| 10958 } |
| 10959 } |
| 10960 } |
| 10961 |
| 10962 static void fts5PoslistFilterCallback( |
| 10963 Fts5Index *pUnused, |
| 10964 void *pContext, |
| 10965 const u8 *pChunk, int nChunk |
| 10966 ){ |
| 10967 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; |
| 10968 UNUSED_PARAM(pUnused); |
| 10969 assert_nc( nChunk>=0 ); |
| 10970 if( nChunk>0 ){ |
| 10971 /* Search through to find the first varint with value 1. This is the |
| 10972 ** start of the next columns hits. */ |
| 10973 int i = 0; |
| 10974 int iStart = 0; |
| 10975 |
| 10976 if( pCtx->eState==2 ){ |
| 10977 int iCol; |
| 10978 fts5FastGetVarint32(pChunk, i, iCol); |
| 10979 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ |
| 10980 pCtx->eState = 1; |
| 10981 fts5BufferSafeAppendVarint(pCtx->pBuf, 1); |
| 10982 }else{ |
| 10983 pCtx->eState = 0; |
| 10984 } |
| 10985 } |
| 10986 |
| 10987 do { |
| 10988 while( i<nChunk && pChunk[i]!=0x01 ){ |
| 10989 while( pChunk[i] & 0x80 ) i++; |
| 10990 i++; |
| 10991 } |
| 10992 if( pCtx->eState ){ |
| 10993 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
| 10994 } |
| 10995 if( i<nChunk ){ |
| 10996 int iCol; |
| 10997 iStart = i; |
| 10998 i++; |
| 10999 if( i>=nChunk ){ |
| 11000 pCtx->eState = 2; |
| 11001 }else{ |
| 11002 fts5FastGetVarint32(pChunk, i, iCol); |
| 11003 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); |
| 11004 if( pCtx->eState ){ |
| 11005 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
| 11006 iStart = i; |
| 11007 } |
| 11008 } |
| 11009 } |
| 11010 }while( i<nChunk ); |
| 11011 } |
| 11012 } |
| 11013 |
| 11014 static void fts5ChunkIterate( |
| 11015 Fts5Index *p, /* Index object */ |
| 11016 Fts5SegIter *pSeg, /* Poslist of this iterator */ |
| 11017 void *pCtx, /* Context pointer for xChunk callback */ |
| 11018 void (*xChunk)(Fts5Index*, void*, const u8*, int) |
| 11019 ){ |
| 11020 int nRem = pSeg->nPos; /* Number of bytes still to come */ |
| 11021 Fts5Data *pData = 0; |
| 11022 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 11023 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); |
| 11024 int pgno = pSeg->iLeafPgno; |
| 11025 int pgnoSave = 0; |
| 11026 |
| 11027 /* This function does notmwork with detail=none databases. */ |
| 11028 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 11029 |
| 11030 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ |
| 11031 pgnoSave = pgno+1; |
| 11032 } |
| 11033 |
| 11034 while( 1 ){ |
| 11035 xChunk(p, pCtx, pChunk, nChunk); |
| 11036 nRem -= nChunk; |
| 11037 fts5DataRelease(pData); |
| 11038 if( nRem<=0 ){ |
| 11039 break; |
| 11040 }else{ |
| 11041 pgno++; |
| 11042 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); |
| 11043 if( pData==0 ) break; |
| 11044 pChunk = &pData->p[4]; |
| 11045 nChunk = MIN(nRem, pData->szLeaf - 4); |
| 11046 if( pgno==pgnoSave ){ |
| 11047 assert( pSeg->pNextLeaf==0 ); |
| 11048 pSeg->pNextLeaf = pData; |
| 11049 pData = 0; |
| 11050 } |
| 11051 } |
| 11052 } |
| 11053 } |
| 11054 |
| 11055 /* |
| 11056 ** Iterator pIter currently points to a valid entry (not EOF). This |
| 11057 ** function appends the position list data for the current entry to |
| 11058 ** buffer pBuf. It does not make a copy of the position-list size |
| 11059 ** field. |
| 11060 */ |
| 11061 static void fts5SegiterPoslist( |
| 11062 Fts5Index *p, |
| 11063 Fts5SegIter *pSeg, |
| 11064 Fts5Colset *pColset, |
| 11065 Fts5Buffer *pBuf |
| 11066 ){ |
| 11067 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ |
| 11068 if( pColset==0 ){ |
| 11069 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); |
| 11070 }else{ |
| 11071 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 11072 PoslistCallbackCtx sCtx; |
| 11073 sCtx.pBuf = pBuf; |
| 11074 sCtx.pColset = pColset; |
| 11075 sCtx.eState = fts5IndexColsetTest(pColset, 0); |
| 11076 assert( sCtx.eState==0 || sCtx.eState==1 ); |
| 11077 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); |
| 11078 }else{ |
| 11079 PoslistOffsetsCtx sCtx; |
| 11080 memset(&sCtx, 0, sizeof(sCtx)); |
| 11081 sCtx.pBuf = pBuf; |
| 11082 sCtx.pColset = pColset; |
| 11083 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); |
| 11084 } |
| 11085 } |
| 11086 } |
| 11087 } |
| 11088 |
| 11089 /* |
| 11090 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If |
| 11091 ** the position list contains entries for column iCol, then (*pa) is set |
| 11092 ** to point to the sub-position-list for that column and the number of |
| 11093 ** bytes in it returned. Or, if the argument position list does not |
| 11094 ** contain any entries for column iCol, return 0. |
| 11095 */ |
| 11096 static int fts5IndexExtractCol( |
| 11097 const u8 **pa, /* IN/OUT: Pointer to poslist */ |
| 11098 int n, /* IN: Size of poslist in bytes */ |
| 11099 int iCol /* Column to extract from poslist */ |
| 11100 ){ |
| 11101 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ |
| 11102 const u8 *p = *pa; |
| 11103 const u8 *pEnd = &p[n]; /* One byte past end of position list */ |
| 11104 |
| 11105 while( iCol>iCurrent ){ |
| 11106 /* Advance pointer p until it points to pEnd or an 0x01 byte that is |
| 11107 ** not part of a varint. Note that it is not possible for a negative |
| 11108 ** or extremely large varint to occur within an uncorrupted position |
| 11109 ** list. So the last byte of each varint may be assumed to have a clear |
| 11110 ** 0x80 bit. */ |
| 11111 while( *p!=0x01 ){ |
| 11112 while( *p++ & 0x80 ); |
| 11113 if( p>=pEnd ) return 0; |
| 11114 } |
| 11115 *pa = p++; |
| 11116 iCurrent = *p++; |
| 11117 if( iCurrent & 0x80 ){ |
| 11118 p--; |
| 11119 p += fts5GetVarint32(p, iCurrent); |
| 11120 } |
| 11121 } |
| 11122 if( iCol!=iCurrent ) return 0; |
| 11123 |
| 11124 /* Advance pointer p until it points to pEnd or an 0x01 byte that is |
| 11125 ** not part of a varint */ |
| 11126 while( p<pEnd && *p!=0x01 ){ |
| 11127 while( *p++ & 0x80 ); |
| 11128 } |
| 11129 |
| 11130 return p - (*pa); |
| 11131 } |
| 11132 |
| 11133 static int fts5IndexExtractColset ( |
| 11134 Fts5Colset *pColset, /* Colset to filter on */ |
| 11135 const u8 *pPos, int nPos, /* Position list */ |
| 11136 Fts5Buffer *pBuf /* Output buffer */ |
| 11137 ){ |
| 11138 int rc = SQLITE_OK; |
| 11139 int i; |
| 11140 |
| 11141 fts5BufferZero(pBuf); |
| 11142 for(i=0; i<pColset->nCol; i++){ |
| 11143 const u8 *pSub = pPos; |
| 11144 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); |
| 11145 if( nSub ){ |
| 11146 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); |
| 11147 } |
| 11148 } |
| 11149 return rc; |
| 11150 } |
| 11151 |
| 11152 /* |
| 11153 ** xSetOutputs callback used by detail=none tables. |
| 11154 */ |
| 11155 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11156 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 11157 pIter->base.iRowid = pSeg->iRowid; |
| 11158 pIter->base.nData = pSeg->nPos; |
| 11159 } |
| 11160 |
| 11161 /* |
| 11162 ** xSetOutputs callback used by detail=full and detail=col tables when no |
| 11163 ** column filters are specified. |
| 11164 */ |
| 11165 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11166 pIter->base.iRowid = pSeg->iRowid; |
| 11167 pIter->base.nData = pSeg->nPos; |
| 11168 |
| 11169 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 11170 assert( pIter->pColset==0 ); |
| 11171 |
| 11172 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
| 11173 /* All data is stored on the current page. Populate the output |
| 11174 ** variables to point into the body of the page object. */ |
| 11175 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 11176 }else{ |
| 11177 /* The data is distributed over two or more pages. Copy it into the |
| 11178 ** Fts5Iter.poslist buffer and then set the output pointer to point |
| 11179 ** to this buffer. */ |
| 11180 fts5BufferZero(&pIter->poslist); |
| 11181 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); |
| 11182 pIter->base.pData = pIter->poslist.p; |
| 11183 } |
| 11184 } |
| 11185 |
| 11186 /* |
| 11187 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match |
| 11188 ** against no columns at all). |
| 11189 */ |
| 11190 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11191 UNUSED_PARAM(pSeg); |
| 11192 pIter->base.nData = 0; |
| 11193 } |
| 11194 |
| 11195 /* |
| 11196 ** xSetOutputs callback used by detail=col when there is a column filter |
| 11197 ** and there are 100 or more columns. Also called as a fallback from |
| 11198 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. |
| 11199 */ |
| 11200 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11201 fts5BufferZero(&pIter->poslist); |
| 11202 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); |
| 11203 pIter->base.iRowid = pSeg->iRowid; |
| 11204 pIter->base.pData = pIter->poslist.p; |
| 11205 pIter->base.nData = pIter->poslist.n; |
| 11206 } |
| 11207 |
| 11208 /* |
| 11209 ** xSetOutputs callback used when: |
| 11210 ** |
| 11211 ** * detail=col, |
| 11212 ** * there is a column filter, and |
| 11213 ** * the table contains 100 or fewer columns. |
| 11214 ** |
| 11215 ** The last point is to ensure all column numbers are stored as |
| 11216 ** single-byte varints. |
| 11217 */ |
| 11218 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11219 |
| 11220 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 11221 assert( pIter->pColset ); |
| 11222 |
| 11223 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ |
| 11224 fts5IterSetOutputs_Col(pIter, pSeg); |
| 11225 }else{ |
| 11226 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 11227 u8 *pEnd = (u8*)&a[pSeg->nPos]; |
| 11228 int iPrev = 0; |
| 11229 int *aiCol = pIter->pColset->aiCol; |
| 11230 int *aiColEnd = &aiCol[pIter->pColset->nCol]; |
| 11231 |
| 11232 u8 *aOut = pIter->poslist.p; |
| 11233 int iPrevOut = 0; |
| 11234 |
| 11235 pIter->base.iRowid = pSeg->iRowid; |
| 11236 |
| 11237 while( a<pEnd ){ |
| 11238 iPrev += (int)a++[0] - 2; |
| 11239 while( *aiCol<iPrev ){ |
| 11240 aiCol++; |
| 11241 if( aiCol==aiColEnd ) goto setoutputs_col_out; |
| 11242 } |
| 11243 if( *aiCol==iPrev ){ |
| 11244 *aOut++ = (u8)((iPrev - iPrevOut) + 2); |
| 11245 iPrevOut = iPrev; |
| 11246 } |
| 11247 } |
| 11248 |
| 11249 setoutputs_col_out: |
| 11250 pIter->base.pData = pIter->poslist.p; |
| 11251 pIter->base.nData = aOut - pIter->poslist.p; |
| 11252 } |
| 11253 } |
| 11254 |
| 11255 /* |
| 11256 ** xSetOutputs callback used by detail=full when there is a column filter. |
| 11257 */ |
| 11258 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 11259 Fts5Colset *pColset = pIter->pColset; |
| 11260 pIter->base.iRowid = pSeg->iRowid; |
| 11261 |
| 11262 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); |
| 11263 assert( pColset ); |
| 11264 |
| 11265 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
| 11266 /* All data is stored on the current page. Populate the output |
| 11267 ** variables to point into the body of the page object. */ |
| 11268 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 11269 if( pColset->nCol==1 ){ |
| 11270 pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); |
| 11271 pIter->base.pData = a; |
| 11272 }else{ |
| 11273 fts5BufferZero(&pIter->poslist); |
| 11274 fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist); |
| 11275 pIter->base.pData = pIter->poslist.p; |
| 11276 pIter->base.nData = pIter->poslist.n; |
| 11277 } |
| 11278 }else{ |
| 11279 /* The data is distributed over two or more pages. Copy it into the |
| 11280 ** Fts5Iter.poslist buffer and then set the output pointer to point |
| 11281 ** to this buffer. */ |
| 11282 fts5BufferZero(&pIter->poslist); |
| 11283 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); |
| 11284 pIter->base.pData = pIter->poslist.p; |
| 11285 pIter->base.nData = pIter->poslist.n; |
| 11286 } |
| 11287 } |
| 11288 |
| 11289 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ |
| 11290 if( *pRc==SQLITE_OK ){ |
| 11291 Fts5Config *pConfig = pIter->pIndex->pConfig; |
| 11292 if( pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 11293 pIter->xSetOutputs = fts5IterSetOutputs_None; |
| 11294 } |
| 11295 |
| 11296 else if( pIter->pColset==0 ){ |
| 11297 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; |
| 11298 } |
| 11299 |
| 11300 else if( pIter->pColset->nCol==0 ){ |
| 11301 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; |
| 11302 } |
| 11303 |
| 11304 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 11305 pIter->xSetOutputs = fts5IterSetOutputs_Full; |
| 11306 } |
| 11307 |
| 11308 else{ |
| 11309 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 11310 if( pConfig->nCol<=100 ){ |
| 11311 pIter->xSetOutputs = fts5IterSetOutputs_Col100; |
| 11312 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); |
| 11313 }else{ |
| 11314 pIter->xSetOutputs = fts5IterSetOutputs_Col; |
| 11315 } |
| 11316 } |
| 11317 } |
| 11318 } |
| 11319 |
| 11320 |
/*
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the
** iterated data.
**
** A seek term (pTerm/nTerm) may only be supplied when iLevel is negative;
** this is asserted below. When iLevel is negative and p->pHash is set, one
** extra segment-iterator is allocated for the in-memory hash table and
** occupies the first slot.
**
** On return *ppOut is either the new iterator or NULL. It is NULL if
** fts5MultiIterAlloc() returns NULL, or if p->rc is not SQLITE_OK once the
** component iterators have been set up (in which case the partially built
** iterator is freed here).
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Next unused slot in pNew->aSeg[] */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. nSeg stays 0 if an
  ** error has already occurred. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ) return;
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pStruct = pStruct;
  pNew->pColset = pColset;
  fts5StructureRef(pStruct);
  /* No output callback is selected here when FTS5INDEX_QUERY_NOOUTPUT is
  ** set. NOTE(review): xSetOutputs is still invoked further down, so
  ** fts5MultiIterAlloc() presumably installs a default - confirm. */
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      /* Levels are visited from aLevel[0] upwards; within each level the
      ** segments are visited from the last array entry to the first. */
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      /* Single-level mode: only the first nSeg segments of level iLevel,
      ** again from the highest array index down to 0. */
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterators now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL. */
  if( p->rc==SQLITE_OK ){
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      /* A non-zero return from fts5MultiIterDoCompare() names a segment
      ** iterator that must be advanced before the comparison is redone. */
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        Fts5SegIter *pSeg = &pNew->aSeg[iEq];
        if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5MultiIterSetEof(pNew);
    fts5AssertMultiIterSetup(p, pNew);

    /* Either step past an initial empty entry (SKIPEMPTY mode), or publish
    ** the outputs for the entry the iterator now points at. */
    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }else if( pNew->base.bEof==0 ){
      Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
      pNew->xSetOutputs(pNew, pSeg);
    }

  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }
}
| 11428 |
| 11429 /* |
| 11430 ** Create an Fts5Iter that iterates through the doclist provided |
| 11431 ** as the second argument. |
| 11432 */ |
| 11433 static void fts5MultiIterNew2( |
| 11434 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 11435 Fts5Data *pData, /* Doclist to iterate through */ |
| 11436 int bDesc, /* True for descending rowid order */ |
| 11437 Fts5Iter **ppOut /* New object */ |
| 11438 ){ |
| 11439 Fts5Iter *pNew; |
| 11440 pNew = fts5MultiIterAlloc(p, 2); |
| 11441 if( pNew ){ |
| 11442 Fts5SegIter *pIter = &pNew->aSeg[1]; |
| 11443 |
| 11444 pIter->flags = FTS5_SEGITER_ONETERM; |
| 11445 if( pData->szLeaf>0 ){ |
| 11446 pIter->pLeaf = pData; |
| 11447 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); |
| 11448 pIter->iEndofDoclist = pData->nn; |
| 11449 pNew->aFirst[1].iFirst = 1; |
| 11450 if( bDesc ){ |
| 11451 pNew->bRev = 1; |
| 11452 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 11453 fts5SegIterReverseInitPage(p, pIter); |
| 11454 }else{ |
| 11455 fts5SegIterLoadNPos(p, pIter); |
| 11456 } |
| 11457 pData = 0; |
| 11458 }else{ |
| 11459 pNew->base.bEof = 1; |
| 11460 } |
| 11461 fts5SegIterSetNext(p, pIter); |
| 11462 |
| 11463 *ppOut = pNew; |
| 11464 } |
| 11465 |
| 11466 fts5DataRelease(pData); |
| 11467 } |
| 11468 |
| 11469 /* |
| 11470 ** Return true if the iterator is at EOF or if an error has occurred. |
| 11471 ** False otherwise. |
| 11472 */ |
| 11473 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ |
| 11474 assert( p->rc |
| 11475 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof |
| 11476 ); |
| 11477 return (p->rc || pIter->base.bEof); |
| 11478 } |
| 11479 |
| 11480 /* |
| 11481 ** Return the rowid of the entry that the iterator currently points |
| 11482 ** to. If the iterator points to EOF when this function is called the |
| 11483 ** results are undefined. |
| 11484 */ |
| 11485 static i64 fts5MultiIterRowid(Fts5Iter *pIter){ |
| 11486 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); |
| 11487 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; |
| 11488 } |
| 11489 |
| 11490 /* |
| 11491 ** Move the iterator to the next entry at or following iMatch. |
| 11492 */ |
| 11493 static void fts5MultiIterNextFrom( |
| 11494 Fts5Index *p, |
| 11495 Fts5Iter *pIter, |
| 11496 i64 iMatch |
| 11497 ){ |
| 11498 while( 1 ){ |
| 11499 i64 iRowid; |
| 11500 fts5MultiIterNext(p, pIter, 1, iMatch); |
| 11501 if( fts5MultiIterEof(p, pIter) ) break; |
| 11502 iRowid = fts5MultiIterRowid(pIter); |
| 11503 if( pIter->bRev==0 && iRowid>=iMatch ) break; |
| 11504 if( pIter->bRev!=0 && iRowid<=iMatch ) break; |
| 11505 } |
| 11506 } |
| 11507 |
| 11508 /* |
| 11509 ** Return a pointer to a buffer containing the term associated with the |
| 11510 ** entry that the iterator currently points to. |
| 11511 */ |
| 11512 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ |
| 11513 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 11514 *pn = p->term.n; |
| 11515 return p->term.p; |
| 11516 } |
| 11517 |
| 11518 /* |
| 11519 ** Allocate a new segment-id for the structure pStruct. The new segment |
| 11520 ** id must be between 1 and 65335 inclusive, and must not be used by |
| 11521 ** any currently existing segment. If a free segment id cannot be found, |
| 11522 ** SQLITE_FULL is returned. |
| 11523 ** |
| 11524 ** If an error has already occurred, this function is a no-op. 0 is |
| 11525 ** returned in this case. |
| 11526 */ |
| 11527 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ |
| 11528 int iSegid = 0; |
| 11529 |
| 11530 if( p->rc==SQLITE_OK ){ |
| 11531 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ |
| 11532 p->rc = SQLITE_FULL; |
| 11533 }else{ |
| 11534 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following |
| 11535 ** array is 63 elements, or 252 bytes, in size. */ |
| 11536 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32]; |
| 11537 int iLvl, iSeg; |
| 11538 int i; |
| 11539 u32 mask; |
| 11540 memset(aUsed, 0, sizeof(aUsed)); |
| 11541 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 11542 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 11543 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; |
| 11544 if( iId<=FTS5_MAX_SEGMENT ){ |
| 11545 aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32); |
| 11546 } |
| 11547 } |
| 11548 } |
| 11549 |
| 11550 for(i=0; aUsed[i]==0xFFFFFFFF; i++); |
| 11551 mask = aUsed[i]; |
| 11552 for(iSegid=0; mask & (1 << iSegid); iSegid++); |
| 11553 iSegid += 1 + i*32; |
| 11554 |
| 11555 #ifdef SQLITE_DEBUG |
| 11556 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 11557 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 11558 assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); |
| 11559 } |
| 11560 } |
| 11561 assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); |
| 11562 |
| 11563 { |
| 11564 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); |
| 11565 if( p->rc==SQLITE_OK ){ |
| 11566 u8 aBlob[2] = {0xff, 0xff}; |
| 11567 sqlite3_bind_int(pIdxSelect, 1, iSegid); |
| 11568 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); |
| 11569 assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); |
| 11570 p->rc = sqlite3_reset(pIdxSelect); |
| 11571 } |
| 11572 } |
| 11573 #endif |
| 11574 } |
| 11575 } |
| 11576 |
| 11577 return iSegid; |
| 11578 } |
| 11579 |
| 11580 /* |
| 11581 ** Discard all data currently cached in the hash-tables. |
| 11582 */ |
| 11583 static void fts5IndexDiscardData(Fts5Index *p){ |
| 11584 assert( p->pHash || p->nPendingData==0 ); |
| 11585 if( p->pHash ){ |
| 11586 sqlite3Fts5HashClear(p->pHash); |
| 11587 p->nPendingData = 0; |
| 11588 } |
| 11589 } |
| 11590 |
/*
** Return the number of leading bytes that buffer (pNew/<length-unknown>)
** has in common with buffer (pOld/nOld). The result is never larger
** than nOld.
**
** The caller guarantees that buffer (pNew/<length-unknown>) is greater
** than buffer (pOld/nOld); no length is passed for pNew.
*/
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  int nShared = 0;
  while( nShared<nOld && pOld[nShared]==pNew[nShared] ){
    nShared++;
  }
  return nShared;
}
| 11605 |
| 11606 static void fts5WriteDlidxClear( |
| 11607 Fts5Index *p, |
| 11608 Fts5SegWriter *pWriter, |
| 11609 int bFlush /* If true, write dlidx to disk */ |
| 11610 ){ |
| 11611 int i; |
| 11612 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); |
| 11613 for(i=0; i<pWriter->nDlidx; i++){ |
| 11614 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; |
| 11615 if( pDlidx->buf.n==0 ) break; |
| 11616 if( bFlush ){ |
| 11617 assert( pDlidx->pgno!=0 ); |
| 11618 fts5DataWrite(p, |
| 11619 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), |
| 11620 pDlidx->buf.p, pDlidx->buf.n |
| 11621 ); |
| 11622 } |
| 11623 sqlite3Fts5BufferZero(&pDlidx->buf); |
| 11624 pDlidx->bPrevValid = 0; |
| 11625 } |
| 11626 } |
| 11627 |
| 11628 /* |
| 11629 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. |
| 11630 ** Any new array elements are zeroed before returning. |
| 11631 */ |
| 11632 static int fts5WriteDlidxGrow( |
| 11633 Fts5Index *p, |
| 11634 Fts5SegWriter *pWriter, |
| 11635 int nLvl |
| 11636 ){ |
| 11637 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ |
| 11638 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( |
| 11639 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl |
| 11640 ); |
| 11641 if( aDlidx==0 ){ |
| 11642 p->rc = SQLITE_NOMEM; |
| 11643 }else{ |
| 11644 int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); |
| 11645 memset(&aDlidx[pWriter->nDlidx], 0, nByte); |
| 11646 pWriter->aDlidx = aDlidx; |
| 11647 pWriter->nDlidx = nLvl; |
| 11648 } |
| 11649 } |
| 11650 return p->rc; |
| 11651 } |
| 11652 |
| 11653 /* |
| 11654 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large |
| 11655 ** enough, flush it to disk and return 1. Otherwise discard it and return |
| 11656 ** zero. |
| 11657 */ |
| 11658 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 11659 int bFlag = 0; |
| 11660 |
| 11661 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written |
| 11662 ** to the database, also write the doclist-index to disk. */ |
| 11663 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ |
| 11664 bFlag = 1; |
| 11665 } |
| 11666 fts5WriteDlidxClear(p, pWriter, bFlag); |
| 11667 pWriter->nEmpty = 0; |
| 11668 return bFlag; |
| 11669 } |
| 11670 |
| 11671 /* |
| 11672 ** This function is called whenever processing of the doclist for the |
| 11673 ** last term on leaf page (pWriter->iBtPage) is completed. |
| 11674 ** |
| 11675 ** The doclist-index for that term is currently stored in-memory within the |
| 11676 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function |
| 11677 ** writes it out to disk. Or, if it is too small to bother with, discards |
| 11678 ** it. |
| 11679 ** |
| 11680 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. |
| 11681 */ |
| 11682 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 11683 int bFlag; |
| 11684 |
| 11685 assert( pWriter->iBtPage || pWriter->nEmpty==0 ); |
| 11686 if( pWriter->iBtPage==0 ) return; |
| 11687 bFlag = fts5WriteFlushDlidx(p, pWriter); |
| 11688 |
| 11689 if( p->rc==SQLITE_OK ){ |
| 11690 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); |
| 11691 /* The following was already done in fts5WriteInit(): */ |
| 11692 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ |
| 11693 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); |
| 11694 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); |
| 11695 sqlite3_step(p->pIdxWriter); |
| 11696 p->rc = sqlite3_reset(p->pIdxWriter); |
| 11697 } |
| 11698 pWriter->iBtPage = 0; |
| 11699 } |
| 11700 |
/*
** This is called once for each leaf page except the first that contains
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
** is larger than all terms written to earlier leaves, and equal to or
** smaller than the first term on the new leaf.
**
** The pending b-tree entry for the previous term-bearing leaf is flushed
** first; then (nTerm/pTerm) and the current leaf page number are recorded
** in pWriter->btterm and pWriter->iBtPage for the next flush.
**
** If an error occurs, an error code is left in Fts5Index.rc. If an error
** has already occurred when this function is called, it is a no-op.
*/
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  /* Must run before btterm/iBtPage are overwritten below, because the
  ** flush reads both of them. */
  fts5WriteFlushBtree(p, pWriter);
  fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  pWriter->iBtPage = pWriter->writer.pgno;
}
| 11719 |
| 11720 /* |
| 11721 ** This function is called when flushing a leaf page that contains no |
| 11722 ** terms at all to disk. |
| 11723 */ |
| 11724 static void fts5WriteBtreeNoTerm( |
| 11725 Fts5Index *p, /* FTS5 backend object */ |
| 11726 Fts5SegWriter *pWriter /* Writer object */ |
| 11727 ){ |
| 11728 /* If there were no rowids on the leaf page either and the doclist-index |
| 11729 ** has already been started, append an 0x00 byte to it. */ |
| 11730 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ |
| 11731 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; |
| 11732 assert( pDlidx->bPrevValid ); |
| 11733 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); |
| 11734 } |
| 11735 |
| 11736 /* Increment the "number of sequential leaves without a term" counter. */ |
| 11737 pWriter->nEmpty++; |
| 11738 } |
| 11739 |
| 11740 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ |
| 11741 i64 iRowid; |
| 11742 int iOff; |
| 11743 |
| 11744 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); |
| 11745 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); |
| 11746 return iRowid; |
| 11747 } |
| 11748 |
/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
**
** The loop starts at level 0 of pWriter->aDlidx[]. While the buffer at the
** current level has reached p->pConfig->pgsz bytes, that page is written
** out and the loop moves up one level so iRowid is recorded there too.
** The loop ends at the first level that still had room, or when p->rc is
** set.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  int i;                          /* Current doclist-index level */
  int bDone = 0;                  /* Set once a level with room is found */

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;                     /* Value appended at this level */
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      fts5WriteDlidxGrow(p, pWriter, i+2);
      /* The grow call may have reallocated aDlidx[], so refetch the
      ** pointer before using pDlidx or pDlidx[1]. */
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a fresh page at this level. bDone stays 0, so the next loop
      ** iteration records iRowid one level up as well. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Not the first entry on this page: store the delta from the
      ** previous rowid. */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry on this page: write the page header (a flags varint,
      ** then a page number) and store the rowid itself. At level 0 the
      ** page number is the current leaf page; at higher levels it is the
      ** pgno of the level below. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}
| 11813 |
| 11814 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 11815 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; |
| 11816 Fts5PageWriter *pPage = &pWriter->writer; |
| 11817 i64 iRowid; |
| 11818 |
| 11819 static int nCall = 0; |
| 11820 nCall++; |
| 11821 |
| 11822 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); |
| 11823 |
| 11824 /* Set the szLeaf header field. */ |
| 11825 assert( 0==fts5GetU16(&pPage->buf.p[2]) ); |
| 11826 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); |
| 11827 |
| 11828 if( pWriter->bFirstTermInPage ){ |
| 11829 /* No term was written to this page. */ |
| 11830 assert( pPage->pgidx.n==0 ); |
| 11831 fts5WriteBtreeNoTerm(p, pWriter); |
| 11832 }else{ |
| 11833 /* Append the pgidx to the page buffer. Set the szLeaf header field. */ |
| 11834 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); |
| 11835 } |
| 11836 |
| 11837 /* Write the page out to disk */ |
| 11838 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); |
| 11839 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); |
| 11840 |
| 11841 /* Initialize the next page. */ |
| 11842 fts5BufferZero(&pPage->buf); |
| 11843 fts5BufferZero(&pPage->pgidx); |
| 11844 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); |
| 11845 pPage->iPrevPgidx = 0; |
| 11846 pPage->pgno++; |
| 11847 |
| 11848 /* Increase the leaves written counter */ |
| 11849 pWriter->nLeafWritten++; |
| 11850 |
| 11851 /* The new leaf holds no terms or rowids */ |
| 11852 pWriter->bFirstTermInPage = 1; |
| 11853 pWriter->bFirstRowidInPage = 1; |
| 11854 } |
| 11855 |
| 11856 /* |
| 11857 ** Append term pTerm/nTerm to the segment being written by the writer passed |
| 11858 ** as the second argument. |
| 11859 ** |
| 11860 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 11861 ** already occurred, this function is a no-op. |
| 11862 */ |
| 11863 static void fts5WriteAppendTerm( |
| 11864 Fts5Index *p, |
| 11865 Fts5SegWriter *pWriter, |
| 11866 int nTerm, const u8 *pTerm |
| 11867 ){ |
| 11868 int nPrefix; /* Bytes of prefix compression for term */ |
| 11869 Fts5PageWriter *pPage = &pWriter->writer; |
| 11870 Fts5Buffer *pPgidx = &pWriter->writer.pgidx; |
| 11871 |
| 11872 assert( p->rc==SQLITE_OK ); |
| 11873 assert( pPage->buf.n>=4 ); |
| 11874 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); |
| 11875 |
| 11876 /* If the current leaf page is full, flush it to disk. */ |
| 11877 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ |
| 11878 if( pPage->buf.n>4 ){ |
| 11879 fts5WriteFlushLeaf(p, pWriter); |
| 11880 } |
| 11881 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); |
| 11882 } |
| 11883 |
| 11884 /* TODO1: Updating pgidx here. */ |
| 11885 pPgidx->n += sqlite3Fts5PutVarint( |
| 11886 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx |
| 11887 ); |
| 11888 pPage->iPrevPgidx = pPage->buf.n; |
| 11889 #if 0 |
| 11890 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); |
| 11891 pPgidx->n += 2; |
| 11892 #endif |
| 11893 |
| 11894 if( pWriter->bFirstTermInPage ){ |
| 11895 nPrefix = 0; |
| 11896 if( pPage->pgno!=1 ){ |
| 11897 /* This is the first term on a leaf that is not the leftmost leaf in |
| 11898 ** the segment b-tree. In this case it is necessary to add a term to |
| 11899 ** the b-tree hierarchy that is (a) larger than the largest term |
| 11900 ** already written to the segment and (b) smaller than or equal to |
| 11901 ** this term. In other words, a prefix of (pTerm/nTerm) that is one |
| 11902 ** byte longer than the longest prefix (pTerm/nTerm) shares with the |
| 11903 ** previous term. |
| 11904 ** |
| 11905 ** Usually, the previous term is available in pPage->term. The exception |
| 11906 ** is if this is the first term written in an incremental-merge step. |
| 11907 ** In this case the previous term is not available, so just write a |
| 11908 ** copy of (pTerm/nTerm) into the parent node. This is slightly |
| 11909 ** inefficient, but still correct. */ |
| 11910 int n = nTerm; |
| 11911 if( pPage->term.n ){ |
| 11912 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); |
| 11913 } |
| 11914 fts5WriteBtreeTerm(p, pWriter, n, pTerm); |
| 11915 pPage = &pWriter->writer; |
| 11916 } |
| 11917 }else{ |
| 11918 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); |
| 11919 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); |
| 11920 } |
| 11921 |
| 11922 /* Append the number of bytes of new data, then the term data itself |
| 11923 ** to the page. */ |
| 11924 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); |
| 11925 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); |
| 11926 |
| 11927 /* Update the Fts5PageWriter.term field. */ |
| 11928 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); |
| 11929 pWriter->bFirstTermInPage = 0; |
| 11930 |
| 11931 pWriter->bFirstRowidInPage = 0; |
| 11932 pWriter->bFirstRowidInDoclist = 1; |
| 11933 |
| 11934 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); |
| 11935 pWriter->aDlidx[0].pgno = pPage->pgno; |
| 11936 } |
| 11937 |
| 11938 /* |
| 11939 ** Append a rowid and position-list size field to the writers output. |
| 11940 */ |
| 11941 static void fts5WriteAppendRowid( |
| 11942 Fts5Index *p, |
| 11943 Fts5SegWriter *pWriter, |
| 11944 i64 iRowid |
| 11945 ){ |
| 11946 if( p->rc==SQLITE_OK ){ |
| 11947 Fts5PageWriter *pPage = &pWriter->writer; |
| 11948 |
| 11949 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ |
| 11950 fts5WriteFlushLeaf(p, pWriter); |
| 11951 } |
| 11952 |
| 11953 /* If this is to be the first rowid written to the page, set the |
| 11954 ** rowid-pointer in the page-header. Also append a value to the dlidx |
| 11955 ** buffer, in case a doclist-index is required. */ |
| 11956 if( pWriter->bFirstRowidInPage ){ |
| 11957 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); |
| 11958 fts5WriteDlidxAppend(p, pWriter, iRowid); |
| 11959 } |
| 11960 |
| 11961 /* Write the rowid. */ |
| 11962 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ |
| 11963 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); |
| 11964 }else{ |
| 11965 assert( p->rc || iRowid>pWriter->iPrevRowid ); |
| 11966 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); |
| 11967 } |
| 11968 pWriter->iPrevRowid = iRowid; |
| 11969 pWriter->bFirstRowidInDoclist = 0; |
| 11970 pWriter->bFirstRowidInPage = 0; |
| 11971 } |
| 11972 } |
| 11973 |
| 11974 static void fts5WriteAppendPoslistData( |
| 11975 Fts5Index *p, |
| 11976 Fts5SegWriter *pWriter, |
| 11977 const u8 *aData, |
| 11978 int nData |
| 11979 ){ |
| 11980 Fts5PageWriter *pPage = &pWriter->writer; |
| 11981 const u8 *a = aData; |
| 11982 int n = nData; |
| 11983 |
| 11984 assert( p->pConfig->pgsz>0 ); |
| 11985 while( p->rc==SQLITE_OK |
| 11986 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz |
| 11987 ){ |
| 11988 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; |
| 11989 int nCopy = 0; |
| 11990 while( nCopy<nReq ){ |
| 11991 i64 dummy; |
| 11992 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy); |
| 11993 } |
| 11994 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); |
| 11995 a += nCopy; |
| 11996 n -= nCopy; |
| 11997 fts5WriteFlushLeaf(p, pWriter); |
| 11998 } |
| 11999 if( n>0 ){ |
| 12000 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); |
| 12001 } |
| 12002 } |
| 12003 |
| 12004 /* |
| 12005 ** Flush any data cached by the writer object to the database. Free any |
| 12006 ** allocations associated with the writer. |
| 12007 */ |
| 12008 static void fts5WriteFinish( |
| 12009 Fts5Index *p, |
| 12010 Fts5SegWriter *pWriter, /* Writer object */ |
| 12011 int *pnLeaf /* OUT: Number of leaf pages in b-tree */ |
| 12012 ){ |
| 12013 int i; |
| 12014 Fts5PageWriter *pLeaf = &pWriter->writer; |
| 12015 if( p->rc==SQLITE_OK ){ |
| 12016 assert( pLeaf->pgno>=1 ); |
| 12017 if( pLeaf->buf.n>4 ){ |
| 12018 fts5WriteFlushLeaf(p, pWriter); |
| 12019 } |
| 12020 *pnLeaf = pLeaf->pgno-1; |
| 12021 if( pLeaf->pgno>1 ){ |
| 12022 fts5WriteFlushBtree(p, pWriter); |
| 12023 } |
| 12024 } |
| 12025 fts5BufferFree(&pLeaf->term); |
| 12026 fts5BufferFree(&pLeaf->buf); |
| 12027 fts5BufferFree(&pLeaf->pgidx); |
| 12028 fts5BufferFree(&pWriter->btterm); |
| 12029 |
| 12030 for(i=0; i<pWriter->nDlidx; i++){ |
| 12031 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); |
| 12032 } |
| 12033 sqlite3_free(pWriter->aDlidx); |
| 12034 } |
| 12035 |
| 12036 static void fts5WriteInit( |
| 12037 Fts5Index *p, |
| 12038 Fts5SegWriter *pWriter, |
| 12039 int iSegid |
| 12040 ){ |
| 12041 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; |
| 12042 |
| 12043 memset(pWriter, 0, sizeof(Fts5SegWriter)); |
| 12044 pWriter->iSegid = iSegid; |
| 12045 |
| 12046 fts5WriteDlidxGrow(p, pWriter, 1); |
| 12047 pWriter->writer.pgno = 1; |
| 12048 pWriter->bFirstTermInPage = 1; |
| 12049 pWriter->iBtPage = 1; |
| 12050 |
| 12051 assert( pWriter->writer.buf.n==0 ); |
| 12052 assert( pWriter->writer.pgidx.n==0 ); |
| 12053 |
| 12054 /* Grow the two buffers to pgsz + padding bytes in size. */ |
| 12055 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); |
| 12056 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); |
| 12057 |
| 12058 if( p->pIdxWriter==0 ){ |
| 12059 Fts5Config *pConfig = p->pConfig; |
| 12060 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( |
| 12061 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", |
| 12062 pConfig->zDb, pConfig->zName |
| 12063 )); |
| 12064 } |
| 12065 |
| 12066 if( p->rc==SQLITE_OK ){ |
| 12067 /* Initialize the 4-byte leaf-page header to 0x00. */ |
| 12068 memset(pWriter->writer.buf.p, 0, 4); |
| 12069 pWriter->writer.buf.n = 4; |
| 12070 |
| 12071 /* Bind the current output segment id to the index-writer. This is an |
| 12072 ** optimization over binding the same value over and over as rows are |
| 12073 ** inserted into %_idx by the current writer. */ |
| 12074 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); |
| 12075 } |
| 12076 } |
| 12077 |
| 12078 /* |
| 12079 ** Iterator pIter was used to iterate through the input segments of on an |
| 12080 ** incremental merge operation. This function is called if the incremental |
| 12081 ** merge step has finished but the input has not been completely exhausted. |
| 12082 */ |
| 12083 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ |
| 12084 int i; |
| 12085 Fts5Buffer buf; |
| 12086 memset(&buf, 0, sizeof(Fts5Buffer)); |
| 12087 for(i=0; i<pIter->nSeg; i++){ |
| 12088 Fts5SegIter *pSeg = &pIter->aSeg[i]; |
| 12089 if( pSeg->pSeg==0 ){ |
| 12090 /* no-op */ |
| 12091 }else if( pSeg->pLeaf==0 ){ |
| 12092 /* All keys from this input segment have been transfered to the output. |
| 12093 ** Set both the first and last page-numbers to 0 to indicate that the |
| 12094 ** segment is now empty. */ |
| 12095 pSeg->pSeg->pgnoLast = 0; |
| 12096 pSeg->pSeg->pgnoFirst = 0; |
| 12097 }else{ |
| 12098 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ |
| 12099 i64 iLeafRowid; |
| 12100 Fts5Data *pData; |
| 12101 int iId = pSeg->pSeg->iSegid; |
| 12102 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; |
| 12103 |
| 12104 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); |
| 12105 pData = fts5DataRead(p, iLeafRowid); |
| 12106 if( pData ){ |
| 12107 fts5BufferZero(&buf); |
| 12108 fts5BufferGrow(&p->rc, &buf, pData->nn); |
| 12109 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); |
| 12110 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); |
| 12111 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); |
| 12112 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); |
| 12113 if( p->rc==SQLITE_OK ){ |
| 12114 /* Set the szLeaf field */ |
| 12115 fts5PutU16(&buf.p[2], (u16)buf.n); |
| 12116 } |
| 12117 |
| 12118 /* Set up the new page-index array */ |
| 12119 fts5BufferAppendVarint(&p->rc, &buf, 4); |
| 12120 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno |
| 12121 && pSeg->iEndofDoclist<pData->szLeaf |
| 12122 ){ |
| 12123 int nDiff = pData->szLeaf - pSeg->iEndofDoclist; |
| 12124 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); |
| 12125 fts5BufferAppendBlob(&p->rc, &buf, |
| 12126 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] |
| 12127 ); |
| 12128 } |
| 12129 |
| 12130 fts5DataRelease(pData); |
| 12131 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; |
| 12132 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); |
| 12133 fts5DataWrite(p, iLeafRowid, buf.p, buf.n); |
| 12134 } |
| 12135 } |
| 12136 } |
| 12137 fts5BufferFree(&buf); |
| 12138 } |
| 12139 |
| 12140 static void fts5MergeChunkCallback( |
| 12141 Fts5Index *p, |
| 12142 void *pCtx, |
| 12143 const u8 *pChunk, int nChunk |
| 12144 ){ |
| 12145 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; |
| 12146 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); |
| 12147 } |
| 12148 |
| 12149 /* |
| 12150 ** Merge segments from level iLvl of the structure *ppStruct into a |
** single output segment on level iLvl+1.
**
** If level iLvl already has a merge in progress (pLvl->nMerge!=0), that
** merge is resumed: the first nMerge segments of the level are the inputs
** and the output is the last segment of level iLvl+1, with writing
** continuing at page (pgnoLast+1). Otherwise a new segment id is
** allocated, a new segment is appended to level iLvl+1 (a level is added
** to the structure first if iLvl is currently the last level, in which
** case *ppStruct is updated) and every segment on level iLvl is an input.
**
** If pnRem is not NULL, the merge stops at the first new term seen once
** more than *pnRem leaf pages have been written, and *pnRem is reduced by
** the number of leaves written before returning. If pnRem is NULL the
** loop only ends when the input iterator reports EOF.
**
** When the input is exhausted the input segments are removed from the
** %_data table and from level iLvl. Otherwise fts5TrimSegments() is called
** and pLvl->nMerge records the number of inputs so the merge can be
** resumed later. Errors are left in p->rc.
| 12151 */ |
| 12152 static void fts5IndexMergeLevel( |
| 12153 Fts5Index *p, /* FTS5 backend object */ |
| 12154 Fts5Structure **ppStruct, /* IN/OUT: Structure of index */ |
| 12155 int iLvl, /* Level to read input from */ |
| 12156 int *pnRem /* Write up to this many output leaves */ |
| 12157 ){ |
| 12158 Fts5Structure *pStruct = *ppStruct; |
| 12159 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 12160 Fts5StructureLevel *pLvlOut; |
| 12161 Fts5Iter *pIter = 0; /* Iterator to read input data */ |
| 12162 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ |
| 12163 int nInput; /* Number of input segments */ |
| 12164 Fts5SegWriter writer; /* Writer object */ |
| 12165 Fts5StructureSegment *pSeg; /* Output segment */ |
| 12166 Fts5Buffer term; |
| 12167 int bOldest; /* True if the output segment is the oldest */ |
| 12168 int eDetail = p->pConfig->eDetail; |
| 12169 const int flags = FTS5INDEX_QUERY_NOOUTPUT; |
| 12170 |
| 12171 assert( iLvl<pStruct->nLevel ); |
| 12172 assert( pLvl->nMerge<=pLvl->nSeg ); |
| 12173 |
| 12174 memset(&writer, 0, sizeof(Fts5SegWriter)); |
| 12175 memset(&term, 0, sizeof(Fts5Buffer)); |
| 12176 if( pLvl->nMerge ){ |
/* Resume an existing merge: the output is the last segment on the next
** level and writing continues after its last page. */
| 12177 pLvlOut = &pStruct->aLevel[iLvl+1]; |
| 12178 assert( pLvlOut->nSeg>0 ); |
| 12179 nInput = pLvl->nMerge; |
| 12180 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; |
| 12181 |
| 12182 fts5WriteInit(p, &writer, pSeg->iSegid); |
| 12183 writer.writer.pgno = pSeg->pgnoLast+1; |
| 12184 writer.iBtPage = 0; |
| 12185 }else{ |
| 12186 int iSegid = fts5AllocateSegid(p, pStruct); |
| 12187 |
| 12188 /* Extend the Fts5Structure object as required to ensure the output |
| 12189 ** segment exists. */ |
| 12190 if( iLvl==pStruct->nLevel-1 ){ |
| 12191 fts5StructureAddLevel(&p->rc, ppStruct); |
| 12192 pStruct = *ppStruct; |
| 12193 } |
| 12194 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); |
| 12195 if( p->rc ) return; |
/* pStruct may have been replaced by the AddLevel call above, so the level
** pointers are recomputed from it. */
| 12196 pLvl = &pStruct->aLevel[iLvl]; |
| 12197 pLvlOut = &pStruct->aLevel[iLvl+1]; |
| 12198 |
| 12199 fts5WriteInit(p, &writer, iSegid); |
| 12200 |
| 12201 /* Add the new segment to the output level */ |
| 12202 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; |
| 12203 pLvlOut->nSeg++; |
| 12204 pSeg->pgnoFirst = 1; |
| 12205 pSeg->iSegid = iSegid; |
| 12206 pStruct->nSegment++; |
| 12207 |
| 12208 /* Read input from all segments in the input level */ |
| 12209 nInput = pLvl->nSeg; |
| 12210 } |
/* The output is "oldest" if it is the only segment on the output level and
** the output level is the last level of the structure. */
| 12211 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); |
| 12212 |
| 12213 assert( iLvl>=0 ); |
| 12214 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); |
| 12215 fts5MultiIterEof(p, pIter)==0; |
| 12216 fts5MultiIterNext(p, pIter, 0, 0) |
| 12217 ){ |
| 12218 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 12219 int nPos; /* position-list size field value */ |
| 12220 int nTerm; |
| 12221 const u8 *pTerm; |
| 12222 |
| 12223 /* Check for key annihilation. */ |
| 12224 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; |
| 12225 |
| 12226 pTerm = fts5MultiIterTerm(pIter, &nTerm); |
| 12227 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ |
/* The page budget is only checked when a new term begins. */
| 12228 if( pnRem && writer.nLeafWritten>nRem ){ |
| 12229 break; |
| 12230 } |
| 12231 |
| 12232 /* This is a new term. Append a term to the output segment. */ |
| 12233 fts5WriteAppendTerm(p, &writer, nTerm, pTerm); |
| 12234 fts5BufferSet(&p->rc, &term, nTerm, pTerm); |
| 12235 } |
| 12236 |
| 12237 /* Append the rowid to the output */ |
| 12238 /* WRITEPOSLISTSIZE */ |
| 12239 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); |
| 12240 |
| 12241 if( eDetail==FTS5_DETAIL_NONE ){ |
/* detail=none: a delete entry is followed by one 0x00 byte, or by two if
** the entry also has nPos>0. Other entries get nothing appended. */
| 12242 if( pSegIter->bDel ){ |
| 12243 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); |
| 12244 if( pSegIter->nPos>0 ){ |
| 12245 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); |
| 12246 } |
| 12247 } |
| 12248 }else{ |
| 12249 /* Append the position-list data to the output */ |
| 12250 nPos = pSegIter->nPos*2 + pSegIter->bDel; |
| 12251 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); |
| 12252 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); |
| 12253 } |
| 12254 } |
| 12255 |
| 12256 /* Flush the last leaf page to disk. Set the output segment b-tree height |
| 12257 ** and last leaf page number at the same time. */ |
| 12258 fts5WriteFinish(p, &writer, &pSeg->pgnoLast); |
| 12259 |
| 12260 if( fts5MultiIterEof(p, pIter) ){ |
| 12261 int i; |
| 12262 |
| 12263 /* Remove the redundant segments from the %_data table */ |
| 12264 for(i=0; i<nInput; i++){ |
| 12265 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); |
| 12266 } |
| 12267 |
| 12268 /* Remove the redundant segments from the input level */ |
| 12269 if( pLvl->nSeg!=nInput ){ |
| 12270 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); |
| 12271 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); |
| 12272 } |
| 12273 pStruct->nSegment -= nInput; |
| 12274 pLvl->nSeg -= nInput; |
| 12275 pLvl->nMerge = 0; |
/* An output segment with no leaf pages is dropped from the structure. */
| 12276 if( pSeg->pgnoLast==0 ){ |
| 12277 pLvlOut->nSeg--; |
| 12278 pStruct->nSegment--; |
| 12279 } |
| 12280 }else{ |
/* Input remains: trim what has been consumed and record the number of
** inputs so that a later call resumes this merge. */
| 12281 assert( pSeg->pgnoLast>0 ); |
| 12282 fts5TrimSegments(p, pIter); |
| 12283 pLvl->nMerge = nInput; |
| 12284 } |
| 12285 |
| 12286 fts5MultiIterFree(pIter); |
| 12287 fts5BufferFree(&term); |
| 12288 if( pnRem ) *pnRem -= writer.nLeafWritten; |
| 12289 } |
| 12290 |
| 12291 /* |
| 12292 ** Do up to nPg pages of automerge work on the index. |
| 12293 ** |
| 12294 ** Return true if any changes were actually made, or false otherwise. |
| 12295 */ |
| 12296 static int fts5IndexMerge( |
| 12297 Fts5Index *p, /* FTS5 backend object */ |
| 12298 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
| 12299 int nPg, /* Pages of work to do */ |
| 12300 int nMin /* Minimum number of segments to merge */ |
| 12301 ){ |
| 12302 int nRem = nPg; |
| 12303 int bRet = 0; |
| 12304 Fts5Structure *pStruct = *ppStruct; |
| 12305 while( nRem>0 && p->rc==SQLITE_OK ){ |
| 12306 int iLvl; /* To iterate through levels */ |
| 12307 int iBestLvl = 0; /* Level offering the most input segments */ |
| 12308 int nBest = 0; /* Number of input segments on best level */ |
| 12309 |
| 12310 /* Set iBestLvl to the level to read input segments from. */ |
| 12311 assert( pStruct->nLevel>0 ); |
| 12312 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 12313 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 12314 if( pLvl->nMerge ){ |
| 12315 if( pLvl->nMerge>nBest ){ |
| 12316 iBestLvl = iLvl; |
| 12317 nBest = pLvl->nMerge; |
| 12318 } |
| 12319 break; |
| 12320 } |
| 12321 if( pLvl->nSeg>nBest ){ |
| 12322 nBest = pLvl->nSeg; |
| 12323 iBestLvl = iLvl; |
| 12324 } |
| 12325 } |
| 12326 |
| 12327 /* If nBest is still 0, then the index must be empty. */ |
| 12328 #ifdef SQLITE_DEBUG |
| 12329 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ |
| 12330 assert( pStruct->aLevel[iLvl].nSeg==0 ); |
| 12331 } |
| 12332 #endif |
| 12333 |
| 12334 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
| 12335 break; |
| 12336 } |
| 12337 bRet = 1; |
| 12338 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); |
| 12339 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
| 12340 fts5StructurePromote(p, iBestLvl+1, pStruct); |
| 12341 } |
| 12342 } |
| 12343 *ppStruct = pStruct; |
| 12344 return bRet; |
| 12345 } |
| 12346 |
| 12347 /* |
| 12348 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 |
| 12349 ** segment. This function updates the write-counter accordingly and, if |
| 12350 ** necessary, performs incremental merge work. |
| 12351 ** |
| 12352 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 12353 ** already occurred, this function is a no-op. |
| 12354 */ |
| 12355 static void fts5IndexAutomerge( |
| 12356 Fts5Index *p, /* FTS5 backend object */ |
| 12357 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
| 12358 int nLeaf /* Number of output leaves just written */ |
| 12359 ){ |
| 12360 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ |
| 12361 Fts5Structure *pStruct = *ppStruct; |
| 12362 u64 nWrite; /* Initial value of write-counter */ |
| 12363 int nWork; /* Number of work-quanta to perform */ |
| 12364 int nRem; /* Number of leaf pages left to write */ |
| 12365 |
| 12366 /* Update the write-counter. While doing so, set nWork. */ |
| 12367 nWrite = pStruct->nWriteCounter; |
| 12368 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); |
| 12369 pStruct->nWriteCounter += nLeaf; |
| 12370 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); |
| 12371 |
| 12372 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); |
| 12373 } |
| 12374 } |
| 12375 |
| 12376 static void fts5IndexCrisismerge( |
| 12377 Fts5Index *p, /* FTS5 backend object */ |
| 12378 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ |
| 12379 ){ |
| 12380 const int nCrisis = p->pConfig->nCrisisMerge; |
| 12381 Fts5Structure *pStruct = *ppStruct; |
| 12382 int iLvl = 0; |
| 12383 |
| 12384 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); |
| 12385 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ |
| 12386 fts5IndexMergeLevel(p, &pStruct, iLvl, 0); |
| 12387 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); |
| 12388 fts5StructurePromote(p, iLvl+1, pStruct); |
| 12389 iLvl++; |
| 12390 } |
| 12391 *ppStruct = pStruct; |
| 12392 } |
| 12393 |
| 12394 static int fts5IndexReturn(Fts5Index *p){ |
| 12395 int rc = p->rc; |
| 12396 p->rc = SQLITE_OK; |
| 12397 return rc; |
| 12398 } |
| 12399 |
| 12400 typedef struct Fts5FlushCtx Fts5FlushCtx; |
| 12401 struct Fts5FlushCtx { |
| 12402 Fts5Index *pIdx; |
| 12403 Fts5SegWriter writer; |
| 12404 }; |
| 12405 |
| 12406 /* |
| 12407 ** Buffer aBuf[] contains a list of varints, all small enough to fit |
| 12408 ** in a 32-bit integer. Return the size of the largest prefix of this |
| 12409 ** list nMax bytes or less in size. |
| 12410 */ |
| 12411 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ |
| 12412 int ret; |
| 12413 u32 dummy; |
| 12414 ret = fts5GetVarint32(aBuf, dummy); |
| 12415 if( ret<nMax ){ |
| 12416 while( 1 ){ |
| 12417 int i = fts5GetVarint32(&aBuf[ret], dummy); |
| 12418 if( (ret + i) > nMax ) break; |
| 12419 ret += i; |
| 12420 } |
| 12421 } |
| 12422 return ret; |
| 12423 } |
| 12424 |
| 12425 /* |
| 12426 ** Flush the contents of the in-memory hash table (p->pHash) to a new |
| 12427 ** level-0 segment on disk. Also update the corresponding structure record. |
| 12428 ** |
** Each hash entry supplies a term and its doclist. If the doclist fits on
** the current leaf it is copied in one go; otherwise it is copied rowid by
** rowid, with a position list that does not fit being split across leaves
** at varint boundaries.
**
** After the segment has been written, automerge and crisis-merge work is
** performed and the structure record is written back.
**
| 12429 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 12430 ** already occurred, this function is a no-op. |
| 12431 */ |
| 12432 static void fts5FlushOneHash(Fts5Index *p){ |
| 12433 Fts5Hash *pHash = p->pHash; |
| 12434 Fts5Structure *pStruct; |
| 12435 int iSegid; |
| 12436 int pgnoLast = 0; /* Last leaf page number in segment */ |
| 12437 |
| 12438 /* Obtain a reference to the index structure and allocate a new segment-id |
| 12439 ** for the new level-0 segment. */ |
| 12440 pStruct = fts5StructureRead(p); |
| 12441 iSegid = fts5AllocateSegid(p, pStruct); |
| 12442 fts5StructureInvalidate(p); |
| 12443 |
/* A segment is only written if a non-zero segment id was allocated. */
| 12444 if( iSegid ){ |
| 12445 const int pgsz = p->pConfig->pgsz; |
| 12446 int eDetail = p->pConfig->eDetail; |
| 12447 Fts5StructureSegment *pSeg; /* New segment within pStruct */ |
| 12448 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ |
| 12449 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ |
| 12450 |
| 12451 Fts5SegWriter writer; |
| 12452 fts5WriteInit(p, &writer, iSegid); |
| 12453 |
| 12454 pBuf = &writer.writer.buf; |
| 12455 pPgidx = &writer.writer.pgidx; |
| 12456 |
| 12457 /* fts5WriteInit() should have initialized the buffers to (most likely) |
| 12458 ** the maximum space required. */ |
| 12459 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); |
| 12460 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); |
| 12461 |
| 12462 /* Begin scanning through hash table entries. This loop runs once for each |
| 12463 ** term/doclist currently stored within the hash table. */ |
| 12464 if( p->rc==SQLITE_OK ){ |
| 12465 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); |
| 12466 } |
| 12467 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ |
| 12468 const char *zTerm; /* Buffer containing term */ |
| 12469 const u8 *pDoclist; /* Pointer to doclist for this term */ |
| 12470 int nDoclist; /* Size of doclist in bytes */ |
| 12471 |
| 12472 /* Write the term for this entry to disk. */ |
| 12473 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); |
| 12474 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); |
| 12475 |
| 12476 assert( writer.bFirstRowidInPage==0 ); |
| 12477 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ |
| 12478 /* The entire doclist will fit on the current leaf. */ |
| 12479 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); |
| 12480 }else{ |
| 12481 i64 iRowid = 0; |
| 12482 i64 iDelta = 0; |
| 12483 int iOff = 0; |
| 12484 |
| 12485 /* The entire doclist will not fit on this leaf. The following |
| 12486 ** loop iterates through the poslists that make up the current |
| 12487 ** doclist. */ |
| 12488 while( p->rc==SQLITE_OK && iOff<nDoclist ){ |
/* Rowids are stored in the doclist as deltas; iRowid accumulates the
** absolute value. */
| 12489 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta); |
| 12490 iRowid += iDelta; |
| 12491 |
/* The first rowid on a page is written as an absolute value and its
** offset is stored in the first two bytes of the page. Later rowids are
** written as deltas. */
| 12492 if( writer.bFirstRowidInPage ){ |
| 12493 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ |
| 12494 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); |
| 12495 writer.bFirstRowidInPage = 0; |
| 12496 fts5WriteDlidxAppend(p, &writer, iRowid); |
| 12497 }else{ |
| 12498 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); |
| 12499 } |
| 12500 assert( pBuf->n<=pBuf->nSpace ); |
| 12501 |
| 12502 if( eDetail==FTS5_DETAIL_NONE ){ |
/* detail=none: copy up to two 0x00 bytes that follow the rowid, then
** flush the leaf if it has reached the page size. */
| 12503 if( iOff<nDoclist && pDoclist[iOff]==0 ){ |
| 12504 pBuf->p[pBuf->n++] = 0; |
| 12505 iOff++; |
| 12506 if( iOff<nDoclist && pDoclist[iOff]==0 ){ |
| 12507 pBuf->p[pBuf->n++] = 0; |
| 12508 iOff++; |
| 12509 } |
| 12510 } |
| 12511 if( (pBuf->n + pPgidx->n)>=pgsz ){ |
| 12512 fts5WriteFlushLeaf(p, &writer); |
| 12513 } |
| 12514 }else{ |
| 12515 int bDummy; |
| 12516 int nPos; |
/* nCopy is the size of the poslist-size field plus the poslist itself. */
| 12517 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); |
| 12518 nCopy += nPos; |
| 12519 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ |
| 12520 /* The entire poslist will fit on the current leaf. So copy |
| 12521 ** it in one go. */ |
| 12522 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); |
| 12523 }else{ |
| 12524 /* The entire poslist will not fit on this leaf. So it needs |
| 12525 ** to be broken into sections. The only qualification being |
| 12526 ** that each varint must be stored contiguously. */ |
| 12527 const u8 *pPoslist = &pDoclist[iOff]; |
| 12528 int iPos = 0; |
| 12529 while( p->rc==SQLITE_OK ){ |
| 12530 int nSpace = pgsz - pBuf->n - pPgidx->n; |
| 12531 int n = 0; |
/* Copy the remainder if it fits in the space left on the page; otherwise
** copy the prefix chosen by fts5PoslistPrefix(). */
| 12532 if( (nCopy - iPos)<=nSpace ){ |
| 12533 n = nCopy - iPos; |
| 12534 }else{ |
| 12535 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); |
| 12536 } |
| 12537 assert( n>0 ); |
| 12538 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); |
| 12539 iPos += n; |
| 12540 if( (pBuf->n + pPgidx->n)>=pgsz ){ |
| 12541 fts5WriteFlushLeaf(p, &writer); |
| 12542 } |
| 12543 if( iPos>=nCopy ) break; |
| 12544 } |
| 12545 } |
| 12546 iOff += nCopy; |
| 12547 } |
| 12548 } |
| 12549 } |
| 12550 |
| 12551 /* TODO2: Doclist terminator written here. */ |
| 12552 /* pBuf->p[pBuf->n++] = '\0'; */ |
| 12553 assert( pBuf->n<=pBuf->nSpace ); |
| 12554 sqlite3Fts5HashScanNext(pHash); |
| 12555 } |
| 12556 sqlite3Fts5HashClear(pHash); |
| 12557 fts5WriteFinish(p, &writer, &pgnoLast); |
| 12558 |
| 12559 /* Update the Fts5Structure. It is written back to the database by the |
| 12560 ** fts5StructureRelease() call below. */ |
| 12561 if( pStruct->nLevel==0 ){ |
| 12562 fts5StructureAddLevel(&p->rc, &pStruct); |
| 12563 } |
| 12564 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); |
| 12565 if( p->rc==SQLITE_OK ){ |
| 12566 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; |
| 12567 pSeg->iSegid = iSegid; |
| 12568 pSeg->pgnoFirst = 1; |
| 12569 pSeg->pgnoLast = pgnoLast; |
| 12570 pStruct->nSegment++; |
| 12571 } |
| 12572 fts5StructurePromote(p, 0, pStruct); |
| 12573 } |
| 12574 |
/* The calls below run whether or not a segment was written above. If none
** was written, pgnoLast is still 0. */
| 12575 fts5IndexAutomerge(p, &pStruct, pgnoLast); |
| 12576 fts5IndexCrisismerge(p, &pStruct); |
| 12577 fts5StructureWrite(p, pStruct); |
| 12578 fts5StructureRelease(pStruct); |
| 12579 } |
| 12580 |
| 12581 /* |
| 12582 ** Flush any data stored in the in-memory hash tables to the database. |
| 12583 */ |
| 12584 static void fts5IndexFlush(Fts5Index *p){ |
| 12585 /* Unless it is empty, flush the hash table to disk */ |
| 12586 if( p->nPendingData ){ |
| 12587 assert( p->pHash ); |
| 12588 p->nPendingData = 0; |
| 12589 fts5FlushOneHash(p); |
| 12590 } |
| 12591 } |
| 12592 |
/*
** Return a structure object suitable for merging every segment of pStruct
** into one, or NULL if there is nothing to do or an allocation fails.
**
** The value returned is either pStruct itself, with its reference count
** incremented, or a newly allocated structure with a reference count of 1.
** The new structure has one more level than pStruct, with every segment of
** pStruct copied onto that extra level. Allocation errors are left in
** p->rc.
*/
| 12593 static Fts5Structure *fts5IndexOptimizeStruct( |
| 12594 Fts5Index *p, |
| 12595 Fts5Structure *pStruct |
| 12596 ){ |
| 12597 Fts5Structure *pNew = 0; |
| 12598 int nByte = sizeof(Fts5Structure); |
| 12599 int nSeg = pStruct->nSegment; |
| 12600 int i; |
| 12601 |
| 12602 /* Figure out if this structure requires optimization. A structure does |
| 12603 ** not require optimization if either: |
| 12604 ** |
| 12605 ** + it consists of fewer than two segments, or |
| 12606 ** + all segments are on the same level, or |
| 12607 ** + all segments except one are currently inputs to a merge operation. |
| 12608 ** |
| 12609 ** In the first case, return NULL. In the second and third, increment the |
| 12610 ** ref-count on *pStruct and return a copy of the pointer to it. |
| 12611 */ |
| 12612 if( nSeg<2 ) return 0; |
| 12613 for(i=0; i<pStruct->nLevel; i++){ |
| 12614 int nThis = pStruct->aLevel[i].nSeg; |
| 12615 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ |
| 12616 fts5StructureRef(pStruct); |
| 12617 return pStruct; |
| 12618 } |
| 12619 assert( pStruct->aLevel[i].nMerge<=nThis ); |
| 12620 } |
| 12621 |
/* Allocate the new structure with room for one level more than pStruct
** has. */
| 12622 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); |
| 12623 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); |
| 12624 |
| 12625 if( pNew ){ |
| 12626 Fts5StructureLevel *pLvl; |
| 12627 nByte = nSeg * sizeof(Fts5StructureSegment); |
| 12628 pNew->nLevel = pStruct->nLevel+1; |
| 12629 pNew->nRef = 1; |
| 12630 pNew->nWriteCounter = pStruct->nWriteCounter; |
/* pLvl is the extra, last level of the new structure. Its segment array
** has room for all nSeg segments. */
| 12631 pLvl = &pNew->aLevel[pStruct->nLevel]; |
| 12632 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); |
| 12633 if( pLvl->aSeg ){ |
| 12634 int iLvl, iSeg; |
| 12635 int iSegOut = 0; |
| 12636 /* Iterate through all segments, from oldest to newest. Add them to |
| 12637 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest |
| 12638 ** segment in the data structure. */ |
| 12639 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ |
| 12640 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 12641 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; |
| 12642 iSegOut++; |
| 12643 } |
| 12644 } |
| 12645 pNew->nSegment = pLvl->nSeg = nSeg; |
| 12646 }else{ |
/* The segment array could not be allocated: discard the new structure. */
| 12647 sqlite3_free(pNew); |
| 12648 pNew = 0; |
| 12649 } |
| 12650 } |
| 12651 |
| 12652 return pNew; |
| 12653 } |
| 12654 |
| 12655 static int sqlite3Fts5IndexOptimize(Fts5Index *p){ |
| 12656 Fts5Structure *pStruct; |
| 12657 Fts5Structure *pNew = 0; |
| 12658 |
| 12659 assert( p->rc==SQLITE_OK ); |
| 12660 fts5IndexFlush(p); |
| 12661 pStruct = fts5StructureRead(p); |
| 12662 fts5StructureInvalidate(p); |
| 12663 |
| 12664 if( pStruct ){ |
| 12665 pNew = fts5IndexOptimizeStruct(p, pStruct); |
| 12666 } |
| 12667 fts5StructureRelease(pStruct); |
| 12668 |
| 12669 assert( pNew==0 || pNew->nSegment>0 ); |
| 12670 if( pNew ){ |
| 12671 int iLvl; |
| 12672 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} |
| 12673 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ |
| 12674 int nRem = FTS5_OPT_WORK_UNIT; |
| 12675 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); |
| 12676 } |
| 12677 |
| 12678 fts5StructureWrite(p, pNew); |
| 12679 fts5StructureRelease(pNew); |
| 12680 } |
| 12681 |
| 12682 return fts5IndexReturn(p); |
| 12683 } |
| 12684 |
| 12685 /* |
| 12686 ** This is called to implement the special "VALUES('merge', $nMerge)" |
| 12687 ** INSERT command. |
| 12688 */ |
| 12689 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ |
| 12690 Fts5Structure *pStruct = fts5StructureRead(p); |
| 12691 if( pStruct ){ |
| 12692 int nMin = p->pConfig->nUsermerge; |
| 12693 fts5StructureInvalidate(p); |
| 12694 if( nMerge<0 ){ |
| 12695 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); |
| 12696 fts5StructureRelease(pStruct); |
| 12697 pStruct = pNew; |
| 12698 nMin = 2; |
| 12699 nMerge = nMerge*-1; |
| 12700 } |
| 12701 if( pStruct && pStruct->nLevel ){ |
| 12702 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ |
| 12703 fts5StructureWrite(p, pStruct); |
| 12704 } |
| 12705 } |
| 12706 fts5StructureRelease(pStruct); |
| 12707 } |
| 12708 return fts5IndexReturn(p); |
| 12709 } |
| 12710 |
| 12711 static void fts5AppendRowid( |
| 12712 Fts5Index *p, |
| 12713 i64 iDelta, |
| 12714 Fts5Iter *pUnused, |
| 12715 Fts5Buffer *pBuf |
| 12716 ){ |
| 12717 UNUSED_PARAM(pUnused); |
| 12718 fts5BufferAppendVarint(&p->rc, pBuf, iDelta); |
| 12719 } |
| 12720 |
| 12721 static void fts5AppendPoslist( |
| 12722 Fts5Index *p, |
| 12723 i64 iDelta, |
| 12724 Fts5Iter *pMulti, |
| 12725 Fts5Buffer *pBuf |
| 12726 ){ |
| 12727 int nData = pMulti->base.nData; |
| 12728 assert( nData>0 ); |
| 12729 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){ |
| 12730 fts5BufferSafeAppendVarint(pBuf, iDelta); |
| 12731 fts5BufferSafeAppendVarint(pBuf, nData*2); |
| 12732 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); |
| 12733 } |
| 12734 } |
| 12735 |
| 12736 |
| 12737 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ |
| 12738 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; |
| 12739 |
| 12740 assert( pIter->aPoslist ); |
| 12741 if( p>=pIter->aEof ){ |
| 12742 pIter->aPoslist = 0; |
| 12743 }else{ |
| 12744 i64 iDelta; |
| 12745 |
| 12746 p += fts5GetVarint(p, (u64*)&iDelta); |
| 12747 pIter->iRowid += iDelta; |
| 12748 |
| 12749 /* Read position list size */ |
| 12750 if( p[0] & 0x80 ){ |
| 12751 int nPos; |
| 12752 pIter->nSize = fts5GetVarint32(p, nPos); |
| 12753 pIter->nPoslist = (nPos>>1); |
| 12754 }else{ |
| 12755 pIter->nPoslist = ((int)(p[0])) >> 1; |
| 12756 pIter->nSize = 1; |
| 12757 } |
| 12758 |
| 12759 pIter->aPoslist = p; |
| 12760 } |
| 12761 } |
| 12762 |
| 12763 static void fts5DoclistIterInit( |
| 12764 Fts5Buffer *pBuf, |
| 12765 Fts5DoclistIter *pIter |
| 12766 ){ |
| 12767 memset(pIter, 0, sizeof(*pIter)); |
| 12768 pIter->aPoslist = pBuf->p; |
| 12769 pIter->aEof = &pBuf->p[pBuf->n]; |
| 12770 fts5DoclistIterNext(pIter); |
| 12771 } |
| 12772 |
| 12773 #if 0 |
| 12774 /* |
| 12775 ** Append rowid iRowid to buffer pBuf, encoded as a varint holding the |
** difference between iRowid and the previous rowid (*piLastRowid), then
** update *piLastRowid. This function form is disabled; the macro of the
** same name below is what is compiled.
| 12776 ** |
| 12777 ** This function assumes that space within the buffer has already been |
| 12778 ** allocated. |
| 12779 */ |
| 12780 static void fts5MergeAppendDocid( |
| 12781 Fts5Buffer *pBuf, /* Buffer to write to */ |
| 12782 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ |
| 12783 i64 iRowid /* Rowid to append */ |
| 12784 ){ |
| 12785 assert( pBuf->n!=0 || (*piLastRowid)==0 ); |
| 12786 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid); |
| 12787 *piLastRowid = iRowid; |
| 12788 } |
| 12789 #endif |
| 12790 |
/*
** Macro form of the disabled function above. It appends the difference
** (iRowid - iLastRowid) to pBuf as a varint and then sets iLastRowid to
** iRowid. Here iLastRowid is an lvalue, not a pointer. The arguments
** iRowid and iLastRowid are each evaluated more than once.
**
** The expansion is a brace-enclosed block rather than a single statement.
*/
| 12791 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ |
| 12792 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ |
| 12793 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ |
| 12794 (iLastRowid) = (iRowid); \ |
| 12795 } |
| 12796 |
| 12797 /* |
| 12798 ** Swap the contents of buffer *p1 with that of *p2. |
| 12799 */ |
| 12800 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ |
| 12801 Fts5Buffer tmp = *p1; |
| 12802 *p1 = *p2; |
| 12803 *p2 = tmp; |
| 12804 } |
| 12805 |
| 12806 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ |
| 12807 int i = *piOff; |
| 12808 if( i>=pBuf->n ){ |
| 12809 *piOff = -1; |
| 12810 }else{ |
| 12811 u64 iVal; |
| 12812 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); |
| 12813 *piRowid += iVal; |
| 12814 } |
| 12815 } |
| 12816 |
| 12817 /* |
| 12818 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. |
| 12819 ** In this case the buffers consist of a delta-encoded list of rowids only. |
| 12820 */ |
| 12821 static void fts5MergeRowidLists( |
| 12822 Fts5Index *p, /* FTS5 backend object */ |
| 12823 Fts5Buffer *p1, /* First list to merge */ |
| 12824 Fts5Buffer *p2 /* Second list to merge */ |
| 12825 ){ |
| 12826 int i1 = 0; |
| 12827 int i2 = 0; |
| 12828 i64 iRowid1 = 0; |
| 12829 i64 iRowid2 = 0; |
| 12830 i64 iOut = 0; |
| 12831 |
| 12832 Fts5Buffer out; |
| 12833 memset(&out, 0, sizeof(out)); |
| 12834 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); |
| 12835 if( p->rc ) return; |
| 12836 |
| 12837 fts5NextRowid(p1, &i1, &iRowid1); |
| 12838 fts5NextRowid(p2, &i2, &iRowid2); |
| 12839 while( i1>=0 || i2>=0 ){ |
| 12840 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ |
| 12841 assert( iOut==0 || iRowid1>iOut ); |
| 12842 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); |
| 12843 iOut = iRowid1; |
| 12844 fts5NextRowid(p1, &i1, &iRowid1); |
| 12845 }else{ |
| 12846 assert( iOut==0 || iRowid2>iOut ); |
| 12847 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); |
| 12848 iOut = iRowid2; |
| 12849 if( i1>=0 && iRowid1==iRowid2 ){ |
| 12850 fts5NextRowid(p1, &i1, &iRowid1); |
| 12851 } |
| 12852 fts5NextRowid(p2, &i2, &iRowid2); |
| 12853 } |
| 12854 } |
| 12855 |
| 12856 fts5BufferSwap(&out, p1); |
| 12857 fts5BufferFree(&out); |
| 12858 } |
| 12859 |
/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** The two inputs are walked in parallel. At each step the entry with the
** smaller rowid is copied to the output unchanged. If both inputs hold an
** entry for the same rowid, the two position lists are merged into a single
** list in which a position present in both inputs is emitted only once.
**
** If p2 is empty (p2->n==0) this function does nothing and p1 is left
** as it was.
**
** If an error occurs, an error code is left in p->rc. If an error has
** already occurred, this function is a no-op.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  Fts5Buffer *p2                  /* Second list to merge */
){
  if( p2->n ){
    i64 iLastRowid = 0;           /* Last rowid written to "out" */
    Fts5DoclistIter i1;           /* Iterator over p1 */
    Fts5DoclistIter i2;           /* Iterator over p2 */
    Fts5Buffer out = {0, 0, 0};   /* Merged doclist is assembled here */
    Fts5Buffer tmp = {0, 0, 0};   /* Scratch space for a merged poslist */

    /* Size the output for the combined input up front. The "SafeAppend"
    ** calls below presumably rely on this and do not grow the buffer
    ** themselves -- TODO confirm against their definitions. */
    if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);

    /* Main merge loop. It exits as soon as either iterator is exhausted
    ** (aPoslist==0) or an error occurs. Whatever remains in the other
    ** iterator is copied in bulk after the loop. */
    while( 1 ){
      if( i1.iRowid<i2.iRowid ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
        fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
        fts5DoclistIterNext(&i1);
        if( i1.aPoslist==0 ) break;
      }
      else if( i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
        fts5DoclistIterNext(&i2);
        if( i2.aPoslist==0 ) break;
      }
      else{
        /* Merge the two position lists. */
        i64 iPos1 = 0;                      /* Current position from i1 */
        i64 iPos2 = 0;                      /* Current position from i2 */
        int iOff1 = 0;                      /* Read offset within a1 */
        int iOff2 = 0;                      /* Read offset within a2 */
        u8 *a1 = &i1.aPoslist[i1.nSize];    /* i1 poslist, size field skipped */
        u8 *a2 = &i2.aPoslist[i2.nSize];    /* i2 poslist, size field skipped */

        i64 iPrev = 0;                      /* Last position written to tmp */
        /* NOTE(review): "writer" is zeroed but not otherwise referenced in
        ** the code visible here; it looks like a leftover. Confirm that no
        ** macro used below refers to it before removing. */
        Fts5PoslistWriter writer;
        memset(&writer, 0, sizeof(writer));

        fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);
        sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
        if( p->rc ) break;

        /* Prime both readers with their first position. */
        sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        assert( iPos1>=0 && iPos2>=0 );

        /* Emit the smaller of the two first positions unconditionally. On
        ** a tie the i2 value is emitted here; the equal i1 value is then
        ** filtered out later by the (iPos1!=iPrev) tests. */
        if( iPos1<iPos2 ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
        }else{
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
        }

        /* A negative position is how the code below detects that a reader
        ** has run out of data. Keep merging while both still have data. */
        if( iPos1>=0 && iPos2>=0 ){
          while( 1 ){
            if( iPos1<iPos2 ){
              if( iPos1!=iPrev ){
                sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
              }
              sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
              if( iPos1<0 ) break;
            }else{
              assert( iPos2!=iPrev );
              sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
              sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
              if( iPos2<0 ) break;
            }
          }
        }

        /* One reader is exhausted. Emit the pending position of the other
        ** one, then copy the rest of its encoded poslist verbatim. */
        if( iPos1>=0 ){
          if( iPos1!=iPrev ){
            sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
          }
          fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
        }else{
          assert( iPos2>=0 && iPos2!=iPrev );
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
          fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
        }

        /* WRITEPOSLISTSIZE */
        /* The size field is written as twice the poslist length in bytes. */
        fts5BufferSafeAppendVarint(&out, tmp.n * 2);
        fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
        if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
      }
    }

    /* Copy whatever is left of the non-exhausted input, from its current
    ** entry through to the end of its buffer (aEof). */
    if( i1.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
      fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
    }
    else if( i2.aPoslist ){
      fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
      fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
    }

    /* Replace the contents of p1 with the merged list, then release the
    ** temporary buffers. */
    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);
    fts5BufferFree(&out);
  }
}
| 12980 |
| 12981 static void fts5SetupPrefixIter( |
| 12982 Fts5Index *p, /* Index to read from */ |
| 12983 int bDesc, /* True for "ORDER BY rowid DESC" */ |
| 12984 const u8 *pToken, /* Buffer containing prefix to match */ |
| 12985 int nToken, /* Size of buffer pToken in bytes */ |
| 12986 Fts5Colset *pColset, /* Restrict matches to these columns */ |
| 12987 Fts5Iter **ppIter /* OUT: New iterator */ |
| 12988 ){ |
| 12989 Fts5Structure *pStruct; |
| 12990 Fts5Buffer *aBuf; |
| 12991 const int nBuf = 32; |
| 12992 |
| 12993 void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); |
| 12994 void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*); |
| 12995 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 12996 xMerge = fts5MergeRowidLists; |
| 12997 xAppend = fts5AppendRowid; |
| 12998 }else{ |
| 12999 xMerge = fts5MergePrefixLists; |
| 13000 xAppend = fts5AppendPoslist; |
| 13001 } |
| 13002 |
| 13003 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); |
| 13004 pStruct = fts5StructureRead(p); |
| 13005 |
| 13006 if( aBuf && pStruct ){ |
| 13007 const int flags = FTS5INDEX_QUERY_SCAN |
| 13008 | FTS5INDEX_QUERY_SKIPEMPTY |
| 13009 | FTS5INDEX_QUERY_NOOUTPUT; |
| 13010 int i; |
| 13011 i64 iLastRowid = 0; |
| 13012 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ |
| 13013 Fts5Data *pData; |
| 13014 Fts5Buffer doclist; |
| 13015 int bNewTerm = 1; |
| 13016 |
| 13017 memset(&doclist, 0, sizeof(doclist)); |
| 13018 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); |
| 13019 fts5IterSetOutputCb(&p->rc, p1); |
| 13020 for( /* no-op */ ; |
| 13021 fts5MultiIterEof(p, p1)==0; |
| 13022 fts5MultiIterNext2(p, p1, &bNewTerm) |
| 13023 ){ |
| 13024 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; |
| 13025 int nTerm = pSeg->term.n; |
| 13026 const u8 *pTerm = pSeg->term.p; |
| 13027 p1->xSetOutputs(p1, pSeg); |
| 13028 |
| 13029 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); |
| 13030 if( bNewTerm ){ |
| 13031 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; |
| 13032 } |
| 13033 |
| 13034 if( p1->base.nData==0 ) continue; |
| 13035 |
| 13036 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ |
| 13037 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ |
| 13038 assert( i<nBuf ); |
| 13039 if( aBuf[i].n==0 ){ |
| 13040 fts5BufferSwap(&doclist, &aBuf[i]); |
| 13041 fts5BufferZero(&doclist); |
| 13042 }else{ |
| 13043 xMerge(p, &doclist, &aBuf[i]); |
| 13044 fts5BufferZero(&aBuf[i]); |
| 13045 } |
| 13046 } |
| 13047 iLastRowid = 0; |
| 13048 } |
| 13049 |
| 13050 xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); |
| 13051 iLastRowid = p1->base.iRowid; |
| 13052 } |
| 13053 |
| 13054 for(i=0; i<nBuf; i++){ |
| 13055 if( p->rc==SQLITE_OK ){ |
| 13056 xMerge(p, &doclist, &aBuf[i]); |
| 13057 } |
| 13058 fts5BufferFree(&aBuf[i]); |
| 13059 } |
| 13060 fts5MultiIterFree(p1); |
| 13061 |
| 13062 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); |
| 13063 if( pData ){ |
| 13064 pData->p = (u8*)&pData[1]; |
| 13065 pData->nn = pData->szLeaf = doclist.n; |
| 13066 memcpy(pData->p, doclist.p, doclist.n); |
| 13067 fts5MultiIterNew2(p, pData, bDesc, ppIter); |
| 13068 } |
| 13069 fts5BufferFree(&doclist); |
| 13070 } |
| 13071 |
| 13072 fts5StructureRelease(pStruct); |
| 13073 sqlite3_free(aBuf); |
| 13074 } |
| 13075 |
| 13076 |
| 13077 /* |
| 13078 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain |
| 13079 ** to the document with rowid iRowid. |
| 13080 */ |
| 13081 static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ |
| 13082 assert( p->rc==SQLITE_OK ); |
| 13083 |
| 13084 /* Allocate the hash table if it has not already been allocated */ |
| 13085 if( p->pHash==0 ){ |
| 13086 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); |
| 13087 } |
| 13088 |
| 13089 /* Flush the hash table to disk if required */ |
| 13090 if( iRowid<p->iWriteRowid |
| 13091 || (iRowid==p->iWriteRowid && p->bDelete==0) |
| 13092 || (p->nPendingData > p->pConfig->nHashSize) |
| 13093 ){ |
| 13094 fts5IndexFlush(p); |
| 13095 } |
| 13096 |
| 13097 p->iWriteRowid = iRowid; |
| 13098 p->bDelete = bDelete; |
| 13099 return fts5IndexReturn(p); |
| 13100 } |
| 13101 |
| 13102 /* |
| 13103 ** Commit data to disk. |
| 13104 */ |
| 13105 static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ |
| 13106 assert( p->rc==SQLITE_OK ); |
| 13107 fts5IndexFlush(p); |
| 13108 if( bCommit ) fts5CloseReader(p); |
| 13109 return fts5IndexReturn(p); |
| 13110 } |
| 13111 |
| 13112 /* |
| 13113 ** Discard any data stored in the in-memory hash tables. Do not write it |
| 13114 ** to the database. Additionally, assume that the contents of the %_data |
| 13115 ** table may have changed on disk. So any in-memory caches of %_data |
| 13116 ** records must be invalidated. |
| 13117 */ |
| 13118 static int sqlite3Fts5IndexRollback(Fts5Index *p){ |
| 13119 fts5CloseReader(p); |
| 13120 fts5IndexDiscardData(p); |
| 13121 fts5StructureInvalidate(p); |
| 13122 /* assert( p->rc==SQLITE_OK ); */ |
| 13123 return SQLITE_OK; |
| 13124 } |
| 13125 |
| 13126 /* |
| 13127 ** The %_data table is completely empty when this function is called. This |
| 13128 ** function populates it with the initial structure objects for each index, |
| 13129 ** and the initial version of the "averages" record (a zero-byte blob). |
| 13130 */ |
| 13131 static int sqlite3Fts5IndexReinit(Fts5Index *p){ |
| 13132 Fts5Structure s; |
| 13133 fts5StructureInvalidate(p); |
| 13134 memset(&s, 0, sizeof(Fts5Structure)); |
| 13135 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); |
| 13136 fts5StructureWrite(p, &s); |
| 13137 return fts5IndexReturn(p); |
| 13138 } |
| 13139 |
| 13140 /* |
| 13141 ** Open a new Fts5Index handle. If the bCreate argument is true, create |
| 13142 ** and initialize the underlying %_data table. |
| 13143 ** |
| 13144 ** If successful, set *pp to point to the new object and return SQLITE_OK. |
| 13145 ** Otherwise, set *pp to NULL and return an SQLite error code. |
| 13146 */ |
| 13147 static int sqlite3Fts5IndexOpen( |
| 13148 Fts5Config *pConfig, |
| 13149 int bCreate, |
| 13150 Fts5Index **pp, |
| 13151 char **pzErr |
| 13152 ){ |
| 13153 int rc = SQLITE_OK; |
| 13154 Fts5Index *p; /* New object */ |
| 13155 |
| 13156 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); |
| 13157 if( rc==SQLITE_OK ){ |
| 13158 p->pConfig = pConfig; |
| 13159 p->nWorkUnit = FTS5_WORK_UNIT; |
| 13160 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); |
| 13161 if( p->zDataTbl && bCreate ){ |
| 13162 rc = sqlite3Fts5CreateTable( |
| 13163 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr |
| 13164 ); |
| 13165 if( rc==SQLITE_OK ){ |
| 13166 rc = sqlite3Fts5CreateTable(pConfig, "idx", |
| 13167 "segid, term, pgno, PRIMARY KEY(segid, term)", |
| 13168 1, pzErr |
| 13169 ); |
| 13170 } |
| 13171 if( rc==SQLITE_OK ){ |
| 13172 rc = sqlite3Fts5IndexReinit(p); |
| 13173 } |
| 13174 } |
| 13175 } |
| 13176 |
| 13177 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); |
| 13178 if( rc ){ |
| 13179 sqlite3Fts5IndexClose(p); |
| 13180 *pp = 0; |
| 13181 } |
| 13182 return rc; |
| 13183 } |
| 13184 |
| 13185 /* |
| 13186 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). |
| 13187 */ |
| 13188 static int sqlite3Fts5IndexClose(Fts5Index *p){ |
| 13189 int rc = SQLITE_OK; |
| 13190 if( p ){ |
| 13191 assert( p->pReader==0 ); |
| 13192 fts5StructureInvalidate(p); |
| 13193 sqlite3_finalize(p->pWriter); |
| 13194 sqlite3_finalize(p->pDeleter); |
| 13195 sqlite3_finalize(p->pIdxWriter); |
| 13196 sqlite3_finalize(p->pIdxDeleter); |
| 13197 sqlite3_finalize(p->pIdxSelect); |
| 13198 sqlite3_finalize(p->pDataVersion); |
| 13199 sqlite3Fts5HashFree(p->pHash); |
| 13200 sqlite3_free(p->zDataTbl); |
| 13201 sqlite3_free(p); |
| 13202 } |
| 13203 return rc; |
| 13204 } |
| 13205 |
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
**
** A character is one byte, or a lead byte (value 0xC0 or greater) followed
** by any continuation bytes (10xxxxxx) that are present. This is the same
** rule fts5IndexCharlen() uses to count characters.
**
** No byte at or beyond p[nByte] is ever read, so the return value never
** exceeds nByte. A multi-byte sequence cut short by the end of the buffer
** is counted as one character made of the bytes that are available.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p, 
  int nByte, 
  int nChar
){
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Skip continuation bytes, but stop at the end of the buffer. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}
| 13226 |
/*
** Count the characters in the nIn byte UTF-8 buffer pIn. Each byte below
** 0xC0 counts as one character; each byte of 0xC0 or above counts as one
** character together with the continuation bytes (10xxxxxx) that directly
** follow it within the buffer.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nCount;                     /* Characters seen so far */
  int iByte = 0;                  /* Offset of next byte to examine */

  for(nCount=0; iByte<nIn; nCount++){
    unsigned char c = (unsigned char)pIn[iByte];
    iByte++;
    if( c>=0xc0 ){
      while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
        iByte++;
      }
    }
  }
  return nCount;
}
| 13242 |
| 13243 /* |
| 13244 ** Insert or remove data to or from the index. Each time a document is |
| 13245 ** added to or removed from the index, this function is called one or more |
| 13246 ** times. |
| 13247 ** |
| 13248 ** For an insert, it must be called once for each token in the new document. |
| 13249 ** If the operation is a delete, it must be called (at least) once for each |
| 13250 ** unique token in the document with an iCol value less than zero. The iPos |
| 13251 ** argument is ignored for a delete. |
| 13252 */ |
| 13253 static int sqlite3Fts5IndexWrite( |
| 13254 Fts5Index *p, /* Index to write to */ |
| 13255 int iCol, /* Column token appears in (-ve -> delete) */ |
| 13256 int iPos, /* Position of token within column */ |
| 13257 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 13258 ){ |
| 13259 int i; /* Used to iterate through indexes */ |
| 13260 int rc = SQLITE_OK; /* Return code */ |
| 13261 Fts5Config *pConfig = p->pConfig; |
| 13262 |
| 13263 assert( p->rc==SQLITE_OK ); |
| 13264 assert( (iCol<0)==p->bDelete ); |
| 13265 |
| 13266 /* Add the entry to the main terms index. */ |
| 13267 rc = sqlite3Fts5HashWrite( |
| 13268 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken |
| 13269 ); |
| 13270 |
| 13271 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ |
| 13272 const int nChar = pConfig->aPrefix[i]; |
| 13273 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); |
| 13274 if( nByte ){ |
| 13275 rc = sqlite3Fts5HashWrite(p->pHash, |
| 13276 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, |
| 13277 nByte |
| 13278 ); |
| 13279 } |
| 13280 } |
| 13281 |
| 13282 return rc; |
| 13283 } |
| 13284 |
| 13285 /* |
| 13286 ** Open a new iterator to iterate though all rowid that match the |
| 13287 ** specified token or token prefix. |
| 13288 */ |
| 13289 static int sqlite3Fts5IndexQuery( |
| 13290 Fts5Index *p, /* FTS index to query */ |
| 13291 const char *pToken, int nToken, /* Token (or prefix) to query for */ |
| 13292 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ |
| 13293 Fts5Colset *pColset, /* Match these columns only */ |
| 13294 Fts5IndexIter **ppIter /* OUT: New iterator object */ |
| 13295 ){ |
| 13296 Fts5Config *pConfig = p->pConfig; |
| 13297 Fts5Iter *pRet = 0; |
| 13298 Fts5Buffer buf = {0, 0, 0}; |
| 13299 |
| 13300 /* If the QUERY_SCAN flag is set, all other flags must be clear. */ |
| 13301 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); |
| 13302 |
| 13303 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ |
| 13304 int iIdx = 0; /* Index to search */ |
| 13305 memcpy(&buf.p[1], pToken, nToken); |
| 13306 |
| 13307 /* Figure out which index to search and set iIdx accordingly. If this |
| 13308 ** is a prefix query for which there is no prefix index, set iIdx to |
| 13309 ** greater than pConfig->nPrefix to indicate that the query will be |
| 13310 ** satisfied by scanning multiple terms in the main index. |
| 13311 ** |
| 13312 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a |
| 13313 ** prefix-query. Instead of using a prefix-index (if one exists), |
| 13314 ** evaluate the prefix query using the main FTS index. This is used |
| 13315 ** for internal sanity checking by the integrity-check in debug |
| 13316 ** mode only. */ |
| 13317 #ifdef SQLITE_DEBUG |
| 13318 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ |
| 13319 assert( flags & FTS5INDEX_QUERY_PREFIX ); |
| 13320 iIdx = 1+pConfig->nPrefix; |
| 13321 }else |
| 13322 #endif |
| 13323 if( flags & FTS5INDEX_QUERY_PREFIX ){ |
| 13324 int nChar = fts5IndexCharlen(pToken, nToken); |
| 13325 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ |
| 13326 if( pConfig->aPrefix[iIdx-1]==nChar ) break; |
| 13327 } |
| 13328 } |
| 13329 |
| 13330 if( iIdx<=pConfig->nPrefix ){ |
| 13331 /* Straight index lookup */ |
| 13332 Fts5Structure *pStruct = fts5StructureRead(p); |
| 13333 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); |
| 13334 if( pStruct ){ |
| 13335 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, |
| 13336 pColset, buf.p, nToken+1, -1, 0, &pRet |
| 13337 ); |
| 13338 fts5StructureRelease(pStruct); |
| 13339 } |
| 13340 }else{ |
| 13341 /* Scan multiple terms in the main index */ |
| 13342 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; |
| 13343 buf.p[0] = FTS5_MAIN_PREFIX; |
| 13344 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); |
| 13345 assert( p->rc!=SQLITE_OK || pRet->pColset==0 ); |
| 13346 fts5IterSetOutputCb(&p->rc, pRet); |
| 13347 if( p->rc==SQLITE_OK ){ |
| 13348 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; |
| 13349 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); |
| 13350 } |
| 13351 } |
| 13352 |
| 13353 if( p->rc ){ |
| 13354 sqlite3Fts5IterClose(&pRet->base); |
| 13355 pRet = 0; |
| 13356 fts5CloseReader(p); |
| 13357 } |
| 13358 |
| 13359 *ppIter = &pRet->base; |
| 13360 sqlite3Fts5BufferFree(&buf); |
| 13361 } |
| 13362 return fts5IndexReturn(p); |
| 13363 } |
| 13364 |
/*
** Return true if the iterator passed as the only argument is at EOF.
** (No function follows this comment here; fts5QueryCksum() below performs
** this test by calling sqlite3Fts5IterEof().)
*/
| 13368 /* |
| 13369 ** Move to the next matching rowid. |
| 13370 */ |
| 13371 static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ |
| 13372 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 13373 assert( pIter->pIndex->rc==SQLITE_OK ); |
| 13374 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); |
| 13375 return fts5IndexReturn(pIter->pIndex); |
| 13376 } |
| 13377 |
| 13378 /* |
| 13379 ** Move to the next matching term/rowid. Used by the fts5vocab module. |
| 13380 */ |
| 13381 static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ |
| 13382 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 13383 Fts5Index *p = pIter->pIndex; |
| 13384 |
| 13385 assert( pIter->pIndex->rc==SQLITE_OK ); |
| 13386 |
| 13387 fts5MultiIterNext(p, pIter, 0, 0); |
| 13388 if( p->rc==SQLITE_OK ){ |
| 13389 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 13390 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ |
| 13391 fts5DataRelease(pSeg->pLeaf); |
| 13392 pSeg->pLeaf = 0; |
| 13393 pIter->base.bEof = 1; |
| 13394 } |
| 13395 } |
| 13396 |
| 13397 return fts5IndexReturn(pIter->pIndex); |
| 13398 } |
| 13399 |
| 13400 /* |
| 13401 ** Move to the next matching rowid that occurs at or after iMatch. The |
| 13402 ** definition of "at or after" depends on whether this iterator iterates |
| 13403 ** in ascending or descending rowid order. |
| 13404 */ |
| 13405 static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ |
| 13406 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 13407 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); |
| 13408 return fts5IndexReturn(pIter->pIndex); |
| 13409 } |
| 13410 |
| 13411 /* |
| 13412 ** Return the current term. |
| 13413 */ |
| 13414 static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ |
| 13415 int n; |
| 13416 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); |
| 13417 *pn = n-1; |
| 13418 return &z[1]; |
| 13419 } |
| 13420 |
| 13421 /* |
| 13422 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). |
| 13423 */ |
| 13424 static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ |
| 13425 if( pIndexIter ){ |
| 13426 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 13427 Fts5Index *pIndex = pIter->pIndex; |
| 13428 fts5MultiIterFree(pIter); |
| 13429 fts5CloseReader(pIndex); |
| 13430 } |
| 13431 } |
| 13432 |
| 13433 /* |
| 13434 ** Read and decode the "averages" record from the database. |
| 13435 ** |
| 13436 ** Parameter anSize must point to an array of size nCol, where nCol is |
| 13437 ** the number of user defined columns in the FTS table. |
| 13438 */ |
| 13439 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ |
| 13440 int nCol = p->pConfig->nCol; |
| 13441 Fts5Data *pData; |
| 13442 |
| 13443 *pnRow = 0; |
| 13444 memset(anSize, 0, sizeof(i64) * nCol); |
| 13445 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); |
| 13446 if( p->rc==SQLITE_OK && pData->nn ){ |
| 13447 int i = 0; |
| 13448 int iCol; |
| 13449 i += fts5GetVarint(&pData->p[i], (u64*)pnRow); |
| 13450 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ |
| 13451 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); |
| 13452 } |
| 13453 } |
| 13454 |
| 13455 fts5DataRelease(pData); |
| 13456 return fts5IndexReturn(p); |
| 13457 } |
| 13458 |
| 13459 /* |
| 13460 ** Replace the current "averages" record with the contents of the buffer |
| 13461 ** supplied as the second argument. |
| 13462 */ |
| 13463 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData)
{ |
| 13464 assert( p->rc==SQLITE_OK ); |
| 13465 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); |
| 13466 return fts5IndexReturn(p); |
| 13467 } |
| 13468 |
| 13469 /* |
| 13470 ** Return the total number of blocks this module has read from the %_data |
| 13471 ** table since it was created. |
| 13472 */ |
| 13473 static int sqlite3Fts5IndexReads(Fts5Index *p){ |
| 13474 return p->nRead; |
| 13475 } |
| 13476 |
| 13477 /* |
| 13478 ** Set the 32-bit cookie value stored at the start of all structure |
| 13479 ** records to the value passed as the second argument. |
| 13480 ** |
| 13481 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 13482 ** occurs. |
| 13483 */ |
| 13484 static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ |
| 13485 int rc; /* Return code */ |
| 13486 Fts5Config *pConfig = p->pConfig; /* Configuration object */ |
| 13487 u8 aCookie[4]; /* Binary representation of iNew */ |
| 13488 sqlite3_blob *pBlob = 0; |
| 13489 |
| 13490 assert( p->rc==SQLITE_OK ); |
| 13491 sqlite3Fts5Put32(aCookie, iNew); |
| 13492 |
| 13493 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, |
| 13494 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob |
| 13495 ); |
| 13496 if( rc==SQLITE_OK ){ |
| 13497 sqlite3_blob_write(pBlob, aCookie, 4, 0); |
| 13498 rc = sqlite3_blob_close(pBlob); |
| 13499 } |
| 13500 |
| 13501 return rc; |
| 13502 } |
| 13503 |
| 13504 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ |
| 13505 Fts5Structure *pStruct; |
| 13506 pStruct = fts5StructureRead(p); |
| 13507 fts5StructureRelease(pStruct); |
| 13508 return fts5IndexReturn(p); |
| 13509 } |
| 13510 |
| 13511 |
| 13512 /************************************************************************* |
| 13513 ************************************************************************** |
| 13514 ** Below this point is the implementation of the integrity-check |
| 13515 ** functionality. |
| 13516 */ |
| 13517 |
| 13518 /* |
| 13519 ** Return a simple checksum value based on the arguments. |
| 13520 */ |
| 13521 static u64 sqlite3Fts5IndexEntryCksum( |
| 13522 i64 iRowid, |
| 13523 int iCol, |
| 13524 int iPos, |
| 13525 int iIdx, |
| 13526 const char *pTerm, |
| 13527 int nTerm |
| 13528 ){ |
| 13529 int i; |
| 13530 u64 ret = iRowid; |
| 13531 ret += (ret<<3) + iCol; |
| 13532 ret += (ret<<3) + iPos; |
| 13533 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); |
| 13534 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; |
| 13535 return ret; |
| 13536 } |
| 13537 |
| 13538 #ifdef SQLITE_DEBUG |
| 13539 /* |
| 13540 ** This function is purely an internal test. It does not contribute to |
| 13541 ** FTS functionality, or even the integrity-check, in any way. |
| 13542 ** |
| 13543 ** Instead, it tests that the same set of pgno/rowid combinations are |
| 13544 ** visited regardless of whether the doclist-index identified by parameters |
| 13545 ** iSegid/iLeaf is iterated in forwards or reverse order. |
| 13546 */ |
| 13547 static void fts5TestDlidxReverse( |
| 13548 Fts5Index *p, |
| 13549 int iSegid, /* Segment id to load from */ |
| 13550 int iLeaf /* Load doclist-index for this leaf */ |
| 13551 ){ |
| 13552 Fts5DlidxIter *pDlidx = 0; |
| 13553 u64 cksum1 = 13; |
| 13554 u64 cksum2 = 13; |
| 13555 |
| 13556 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); |
| 13557 fts5DlidxIterEof(p, pDlidx)==0; |
| 13558 fts5DlidxIterNext(p, pDlidx) |
| 13559 ){ |
| 13560 i64 iRowid = fts5DlidxIterRowid(pDlidx); |
| 13561 int pgno = fts5DlidxIterPgno(pDlidx); |
| 13562 assert( pgno>iLeaf ); |
| 13563 cksum1 += iRowid + ((i64)pgno<<32); |
| 13564 } |
| 13565 fts5DlidxIterFree(pDlidx); |
| 13566 pDlidx = 0; |
| 13567 |
| 13568 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); |
| 13569 fts5DlidxIterEof(p, pDlidx)==0; |
| 13570 fts5DlidxIterPrev(p, pDlidx) |
| 13571 ){ |
| 13572 i64 iRowid = fts5DlidxIterRowid(pDlidx); |
| 13573 int pgno = fts5DlidxIterPgno(pDlidx); |
| 13574 assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); |
| 13575 cksum2 += iRowid + ((i64)pgno<<32); |
| 13576 } |
| 13577 fts5DlidxIterFree(pDlidx); |
| 13578 pDlidx = 0; |
| 13579 |
| 13580 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; |
| 13581 } |
| 13582 |
| 13583 static int fts5QueryCksum( |
| 13584 Fts5Index *p, /* Fts5 index object */ |
| 13585 int iIdx, |
| 13586 const char *z, /* Index key to query for */ |
| 13587 int n, /* Size of index key in bytes */ |
| 13588 int flags, /* Flags for Fts5IndexQuery */ |
| 13589 u64 *pCksum /* IN/OUT: Checksum value */ |
| 13590 ){ |
| 13591 int eDetail = p->pConfig->eDetail; |
| 13592 u64 cksum = *pCksum; |
| 13593 Fts5IndexIter *pIter = 0; |
| 13594 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); |
| 13595 |
| 13596 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){ |
| 13597 i64 rowid = pIter->iRowid; |
| 13598 |
| 13599 if( eDetail==FTS5_DETAIL_NONE ){ |
| 13600 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); |
| 13601 }else{ |
| 13602 Fts5PoslistReader sReader; |
| 13603 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); |
| 13604 sReader.bEof==0; |
| 13605 sqlite3Fts5PoslistReaderNext(&sReader) |
| 13606 ){ |
| 13607 int iCol = FTS5_POS2COLUMN(sReader.iPos); |
| 13608 int iOff = FTS5_POS2OFFSET(sReader.iPos); |
| 13609 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); |
| 13610 } |
| 13611 } |
| 13612 if( rc==SQLITE_OK ){ |
| 13613 rc = sqlite3Fts5IterNext(pIter); |
| 13614 } |
| 13615 } |
| 13616 sqlite3Fts5IterClose(pIter); |
| 13617 |
| 13618 *pCksum = cksum; |
| 13619 return rc; |
| 13620 } |
| 13621 |
| 13622 |
| 13623 /* |
| 13624 ** This function is also purely an internal test. It does not contribute to |
| 13625 ** FTS functionality, or even the integrity-check, in any way. |
| 13626 */ |
| 13627 static void fts5TestTerm( |
| 13628 Fts5Index *p, |
| 13629 Fts5Buffer *pPrev, /* Previous term */ |
| 13630 const char *z, int n, /* Possibly new term to test */ |
| 13631 u64 expected, |
| 13632 u64 *pCksum |
| 13633 ){ |
| 13634 int rc = p->rc; |
| 13635 if( pPrev->n==0 ){ |
| 13636 fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
| 13637 }else |
| 13638 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ |
| 13639 u64 cksum3 = *pCksum; |
| 13640 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ |
| 13641 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ |
| 13642 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); |
| 13643 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); |
| 13644 u64 ck1 = 0; |
| 13645 u64 ck2 = 0; |
| 13646 |
| 13647 /* Check that the results returned for ASC and DESC queries are |
| 13648 ** the same. If not, call this corruption. */ |
| 13649 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); |
| 13650 if( rc==SQLITE_OK ){ |
| 13651 int f = flags|FTS5INDEX_QUERY_DESC; |
| 13652 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 13653 } |
| 13654 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 13655 |
| 13656 /* If this is a prefix query, check that the results returned if the |
| 13657 ** the index is disabled are the same. In both ASC and DESC order. |
| 13658 ** |
| 13659 ** This check may only be performed if the hash table is empty. This |
| 13660 ** is because the hash table only supports a single scan query at |
| 13661 ** a time, and the multi-iter loop from which this function is called |
| 13662 ** is already performing such a scan. */ |
| 13663 if( p->nPendingData==0 ){ |
| 13664 if( iIdx>0 && rc==SQLITE_OK ){ |
| 13665 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; |
| 13666 ck2 = 0; |
| 13667 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 13668 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 13669 } |
| 13670 if( iIdx>0 && rc==SQLITE_OK ){ |
| 13671 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; |
| 13672 ck2 = 0; |
| 13673 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 13674 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 13675 } |
| 13676 } |
| 13677 |
| 13678 cksum3 ^= ck1; |
| 13679 fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
| 13680 |
| 13681 if( rc==SQLITE_OK && cksum3!=expected ){ |
| 13682 rc = FTS5_CORRUPT; |
| 13683 } |
| 13684 *pCksum = cksum3; |
| 13685 } |
| 13686 p->rc = rc; |
| 13687 } |
| 13688 |
| 13689 #else |
/* Without SQLITE_DEBUG the internal self-tests expand to nothing. */
# define fts5TestDlidxReverse(x,y,z)
# define fts5TestTerm(u,v,w,x,y,z)
| 13692 #endif |
| 13693 |
| 13694 /* |
| 13695 ** Check that: |
| 13696 ** |
| 13697 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and |
| 13698 ** contain zero terms. |
| 13699 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and |
| 13700 ** contain zero rowids. |
| 13701 */ |
| 13702 static void fts5IndexIntegrityCheckEmpty( |
| 13703 Fts5Index *p, |
| 13704 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ |
| 13705 int iFirst, |
| 13706 int iNoRowid, |
| 13707 int iLast |
| 13708 ){ |
| 13709 int i; |
| 13710 |
| 13711 /* Now check that the iter.nEmpty leaves following the current leaf |
| 13712 ** (a) exist and (b) contain no terms. */ |
| 13713 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ |
| 13714 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); |
| 13715 if( pLeaf ){ |
| 13716 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; |
| 13717 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; |
| 13718 } |
| 13719 fts5DataRelease(pLeaf); |
| 13720 } |
| 13721 } |
| 13722 |
/*
** Check the term-offset list stored at the end of leaf page pLeaf (the
** bytes from offset szLeaf up to nn).
**
** The list is a series of varints. Each one is the distance from the
** previous term offset to the next (the first is measured from zero).
** For every offset this function checks that:
**
**   * the offset lies inside the leaf body (before szLeaf),
**   * the term stored there fits inside the leaf body,
**   * for every term but the first, the number of bytes reused from the
**     previous term is no larger than that term, and
**   * every term but the first compares strictly greater than the one
**     before it.
**
** If any check fails, p->rc is set to FTS5_CORRUPT. The loop ends once
** p->rc is no longer SQLITE_OK.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int iTermOff = 0;               /* Offset of current term within leaf */
  int ii;                         /* Read offset within the offset list */

  Fts5Buffer buf1 = {0,0,0};      /* Current term */
  Fts5Buffer buf2 = {0,0,0};      /* Previous term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    int iOff;
    int nIncr;

    /* Read the next increment and advance to the term it points at. */
    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* First term on the page: stored as a length followed by the
      ** complete term. */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      /* Later terms: nKeep bytes are reused from the previous term and
      ** nByte new bytes are appended to them. */
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      /* The new term must sort strictly after the previous one. */
      if( p->rc==SQLITE_OK ){
        res = fts5BufferCompare(&buf1, &buf2);
        if( res<=0 ) p->rc = FTS5_CORRUPT;
      }
    }
    /* Remember this term for the comparison on the next iteration. */
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}
| 13772 |
| 13773 static void fts5IndexIntegrityCheckSegment( |
| 13774 Fts5Index *p, /* FTS5 backend object */ |
| 13775 Fts5StructureSegment *pSeg /* Segment to check internal consistency */ |
| 13776 ){ |
| 13777 Fts5Config *pConfig = p->pConfig; |
| 13778 sqlite3_stmt *pStmt = 0; |
| 13779 int rc2; |
| 13780 int iIdxPrevLeaf = pSeg->pgnoFirst-1; |
| 13781 int iDlidxPrevLeaf = pSeg->pgnoLast; |
| 13782 |
| 13783 if( pSeg->pgnoFirst==0 ) return; |
| 13784 |
| 13785 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( |
| 13786 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d", |
| 13787 pConfig->zDb, pConfig->zName, pSeg->iSegid |
| 13788 )); |
| 13789 |
| 13790 /* Iterate through the b-tree hierarchy. */ |
| 13791 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13792 i64 iRow; /* Rowid for this leaf */ |
| 13793 Fts5Data *pLeaf; /* Data for this leaf */ |
| 13794 |
| 13795 int nIdxTerm = sqlite3_column_bytes(pStmt, 1); |
| 13796 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); |
| 13797 int iIdxLeaf = sqlite3_column_int(pStmt, 2); |
| 13798 int bIdxDlidx = sqlite3_column_int(pStmt, 3); |
| 13799 |
| 13800 /* If the leaf in question has already been trimmed from the segment, |
| 13801 ** ignore this b-tree entry. Otherwise, load it into memory. */ |
| 13802 if( iIdxLeaf<pSeg->pgnoFirst ) continue; |
| 13803 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); |
| 13804 pLeaf = fts5LeafRead(p, iRow); |
| 13805 if( pLeaf==0 ) break; |
| 13806 |
| 13807 /* Check that the leaf contains at least one term, and that it is equal |
| 13808 ** to or larger than the split-key in zIdxTerm. Also check that if there |
| 13809 ** is also a rowid pointer within the leaf page header, it points to a |
| 13810 ** location before the term. */ |
| 13811 if( pLeaf->nn<=pLeaf->szLeaf ){ |
| 13812 p->rc = FTS5_CORRUPT; |
| 13813 }else{ |
| 13814 int iOff; /* Offset of first term on leaf */ |
| 13815 int iRowidOff; /* Offset of first rowid on leaf */ |
| 13816 int nTerm; /* Size of term on leaf in bytes */ |
| 13817 int res; /* Comparison of term and split-key */ |
| 13818 |
| 13819 iOff = fts5LeafFirstTermOff(pLeaf); |
| 13820 iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
| 13821 if( iRowidOff>=iOff ){ |
| 13822 p->rc = FTS5_CORRUPT; |
| 13823 }else{ |
| 13824 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); |
| 13825 res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); |
| 13826 if( res==0 ) res = nTerm - nIdxTerm; |
| 13827 if( res<0 ) p->rc = FTS5_CORRUPT; |
| 13828 } |
| 13829 |
| 13830 fts5IntegrityCheckPgidx(p, pLeaf); |
| 13831 } |
| 13832 fts5DataRelease(pLeaf); |
| 13833 if( p->rc ) break; |
| 13834 |
| 13835 /* Now check that the iter.nEmpty leaves following the current leaf |
| 13836 ** (a) exist and (b) contain no terms. */ |
| 13837 fts5IndexIntegrityCheckEmpty( |
| 13838 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 |
| 13839 ); |
| 13840 if( p->rc ) break; |
| 13841 |
| 13842 /* If there is a doclist-index, check that it looks right. */ |
| 13843 if( bIdxDlidx ){ |
| 13844 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ |
| 13845 int iPrevLeaf = iIdxLeaf; |
| 13846 int iSegid = pSeg->iSegid; |
| 13847 int iPg = 0; |
| 13848 i64 iKey; |
| 13849 |
| 13850 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); |
| 13851 fts5DlidxIterEof(p, pDlidx)==0; |
| 13852 fts5DlidxIterNext(p, pDlidx) |
| 13853 ){ |
| 13854 |
| 13855 /* Check any rowid-less pages that occur before the current leaf. */ |
| 13856 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ |
| 13857 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg); |
| 13858 pLeaf = fts5DataRead(p, iKey); |
| 13859 if( pLeaf ){ |
| 13860 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; |
| 13861 fts5DataRelease(pLeaf); |
| 13862 } |
| 13863 } |
| 13864 iPrevLeaf = fts5DlidxIterPgno(pDlidx); |
| 13865 |
| 13866 /* Check that the leaf page indicated by the iterator really does |
| 13867 ** contain the rowid suggested by the same. */ |
| 13868 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); |
| 13869 pLeaf = fts5DataRead(p, iKey); |
| 13870 if( pLeaf ){ |
| 13871 i64 iRowid; |
| 13872 int iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
| 13873 ASSERT_SZLEAF_OK(pLeaf); |
| 13874 if( iRowidOff>=pLeaf->szLeaf ){ |
| 13875 p->rc = FTS5_CORRUPT; |
| 13876 }else{ |
| 13877 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); |
| 13878 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; |
| 13879 } |
| 13880 fts5DataRelease(pLeaf); |
| 13881 } |
| 13882 } |
| 13883 |
| 13884 iDlidxPrevLeaf = iPg; |
| 13885 fts5DlidxIterFree(pDlidx); |
| 13886 fts5TestDlidxReverse(p, iSegid, iIdxLeaf); |
| 13887 }else{ |
| 13888 iDlidxPrevLeaf = pSeg->pgnoLast; |
| 13889 /* TODO: Check there is no doclist index */ |
| 13890 } |
| 13891 |
| 13892 iIdxPrevLeaf = iIdxLeaf; |
| 13893 } |
| 13894 |
| 13895 rc2 = sqlite3_finalize(pStmt); |
| 13896 if( p->rc==SQLITE_OK ) p->rc = rc2; |
| 13897 |
| 13898 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ |
| 13899 #if 0 |
| 13900 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ |
| 13901 p->rc = FTS5_CORRUPT; |
| 13902 } |
| 13903 #endif |
| 13904 } |
| 13905 |
| 13906 |
| 13907 /* |
| 13908 ** Run internal checks to ensure that the FTS index (a) is internally |
| 13909 ** consistent and (b) contains entries for which the XOR of the checksums |
| 13910 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. |
| 13911 ** |
| 13912 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the |
| 13913 ** checksum does not match. Return SQLITE_OK if all checks pass without |
| 13914 ** error, or some other SQLite error code if another error (e.g. OOM) |
| 13915 ** occurs. |
| 13916 */ |
| 13917 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ |
| 13918 int eDetail = p->pConfig->eDetail; |
| 13919 u64 cksum2 = 0; /* Checksum based on contents of indexes */ |
| 13920 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ |
| 13921 Fts5Iter *pIter; /* Used to iterate through entire index */ |
| 13922 Fts5Structure *pStruct; /* Index structure */ |
| 13923 |
| 13924 #ifdef SQLITE_DEBUG |
| 13925 /* Used by extra internal tests only run if NDEBUG is not defined */ |
| 13926 u64 cksum3 = 0; /* Checksum based on contents of indexes */ |
| 13927 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ |
| 13928 #endif |
| 13929 const int flags = FTS5INDEX_QUERY_NOOUTPUT; |
| 13930 |
| 13931 /* Load the FTS index structure */ |
| 13932 pStruct = fts5StructureRead(p); |
| 13933 |
| 13934 /* Check that the internal nodes of each segment match the leaves */ |
| 13935 if( pStruct ){ |
| 13936 int iLvl, iSeg; |
| 13937 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 13938 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 13939 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; |
| 13940 fts5IndexIntegrityCheckSegment(p, pSeg); |
| 13941 } |
| 13942 } |
| 13943 } |
| 13944 |
| 13945 /* The cksum argument passed to this function is a checksum calculated |
| 13946 ** based on all expected entries in the FTS index (including prefix index |
| 13947 ** entries). This block checks that a checksum calculated based on the |
| 13948 ** actual contents of FTS index is identical. |
| 13949 ** |
| 13950 ** Two versions of the same checksum are calculated. The first (stack |
| 13951 ** variable cksum2) based on entries extracted from the full-text index |
| 13952 ** while doing a linear scan of each individual index in turn. |
| 13953 ** |
| 13954 ** As each term visited by the linear scans, a separate query for the |
| 13955 ** same term is performed. cksum3 is calculated based on the entries |
| 13956 ** extracted by these queries. |
| 13957 */ |
| 13958 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); |
| 13959 fts5MultiIterEof(p, pIter)==0; |
| 13960 fts5MultiIterNext(p, pIter, 0, 0) |
| 13961 ){ |
| 13962 int n; /* Size of term in bytes */ |
| 13963 i64 iPos = 0; /* Position read from poslist */ |
| 13964 int iOff = 0; /* Offset within poslist */ |
| 13965 i64 iRowid = fts5MultiIterRowid(pIter); |
| 13966 char *z = (char*)fts5MultiIterTerm(pIter, &n); |
| 13967 |
| 13968 /* If this is a new term, query for it. Update cksum3 with the results. */ |
| 13969 fts5TestTerm(p, &term, z, n, cksum2, &cksum3); |
| 13970 |
| 13971 if( eDetail==FTS5_DETAIL_NONE ){ |
| 13972 if( 0==fts5MultiIterIsEmpty(p, pIter) ){ |
| 13973 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); |
| 13974 } |
| 13975 }else{ |
| 13976 poslist.n = 0; |
| 13977 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); |
| 13978 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ |
| 13979 int iCol = FTS5_POS2COLUMN(iPos); |
| 13980 int iTokOff = FTS5_POS2OFFSET(iPos); |
| 13981 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); |
| 13982 } |
| 13983 } |
| 13984 } |
| 13985 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); |
| 13986 |
| 13987 fts5MultiIterFree(pIter); |
| 13988 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; |
| 13989 |
| 13990 fts5StructureRelease(pStruct); |
| 13991 #ifdef SQLITE_DEBUG |
| 13992 fts5BufferFree(&term); |
| 13993 #endif |
| 13994 fts5BufferFree(&poslist); |
| 13995 return fts5IndexReturn(p); |
| 13996 } |
| 13997 |
| 13998 /************************************************************************* |
| 13999 ************************************************************************** |
| 14000 ** Below this point is the implementation of the fts5_decode() scalar |
| 14001 ** function only. |
| 14002 */ |
| 14003 |
| 14004 /* |
| 14005 ** Decode a segment-data rowid from the %_data table. This function is |
| 14006 ** the opposite of macro FTS5_SEGMENT_ROWID(). |
| 14007 */ |
| 14008 static void fts5DecodeRowid( |
| 14009 i64 iRowid, /* Rowid from %_data table */ |
| 14010 int *piSegid, /* OUT: Segment id */ |
| 14011 int *pbDlidx, /* OUT: Dlidx flag */ |
| 14012 int *piHeight, /* OUT: Height */ |
| 14013 int *piPgno /* OUT: Page number */ |
| 14014 ){ |
| 14015 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); |
| 14016 iRowid >>= FTS5_DATA_PAGE_B; |
| 14017 |
| 14018 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); |
| 14019 iRowid >>= FTS5_DATA_HEIGHT_B; |
| 14020 |
| 14021 *pbDlidx = (int)(iRowid & 0x0001); |
| 14022 iRowid >>= FTS5_DATA_DLI_B; |
| 14023 |
| 14024 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); |
| 14025 } |
| 14026 |
| 14027 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ |
| 14028 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ |
| 14029 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); |
| 14030 |
| 14031 if( iSegid==0 ){ |
| 14032 if( iKey==FTS5_AVERAGES_ROWID ){ |
| 14033 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); |
| 14034 }else{ |
| 14035 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); |
| 14036 } |
| 14037 } |
| 14038 else{ |
| 14039 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", |
| 14040 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno |
| 14041 ); |
| 14042 } |
| 14043 } |
| 14044 |
| 14045 static void fts5DebugStructure( |
| 14046 int *pRc, /* IN/OUT: error code */ |
| 14047 Fts5Buffer *pBuf, |
| 14048 Fts5Structure *p |
| 14049 ){ |
| 14050 int iLvl, iSeg; /* Iterate through levels, segments */ |
| 14051 |
| 14052 for(iLvl=0; iLvl<p->nLevel; iLvl++){ |
| 14053 Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; |
| 14054 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, |
| 14055 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg |
| 14056 ); |
| 14057 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ |
| 14058 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; |
| 14059 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", |
| 14060 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast |
| 14061 ); |
| 14062 } |
| 14063 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); |
| 14064 } |
| 14065 } |
| 14066 |
| 14067 /* |
| 14068 ** This is part of the fts5_decode() debugging aid. |
| 14069 ** |
| 14070 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This |
| 14071 ** function appends a human-readable representation of the same object |
| 14072 ** to the buffer passed as the second argument. |
| 14073 */ |
| 14074 static void fts5DecodeStructure( |
| 14075 int *pRc, /* IN/OUT: error code */ |
| 14076 Fts5Buffer *pBuf, |
| 14077 const u8 *pBlob, int nBlob |
| 14078 ){ |
| 14079 int rc; /* Return code */ |
| 14080 Fts5Structure *p = 0; /* Decoded structure object */ |
| 14081 |
| 14082 rc = fts5StructureDecode(pBlob, nBlob, 0, &p); |
| 14083 if( rc!=SQLITE_OK ){ |
| 14084 *pRc = rc; |
| 14085 return; |
| 14086 } |
| 14087 |
| 14088 fts5DebugStructure(pRc, pBuf, p); |
| 14089 fts5StructureRelease(p); |
| 14090 } |
| 14091 |
| 14092 /* |
| 14093 ** This is part of the fts5_decode() debugging aid. |
| 14094 ** |
| 14095 ** Arguments pBlob/nBlob contain an "averages" record. This function |
| 14096 ** appends a human-readable representation of record to the buffer passed |
| 14097 ** as the second argument. |
| 14098 */ |
| 14099 static void fts5DecodeAverages( |
| 14100 int *pRc, /* IN/OUT: error code */ |
| 14101 Fts5Buffer *pBuf, |
| 14102 const u8 *pBlob, int nBlob |
| 14103 ){ |
| 14104 int i = 0; |
| 14105 const char *zSpace = ""; |
| 14106 |
| 14107 while( i<nBlob ){ |
| 14108 u64 iVal; |
| 14109 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); |
| 14110 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); |
| 14111 zSpace = " "; |
| 14112 } |
| 14113 } |
| 14114 |
| 14115 /* |
| 14116 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read |
| 14117 ** each varint and append its string representation to buffer pBuf. Return |
| 14118 ** after either the input buffer is exhausted or a 0 value is read. |
| 14119 ** |
| 14120 ** The return value is the number of bytes read from the input buffer. |
| 14121 */ |
| 14122 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ |
| 14123 int iOff = 0; |
| 14124 while( iOff<n ){ |
| 14125 int iVal; |
| 14126 iOff += fts5GetVarint32(&a[iOff], iVal); |
| 14127 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); |
| 14128 } |
| 14129 return iOff; |
| 14130 } |
| 14131 |
| 14132 /* |
| 14133 ** The start of buffer (a/n) contains the start of a doclist. The doclist |
| 14134 ** may or may not finish within the buffer. This function appends a text |
| 14135 ** representation of the part of the doclist that is present to buffer |
| 14136 ** pBuf. |
| 14137 ** |
| 14138 ** The return value is the number of bytes read from the input buffer. |
| 14139 */ |
| 14140 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ |
| 14141 i64 iDocid = 0; |
| 14142 int iOff = 0; |
| 14143 |
| 14144 if( n>0 ){ |
| 14145 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); |
| 14146 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); |
| 14147 } |
| 14148 while( iOff<n ){ |
| 14149 int nPos; |
| 14150 int bDel; |
| 14151 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); |
| 14152 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); |
| 14153 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); |
| 14154 if( iOff<n ){ |
| 14155 i64 iDelta; |
| 14156 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); |
| 14157 iDocid += iDelta; |
| 14158 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); |
| 14159 } |
| 14160 } |
| 14161 |
| 14162 return iOff; |
| 14163 } |
| 14164 |
| 14165 /* |
| 14166 ** This function is part of the fts5_decode() debugging function. It is |
| 14167 ** only ever used with detail=none tables. |
| 14168 ** |
| 14169 ** Buffer (pData/nData) contains a doclist in the format used by detail=none |
| 14170 ** tables. This function appends a human-readable version of that list to |
| 14171 ** buffer pBuf. |
| 14172 ** |
| 14173 ** If *pRc is other than SQLITE_OK when this function is called, it is a |
| 14174 ** no-op. If an OOM or other error occurs within this function, *pRc is |
| 14175 ** set to an SQLite error code before returning. The final state of buffer |
| 14176 ** pBuf is undefined in this case. |
| 14177 */ |
| 14178 static void fts5DecodeRowidList( |
| 14179 int *pRc, /* IN/OUT: Error code */ |
| 14180 Fts5Buffer *pBuf, /* Buffer to append text to */ |
| 14181 const u8 *pData, int nData /* Data to decode list-of-rowids from */ |
| 14182 ){ |
| 14183 int i = 0; |
| 14184 i64 iRowid = 0; |
| 14185 |
| 14186 while( i<nData ){ |
| 14187 const char *zApp = ""; |
| 14188 u64 iVal; |
| 14189 i += sqlite3Fts5GetVarint(&pData[i], &iVal); |
| 14190 iRowid += iVal; |
| 14191 |
| 14192 if( i<nData && pData[i]==0x00 ){ |
| 14193 i++; |
| 14194 if( i<nData && pData[i]==0x00 ){ |
| 14195 i++; |
| 14196 zApp = "+"; |
| 14197 }else{ |
| 14198 zApp = "*"; |
| 14199 } |
| 14200 } |
| 14201 |
| 14202 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); |
| 14203 } |
| 14204 } |
| 14205 |
| 14206 /* |
| 14207 ** The implementation of user-defined scalar function fts5_decode(). |
| 14208 */ |
| 14209 static void fts5DecodeFunction( |
| 14210 sqlite3_context *pCtx, /* Function call context */ |
| 14211 int nArg, /* Number of args (always 2) */ |
| 14212 sqlite3_value **apVal /* Function arguments */ |
| 14213 ){ |
| 14214 i64 iRowid; /* Rowid for record being decoded */ |
| 14215 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ |
| 14216 const u8 *aBlob; int n; /* Record to decode */ |
| 14217 u8 *a = 0; |
| 14218 Fts5Buffer s; /* Build up text to return here */ |
| 14219 int rc = SQLITE_OK; /* Return code */ |
| 14220 int nSpace = 0; |
| 14221 int eDetailNone = (sqlite3_user_data(pCtx)!=0); |
| 14222 |
| 14223 assert( nArg==2 ); |
| 14224 UNUSED_PARAM(nArg); |
| 14225 memset(&s, 0, sizeof(Fts5Buffer)); |
| 14226 iRowid = sqlite3_value_int64(apVal[0]); |
| 14227 |
| 14228 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] |
| 14229 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents |
| 14230 ** buffer overreads even if the record is corrupt. */ |
| 14231 n = sqlite3_value_bytes(apVal[1]); |
| 14232 aBlob = sqlite3_value_blob(apVal[1]); |
| 14233 nSpace = n + FTS5_DATA_ZERO_PADDING; |
| 14234 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); |
| 14235 if( a==0 ) goto decode_out; |
| 14236 memcpy(a, aBlob, n); |
| 14237 |
| 14238 |
| 14239 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); |
| 14240 |
| 14241 fts5DebugRowid(&rc, &s, iRowid); |
| 14242 if( bDlidx ){ |
| 14243 Fts5Data dlidx; |
| 14244 Fts5DlidxLvl lvl; |
| 14245 |
| 14246 dlidx.p = a; |
| 14247 dlidx.nn = n; |
| 14248 |
| 14249 memset(&lvl, 0, sizeof(Fts5DlidxLvl)); |
| 14250 lvl.pData = &dlidx; |
| 14251 lvl.iLeafPgno = iPgno; |
| 14252 |
| 14253 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ |
| 14254 sqlite3Fts5BufferAppendPrintf(&rc, &s, |
| 14255 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid |
| 14256 ); |
| 14257 } |
| 14258 }else if( iSegid==0 ){ |
| 14259 if( iRowid==FTS5_AVERAGES_ROWID ){ |
| 14260 fts5DecodeAverages(&rc, &s, a, n); |
| 14261 }else{ |
| 14262 fts5DecodeStructure(&rc, &s, a, n); |
| 14263 } |
| 14264 }else if( eDetailNone ){ |
| 14265 Fts5Buffer term; /* Current term read from page */ |
| 14266 int szLeaf; |
| 14267 int iPgidxOff = szLeaf = fts5GetU16(&a[2]); |
| 14268 int iTermOff; |
| 14269 int nKeep = 0; |
| 14270 int iOff; |
| 14271 |
| 14272 memset(&term, 0, sizeof(Fts5Buffer)); |
| 14273 |
| 14274 /* Decode any entries that occur before the first term. */ |
| 14275 if( szLeaf<n ){ |
| 14276 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff); |
| 14277 }else{ |
| 14278 iTermOff = szLeaf; |
| 14279 } |
| 14280 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); |
| 14281 |
| 14282 iOff = iTermOff; |
| 14283 while( iOff<szLeaf ){ |
| 14284 int nAppend; |
| 14285 |
| 14286 /* Read the term data for the next term*/ |
| 14287 iOff += fts5GetVarint32(&a[iOff], nAppend); |
| 14288 term.n = nKeep; |
| 14289 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); |
| 14290 sqlite3Fts5BufferAppendPrintf( |
| 14291 &rc, &s, " term=%.*s", term.n, (const char*)term.p |
| 14292 ); |
| 14293 iOff += nAppend; |
| 14294 |
| 14295 /* Figure out where the doclist for this term ends */ |
| 14296 if( iPgidxOff<n ){ |
| 14297 int nIncr; |
| 14298 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr); |
| 14299 iTermOff += nIncr; |
| 14300 }else{ |
| 14301 iTermOff = szLeaf; |
| 14302 } |
| 14303 |
| 14304 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); |
| 14305 iOff = iTermOff; |
| 14306 if( iOff<szLeaf ){ |
| 14307 iOff += fts5GetVarint32(&a[iOff], nKeep); |
| 14308 } |
| 14309 } |
| 14310 |
| 14311 fts5BufferFree(&term); |
| 14312 }else{ |
| 14313 Fts5Buffer term; /* Current term read from page */ |
| 14314 int szLeaf; /* Offset of pgidx in a[] */ |
| 14315 int iPgidxOff; |
| 14316 int iPgidxPrev = 0; /* Previous value read from pgidx */ |
| 14317 int iTermOff = 0; |
| 14318 int iRowidOff = 0; |
| 14319 int iOff; |
| 14320 int nDoclist; |
| 14321 |
| 14322 memset(&term, 0, sizeof(Fts5Buffer)); |
| 14323 |
| 14324 if( n<4 ){ |
| 14325 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); |
| 14326 goto decode_out; |
| 14327 }else{ |
| 14328 iRowidOff = fts5GetU16(&a[0]); |
| 14329 iPgidxOff = szLeaf = fts5GetU16(&a[2]); |
| 14330 if( iPgidxOff<n ){ |
| 14331 fts5GetVarint32(&a[iPgidxOff], iTermOff); |
| 14332 } |
| 14333 } |
| 14334 |
| 14335 /* Decode the position list tail at the start of the page */ |
| 14336 if( iRowidOff!=0 ){ |
| 14337 iOff = iRowidOff; |
| 14338 }else if( iTermOff!=0 ){ |
| 14339 iOff = iTermOff; |
| 14340 }else{ |
| 14341 iOff = szLeaf; |
| 14342 } |
| 14343 fts5DecodePoslist(&rc, &s, &a[4], iOff-4); |
| 14344 |
| 14345 /* Decode any more doclist data that appears on the page before the |
| 14346 ** first term. */ |
| 14347 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; |
| 14348 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); |
| 14349 |
| 14350 while( iPgidxOff<n ){ |
| 14351 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ |
| 14352 int nByte; /* Bytes of data */ |
| 14353 int iEnd; |
| 14354 |
| 14355 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); |
| 14356 iPgidxPrev += nByte; |
| 14357 iOff = iPgidxPrev; |
| 14358 |
| 14359 if( iPgidxOff<n ){ |
| 14360 fts5GetVarint32(&a[iPgidxOff], nByte); |
| 14361 iEnd = iPgidxPrev + nByte; |
| 14362 }else{ |
| 14363 iEnd = szLeaf; |
| 14364 } |
| 14365 |
| 14366 if( bFirst==0 ){ |
| 14367 iOff += fts5GetVarint32(&a[iOff], nByte); |
| 14368 term.n = nByte; |
| 14369 } |
| 14370 iOff += fts5GetVarint32(&a[iOff], nByte); |
| 14371 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); |
| 14372 iOff += nByte; |
| 14373 |
| 14374 sqlite3Fts5BufferAppendPrintf( |
| 14375 &rc, &s, " term=%.*s", term.n, (const char*)term.p |
| 14376 ); |
| 14377 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); |
| 14378 } |
| 14379 |
| 14380 fts5BufferFree(&term); |
| 14381 } |
| 14382 |
| 14383 decode_out: |
| 14384 sqlite3_free(a); |
| 14385 if( rc==SQLITE_OK ){ |
| 14386 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); |
| 14387 }else{ |
| 14388 sqlite3_result_error_code(pCtx, rc); |
| 14389 } |
| 14390 fts5BufferFree(&s); |
| 14391 } |
| 14392 |
| 14393 /* |
| 14394 ** The implementation of user-defined scalar function fts5_rowid(). |
| 14395 */ |
| 14396 static void fts5RowidFunction( |
| 14397 sqlite3_context *pCtx, /* Function call context */ |
| 14398 int nArg, /* Number of args (always 2) */ |
| 14399 sqlite3_value **apVal /* Function arguments */ |
| 14400 ){ |
| 14401 const char *zArg; |
| 14402 if( nArg==0 ){ |
| 14403 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); |
| 14404 }else{ |
| 14405 zArg = (const char*)sqlite3_value_text(apVal[0]); |
| 14406 if( 0==sqlite3_stricmp(zArg, "segment") ){ |
| 14407 i64 iRowid; |
| 14408 int segid, pgno; |
| 14409 if( nArg!=3 ){ |
| 14410 sqlite3_result_error(pCtx, |
| 14411 "should be: fts5_rowid('segment', segid, pgno))", -1 |
| 14412 ); |
| 14413 }else{ |
| 14414 segid = sqlite3_value_int(apVal[1]); |
| 14415 pgno = sqlite3_value_int(apVal[2]); |
| 14416 iRowid = FTS5_SEGMENT_ROWID(segid, pgno); |
| 14417 sqlite3_result_int64(pCtx, iRowid); |
| 14418 } |
| 14419 }else{ |
| 14420 sqlite3_result_error(pCtx, |
| 14421 "first arg to fts5_rowid() must be 'segment'" , -1 |
| 14422 ); |
| 14423 } |
| 14424 } |
| 14425 } |
| 14426 |
| 14427 /* |
| 14428 ** This is called as part of registering the FTS5 module with database |
| 14429 ** connection db. It registers several user-defined scalar functions useful |
| 14430 ** with FTS5. |
| 14431 ** |
| 14432 ** If successful, SQLITE_OK is returned. If an error occurs, some other |
| 14433 ** SQLite error code is returned instead. |
| 14434 */ |
| 14435 static int sqlite3Fts5IndexInit(sqlite3 *db){ |
| 14436 int rc = sqlite3_create_function( |
| 14437 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 |
| 14438 ); |
| 14439 |
| 14440 if( rc==SQLITE_OK ){ |
| 14441 rc = sqlite3_create_function( |
| 14442 db, "fts5_decode_none", 2, |
| 14443 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 |
| 14444 ); |
| 14445 } |
| 14446 |
| 14447 if( rc==SQLITE_OK ){ |
| 14448 rc = sqlite3_create_function( |
| 14449 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 |
| 14450 ); |
| 14451 } |
| 14452 return rc; |
| 14453 } |
| 14454 |
| 14455 |
| 14456 static int sqlite3Fts5IndexReset(Fts5Index *p){ |
| 14457 assert( p->pStruct==0 || p->iStructVersion!=0 ); |
| 14458 if( fts5IndexDataVersion(p)!=p->iStructVersion ){ |
| 14459 fts5StructureInvalidate(p); |
| 14460 } |
| 14461 return fts5IndexReturn(p); |
| 14462 } |
| 14463 |
| 14464 /* |
| 14465 ** 2014 Jun 09 |
| 14466 ** |
| 14467 ** The author disclaims copyright to this source code. In place of |
| 14468 ** a legal notice, here is a blessing: |
| 14469 ** |
| 14470 ** May you do good and not evil. |
| 14471 ** May you find forgiveness for yourself and forgive others. |
| 14472 ** May you share freely, never taking more than you give. |
| 14473 ** |
| 14474 ****************************************************************************** |
| 14475 ** |
| 14476 ** This is an SQLite module implementing full-text search. |
| 14477 */ |
| 14478 |
| 14479 |
| 14480 /* #include "fts5Int.h" */ |
| 14481 |
| 14482 /* |
| 14483 ** This variable is set to false when running tests for which the on disk |
| 14484 ** structures should not be corrupt. Otherwise, true. If it is false, extra |
| 14485 ** assert() conditions in the fts5 code are activated - conditions that are |
| 14486 ** only true if it is guaranteed that the fts5 database is not corrupt. |
| 14487 */ |
| 14488 SQLITE_API int sqlite3_fts5_may_be_corrupt = 1; |
| 14489 |
| 14490 |
| 14491 typedef struct Fts5Auxdata Fts5Auxdata; |
| 14492 typedef struct Fts5Auxiliary Fts5Auxiliary; |
| 14493 typedef struct Fts5Cursor Fts5Cursor; |
| 14494 typedef struct Fts5Sorter Fts5Sorter; |
| 14495 typedef struct Fts5Table Fts5Table; |
| 14496 typedef struct Fts5TokenizerModule Fts5TokenizerModule; |
| 14497 |
| 14498 /* |
| 14499 ** NOTES ON TRANSACTIONS: |
| 14500 ** |
| 14501 ** SQLite invokes the following virtual table methods as transactions are |
| 14502 ** opened and closed by the user: |
| 14503 ** |
| 14504 ** xBegin(): Start of a new transaction. |
| 14505 ** xSync(): Initial part of two-phase commit. |
| 14506 ** xCommit(): Final part of two-phase commit. |
| 14507 ** xRollback(): Rollback the transaction. |
| 14508 ** |
| 14509 ** Anything that is required as part of a commit that may fail is performed |
| 14510 ** in the xSync() callback. Current versions of SQLite ignore any errors |
| 14511 ** returned by xCommit(). |
| 14512 ** |
| 14513 ** And as sub-transactions are opened/closed: |
| 14514 ** |
| 14515 ** xSavepoint(int S): Open savepoint S. |
| 14516 ** xRelease(int S): Commit and close savepoint S. |
| 14517 ** xRollbackTo(int S): Rollback to start of savepoint S. |
| 14518 ** |
| 14519 ** During a write-transaction the fts5_index.c module may cache some data |
| 14520 ** in-memory. It is flushed to disk whenever xSync(), xRelease() or |
| 14521 ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() |
| 14522 ** is called. |
| 14523 ** |
| 14524 ** Additionally, if SQLITE_DEBUG is defined, an instance of the following |
| 14525 ** structure is used to record the current transaction state. This information |
| 14526 ** is not required, but it is used in the assert() statements executed by |
| 14527 ** function fts5CheckTransactionState() (see below). |
| 14528 */ |
| 14529 struct Fts5TransactionState { /* Embedded in Fts5Table when SQLITE_DEBUG is defined */ |
| 14530 int eState; /* Transaction state: 0==closed, 1==open, 2==synced */ |
| 14531 int iSavepoint; /* Number of open savepoints (0 -> none) */ |
| 14532 }; |
| 14533 |
| 14534 /* |
| 14535 ** A single object of this type is allocated when the FTS5 module is |
| 14536 ** registered with a database handle. It is used to store pointers to |
| 14537 ** all registered FTS5 extensions - tokenizers and auxiliary functions. |
| 14538 */ |
| 14539 struct Fts5Global { |
| 14540 fts5_api api; /* User visible part of object (see fts5.h) */ |
| 14541 sqlite3 *db; /* Associated database connection */ |
| 14542 i64 iNextId; /* Used to allocate unique cursor ids */ |
| 14543 Fts5Auxiliary *pAux; /* First in list of all aux. functions */ |
| 14544 Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ |
| 14545 Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ |
| 14546 Fts5Cursor *pCsr; /* First in list of all open cursors */ |
| 14547 }; |
| 14548 |
| 14549 /* |
| 14550 ** Each auxiliary function registered with the FTS5 module is represented |
| 14551 ** by an object of the following type. All such objects are stored as part |
| 14552 ** of the Fts5Global.pAux list. |
| 14553 */ |
| 14554 struct Fts5Auxiliary { |
| 14555 Fts5Global *pGlobal; /* Global context for this function */ |
| 14556 char *zFunc; /* Function name (nul-terminated) */ |
| 14557 void *pUserData; /* User-data pointer */ |
| 14558 fts5_extension_function xFunc; /* Callback function */ |
| 14559 void (*xDestroy)(void*); /* Destructor function */ |
| 14560 Fts5Auxiliary *pNext; /* Next registered auxiliary function */ |
| 14561 }; |
| 14562 |
| 14563 /* |
| 14564 ** Each tokenizer module registered with the FTS5 module is represented |
| 14565 ** by an object of the following type. All such objects are stored as part |
| 14566 ** of the Fts5Global.pTok list. |
| 14567 */ |
| 14568 struct Fts5TokenizerModule { |
| 14569 char *zName; /* Name of tokenizer */ |
| 14570 void *pUserData; /* User pointer passed to xCreate() */ |
| 14571 fts5_tokenizer x; /* Tokenizer functions */ |
| 14572 void (*xDestroy)(void*); /* Destructor function */ |
| 14573 Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ |
| 14574 }; |
| 14575 |
| 14576 /* |
| 14577 ** Virtual-table object. |
| 14578 */ |
| 14579 struct Fts5Table { |
| 14580 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 14581 Fts5Config *pConfig; /* Virtual table configuration */ |
| 14582 Fts5Index *pIndex; /* Full-text index */ |
| 14583 Fts5Storage *pStorage; /* Document store */ |
| 14584 Fts5Global *pGlobal; /* Global (connection wide) data */ |
| 14585 Fts5Cursor *pSortCsr; /* Sort data from this cursor */ |
| 14586 #ifdef SQLITE_DEBUG |
| 14587 struct Fts5TransactionState ts; |
| 14588 #endif |
| 14589 }; |
| 14590 |
| 14591 struct Fts5MatchPhrase { |
| 14592 Fts5Buffer *pPoslist; /* Pointer to current poslist */ |
| 14593 int nTerm; /* Size of phrase in terms */ |
| 14594 }; |
| 14595 |
| 14596 /* |
| 14597 ** pStmt: |
| 14598 ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; |
| 14599 ** |
| 14600 ** aIdx[]: |
| 14601 ** There is one entry in the aIdx[] array for each phrase in the query, |
| 14602 ** the value of which is the offset within aPoslist[] following the last |
| 14603 ** byte of the position list for the corresponding phrase. |
| 14604 */ |
| 14605 struct Fts5Sorter { |
| 14606 sqlite3_stmt *pStmt; |
| 14607 i64 iRowid; /* Current rowid */ |
| 14608 const u8 *aPoslist; /* Position lists for current row */ |
| 14609 int nIdx; /* Number of entries in aIdx[] */ |
| 14610 int aIdx[1]; /* Offsets into aPoslist for current row */ |
| 14611 }; |
| 14612 |
| 14613 |
/*
** Virtual-table cursor object.
**
** Layout:
**   fts5FreeCursorComponents() zeroes every member from ePlan through to
**   the end of the structure. Members that must survive a cursor reset
**   (base, pNext, aColumnSize and iCsrId) are therefore declared before
**   ePlan. Do not reorder members across that boundary.
**
** aColumnSize:
**   Points at the array of Fts5Config.nCol integers that fts5OpenMethod()
**   allocates immediately following the Fts5Cursor structure itself.
**
** iSpecial:
**   If this is a 'special' query (refer to function fts5SpecialMatch()), 
**   then this variable contains the result of the query.
**
** iFirstRowid, iLastRowid:
**   These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
**   cursor iterates in ascending order of rowids, iFirstRowid is the lower
**   limit of rowids to return, and iLastRowid the upper. In other words, the
**   WHERE clause in the user's query might have been:
**
**       <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
**
**   If the cursor iterates in descending order of rowid, iFirstRowid
**   is the upper limit (i.e. the "first" rowid visited) and iLastRowid
**   the lower.
*/
struct Fts5Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  Fts5Cursor *pNext;              /* Next cursor in Fts5Cursor.pCsr list */
  int *aColumnSize;               /* Values for xColumnSize() */
  i64 iCsrId;                     /* Cursor id */

  /* Zero from this point onwards on cursor reset */
  int ePlan;                      /* FTS5_PLAN_XXX value */
  int bDesc;                      /* True for "ORDER BY rowid DESC" queries */
  i64 iFirstRowid;                /* Return no rowids earlier than this */
  i64 iLastRowid;                 /* Return no rowids later than this */
  sqlite3_stmt *pStmt;            /* Statement used to read %_content */
  Fts5Expr *pExpr;                /* Expression for MATCH queries */
  Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
  int csrflags;                   /* Mask of cursor flags (see below) */
  i64 iSpecial;                   /* Result of special query */

  /* "rank" function. Populated on demand from vtab.xColumn(). */
  char *zRank;                    /* Custom rank function */
  char *zRankArgs;                /* Custom rank function args */
  Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
  int nRankArg;                   /* Number of trailing arguments for rank() */
  sqlite3_value **apRankArg;      /* Array of trailing arguments */
  sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */

  /* Auxiliary data storage */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */

  /* Cache used by auxiliary functions xInst() and xInstCount() */
  Fts5PoslistReader *aInstIter;   /* One for each phrase */
  int nInstAlloc;                 /* Size of aInst[] array (entries / 3) */
  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance */
};
| 14668 |
/*
** Bits that make up the "idxNum" parameter passed indirectly by 
** xBestIndex() to xFilter().
**
** The first group records which WHERE clause constraints are in use. The
** second group records how the single ORDER BY term, if any, is handled.
*/
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020         /* ORDER BY rank (with MATCH) */
#define FTS5_BI_ORDER_ROWID  0x0040         /* ORDER BY rowid */
#define FTS5_BI_ORDER_DESC   0x0080         /* The ORDER BY term is DESC */
| 14682 |
/*
** Values for Fts5Cursor.csrflags
**
** The REQUIRE_CONTENT, REQUIRE_DOCSIZE, REQUIRE_INST and REQUIRE_POSLIST
** flags are all set by fts5CsrNewrow() each time the cursor moves to a
** different row. FTS5CSR_FREE_ZRANK tells fts5FreeCursorComponents() to
** free Fts5Cursor.zRank and zRankArgs. FTS5CSR_REQUIRE_RESEEK is set by
** fts5TripCursors() and cleared again by fts5CursorReseek().
*/
#define FTS5CSR_EOF               0x01
#define FTS5CSR_REQUIRE_CONTENT   0x02
#define FTS5CSR_REQUIRE_DOCSIZE   0x04
#define FTS5CSR_REQUIRE_INST      0x08
#define FTS5CSR_FREE_ZRANK        0x10
#define FTS5CSR_REQUIRE_RESEEK    0x20
#define FTS5CSR_REQUIRE_POSLIST   0x40

/*
** BitFlagAllTest(x,y) is true only if every bit set in y is also set in x.
** BitFlagTest(x,y) is true if at least one bit set in y is also set in x.
*/
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y)    (((x) & (y))!=0)


/*
** Macros to Set(), Clear() and Test() cursor flags.
**
** Note that CsrFlagTest() evaluates to the masked bits themselves, not to
** a normalized 0 or 1.
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
| 14704 |
/*
** One entry in the linked list of saved auxiliary data that hangs off
** Fts5Cursor.pAuxdata. When the cursor is reset or closed,
** fts5FreeCursorComponents() invokes xDelete (if it is not NULL) on pPtr
** and then frees the entry itself.
*/
struct Fts5Auxdata {
  Fts5Auxiliary *pAux;            /* Extension to which this belongs */
  void *pPtr;                     /* Pointer value */
  void(*xDelete)(void*);          /* Destructor */
  Fts5Auxdata *pNext;             /* Next object in linked list */
};
| 14711 |
#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
/*
** Debug-only consistency check on the order in which the transaction
** related virtual table methods are invoked. Argument op is one of the
** FTS5_XXX values above. The expected state is tracked in p->ts:
**
**   eState==0      no transaction open
**   eState==1      transaction open (after FTS5_BEGIN)
**   eState==2      transaction synced (after FTS5_SYNC)
**
** and ts.iSavepoint holds the most recently opened savepoint, or -1.
** In non-debug builds this is a no-op macro.
*/
static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){
  struct Fts5TransactionState *pState = &p->ts;

  if( op==FTS5_BEGIN ){
    assert( pState->eState==0 );
    pState->eState = 1;
    pState->iSavepoint = -1;
  }else if( op==FTS5_SYNC ){
    assert( pState->eState==1 );
    pState->eState = 2;
  }else if( op==FTS5_COMMIT ){
    assert( pState->eState==2 );
    pState->eState = 0;
  }else if( op==FTS5_ROLLBACK ){
    assert( pState->eState==0 || pState->eState==1 || pState->eState==2 );
    pState->eState = 0;
  }else if( op==FTS5_SAVEPOINT || op==FTS5_RELEASE || op==FTS5_ROLLBACKTO ){
    /* All three savepoint operations require an open, unsynced transaction
    ** and a non-negative savepoint number. */
    assert( pState->eState==1 );
    assert( iSavepoint>=0 );
    if( op==FTS5_SAVEPOINT ){
      assert( iSavepoint>pState->iSavepoint );
      pState->iSavepoint = iSavepoint;
    }else{
      assert( iSavepoint<=pState->iSavepoint );
      pState->iSavepoint = (op==FTS5_RELEASE) ? iSavepoint-1 : iSavepoint;
    }
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
| 14768 |
| 14769 /* |
| 14770 ** Return true if pTab is a contentless table. |
| 14771 */ |
| 14772 static int fts5IsContentless(Fts5Table *pTab){ |
| 14773 return pTab->pConfig->eContent==FTS5_CONTENT_NONE; |
| 14774 } |
| 14775 |
| 14776 /* |
| 14777 ** Delete a virtual table handle allocated by fts5InitVtab(). |
| 14778 */ |
| 14779 static void fts5FreeVtab(Fts5Table *pTab){ |
| 14780 if( pTab ){ |
| 14781 sqlite3Fts5IndexClose(pTab->pIndex); |
| 14782 sqlite3Fts5StorageClose(pTab->pStorage); |
| 14783 sqlite3Fts5ConfigFree(pTab->pConfig); |
| 14784 sqlite3_free(pTab); |
| 14785 } |
| 14786 } |
| 14787 |
| 14788 /* |
| 14789 ** The xDisconnect() virtual table method. |
| 14790 */ |
| 14791 static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ |
| 14792 fts5FreeVtab((Fts5Table*)pVtab); |
| 14793 return SQLITE_OK; |
| 14794 } |
| 14795 |
| 14796 /* |
| 14797 ** The xDestroy() virtual table method. |
| 14798 */ |
| 14799 static int fts5DestroyMethod(sqlite3_vtab *pVtab){ |
| 14800 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 14801 int rc = sqlite3Fts5DropAll(pTab->pConfig); |
| 14802 if( rc==SQLITE_OK ){ |
| 14803 fts5FreeVtab((Fts5Table*)pVtab); |
| 14804 } |
| 14805 return rc; |
| 14806 } |
| 14807 |
| 14808 /* |
| 14809 ** This function is the implementation of both the xConnect and xCreate |
| 14810 ** methods of the FTS3 virtual table. |
| 14811 ** |
| 14812 ** The argv[] array contains the following: |
| 14813 ** |
| 14814 ** argv[0] -> module name ("fts5") |
| 14815 ** argv[1] -> database name |
| 14816 ** argv[2] -> table name |
| 14817 ** argv[...] -> "column name" and other module argument fields. |
| 14818 */ |
| 14819 static int fts5InitVtab( |
| 14820 int bCreate, /* True for xCreate, false for xConnect */ |
| 14821 sqlite3 *db, /* The SQLite database connection */ |
| 14822 void *pAux, /* Hash table containing tokenizers */ |
| 14823 int argc, /* Number of elements in argv array */ |
| 14824 const char * const *argv, /* xCreate/xConnect argument array */ |
| 14825 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
| 14826 char **pzErr /* Write any error message here */ |
| 14827 ){ |
| 14828 Fts5Global *pGlobal = (Fts5Global*)pAux; |
| 14829 const char **azConfig = (const char**)argv; |
| 14830 int rc = SQLITE_OK; /* Return code */ |
| 14831 Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ |
| 14832 Fts5Table *pTab = 0; /* New virtual table object */ |
| 14833 |
| 14834 /* Allocate the new vtab object and parse the configuration */ |
| 14835 pTab = (Fts5Table*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Table)); |
| 14836 if( rc==SQLITE_OK ){ |
| 14837 rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); |
| 14838 assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); |
| 14839 } |
| 14840 if( rc==SQLITE_OK ){ |
| 14841 pTab->pConfig = pConfig; |
| 14842 pTab->pGlobal = pGlobal; |
| 14843 } |
| 14844 |
| 14845 /* Open the index sub-system */ |
| 14846 if( rc==SQLITE_OK ){ |
| 14847 rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr); |
| 14848 } |
| 14849 |
| 14850 /* Open the storage sub-system */ |
| 14851 if( rc==SQLITE_OK ){ |
| 14852 rc = sqlite3Fts5StorageOpen( |
| 14853 pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr |
| 14854 ); |
| 14855 } |
| 14856 |
| 14857 /* Call sqlite3_declare_vtab() */ |
| 14858 if( rc==SQLITE_OK ){ |
| 14859 rc = sqlite3Fts5ConfigDeclareVtab(pConfig); |
| 14860 } |
| 14861 |
| 14862 /* Load the initial configuration */ |
| 14863 if( rc==SQLITE_OK ){ |
| 14864 assert( pConfig->pzErrmsg==0 ); |
| 14865 pConfig->pzErrmsg = pzErr; |
| 14866 rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); |
| 14867 sqlite3Fts5IndexRollback(pTab->pIndex); |
| 14868 pConfig->pzErrmsg = 0; |
| 14869 } |
| 14870 |
| 14871 if( rc!=SQLITE_OK ){ |
| 14872 fts5FreeVtab(pTab); |
| 14873 pTab = 0; |
| 14874 }else if( bCreate ){ |
| 14875 fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); |
| 14876 } |
| 14877 *ppVTab = (sqlite3_vtab*)pTab; |
| 14878 return rc; |
| 14879 } |
| 14880 |
| 14881 /* |
| 14882 ** The xConnect() and xCreate() methods for the virtual table. All the |
| 14883 ** work is done in function fts5InitVtab(). |
| 14884 */ |
| 14885 static int fts5ConnectMethod( |
| 14886 sqlite3 *db, /* Database connection */ |
| 14887 void *pAux, /* Pointer to tokenizer hash table */ |
| 14888 int argc, /* Number of elements in argv array */ |
| 14889 const char * const *argv, /* xCreate/xConnect argument array */ |
| 14890 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 14891 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 14892 ){ |
| 14893 return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); |
| 14894 } |
| 14895 static int fts5CreateMethod( |
| 14896 sqlite3 *db, /* Database connection */ |
| 14897 void *pAux, /* Pointer to tokenizer hash table */ |
| 14898 int argc, /* Number of elements in argv array */ |
| 14899 const char * const *argv, /* xCreate/xConnect argument array */ |
| 14900 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 14901 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 14902 ){ |
| 14903 return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); |
| 14904 } |
| 14905 |
/*
** The different query plans.
**
** The numeric values matter: fts5NextMethod() tests (ePlan<3) to identify
** the two plans that are advanced via the Fts5Expr object, and asserts
** that these are exactly FTS5_PLAN_MATCH and FTS5_PLAN_SOURCE.
*/
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
| 14915 |
| 14916 /* |
| 14917 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this |
| 14918 ** extension is currently being used by a version of SQLite too old to |
| 14919 ** support index-info flags. In that case this function is a no-op. |
| 14920 */ |
| 14921 static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ |
| 14922 #if SQLITE_VERSION_NUMBER>=3008012 |
| 14923 #ifndef SQLITE_CORE |
| 14924 if( sqlite3_libversion_number()>=3008012 ) |
| 14925 #endif |
| 14926 { |
| 14927 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; |
| 14928 } |
| 14929 #endif |
| 14930 } |
| 14931 |
/*
** Implementation of the xBestIndex method for FTS5 tables. Within the 
** WHERE constraint, it searches for the following:
**
**   1. A MATCH constraint against the special column.
**   2. A MATCH constraint against the "rank" column.
**   3. An == constraint against the rowid column.
**   4. A < or <= constraint against the rowid column.
**   5. A > or >= constraint against the rowid column.
**
** Within the ORDER BY, either:
**
**   6. ORDER BY rank [ASC|DESC]
**   7. ORDER BY rowid [ASC|DESC]
**
** Costs are assigned as follows:
**
**  a) If an unusable MATCH operator is present in the WHERE clause, the
**     cost is unconditionally set to 1e50 (a really big number) and the
**     function returns immediately. Because the first aConstraint[] entry
**     accepts both MATCH and ==, an unusable == constraint against the
**     special column is treated the same way.
**
**  b) If a MATCH operator is present, the cost depends on the other
**     constraints also present. As follows:
**
**       * No other constraints:         cost=1000.0
**       * One rowid range constraint:   cost=750.0
**       * Both rowid range constraints: cost=500.0
**       * An == rowid constraint:       cost=100.0
**
**  c) Otherwise, if there is no MATCH:
**
**       * No other constraints:         cost=1000000.0
**       * One rowid range constraint:   cost=750000.0
**       * Both rowid range constraints: cost=250000.0
**       * An == rowid constraint:       cost=10.0
**
** Costs are not modified by the ORDER BY clause.
**
** The set of constraints and ORDER BY handling selected is encoded as a
** mask of FTS5_BI_XXX flags and stored in pInfo->idxNum.
*/
static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts5Table *pTab = (Fts5Table*)pVTab;
  Fts5Config *pConfig = pTab->pConfig;
  int idxFlags = 0;               /* Parameter passed through to xFilter() */
  int bHasMatch;
  int iNext;
  int i;

  /* One entry for each kind of WHERE constraint that may be used. The
  ** order of entries determines the order in which argvIndex values are
  ** handed out at the end of this function. */
  struct Constraint {
    int op;                       /* Mask against sqlite3_index_constraint.op */
    int fts5op;                   /* FTS5 mask for idxFlags */
    int iCol;                     /* 0==rowid, 1==tbl, 2==rank */
    int omit;                     /* True to omit this if found */
    int iConsIndex;               /* Index in pInfo->aConstraint[] */
  } aConstraint[] = {
    {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ, 
                                    FTS5_BI_MATCH,    1, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ, 
                                    FTS5_BI_RANK,     2, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_EQ,    FTS5_BI_ROWID_EQ, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE, 
                                    FTS5_BI_ROWID_LE, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE, 
                                    FTS5_BI_ROWID_GE, 0, 0, -1},
  };

  /* Map from Constraint.iCol to the column number used by SQLite: -1 for
  ** the rowid, nCol for the special column and nCol+1 for "rank". */
  int aColMap[3];
  aColMap[0] = -1;
  aColMap[1] = pConfig->nCol;
  aColMap[2] = pConfig->nCol+1;

  /* Set idxFlags flags for all WHERE clause terms that will be used. */
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
    int j;
    for(j=0; j<ArraySize(aConstraint); j++){
      struct Constraint *pC = &aConstraint[j];
      if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){
        if( p->usable ){
          /* If several constraints match the same entry, the last one
          ** visited is the one that is recorded. */
          pC->iConsIndex = i;
          idxFlags |= pC->fts5op;
        }else if( j==0 ){
          /* As there exists an unusable MATCH constraint this is an 
          ** unusable plan. Set a prohibitively high cost. */
          pInfo->estimatedCost = 1e50;
          return SQLITE_OK;
        }
      }
    }
  }

  /* Set idxFlags flags for the ORDER BY clause */
  if( pInfo->nOrderBy==1 ){
    int iSort = pInfo->aOrderBy[0].iColumn;
    /* ORDER BY rank is only handled when there is also a MATCH constraint */
    if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){
      idxFlags |= FTS5_BI_ORDER_RANK;
    }else if( iSort==-1 ){
      idxFlags |= FTS5_BI_ORDER_ROWID;
    }
    if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
      pInfo->orderByConsumed = 1;
      if( pInfo->aOrderBy[0].desc ){
        idxFlags |= FTS5_BI_ORDER_DESC;
      }
    }
  }

  /* Calculate the estimated cost based on the flags set in idxFlags. */
  bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH);
  if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){
    pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0;
    if( bHasMatch==0 ) fts5SetUniqueFlag(pInfo);
  }else if( BitFlagAllTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
    pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0;
  }else if( BitFlagTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
    pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
  }else{
    pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
  }

  /* Assign argvIndex values to each constraint in use. */
  iNext = 1;
  for(i=0; i<ArraySize(aConstraint); i++){
    struct Constraint *pC = &aConstraint[i];
    if( pC->iConsIndex>=0 ){
      pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++;
      pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit;
    }
  }

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}
| 15062 |
| 15063 static int fts5NewTransaction(Fts5Table *pTab){ |
| 15064 Fts5Cursor *pCsr; |
| 15065 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
| 15066 if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK; |
| 15067 } |
| 15068 return sqlite3Fts5StorageReset(pTab->pStorage); |
| 15069 } |
| 15070 |
| 15071 /* |
| 15072 ** Implementation of xOpen method. |
| 15073 */ |
| 15074 static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 15075 Fts5Table *pTab = (Fts5Table*)pVTab; |
| 15076 Fts5Config *pConfig = pTab->pConfig; |
| 15077 Fts5Cursor *pCsr = 0; /* New cursor object */ |
| 15078 int nByte; /* Bytes of space to allocate */ |
| 15079 int rc; /* Return code */ |
| 15080 |
| 15081 rc = fts5NewTransaction(pTab); |
| 15082 if( rc==SQLITE_OK ){ |
| 15083 nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); |
| 15084 pCsr = (Fts5Cursor*)sqlite3_malloc(nByte); |
| 15085 if( pCsr ){ |
| 15086 Fts5Global *pGlobal = pTab->pGlobal; |
| 15087 memset(pCsr, 0, nByte); |
| 15088 pCsr->aColumnSize = (int*)&pCsr[1]; |
| 15089 pCsr->pNext = pGlobal->pCsr; |
| 15090 pGlobal->pCsr = pCsr; |
| 15091 pCsr->iCsrId = ++pGlobal->iNextId; |
| 15092 }else{ |
| 15093 rc = SQLITE_NOMEM; |
| 15094 } |
| 15095 } |
| 15096 *ppCsr = (sqlite3_vtab_cursor*)pCsr; |
| 15097 return rc; |
| 15098 } |
| 15099 |
| 15100 static int fts5StmtType(Fts5Cursor *pCsr){ |
| 15101 if( pCsr->ePlan==FTS5_PLAN_SCAN ){ |
| 15102 return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; |
| 15103 } |
| 15104 return FTS5_STMT_LOOKUP; |
| 15105 } |
| 15106 |
| 15107 /* |
| 15108 ** This function is called after the cursor passed as the only argument |
| 15109 ** is moved to point at a different row. It clears all cached data |
| 15110 ** specific to the previous row stored by the cursor object. |
| 15111 */ |
| 15112 static void fts5CsrNewrow(Fts5Cursor *pCsr){ |
| 15113 CsrFlagSet(pCsr, |
| 15114 FTS5CSR_REQUIRE_CONTENT |
| 15115 | FTS5CSR_REQUIRE_DOCSIZE |
| 15116 | FTS5CSR_REQUIRE_INST |
| 15117 | FTS5CSR_REQUIRE_POSLIST |
| 15118 ); |
| 15119 } |
| 15120 |
| 15121 static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ |
| 15122 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 15123 Fts5Auxdata *pData; |
| 15124 Fts5Auxdata *pNext; |
| 15125 |
| 15126 sqlite3_free(pCsr->aInstIter); |
| 15127 sqlite3_free(pCsr->aInst); |
| 15128 if( pCsr->pStmt ){ |
| 15129 int eStmt = fts5StmtType(pCsr); |
| 15130 sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); |
| 15131 } |
| 15132 if( pCsr->pSorter ){ |
| 15133 Fts5Sorter *pSorter = pCsr->pSorter; |
| 15134 sqlite3_finalize(pSorter->pStmt); |
| 15135 sqlite3_free(pSorter); |
| 15136 } |
| 15137 |
| 15138 if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){ |
| 15139 sqlite3Fts5ExprFree(pCsr->pExpr); |
| 15140 } |
| 15141 |
| 15142 for(pData=pCsr->pAuxdata; pData; pData=pNext){ |
| 15143 pNext = pData->pNext; |
| 15144 if( pData->xDelete ) pData->xDelete(pData->pPtr); |
| 15145 sqlite3_free(pData); |
| 15146 } |
| 15147 |
| 15148 sqlite3_finalize(pCsr->pRankArgStmt); |
| 15149 sqlite3_free(pCsr->apRankArg); |
| 15150 |
| 15151 if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ |
| 15152 sqlite3_free(pCsr->zRank); |
| 15153 sqlite3_free(pCsr->zRankArgs); |
| 15154 } |
| 15155 |
| 15156 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); |
| 15157 } |
| 15158 |
| 15159 |
| 15160 /* |
| 15161 ** Close the cursor. For additional information see the documentation |
| 15162 ** on the xClose method of the virtual table interface. |
| 15163 */ |
| 15164 static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 15165 if( pCursor ){ |
| 15166 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); |
| 15167 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 15168 Fts5Cursor **pp; |
| 15169 |
| 15170 fts5FreeCursorComponents(pCsr); |
| 15171 /* Remove the cursor from the Fts5Global.pCsr list */ |
| 15172 for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); |
| 15173 *pp = pCsr->pNext; |
| 15174 |
| 15175 sqlite3_free(pCsr); |
| 15176 } |
| 15177 return SQLITE_OK; |
| 15178 } |
| 15179 |
| 15180 static int fts5SorterNext(Fts5Cursor *pCsr){ |
| 15181 Fts5Sorter *pSorter = pCsr->pSorter; |
| 15182 int rc; |
| 15183 |
| 15184 rc = sqlite3_step(pSorter->pStmt); |
| 15185 if( rc==SQLITE_DONE ){ |
| 15186 rc = SQLITE_OK; |
| 15187 CsrFlagSet(pCsr, FTS5CSR_EOF); |
| 15188 }else if( rc==SQLITE_ROW ){ |
| 15189 const u8 *a; |
| 15190 const u8 *aBlob; |
| 15191 int nBlob; |
| 15192 int i; |
| 15193 int iOff = 0; |
| 15194 rc = SQLITE_OK; |
| 15195 |
| 15196 pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); |
| 15197 nBlob = sqlite3_column_bytes(pSorter->pStmt, 1); |
| 15198 aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); |
| 15199 |
| 15200 /* nBlob==0 in detail=none mode. */ |
| 15201 if( nBlob>0 ){ |
| 15202 for(i=0; i<(pSorter->nIdx-1); i++){ |
| 15203 int iVal; |
| 15204 a += fts5GetVarint32(a, iVal); |
| 15205 iOff += iVal; |
| 15206 pSorter->aIdx[i] = iOff; |
| 15207 } |
| 15208 pSorter->aIdx[i] = &aBlob[nBlob] - a; |
| 15209 pSorter->aPoslist = a; |
| 15210 } |
| 15211 |
| 15212 fts5CsrNewrow(pCsr); |
| 15213 } |
| 15214 |
| 15215 return rc; |
| 15216 } |
| 15217 |
| 15218 |
| 15219 /* |
| 15220 ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors |
| 15221 ** open on table pTab. |
| 15222 */ |
| 15223 static void fts5TripCursors(Fts5Table *pTab){ |
| 15224 Fts5Cursor *pCsr; |
| 15225 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
| 15226 if( pCsr->ePlan==FTS5_PLAN_MATCH |
| 15227 && pCsr->base.pVtab==(sqlite3_vtab*)pTab |
| 15228 ){ |
| 15229 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK); |
| 15230 } |
| 15231 } |
| 15232 } |
| 15233 |
| 15234 /* |
| 15235 ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first |
| 15236 ** argument, close and reopen all Fts5IndexIter iterators that the cursor |
| 15237 ** is using. Then attempt to move the cursor to a rowid equal to or laster |
| 15238 ** (in the cursors sort order - ASC or DESC) than the current rowid. |
| 15239 ** |
| 15240 ** If the new rowid is not equal to the old, set output parameter *pbSkip |
| 15241 ** to 1 before returning. Otherwise, leave it unchanged. |
| 15242 ** |
| 15243 ** Return SQLITE_OK if successful or if no reseek was required, or an |
| 15244 ** error code if an error occurred. |
| 15245 */ |
| 15246 static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ |
| 15247 int rc = SQLITE_OK; |
| 15248 assert( *pbSkip==0 ); |
| 15249 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){ |
| 15250 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 15251 int bDesc = pCsr->bDesc; |
| 15252 i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); |
| 15253 |
| 15254 rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iRowid, bDesc); |
| 15255 if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ |
| 15256 *pbSkip = 1; |
| 15257 } |
| 15258 |
| 15259 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK); |
| 15260 fts5CsrNewrow(pCsr); |
| 15261 if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ |
| 15262 CsrFlagSet(pCsr, FTS5CSR_EOF); |
| 15263 *pbSkip = 1; |
| 15264 } |
| 15265 } |
| 15266 return rc; |
| 15267 } |
| 15268 |
| 15269 |
| 15270 /* |
| 15271 ** Advance the cursor to the next row in the table that matches the |
| 15272 ** search criteria. |
| 15273 ** |
| 15274 ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned |
| 15275 ** even if we reach end-of-file. The fts5EofMethod() will be called |
| 15276 ** subsequently to determine whether or not an EOF was hit. |
| 15277 */ |
| 15278 static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ |
| 15279 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 15280 int rc; |
| 15281 |
| 15282 assert( (pCsr->ePlan<3)== |
| 15283 (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE) |
| 15284 ); |
| 15285 assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) ); |
| 15286 |
| 15287 if( pCsr->ePlan<3 ){ |
| 15288 int bSkip = 0; |
| 15289 if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; |
| 15290 rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); |
| 15291 CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr)); |
| 15292 fts5CsrNewrow(pCsr); |
| 15293 }else{ |
| 15294 switch( pCsr->ePlan ){ |
| 15295 case FTS5_PLAN_SPECIAL: { |
| 15296 CsrFlagSet(pCsr, FTS5CSR_EOF); |
| 15297 rc = SQLITE_OK; |
| 15298 break; |
| 15299 } |
| 15300 |
| 15301 case FTS5_PLAN_SORTED_MATCH: { |
| 15302 rc = fts5SorterNext(pCsr); |
| 15303 break; |
| 15304 } |
| 15305 |
| 15306 default: |
| 15307 rc = sqlite3_step(pCsr->pStmt); |
| 15308 if( rc!=SQLITE_ROW ){ |
| 15309 CsrFlagSet(pCsr, FTS5CSR_EOF); |
| 15310 rc = sqlite3_reset(pCsr->pStmt); |
| 15311 }else{ |
| 15312 rc = SQLITE_OK; |
| 15313 } |
| 15314 break; |
| 15315 } |
| 15316 } |
| 15317 |
| 15318 return rc; |
| 15319 } |
| 15320 |
| 15321 |
| 15322 static int fts5PrepareStatement( |
| 15323 sqlite3_stmt **ppStmt, |
| 15324 Fts5Config *pConfig, |
| 15325 const char *zFmt, |
| 15326 ... |
| 15327 ){ |
| 15328 sqlite3_stmt *pRet = 0; |
| 15329 int rc; |
| 15330 char *zSql; |
| 15331 va_list ap; |
| 15332 |
| 15333 va_start(ap, zFmt); |
| 15334 zSql = sqlite3_vmprintf(zFmt, ap); |
| 15335 if( zSql==0 ){ |
| 15336 rc = SQLITE_NOMEM; |
| 15337 }else{ |
| 15338 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pRet, 0); |
| 15339 if( rc!=SQLITE_OK ){ |
| 15340 *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db)); |
| 15341 } |
| 15342 sqlite3_free(zSql); |
| 15343 } |
| 15344 |
| 15345 va_end(ap); |
| 15346 *ppStmt = pRet; |
| 15347 return rc; |
| 15348 } |
| 15349 |
| 15350 static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ |
| 15351 Fts5Config *pConfig = pTab->pConfig; |
| 15352 Fts5Sorter *pSorter; |
| 15353 int nPhrase; |
| 15354 int nByte; |
| 15355 int rc; |
| 15356 const char *zRank = pCsr->zRank; |
| 15357 const char *zRankArgs = pCsr->zRankArgs; |
| 15358 |
| 15359 nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
| 15360 nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1); |
| 15361 pSorter = (Fts5Sorter*)sqlite3_malloc(nByte); |
| 15362 if( pSorter==0 ) return SQLITE_NOMEM; |
| 15363 memset(pSorter, 0, nByte); |
| 15364 pSorter->nIdx = nPhrase; |
| 15365 |
| 15366 /* TODO: It would be better to have some system for reusing statement |
| 15367 ** handles here, rather than preparing a new one for each query. But that |
| 15368 ** is not possible as SQLite reference counts the virtual table objects. |
| 15369 ** And since the statement required here reads from this very virtual |
| 15370 ** table, saving it creates a circular reference. |
| 15371 ** |
| 15372 ** If SQLite a built-in statement cache, this wouldn't be a problem. */ |
| 15373 rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, |
| 15374 "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", |
| 15375 pConfig->zDb, pConfig->zName, zRank, pConfig->zName, |
| 15376 (zRankArgs ? ", " : ""), |
| 15377 (zRankArgs ? zRankArgs : ""), |
| 15378 bDesc ? "DESC" : "ASC" |
| 15379 ); |
| 15380 |
| 15381 pCsr->pSorter = pSorter; |
| 15382 if( rc==SQLITE_OK ){ |
| 15383 assert( pTab->pSortCsr==0 ); |
| 15384 pTab->pSortCsr = pCsr; |
| 15385 rc = fts5SorterNext(pCsr); |
| 15386 pTab->pSortCsr = 0; |
| 15387 } |
| 15388 |
| 15389 if( rc!=SQLITE_OK ){ |
| 15390 sqlite3_finalize(pSorter->pStmt); |
| 15391 sqlite3_free(pSorter); |
| 15392 pCsr->pSorter = 0; |
| 15393 } |
| 15394 |
| 15395 return rc; |
| 15396 } |
| 15397 |
| 15398 static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ |
| 15399 int rc; |
| 15400 Fts5Expr *pExpr = pCsr->pExpr; |
| 15401 rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, pCsr->iFirstRowid, bDesc); |
| 15402 if( sqlite3Fts5ExprEof(pExpr) ){ |
| 15403 CsrFlagSet(pCsr, FTS5CSR_EOF); |
| 15404 } |
| 15405 fts5CsrNewrow(pCsr); |
| 15406 return rc; |
| 15407 } |
| 15408 |
| 15409 /* |
| 15410 ** Process a "special" query. A special query is identified as one with a |
| 15411 ** MATCH expression that begins with a '*' character. The remainder of |
| 15412 ** the text passed to the MATCH operator are used as the special query |
| 15413 ** parameters. |
| 15414 */ |
| 15415 static int fts5SpecialMatch( |
| 15416 Fts5Table *pTab, |
| 15417 Fts5Cursor *pCsr, |
| 15418 const char *zQuery |
| 15419 ){ |
| 15420 int rc = SQLITE_OK; /* Return code */ |
| 15421 const char *z = zQuery; /* Special query text */ |
| 15422 int n; /* Number of bytes in text at z */ |
| 15423 |
| 15424 while( z[0]==' ' ) z++; |
| 15425 for(n=0; z[n] && z[n]!=' '; n++); |
| 15426 |
| 15427 assert( pTab->base.zErrMsg==0 ); |
| 15428 pCsr->ePlan = FTS5_PLAN_SPECIAL; |
| 15429 |
| 15430 if( 0==sqlite3_strnicmp("reads", z, n) ){ |
| 15431 pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex); |
| 15432 } |
| 15433 else if( 0==sqlite3_strnicmp("id", z, n) ){ |
| 15434 pCsr->iSpecial = pCsr->iCsrId; |
| 15435 } |
| 15436 else{ |
| 15437 /* An unrecognized directive. Return an error message. */ |
| 15438 pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z); |
| 15439 rc = SQLITE_ERROR; |
| 15440 } |
| 15441 |
| 15442 return rc; |
| 15443 } |
| 15444 |
| 15445 /* |
| 15446 ** Search for an auxiliary function named zName that can be used with table |
| 15447 ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary |
| 15448 ** structure. Otherwise, if no such function exists, return NULL. |
| 15449 */ |
| 15450 static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){ |
| 15451 Fts5Auxiliary *pAux; |
| 15452 |
| 15453 for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ |
| 15454 if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux; |
| 15455 } |
| 15456 |
| 15457 /* No function of the specified name was found. Return 0. */ |
| 15458 return 0; |
| 15459 } |
| 15460 |
| 15461 |
/*
** Resolve the rank function configured for cursor pCsr (pCsr->zRank) and
** evaluate its trailing arguments (pCsr->zRankArgs), if any.
**
** If zRankArgs is not NULL, the statement "SELECT <zRankArgs>" is prepared
** and stepped once. Each column of the resulting row becomes one entry in
** pCsr->apRankArg[]. The sqlite3_value objects are obtained using
** sqlite3_column_value(), so the statement is kept in pCsr->pRankArgStmt
** rather than finalized here.
**
** The function named zRank is then looked up using fts5FindAuxiliary().
** If it cannot be found, an error message is left in pTab->base.zErrMsg
** and SQLITE_ERROR returned.
**
** pCsr->pRank is always written: to the function found, or to NULL if an
** error occurred. Returns SQLITE_OK on success or an error code.
*/
static int fts5FindRankFunction(Fts5Cursor *pCsr){
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  int rc = SQLITE_OK;
  Fts5Auxiliary *pAux = 0;
  const char *zRank = pCsr->zRank;
  const char *zRankArgs = pCsr->zRankArgs;

  if( zRankArgs ){
    /* If the allocation fails, zSql is NULL and the block below is
    ** skipped. NOTE(review): rc is presumably set to an error code by
    ** sqlite3Fts5Mprintf() in that case - confirm. */
    char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
    if( zSql ){
      sqlite3_stmt *pStmt = 0;
      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
      sqlite3_free(zSql);
      assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
      if( rc==SQLITE_OK ){
        if( SQLITE_ROW==sqlite3_step(pStmt) ){
          int nByte;
          pCsr->nRankArg = sqlite3_column_count(pStmt);
          nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
          pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
          if( rc==SQLITE_OK ){
            int i;
            for(i=0; i<pCsr->nRankArg; i++){
              pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
            }
          }
          /* The statement is handed to the cursor even if the allocation
          ** above failed. It is finalized by fts5FreeCursorComponents(). */
          pCsr->pRankArgStmt = pStmt;
        }else{
          /* No row was returned. The assert() documents the expectation
          ** that this only happens when the statement hit an error. */
          rc = sqlite3_finalize(pStmt);
          assert( rc!=SQLITE_OK );
        }
      }
    }
  }

  if( rc==SQLITE_OK ){
    pAux = fts5FindAuxiliary(pTab, zRank);
    if( pAux==0 ){
      assert( pTab->base.zErrMsg==0 );
      pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
      rc = SQLITE_ERROR;
    }
  }

  pCsr->pRank = pAux;
  return rc;
}
| 15510 |
| 15511 |
| 15512 static int fts5CursorParseRank( |
| 15513 Fts5Config *pConfig, |
| 15514 Fts5Cursor *pCsr, |
| 15515 sqlite3_value *pRank |
| 15516 ){ |
| 15517 int rc = SQLITE_OK; |
| 15518 if( pRank ){ |
| 15519 const char *z = (const char*)sqlite3_value_text(pRank); |
| 15520 char *zRank = 0; |
| 15521 char *zRankArgs = 0; |
| 15522 |
| 15523 if( z==0 ){ |
| 15524 if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR; |
| 15525 }else{ |
| 15526 rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); |
| 15527 } |
| 15528 if( rc==SQLITE_OK ){ |
| 15529 pCsr->zRank = zRank; |
| 15530 pCsr->zRankArgs = zRankArgs; |
| 15531 CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); |
| 15532 }else if( rc==SQLITE_ERROR ){ |
| 15533 pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( |
| 15534 "parse error in rank function: %s", z |
| 15535 ); |
| 15536 } |
| 15537 }else{ |
| 15538 if( pConfig->zRank ){ |
| 15539 pCsr->zRank = (char*)pConfig->zRank; |
| 15540 pCsr->zRankArgs = (char*)pConfig->zRankArgs; |
| 15541 }else{ |
| 15542 pCsr->zRank = (char*)FTS5_DEFAULT_RANK; |
| 15543 pCsr->zRankArgs = 0; |
| 15544 } |
| 15545 } |
| 15546 return rc; |
| 15547 } |
| 15548 |
| 15549 static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ |
| 15550 if( pVal ){ |
| 15551 int eType = sqlite3_value_numeric_type(pVal); |
| 15552 if( eType==SQLITE_INTEGER ){ |
| 15553 return sqlite3_value_int64(pVal); |
| 15554 } |
| 15555 } |
| 15556 return iDefault; |
| 15557 } |
| 15558 |
| 15559 /* |
| 15560 ** This is the xFilter interface for the virtual table. See |
| 15561 ** the virtual table xFilter method documentation for additional |
| 15562 ** information. |
| 15563 ** |
| 15564 ** There are three possible query strategies: |
| 15565 ** |
| 15566 ** 1. Full-text search using a MATCH operator. |
| 15567 ** 2. A by-rowid lookup. |
| 15568 ** 3. A full-table scan. |
| 15569 */ |
| 15570 static int fts5FilterMethod( |
| 15571 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 15572 int idxNum, /* Strategy index */ |
| 15573 const char *zUnused, /* Unused */ |
| 15574 int nVal, /* Number of elements in apVal */ |
| 15575 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 15576 ){ |
| 15577 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); |
| 15578 Fts5Config *pConfig = pTab->pConfig; |
| 15579 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 15580 int rc = SQLITE_OK; /* Error code */ |
| 15581 int iVal = 0; /* Counter for apVal[] */ |
| 15582 int bDesc; /* True if ORDER BY [rank|rowid] DESC */ |
| 15583 int bOrderByRank; /* True if ORDER BY rank */ |
| 15584 sqlite3_value *pMatch = 0; /* <tbl> MATCH ? expression (or NULL) */ |
| 15585 sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ |
| 15586 sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ |
| 15587 sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ |
| 15588 sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ |
| 15589 char **pzErrmsg = pConfig->pzErrmsg; |
| 15590 |
| 15591 UNUSED_PARAM(zUnused); |
| 15592 UNUSED_PARAM(nVal); |
| 15593 |
| 15594 if( pCsr->ePlan ){ |
| 15595 fts5FreeCursorComponents(pCsr); |
| 15596 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); |
| 15597 } |
| 15598 |
| 15599 assert( pCsr->pStmt==0 ); |
| 15600 assert( pCsr->pExpr==0 ); |
| 15601 assert( pCsr->csrflags==0 ); |
| 15602 assert( pCsr->pRank==0 ); |
| 15603 assert( pCsr->zRank==0 ); |
| 15604 assert( pCsr->zRankArgs==0 ); |
| 15605 |
| 15606 assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); |
| 15607 pConfig->pzErrmsg = &pTab->base.zErrMsg; |
| 15608 |
| 15609 /* Decode the arguments passed through to this function. |
| 15610 ** |
| 15611 ** Note: The following set of if(...) statements must be in the same |
| 15612 ** order as the corresponding entries in the struct at the top of |
| 15613 ** fts5BestIndexMethod(). */ |
| 15614 if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++]; |
| 15615 if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++]; |
| 15616 if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++]; |
| 15617 if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++]; |
| 15618 if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++]; |
| 15619 assert( iVal==nVal ); |
| 15620 bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0); |
| 15621 pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0); |
| 15622 |
| 15623 /* Set the cursor upper and lower rowid limits. Only some strategies |
| 15624 ** actually use them. This is ok, as the xBestIndex() method leaves the |
| 15625 ** sqlite3_index_constraint.omit flag clear for range constraints |
| 15626 ** on the rowid field. */ |
| 15627 if( pRowidEq ){ |
| 15628 pRowidLe = pRowidGe = pRowidEq; |
| 15629 } |
| 15630 if( bDesc ){ |
| 15631 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); |
| 15632 pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); |
| 15633 }else{ |
| 15634 pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); |
| 15635 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); |
| 15636 } |
| 15637 |
| 15638 if( pTab->pSortCsr ){ |
| 15639 /* If pSortCsr is non-NULL, then this call is being made as part of |
| 15640 ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is |
| 15641 ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will |
| 15642 ** return results to the user for this query. The current cursor |
| 15643 ** (pCursor) is used to execute the query issued by function |
| 15644 ** fts5CursorFirstSorted() above. */ |
| 15645 assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 ); |
| 15646 assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 ); |
| 15647 assert( pCsr->iLastRowid==LARGEST_INT64 ); |
| 15648 assert( pCsr->iFirstRowid==SMALLEST_INT64 ); |
| 15649 pCsr->ePlan = FTS5_PLAN_SOURCE; |
| 15650 pCsr->pExpr = pTab->pSortCsr->pExpr; |
| 15651 rc = fts5CursorFirst(pTab, pCsr, bDesc); |
| 15652 }else if( pMatch ){ |
| 15653 const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); |
| 15654 if( zExpr==0 ) zExpr = ""; |
| 15655 |
| 15656 rc = fts5CursorParseRank(pConfig, pCsr, pRank); |
| 15657 if( rc==SQLITE_OK ){ |
| 15658 if( zExpr[0]=='*' ){ |
| 15659 /* The user has issued a query of the form "MATCH '*...'". This |
| 15660 ** indicates that the MATCH expression is not a full text query, |
| 15661 ** but a request for an internal parameter. */ |
| 15662 rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); |
| 15663 }else{ |
| 15664 char **pzErr = &pTab->base.zErrMsg; |
| 15665 rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr); |
| 15666 if( rc==SQLITE_OK ){ |
| 15667 if( bOrderByRank ){ |
| 15668 pCsr->ePlan = FTS5_PLAN_SORTED_MATCH; |
| 15669 rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); |
| 15670 }else{ |
| 15671 pCsr->ePlan = FTS5_PLAN_MATCH; |
| 15672 rc = fts5CursorFirst(pTab, pCsr, bDesc); |
| 15673 } |
| 15674 } |
| 15675 } |
| 15676 } |
| 15677 }else if( pConfig->zContent==0 ){ |
| 15678 *pConfig->pzErrmsg = sqlite3_mprintf( |
| 15679 "%s: table does not support scanning", pConfig->zName |
| 15680 ); |
| 15681 rc = SQLITE_ERROR; |
| 15682 }else{ |
| 15683 /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup |
| 15684 ** by rowid (ePlan==FTS5_PLAN_ROWID). */ |
| 15685 pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN); |
| 15686 rc = sqlite3Fts5StorageStmt( |
| 15687 pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg |
| 15688 ); |
| 15689 if( rc==SQLITE_OK ){ |
| 15690 if( pCsr->ePlan==FTS5_PLAN_ROWID ){ |
| 15691 sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); |
| 15692 }else{ |
| 15693 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); |
| 15694 sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); |
| 15695 } |
| 15696 rc = fts5NextMethod(pCursor); |
| 15697 } |
| 15698 } |
| 15699 |
| 15700 pConfig->pzErrmsg = pzErrmsg; |
| 15701 return rc; |
| 15702 } |
| 15703 |
| 15704 /* |
| 15705 ** This is the xEof method of the virtual table. SQLite calls this |
| 15706 ** routine to find out if it has reached the end of a result set. |
| 15707 */ |
| 15708 static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ |
| 15709 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 15710 return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0); |
| 15711 } |
| 15712 |
| 15713 /* |
| 15714 ** Return the rowid that the cursor currently points to. |
| 15715 */ |
| 15716 static i64 fts5CursorRowid(Fts5Cursor *pCsr){ |
| 15717 assert( pCsr->ePlan==FTS5_PLAN_MATCH |
| 15718 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH |
| 15719 || pCsr->ePlan==FTS5_PLAN_SOURCE |
| 15720 ); |
| 15721 if( pCsr->pSorter ){ |
| 15722 return pCsr->pSorter->iRowid; |
| 15723 }else{ |
| 15724 return sqlite3Fts5ExprRowid(pCsr->pExpr); |
| 15725 } |
| 15726 } |
| 15727 |
| 15728 /* |
| 15729 ** This is the xRowid method. The SQLite core calls this routine to |
| 15730 ** retrieve the rowid for the current row of the result set. fts5 |
| 15731 ** exposes %_content.rowid as the rowid for the virtual table. The |
| 15732 ** rowid should be written to *pRowid. |
| 15733 */ |
| 15734 static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |
| 15735 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 15736 int ePlan = pCsr->ePlan; |
| 15737 |
| 15738 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); |
| 15739 switch( ePlan ){ |
| 15740 case FTS5_PLAN_SPECIAL: |
| 15741 *pRowid = 0; |
| 15742 break; |
| 15743 |
| 15744 case FTS5_PLAN_SOURCE: |
| 15745 case FTS5_PLAN_MATCH: |
| 15746 case FTS5_PLAN_SORTED_MATCH: |
| 15747 *pRowid = fts5CursorRowid(pCsr); |
| 15748 break; |
| 15749 |
| 15750 default: |
| 15751 *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); |
| 15752 break; |
| 15753 } |
| 15754 |
| 15755 return SQLITE_OK; |
| 15756 } |
| 15757 |
| 15758 /* |
| 15759 ** If the cursor requires seeking (bSeekRequired flag is set), seek it. |
| 15760 ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. |
| 15761 ** |
| 15762 ** If argument bErrormsg is true and an error occurs, an error message may |
| 15763 ** be left in sqlite3_vtab.zErrMsg. |
| 15764 */ |
| 15765 static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ |
| 15766 int rc = SQLITE_OK; |
| 15767 |
| 15768 /* If the cursor does not yet have a statement handle, obtain one now. */ |
| 15769 if( pCsr->pStmt==0 ){ |
| 15770 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 15771 int eStmt = fts5StmtType(pCsr); |
| 15772 rc = sqlite3Fts5StorageStmt( |
| 15773 pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->base.zErrMsg:0) |
| 15774 ); |
| 15775 assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 ); |
| 15776 assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); |
| 15777 } |
| 15778 |
| 15779 if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ |
| 15780 assert( pCsr->pExpr ); |
| 15781 sqlite3_reset(pCsr->pStmt); |
| 15782 sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); |
| 15783 rc = sqlite3_step(pCsr->pStmt); |
| 15784 if( rc==SQLITE_ROW ){ |
| 15785 rc = SQLITE_OK; |
| 15786 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT); |
| 15787 }else{ |
| 15788 rc = sqlite3_reset(pCsr->pStmt); |
| 15789 if( rc==SQLITE_OK ){ |
| 15790 rc = FTS5_CORRUPT; |
| 15791 } |
| 15792 } |
| 15793 } |
| 15794 return rc; |
| 15795 } |
| 15796 |
| 15797 static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){ |
| 15798 va_list ap; /* ... printf arguments */ |
| 15799 va_start(ap, zFormat); |
| 15800 assert( p->base.zErrMsg==0 ); |
| 15801 p->base.zErrMsg = sqlite3_vmprintf(zFormat, ap); |
| 15802 va_end(ap); |
| 15803 } |
| 15804 |
| 15805 /* |
| 15806 ** This function is called to handle an FTS INSERT command. In other words, |
| 15807 ** an INSERT statement of the form: |
| 15808 ** |
| 15809 ** INSERT INTO fts(fts) VALUES($pCmd) |
| 15810 ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) |
| 15811 ** |
| 15812 ** Argument pVal is the value assigned to column "fts" by the INSERT |
| 15813 ** statement. This function returns SQLITE_OK if successful, or an SQLite |
| 15814 ** error code if an error occurs. |
| 15815 ** |
| 15816 ** The commands implemented by this function are documented in the "Special |
| 15817 ** INSERT Directives" section of the documentation. It should be updated if |
| 15818 ** more commands are added to this function. |
| 15819 */ |
| 15820 static int fts5SpecialInsert( |
| 15821 Fts5Table *pTab, /* Fts5 table object */ |
| 15822 const char *zCmd, /* Text inserted into table-name column */ |
| 15823 sqlite3_value *pVal /* Value inserted into rank column */ |
| 15824 ){ |
| 15825 Fts5Config *pConfig = pTab->pConfig; |
| 15826 int rc = SQLITE_OK; |
| 15827 int bError = 0; |
| 15828 |
| 15829 if( 0==sqlite3_stricmp("delete-all", zCmd) ){ |
| 15830 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 15831 fts5SetVtabError(pTab, |
| 15832 "'delete-all' may only be used with a " |
| 15833 "contentless or external content fts5 table" |
| 15834 ); |
| 15835 rc = SQLITE_ERROR; |
| 15836 }else{ |
| 15837 rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); |
| 15838 } |
| 15839 }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){ |
| 15840 if( pConfig->eContent==FTS5_CONTENT_NONE ){ |
| 15841 fts5SetVtabError(pTab, |
| 15842 "'rebuild' may not be used with a contentless fts5 table" |
| 15843 ); |
| 15844 rc = SQLITE_ERROR; |
| 15845 }else{ |
| 15846 rc = sqlite3Fts5StorageRebuild(pTab->pStorage); |
| 15847 } |
| 15848 }else if( 0==sqlite3_stricmp("optimize", zCmd) ){ |
| 15849 rc = sqlite3Fts5StorageOptimize(pTab->pStorage); |
| 15850 }else if( 0==sqlite3_stricmp("merge", zCmd) ){ |
| 15851 int nMerge = sqlite3_value_int(pVal); |
| 15852 rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); |
| 15853 }else if( 0==sqlite3_stricmp("integrity-check", zCmd) ){ |
| 15854 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); |
| 15855 #ifdef SQLITE_DEBUG |
| 15856 }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){ |
| 15857 pConfig->bPrefixIndex = sqlite3_value_int(pVal); |
| 15858 #endif |
| 15859 }else{ |
| 15860 rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); |
| 15861 if( rc==SQLITE_OK ){ |
| 15862 rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, zCmd, pVal, &bError); |
| 15863 } |
| 15864 if( rc==SQLITE_OK ){ |
| 15865 if( bError ){ |
| 15866 rc = SQLITE_ERROR; |
| 15867 }else{ |
| 15868 rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); |
| 15869 } |
| 15870 } |
| 15871 } |
| 15872 return rc; |
| 15873 } |
| 15874 |
| 15875 static int fts5SpecialDelete( |
| 15876 Fts5Table *pTab, |
| 15877 sqlite3_value **apVal |
| 15878 ){ |
| 15879 int rc = SQLITE_OK; |
| 15880 int eType1 = sqlite3_value_type(apVal[1]); |
| 15881 if( eType1==SQLITE_INTEGER ){ |
| 15882 sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); |
| 15883 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]); |
| 15884 } |
| 15885 return rc; |
| 15886 } |
| 15887 |
| 15888 static void fts5StorageInsert( |
| 15889 int *pRc, |
| 15890 Fts5Table *pTab, |
| 15891 sqlite3_value **apVal, |
| 15892 i64 *piRowid |
| 15893 ){ |
| 15894 int rc = *pRc; |
| 15895 if( rc==SQLITE_OK ){ |
| 15896 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid); |
| 15897 } |
| 15898 if( rc==SQLITE_OK ){ |
| 15899 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); |
| 15900 } |
| 15901 *pRc = rc; |
| 15902 } |
| 15903 |
/*
** This function is the implementation of the xUpdate callback used by
** FTS5 virtual tables. It is invoked by SQLite each time a row is to be
** inserted, updated or deleted.
**
** A delete specifies a single argument - the rowid of the row to remove.
**
** Update and insert operations pass:
**
**   1. The "old" rowid, or NULL.
**   2. The "new" rowid.
**   3. Values for each of the nCol matchable columns.
**   4. Values for the two hidden columns (<tablename> and "rank").
**
** An insert with a NULL "old" rowid and a non-NULL value for the
** <tablename> hidden column is a "special" INSERT: it is dispatched to
** fts5SpecialDelete() or fts5SpecialInsert() instead of modifying a row.
**
** For the duration of the call pConfig->pzErrmsg points at the vtab error
** message; it is reset to NULL before returning. Returns SQLITE_OK if
** successful, or an SQLite error code otherwise.
*/
static int fts5UpdateMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  Fts5Config *pConfig = pTab->pConfig;
  int eType0;                     /* value_type() of apVal[0] */
  int rc = SQLITE_OK;             /* Return code */

  /* A transaction must be open when this is called. */
  assert( pTab->ts.eState==1 );

  assert( pVtab->zErrMsg==0 );
  assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
  assert( nArg==1
      || sqlite3_value_type(apVal[1])==SQLITE_INTEGER
      || sqlite3_value_type(apVal[1])==SQLITE_NULL
  );
  assert( pTab->pConfig->pzErrmsg==0 );
  pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;

  /* Put any active cursors into REQUIRE_SEEK state. */
  fts5TripCursors(pTab);

  /* Note: for a DELETE (nArg==1) apVal[0] is an integer, so the second
  ** operand of the && below - which indexes past apVal[0] - is never
  ** evaluated. */
  eType0 = sqlite3_value_type(apVal[0]);
  if( eType0==SQLITE_NULL
   && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
  ){
    /* A "special" INSERT op. These are handled separately. */
    const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL
      && 0==sqlite3_stricmp("delete", z)
    ){
      rc = fts5SpecialDelete(pTab, apVal);
    }else{
      rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
    }
  }else{
    /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
    ** any conflict on the rowid value must be detected before any
    ** modifications are made to the database file. There are 4 cases:
    **
    **   1) DELETE
    **   2) UPDATE (rowid not modified)
    **   3) UPDATE (rowid modified)
    **   4) INSERT
    **
    ** Cases 3 and 4 may violate the rowid constraint.
    */
    int eConflict = SQLITE_ABORT;
    if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
      eConflict = sqlite3_vtab_on_conflict(pConfig->db);
    }

    assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
    assert( nArg!=1 || eType0==SQLITE_INTEGER );

    /* Filter out attempts to run UPDATE or DELETE on contentless tables.
    ** This is not supported. */
    if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
      pTab->base.zErrMsg = sqlite3_mprintf(
          "cannot %s contentless fts5 table: %s",
          (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
      );
      rc = SQLITE_ERROR;
    }

    /* DELETE */
    else if( nArg==1 ){
      i64 iDel = sqlite3_value_int64(apVal[0]);  /* Rowid to delete */
      rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
    }

    /* INSERT */
    else if( eType0!=SQLITE_INTEGER ){
      /* If this is a REPLACE, first remove the current entry (if any) */
      if( eConflict==SQLITE_REPLACE
       && sqlite3_value_type(apVal[1])==SQLITE_INTEGER
      ){
        i64 iNew = sqlite3_value_int64(apVal[1]);  /* Rowid to delete */
        rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
      }
      /* fts5StorageInsert() is a no-op if rc is already an error. */
      fts5StorageInsert(&rc, pTab, apVal, pRowid);
    }

    /* UPDATE */
    else{
      i64 iOld = sqlite3_value_int64(apVal[0]);  /* Old rowid */
      i64 iNew = sqlite3_value_int64(apVal[1]);  /* New rowid */
      if( iOld!=iNew ){
        if( eConflict==SQLITE_REPLACE ){
          /* Remove both the old row and any row already using the new
          ** rowid, then insert the new row. */
          rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
          }
          fts5StorageInsert(&rc, pTab, apVal, pRowid);
        }else{
          /* The content insert runs first so that it can fail before the
          ** old row is deleted or the index is touched. */
          rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
          }
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *pRowid);
          }
        }
      }else{
        /* Rowid unchanged: delete the old row, then insert the new one. */
        rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
        fts5StorageInsert(&rc, pTab, apVal, pRowid);
      }
    }
  }

  pTab->pConfig->pzErrmsg = 0;
  return rc;
}
| 16035 |
| 16036 /* |
| 16037 ** Implementation of xSync() method. |
| 16038 */ |
| 16039 static int fts5SyncMethod(sqlite3_vtab *pVtab){ |
| 16040 int rc; |
| 16041 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16042 fts5CheckTransactionState(pTab, FTS5_SYNC, 0); |
| 16043 pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; |
| 16044 fts5TripCursors(pTab); |
| 16045 rc = sqlite3Fts5StorageSync(pTab->pStorage, 1); |
| 16046 pTab->pConfig->pzErrmsg = 0; |
| 16047 return rc; |
| 16048 } |
| 16049 |
| 16050 /* |
| 16051 ** Implementation of xBegin() method. |
| 16052 */ |
| 16053 static int fts5BeginMethod(sqlite3_vtab *pVtab){ |
| 16054 fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0); |
| 16055 fts5NewTransaction((Fts5Table*)pVtab); |
| 16056 return SQLITE_OK; |
| 16057 } |
| 16058 |
| 16059 /* |
| 16060 ** Implementation of xCommit() method. This is a no-op. The contents of |
| 16061 ** the pending-terms hash-table have already been flushed into the database |
| 16062 ** by fts5SyncMethod(). |
| 16063 */ |
| 16064 static int fts5CommitMethod(sqlite3_vtab *pVtab){ |
| 16065 UNUSED_PARAM(pVtab); /* Call below is a no-op for NDEBUG builds */ |
| 16066 fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_COMMIT, 0); |
| 16067 return SQLITE_OK; |
| 16068 } |
| 16069 |
| 16070 /* |
| 16071 ** Implementation of xRollback(). Discard the contents of the pending-terms |
| 16072 ** hash-table. Any changes made to the database are reverted by SQLite. |
| 16073 */ |
| 16074 static int fts5RollbackMethod(sqlite3_vtab *pVtab){ |
| 16075 int rc; |
| 16076 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16077 fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); |
| 16078 rc = sqlite3Fts5StorageRollback(pTab->pStorage); |
| 16079 return rc; |
| 16080 } |
| 16081 |
| 16082 static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); |
| 16083 |
| 16084 static void *fts5ApiUserData(Fts5Context *pCtx){ |
| 16085 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16086 return pCsr->pAux->pUserData; |
| 16087 } |
| 16088 |
| 16089 static int fts5ApiColumnCount(Fts5Context *pCtx){ |
| 16090 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16091 return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; |
| 16092 } |
| 16093 |
| 16094 static int fts5ApiColumnTotalSize( |
| 16095 Fts5Context *pCtx, |
| 16096 int iCol, |
| 16097 sqlite3_int64 *pnToken |
| 16098 ){ |
| 16099 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16100 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 16101 return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); |
| 16102 } |
| 16103 |
| 16104 static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ |
| 16105 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16106 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 16107 return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); |
| 16108 } |
| 16109 |
| 16110 static int fts5ApiTokenize( |
| 16111 Fts5Context *pCtx, |
| 16112 const char *pText, int nText, |
| 16113 void *pUserData, |
| 16114 int (*xToken)(void*, int, const char*, int, int, int) |
| 16115 ){ |
| 16116 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16117 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 16118 return sqlite3Fts5Tokenize( |
| 16119 pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken |
| 16120 ); |
| 16121 } |
| 16122 |
| 16123 static int fts5ApiPhraseCount(Fts5Context *pCtx){ |
| 16124 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16125 return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
| 16126 } |
| 16127 |
| 16128 static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ |
| 16129 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16130 return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); |
| 16131 } |
| 16132 |
| 16133 static int fts5ApiColumnText( |
| 16134 Fts5Context *pCtx, |
| 16135 int iCol, |
| 16136 const char **pz, |
| 16137 int *pn |
| 16138 ){ |
| 16139 int rc = SQLITE_OK; |
| 16140 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16141 if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){ |
| 16142 *pz = 0; |
| 16143 *pn = 0; |
| 16144 }else{ |
| 16145 rc = fts5SeekCursor(pCsr, 0); |
| 16146 if( rc==SQLITE_OK ){ |
| 16147 *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); |
| 16148 *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
| 16149 } |
| 16150 } |
| 16151 return rc; |
| 16152 } |
| 16153 |
| 16154 static int fts5CsrPoslist( |
| 16155 Fts5Cursor *pCsr, |
| 16156 int iPhrase, |
| 16157 const u8 **pa, |
| 16158 int *pn |
| 16159 ){ |
| 16160 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; |
| 16161 int rc = SQLITE_OK; |
| 16162 int bLive = (pCsr->pSorter==0); |
| 16163 |
| 16164 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){ |
| 16165 |
| 16166 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
| 16167 Fts5PoslistPopulator *aPopulator; |
| 16168 int i; |
| 16169 aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); |
| 16170 if( aPopulator==0 ) rc = SQLITE_NOMEM; |
| 16171 for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){ |
| 16172 int n; const char *z; |
| 16173 rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n); |
| 16174 if( rc==SQLITE_OK ){ |
| 16175 rc = sqlite3Fts5ExprPopulatePoslists( |
| 16176 pConfig, pCsr->pExpr, aPopulator, i, z, n |
| 16177 ); |
| 16178 } |
| 16179 } |
| 16180 sqlite3_free(aPopulator); |
| 16181 |
| 16182 if( pCsr->pSorter ){ |
| 16183 sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); |
| 16184 } |
| 16185 } |
| 16186 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST); |
| 16187 } |
| 16188 |
| 16189 if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 16190 Fts5Sorter *pSorter = pCsr->pSorter; |
| 16191 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); |
| 16192 *pn = pSorter->aIdx[iPhrase] - i1; |
| 16193 *pa = &pSorter->aPoslist[i1]; |
| 16194 }else{ |
| 16195 *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); |
| 16196 } |
| 16197 |
| 16198 return rc; |
| 16199 } |
| 16200 |
| 16201 /* |
| 16202 ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated |
| 16203 ** correctly for the current view. Return SQLITE_OK if successful, or an |
| 16204 ** SQLite error code otherwise. |
| 16205 */ |
| 16206 static int fts5CacheInstArray(Fts5Cursor *pCsr){ |
| 16207 int rc = SQLITE_OK; |
| 16208 Fts5PoslistReader *aIter; /* One iterator for each phrase */ |
| 16209 int nIter; /* Number of iterators/phrases */ |
| 16210 |
| 16211 nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
| 16212 if( pCsr->aInstIter==0 ){ |
| 16213 int nByte = sizeof(Fts5PoslistReader) * nIter; |
| 16214 pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); |
| 16215 } |
| 16216 aIter = pCsr->aInstIter; |
| 16217 |
| 16218 if( aIter ){ |
| 16219 int nInst = 0; /* Number instances seen so far */ |
| 16220 int i; |
| 16221 |
| 16222 /* Initialize all iterators */ |
| 16223 for(i=0; i<nIter && rc==SQLITE_OK; i++){ |
| 16224 const u8 *a; |
| 16225 int n; |
| 16226 rc = fts5CsrPoslist(pCsr, i, &a, &n); |
| 16227 if( rc==SQLITE_OK ){ |
| 16228 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); |
| 16229 } |
| 16230 } |
| 16231 |
| 16232 if( rc==SQLITE_OK ){ |
| 16233 while( 1 ){ |
| 16234 int *aInst; |
| 16235 int iBest = -1; |
| 16236 for(i=0; i<nIter; i++){ |
| 16237 if( (aIter[i].bEof==0) |
| 16238 && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) |
| 16239 ){ |
| 16240 iBest = i; |
| 16241 } |
| 16242 } |
| 16243 if( iBest<0 ) break; |
| 16244 |
| 16245 nInst++; |
| 16246 if( nInst>=pCsr->nInstAlloc ){ |
| 16247 pCsr->nInstAlloc = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; |
| 16248 aInst = (int*)sqlite3_realloc( |
| 16249 pCsr->aInst, pCsr->nInstAlloc*sizeof(int)*3 |
| 16250 ); |
| 16251 if( aInst ){ |
| 16252 pCsr->aInst = aInst; |
| 16253 }else{ |
| 16254 rc = SQLITE_NOMEM; |
| 16255 break; |
| 16256 } |
| 16257 } |
| 16258 |
| 16259 aInst = &pCsr->aInst[3 * (nInst-1)]; |
| 16260 aInst[0] = iBest; |
| 16261 aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); |
| 16262 aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); |
| 16263 sqlite3Fts5PoslistReaderNext(&aIter[iBest]); |
| 16264 } |
| 16265 } |
| 16266 |
| 16267 pCsr->nInstCount = nInst; |
| 16268 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST); |
| 16269 } |
| 16270 return rc; |
| 16271 } |
| 16272 |
| 16273 static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ |
| 16274 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16275 int rc = SQLITE_OK; |
| 16276 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 |
| 16277 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ |
| 16278 *pnInst = pCsr->nInstCount; |
| 16279 } |
| 16280 return rc; |
| 16281 } |
| 16282 |
| 16283 static int fts5ApiInst( |
| 16284 Fts5Context *pCtx, |
| 16285 int iIdx, |
| 16286 int *piPhrase, |
| 16287 int *piCol, |
| 16288 int *piOff |
| 16289 ){ |
| 16290 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16291 int rc = SQLITE_OK; |
| 16292 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 |
| 16293 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) |
| 16294 ){ |
| 16295 if( iIdx<0 || iIdx>=pCsr->nInstCount ){ |
| 16296 rc = SQLITE_RANGE; |
| 16297 #if 0 |
| 16298 }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ |
| 16299 *piPhrase = pCsr->aInst[iIdx*3]; |
| 16300 *piCol = pCsr->aInst[iIdx*3 + 2]; |
| 16301 *piOff = -1; |
| 16302 #endif |
| 16303 }else{ |
| 16304 *piPhrase = pCsr->aInst[iIdx*3]; |
| 16305 *piCol = pCsr->aInst[iIdx*3 + 1]; |
| 16306 *piOff = pCsr->aInst[iIdx*3 + 2]; |
| 16307 } |
| 16308 } |
| 16309 return rc; |
| 16310 } |
| 16311 |
| 16312 static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ |
| 16313 return fts5CursorRowid((Fts5Cursor*)pCtx); |
| 16314 } |
| 16315 |
| 16316 static int fts5ColumnSizeCb( |
| 16317 void *pContext, /* Pointer to int */ |
| 16318 int tflags, |
| 16319 const char *pUnused, /* Buffer containing token */ |
| 16320 int nUnused, /* Size of token in bytes */ |
| 16321 int iUnused1, /* Start offset of token */ |
| 16322 int iUnused2 /* End offset of token */ |
| 16323 ){ |
| 16324 int *pCnt = (int*)pContext; |
| 16325 UNUSED_PARAM2(pUnused, nUnused); |
| 16326 UNUSED_PARAM2(iUnused1, iUnused2); |
| 16327 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ |
| 16328 (*pCnt)++; |
| 16329 } |
| 16330 return SQLITE_OK; |
| 16331 } |
| 16332 |
| 16333 static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ |
| 16334 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16335 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 16336 Fts5Config *pConfig = pTab->pConfig; |
| 16337 int rc = SQLITE_OK; |
| 16338 |
| 16339 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ |
| 16340 if( pConfig->bColumnsize ){ |
| 16341 i64 iRowid = fts5CursorRowid(pCsr); |
| 16342 rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); |
| 16343 }else if( pConfig->zContent==0 ){ |
| 16344 int i; |
| 16345 for(i=0; i<pConfig->nCol; i++){ |
| 16346 if( pConfig->abUnindexed[i]==0 ){ |
| 16347 pCsr->aColumnSize[i] = -1; |
| 16348 } |
| 16349 } |
| 16350 }else{ |
| 16351 int i; |
| 16352 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ |
| 16353 if( pConfig->abUnindexed[i]==0 ){ |
| 16354 const char *z; int n; |
| 16355 void *p = (void*)(&pCsr->aColumnSize[i]); |
| 16356 pCsr->aColumnSize[i] = 0; |
| 16357 rc = fts5ApiColumnText(pCtx, i, &z, &n); |
| 16358 if( rc==SQLITE_OK ){ |
| 16359 rc = sqlite3Fts5Tokenize( |
| 16360 pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb |
| 16361 ); |
| 16362 } |
| 16363 } |
| 16364 } |
| 16365 } |
| 16366 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); |
| 16367 } |
| 16368 if( iCol<0 ){ |
| 16369 int i; |
| 16370 *pnToken = 0; |
| 16371 for(i=0; i<pConfig->nCol; i++){ |
| 16372 *pnToken += pCsr->aColumnSize[i]; |
| 16373 } |
| 16374 }else if( iCol<pConfig->nCol ){ |
| 16375 *pnToken = pCsr->aColumnSize[iCol]; |
| 16376 }else{ |
| 16377 *pnToken = 0; |
| 16378 rc = SQLITE_RANGE; |
| 16379 } |
| 16380 return rc; |
| 16381 } |
| 16382 |
| 16383 /* |
| 16384 ** Implementation of the xSetAuxdata() method. |
| 16385 */ |
| 16386 static int fts5ApiSetAuxdata( |
| 16387 Fts5Context *pCtx, /* Fts5 context */ |
| 16388 void *pPtr, /* Pointer to save as auxdata */ |
| 16389 void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ |
| 16390 ){ |
| 16391 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16392 Fts5Auxdata *pData; |
| 16393 |
| 16394 /* Search through the cursors list of Fts5Auxdata objects for one that |
| 16395 ** corresponds to the currently executing auxiliary function. */ |
| 16396 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ |
| 16397 if( pData->pAux==pCsr->pAux ) break; |
| 16398 } |
| 16399 |
| 16400 if( pData ){ |
| 16401 if( pData->xDelete ){ |
| 16402 pData->xDelete(pData->pPtr); |
| 16403 } |
| 16404 }else{ |
| 16405 int rc = SQLITE_OK; |
| 16406 pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); |
| 16407 if( pData==0 ){ |
| 16408 if( xDelete ) xDelete(pPtr); |
| 16409 return rc; |
| 16410 } |
| 16411 pData->pAux = pCsr->pAux; |
| 16412 pData->pNext = pCsr->pAuxdata; |
| 16413 pCsr->pAuxdata = pData; |
| 16414 } |
| 16415 |
| 16416 pData->xDelete = xDelete; |
| 16417 pData->pPtr = pPtr; |
| 16418 return SQLITE_OK; |
| 16419 } |
| 16420 |
| 16421 static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ |
| 16422 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16423 Fts5Auxdata *pData; |
| 16424 void *pRet = 0; |
| 16425 |
| 16426 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ |
| 16427 if( pData->pAux==pCsr->pAux ) break; |
| 16428 } |
| 16429 |
| 16430 if( pData ){ |
| 16431 pRet = pData->pPtr; |
| 16432 if( bClear ){ |
| 16433 pData->pPtr = 0; |
| 16434 pData->xDelete = 0; |
| 16435 } |
| 16436 } |
| 16437 |
| 16438 return pRet; |
| 16439 } |
| 16440 |
| 16441 static void fts5ApiPhraseNext( |
| 16442 Fts5Context *pUnused, |
| 16443 Fts5PhraseIter *pIter, |
| 16444 int *piCol, int *piOff |
| 16445 ){ |
| 16446 UNUSED_PARAM(pUnused); |
| 16447 if( pIter->a>=pIter->b ){ |
| 16448 *piCol = -1; |
| 16449 *piOff = -1; |
| 16450 }else{ |
| 16451 int iVal; |
| 16452 pIter->a += fts5GetVarint32(pIter->a, iVal); |
| 16453 if( iVal==1 ){ |
| 16454 pIter->a += fts5GetVarint32(pIter->a, iVal); |
| 16455 *piCol = iVal; |
| 16456 *piOff = 0; |
| 16457 pIter->a += fts5GetVarint32(pIter->a, iVal); |
| 16458 } |
| 16459 *piOff += (iVal-2); |
| 16460 } |
| 16461 } |
| 16462 |
| 16463 static int fts5ApiPhraseFirst( |
| 16464 Fts5Context *pCtx, |
| 16465 int iPhrase, |
| 16466 Fts5PhraseIter *pIter, |
| 16467 int *piCol, int *piOff |
| 16468 ){ |
| 16469 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16470 int n; |
| 16471 int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); |
| 16472 if( rc==SQLITE_OK ){ |
| 16473 pIter->b = &pIter->a[n]; |
| 16474 *piCol = 0; |
| 16475 *piOff = 0; |
| 16476 fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); |
| 16477 } |
| 16478 return rc; |
| 16479 } |
| 16480 |
| 16481 static void fts5ApiPhraseNextColumn( |
| 16482 Fts5Context *pCtx, |
| 16483 Fts5PhraseIter *pIter, |
| 16484 int *piCol |
| 16485 ){ |
| 16486 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16487 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; |
| 16488 |
| 16489 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ |
| 16490 if( pIter->a>=pIter->b ){ |
| 16491 *piCol = -1; |
| 16492 }else{ |
| 16493 int iIncr; |
| 16494 pIter->a += fts5GetVarint32(&pIter->a[0], iIncr); |
| 16495 *piCol += (iIncr-2); |
| 16496 } |
| 16497 }else{ |
| 16498 while( 1 ){ |
| 16499 int dummy; |
| 16500 if( pIter->a>=pIter->b ){ |
| 16501 *piCol = -1; |
| 16502 return; |
| 16503 } |
| 16504 if( pIter->a[0]==0x01 ) break; |
| 16505 pIter->a += fts5GetVarint32(pIter->a, dummy); |
| 16506 } |
| 16507 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); |
| 16508 } |
| 16509 } |
| 16510 |
| 16511 static int fts5ApiPhraseFirstColumn( |
| 16512 Fts5Context *pCtx, |
| 16513 int iPhrase, |
| 16514 Fts5PhraseIter *pIter, |
| 16515 int *piCol |
| 16516 ){ |
| 16517 int rc = SQLITE_OK; |
| 16518 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16519 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; |
| 16520 |
| 16521 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ |
| 16522 Fts5Sorter *pSorter = pCsr->pSorter; |
| 16523 int n; |
| 16524 if( pSorter ){ |
| 16525 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); |
| 16526 n = pSorter->aIdx[iPhrase] - i1; |
| 16527 pIter->a = &pSorter->aPoslist[i1]; |
| 16528 }else{ |
| 16529 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); |
| 16530 } |
| 16531 if( rc==SQLITE_OK ){ |
| 16532 pIter->b = &pIter->a[n]; |
| 16533 *piCol = 0; |
| 16534 fts5ApiPhraseNextColumn(pCtx, pIter, piCol); |
| 16535 } |
| 16536 }else{ |
| 16537 int n; |
| 16538 rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); |
| 16539 if( rc==SQLITE_OK ){ |
| 16540 pIter->b = &pIter->a[n]; |
| 16541 if( n<=0 ){ |
| 16542 *piCol = -1; |
| 16543 }else if( pIter->a[0]==0x01 ){ |
| 16544 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); |
| 16545 }else{ |
| 16546 *piCol = 0; |
| 16547 } |
| 16548 } |
| 16549 } |
| 16550 |
| 16551 return rc; |
| 16552 } |
| 16553 |
| 16554 |
/*
** Forward declaration. fts5ApiQueryPhrase() is listed in the sFts5Api
** table below, but its definition follows the table because the function
** body itself refers to sFts5Api.
*/
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
| 16558 |
/*
** The extension API object handed to auxiliary functions. Its address is
** passed as the first argument to Fts5Auxiliary.xFunc by fts5ApiInvoke()
** and to the xCallback of fts5ApiQueryPhrase().
**
** The initializer is positional: entries must stay in the order in which
** the members of struct Fts5ExtensionApi are declared.
*/
static const Fts5ExtensionApi sFts5Api = {
  2,                            /* iVersion */
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
  fts5ApiPhraseFirst,
  fts5ApiPhraseNext,
  fts5ApiPhraseFirstColumn,
  fts5ApiPhraseNextColumn,
};
| 16581 |
| 16582 /* |
| 16583 ** Implementation of API function xQueryPhrase(). |
| 16584 */ |
| 16585 static int fts5ApiQueryPhrase( |
| 16586 Fts5Context *pCtx, |
| 16587 int iPhrase, |
| 16588 void *pUserData, |
| 16589 int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) |
| 16590 ){ |
| 16591 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
| 16592 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
| 16593 int rc; |
| 16594 Fts5Cursor *pNew = 0; |
| 16595 |
| 16596 rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); |
| 16597 if( rc==SQLITE_OK ){ |
| 16598 pNew->ePlan = FTS5_PLAN_MATCH; |
| 16599 pNew->iFirstRowid = SMALLEST_INT64; |
| 16600 pNew->iLastRowid = LARGEST_INT64; |
| 16601 pNew->base.pVtab = (sqlite3_vtab*)pTab; |
| 16602 rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); |
| 16603 } |
| 16604 |
| 16605 if( rc==SQLITE_OK ){ |
| 16606 for(rc = fts5CursorFirst(pTab, pNew, 0); |
| 16607 rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; |
| 16608 rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) |
| 16609 ){ |
| 16610 rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); |
| 16611 if( rc!=SQLITE_OK ){ |
| 16612 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 16613 break; |
| 16614 } |
| 16615 } |
| 16616 } |
| 16617 |
| 16618 fts5CloseMethod((sqlite3_vtab_cursor*)pNew); |
| 16619 return rc; |
| 16620 } |
| 16621 |
| 16622 static void fts5ApiInvoke( |
| 16623 Fts5Auxiliary *pAux, |
| 16624 Fts5Cursor *pCsr, |
| 16625 sqlite3_context *context, |
| 16626 int argc, |
| 16627 sqlite3_value **argv |
| 16628 ){ |
| 16629 assert( pCsr->pAux==0 ); |
| 16630 pCsr->pAux = pAux; |
| 16631 pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); |
| 16632 pCsr->pAux = 0; |
| 16633 } |
| 16634 |
| 16635 static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ |
| 16636 Fts5Cursor *pCsr; |
| 16637 for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
| 16638 if( pCsr->iCsrId==iCsrId ) break; |
| 16639 } |
| 16640 return pCsr; |
| 16641 } |
| 16642 |
| 16643 static void fts5ApiCallback( |
| 16644 sqlite3_context *context, |
| 16645 int argc, |
| 16646 sqlite3_value **argv |
| 16647 ){ |
| 16648 |
| 16649 Fts5Auxiliary *pAux; |
| 16650 Fts5Cursor *pCsr; |
| 16651 i64 iCsrId; |
| 16652 |
| 16653 assert( argc>=1 ); |
| 16654 pAux = (Fts5Auxiliary*)sqlite3_user_data(context); |
| 16655 iCsrId = sqlite3_value_int64(argv[0]); |
| 16656 |
| 16657 pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); |
| 16658 if( pCsr==0 ){ |
| 16659 char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); |
| 16660 sqlite3_result_error(context, zErr, -1); |
| 16661 sqlite3_free(zErr); |
| 16662 }else{ |
| 16663 fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); |
| 16664 } |
| 16665 } |
| 16666 |
| 16667 |
| 16668 /* |
| 16669 ** Given cursor id iId, return a pointer to the corresponding Fts5Index |
| 16670 ** object. Or NULL If the cursor id does not exist. |
| 16671 ** |
| 16672 ** If successful, set *ppConfig to point to the associated config object |
| 16673 ** before returning. |
| 16674 */ |
| 16675 static Fts5Index *sqlite3Fts5IndexFromCsrid( |
| 16676 Fts5Global *pGlobal, /* FTS5 global context for db handle */ |
| 16677 i64 iCsrId, /* Id of cursor to find */ |
| 16678 Fts5Config **ppConfig /* OUT: Configuration object */ |
| 16679 ){ |
| 16680 Fts5Cursor *pCsr; |
| 16681 Fts5Table *pTab; |
| 16682 |
| 16683 pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); |
| 16684 pTab = (Fts5Table*)pCsr->base.pVtab; |
| 16685 *ppConfig = pTab->pConfig; |
| 16686 |
| 16687 return pTab->pIndex; |
| 16688 } |
| 16689 |
| 16690 /* |
| 16691 ** Return a "position-list blob" corresponding to the current position of |
| 16692 ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains |
| 16693 ** the current position-list for each phrase in the query associated with |
| 16694 ** cursor pCsr. |
| 16695 ** |
| 16696 ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is |
| 16697 ** the number of phrases in the query. Following the varints are the |
| 16698 ** concatenated position lists for each phrase, in order. |
| 16699 ** |
| 16700 ** The first varint (if it exists) contains the size of the position list |
| 16701 ** for phrase 0. The second (same disclaimer) contains the size of position |
| 16702 ** list 1. And so on. There is no size field for the final position list, |
| 16703 ** as it can be derived from the total size of the blob. |
| 16704 */ |
| 16705 static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ |
| 16706 int i; |
| 16707 int rc = SQLITE_OK; |
| 16708 int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
| 16709 Fts5Buffer val; |
| 16710 |
| 16711 memset(&val, 0, sizeof(Fts5Buffer)); |
| 16712 switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ |
| 16713 case FTS5_DETAIL_FULL: |
| 16714 |
| 16715 /* Append the varints */ |
| 16716 for(i=0; i<(nPhrase-1); i++){ |
| 16717 const u8 *dummy; |
| 16718 int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); |
| 16719 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); |
| 16720 } |
| 16721 |
| 16722 /* Append the position lists */ |
| 16723 for(i=0; i<nPhrase; i++){ |
| 16724 const u8 *pPoslist; |
| 16725 int nPoslist; |
| 16726 nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); |
| 16727 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); |
| 16728 } |
| 16729 break; |
| 16730 |
| 16731 case FTS5_DETAIL_COLUMNS: |
| 16732 |
| 16733 /* Append the varints */ |
| 16734 for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){ |
| 16735 const u8 *dummy; |
| 16736 int nByte; |
| 16737 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); |
| 16738 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); |
| 16739 } |
| 16740 |
| 16741 /* Append the position lists */ |
| 16742 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ |
| 16743 const u8 *pPoslist; |
| 16744 int nPoslist; |
| 16745 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist); |
| 16746 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); |
| 16747 } |
| 16748 break; |
| 16749 |
| 16750 default: |
| 16751 break; |
| 16752 } |
| 16753 |
| 16754 sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free); |
| 16755 return rc; |
| 16756 } |
| 16757 |
| 16758 /* |
| 16759 ** This is the xColumn method, called by SQLite to request a value from |
| 16760 ** the row that the supplied cursor currently points to. |
| 16761 */ |
| 16762 static int fts5ColumnMethod( |
| 16763 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 16764 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 16765 int iCol /* Index of column to read value from */ |
| 16766 ){ |
| 16767 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); |
| 16768 Fts5Config *pConfig = pTab->pConfig; |
| 16769 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
| 16770 int rc = SQLITE_OK; |
| 16771 |
| 16772 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); |
| 16773 |
| 16774 if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){ |
| 16775 if( iCol==pConfig->nCol ){ |
| 16776 sqlite3_result_int64(pCtx, pCsr->iSpecial); |
| 16777 } |
| 16778 }else |
| 16779 |
| 16780 if( iCol==pConfig->nCol ){ |
| 16781 /* User is requesting the value of the special column with the same name |
| 16782 ** as the table. Return the cursor integer id number. This value is only |
| 16783 ** useful in that it may be passed as the first argument to an FTS5 |
| 16784 ** auxiliary function. */ |
| 16785 sqlite3_result_int64(pCtx, pCsr->iCsrId); |
| 16786 }else if( iCol==pConfig->nCol+1 ){ |
| 16787 |
| 16788 /* The value of the "rank" column. */ |
| 16789 if( pCsr->ePlan==FTS5_PLAN_SOURCE ){ |
| 16790 fts5PoslistBlob(pCtx, pCsr); |
| 16791 }else if( |
| 16792 pCsr->ePlan==FTS5_PLAN_MATCH |
| 16793 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH |
| 16794 ){ |
| 16795 if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){ |
| 16796 fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); |
| 16797 } |
| 16798 } |
| 16799 }else if( !fts5IsContentless(pTab) ){ |
| 16800 rc = fts5SeekCursor(pCsr, 1); |
| 16801 if( rc==SQLITE_OK ){ |
| 16802 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); |
| 16803 } |
| 16804 } |
| 16805 return rc; |
| 16806 } |
| 16807 |
| 16808 |
| 16809 /* |
| 16810 ** This routine implements the xFindFunction method for the FTS3 |
| 16811 ** virtual table. |
| 16812 */ |
| 16813 static int fts5FindFunctionMethod( |
| 16814 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 16815 int nUnused, /* Number of SQL function arguments */ |
| 16816 const char *zName, /* Name of SQL function */ |
| 16817 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ |
| 16818 void **ppArg /* OUT: User data for *pxFunc */ |
| 16819 ){ |
| 16820 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16821 Fts5Auxiliary *pAux; |
| 16822 |
| 16823 UNUSED_PARAM(nUnused); |
| 16824 pAux = fts5FindAuxiliary(pTab, zName); |
| 16825 if( pAux ){ |
| 16826 *pxFunc = fts5ApiCallback; |
| 16827 *ppArg = (void*)pAux; |
| 16828 return 1; |
| 16829 } |
| 16830 |
| 16831 /* No function of the specified name was found. Return 0. */ |
| 16832 return 0; |
| 16833 } |
| 16834 |
| 16835 /* |
| 16836 ** Implementation of FTS5 xRename method. Rename an fts5 table. |
| 16837 */ |
| 16838 static int fts5RenameMethod( |
| 16839 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 16840 const char *zName /* New name of table */ |
| 16841 ){ |
| 16842 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16843 return sqlite3Fts5StorageRename(pTab->pStorage, zName); |
| 16844 } |
| 16845 |
| 16846 /* |
| 16847 ** The xSavepoint() method. |
| 16848 ** |
| 16849 ** Flush the contents of the pending-terms table to disk. |
| 16850 */ |
| 16851 static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 16852 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16853 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ |
| 16854 fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); |
| 16855 fts5TripCursors(pTab); |
| 16856 return sqlite3Fts5StorageSync(pTab->pStorage, 0); |
| 16857 } |
| 16858 |
| 16859 /* |
| 16860 ** The xRelease() method. |
| 16861 ** |
| 16862 ** This is a no-op. |
| 16863 */ |
| 16864 static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 16865 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16866 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ |
| 16867 fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); |
| 16868 fts5TripCursors(pTab); |
| 16869 return sqlite3Fts5StorageSync(pTab->pStorage, 0); |
| 16870 } |
| 16871 |
| 16872 /* |
| 16873 ** The xRollbackTo() method. |
| 16874 ** |
| 16875 ** Discard the contents of the pending terms table. |
| 16876 */ |
| 16877 static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 16878 Fts5Table *pTab = (Fts5Table*)pVtab; |
| 16879 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ |
| 16880 fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); |
| 16881 fts5TripCursors(pTab); |
| 16882 return sqlite3Fts5StorageRollback(pTab->pStorage); |
| 16883 } |
| 16884 |
| 16885 /* |
| 16886 ** Register a new auxiliary function with global context pGlobal. |
| 16887 */ |
| 16888 static int fts5CreateAux( |
| 16889 fts5_api *pApi, /* Global context (one per db handle) */ |
| 16890 const char *zName, /* Name of new function */ |
| 16891 void *pUserData, /* User data for aux. function */ |
| 16892 fts5_extension_function xFunc, /* Aux. function implementation */ |
| 16893 void(*xDestroy)(void*) /* Destructor for pUserData */ |
| 16894 ){ |
| 16895 Fts5Global *pGlobal = (Fts5Global*)pApi; |
| 16896 int rc = sqlite3_overload_function(pGlobal->db, zName, -1); |
| 16897 if( rc==SQLITE_OK ){ |
| 16898 Fts5Auxiliary *pAux; |
| 16899 int nName; /* Size of zName in bytes, including \0 */ |
| 16900 int nByte; /* Bytes of space to allocate */ |
| 16901 |
| 16902 nName = (int)strlen(zName) + 1; |
| 16903 nByte = sizeof(Fts5Auxiliary) + nName; |
| 16904 pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte); |
| 16905 if( pAux ){ |
| 16906 memset(pAux, 0, nByte); |
| 16907 pAux->zFunc = (char*)&pAux[1]; |
| 16908 memcpy(pAux->zFunc, zName, nName); |
| 16909 pAux->pGlobal = pGlobal; |
| 16910 pAux->pUserData = pUserData; |
| 16911 pAux->xFunc = xFunc; |
| 16912 pAux->xDestroy = xDestroy; |
| 16913 pAux->pNext = pGlobal->pAux; |
| 16914 pGlobal->pAux = pAux; |
| 16915 }else{ |
| 16916 rc = SQLITE_NOMEM; |
| 16917 } |
| 16918 } |
| 16919 |
| 16920 return rc; |
| 16921 } |
| 16922 |
| 16923 /* |
| 16924 ** Register a new tokenizer. This is the implementation of the |
| 16925 ** fts5_api.xCreateTokenizer() method. |
| 16926 */ |
| 16927 static int fts5CreateTokenizer( |
| 16928 fts5_api *pApi, /* Global context (one per db handle) */ |
| 16929 const char *zName, /* Name of new function */ |
| 16930 void *pUserData, /* User data for aux. function */ |
| 16931 fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ |
| 16932 void(*xDestroy)(void*) /* Destructor for pUserData */ |
| 16933 ){ |
| 16934 Fts5Global *pGlobal = (Fts5Global*)pApi; |
| 16935 Fts5TokenizerModule *pNew; |
| 16936 int nName; /* Size of zName and its \0 terminator */ |
| 16937 int nByte; /* Bytes of space to allocate */ |
| 16938 int rc = SQLITE_OK; |
| 16939 |
| 16940 nName = (int)strlen(zName) + 1; |
| 16941 nByte = sizeof(Fts5TokenizerModule) + nName; |
| 16942 pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte); |
| 16943 if( pNew ){ |
| 16944 memset(pNew, 0, nByte); |
| 16945 pNew->zName = (char*)&pNew[1]; |
| 16946 memcpy(pNew->zName, zName, nName); |
| 16947 pNew->pUserData = pUserData; |
| 16948 pNew->x = *pTokenizer; |
| 16949 pNew->xDestroy = xDestroy; |
| 16950 pNew->pNext = pGlobal->pTok; |
| 16951 pGlobal->pTok = pNew; |
| 16952 if( pNew->pNext==0 ){ |
| 16953 pGlobal->pDfltTok = pNew; |
| 16954 } |
| 16955 }else{ |
| 16956 rc = SQLITE_NOMEM; |
| 16957 } |
| 16958 |
| 16959 return rc; |
| 16960 } |
| 16961 |
| 16962 static Fts5TokenizerModule *fts5LocateTokenizer( |
| 16963 Fts5Global *pGlobal, |
| 16964 const char *zName |
| 16965 ){ |
| 16966 Fts5TokenizerModule *pMod = 0; |
| 16967 |
| 16968 if( zName==0 ){ |
| 16969 pMod = pGlobal->pDfltTok; |
| 16970 }else{ |
| 16971 for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ |
| 16972 if( sqlite3_stricmp(zName, pMod->zName)==0 ) break; |
| 16973 } |
| 16974 } |
| 16975 |
| 16976 return pMod; |
| 16977 } |
| 16978 |
| 16979 /* |
| 16980 ** Find a tokenizer. This is the implementation of the |
| 16981 ** fts5_api.xFindTokenizer() method. |
| 16982 */ |
| 16983 static int fts5FindTokenizer( |
| 16984 fts5_api *pApi, /* Global context (one per db handle) */ |
| 16985 const char *zName, /* Name of new function */ |
| 16986 void **ppUserData, |
| 16987 fts5_tokenizer *pTokenizer /* Populate this object */ |
| 16988 ){ |
| 16989 int rc = SQLITE_OK; |
| 16990 Fts5TokenizerModule *pMod; |
| 16991 |
| 16992 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); |
| 16993 if( pMod ){ |
| 16994 *pTokenizer = pMod->x; |
| 16995 *ppUserData = pMod->pUserData; |
| 16996 }else{ |
| 16997 memset(pTokenizer, 0, sizeof(fts5_tokenizer)); |
| 16998 rc = SQLITE_ERROR; |
| 16999 } |
| 17000 |
| 17001 return rc; |
| 17002 } |
| 17003 |
| 17004 static int sqlite3Fts5GetTokenizer( |
| 17005 Fts5Global *pGlobal, |
| 17006 const char **azArg, |
| 17007 int nArg, |
| 17008 Fts5Tokenizer **ppTok, |
| 17009 fts5_tokenizer **ppTokApi, |
| 17010 char **pzErr |
| 17011 ){ |
| 17012 Fts5TokenizerModule *pMod; |
| 17013 int rc = SQLITE_OK; |
| 17014 |
| 17015 pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]); |
| 17016 if( pMod==0 ){ |
| 17017 assert( nArg>0 ); |
| 17018 rc = SQLITE_ERROR; |
| 17019 *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); |
| 17020 }else{ |
| 17021 rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok); |
| 17022 *ppTokApi = &pMod->x; |
| 17023 if( rc!=SQLITE_OK && pzErr ){ |
| 17024 *pzErr = sqlite3_mprintf("error in tokenizer constructor"); |
| 17025 } |
| 17026 } |
| 17027 |
| 17028 if( rc!=SQLITE_OK ){ |
| 17029 *ppTokApi = 0; |
| 17030 *ppTok = 0; |
| 17031 } |
| 17032 |
| 17033 return rc; |
| 17034 } |
| 17035 |
| 17036 static void fts5ModuleDestroy(void *pCtx){ |
| 17037 Fts5TokenizerModule *pTok, *pNextTok; |
| 17038 Fts5Auxiliary *pAux, *pNextAux; |
| 17039 Fts5Global *pGlobal = (Fts5Global*)pCtx; |
| 17040 |
| 17041 for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ |
| 17042 pNextAux = pAux->pNext; |
| 17043 if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); |
| 17044 sqlite3_free(pAux); |
| 17045 } |
| 17046 |
| 17047 for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ |
| 17048 pNextTok = pTok->pNext; |
| 17049 if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); |
| 17050 sqlite3_free(pTok); |
| 17051 } |
| 17052 |
| 17053 sqlite3_free(pGlobal); |
| 17054 } |
| 17055 |
| 17056 static void fts5Fts5Func( |
| 17057 sqlite3_context *pCtx, /* Function call context */ |
| 17058 int nArg, /* Number of args */ |
| 17059 sqlite3_value **apUnused /* Function arguments */ |
| 17060 ){ |
| 17061 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); |
| 17062 char buf[8]; |
| 17063 UNUSED_PARAM2(nArg, apUnused); |
| 17064 assert( nArg==0 ); |
| 17065 assert( sizeof(buf)>=sizeof(pGlobal) ); |
| 17066 memcpy(buf, (void*)&pGlobal, sizeof(pGlobal)); |
| 17067 sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT); |
| 17068 } |
| 17069 |
| 17070 /* |
| 17071 ** Implementation of fts5_source_id() function. |
| 17072 */ |
| 17073 static void fts5SourceIdFunc( |
| 17074 sqlite3_context *pCtx, /* Function call context */ |
| 17075 int nArg, /* Number of args */ |
| 17076 sqlite3_value **apUnused /* Function arguments */ |
| 17077 ){ |
| 17078 assert( nArg==0 ); |
| 17079 UNUSED_PARAM2(nArg, apUnused); |
| 17080 sqlite3_result_text(pCtx, "fts5: 2017-02-13 16:02:40 ada05cfa86ad7f5645450ac7a
2a21c9aa6e57d2c", -1, SQLITE_TRANSIENT); |
| 17081 } |
| 17082 |
| 17083 static int fts5Init(sqlite3 *db){ |
| 17084 static const sqlite3_module fts5Mod = { |
| 17085 /* iVersion */ 2, |
| 17086 /* xCreate */ fts5CreateMethod, |
| 17087 /* xConnect */ fts5ConnectMethod, |
| 17088 /* xBestIndex */ fts5BestIndexMethod, |
| 17089 /* xDisconnect */ fts5DisconnectMethod, |
| 17090 /* xDestroy */ fts5DestroyMethod, |
| 17091 /* xOpen */ fts5OpenMethod, |
| 17092 /* xClose */ fts5CloseMethod, |
| 17093 /* xFilter */ fts5FilterMethod, |
| 17094 /* xNext */ fts5NextMethod, |
| 17095 /* xEof */ fts5EofMethod, |
| 17096 /* xColumn */ fts5ColumnMethod, |
| 17097 /* xRowid */ fts5RowidMethod, |
| 17098 /* xUpdate */ fts5UpdateMethod, |
| 17099 /* xBegin */ fts5BeginMethod, |
| 17100 /* xSync */ fts5SyncMethod, |
| 17101 /* xCommit */ fts5CommitMethod, |
| 17102 /* xRollback */ fts5RollbackMethod, |
| 17103 /* xFindFunction */ fts5FindFunctionMethod, |
| 17104 /* xRename */ fts5RenameMethod, |
| 17105 /* xSavepoint */ fts5SavepointMethod, |
| 17106 /* xRelease */ fts5ReleaseMethod, |
| 17107 /* xRollbackTo */ fts5RollbackToMethod, |
| 17108 }; |
| 17109 |
| 17110 int rc; |
| 17111 Fts5Global *pGlobal = 0; |
| 17112 |
| 17113 pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); |
| 17114 if( pGlobal==0 ){ |
| 17115 rc = SQLITE_NOMEM; |
| 17116 }else{ |
| 17117 void *p = (void*)pGlobal; |
| 17118 memset(pGlobal, 0, sizeof(Fts5Global)); |
| 17119 pGlobal->db = db; |
| 17120 pGlobal->api.iVersion = 2; |
| 17121 pGlobal->api.xCreateFunction = fts5CreateAux; |
| 17122 pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; |
| 17123 pGlobal->api.xFindTokenizer = fts5FindTokenizer; |
| 17124 rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); |
| 17125 if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); |
| 17126 if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); |
| 17127 if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); |
| 17128 if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); |
| 17129 if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db); |
| 17130 if( rc==SQLITE_OK ){ |
| 17131 rc = sqlite3_create_function( |
| 17132 db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 |
| 17133 ); |
| 17134 } |
| 17135 if( rc==SQLITE_OK ){ |
| 17136 rc = sqlite3_create_function( |
| 17137 db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0 |
| 17138 ); |
| 17139 } |
| 17140 } |
| 17141 |
| 17142 /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file |
| 17143 ** fts5_test_mi.c is compiled and linked into the executable. And call |
| 17144 ** its entry point to enable the matchinfo() demo. */ |
| 17145 #ifdef SQLITE_FTS5_ENABLE_TEST_MI |
| 17146 if( rc==SQLITE_OK ){ |
| 17147 extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*); |
| 17148 rc = sqlite3Fts5TestRegisterMatchinfo(db); |
| 17149 } |
| 17150 #endif |
| 17151 |
| 17152 return rc; |
| 17153 } |
| 17154 |
/*
** The following functions are used to register the module with SQLite. If
** this module is being built as part of the SQLite core (SQLITE_CORE is
** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
**
** Or, if this module is being built as a loadable extension,
** sqlite3Fts5Init() is omitted and the two standard entry points
** sqlite3_fts_init() and sqlite3_fts5_init() defined instead.
**
** All three functions simply return the result of fts5Init(db).
*/
#ifndef SQLITE_CORE
/*
** Loadable-extension entry point. Binds the SQLite API routines supplied
** in pApi and registers FTS5 with database handle db. pzErrMsg is never
** written.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
SQLITE_API int sqlite3_fts_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi);
  (void)pzErrMsg;  /* Unused parameter */
  return fts5Init(db);
}

/*
** Second loadable-extension entry point; identical in behaviour to
** sqlite3_fts_init() above.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
SQLITE_API int sqlite3_fts5_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi);
  (void)pzErrMsg;  /* Unused parameter */
  return fts5Init(db);
}
#else
/*
** Entry point used when FTS5 is compiled into the SQLite core.
*/
SQLITE_PRIVATE int sqlite3Fts5Init(sqlite3 *db){
  return fts5Init(db);
}
#endif
| 17195 |
| 17196 /* |
| 17197 ** 2014 May 31 |
| 17198 ** |
| 17199 ** The author disclaims copyright to this source code. In place of |
| 17200 ** a legal notice, here is a blessing: |
| 17201 ** |
| 17202 ** May you do good and not evil. |
| 17203 ** May you find forgiveness for yourself and forgive others. |
| 17204 ** May you share freely, never taking more than you give. |
| 17205 ** |
| 17206 ****************************************************************************** |
| 17207 ** |
| 17208 */ |
| 17209 |
| 17210 |
| 17211 |
| 17212 /* #include "fts5Int.h" */ |
| 17213 |
/*
** State of the storage layer for one FTS5 table.
**
** aStmt[] holds one statement handle per FTS5_STMT_XXX constant (values 0
** through 10), indexed by that constant. A NULL slot means the statement
** has not been obtained yet; see fts5StorageGetStmt().
*/
struct Fts5Storage {
  Fts5Config *pConfig;            /* Configuration object */
  Fts5Index *pIndex;              /* Index object */
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Statements, indexed by FTS5_STMT_XXX */
};
| 17222 |
| 17223 |
/*
** Statement identifiers. Each value is an index into Fts5Storage.aStmt[]
** and into the azStmt[] array of SQL templates in fts5StorageGetStmt(), so
** the numbering here must match the order of that array.
**
** The first three identifiers (SCAN_ASC, SCAN_DESC and LOOKUP) are not
** defined here. The checks below fail the build if their values ever
** stop being 0, 1 and 2.
*/
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

/* Identifiers private to the storage layer, continuing from 3. */
#define FTS5_STMT_INSERT_CONTENT 3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT 5
#define FTS5_STMT_REPLACE_DOCSIZE 6
#define FTS5_STMT_DELETE_DOCSIZE 7
#define FTS5_STMT_LOOKUP_DOCSIZE 8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10
| 17242 |
| 17243 /* |
| 17244 ** Prepare the two insert statements - Fts5Storage.pInsertContent and |
| 17245 ** Fts5Storage.pInsertDocsize - if they have not already been prepared. |
| 17246 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 17247 ** occurs. |
| 17248 */ |
| 17249 static int fts5StorageGetStmt( |
| 17250 Fts5Storage *p, /* Storage handle */ |
| 17251 int eStmt, /* FTS5_STMT_XXX constant */ |
| 17252 sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ |
| 17253 char **pzErrMsg /* OUT: Error message (if any) */ |
| 17254 ){ |
| 17255 int rc = SQLITE_OK; |
| 17256 |
| 17257 /* If there is no %_docsize table, there should be no requests for |
| 17258 ** statements to operate on it. */ |
| 17259 assert( p->pConfig->bColumnsize || ( |
| 17260 eStmt!=FTS5_STMT_REPLACE_DOCSIZE |
| 17261 && eStmt!=FTS5_STMT_DELETE_DOCSIZE |
| 17262 && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE |
| 17263 )); |
| 17264 |
| 17265 assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); |
| 17266 if( p->aStmt[eStmt]==0 ){ |
| 17267 const char *azStmt[] = { |
| 17268 "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", |
| 17269 "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", |
| 17270 "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ |
| 17271 |
| 17272 "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ |
| 17273 "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ |
| 17274 "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ |
| 17275 "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */ |
| 17276 "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ |
| 17277 |
| 17278 "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ |
| 17279 |
| 17280 "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ |
| 17281 "SELECT %s FROM %s AS T", /* SCAN */ |
| 17282 }; |
| 17283 Fts5Config *pC = p->pConfig; |
| 17284 char *zSql = 0; |
| 17285 |
| 17286 switch( eStmt ){ |
| 17287 case FTS5_STMT_SCAN: |
| 17288 zSql = sqlite3_mprintf(azStmt[eStmt], |
| 17289 pC->zContentExprlist, pC->zContent |
| 17290 ); |
| 17291 break; |
| 17292 |
| 17293 case FTS5_STMT_SCAN_ASC: |
| 17294 case FTS5_STMT_SCAN_DESC: |
| 17295 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, |
| 17296 pC->zContent, pC->zContentRowid, pC->zContentRowid, |
| 17297 pC->zContentRowid |
| 17298 ); |
| 17299 break; |
| 17300 |
| 17301 case FTS5_STMT_LOOKUP: |
| 17302 zSql = sqlite3_mprintf(azStmt[eStmt], |
| 17303 pC->zContentExprlist, pC->zContent, pC->zContentRowid |
| 17304 ); |
| 17305 break; |
| 17306 |
| 17307 case FTS5_STMT_INSERT_CONTENT: |
| 17308 case FTS5_STMT_REPLACE_CONTENT: { |
| 17309 int nCol = pC->nCol + 1; |
| 17310 char *zBind; |
| 17311 int i; |
| 17312 |
| 17313 zBind = sqlite3_malloc(1 + nCol*2); |
| 17314 if( zBind ){ |
| 17315 for(i=0; i<nCol; i++){ |
| 17316 zBind[i*2] = '?'; |
| 17317 zBind[i*2 + 1] = ','; |
| 17318 } |
| 17319 zBind[i*2-1] = '\0'; |
| 17320 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind); |
| 17321 sqlite3_free(zBind); |
| 17322 } |
| 17323 break; |
| 17324 } |
| 17325 |
| 17326 default: |
| 17327 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName); |
| 17328 break; |
| 17329 } |
| 17330 |
| 17331 if( zSql==0 ){ |
| 17332 rc = SQLITE_NOMEM; |
| 17333 }else{ |
| 17334 rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0); |
| 17335 sqlite3_free(zSql); |
| 17336 if( rc!=SQLITE_OK && pzErrMsg ){ |
| 17337 *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db)); |
| 17338 } |
| 17339 } |
| 17340 } |
| 17341 |
| 17342 *ppStmt = p->aStmt[eStmt]; |
| 17343 sqlite3_reset(*ppStmt); |
| 17344 return rc; |
| 17345 } |
| 17346 |
| 17347 |
| 17348 static int fts5ExecPrintf( |
| 17349 sqlite3 *db, |
| 17350 char **pzErr, |
| 17351 const char *zFormat, |
| 17352 ... |
| 17353 ){ |
| 17354 int rc; |
| 17355 va_list ap; /* ... printf arguments */ |
| 17356 char *zSql; |
| 17357 |
| 17358 va_start(ap, zFormat); |
| 17359 zSql = sqlite3_vmprintf(zFormat, ap); |
| 17360 |
| 17361 if( zSql==0 ){ |
| 17362 rc = SQLITE_NOMEM; |
| 17363 }else{ |
| 17364 rc = sqlite3_exec(db, zSql, 0, 0, pzErr); |
| 17365 sqlite3_free(zSql); |
| 17366 } |
| 17367 |
| 17368 va_end(ap); |
| 17369 return rc; |
| 17370 } |
| 17371 |
| 17372 /* |
| 17373 ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error |
| 17374 ** code otherwise. |
| 17375 */ |
| 17376 static int sqlite3Fts5DropAll(Fts5Config *pConfig){ |
| 17377 int rc = fts5ExecPrintf(pConfig->db, 0, |
| 17378 "DROP TABLE IF EXISTS %Q.'%q_data';" |
| 17379 "DROP TABLE IF EXISTS %Q.'%q_idx';" |
| 17380 "DROP TABLE IF EXISTS %Q.'%q_config';", |
| 17381 pConfig->zDb, pConfig->zName, |
| 17382 pConfig->zDb, pConfig->zName, |
| 17383 pConfig->zDb, pConfig->zName |
| 17384 ); |
| 17385 if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
| 17386 rc = fts5ExecPrintf(pConfig->db, 0, |
| 17387 "DROP TABLE IF EXISTS %Q.'%q_docsize';", |
| 17388 pConfig->zDb, pConfig->zName |
| 17389 ); |
| 17390 } |
| 17391 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 17392 rc = fts5ExecPrintf(pConfig->db, 0, |
| 17393 "DROP TABLE IF EXISTS %Q.'%q_content';", |
| 17394 pConfig->zDb, pConfig->zName |
| 17395 ); |
| 17396 } |
| 17397 return rc; |
| 17398 } |
| 17399 |
| 17400 static void fts5StorageRenameOne( |
| 17401 Fts5Config *pConfig, /* Current FTS5 configuration */ |
| 17402 int *pRc, /* IN/OUT: Error code */ |
| 17403 const char *zTail, /* Tail of table name e.g. "data", "config" */ |
| 17404 const char *zName /* New name of FTS5 table */ |
| 17405 ){ |
| 17406 if( *pRc==SQLITE_OK ){ |
| 17407 *pRc = fts5ExecPrintf(pConfig->db, 0, |
| 17408 "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", |
| 17409 pConfig->zDb, pConfig->zName, zTail, zName, zTail |
| 17410 ); |
| 17411 } |
| 17412 } |
| 17413 |
| 17414 static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ |
| 17415 Fts5Config *pConfig = pStorage->pConfig; |
| 17416 int rc = sqlite3Fts5StorageSync(pStorage, 1); |
| 17417 |
| 17418 fts5StorageRenameOne(pConfig, &rc, "data", zName); |
| 17419 fts5StorageRenameOne(pConfig, &rc, "idx", zName); |
| 17420 fts5StorageRenameOne(pConfig, &rc, "config", zName); |
| 17421 if( pConfig->bColumnsize ){ |
| 17422 fts5StorageRenameOne(pConfig, &rc, "docsize", zName); |
| 17423 } |
| 17424 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 17425 fts5StorageRenameOne(pConfig, &rc, "content", zName); |
| 17426 } |
| 17427 return rc; |
| 17428 } |
| 17429 |
| 17430 /* |
| 17431 ** Create the shadow table named zPost, with definition zDefn. Return |
| 17432 ** SQLITE_OK if successful, or an SQLite error code otherwise. |
| 17433 */ |
| 17434 static int sqlite3Fts5CreateTable( |
| 17435 Fts5Config *pConfig, /* FTS5 configuration */ |
| 17436 const char *zPost, /* Shadow table to create (e.g. "content") */ |
| 17437 const char *zDefn, /* Columns etc. for shadow table */ |
| 17438 int bWithout, /* True for without rowid */ |
| 17439 char **pzErr /* OUT: Error message */ |
| 17440 ){ |
| 17441 int rc; |
| 17442 char *zErr = 0; |
| 17443 |
| 17444 rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", |
| 17445 pConfig->zDb, pConfig->zName, zPost, zDefn, |
| 17446 #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID |
| 17447 bWithout?" WITHOUT ROWID": |
| 17448 #endif |
| 17449 "" |
| 17450 ); |
| 17451 if( zErr ){ |
| 17452 *pzErr = sqlite3_mprintf( |
| 17453 "fts5: error creating shadow table %q_%s: %s", |
| 17454 pConfig->zName, zPost, zErr |
| 17455 ); |
| 17456 sqlite3_free(zErr); |
| 17457 } |
| 17458 |
| 17459 return rc; |
| 17460 } |
| 17461 |
| 17462 /* |
| 17463 ** Open a new Fts5Index handle. If the bCreate argument is true, create |
| 17464 ** and initialize the underlying tables |
| 17465 ** |
| 17466 ** If successful, set *pp to point to the new object and return SQLITE_OK. |
| 17467 ** Otherwise, set *pp to NULL and return an SQLite error code. |
| 17468 */ |
| 17469 static int sqlite3Fts5StorageOpen( |
| 17470 Fts5Config *pConfig, |
| 17471 Fts5Index *pIndex, |
| 17472 int bCreate, |
| 17473 Fts5Storage **pp, |
| 17474 char **pzErr /* OUT: Error message */ |
| 17475 ){ |
| 17476 int rc = SQLITE_OK; |
| 17477 Fts5Storage *p; /* New object */ |
| 17478 int nByte; /* Bytes of space to allocate */ |
| 17479 |
| 17480 nByte = sizeof(Fts5Storage) /* Fts5Storage object */ |
| 17481 + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ |
| 17482 *pp = p = (Fts5Storage*)sqlite3_malloc(nByte); |
| 17483 if( !p ) return SQLITE_NOMEM; |
| 17484 |
| 17485 memset(p, 0, nByte); |
| 17486 p->aTotalSize = (i64*)&p[1]; |
| 17487 p->pConfig = pConfig; |
| 17488 p->pIndex = pIndex; |
| 17489 |
| 17490 if( bCreate ){ |
| 17491 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 17492 int nDefn = 32 + pConfig->nCol*10; |
| 17493 char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); |
| 17494 if( zDefn==0 ){ |
| 17495 rc = SQLITE_NOMEM; |
| 17496 }else{ |
| 17497 int i; |
| 17498 int iOff; |
| 17499 sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); |
| 17500 iOff = (int)strlen(zDefn); |
| 17501 for(i=0; i<pConfig->nCol; i++){ |
| 17502 sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); |
| 17503 iOff += (int)strlen(&zDefn[iOff]); |
| 17504 } |
| 17505 rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); |
| 17506 } |
| 17507 sqlite3_free(zDefn); |
| 17508 } |
| 17509 |
| 17510 if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
| 17511 rc = sqlite3Fts5CreateTable( |
| 17512 pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr |
| 17513 ); |
| 17514 } |
| 17515 if( rc==SQLITE_OK ){ |
| 17516 rc = sqlite3Fts5CreateTable( |
| 17517 pConfig, "config", "k PRIMARY KEY, v", 1, pzErr |
| 17518 ); |
| 17519 } |
| 17520 if( rc==SQLITE_OK ){ |
| 17521 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); |
| 17522 } |
| 17523 } |
| 17524 |
| 17525 if( rc ){ |
| 17526 sqlite3Fts5StorageClose(p); |
| 17527 *pp = 0; |
| 17528 } |
| 17529 return rc; |
| 17530 } |
| 17531 |
| 17532 /* |
| 17533 ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). |
| 17534 */ |
| 17535 static int sqlite3Fts5StorageClose(Fts5Storage *p){ |
| 17536 int rc = SQLITE_OK; |
| 17537 if( p ){ |
| 17538 int i; |
| 17539 |
| 17540 /* Finalize all SQL statements */ |
| 17541 for(i=0; i<ArraySize(p->aStmt); i++){ |
| 17542 sqlite3_finalize(p->aStmt[i]); |
| 17543 } |
| 17544 |
| 17545 sqlite3_free(p); |
| 17546 } |
| 17547 return rc; |
| 17548 } |
| 17549 |
| 17550 typedef struct Fts5InsertCtx Fts5InsertCtx; |
| 17551 struct Fts5InsertCtx { |
| 17552 Fts5Storage *pStorage; |
| 17553 int iCol; |
| 17554 int szCol; /* Size of column value in tokens */ |
| 17555 }; |
| 17556 |
| 17557 /* |
| 17558 ** Tokenization callback used when inserting tokens into the FTS index. |
| 17559 */ |
| 17560 static int fts5StorageInsertCallback( |
| 17561 void *pContext, /* Pointer to Fts5InsertCtx object */ |
| 17562 int tflags, |
| 17563 const char *pToken, /* Buffer containing token */ |
| 17564 int nToken, /* Size of token in bytes */ |
| 17565 int iUnused1, /* Start offset of token */ |
| 17566 int iUnused2 /* End offset of token */ |
| 17567 ){ |
| 17568 Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; |
| 17569 Fts5Index *pIdx = pCtx->pStorage->pIndex; |
| 17570 UNUSED_PARAM2(iUnused1, iUnused2); |
| 17571 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 17572 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ |
| 17573 pCtx->szCol++; |
| 17574 } |
| 17575 return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); |
| 17576 } |
| 17577 |
| 17578 /* |
| 17579 ** If a row with rowid iDel is present in the %_content table, add the |
| 17580 ** delete-markers to the FTS index necessary to delete it. Do not actually |
| 17581 ** remove the %_content row at this time though. |
| 17582 */ |
| 17583 static int fts5StorageDeleteFromIndex( |
| 17584 Fts5Storage *p, |
| 17585 i64 iDel, |
| 17586 sqlite3_value **apVal |
| 17587 ){ |
| 17588 Fts5Config *pConfig = p->pConfig; |
| 17589 sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ |
| 17590 int rc; /* Return code */ |
| 17591 int rc2; /* sqlite3_reset() return code */ |
| 17592 int iCol; |
| 17593 Fts5InsertCtx ctx; |
| 17594 |
| 17595 if( apVal==0 ){ |
| 17596 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); |
| 17597 if( rc!=SQLITE_OK ) return rc; |
| 17598 sqlite3_bind_int64(pSeek, 1, iDel); |
| 17599 if( sqlite3_step(pSeek)!=SQLITE_ROW ){ |
| 17600 return sqlite3_reset(pSeek); |
| 17601 } |
| 17602 } |
| 17603 |
| 17604 ctx.pStorage = p; |
| 17605 ctx.iCol = -1; |
| 17606 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); |
| 17607 for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ |
| 17608 if( pConfig->abUnindexed[iCol-1]==0 ){ |
| 17609 const char *zText; |
| 17610 int nText; |
| 17611 if( pSeek ){ |
| 17612 zText = (const char*)sqlite3_column_text(pSeek, iCol); |
| 17613 nText = sqlite3_column_bytes(pSeek, iCol); |
| 17614 }else{ |
| 17615 zText = (const char*)sqlite3_value_text(apVal[iCol-1]); |
| 17616 nText = sqlite3_value_bytes(apVal[iCol-1]); |
| 17617 } |
| 17618 ctx.szCol = 0; |
| 17619 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, |
| 17620 zText, nText, (void*)&ctx, fts5StorageInsertCallback |
| 17621 ); |
| 17622 p->aTotalSize[iCol-1] -= (i64)ctx.szCol; |
| 17623 } |
| 17624 } |
| 17625 p->nTotalRow--; |
| 17626 |
| 17627 rc2 = sqlite3_reset(pSeek); |
| 17628 if( rc==SQLITE_OK ) rc = rc2; |
| 17629 return rc; |
| 17630 } |
| 17631 |
| 17632 |
| 17633 /* |
| 17634 ** Insert a record into the %_docsize table. Specifically, do: |
| 17635 ** |
| 17636 ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); |
| 17637 ** |
| 17638 ** If there is no %_docsize table (as happens if the columnsize=0 option |
| 17639 ** is specified when the FTS5 table is created), this function is a no-op. |
| 17640 */ |
| 17641 static int fts5StorageInsertDocsize( |
| 17642 Fts5Storage *p, /* Storage module to write to */ |
| 17643 i64 iRowid, /* id value */ |
| 17644 Fts5Buffer *pBuf /* sz value */ |
| 17645 ){ |
| 17646 int rc = SQLITE_OK; |
| 17647 if( p->pConfig->bColumnsize ){ |
| 17648 sqlite3_stmt *pReplace = 0; |
| 17649 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); |
| 17650 if( rc==SQLITE_OK ){ |
| 17651 sqlite3_bind_int64(pReplace, 1, iRowid); |
| 17652 sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); |
| 17653 sqlite3_step(pReplace); |
| 17654 rc = sqlite3_reset(pReplace); |
| 17655 } |
| 17656 } |
| 17657 return rc; |
| 17658 } |
| 17659 |
| 17660 /* |
| 17661 ** Load the contents of the "averages" record from disk into the |
| 17662 ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if |
| 17663 ** argument bCache is true, set the p->bTotalsValid flag to indicate |
| 17664 ** that the contents of aTotalSize[] and nTotalRow are valid until |
| 17665 ** further notice. |
| 17666 ** |
| 17667 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 17668 ** occurs. |
| 17669 */ |
| 17670 static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ |
| 17671 int rc = SQLITE_OK; |
| 17672 if( p->bTotalsValid==0 ){ |
| 17673 rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); |
| 17674 p->bTotalsValid = bCache; |
| 17675 } |
| 17676 return rc; |
| 17677 } |
| 17678 |
| 17679 /* |
| 17680 ** Store the current contents of the p->nTotalRow and p->aTotalSize[] |
| 17681 ** variables in the "averages" record on disk. |
| 17682 ** |
| 17683 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 17684 ** occurs. |
| 17685 */ |
| 17686 static int fts5StorageSaveTotals(Fts5Storage *p){ |
| 17687 int nCol = p->pConfig->nCol; |
| 17688 int i; |
| 17689 Fts5Buffer buf; |
| 17690 int rc = SQLITE_OK; |
| 17691 memset(&buf, 0, sizeof(buf)); |
| 17692 |
| 17693 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); |
| 17694 for(i=0; i<nCol; i++){ |
| 17695 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); |
| 17696 } |
| 17697 if( rc==SQLITE_OK ){ |
| 17698 rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); |
| 17699 } |
| 17700 sqlite3_free(buf.p); |
| 17701 |
| 17702 return rc; |
| 17703 } |
| 17704 |
| 17705 /* |
| 17706 ** Remove a row from the FTS table. |
| 17707 */ |
| 17708 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **ap
Val){ |
| 17709 Fts5Config *pConfig = p->pConfig; |
| 17710 int rc; |
| 17711 sqlite3_stmt *pDel = 0; |
| 17712 |
| 17713 assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 ); |
| 17714 rc = fts5StorageLoadTotals(p, 1); |
| 17715 |
| 17716 /* Delete the index records */ |
| 17717 if( rc==SQLITE_OK ){ |
| 17718 rc = fts5StorageDeleteFromIndex(p, iDel, apVal); |
| 17719 } |
| 17720 |
| 17721 /* Delete the %_docsize record */ |
| 17722 if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
| 17723 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); |
| 17724 if( rc==SQLITE_OK ){ |
| 17725 sqlite3_bind_int64(pDel, 1, iDel); |
| 17726 sqlite3_step(pDel); |
| 17727 rc = sqlite3_reset(pDel); |
| 17728 } |
| 17729 } |
| 17730 |
| 17731 /* Delete the %_content record */ |
| 17732 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 17733 if( rc==SQLITE_OK ){ |
| 17734 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); |
| 17735 } |
| 17736 if( rc==SQLITE_OK ){ |
| 17737 sqlite3_bind_int64(pDel, 1, iDel); |
| 17738 sqlite3_step(pDel); |
| 17739 rc = sqlite3_reset(pDel); |
| 17740 } |
| 17741 } |
| 17742 |
| 17743 /* Write the averages record */ |
| 17744 if( rc==SQLITE_OK ){ |
| 17745 rc = fts5StorageSaveTotals(p); |
| 17746 } |
| 17747 |
| 17748 return rc; |
| 17749 } |
| 17750 |
| 17751 /* |
| 17752 ** Delete all entries in the FTS5 index. |
| 17753 */ |
| 17754 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ |
| 17755 Fts5Config *pConfig = p->pConfig; |
| 17756 int rc; |
| 17757 |
| 17758 /* Delete the contents of the %_data and %_docsize tables. */ |
| 17759 rc = fts5ExecPrintf(pConfig->db, 0, |
| 17760 "DELETE FROM %Q.'%q_data';" |
| 17761 "DELETE FROM %Q.'%q_idx';", |
| 17762 pConfig->zDb, pConfig->zName, |
| 17763 pConfig->zDb, pConfig->zName |
| 17764 ); |
| 17765 if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
| 17766 rc = fts5ExecPrintf(pConfig->db, 0, |
| 17767 "DELETE FROM %Q.'%q_docsize';", |
| 17768 pConfig->zDb, pConfig->zName |
| 17769 ); |
| 17770 } |
| 17771 |
| 17772 /* Reinitialize the %_data table. This call creates the initial structure |
| 17773 ** and averages records. */ |
| 17774 if( rc==SQLITE_OK ){ |
| 17775 rc = sqlite3Fts5IndexReinit(p->pIndex); |
| 17776 } |
| 17777 if( rc==SQLITE_OK ){ |
| 17778 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); |
| 17779 } |
| 17780 return rc; |
| 17781 } |
| 17782 |
| 17783 static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ |
| 17784 Fts5Buffer buf = {0,0,0}; |
| 17785 Fts5Config *pConfig = p->pConfig; |
| 17786 sqlite3_stmt *pScan = 0; |
| 17787 Fts5InsertCtx ctx; |
| 17788 int rc; |
| 17789 |
| 17790 memset(&ctx, 0, sizeof(Fts5InsertCtx)); |
| 17791 ctx.pStorage = p; |
| 17792 rc = sqlite3Fts5StorageDeleteAll(p); |
| 17793 if( rc==SQLITE_OK ){ |
| 17794 rc = fts5StorageLoadTotals(p, 1); |
| 17795 } |
| 17796 |
| 17797 if( rc==SQLITE_OK ){ |
| 17798 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); |
| 17799 } |
| 17800 |
| 17801 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ |
| 17802 i64 iRowid = sqlite3_column_int64(pScan, 0); |
| 17803 |
| 17804 sqlite3Fts5BufferZero(&buf); |
| 17805 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); |
| 17806 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ |
| 17807 ctx.szCol = 0; |
| 17808 if( pConfig->abUnindexed[ctx.iCol]==0 ){ |
| 17809 rc = sqlite3Fts5Tokenize(pConfig, |
| 17810 FTS5_TOKENIZE_DOCUMENT, |
| 17811 (const char*)sqlite3_column_text(pScan, ctx.iCol+1), |
| 17812 sqlite3_column_bytes(pScan, ctx.iCol+1), |
| 17813 (void*)&ctx, |
| 17814 fts5StorageInsertCallback |
| 17815 ); |
| 17816 } |
| 17817 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
| 17818 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; |
| 17819 } |
| 17820 p->nTotalRow++; |
| 17821 |
| 17822 if( rc==SQLITE_OK ){ |
| 17823 rc = fts5StorageInsertDocsize(p, iRowid, &buf); |
| 17824 } |
| 17825 } |
| 17826 sqlite3_free(buf.p); |
| 17827 |
| 17828 /* Write the averages record */ |
| 17829 if( rc==SQLITE_OK ){ |
| 17830 rc = fts5StorageSaveTotals(p); |
| 17831 } |
| 17832 return rc; |
| 17833 } |
| 17834 |
| 17835 static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ |
| 17836 return sqlite3Fts5IndexOptimize(p->pIndex); |
| 17837 } |
| 17838 |
| 17839 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ |
| 17840 return sqlite3Fts5IndexMerge(p->pIndex, nMerge); |
| 17841 } |
| 17842 |
| 17843 static int sqlite3Fts5StorageReset(Fts5Storage *p){ |
| 17844 return sqlite3Fts5IndexReset(p->pIndex); |
| 17845 } |
| 17846 |
| 17847 /* |
| 17848 ** Allocate a new rowid. This is used for "external content" tables when |
| 17849 ** a NULL value is inserted into the rowid column. The new rowid is allocated |
| 17850 ** by inserting a dummy row into the %_docsize table. The dummy will be |
| 17851 ** overwritten later. |
| 17852 ** |
| 17853 ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In |
| 17854 ** this case the user is required to provide a rowid explicitly. |
| 17855 */ |
| 17856 static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ |
| 17857 int rc = SQLITE_MISMATCH; |
| 17858 if( p->pConfig->bColumnsize ){ |
| 17859 sqlite3_stmt *pReplace = 0; |
| 17860 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); |
| 17861 if( rc==SQLITE_OK ){ |
| 17862 sqlite3_bind_null(pReplace, 1); |
| 17863 sqlite3_bind_null(pReplace, 2); |
| 17864 sqlite3_step(pReplace); |
| 17865 rc = sqlite3_reset(pReplace); |
| 17866 } |
| 17867 if( rc==SQLITE_OK ){ |
| 17868 *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); |
| 17869 } |
| 17870 } |
| 17871 return rc; |
| 17872 } |
| 17873 |
| 17874 /* |
| 17875 ** Insert a new row into the FTS content table. |
| 17876 */ |
| 17877 static int sqlite3Fts5StorageContentInsert( |
| 17878 Fts5Storage *p, |
| 17879 sqlite3_value **apVal, |
| 17880 i64 *piRowid |
| 17881 ){ |
| 17882 Fts5Config *pConfig = p->pConfig; |
| 17883 int rc = SQLITE_OK; |
| 17884 |
| 17885 /* Insert the new row into the %_content table. */ |
| 17886 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ |
| 17887 if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ |
| 17888 *piRowid = sqlite3_value_int64(apVal[1]); |
| 17889 }else{ |
| 17890 rc = fts5StorageNewRowid(p, piRowid); |
| 17891 } |
| 17892 }else{ |
| 17893 sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ |
| 17894 int i; /* Counter variable */ |
| 17895 rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); |
| 17896 for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ |
| 17897 rc = sqlite3_bind_value(pInsert, i, apVal[i]); |
| 17898 } |
| 17899 if( rc==SQLITE_OK ){ |
| 17900 sqlite3_step(pInsert); |
| 17901 rc = sqlite3_reset(pInsert); |
| 17902 } |
| 17903 *piRowid = sqlite3_last_insert_rowid(pConfig->db); |
| 17904 } |
| 17905 |
| 17906 return rc; |
| 17907 } |
| 17908 |
| 17909 /* |
| 17910 ** Insert new entries into the FTS index and %_docsize table. |
| 17911 */ |
| 17912 static int sqlite3Fts5StorageIndexInsert( |
| 17913 Fts5Storage *p, |
| 17914 sqlite3_value **apVal, |
| 17915 i64 iRowid |
| 17916 ){ |
| 17917 Fts5Config *pConfig = p->pConfig; |
| 17918 int rc = SQLITE_OK; /* Return code */ |
| 17919 Fts5InsertCtx ctx; /* Tokenization callback context object */ |
| 17920 Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ |
| 17921 |
| 17922 memset(&buf, 0, sizeof(Fts5Buffer)); |
| 17923 ctx.pStorage = p; |
| 17924 rc = fts5StorageLoadTotals(p, 1); |
| 17925 |
| 17926 if( rc==SQLITE_OK ){ |
| 17927 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); |
| 17928 } |
| 17929 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ |
| 17930 ctx.szCol = 0; |
| 17931 if( pConfig->abUnindexed[ctx.iCol]==0 ){ |
| 17932 rc = sqlite3Fts5Tokenize(pConfig, |
| 17933 FTS5_TOKENIZE_DOCUMENT, |
| 17934 (const char*)sqlite3_value_text(apVal[ctx.iCol+2]), |
| 17935 sqlite3_value_bytes(apVal[ctx.iCol+2]), |
| 17936 (void*)&ctx, |
| 17937 fts5StorageInsertCallback |
| 17938 ); |
| 17939 } |
| 17940 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
| 17941 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; |
| 17942 } |
| 17943 p->nTotalRow++; |
| 17944 |
| 17945 /* Write the %_docsize record */ |
| 17946 if( rc==SQLITE_OK ){ |
| 17947 rc = fts5StorageInsertDocsize(p, iRowid, &buf); |
| 17948 } |
| 17949 sqlite3_free(buf.p); |
| 17950 |
| 17951 /* Write the averages record */ |
| 17952 if( rc==SQLITE_OK ){ |
| 17953 rc = fts5StorageSaveTotals(p); |
| 17954 } |
| 17955 |
| 17956 return rc; |
| 17957 } |
| 17958 |
| 17959 static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ |
| 17960 Fts5Config *pConfig = p->pConfig; |
| 17961 char *zSql; |
| 17962 int rc; |
| 17963 |
| 17964 zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'", |
| 17965 pConfig->zDb, pConfig->zName, zSuffix |
| 17966 ); |
| 17967 if( zSql==0 ){ |
| 17968 rc = SQLITE_NOMEM; |
| 17969 }else{ |
| 17970 sqlite3_stmt *pCnt = 0; |
| 17971 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); |
| 17972 if( rc==SQLITE_OK ){ |
| 17973 if( SQLITE_ROW==sqlite3_step(pCnt) ){ |
| 17974 *pnRow = sqlite3_column_int64(pCnt, 0); |
| 17975 } |
| 17976 rc = sqlite3_finalize(pCnt); |
| 17977 } |
| 17978 } |
| 17979 |
| 17980 sqlite3_free(zSql); |
| 17981 return rc; |
| 17982 } |
| 17983 |
| 17984 /* |
| 17985 ** Context object used by sqlite3Fts5StorageIntegrity(). |
| 17986 */ |
| 17987 typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; |
| 17988 struct Fts5IntegrityCtx { |
| 17989 i64 iRowid; |
| 17990 int iCol; |
| 17991 int szCol; |
| 17992 u64 cksum; |
| 17993 Fts5Termset *pTermset; |
| 17994 Fts5Config *pConfig; |
| 17995 }; |
| 17996 |
| 17997 |
| 17998 /* |
| 17999 ** Tokenization callback used by integrity check. |
| 18000 */ |
| 18001 static int fts5StorageIntegrityCallback( |
| 18002 void *pContext, /* Pointer to Fts5IntegrityCtx object */ |
| 18003 int tflags, |
| 18004 const char *pToken, /* Buffer containing token */ |
| 18005 int nToken, /* Size of token in bytes */ |
| 18006 int iUnused1, /* Start offset of token */ |
| 18007 int iUnused2 /* End offset of token */ |
| 18008 ){ |
| 18009 Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; |
| 18010 Fts5Termset *pTermset = pCtx->pTermset; |
| 18011 int bPresent; |
| 18012 int ii; |
| 18013 int rc = SQLITE_OK; |
| 18014 int iPos; |
| 18015 int iCol; |
| 18016 |
| 18017 UNUSED_PARAM2(iUnused1, iUnused2); |
| 18018 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 18019 |
| 18020 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ |
| 18021 pCtx->szCol++; |
| 18022 } |
| 18023 |
| 18024 switch( pCtx->pConfig->eDetail ){ |
| 18025 case FTS5_DETAIL_FULL: |
| 18026 iPos = pCtx->szCol-1; |
| 18027 iCol = pCtx->iCol; |
| 18028 break; |
| 18029 |
| 18030 case FTS5_DETAIL_COLUMNS: |
| 18031 iPos = pCtx->iCol; |
| 18032 iCol = 0; |
| 18033 break; |
| 18034 |
| 18035 default: |
| 18036 assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 18037 iPos = 0; |
| 18038 iCol = 0; |
| 18039 break; |
| 18040 } |
| 18041 |
| 18042 rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); |
| 18043 if( rc==SQLITE_OK && bPresent==0 ){ |
| 18044 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( |
| 18045 pCtx->iRowid, iCol, iPos, 0, pToken, nToken |
| 18046 ); |
| 18047 } |
| 18048 |
| 18049 for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){ |
| 18050 const int nChar = pCtx->pConfig->aPrefix[ii]; |
| 18051 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); |
| 18052 if( nByte ){ |
| 18053 rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); |
| 18054 if( bPresent==0 ){ |
| 18055 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( |
| 18056 pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte |
| 18057 ); |
| 18058 } |
| 18059 } |
| 18060 } |
| 18061 |
| 18062 return rc; |
| 18063 } |
| 18064 |
| 18065 /* |
| 18066 ** Check that the contents of the FTS index match that of the %_content |
| 18067 ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return |
| 18068 ** some other SQLite error code if an error occurs while attempting to |
| 18069 ** determine this. |
| 18070 */ |
| 18071 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ |
| 18072 Fts5Config *pConfig = p->pConfig; |
| 18073 int rc; /* Return code */ |
| 18074 int *aColSize; /* Array of size pConfig->nCol */ |
| 18075 i64 *aTotalSize; /* Array of size pConfig->nCol */ |
| 18076 Fts5IntegrityCtx ctx; |
| 18077 sqlite3_stmt *pScan; |
| 18078 |
| 18079 memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); |
| 18080 ctx.pConfig = p->pConfig; |
| 18081 aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64))); |
| 18082 if( !aTotalSize ) return SQLITE_NOMEM; |
| 18083 aColSize = (int*)&aTotalSize[pConfig->nCol]; |
| 18084 memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); |
| 18085 |
| 18086 /* Generate the expected index checksum based on the contents of the |
| 18087 ** %_content table. This block stores the checksum in ctx.cksum. */ |
| 18088 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); |
| 18089 if( rc==SQLITE_OK ){ |
| 18090 int rc2; |
| 18091 while( SQLITE_ROW==sqlite3_step(pScan) ){ |
| 18092 int i; |
| 18093 ctx.iRowid = sqlite3_column_int64(pScan, 0); |
| 18094 ctx.szCol = 0; |
| 18095 if( pConfig->bColumnsize ){ |
| 18096 rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); |
| 18097 } |
| 18098 if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 18099 rc = sqlite3Fts5TermsetNew(&ctx.pTermset); |
| 18100 } |
| 18101 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ |
| 18102 if( pConfig->abUnindexed[i] ) continue; |
| 18103 ctx.iCol = i; |
| 18104 ctx.szCol = 0; |
| 18105 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ |
| 18106 rc = sqlite3Fts5TermsetNew(&ctx.pTermset); |
| 18107 } |
| 18108 if( rc==SQLITE_OK ){ |
| 18109 rc = sqlite3Fts5Tokenize(pConfig, |
| 18110 FTS5_TOKENIZE_DOCUMENT, |
| 18111 (const char*)sqlite3_column_text(pScan, i+1), |
| 18112 sqlite3_column_bytes(pScan, i+1), |
| 18113 (void*)&ctx, |
| 18114 fts5StorageIntegrityCallback |
| 18115 ); |
| 18116 } |
| 18117 if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ |
| 18118 rc = FTS5_CORRUPT; |
| 18119 } |
| 18120 aTotalSize[i] += ctx.szCol; |
| 18121 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ |
| 18122 sqlite3Fts5TermsetFree(ctx.pTermset); |
| 18123 ctx.pTermset = 0; |
| 18124 } |
| 18125 } |
| 18126 sqlite3Fts5TermsetFree(ctx.pTermset); |
| 18127 ctx.pTermset = 0; |
| 18128 |
| 18129 if( rc!=SQLITE_OK ) break; |
| 18130 } |
| 18131 rc2 = sqlite3_reset(pScan); |
| 18132 if( rc==SQLITE_OK ) rc = rc2; |
| 18133 } |
| 18134 |
| 18135 /* Test that the "totals" (sometimes called "averages") record looks Ok */ |
| 18136 if( rc==SQLITE_OK ){ |
| 18137 int i; |
| 18138 rc = fts5StorageLoadTotals(p, 0); |
| 18139 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ |
| 18140 if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT; |
| 18141 } |
| 18142 } |
| 18143 |
| 18144 /* Check that the %_docsize and %_content tables contain the expected |
| 18145 ** number of rows. */ |
| 18146 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
| 18147 i64 nRow = 0; |
| 18148 rc = fts5StorageCount(p, "content", &nRow); |
| 18149 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; |
| 18150 } |
| 18151 if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
| 18152 i64 nRow = 0; |
| 18153 rc = fts5StorageCount(p, "docsize", &nRow); |
| 18154 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; |
| 18155 } |
| 18156 |
| 18157 /* Pass the expected checksum down to the FTS index module. It will |
| 18158 ** verify, amongst other things, that it matches the checksum generated by |
| 18159 ** inspecting the index itself. */ |
| 18160 if( rc==SQLITE_OK ){ |
| 18161 rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum); |
| 18162 } |
| 18163 |
| 18164 sqlite3_free(aTotalSize); |
| 18165 return rc; |
| 18166 } |
| 18167 |
| 18168 /* |
| 18169 ** Obtain an SQLite statement handle that may be used to read data from the |
| 18170 ** %_content table. |
| 18171 */ |
| 18172 static int sqlite3Fts5StorageStmt( |
| 18173 Fts5Storage *p, |
| 18174 int eStmt, |
| 18175 sqlite3_stmt **pp, |
| 18176 char **pzErrMsg |
| 18177 ){ |
| 18178 int rc; |
| 18179 assert( eStmt==FTS5_STMT_SCAN_ASC |
| 18180 || eStmt==FTS5_STMT_SCAN_DESC |
| 18181 || eStmt==FTS5_STMT_LOOKUP |
| 18182 ); |
| 18183 rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); |
| 18184 if( rc==SQLITE_OK ){ |
| 18185 assert( p->aStmt[eStmt]==*pp ); |
| 18186 p->aStmt[eStmt] = 0; |
| 18187 } |
| 18188 return rc; |
| 18189 } |
| 18190 |
| 18191 /* |
| 18192 ** Release an SQLite statement handle obtained via an earlier call to |
| 18193 ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function |
| 18194 ** must match that passed to the sqlite3Fts5StorageStmt() call. |
| 18195 */ |
| 18196 static void sqlite3Fts5StorageStmtRelease( |
| 18197 Fts5Storage *p, |
| 18198 int eStmt, |
| 18199 sqlite3_stmt *pStmt |
| 18200 ){ |
| 18201 assert( eStmt==FTS5_STMT_SCAN_ASC |
| 18202 || eStmt==FTS5_STMT_SCAN_DESC |
| 18203 || eStmt==FTS5_STMT_LOOKUP |
| 18204 ); |
| 18205 if( p->aStmt[eStmt]==0 ){ |
| 18206 sqlite3_reset(pStmt); |
| 18207 p->aStmt[eStmt] = pStmt; |
| 18208 }else{ |
| 18209 sqlite3_finalize(pStmt); |
| 18210 } |
| 18211 } |
| 18212 |
| 18213 static int fts5StorageDecodeSizeArray( |
| 18214 int *aCol, int nCol, /* Array to populate */ |
| 18215 const u8 *aBlob, int nBlob /* Record to read varints from */ |
| 18216 ){ |
| 18217 int i; |
| 18218 int iOff = 0; |
| 18219 for(i=0; i<nCol; i++){ |
| 18220 if( iOff>=nBlob ) return 1; |
| 18221 iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]); |
| 18222 } |
| 18223 return (iOff!=nBlob); |
| 18224 } |
| 18225 |
| 18226 /* |
| 18227 ** Argument aCol points to an array of integers containing one entry for |
| 18228 ** each table column. This function reads the %_docsize record for the |
| 18229 ** specified rowid and populates aCol[] with the results. |
| 18230 ** |
| 18231 ** An SQLite error code is returned if an error occurs, or SQLITE_OK |
| 18232 ** otherwise. |
| 18233 */ |
| 18234 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ |
| 18235 int nCol = p->pConfig->nCol; /* Number of user columns in table */ |
| 18236 sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ |
| 18237 int rc; /* Return Code */ |
| 18238 |
| 18239 assert( p->pConfig->bColumnsize ); |
| 18240 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); |
| 18241 if( rc==SQLITE_OK ){ |
| 18242 int bCorrupt = 1; |
| 18243 sqlite3_bind_int64(pLookup, 1, iRowid); |
| 18244 if( SQLITE_ROW==sqlite3_step(pLookup) ){ |
| 18245 const u8 *aBlob = sqlite3_column_blob(pLookup, 0); |
| 18246 int nBlob = sqlite3_column_bytes(pLookup, 0); |
| 18247 if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ |
| 18248 bCorrupt = 0; |
| 18249 } |
| 18250 } |
| 18251 rc = sqlite3_reset(pLookup); |
| 18252 if( bCorrupt && rc==SQLITE_OK ){ |
| 18253 rc = FTS5_CORRUPT; |
| 18254 } |
| 18255 } |
| 18256 |
| 18257 return rc; |
| 18258 } |
| 18259 |
| 18260 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ |
| 18261 int rc = fts5StorageLoadTotals(p, 0); |
| 18262 if( rc==SQLITE_OK ){ |
| 18263 *pnToken = 0; |
| 18264 if( iCol<0 ){ |
| 18265 int i; |
| 18266 for(i=0; i<p->pConfig->nCol; i++){ |
| 18267 *pnToken += p->aTotalSize[i]; |
| 18268 } |
| 18269 }else if( iCol<p->pConfig->nCol ){ |
| 18270 *pnToken = p->aTotalSize[iCol]; |
| 18271 }else{ |
| 18272 rc = SQLITE_RANGE; |
| 18273 } |
| 18274 } |
| 18275 return rc; |
| 18276 } |
| 18277 |
| 18278 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ |
| 18279 int rc = fts5StorageLoadTotals(p, 0); |
| 18280 if( rc==SQLITE_OK ){ |
| 18281 *pnRow = p->nTotalRow; |
| 18282 } |
| 18283 return rc; |
| 18284 } |
| 18285 |
| 18286 /* |
| 18287 ** Flush any data currently held in-memory to disk. |
| 18288 */ |
| 18289 static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){ |
| 18290 if( bCommit && p->bTotalsValid ){ |
| 18291 int rc = fts5StorageSaveTotals(p); |
| 18292 p->bTotalsValid = 0; |
| 18293 if( rc!=SQLITE_OK ) return rc; |
| 18294 } |
| 18295 return sqlite3Fts5IndexSync(p->pIndex, bCommit); |
| 18296 } |
| 18297 |
| 18298 static int sqlite3Fts5StorageRollback(Fts5Storage *p){ |
| 18299 p->bTotalsValid = 0; |
| 18300 return sqlite3Fts5IndexRollback(p->pIndex); |
| 18301 } |
| 18302 |
| 18303 static int sqlite3Fts5StorageConfigValue( |
| 18304 Fts5Storage *p, |
| 18305 const char *z, |
| 18306 sqlite3_value *pVal, |
| 18307 int iVal |
| 18308 ){ |
| 18309 sqlite3_stmt *pReplace = 0; |
| 18310 int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); |
| 18311 if( rc==SQLITE_OK ){ |
| 18312 sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); |
| 18313 if( pVal ){ |
| 18314 sqlite3_bind_value(pReplace, 2, pVal); |
| 18315 }else{ |
| 18316 sqlite3_bind_int(pReplace, 2, iVal); |
| 18317 } |
| 18318 sqlite3_step(pReplace); |
| 18319 rc = sqlite3_reset(pReplace); |
| 18320 } |
| 18321 if( rc==SQLITE_OK && pVal ){ |
| 18322 int iNew = p->pConfig->iCookie + 1; |
| 18323 rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); |
| 18324 if( rc==SQLITE_OK ){ |
| 18325 p->pConfig->iCookie = iNew; |
| 18326 } |
| 18327 } |
| 18328 return rc; |
| 18329 } |
| 18330 |
| 18331 /* |
| 18332 ** 2014 May 31 |
| 18333 ** |
| 18334 ** The author disclaims copyright to this source code. In place of |
| 18335 ** a legal notice, here is a blessing: |
| 18336 ** |
| 18337 ** May you do good and not evil. |
| 18338 ** May you find forgiveness for yourself and forgive others. |
| 18339 ** May you share freely, never taking more than you give. |
| 18340 ** |
| 18341 ****************************************************************************** |
| 18342 */ |
| 18343 |
| 18344 |
| 18345 /* #include "fts5Int.h" */ |
| 18346 |
| 18347 /************************************************************************** |
| 18348 ** Start of ascii tokenizer implementation. |
| 18349 */ |
| 18350 |
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
**
** The table is indexed by byte value (0..127). An entry of 1 marks a
** token character and 0 marks a separator. It is the default that the
** tokenizer constructors copy into their own aTokenChar[] arrays before
** applying any "tokenchars"/"separators" options.
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F: '0'-'9' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F: 'A'-'O' */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F: 'P'-'Z' */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F: 'a'-'o' */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F: 'p'-'z' */
};
| 18365 |
/*
** State of an "ascii" tokenizer instance: a per-instance copy of the
** token-character table. aTokenChar[c] is non-zero if ASCII byte c is a
** token character and zero if it is a separator.
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];
};
| 18370 |
| 18371 static void fts5AsciiAddExceptions( |
| 18372 AsciiTokenizer *p, |
| 18373 const char *zArg, |
| 18374 int bTokenChars |
| 18375 ){ |
| 18376 int i; |
| 18377 for(i=0; zArg[i]; i++){ |
| 18378 if( (zArg[i] & 0x80)==0 ){ |
| 18379 p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; |
| 18380 } |
| 18381 } |
| 18382 } |
| 18383 |
| 18384 /* |
| 18385 ** Delete a "ascii" tokenizer. |
| 18386 */ |
| 18387 static void fts5AsciiDelete(Fts5Tokenizer *p){ |
| 18388 sqlite3_free(p); |
| 18389 } |
| 18390 |
| 18391 /* |
| 18392 ** Create an "ascii" tokenizer. |
| 18393 */ |
| 18394 static int fts5AsciiCreate( |
| 18395 void *pUnused, |
| 18396 const char **azArg, int nArg, |
| 18397 Fts5Tokenizer **ppOut |
| 18398 ){ |
| 18399 int rc = SQLITE_OK; |
| 18400 AsciiTokenizer *p = 0; |
| 18401 UNUSED_PARAM(pUnused); |
| 18402 if( nArg%2 ){ |
| 18403 rc = SQLITE_ERROR; |
| 18404 }else{ |
| 18405 p = sqlite3_malloc(sizeof(AsciiTokenizer)); |
| 18406 if( p==0 ){ |
| 18407 rc = SQLITE_NOMEM; |
| 18408 }else{ |
| 18409 int i; |
| 18410 memset(p, 0, sizeof(AsciiTokenizer)); |
| 18411 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); |
| 18412 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
| 18413 const char *zArg = azArg[i+1]; |
| 18414 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ |
| 18415 fts5AsciiAddExceptions(p, zArg, 1); |
| 18416 }else |
| 18417 if( 0==sqlite3_stricmp(azArg[i], "separators") ){ |
| 18418 fts5AsciiAddExceptions(p, zArg, 0); |
| 18419 }else{ |
| 18420 rc = SQLITE_ERROR; |
| 18421 } |
| 18422 } |
| 18423 if( rc!=SQLITE_OK ){ |
| 18424 fts5AsciiDelete((Fts5Tokenizer*)p); |
| 18425 p = 0; |
| 18426 } |
| 18427 } |
| 18428 } |
| 18429 |
| 18430 *ppOut = (Fts5Tokenizer*)p; |
| 18431 return rc; |
| 18432 } |
| 18433 |
| 18434 |
/*
** Copy nByte bytes from aIn[] to aOut[], converting ASCII upper case
** letters ('A'..'Z') to lower case. All other bytes are copied as is.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int k = 0;
  while( k<nByte ){
    char ch = aIn[k];
    if( ch>='A' && ch<='Z' ){
      ch += 32;
    }
    aOut[k] = ch;
    k++;
  }
}
| 18443 |
| 18444 /* |
| 18445 ** Tokenize some text using the ascii tokenizer. |
| 18446 */ |
| 18447 static int fts5AsciiTokenize( |
| 18448 Fts5Tokenizer *pTokenizer, |
| 18449 void *pCtx, |
| 18450 int iUnused, |
| 18451 const char *pText, int nText, |
| 18452 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) |
| 18453 ){ |
| 18454 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; |
| 18455 int rc = SQLITE_OK; |
| 18456 int ie; |
| 18457 int is = 0; |
| 18458 |
| 18459 char aFold[64]; |
| 18460 int nFold = sizeof(aFold); |
| 18461 char *pFold = aFold; |
| 18462 unsigned char *a = p->aTokenChar; |
| 18463 |
| 18464 UNUSED_PARAM(iUnused); |
| 18465 |
| 18466 while( is<nText && rc==SQLITE_OK ){ |
| 18467 int nByte; |
| 18468 |
| 18469 /* Skip any leading divider characters. */ |
| 18470 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ |
| 18471 is++; |
| 18472 } |
| 18473 if( is==nText ) break; |
| 18474 |
| 18475 /* Count the token characters */ |
| 18476 ie = is+1; |
| 18477 while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){ |
| 18478 ie++; |
| 18479 } |
| 18480 |
| 18481 /* Fold to lower case */ |
| 18482 nByte = ie-is; |
| 18483 if( nByte>nFold ){ |
| 18484 if( pFold!=aFold ) sqlite3_free(pFold); |
| 18485 pFold = sqlite3_malloc(nByte*2); |
| 18486 if( pFold==0 ){ |
| 18487 rc = SQLITE_NOMEM; |
| 18488 break; |
| 18489 } |
| 18490 nFold = nByte*2; |
| 18491 } |
| 18492 asciiFold(pFold, &pText[is], nByte); |
| 18493 |
| 18494 /* Invoke the token callback */ |
| 18495 rc = xToken(pCtx, 0, pFold, nByte, is, ie); |
| 18496 is = ie+1; |
| 18497 } |
| 18498 |
| 18499 if( pFold!=aFold ) sqlite3_free(pFold); |
| 18500 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 18501 return rc; |
| 18502 } |
| 18503 |
| 18504 /************************************************************************** |
| 18505 ** Start of unicode61 tokenizer implementation. |
| 18506 */ |
| 18507 |
| 18508 |
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
**
** sqlite3Utf8Trans1[] is indexed by (first byte - 0xc0) and supplies the
** initial value of the codepoint accumulator for a multi-byte character.
**
** READ_UTF8(zIn, zTerm, c) reads one character starting at zIn into c and
** advances zIn past it. The first byte is read unconditionally, so the
** caller must ensure zIn is before zTerm. Continuation bytes (10xxxxxx)
** are consumed until zIn reaches zTerm or a non-continuation byte is
** seen. A multi-byte result that is less than 0x80, that lies in the
** surrogate range 0xD800..0xDFFF, or that equals 0xFFFE or 0xFFFF, is
** replaced by 0xFFFD. Note that the macro expands to several statements
** that are not wrapped in a block.
**
** WRITE_UTF8(zOut, c) appends the encoding of codepoint c (between one
** and four bytes) at zOut and advances zOut past it.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */
| 18561 |
/*
** State of a "unicode61" tokenizer instance.
**
** aiException[] holds non-ASCII codepoints whose token/separator status
** is the opposite of what sqlite3Fts5UnicodeIsalnum() reports. It is kept
** sorted in ascending order so that it can be binary searched.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int bRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Sorted array of exception codepoints */
};
| 18571 |
| 18572 static int fts5UnicodeAddExceptions( |
| 18573 Unicode61Tokenizer *p, /* Tokenizer object */ |
| 18574 const char *z, /* Characters to treat as exceptions */ |
| 18575 int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ |
| 18576 ){ |
| 18577 int rc = SQLITE_OK; |
| 18578 int n = (int)strlen(z); |
| 18579 int *aNew; |
| 18580 |
| 18581 if( n>0 ){ |
| 18582 aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int)); |
| 18583 if( aNew ){ |
| 18584 int nNew = p->nException; |
| 18585 const unsigned char *zCsr = (const unsigned char*)z; |
| 18586 const unsigned char *zTerm = (const unsigned char*)&z[n]; |
| 18587 while( zCsr<zTerm ){ |
| 18588 int iCode; |
| 18589 int bToken; |
| 18590 READ_UTF8(zCsr, zTerm, iCode); |
| 18591 if( iCode<128 ){ |
| 18592 p->aTokenChar[iCode] = (unsigned char)bTokenChars; |
| 18593 }else{ |
| 18594 bToken = sqlite3Fts5UnicodeIsalnum(iCode); |
| 18595 assert( (bToken==0 || bToken==1) ); |
| 18596 assert( (bTokenChars==0 || bTokenChars==1) ); |
| 18597 if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ |
| 18598 int i; |
| 18599 for(i=0; i<nNew; i++){ |
| 18600 if( aNew[i]>iCode ) break; |
| 18601 } |
| 18602 memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); |
| 18603 aNew[i] = iCode; |
| 18604 nNew++; |
| 18605 } |
| 18606 } |
| 18607 } |
| 18608 p->aiException = aNew; |
| 18609 p->nException = nNew; |
| 18610 }else{ |
| 18611 rc = SQLITE_NOMEM; |
| 18612 } |
| 18613 } |
| 18614 |
| 18615 return rc; |
| 18616 } |
| 18617 |
| 18618 /* |
| 18619 ** Return true if the p->aiException[] array contains the value iCode. |
| 18620 */ |
| 18621 static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ |
| 18622 if( p->nException>0 ){ |
| 18623 int *a = p->aiException; |
| 18624 int iLo = 0; |
| 18625 int iHi = p->nException-1; |
| 18626 |
| 18627 while( iHi>=iLo ){ |
| 18628 int iTest = (iHi + iLo) / 2; |
| 18629 if( iCode==a[iTest] ){ |
| 18630 return 1; |
| 18631 }else if( iCode>a[iTest] ){ |
| 18632 iLo = iTest+1; |
| 18633 }else{ |
| 18634 iHi = iTest-1; |
| 18635 } |
| 18636 } |
| 18637 } |
| 18638 |
| 18639 return 0; |
| 18640 } |
| 18641 |
| 18642 /* |
| 18643 ** Delete a "unicode61" tokenizer. |
| 18644 */ |
| 18645 static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ |
| 18646 if( pTok ){ |
| 18647 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; |
| 18648 sqlite3_free(p->aiException); |
| 18649 sqlite3_free(p->aFold); |
| 18650 sqlite3_free(p); |
| 18651 } |
| 18652 return; |
| 18653 } |
| 18654 |
| 18655 /* |
| 18656 ** Create a "unicode61" tokenizer. |
| 18657 */ |
| 18658 static int fts5UnicodeCreate( |
| 18659 void *pUnused, |
| 18660 const char **azArg, int nArg, |
| 18661 Fts5Tokenizer **ppOut |
| 18662 ){ |
| 18663 int rc = SQLITE_OK; /* Return code */ |
| 18664 Unicode61Tokenizer *p = 0; /* New tokenizer object */ |
| 18665 |
| 18666 UNUSED_PARAM(pUnused); |
| 18667 |
| 18668 if( nArg%2 ){ |
| 18669 rc = SQLITE_ERROR; |
| 18670 }else{ |
| 18671 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); |
| 18672 if( p ){ |
| 18673 int i; |
| 18674 memset(p, 0, sizeof(Unicode61Tokenizer)); |
| 18675 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); |
| 18676 p->bRemoveDiacritic = 1; |
| 18677 p->nFold = 64; |
| 18678 p->aFold = sqlite3_malloc(p->nFold * sizeof(char)); |
| 18679 if( p->aFold==0 ){ |
| 18680 rc = SQLITE_NOMEM; |
| 18681 } |
| 18682 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
| 18683 const char *zArg = azArg[i+1]; |
| 18684 if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ |
| 18685 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ |
| 18686 rc = SQLITE_ERROR; |
| 18687 } |
| 18688 p->bRemoveDiacritic = (zArg[0]=='1'); |
| 18689 }else |
| 18690 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ |
| 18691 rc = fts5UnicodeAddExceptions(p, zArg, 1); |
| 18692 }else |
| 18693 if( 0==sqlite3_stricmp(azArg[i], "separators") ){ |
| 18694 rc = fts5UnicodeAddExceptions(p, zArg, 0); |
| 18695 }else{ |
| 18696 rc = SQLITE_ERROR; |
| 18697 } |
| 18698 } |
| 18699 }else{ |
| 18700 rc = SQLITE_NOMEM; |
| 18701 } |
| 18702 if( rc!=SQLITE_OK ){ |
| 18703 fts5UnicodeDelete((Fts5Tokenizer*)p); |
| 18704 p = 0; |
| 18705 } |
| 18706 *ppOut = (Fts5Tokenizer*)p; |
| 18707 } |
| 18708 return rc; |
| 18709 } |
| 18710 |
| 18711 /* |
| 18712 ** Return true if, for the purposes of tokenizing with the tokenizer |
| 18713 ** passed as the first argument, codepoint iCode is considered a token |
| 18714 ** character (not a separator). |
| 18715 */ |
| 18716 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ |
| 18717 assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); |
| 18718 return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode); |
| 18719 } |
| 18720 |
/*
** Tokenize the nText bytes of UTF-8 text at pText using the unicode61
** tokenizer pTokenizer.
**
** Each token is folded into the tokenizer's p->aFold buffer (ASCII upper
** case is lowered inline, non-ASCII codepoints go through
** sqlite3Fts5UnicodeFold()) and then passed to xToken() along with the
** byte offsets of its start and end within pText.
**
** Tokenization stops when the input is exhausted, when xToken() returns
** something other than SQLITE_OK, or when the fold buffer cannot be grown
** (SQLITE_NOMEM). An SQLITE_DONE result is converted to SQLITE_OK.
**
** Control flow note: the separator-skipping loop never falls through. It
** either jumps to tokenize_done at end of input, or jumps into the middle
** of the token loop (labels non_ascii_tokenchar / ascii_tokenchar) to
** process the first token character it found. As a result the buffer
** size check at the top of the token loop is skipped for the first
** character of each token, when zOut is still at the start of aFold.
*/
static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int iUnused,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer. pEnd is placed 6 bytes before the end of the buffer;
  ** once zOut passes it the buffer is doubled before writing more. */
  char *aFold = p->aFold;
  int nFold = p->nFold;
  const char *pEnd = &aFold[nFold-6];

  UNUSED_PARAM(iUnused);

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token. */
  while( rc==SQLITE_OK ){
    int iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;           /* Write cursor within aFold[] */
    int is;                       /* Byte offset of start of token */
    int ie;                       /* Byte offset of end of token */

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;
      if( *zCsr & 0x80 ) {
        /* A character outside of the ascii range. Skip past it if it is
        ** a separator character. Or break out of the loop if it is not. */
        is = zCsr - (unsigned char*)pText;
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p, iCode) ){
          goto non_ascii_tokenchar;
        }
      }else{
        if( a[*zCsr] ){
          is = zCsr - (unsigned char*)pText;
          goto ascii_tokenchar;
        }
        zCsr++;
      }
    }

    /* Run through the tokenchars. Fold them into the output buffer along
    ** the way. */
    while( zCsr<zTerm ){

      /* Grow the output buffer so that there is sufficient space to fit the
      ** largest possible utf-8 character. The new buffer replaces
      ** p->aFold, so the larger size is kept for later calls. */
      if( zOut>pEnd ){
        aFold = sqlite3_malloc(nFold*2);
        if( aFold==0 ){
          rc = SQLITE_NOMEM;
          goto tokenize_done;
        }
        zOut = &aFold[zOut - p->aFold];
        memcpy(aFold, p->aFold, nFold);
        sqlite3_free(p->aFold);
        p->aFold = aFold;
        p->nFold = nFold = nFold*2;
        pEnd = &aFold[nFold-6];
      }

      if( *zCsr & 0x80 ){
        /* A non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not.
        ** Diacritics are accepted here as part of a token that has
        ** already started. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
          /* A zero result from the fold means nothing is written. */
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break;
      }else{
 ascii_tokenchar:
        if( *zCsr>='A' && *zCsr<='Z' ){
          *zOut++ = *zCsr + 32;
        }else{
          *zOut++ = *zCsr;
        }
        zCsr++;
      }
      /* Only reached after a token character has been consumed, so ie
      ** always marks the end of the last character that is part of the
      ** token (both break statements above skip this line). */
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
  }

 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
| 18824 |
| 18825 /************************************************************************** |
| 18826 ** Start of porter stemmer implementation. |
| 18827 */ |
| 18828 |
/* Any tokens larger than this (in bytes) are passed through without
** stemming. */
#define FTS5_PORTER_MAX_TOKEN 64

/*
** State of a "porter" tokenizer instance. The porter tokenizer wraps a
** parent tokenizer: "tokenizer" holds the parent's method table and
** pTokenizer the parent instance created by fts5PorterCreate().
*/
typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
  fts5_tokenizer tokenizer;       /* Parent tokenizer module */
  Fts5Tokenizer *pTokenizer;      /* Parent tokenizer instance */
  /* Scratch buffer, 64 bytes larger than the longest token that is
  ** stemmed. NOTE(review): presumably where tokens are copied for
  ** stemming - confirm against the token callback. */
  char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
};
| 18839 |
| 18840 /* |
| 18841 ** Delete a "porter" tokenizer. |
| 18842 */ |
| 18843 static void fts5PorterDelete(Fts5Tokenizer *pTok){ |
| 18844 if( pTok ){ |
| 18845 PorterTokenizer *p = (PorterTokenizer*)pTok; |
| 18846 if( p->pTokenizer ){ |
| 18847 p->tokenizer.xDelete(p->pTokenizer); |
| 18848 } |
| 18849 sqlite3_free(p); |
| 18850 } |
| 18851 } |
| 18852 |
| 18853 /* |
| 18854 ** Create a "porter" tokenizer. |
| 18855 */ |
| 18856 static int fts5PorterCreate( |
| 18857 void *pCtx, |
| 18858 const char **azArg, int nArg, |
| 18859 Fts5Tokenizer **ppOut |
| 18860 ){ |
| 18861 fts5_api *pApi = (fts5_api*)pCtx; |
| 18862 int rc = SQLITE_OK; |
| 18863 PorterTokenizer *pRet; |
| 18864 void *pUserdata = 0; |
| 18865 const char *zBase = "unicode61"; |
| 18866 |
| 18867 if( nArg>0 ){ |
| 18868 zBase = azArg[0]; |
| 18869 } |
| 18870 |
| 18871 pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); |
| 18872 if( pRet ){ |
| 18873 memset(pRet, 0, sizeof(PorterTokenizer)); |
| 18874 rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); |
| 18875 }else{ |
| 18876 rc = SQLITE_NOMEM; |
| 18877 } |
| 18878 if( rc==SQLITE_OK ){ |
| 18879 int nArg2 = (nArg>0 ? nArg-1 : 0); |
| 18880 const char **azArg2 = (nArg2 ? &azArg[1] : 0); |
| 18881 rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer); |
| 18882 } |
| 18883 |
| 18884 if( rc!=SQLITE_OK ){ |
| 18885 fts5PorterDelete((Fts5Tokenizer*)pRet); |
| 18886 pRet = 0; |
| 18887 } |
| 18888 *ppOut = (Fts5Tokenizer*)pRet; |
| 18889 return rc; |
| 18890 } |
| 18891 |
/*
** Bundle of a token callback, its context pointer and a buffer.
** NOTE(review): presumably handed to the parent tokenizer as the callback
** context so that stemmed tokens can be forwarded to xToken - the code
** that uses this structure is not part of this section; confirm there.
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                     /* First argument to pass to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Token callback */
  char *aBuf;                     /* Buffer (presumably stemming scratch) */
};
| 18898 |
/*
** One suffix-rewriting rule, as consumed by fts5PorterApply(): if the
** buffer ends with zSuffix and xCond is NULL or returns true for the
** remaining stem, the suffix is replaced by zOutput. An array of rules is
** terminated by an entry with zSuffix==NULL.
*/
typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;            /* Suffix to match */
  int nSuffix;                    /* strlen(zSuffix) */
  int (*xCond)(char *zStem, int nStem);   /* Stem condition, or NULL */
  const char *zOutput;            /* Replacement for the suffix */
  int nOutput;                    /* strlen(zOutput) */
};
| 18907 |
#if 0
/*
** NOTE: this function is compiled out by the surrounding "#if 0".
**
** Apply the first rule in the NULL-terminated array aRule[] whose suffix
** matches the end of the nBuf (*pnBuf) byte buffer aBuf[]. If that rule
** has no condition, or its condition holds for the stem, the suffix is
** overwritten with the rule's output, *pnBuf is updated and the index of
** the rule is returned. Otherwise -1 is returned and the buffer is left
** unchanged. Only the first matching suffix is considered; later rules
** are not tried if its condition fails.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int ret = -1;
  int nBuf = *pnBuf;
  PorterRule *p;

  for(p=aRule; p->zSuffix; p++){
    assert( strlen(p->zSuffix)==p->nSuffix );
    assert( strlen(p->zOutput)==p->nOutput );
    if( nBuf<p->nSuffix ) continue;
    if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break;
  }

  if( p->zSuffix ){
    int nStem = nBuf - p->nSuffix;
    if( p->xCond==0 || p->xCond(aBuf, nStem) ){
      memcpy(&aBuf[nStem], p->zOutput, p->nOutput);
      *pnBuf = nStem + p->nOutput;
      ret = p - aRule;
    }
  }

  return ret;
}
#endif
| 18933 |
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o or u, or if c
** is 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return (bYIsVowel!=0);
    default:
      return 0;
  }
}
| 18939 |
/*
** Scan the nStem byte stem zStem for a vowel followed (not necessarily
** immediately) by a consonant. If such a sequence is found, return the
** number of bytes up to and including that consonant. Otherwise return 0.
**
** bPrevCons indicates whether the character before zStem[0] was a
** consonant; a 'y' counts as a vowel only when it follows a consonant.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bIsCons = bPrevCons;

  /* Scan for a vowel */
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons==0 ) break;
    iPos++;
  }

  /* Scan for a consonant, starting after the vowel */
  iPos++;
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons ) return iPos+1;
    iPos++;
  }

  return 0;
}
| 18955 |
/* porter rule condition: (m > 0)
**
** True if the stem contains at least one vowel-consonant sequence. */
static int fts5Porter_MGt0(char *zStem, int nStem){
  int nGobble = fts5PorterGobbleVC(zStem, nStem, 0);
  return (nGobble!=0);
}
| 18960 |
/* porter rule condition: (m > 1)
**
** True if the stem contains at least two vowel-consonant sequences. */
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ){
    return 0;
  }
  /* The first sequence ended on a consonant, hence bPrevCons==1. */
  return (fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0);
}
| 18970 |
/* porter rule condition: (m = 1)
**
** True if the stem contains exactly one vowel-consonant sequence. */
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ){
    return 0;
  }
  /* The first sequence ended on a consonant, hence bPrevCons==1. */
  return (fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0);
}
| 18980 |
/* porter rule condition: (*o)
**
** True if the stem ends consonant-vowel-consonant and the final consonant
** is not 'w', 'x' or 'y'. An empty stem never matches.
**
** Only the consonant/vowel classification of the final three characters
** matters, so just the low three bits of the history are kept. This keeps
** the shift well-defined for stems of any length.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  int i;
  int mask = 0;                   /* Last three classes; bit set == consonant */
  int bCons = 0;

  if( nStem<1 ) return 0;
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }

  for(i=0; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    assert( bCons==0 || bCons==1 );
    mask = ((mask << 1) + bCons) & 0x0007;
  }
  return (mask==0x0005);
}
| 18997 |
/* porter rule condition: (m > 1 and (*S or *T))
**
** True if the stem ends in 's' or 't' and satisfies (m > 1). The stem
** must not be empty. */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ){
    return 0;
  }
  return (fts5Porter_MGt1(zStem, nStem)!=0);
}
| 19004 |
/* porter rule condition: (*v*)
**
** True if the stem contains a vowel. A 'y' counts as a vowel everywhere
** except at the very start of the stem. */
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos;
  int bFound = 0;
  for(iPos=0; bFound==0 && iPos<nStem; iPos++){
    bFound = (fts5PorterIsVowel(zStem[iPos], iPos>0)!=0);
  }
  return bFound;
}
| 19015 |
| 19016 |
| 19017 /************************************************************************** |
| 19018 *************************************************************************** |
| 19019 ** GENERATED CODE STARTS HERE (mkportersteps.tcl) |
| 19020 */ |
| 19021 |
/*
** Porter stemmer step 4 (generated code - do not restructure by hand).
**
** aBuf[] holds the word being stemmed and *pnBuf its length in bytes.
** The switch dispatches on the second-to-last character; within each case
** the candidate suffixes are tested in order. For the first suffix that
** matches (and is strictly shorter than the word), the suffix is dropped
** by reducing *pnBuf if the remaining stem satisfies the rule's condition
** - (m > 1) for every suffix except "ion", which also requires the stem
** to end in 's' or 't'. If the condition fails, no other suffix is tried.
**
** The buffer contents are never modified, only *pnBuf. The local "ret"
** is never assigned, so this function always returns 0.
**
** aBuf[nBuf-2] is read unconditionally, so *pnBuf must be at least 2.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      /* Longer suffixes are listed before the shorter ones they contain
      ** ("ement" before "ment" before "ent"). */
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
| 19154 |
| 19155 |
/*
** Second half of Porter step 1b.
**
** If the *pnBuf byte stem in aBuf[] is at least three bytes long and ends
** in "at", "bl" or "iz", append an 'e' (giving "ate", "ble" or "ize"),
** increment *pnBuf and return 1. Otherwise leave both the buffer and
** *pnBuf untouched and return 0.
**
** The caller must guarantee that aBuf[] has room for one extra byte
** at offset *pnBuf.
**
** Stems shorter than three bytes can never match any rule, so they are
** rejected before any byte of aBuf[] is examined. This avoids reading
** aBuf[nBuf-2] when nBuf is less than 2.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  const char *zTail;              /* Last two bytes of the stem */

  if( nBuf<3 ) return 0;

  zTail = &aBuf[nBuf-2];
  if( 0!=memcmp("at", zTail, 2)
   && 0!=memcmp("bl", zTail, 2)
   && 0!=memcmp("iz", zTail, 2)
  ){
    return 0;
  }

  /* All three rules rewrite the matched suffix to itself followed by 'e' */
  aBuf[nBuf] = 'e';
  *pnBuf = nBuf + 1;
  return 1;
}
| 19188 |
| 19189 |
/*
** Helper for fts5PorterStep2(). Test whether the *pnBuf byte stem in
** aBuf[] ends with the nSuffix byte string zSuffix and is strictly longer
** than it. If not, return 0 without touching anything.
**
** If the suffix does match, return 1. In that case, and only if
** fts5Porter_MGt0() accepts the part of the stem that precedes the
** suffix, the suffix is overwritten with the nNew byte string zNew and
** *pnBuf is updated to the new length.
*/
static int fts5PorterStep2Rule(
  char *aBuf,                     /* Stem buffer */
  int *pnBuf,                     /* IN/OUT: Length of stem in bytes */
  const char *zSuffix,            /* Suffix to look for */
  int nSuffix,                    /* Length of zSuffix in bytes */
  const char *zNew,               /* Replacement for the suffix */
  int nNew                        /* Length of zNew in bytes */
){
  const int nStem = *pnBuf - nSuffix;
  if( nStem<=0 || 0!=memcmp(zSuffix, &aBuf[nStem], nSuffix) ) return 0;
  if( fts5Porter_MGt0(aBuf, nStem) ){
    memcpy(&aBuf[nStem], zNew, nNew);
    *pnBuf = nStem + nNew;
  }
  return 1;
}

/*
** Porter step 2. Rules are grouped by the second-to-last byte of the
** stem. Within a group they are tried in order, and the first rule whose
** suffix matches ends the search, whether or not the replacement was
** actually applied.
**
** Every replacement is no longer than the suffix it replaces, so this
** step never grows the stem. This function always returns 0.
**
** A stem of fewer than two bytes has no second-to-last byte and cannot
** match any rule, so it is rejected up front rather than reading
** aBuf[nBuf-2] out of bounds.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;

  if( nBuf<2 ) return 0;

  switch( aBuf[nBuf-2] ){
    case 'a':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ational", 7, "ate", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "tional", 6, "tion", 4) ) break;
      break;

    case 'c':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "enci", 4, "ence", 4) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "anci", 4, "ance", 4) ) break;
      break;

    case 'e':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "izer", 4, "ize", 3) ) break;
      break;

    case 'g':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "logi", 4, "log", 3) ) break;
      break;

    case 'l':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "bli", 3, "ble", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "alli", 4, "al", 2) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "entli", 5, "ent", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "eli", 3, "e", 1) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ousli", 5, "ous", 3) ) break;
      break;

    case 'o':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ization", 7, "ize", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ation", 5, "ate", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ator", 4, "ate", 3) ) break;
      break;

    case 's':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "alism", 5, "al", 2) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "iveness", 7, "ive", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "fulness", 7, "ful", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "ousness", 7, "ous", 3) ) break;
      break;

    case 't':
      if( fts5PorterStep2Rule(aBuf, pnBuf, "aliti", 5, "al", 2) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "iviti", 5, "ive", 3) ) break;
      if( fts5PorterStep2Rule(aBuf, pnBuf, "biliti", 6, "ble", 3) ) break;
      break;
  }
  return 0;
}
| 19335 |
| 19336 |
/*
** Porter step 3. The second-to-last byte of the stem selects at most one
** (suffix, replacement) rule:
**
**     ical  -> ic        icate -> ic        iciti -> ic
**     alize -> al        ness  -> (removed)
**     ful   -> (removed) ative -> (removed)
**
** A rule only matches if the stem is strictly longer than the suffix.
** The replacement is applied only if fts5Porter_MGt0() accepts the part
** of the stem that precedes the suffix. This function always returns 0.
**
** A stem of fewer than two bytes has no second-to-last byte and cannot
** match any rule, so it is rejected up front rather than reading
** aBuf[nBuf-2] out of bounds.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  int nCut = 0;                   /* Length of matched suffix, 0 if none */
  const char *zNew = "";          /* Text that replaces the suffix */
  int nNew = 0;                   /* Length of zNew in bytes */

  if( nBuf<2 ) return 0;

  switch( aBuf[nBuf-2] ){
    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        nCut = 4; zNew = "ic"; nNew = 2;
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        nCut = 4;
      }
      break;

    case 't':
      /* Both suffixes are 5 bytes long and share the same replacement */
      if( nBuf>5
       && (0==memcmp("icate", &aBuf[nBuf-5], 5)
        || 0==memcmp("iciti", &aBuf[nBuf-5], 5))
      ){
        nCut = 5; zNew = "ic"; nNew = 2;
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        nCut = 3;
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        nCut = 5;
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        nCut = 5; zNew = "al"; nNew = 2;
      }
      break;
  }

  if( nCut>0 && fts5Porter_MGt0(aBuf, nBuf-nCut) ){
    memcpy(&aBuf[nBuf-nCut], zNew, nNew);
    *pnBuf = nBuf - nCut + nNew;
  }
  return 0;
}
| 19401 |
| 19402 |
/*
** First half of Porter step 1b. Handles the suffixes "eed", "ed" and
** "ing" of the *pnBuf byte stem in aBuf[]:
**
**   * "eed" becomes "ee" if fts5Porter_MGt0() accepts the preceding part
**     of the stem. Returns 0 either way.
**
**   * "ed" and "ing" are removed if fts5Porter_Vowel() accepts the
**     preceding part of the stem. In that case 1 is returned, which the
**     caller uses to decide whether to run the step 1b follow-up rules.
**
** In every other case the stem is left alone and 0 is returned. The
** caller must ensure that *pnBuf is at least 2.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  const char cPenult = aBuf[nBuf-2];

  if( cPenult=='e' ){
    if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
      if( fts5Porter_MGt0(aBuf, nBuf-3) ){
        memcpy(&aBuf[nBuf-3], "ee", 2);
        *pnBuf = nBuf - 1;
      }
      /* A stem ending in "eed" is never also tested against "ed" */
      return 0;
    }
    if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2)
     && fts5Porter_Vowel(aBuf, nBuf-2)
    ){
      *pnBuf = nBuf - 2;
      return 1;
    }
    return 0;
  }

  if( cPenult=='n'
   && nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3)
   && fts5Porter_Vowel(aBuf, nBuf-3)
  ){
    *pnBuf = nBuf - 3;
    return 1;
  }

  return 0;
}
| 19434 |
| 19435 /* |
| 19436 ** GENERATED CODE ENDS HERE (mkportersteps.tcl) |
| 19437 *************************************************************************** |
| 19438 **************************************************************************/ |
| 19439 |
/*
** Porter step 1a: plural handling. Only the length *pnBuf is changed; the
** bytes of aBuf[] are never modified.
**
**     ...sses  ->  ...ss      (two bytes dropped)
**     ...ies   ->  ...i       (two bytes dropped)
**     ...es    ->  ...e       (one byte dropped)
**     ...ss    ->  ...ss      (unchanged)
**     ...s     ->  ...        (one byte dropped)
**
** The "sses" rule requires more than 4 bytes and the "ies" rule more than
** 3 bytes. Shorter stems ending in "es" lose just the 's'. The caller
** must ensure that *pnBuf is at least 2.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  int nDrop;                      /* Number of trailing bytes to discard */

  if( aBuf[nBuf-1]!='s' ) return;

  switch( aBuf[nBuf-2] ){
    case 's':
      /* "ss" is left as it is */
      return;

    case 'e':
      if( nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s' ){
        nDrop = 2;
      }else if( nBuf>3 && aBuf[nBuf-3]=='i' ){
        nDrop = 2;
      }else{
        nDrop = 1;
      }
      break;

    default:
      nDrop = 1;
      break;
  }

  *pnBuf = nBuf - nDrop;
}
| 19457 |
| 19458 static int fts5PorterCb( |
| 19459 void *pCtx, |
| 19460 int tflags, |
| 19461 const char *pToken, |
| 19462 int nToken, |
| 19463 int iStart, |
| 19464 int iEnd |
| 19465 ){ |
| 19466 PorterContext *p = (PorterContext*)pCtx; |
| 19467 |
| 19468 char *aBuf; |
| 19469 int nBuf; |
| 19470 |
| 19471 if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through; |
| 19472 aBuf = p->aBuf; |
| 19473 nBuf = nToken; |
| 19474 memcpy(aBuf, pToken, nBuf); |
| 19475 |
| 19476 /* Step 1. */ |
| 19477 fts5PorterStep1A(aBuf, &nBuf); |
| 19478 if( fts5PorterStep1B(aBuf, &nBuf) ){ |
| 19479 if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ |
| 19480 char c = aBuf[nBuf-1]; |
| 19481 if( fts5PorterIsVowel(c, 0)==0 |
| 19482 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] |
| 19483 ){ |
| 19484 nBuf--; |
| 19485 }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ |
| 19486 aBuf[nBuf++] = 'e'; |
| 19487 } |
| 19488 } |
| 19489 } |
| 19490 |
| 19491 /* Step 1C. */ |
| 19492 if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ |
| 19493 aBuf[nBuf-1] = 'i'; |
| 19494 } |
| 19495 |
| 19496 /* Steps 2 through 4. */ |
| 19497 fts5PorterStep2(aBuf, &nBuf); |
| 19498 fts5PorterStep3(aBuf, &nBuf); |
| 19499 fts5PorterStep4(aBuf, &nBuf); |
| 19500 |
| 19501 /* Step 5a. */ |
| 19502 assert( nBuf>0 ); |
| 19503 if( aBuf[nBuf-1]=='e' ){ |
| 19504 if( fts5Porter_MGt1(aBuf, nBuf-1) |
| 19505 || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) |
| 19506 ){ |
| 19507 nBuf--; |
| 19508 } |
| 19509 } |
| 19510 |
| 19511 /* Step 5b. */ |
| 19512 if( nBuf>1 && aBuf[nBuf-1]=='l' |
| 19513 && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) |
| 19514 ){ |
| 19515 nBuf--; |
| 19516 } |
| 19517 |
| 19518 return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); |
| 19519 |
| 19520 pass_through: |
| 19521 return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); |
| 19522 } |
| 19523 |
| 19524 /* |
| 19525 ** Tokenize using the porter tokenizer. |
| 19526 */ |
| 19527 static int fts5PorterTokenize( |
| 19528 Fts5Tokenizer *pTokenizer, |
| 19529 void *pCtx, |
| 19530 int flags, |
| 19531 const char *pText, int nText, |
| 19532 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) |
| 19533 ){ |
| 19534 PorterTokenizer *p = (PorterTokenizer*)pTokenizer; |
| 19535 PorterContext sCtx; |
| 19536 sCtx.xToken = xToken; |
| 19537 sCtx.pCtx = pCtx; |
| 19538 sCtx.aBuf = p->aBuf; |
| 19539 return p->tokenizer.xTokenize( |
| 19540 p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb |
| 19541 ); |
| 19542 } |
| 19543 |
| 19544 /* |
| 19545 ** Register all built-in tokenizers with FTS5. |
| 19546 */ |
| 19547 static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ |
| 19548 struct BuiltinTokenizer { |
| 19549 const char *zName; |
| 19550 fts5_tokenizer x; |
| 19551 } aBuiltin[] = { |
| 19552 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, |
| 19553 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, |
| 19554 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, |
| 19555 }; |
| 19556 |
| 19557 int rc = SQLITE_OK; /* Return code */ |
| 19558 int i; /* To iterate through builtin functions */ |
| 19559 |
| 19560 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ |
| 19561 rc = pApi->xCreateTokenizer(pApi, |
| 19562 aBuiltin[i].zName, |
| 19563 (void*)pApi, |
| 19564 &aBuiltin[i].x, |
| 19565 0 |
| 19566 ); |
| 19567 } |
| 19568 |
| 19569 return rc; |
| 19570 } |
| 19571 |
| 19572 |
| 19573 |
| 19574 /* |
| 19575 ** 2012 May 25 |
| 19576 ** |
| 19577 ** The author disclaims copyright to this source code. In place of |
| 19578 ** a legal notice, here is a blessing: |
| 19579 ** |
| 19580 ** May you do good and not evil. |
| 19581 ** May you find forgiveness for yourself and forgive others. |
| 19582 ** May you share freely, never taking more than you give. |
| 19583 ** |
| 19584 ****************************************************************************** |
| 19585 */ |
| 19586 |
| 19587 /* |
| 19588 ** DO NOT EDIT THIS MACHINE GENERATED FILE. |
| 19589 */ |
| 19590 |
| 19591 |
| 19592 /* #include <assert.h> */ |
| 19593 |
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** Codepoints below 128 are looked up in a 128-bit bitmap. Codepoints
** from 128 up to (but not including) 1<<22 are looked up in a sorted
** table of ranges using a binary search. For any other value, including
** negative ones, 1 is returned.
*/
static int sqlite3Fts5UnicodeIsalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
    0x380400F0,
  };

  /* Bitmap for codepoints 0..127. A set bit marks a codepoint that is
  ** neither a letter nor a number. Bit (c & 0x1F) of aAscii[c>>5]
  ** corresponds to codepoint c. */
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( (unsigned int)c<128 ){
    /* The shift must be done on an unsigned value. (c & 0x1F) may be 31,
    ** and shifting a signed 1 into the sign bit is undefined behaviour. */
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( (unsigned int)c<(1<<22) ){
    /* Setting the low 10 bits of the key makes it compare greater than or
    ** equal to any table entry whose first codepoint is c or less. */
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;

    /* Find the last entry that is less than or equal to key */
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );

    /* c is a letter or number if it lies beyond the end of that range */
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  return 1;
}
| 19723 |
| 19724 |
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). Any codepoint not covered by the table is returned unchanged.
** The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c){
  /* Each aDia[] entry describes a run of consecutive codepoints that all
  ** map to the same ASCII letter: the upper 13 bits hold the first
  ** codepoint of the run and the lower 3 bits hold the number of
  ** codepoints that follow it. aChar[i] is the letter that run i maps
  ** to. Both tables are read-only, so they are declared static const
  ** rather than being rebuilt on the stack on every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
    'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
    's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
    'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
    'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
    'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
    'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
    'e', 'i', 'o', 'u', 'y',
  };

  /* Setting the low 3 bits of the key makes it compare greater than or
  ** equal to any aDia[] entry whose first codepoint is c or less. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Find the last entry that is less than or equal to key */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* If c lies past the end of the run, it has no mapping */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
| 19777 |
| 19778 |
/*
** Return true (non-zero) if the argument interpreted as a unicode
** codepoint is a diacritical modifier character.
**
** Only codepoints 768 to 817 inclusive are considered. Bit (c-768) of
** mask0 covers codepoints 768..799 and bit (c-800) of mask1 covers
** codepoints 800..817. A set bit marks a diacritic.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  const unsigned int mask0 = 0x08029FDF;
  const unsigned int mask1 = 0x000361F8;

  if( c<768 || c>817 ) return 0;

  /* The shifts are done on an unsigned value. For c==799 the shift
  ** count is 31, and shifting a signed 1 into the sign bit is undefined
  ** behaviour. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
| 19791 |
| 19792 |
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** If bRemoveDiacritic is non-zero and c is in the range 128..65535, the
** folded value is also passed through fts5_remove_diacritic().
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint iCode.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
    {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
    {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
    {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
    {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
    {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
    {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
    {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
    {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
    {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
    {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
    {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
    {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
    {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
    {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
    {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
    {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
    {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
    {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
    {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
    {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
    {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
    {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
    {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
    {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
    {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
    {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
    {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
    {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
    {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
    {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
    {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
    {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
    {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
    {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
    {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
    {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
    {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
    {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
    {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
    {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
    {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
    {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
    {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
    {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
    {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
    {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
    {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
    {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
    {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
    {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
    {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
    {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
    {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
    1, 2, 8, 15, 16, 26, 28, 32,
    37, 38, 40, 48, 63, 64, 69, 71,
    79, 80, 116, 202, 203, 205, 206, 207,
    209, 210, 211, 213, 214, 217, 218, 219,
    775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
    54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
    57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
    65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
    65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
    65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule; /* Last rule starting at or before c */
  int iFirst = 0;                 /* Lower bound of binary search */
  int iLast = sizeof(aEntry)/sizeof(aEntry[0]) - 1;   /* Upper bound */
  int iFound = -1;                /* Index of pRule in aEntry[] */
  int iDelta;                     /* Distance of c from start of pRule */
  int ret = c;                    /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII range: only 'A'..'Z' are folded. */
  if( c<128 ){
    if( c>='A' && c<='Z' ) return c + ('a' - 'A');
    return c;
  }

  /* Beyond the 16-bit range a single block of 40 codepoints is folded. */
  if( c>=65536 ){
    if( c>=66560 && c<66600 ) return c + 40;
    return c;
  }

  /* Binary search for the last rule with iCode less than or equal to c */
  assert( c>aEntry[0].iCode );
  while( iFirst<=iLast ){
    int iMid = (iFirst + iLast) / 2;
    if( c>=(int)aEntry[iMid].iCode ){
      iFound = iMid;
      iFirst = iMid+1;
    }else{
      iLast = iMid-1;
    }
  }
  assert( iFound>=0 && c>=aEntry[iFound].iCode );
  pRule = &aEntry[iFound];

  /* The rule applies if c falls inside its range and, for rules that
  ** cover only every second codepoint, c is an even distance from the
  ** start of the range. */
  iDelta = c - pRule->iCode;
  if( iDelta<pRule->nRange
   && ((pRule->flags & 0x01)==0 || (iDelta & 0x01)==0)
  ){
    ret = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
    assert( ret>0 );
  }

  if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
  return ret;
}
| 19934 |
| 19935 /* |
| 19936 ** 2015 May 30 |
| 19937 ** |
| 19938 ** The author disclaims copyright to this source code. In place of |
| 19939 ** a legal notice, here is a blessing: |
| 19940 ** |
| 19941 ** May you do good and not evil. |
| 19942 ** May you find forgiveness for yourself and forgive others. |
| 19943 ** May you share freely, never taking more than you give. |
| 19944 ** |
| 19945 ****************************************************************************** |
| 19946 ** |
| 19947 ** Routines for varint serialization and deserialization. |
| 19948 */ |
| 19949 |
| 19950 |
| 19951 /* #include "fts5Int.h" */ |
| 19952 |
| 19953 /* |
| 19954 ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. |
| 19955 ** Except, this version does handle the single byte case that the core |
| 19956 ** version depends on being handled before its function is called. |
| 19957 */ |
| 19958 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ |
| 19959 u32 a,b; |
| 19960 |
| 19961 /* The 1-byte case. Overwhelmingly the most common. */ |
| 19962 a = *p; |
| 19963 /* a: p0 (unmasked) */ |
| 19964 if (!(a&0x80)) |
| 19965 { |
| 19966 /* Values between 0 and 127 */ |
| 19967 *v = a; |
| 19968 return 1; |
| 19969 } |
| 19970 |
| 19971 /* The 2-byte case */ |
| 19972 p++; |
| 19973 b = *p; |
| 19974 /* b: p1 (unmasked) */ |
| 19975 if (!(b&0x80)) |
| 19976 { |
| 19977 /* Values between 128 and 16383 */ |
| 19978 a &= 0x7f; |
| 19979 a = a<<7; |
| 19980 *v = a | b; |
| 19981 return 2; |
| 19982 } |
| 19983 |
| 19984 /* The 3-byte case */ |
| 19985 p++; |
| 19986 a = a<<14; |
| 19987 a |= *p; |
| 19988 /* a: p0<<14 | p2 (unmasked) */ |
| 19989 if (!(a&0x80)) |
| 19990 { |
| 19991 /* Values between 16384 and 2097151 */ |
| 19992 a &= (0x7f<<14)|(0x7f); |
| 19993 b &= 0x7f; |
| 19994 b = b<<7; |
| 19995 *v = a | b; |
| 19996 return 3; |
| 19997 } |
| 19998 |
| 19999 /* A 32-bit varint is used to store size information in btrees. |
| 20000 ** Objects are rarely larger than 2MiB limit of a 3-byte varint. |
| 20001 ** A 3-byte varint is sufficient, for example, to record the size |
| 20002 ** of a 1048569-byte BLOB or string. |
| 20003 ** |
| 20004 ** We only unroll the first 1-, 2-, and 3- byte cases. The very |
| 20005 ** rare larger cases can be handled by the slower 64-bit varint |
| 20006 ** routine. |
| 20007 */ |
| 20008 { |
| 20009 u64 v64; |
| 20010 u8 n; |
| 20011 p -= 2; |
| 20012 n = sqlite3Fts5GetVarint(p, &v64); |
| 20013 *v = (u32)v64; |
| 20014 assert( n>3 && n<=9 ); |
| 20015 return n; |
| 20016 } |
| 20017 } |
| 20018 |
| 20019 |
/*
** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
**   SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
**   SLOT_4_2_0   A mask for  (0x7f<<28) | SLOT_2_0, truncated to 32 bits.
**                Only the low 4 bits of the 0x7f field survive the shift,
**                so the value equals (0xfU<<28) | SLOT_2_0. An assert() in
**                sqlite3Fts5GetVarint() verifies both constants.
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f
| 20031 |
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read (between 1 and 9). The value is stored
** in *v.
**
** The decoder is fully unrolled. Bytes are accumulated alternately into
** two 32-bit registers:
**
**   a   receives the even-numbered bytes (p0, p2, p4, ...)
**   b   receives the odd-numbered bytes  (p1, p3, p5, ...)
**
** Each register is shifted left by 14 bits between loads, so the 7-bit
** payloads of a and b interleave when one of them is shifted by a further
** 7 bits and the two are OR-ed together. Variable s saves the most
** significant payload bits before they are shifted out of the 32-bit
** registers; it becomes the upper 32 bits of the result for encodings of
** five bytes or more.
**
** The statement order is significant. The comments of the form
** "a: p0<<14 | p2 (unmasked)" record the contents of each register at
** that point.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    /* Single byte encoding */
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these cause they were (effectively) done above in
    ** calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    /* The bits of p0 that were shifted out of a are recovered from s */
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* Nine byte encoding. The final byte contributes all 8 of its bits,
  ** which is why the shifts below are by 15 and 8 rather than 14 and 7. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at the ninth byte, so p[-4] is the fifth byte (p4). Its
  ** upper payload bits were shifted out of a above and belong in the high
  ** 32 bits of the result. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}
| 20194 |
| 20195 /* |
| 20196 ** The variable-length integer encoding is as follows: |
| 20197 ** |
| 20198 ** KEY: |
| 20199 ** A = 0xxxxxxx 7 bits of data and one flag bit |
| 20200 ** B = 1xxxxxxx 7 bits of data and one flag bit |
| 20201 ** C = xxxxxxxx 8 bits of data |
| 20202 ** |
| 20203 ** 7 bits - A |
| 20204 ** 14 bits - BA |
| 20205 ** 21 bits - BBA |
| 20206 ** 28 bits - BBBA |
| 20207 ** 35 bits - BBBBA |
| 20208 ** 42 bits - BBBBBA |
| 20209 ** 49 bits - BBBBBBA |
| 20210 ** 56 bits - BBBBBBBA |
| 20211 ** 64 bits - BBBBBBBBC |
| 20212 */ |
| 20213 |
/*
** FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined, and
** to nothing otherwise. It is applied to fts5PutVarint64() below.
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
| 20219 |
| 20220 /* |
| 20221 ** Write a 64-bit variable-length integer to memory starting at p[0]. |
| 20222 ** The length of data write will be between 1 and 9 bytes. The number |
| 20223 ** of bytes written is returned. |
| 20224 ** |
| 20225 ** A variable-length integer consists of the lower 7 bits of each byte |
| 20226 ** for all bytes that have the 8th bit set and one byte with the 8th |
| 20227 ** bit clear. Except, if we get to the 9th byte, it stores the full |
| 20228 ** 8 bits and is the last byte. |
| 20229 */ |
| 20230 static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ |
| 20231 int i, j, n; |
| 20232 u8 buf[10]; |
| 20233 if( v & (((u64)0xff000000)<<32) ){ |
| 20234 p[8] = (u8)v; |
| 20235 v >>= 8; |
| 20236 for(i=7; i>=0; i--){ |
| 20237 p[i] = (u8)((v & 0x7f) | 0x80); |
| 20238 v >>= 7; |
| 20239 } |
| 20240 return 9; |
| 20241 } |
| 20242 n = 0; |
| 20243 do{ |
| 20244 buf[n++] = (u8)((v & 0x7f) | 0x80); |
| 20245 v >>= 7; |
| 20246 }while( v!=0 ); |
| 20247 buf[0] &= 0x7f; |
| 20248 assert( n<=9 ); |
| 20249 for(i=0, j=n-1; j>=0; j--, i++){ |
| 20250 p[i] = buf[j]; |
| 20251 } |
| 20252 return n; |
| 20253 } |
| 20254 |
| 20255 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ |
| 20256 if( v<=0x7f ){ |
| 20257 p[0] = v&0x7f; |
| 20258 return 1; |
| 20259 } |
| 20260 if( v<=0x3fff ){ |
| 20261 p[0] = ((v>>7)&0x7f)|0x80; |
| 20262 p[1] = v&0x7f; |
| 20263 return 2; |
| 20264 } |
| 20265 return fts5PutVarint64(p,v); |
| 20266 } |
| 20267 |
| 20268 |
| 20269 static int sqlite3Fts5GetVarintLen(u32 iVal){ |
| 20270 #if 0 |
| 20271 if( iVal<(1 << 7 ) ) return 1; |
| 20272 #endif |
| 20273 assert( iVal>=(1 << 7) ); |
| 20274 if( iVal<(1 << 14) ) return 2; |
| 20275 if( iVal<(1 << 21) ) return 3; |
| 20276 if( iVal<(1 << 28) ) return 4; |
| 20277 return 5; |
| 20278 } |
| 20279 |
| 20280 |
| 20281 /* |
| 20282 ** 2015 May 08 |
| 20283 ** |
| 20284 ** The author disclaims copyright to this source code. In place of |
| 20285 ** a legal notice, here is a blessing: |
| 20286 ** |
| 20287 ** May you do good and not evil. |
| 20288 ** May you find forgiveness for yourself and forgive others. |
| 20289 ** May you share freely, never taking more than you give. |
| 20290 ** |
| 20291 ****************************************************************************** |
| 20292 ** |
| 20293 ** This is an SQLite virtual table module implementing direct access to an |
| 20294 ** existing FTS5 index. The module may create several different types of |
| 20295 ** tables: |
| 20296 ** |
| 20297 ** col: |
| 20298 ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); |
| 20299 ** |
| 20300 ** One row for each term/column combination. The value of $doc is set to |
| 20301 ** the number of fts5 rows that contain at least one instance of term |
| 20302 ** $term within column $col. Field $cnt is set to the total number of |
| 20303 ** instances of term $term in column $col (in any row of the fts5 table). |
| 20304 ** |
| 20305 ** row: |
| 20306 ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); |
| 20307 ** |
| 20308 ** One row for each term in the database. The value of $doc is set to |
| 20309 ** the number of fts5 rows that contain at least one instance of term |
| 20310 ** $term. Field $cnt is set to the total number of instances of term |
| 20311 ** $term in the database. |
| 20312 */ |
| 20313 |
| 20314 |
| 20315 /* #include "fts5Int.h" */ |
| 20316 |
| 20317 |
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** One fts5vocab virtual table. The object is allocated by
** fts5VocabInitVtab() in a single block that also holds the two strings
** zFts5Tbl and zFts5Db, so releasing the structure releases the strings.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;              /* Base class. Cast to/from sqlite3_vtab* */
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL or ROW */
};
| 20329 |
/*
** A cursor open on an fts5vocab table. fts5VocabOpenMethod() allocates
** the aCnt[] and aDoc[] arrays in the same block as the structure itself,
** each with one entry per column of the fts5 table.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;       /* Base class. Cast to/from the base type */
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Index *pIndex;              /* Associated FTS5 index */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */

  int nLeTerm;                    /* Size of zLeTerm in bytes, or -1 if none */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  Fts5Config *pConfig;            /* Fts5 table configuration */
  int iCol;                       /* Column of the current row (index into
                                  ** aCnt[], aDoc[] and pConfig->azCol[]) */
  i64 *aCnt;                      /* Per-column instance counts for 'term' */
  i64 *aDoc;                      /* Per-column row counts for 'term' */

  /* Output values used by 'row' and 'col' tables */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */
};
| 20351 |
/*
** Values for Fts5VocabTable.eType. They are also used as indexes into the
** azSchema[] array in fts5VocabInitVtab(), so they must stay 0 and 1.
*/
#define FTS5_VOCAB_COL    0
#define FTS5_VOCAB_ROW    1

/*
** Column lists of the two table types, as declared to the SQLite core.
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"

/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
** Each bit set means xFilter receives one argument for the corresponding
** constraint on the "term" column, in the order EQ, GE, LE.
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
| 20364 |
| 20365 |
| 20366 /* |
| 20367 ** Translate a string containing an fts5vocab table type to an |
| 20368 ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output |
| 20369 ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message |
| 20370 ** and return SQLITE_ERROR. |
| 20371 */ |
| 20372 static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ |
| 20373 int rc = SQLITE_OK; |
| 20374 char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); |
| 20375 if( rc==SQLITE_OK ){ |
| 20376 sqlite3Fts5Dequote(zCopy); |
| 20377 if( sqlite3_stricmp(zCopy, "col")==0 ){ |
| 20378 *peType = FTS5_VOCAB_COL; |
| 20379 }else |
| 20380 |
| 20381 if( sqlite3_stricmp(zCopy, "row")==0 ){ |
| 20382 *peType = FTS5_VOCAB_ROW; |
| 20383 }else |
| 20384 { |
| 20385 *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); |
| 20386 rc = SQLITE_ERROR; |
| 20387 } |
| 20388 sqlite3_free(zCopy); |
| 20389 } |
| 20390 |
| 20391 return rc; |
| 20392 } |
| 20393 |
| 20394 |
| 20395 /* |
| 20396 ** The xDisconnect() virtual table method. |
| 20397 */ |
| 20398 static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ |
| 20399 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; |
| 20400 sqlite3_free(pTab); |
| 20401 return SQLITE_OK; |
| 20402 } |
| 20403 |
| 20404 /* |
| 20405 ** The xDestroy() virtual table method. |
| 20406 */ |
| 20407 static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ |
| 20408 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; |
| 20409 sqlite3_free(pTab); |
| 20410 return SQLITE_OK; |
| 20411 } |
| 20412 |
| 20413 /* |
| 20414 ** This function is the implementation of both the xConnect and xCreate |
| 20415 ** methods of the FTS3 virtual table. |
| 20416 ** |
| 20417 ** The argv[] array contains the following: |
| 20418 ** |
| 20419 ** argv[0] -> module name ("fts5vocab") |
| 20420 ** argv[1] -> database name |
| 20421 ** argv[2] -> table name |
| 20422 ** |
| 20423 ** then: |
| 20424 ** |
| 20425 ** argv[3] -> name of fts5 table |
| 20426 ** argv[4] -> type of fts5vocab table |
| 20427 ** |
| 20428 ** or, for tables in the TEMP schema only. |
| 20429 ** |
| 20430 ** argv[3] -> name of fts5 tables database |
| 20431 ** argv[4] -> name of fts5 table |
| 20432 ** argv[5] -> type of fts5vocab table |
| 20433 */ |
| 20434 static int fts5VocabInitVtab( |
| 20435 sqlite3 *db, /* The SQLite database connection */ |
| 20436 void *pAux, /* Pointer to Fts5Global object */ |
| 20437 int argc, /* Number of elements in argv array */ |
| 20438 const char * const *argv, /* xCreate/xConnect argument array */ |
| 20439 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
| 20440 char **pzErr /* Write any error message here */ |
| 20441 ){ |
| 20442 const char *azSchema[] = { |
| 20443 "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", |
| 20444 "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" |
| 20445 }; |
| 20446 |
| 20447 Fts5VocabTable *pRet = 0; |
| 20448 int rc = SQLITE_OK; /* Return code */ |
| 20449 int bDb; |
| 20450 |
| 20451 bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); |
| 20452 |
| 20453 if( argc!=5 && bDb==0 ){ |
| 20454 *pzErr = sqlite3_mprintf("wrong number of vtable arguments"); |
| 20455 rc = SQLITE_ERROR; |
| 20456 }else{ |
| 20457 int nByte; /* Bytes of space to allocate */ |
| 20458 const char *zDb = bDb ? argv[3] : argv[1]; |
| 20459 const char *zTab = bDb ? argv[4] : argv[3]; |
| 20460 const char *zType = bDb ? argv[5] : argv[4]; |
| 20461 int nDb = (int)strlen(zDb)+1; |
| 20462 int nTab = (int)strlen(zTab)+1; |
| 20463 int eType = 0; |
| 20464 |
| 20465 rc = fts5VocabTableType(zType, pzErr, &eType); |
| 20466 if( rc==SQLITE_OK ){ |
| 20467 assert( eType>=0 && eType<ArraySize(azSchema) ); |
| 20468 rc = sqlite3_declare_vtab(db, azSchema[eType]); |
| 20469 } |
| 20470 |
| 20471 nByte = sizeof(Fts5VocabTable) + nDb + nTab; |
| 20472 pRet = sqlite3Fts5MallocZero(&rc, nByte); |
| 20473 if( pRet ){ |
| 20474 pRet->pGlobal = (Fts5Global*)pAux; |
| 20475 pRet->eType = eType; |
| 20476 pRet->db = db; |
| 20477 pRet->zFts5Tbl = (char*)&pRet[1]; |
| 20478 pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; |
| 20479 memcpy(pRet->zFts5Tbl, zTab, nTab); |
| 20480 memcpy(pRet->zFts5Db, zDb, nDb); |
| 20481 sqlite3Fts5Dequote(pRet->zFts5Tbl); |
| 20482 sqlite3Fts5Dequote(pRet->zFts5Db); |
| 20483 } |
| 20484 } |
| 20485 |
| 20486 *ppVTab = (sqlite3_vtab*)pRet; |
| 20487 return rc; |
| 20488 } |
| 20489 |
| 20490 |
| 20491 /* |
| 20492 ** The xConnect() and xCreate() methods for the virtual table. All the |
| 20493 ** work is done in function fts5VocabInitVtab(). |
| 20494 */ |
| 20495 static int fts5VocabConnectMethod( |
| 20496 sqlite3 *db, /* Database connection */ |
| 20497 void *pAux, /* Pointer to tokenizer hash table */ |
| 20498 int argc, /* Number of elements in argv array */ |
| 20499 const char * const *argv, /* xCreate/xConnect argument array */ |
| 20500 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 20501 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 20502 ){ |
| 20503 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); |
| 20504 } |
| 20505 static int fts5VocabCreateMethod( |
| 20506 sqlite3 *db, /* Database connection */ |
| 20507 void *pAux, /* Pointer to tokenizer hash table */ |
| 20508 int argc, /* Number of elements in argv array */ |
| 20509 const char * const *argv, /* xCreate/xConnect argument array */ |
| 20510 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 20511 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 20512 ){ |
| 20513 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); |
| 20514 } |
| 20515 |
| 20516 /* |
| 20517 ** Implementation of the xBestIndex method. |
| 20518 */ |
| 20519 static int fts5VocabBestIndexMethod( |
| 20520 sqlite3_vtab *pUnused, |
| 20521 sqlite3_index_info *pInfo |
| 20522 ){ |
| 20523 int i; |
| 20524 int iTermEq = -1; |
| 20525 int iTermGe = -1; |
| 20526 int iTermLe = -1; |
| 20527 int idxNum = 0; |
| 20528 int nArg = 0; |
| 20529 |
| 20530 UNUSED_PARAM(pUnused); |
| 20531 |
| 20532 for(i=0; i<pInfo->nConstraint; i++){ |
| 20533 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; |
| 20534 if( p->usable==0 ) continue; |
| 20535 if( p->iColumn==0 ){ /* term column */ |
| 20536 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i; |
| 20537 if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i; |
| 20538 if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i; |
| 20539 if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i; |
| 20540 if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i; |
| 20541 } |
| 20542 } |
| 20543 |
| 20544 if( iTermEq>=0 ){ |
| 20545 idxNum |= FTS5_VOCAB_TERM_EQ; |
| 20546 pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; |
| 20547 pInfo->estimatedCost = 100; |
| 20548 }else{ |
| 20549 pInfo->estimatedCost = 1000000; |
| 20550 if( iTermGe>=0 ){ |
| 20551 idxNum |= FTS5_VOCAB_TERM_GE; |
| 20552 pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; |
| 20553 pInfo->estimatedCost = pInfo->estimatedCost / 2; |
| 20554 } |
| 20555 if( iTermLe>=0 ){ |
| 20556 idxNum |= FTS5_VOCAB_TERM_LE; |
| 20557 pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; |
| 20558 pInfo->estimatedCost = pInfo->estimatedCost / 2; |
| 20559 } |
| 20560 } |
| 20561 |
| 20562 /* This virtual table always delivers results in ascending order of |
| 20563 ** the "term" column (column 0). So if the user has requested this |
| 20564 ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the |
| 20565 ** sqlite3_index_info.orderByConsumed flag to tell the core the results |
| 20566 ** are already in sorted order. */ |
| 20567 if( pInfo->nOrderBy==1 |
| 20568 && pInfo->aOrderBy[0].iColumn==0 |
| 20569 && pInfo->aOrderBy[0].desc==0 |
| 20570 ){ |
| 20571 pInfo->orderByConsumed = 1; |
| 20572 } |
| 20573 |
| 20574 pInfo->idxNum = idxNum; |
| 20575 return SQLITE_OK; |
| 20576 } |
| 20577 |
| 20578 /* |
| 20579 ** Implementation of xOpen method. |
| 20580 */ |
| 20581 static int fts5VocabOpenMethod( |
| 20582 sqlite3_vtab *pVTab, |
| 20583 sqlite3_vtab_cursor **ppCsr |
| 20584 ){ |
| 20585 Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; |
| 20586 Fts5Index *pIndex = 0; |
| 20587 Fts5Config *pConfig = 0; |
| 20588 Fts5VocabCursor *pCsr = 0; |
| 20589 int rc = SQLITE_OK; |
| 20590 sqlite3_stmt *pStmt = 0; |
| 20591 char *zSql = 0; |
| 20592 |
| 20593 zSql = sqlite3Fts5Mprintf(&rc, |
| 20594 "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", |
| 20595 pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl |
| 20596 ); |
| 20597 if( zSql ){ |
| 20598 rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); |
| 20599 } |
| 20600 sqlite3_free(zSql); |
| 20601 assert( rc==SQLITE_OK || pStmt==0 ); |
| 20602 if( rc==SQLITE_ERROR ) rc = SQLITE_OK; |
| 20603 |
| 20604 if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ |
| 20605 i64 iId = sqlite3_column_int64(pStmt, 0); |
| 20606 pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &pConfig); |
| 20607 } |
| 20608 |
| 20609 if( rc==SQLITE_OK && pIndex==0 ){ |
| 20610 rc = sqlite3_finalize(pStmt); |
| 20611 pStmt = 0; |
| 20612 if( rc==SQLITE_OK ){ |
| 20613 pVTab->zErrMsg = sqlite3_mprintf( |
| 20614 "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl |
| 20615 ); |
| 20616 rc = SQLITE_ERROR; |
| 20617 } |
| 20618 } |
| 20619 |
| 20620 if( rc==SQLITE_OK ){ |
| 20621 int nByte = pConfig->nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); |
| 20622 pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); |
| 20623 } |
| 20624 |
| 20625 if( pCsr ){ |
| 20626 pCsr->pIndex = pIndex; |
| 20627 pCsr->pStmt = pStmt; |
| 20628 pCsr->pConfig = pConfig; |
| 20629 pCsr->aCnt = (i64*)&pCsr[1]; |
| 20630 pCsr->aDoc = &pCsr->aCnt[pConfig->nCol]; |
| 20631 }else{ |
| 20632 sqlite3_finalize(pStmt); |
| 20633 } |
| 20634 |
| 20635 *ppCsr = (sqlite3_vtab_cursor*)pCsr; |
| 20636 return rc; |
| 20637 } |
| 20638 |
| 20639 static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ |
| 20640 pCsr->rowid = 0; |
| 20641 sqlite3Fts5IterClose(pCsr->pIter); |
| 20642 pCsr->pIter = 0; |
| 20643 sqlite3_free(pCsr->zLeTerm); |
| 20644 pCsr->nLeTerm = -1; |
| 20645 pCsr->zLeTerm = 0; |
| 20646 } |
| 20647 |
| 20648 /* |
| 20649 ** Close the cursor. For additional information see the documentation |
| 20650 ** on the xClose method of the virtual table interface. |
| 20651 */ |
| 20652 static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 20653 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20654 fts5VocabResetCursor(pCsr); |
| 20655 sqlite3Fts5BufferFree(&pCsr->term); |
| 20656 sqlite3_finalize(pCsr->pStmt); |
| 20657 sqlite3_free(pCsr); |
| 20658 return SQLITE_OK; |
| 20659 } |
| 20660 |
| 20661 |
| 20662 /* |
| 20663 ** Advance the cursor to the next row in the table. |
| 20664 */ |
| 20665 static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ |
| 20666 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20667 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; |
| 20668 int rc = SQLITE_OK; |
| 20669 int nCol = pCsr->pConfig->nCol; |
| 20670 |
| 20671 pCsr->rowid++; |
| 20672 |
| 20673 if( pTab->eType==FTS5_VOCAB_COL ){ |
| 20674 for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ |
| 20675 if( pCsr->aDoc[pCsr->iCol] ) break; |
| 20676 } |
| 20677 } |
| 20678 |
| 20679 if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){ |
| 20680 if( sqlite3Fts5IterEof(pCsr->pIter) ){ |
| 20681 pCsr->bEof = 1; |
| 20682 }else{ |
| 20683 const char *zTerm; |
| 20684 int nTerm; |
| 20685 |
| 20686 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); |
| 20687 if( pCsr->nLeTerm>=0 ){ |
| 20688 int nCmp = MIN(nTerm, pCsr->nLeTerm); |
| 20689 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); |
| 20690 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ |
| 20691 pCsr->bEof = 1; |
| 20692 return SQLITE_OK; |
| 20693 } |
| 20694 } |
| 20695 |
| 20696 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); |
| 20697 memset(pCsr->aCnt, 0, nCol * sizeof(i64)); |
| 20698 memset(pCsr->aDoc, 0, nCol * sizeof(i64)); |
| 20699 pCsr->iCol = 0; |
| 20700 |
| 20701 assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); |
| 20702 while( rc==SQLITE_OK ){ |
| 20703 const u8 *pPos; int nPos; /* Position list */ |
| 20704 i64 iPos = 0; /* 64-bit position read from poslist */ |
| 20705 int iOff = 0; /* Current offset within position list */ |
| 20706 |
| 20707 pPos = pCsr->pIter->pData; |
| 20708 nPos = pCsr->pIter->nData; |
| 20709 switch( pCsr->pConfig->eDetail ){ |
| 20710 case FTS5_DETAIL_FULL: |
| 20711 pPos = pCsr->pIter->pData; |
| 20712 nPos = pCsr->pIter->nData; |
| 20713 if( pTab->eType==FTS5_VOCAB_ROW ){ |
| 20714 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ |
| 20715 pCsr->aCnt[0]++; |
| 20716 } |
| 20717 pCsr->aDoc[0]++; |
| 20718 }else{ |
| 20719 int iCol = -1; |
| 20720 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ |
| 20721 int ii = FTS5_POS2COLUMN(iPos); |
| 20722 pCsr->aCnt[ii]++; |
| 20723 if( iCol!=ii ){ |
| 20724 if( ii>=nCol ){ |
| 20725 rc = FTS5_CORRUPT; |
| 20726 break; |
| 20727 } |
| 20728 pCsr->aDoc[ii]++; |
| 20729 iCol = ii; |
| 20730 } |
| 20731 } |
| 20732 } |
| 20733 break; |
| 20734 |
| 20735 case FTS5_DETAIL_COLUMNS: |
| 20736 if( pTab->eType==FTS5_VOCAB_ROW ){ |
| 20737 pCsr->aDoc[0]++; |
| 20738 }else{ |
| 20739 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ |
| 20740 assert_nc( iPos>=0 && iPos<nCol ); |
| 20741 if( iPos>=nCol ){ |
| 20742 rc = FTS5_CORRUPT; |
| 20743 break; |
| 20744 } |
| 20745 pCsr->aDoc[iPos]++; |
| 20746 } |
| 20747 } |
| 20748 break; |
| 20749 |
| 20750 default: |
| 20751 assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 20752 pCsr->aDoc[0]++; |
| 20753 break; |
| 20754 } |
| 20755 |
| 20756 if( rc==SQLITE_OK ){ |
| 20757 rc = sqlite3Fts5IterNextScan(pCsr->pIter); |
| 20758 } |
| 20759 |
| 20760 if( rc==SQLITE_OK ){ |
| 20761 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); |
| 20762 if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ){ |
| 20763 break; |
| 20764 } |
| 20765 if( sqlite3Fts5IterEof(pCsr->pIter) ) break; |
| 20766 } |
| 20767 } |
| 20768 } |
| 20769 } |
| 20770 |
| 20771 if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ |
| 20772 while( pCsr->aDoc[pCsr->iCol]==0 ) pCsr->iCol++; |
| 20773 assert( pCsr->iCol<pCsr->pConfig->nCol ); |
| 20774 } |
| 20775 return rc; |
| 20776 } |
| 20777 |
| 20778 /* |
| 20779 ** This is the xFilter implementation for the virtual table. |
| 20780 */ |
| 20781 static int fts5VocabFilterMethod( |
| 20782 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 20783 int idxNum, /* Strategy index */ |
| 20784 const char *zUnused, /* Unused */ |
| 20785 int nUnused, /* Number of elements in apVal */ |
| 20786 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 20787 ){ |
| 20788 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20789 int rc = SQLITE_OK; |
| 20790 |
| 20791 int iVal = 0; |
| 20792 int f = FTS5INDEX_QUERY_SCAN; |
| 20793 const char *zTerm = 0; |
| 20794 int nTerm = 0; |
| 20795 |
| 20796 sqlite3_value *pEq = 0; |
| 20797 sqlite3_value *pGe = 0; |
| 20798 sqlite3_value *pLe = 0; |
| 20799 |
| 20800 UNUSED_PARAM2(zUnused, nUnused); |
| 20801 |
| 20802 fts5VocabResetCursor(pCsr); |
| 20803 if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++]; |
| 20804 if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++]; |
| 20805 if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++]; |
| 20806 |
| 20807 if( pEq ){ |
| 20808 zTerm = (const char *)sqlite3_value_text(pEq); |
| 20809 nTerm = sqlite3_value_bytes(pEq); |
| 20810 f = 0; |
| 20811 }else{ |
| 20812 if( pGe ){ |
| 20813 zTerm = (const char *)sqlite3_value_text(pGe); |
| 20814 nTerm = sqlite3_value_bytes(pGe); |
| 20815 } |
| 20816 if( pLe ){ |
| 20817 const char *zCopy = (const char *)sqlite3_value_text(pLe); |
| 20818 pCsr->nLeTerm = sqlite3_value_bytes(pLe); |
| 20819 pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1); |
| 20820 if( pCsr->zLeTerm==0 ){ |
| 20821 rc = SQLITE_NOMEM; |
| 20822 }else{ |
| 20823 memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); |
| 20824 } |
| 20825 } |
| 20826 } |
| 20827 |
| 20828 |
| 20829 if( rc==SQLITE_OK ){ |
| 20830 rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); |
| 20831 } |
| 20832 if( rc==SQLITE_OK ){ |
| 20833 rc = fts5VocabNextMethod(pCursor); |
| 20834 } |
| 20835 |
| 20836 return rc; |
| 20837 } |
| 20838 |
| 20839 /* |
| 20840 ** This is the xEof method of the virtual table. SQLite calls this |
| 20841 ** routine to find out if it has reached the end of a result set. |
| 20842 */ |
| 20843 static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ |
| 20844 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20845 return pCsr->bEof; |
| 20846 } |
| 20847 |
| 20848 static int fts5VocabColumnMethod( |
| 20849 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 20850 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 20851 int iCol /* Index of column to read value from */ |
| 20852 ){ |
| 20853 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20854 int eDetail = pCsr->pConfig->eDetail; |
| 20855 int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; |
| 20856 i64 iVal = 0; |
| 20857 |
| 20858 if( iCol==0 ){ |
| 20859 sqlite3_result_text( |
| 20860 pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT |
| 20861 ); |
| 20862 }else if( eType==FTS5_VOCAB_COL ){ |
| 20863 assert( iCol==1 || iCol==2 || iCol==3 ); |
| 20864 if( iCol==1 ){ |
| 20865 if( eDetail!=FTS5_DETAIL_NONE ){ |
| 20866 const char *z = pCsr->pConfig->azCol[pCsr->iCol]; |
| 20867 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); |
| 20868 } |
| 20869 }else if( iCol==2 ){ |
| 20870 iVal = pCsr->aDoc[pCsr->iCol]; |
| 20871 }else{ |
| 20872 iVal = pCsr->aCnt[pCsr->iCol]; |
| 20873 } |
| 20874 }else{ |
| 20875 assert( iCol==1 || iCol==2 ); |
| 20876 if( iCol==1 ){ |
| 20877 iVal = pCsr->aDoc[0]; |
| 20878 }else{ |
| 20879 iVal = pCsr->aCnt[0]; |
| 20880 } |
| 20881 } |
| 20882 |
| 20883 if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); |
| 20884 return SQLITE_OK; |
| 20885 } |
| 20886 |
| 20887 /* |
| 20888 ** This is the xRowid method. The SQLite core calls this routine to |
| 20889 ** retrieve the rowid for the current row of the result set. The |
| 20890 ** rowid should be written to *pRowid. |
| 20891 */ |
| 20892 static int fts5VocabRowidMethod( |
| 20893 sqlite3_vtab_cursor *pCursor, |
| 20894 sqlite_int64 *pRowid |
| 20895 ){ |
| 20896 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
| 20897 *pRowid = pCsr->rowid; |
| 20898 return SQLITE_OK; |
| 20899 } |
| 20900 |
| 20901 static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ |
| 20902 static const sqlite3_module fts5Vocab = { |
| 20903 /* iVersion */ 2, |
| 20904 /* xCreate */ fts5VocabCreateMethod, |
| 20905 /* xConnect */ fts5VocabConnectMethod, |
| 20906 /* xBestIndex */ fts5VocabBestIndexMethod, |
| 20907 /* xDisconnect */ fts5VocabDisconnectMethod, |
| 20908 /* xDestroy */ fts5VocabDestroyMethod, |
| 20909 /* xOpen */ fts5VocabOpenMethod, |
| 20910 /* xClose */ fts5VocabCloseMethod, |
| 20911 /* xFilter */ fts5VocabFilterMethod, |
| 20912 /* xNext */ fts5VocabNextMethod, |
| 20913 /* xEof */ fts5VocabEofMethod, |
| 20914 /* xColumn */ fts5VocabColumnMethod, |
| 20915 /* xRowid */ fts5VocabRowidMethod, |
| 20916 /* xUpdate */ 0, |
| 20917 /* xBegin */ 0, |
| 20918 /* xSync */ 0, |
| 20919 /* xCommit */ 0, |
| 20920 /* xRollback */ 0, |
| 20921 /* xFindFunction */ 0, |
| 20922 /* xRename */ 0, |
| 20923 /* xSavepoint */ 0, |
| 20924 /* xRelease */ 0, |
| 20925 /* xRollbackTo */ 0, |
| 20926 }; |
| 20927 void *p = (void*)pGlobal; |
| 20928 |
| 20929 return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0); |
| 20930 } |
| 20931 |
| 20932 |
| 20933 |
| 20934 |
| 20935 |
| 20936 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ |
| 20937 |
| 20938 /************** End of fts5.c ************************************************/ |
OLD | NEW |