Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(213)

Side by Side Diff: third_party/sqlite/amalgamation/sqlite3.09.c

Issue 2755803002: NCI: trybot test for sqlite 3.17 import. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/sqlite/amalgamation/sqlite3.08.c ('k') | third_party/sqlite/split.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /************** Begin file fts5.c ********************************************/
2
3
4 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5)
5
6 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
7 # define NDEBUG 1
8 #endif
9 #if defined(NDEBUG) && defined(SQLITE_DEBUG)
10 # undef NDEBUG
11 #endif
12
13 /*
14 ** 2014 May 31
15 **
16 ** The author disclaims copyright to this source code. In place of
17 ** a legal notice, here is a blessing:
18 **
19 ** May you do good and not evil.
20 ** May you find forgiveness for yourself and forgive others.
21 ** May you share freely, never taking more than you give.
22 **
23 ******************************************************************************
24 **
25 ** Interfaces to extend FTS5. Using the interfaces defined in this file,
26 ** FTS5 may be extended with:
27 **
28 ** * custom tokenizers, and
29 ** * custom auxiliary functions.
30 */
31
32
33 #ifndef _FTS5_H
34 #define _FTS5_H
35
36 /* #include "sqlite3.h" */
37
38 #if 0
39 extern "C" {
40 #endif
41
42 /*************************************************************************
43 ** CUSTOM AUXILIARY FUNCTIONS
44 **
45 ** Virtual table implementations may overload SQL functions by implementing
46 ** the sqlite3_module.xFindFunction() method.
47 */
48
49 typedef struct Fts5ExtensionApi Fts5ExtensionApi;
50 typedef struct Fts5Context Fts5Context;
51 typedef struct Fts5PhraseIter Fts5PhraseIter;
52
53 typedef void (*fts5_extension_function)(
54 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
55 Fts5Context *pFts, /* First arg to pass to pApi functions */
56 sqlite3_context *pCtx, /* Context for returning result/error */
57 int nVal, /* Number of values in apVal[] array */
58 sqlite3_value **apVal /* Array of trailing arguments */
59 );
60
61 struct Fts5PhraseIter { /* Iterator state used with xPhraseFirst()/xPhraseNext() and xPhraseFirstColumn()/xPhraseNextColumn() */
62 const unsigned char *a; /* Internal iterator state; applications should not modify it directly */
63 const unsigned char *b; /* Internal iterator state; applications should not modify it directly */
64 };
65
66 /*
67 ** EXTENSION API FUNCTIONS
68 **
69 ** xUserData(pFts):
70 ** Return a copy of the context pointer the extension function was
71 ** registered with.
72 **
73 ** xColumnTotalSize(pFts, iCol, pnToken):
74 ** If parameter iCol is less than zero, set output variable *pnToken
75 ** to the total number of tokens in the FTS5 table. Or, if iCol is
76 ** non-negative but less than the number of columns in the table, return
77 ** the total number of tokens in column iCol, considering all rows in
78 ** the FTS5 table.
79 **
80 ** If parameter iCol is greater than or equal to the number of columns
81 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
82 ** an OOM condition or IO error), an appropriate SQLite error code is
83 ** returned.
84 **
85 ** xColumnCount(pFts):
86 ** Return the number of columns in the table.
87 **
88 ** xColumnSize(pFts, iCol, pnToken):
89 ** If parameter iCol is less than zero, set output variable *pnToken
90 ** to the total number of tokens in the current row. Or, if iCol is
91 ** non-negative but less than the number of columns in the table, set
92 ** *pnToken to the number of tokens in column iCol of the current row.
93 **
94 ** If parameter iCol is greater than or equal to the number of columns
95 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
96 ** an OOM condition or IO error), an appropriate SQLite error code is
97 ** returned.
98 **
99 ** This function may be quite inefficient if used with an FTS5 table
100 ** created with the "columnsize=0" option.
101 **
102 ** xColumnText:
103 ** This function attempts to retrieve the text of column iCol of the
104 ** current document. If successful, (*pz) is set to point to a buffer
105 ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
106 ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
107 ** if an error occurs, an SQLite error code is returned and the final values
108 ** of (*pz) and (*pn) are undefined.
109 **
110 ** xPhraseCount:
111 ** Returns the number of phrases in the current query expression.
112 **
113 ** xPhraseSize:
114 ** Returns the number of tokens in phrase iPhrase of the query. Phrases
115 ** are numbered starting from zero.
116 **
117 ** xInstCount:
118 ** Set *pnInst to the total number of occurrences of all phrases within
119 ** the query within the current row. Return SQLITE_OK if successful, or
120 ** an error code (e.g. SQLITE_NOMEM) if an error occurs.
121 **
122 ** This API can be quite slow if used with an FTS5 table created with the
123 ** "detail=none" or "detail=column" option. If the FTS5 table is created
124 ** with either "detail=none" or "detail=column" and "content=" option
125 ** (i.e. if it is a contentless table), then this API always returns 0.
126 **
127 ** xInst:
128 ** Query for the details of phrase match iIdx within the current row.
129 ** Phrase matches are numbered starting from zero, so the iIdx argument
130 ** should be greater than or equal to zero and smaller than the value
131 ** output by xInstCount().
132 **
133 ** Usually, output parameter *piPhrase is set to the phrase number, *piCol
134 ** to the column in which it occurs and *piOff the token offset of the
135 ** first token of the phrase. The exception is if the table was created
136 ** with the offsets=0 option specified. In this case *piOff is always
137 ** set to -1.
138 **
139 ** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
140 ** if an error occurs.
141 **
142 ** This API can be quite slow if used with an FTS5 table created with the
143 ** "detail=none" or "detail=column" option.
144 **
145 ** xRowid:
146 ** Returns the rowid of the current row.
147 **
148 ** xTokenize:
149 ** Tokenize text using the tokenizer belonging to the FTS5 table.
150 **
151 ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
152 ** This API function is used to query the FTS table for phrase iPhrase
153 ** of the current query. Specifically, a query equivalent to:
154 **
155 ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
156 **
157 ** with $p set to a phrase equivalent to the phrase iPhrase of the
158 ** current query is executed. Any column filter that applies to
159 ** phrase iPhrase of the current query is included in $p. For each
160 ** row visited, the callback function passed as the fourth argument
161 ** is invoked. The context and API objects passed to the callback
162 ** function may be used to access the properties of each matched row.
163 ** Invoking Api.xUserData() returns a copy of the pointer passed as
164 ** the third argument to pUserData.
165 **
166 ** If the callback function returns any value other than SQLITE_OK, the
167 ** query is abandoned and the xQueryPhrase function returns immediately.
168 ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
169 ** Otherwise, the error code is propagated upwards.
170 **
171 ** If the query runs to completion without incident, SQLITE_OK is returned.
172 ** Or, if some error occurs before the query completes or is aborted by
173 ** the callback, an SQLite error code is returned.
174 **
175 **
176 ** xSetAuxdata(pFts5, pAux, xDelete)
177 **
178 ** Save the pointer passed as the second argument as the extension function's
179 ** "auxiliary data". The pointer may then be retrieved by the current or any
180 ** future invocation of the same fts5 extension function made as part of
181 ** the same MATCH query using the xGetAuxdata() API.
182 **
183 ** Each extension function is allocated a single auxiliary data slot for
184 ** each FTS query (MATCH expression). If the extension function is invoked
185 ** more than once for a single FTS query, then all invocations share a
186 ** single auxiliary data context.
187 **
188 ** If there is already an auxiliary data pointer when this function is
189 ** invoked, then it is replaced by the new pointer. If an xDelete callback
190 ** was specified along with the original pointer, it is invoked at this
191 ** point.
192 **
193 ** The xDelete callback, if one is specified, is also invoked on the
194 ** auxiliary data pointer after the FTS5 query has finished.
195 **
196 ** If an error (e.g. an OOM condition) occurs within this function,
197 ** the auxiliary data is set to NULL and an error code is returned. If the
198 ** xDelete parameter was not NULL, it is invoked on the auxiliary data
199 ** pointer before returning.
200 **
201 **
202 ** xGetAuxdata(pFts5, bClear)
203 **
204 ** Returns the current auxiliary data pointer for the fts5 extension
205 ** function. See the xSetAuxdata() method for details.
206 **
207 ** If the bClear argument is non-zero, then the auxiliary data is cleared
208 ** (set to NULL) before this function returns. In this case the xDelete,
209 ** if any, is not invoked.
210 **
211 **
212 ** xRowCount(pFts5, pnRow)
213 **
214 ** This function is used to retrieve the total number of rows in the table.
215 ** In other words, the same value that would be returned by:
216 **
217 ** SELECT count(*) FROM ftstable;
218 **
219 ** xPhraseFirst()
220 ** This function is used, along with type Fts5PhraseIter and the xPhraseNext
221 ** method, to iterate through all instances of a single query phrase within
222 ** the current row. This is the same information as is accessible via the
223 ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
224 ** to use, this API may be faster under some circumstances. To iterate
225 ** through instances of phrase iPhrase, use the following code:
226 **
227 ** Fts5PhraseIter iter;
228 ** int iCol, iOff;
229 ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
230 ** iCol>=0;
231 ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
232 ** ){
233 ** // An instance of phrase iPhrase at offset iOff of column iCol
234 ** }
235 **
236 ** The Fts5PhraseIter structure is defined above. Applications should not
237 ** modify this structure directly - it should only be used as shown above
238 ** with the xPhraseFirst() and xPhraseNext() API methods (and by
239 ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
240 **
241 ** This API can be quite slow if used with an FTS5 table created with the
242 ** "detail=none" or "detail=column" option. If the FTS5 table is created
243 ** with either "detail=none" or "detail=column" and "content=" option
244 ** (i.e. if it is a contentless table), then this API always iterates
245 ** through an empty set (all calls to xPhraseFirst() set iCol to -1).
246 **
247 ** xPhraseNext()
248 ** See xPhraseFirst above.
249 **
250 ** xPhraseFirstColumn()
251 ** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
252 ** and xPhraseNext() APIs described above. The difference is that instead
253 ** of iterating through all instances of a phrase in the current row, these
254 ** APIs are used to iterate through the set of columns in the current row
255 ** that contain one or more instances of a specified phrase. For example:
256 **
257 ** Fts5PhraseIter iter;
258 ** int iCol;
259 ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
260 ** iCol>=0;
261 ** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
262 ** ){
263 ** // Column iCol contains at least one instance of phrase iPhrase
264 ** }
265 **
266 ** This API can be quite slow if used with an FTS5 table created with the
267 ** "detail=none" option. If the FTS5 table is created with both the
268 ** "detail=none" and "content=" options (i.e. if it is a contentless table),
269 ** then this API always iterates through an empty set (all calls to
270 ** xPhraseFirstColumn() set iCol to -1).
271 **
272 ** The information accessed using this API and its companion
273 ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
274 ** (or xInst/xInstCount). The chief advantage of this API is that it is
275 ** significantly more efficient than those alternatives when used with
276 ** "detail=column" tables.
277 **
278 ** xPhraseNextColumn()
279 ** See xPhraseFirstColumn above.
280 */
281 struct Fts5ExtensionApi {
282 int iVersion; /* Currently always set to 3 */
283
284 void *(*xUserData)(Fts5Context*); /* Context pointer the extension function was registered with */
285
286 int (*xColumnCount)(Fts5Context*); /* Number of columns in the table */
287 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); /* Total number of rows in the table */
288 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); /* Token total over all rows; iCol<0 means all columns */
289
290 int (*xTokenize)(Fts5Context*, /* Tokenize text using the table's tokenizer */
291 const char *pText, int nText, /* Text to tokenize */
292 void *pCtx, /* Context passed to xToken() */
293 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
294 );
295
296 int (*xPhraseCount)(Fts5Context*); /* Number of phrases in the current query expression */
297 int (*xPhraseSize)(Fts5Context*, int iPhrase); /* Number of tokens in phrase iPhrase (numbered from zero) */
298
299 int (*xInstCount)(Fts5Context*, int *pnInst); /* Count of phrase occurrences within the current row */
300 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); /* Details of phrase match iIdx in the current row */
301
302 sqlite3_int64 (*xRowid)(Fts5Context*); /* Rowid of the current row */
303 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); /* UTF-8 text of column iCol; *pn is a size in bytes */
304 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); /* Token count in the current row; iCol<0 means all columns */
305
306 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, /* Query the table for phrase iPhrase */
307 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) /* Callback invoked for each row visited */
308 );
309 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); /* Save the per-query auxiliary data pointer */
310 void *(*xGetAuxdata)(Fts5Context*, int bClear); /* Fetch auxiliary data; non-zero bClear resets it to NULL */
311
312 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); /* Begin iterating instances of phrase iPhrase */
313 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); /* Advance; iteration ends when *piCol<0 */
314
315 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); /* Begin iterating columns that contain phrase iPhrase */
316 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); /* Advance; iteration ends when *piCol<0 */
317 };
318
319 /*
320 ** CUSTOM AUXILIARY FUNCTIONS
321 *************************************************************************/
322
323 /*************************************************************************
324 ** CUSTOM TOKENIZERS
325 **
326 ** Applications may also register custom tokenizer types. A tokenizer
327 ** is registered by providing fts5 with a populated instance of the
328 ** following structure. All structure methods must be defined, setting
329 ** any member of the fts5_tokenizer struct to NULL leads to undefined
330 ** behaviour. The structure methods are expected to function as follows:
331 **
332 ** xCreate:
333 ** This function is used to allocate and initialize a tokenizer instance.
334 ** A tokenizer instance is required to actually tokenize text.
335 **
336 ** The first argument passed to this function is a copy of the (void*)
337 ** pointer provided by the application when the fts5_tokenizer object
338 ** was registered with FTS5 (the third argument to xCreateTokenizer()).
339 ** The second and third arguments are an array of nul-terminated strings
340 ** containing the tokenizer arguments, if any, specified following the
341 ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
342 ** to create the FTS5 table.
343 **
344 ** The final argument is an output variable. If successful, (*ppOut)
345 ** should be set to point to the new tokenizer handle and SQLITE_OK
346 ** returned. If an error occurs, some value other than SQLITE_OK should
347 ** be returned. In this case, fts5 assumes that the final value of *ppOut
348 ** is undefined.
349 **
350 ** xDelete:
351 ** This function is invoked to delete a tokenizer handle previously
352 ** allocated using xCreate(). Fts5 guarantees that this function will
353 ** be invoked exactly once for each successful call to xCreate().
354 **
355 ** xTokenize:
356 ** This function is expected to tokenize the nText byte string indicated
357 ** by argument pText. pText may or may not be nul-terminated. The first
358 ** argument passed to this function is a pointer to an Fts5Tokenizer object
359 ** returned by an earlier call to xCreate().
360 **
361 ** The third argument indicates the reason that FTS5 is requesting
362 ** tokenization of the supplied text. This is always one of the following
363 ** four values:
364 **
365 ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
366 ** or removed from the FTS table. The tokenizer is being invoked to
367 ** determine the set of tokens to add to (or delete from) the
368 ** FTS index.
369 **
370 ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
371 ** against the FTS index. The tokenizer is being called to tokenize
372 ** a bareword or quoted string specified as part of the query.
373 **
374 ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
375 ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
376 ** followed by a "*" character, indicating that the last token
377 ** returned by the tokenizer will be treated as a token prefix.
378 **
379 ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
380 ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary
381 ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same
382 ** on a columnsize=0 database.
383 ** </ul>
384 **
385 ** For each token in the input string, the supplied callback xToken() must
386 ** be invoked. The first argument to it should be a copy of the pointer
387 ** passed as the second argument to xTokenize(). The third and fourth
388 ** arguments are a pointer to a buffer containing the token text, and the
389 ** size of the token in bytes. The 5th and 6th arguments are the byte offsets
390 ** of the first byte of and first byte immediately following the text from
391 ** which the token is derived within the input.
392 **
393 ** The second argument passed to the xToken() callback ("tflags") should
394 ** normally be set to 0. The exception is if the tokenizer supports
395 ** synonyms. In this case see the discussion below for details.
396 **
397 ** FTS5 assumes the xToken() callback is invoked for each token in the
398 ** order that they occur within the input text.
399 **
400 ** If an xToken() callback returns any value other than SQLITE_OK, then
401 ** the tokenization should be abandoned and the xTokenize() method should
402 ** immediately return a copy of the xToken() return value. Or, if the
403 ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
404 ** if an error occurs with the xTokenize() implementation itself, it
405 ** may abandon the tokenization and return any error code other than
406 ** SQLITE_OK or SQLITE_DONE.
407 **
408 ** SYNONYM SUPPORT
409 **
410 ** Custom tokenizers may also support synonyms. Consider a case in which a
411 ** user wishes to query for a phrase such as "first place". Using the
412 ** built-in tokenizers, the FTS5 query 'first + place' will match instances
413 ** of "first place" within the document set, but not alternative forms
414 ** such as "1st place". In some applications, it would be better to match
415 ** all instances of "first place" or "1st place" regardless of which form
416 ** the user specified in the MATCH query text.
417 **
418 ** There are several ways to approach this in FTS5:
419 **
420 ** <ol><li> By mapping all synonyms to a single token. In this case, using
421 ** the above example, this means that the tokenizer returns the
422 ** same token for inputs "first" and "1st". Say that token is in
423 ** fact "first", so that when the user inserts the document "I won
424 ** 1st place" entries are added to the index for tokens "i", "won",
425 ** "first" and "place". If the user then queries for '1st + place',
426 ** the tokenizer substitutes "first" for "1st" and the query works
427 ** as expected.
428 **
429 ** <li> By querying the index for all synonyms of each query term separately.
430 ** In this case, when tokenizing query text, the tokenizer may
431 ** provide multiple synonyms for a single term within the document.
432 ** FTS5 then queries the index for each synonym individually. For
433 ** example, faced with the query:
434 **
435 ** <codeblock>
436 ** ... MATCH 'first place'</codeblock>
437 **
438 ** the tokenizer offers both "1st" and "first" as synonyms for the
439 ** first token in the MATCH query and FTS5 effectively runs a query
440 ** similar to:
441 **
442 ** <codeblock>
443 ** ... MATCH '(first OR 1st) place'</codeblock>
444 **
445 ** except that, for the purposes of auxiliary functions, the query
446 ** still appears to contain just two phrases - "(first OR 1st)"
447 ** being treated as a single phrase.
448 **
449 ** <li> By adding multiple synonyms for a single term to the FTS index.
450 ** Using this method, when tokenizing document text, the tokenizer
451 ** provides multiple synonyms for each token. So that when a
452 ** document such as "I won first place" is tokenized, entries are
453 ** added to the FTS index for "i", "won", "first", "1st" and
454 ** "place".
455 **
456 ** This way, even if the tokenizer does not provide synonyms
457 ** when tokenizing query text (it should not - to do so would be
458 ** inefficient), it doesn't matter if the user queries for
459 ** 'first + place' or '1st + place', as there are entries in the
460 ** FTS index corresponding to both forms of the first token.
461 ** </ol>
462 **
463 ** Whether it is parsing document or query text, any call to xToken that
464 ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
465 ** is considered to supply a synonym for the previous token. For example,
466 ** when parsing the document "I won first place", a tokenizer that supports
467 ** synonyms would call xToken() 5 times, as follows:
468 **
469 ** <codeblock>
470 ** xToken(pCtx, 0, "i", 1, 0, 1);
471 ** xToken(pCtx, 0, "won", 3, 2, 5);
472 ** xToken(pCtx, 0, "first", 5, 6, 11);
473 ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
474 ** xToken(pCtx, 0, "place", 5, 12, 17);
475 **</codeblock>
476 **
477 ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
478 ** xToken() is called. Multiple synonyms may be specified for a single token
479 ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
480 ** There is no limit to the number of synonyms that may be provided for a
481 ** single token.
482 **
483 ** In many cases, method (1) above is the best approach. It does not add
484 ** extra data to the FTS index or require FTS5 to query for multiple terms,
485 ** so it is efficient in terms of disk space and query speed. However, it
486 ** does not support prefix queries very well. If, as suggested above, the
487 ** token "first" is substituted for "1st" by the tokenizer, then the query:
488 **
489 ** <codeblock>
490 ** ... MATCH '1s*'</codeblock>
491 **
492 ** will not match documents that contain the token "1st" (as the tokenizer
493 ** will probably not map "1s" to any prefix of "first").
494 **
495 ** For full prefix support, method (3) may be preferred. In this case,
496 ** because the index contains entries for both "first" and "1st", prefix
497 ** queries such as 'fi*' or '1s*' will match correctly. However, because
498 ** extra entries are added to the FTS index, this method uses more space
499 ** within the database.
500 **
501 ** Method (2) offers a midpoint between (1) and (3). Using this method,
502 ** a query such as '1s*' will match documents that contain the literal
503 ** token "1st", but not "first" (assuming the tokenizer is not able to
504 ** provide synonyms for prefixes). However, a non-prefix query like '1st'
505 ** will match against "1st" and "first". This method does not require
506 ** extra disk space, as no extra entries are added to the FTS index.
507 ** On the other hand, it may require more CPU cycles to run MATCH queries,
508 ** as separate queries of the FTS index are required for each synonym.
509 **
510 ** When using methods (2) or (3), it is important that the tokenizer only
511 ** provide synonyms when tokenizing document text (method (3)) or query
512 ** text (method (2)), not both. Doing so will not cause any errors, but is
513 ** inefficient.
514 */
515 typedef struct Fts5Tokenizer Fts5Tokenizer;
516 typedef struct fts5_tokenizer fts5_tokenizer;
517 struct fts5_tokenizer {
518 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); /* Allocate and initialize a tokenizer instance; handle returned via *ppOut */
519 void (*xDelete)(Fts5Tokenizer*); /* Delete a tokenizer handle previously allocated by xCreate() */
520 int (*xTokenize)(Fts5Tokenizer*, /* Tokenize pText, invoking xToken() for each token in order */
521 void *pCtx, /* Passed through as the first argument to xToken() */
522 int flags, /* Mask of FTS5_TOKENIZE_* flags */
523 const char *pText, int nText, /* Input text of nText bytes; may or may not be nul-terminated */
524 int (*xToken)(
525 void *pCtx, /* Copy of 2nd argument to xTokenize() */
526 int tflags, /* Mask of FTS5_TOKEN_* flags */
527 const char *pToken, /* Pointer to buffer containing token */
528 int nToken, /* Size of token in bytes */
529 int iStart, /* Byte offset of token within input text */
530 int iEnd /* Byte offset of end of token within input text */
531 )
532 );
533 };
534
535 /* Flags that may be passed as the third argument to xTokenize() */
536 #define FTS5_TOKENIZE_QUERY 0x0001
537 #define FTS5_TOKENIZE_PREFIX 0x0002
538 #define FTS5_TOKENIZE_DOCUMENT 0x0004
539 #define FTS5_TOKENIZE_AUX 0x0008
540
541 /* Flags that may be passed by the tokenizer implementation back to FTS5
542 ** as the second argument to the supplied xToken callback. */
543 #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
544
545 /*
546 ** END OF CUSTOM TOKENIZERS
547 *************************************************************************/
548
549 /*************************************************************************
550 ** FTS5 EXTENSION REGISTRATION API
551 */
552 typedef struct fts5_api fts5_api;
553 struct fts5_api {
554 int iVersion; /* Currently always set to 2 */
555
556 /* Create a new tokenizer */
557 int (*xCreateTokenizer)(
558 fts5_api *pApi,
559 const char *zName, /* Name of the tokenizer being registered */
560 void *pContext, /* Copied to the first argument of fts5_tokenizer.xCreate() */
561 fts5_tokenizer *pTokenizer, /* Populated method table; no member may be NULL */
562 void (*xDestroy)(void*) /* NOTE(review): presumably a destructor for pContext - confirm */
563 );
564
565 /* Find an existing tokenizer */
566 int (*xFindTokenizer)(
567 fts5_api *pApi,
568 const char *zName, /* Name of the tokenizer to look up */
569 void **ppContext, /* NOTE(review): presumably receives the registered pContext - confirm */
570 fts5_tokenizer *pTokenizer /* NOTE(review): presumably filled in with the tokenizer's methods - confirm */
571 );
572
573 /* Create a new auxiliary function */
574 int (*xCreateFunction)(
575 fts5_api *pApi,
576 const char *zName, /* Name of the auxiliary function being registered */
577 void *pContext, /* Context pointer later returned by Fts5ExtensionApi.xUserData() */
578 fts5_extension_function xFunction, /* Implementation of the auxiliary function */
579 void (*xDestroy)(void*) /* NOTE(review): presumably a destructor for pContext - confirm */
580 );
581 };
582
583 /*
584 ** END OF REGISTRATION API
585 *************************************************************************/
586
587 #if 0
588 } /* end of the 'extern "C"' block */
589 #endif
590
591 #endif /* _FTS5_H */
592
593 /*
594 ** 2014 May 31
595 **
596 ** The author disclaims copyright to this source code. In place of
597 ** a legal notice, here is a blessing:
598 **
599 ** May you do good and not evil.
600 ** May you find forgiveness for yourself and forgive others.
601 ** May you share freely, never taking more than you give.
602 **
603 ******************************************************************************
604 **
605 */
606 #ifndef _FTS5INT_H
607 #define _FTS5INT_H
608
609 /* #include "fts5.h" */
610 /* #include "sqlite3ext.h" */
611 SQLITE_EXTENSION_INIT1
612
613 /* #include <string.h> */
614 /* #include <assert.h> */
615
616 #ifndef SQLITE_AMALGAMATION /* Everything up to the matching #endif is compiled only when SQLITE_AMALGAMATION is undefined */
617
618 typedef unsigned char u8;
619 typedef unsigned int u32;
620 typedef unsigned short u16;
621 typedef short i16;
622 typedef sqlite3_int64 i64;
623 typedef sqlite3_uint64 u64;
624
625 #define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0]))) /* Element count; only meaningful for true arrays, not pointers */
626
627 #define testcase(x) /* Expands to nothing in this configuration */
628 #define ALWAYS(x) 1 /* Expands to the constant 1; x is not evaluated */
629 #define NEVER(x) 0 /* Expands to the constant 0; x is not evaluated */
630
631 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) /* The selected argument is evaluated twice; avoid side effects */
632 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) /* The selected argument is evaluated twice; avoid side effects */
633
634 /*
635 ** Constants for the largest and smallest possible 64-bit signed integers.
636 */
637 # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
638 # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
639
640 #endif /* !SQLITE_AMALGAMATION */
641
642 /* Truncate very long tokens to this many bytes. Hard limit is
643 ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset
644 ** field that occurs at the start of each leaf page (see fts5_index.c). */
645 #define FTS5_MAX_TOKEN_SIZE 32768
646
647 /*
648 ** Maximum number of prefix indexes on single FTS5 table. This must be
649 ** less than 32. If it is set to anything larger than that, an #error
650 ** directive in fts5_index.c will cause the build to fail.
651 */
652 #define FTS5_MAX_PREFIX_INDEXES 31
653
654 #define FTS5_DEFAULT_NEARDIST 10
655 #define FTS5_DEFAULT_RANK "bm25"
656
657 /* Name of rank and rowid columns */
658 #define FTS5_RANK_NAME "rank"
659 #define FTS5_ROWID_NAME "rowid"
660
661 #ifdef SQLITE_DEBUG
662 # define FTS5_CORRUPT sqlite3Fts5Corrupt()
663 static int sqlite3Fts5Corrupt(void);
664 #else
665 # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
666 #endif
667
668 /*
669 ** The assert_nc() macro is similar to the assert() macro, except that it
670 ** is used for assert() conditions that are true only if it can be
671 ** guaranteed that the database is not corrupt.
672 */
673 #ifdef SQLITE_DEBUG
674 SQLITE_API extern int sqlite3_fts5_may_be_corrupt;
675 # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
676 #else
677 # define assert_nc(x) assert(x)
678 #endif
679
680 /* Mark a function parameter as unused, to suppress nuisance compiler
681 ** warnings. */
682 #ifndef UNUSED_PARAM
683 # define UNUSED_PARAM(X) (void)(X)
684 #endif
685
686 #ifndef UNUSED_PARAM2
687 # define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y)
688 #endif
689
690 typedef struct Fts5Global Fts5Global;
691 typedef struct Fts5Colset Fts5Colset;
692
693 /* If a NEAR() clump or phrase may only match a specific set of columns,
694 ** then an object of the following type is used to record the set of columns.
695 ** Each entry in the aiCol[] array is a column that may be matched.
696 **
697 ** This object is used by fts5_expr.c and fts5_index.c.
698 */
699 struct Fts5Colset {
700 int nCol; /* NOTE(review): presumably the number of entries in aiCol[] - confirm */
701 int aiCol[1]; /* Columns that may be matched; NOTE(review): declared [1], presumably over-allocated to hold nCol entries - confirm */
702 };
703
704
705
706 /**************************************************************************
707 ** Interface to code in fts5_config.c. fts5_config.c contains code
708 ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
709 */
710
711 typedef struct Fts5Config Fts5Config;
712
713 /*
714 ** An instance of the following structure encodes all information that can
715 ** be gleaned from the CREATE VIRTUAL TABLE statement.
716 **
717 ** And all information loaded from the %_config table.
718 **
719 ** nAutomerge:
720 ** The minimum number of segments that an auto-merge operation should
721 ** attempt to merge together. A value of 1 sets the object to use the
722 ** compile time default. Zero disables auto-merge altogether.
723 **
724 ** zContent:
725 **
726 ** zContentRowid:
727 ** The value of the content_rowid= option, if one was specified. Or
728 ** the string "rowid" otherwise. This text is not quoted - if it is
729 ** used as part of an SQL statement it needs to be quoted appropriately.
730 **
731 ** zContentExprlist:
732 **
733 ** pzErrmsg:
734 ** This exists in order to allow the fts5_index.c module to return a
735 ** decent error message if it encounters a file-format version it does
736 ** not understand.
737 **
738 ** bColumnsize:
739 ** True if the %_docsize table is created.
740 **
741 ** bPrefixIndex:
742 ** This is only used for debugging. If set to false, any prefix indexes
743 ** are ignored. This value is configured using:
744 **
745 ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
746 **
747 */
748 struct Fts5Config {
749 sqlite3 *db; /* Database handle */
750 char *zDb; /* Database holding FTS index (e.g. "main") */
751 char *zName; /* Name of FTS index */
752 int nCol; /* Number of columns */
753 char **azCol; /* Column names */
754 u8 *abUnindexed; /* True for unindexed columns */
755 int nPrefix; /* Number of prefix indexes */
756 int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
757 int eContent; /* An FTS5_CONTENT_XXX value (NORMAL, NONE or EXTERNAL) */
758 char *zContent; /* Content table */
759 char *zContentRowid; /* "content_rowid=" option value, or "rowid" if none; not quoted */
760 int bColumnsize; /* "columnsize=" option value (dflt==1) */
761 int eDetail; /* An FTS5_DETAIL_XXX value (FULL, NONE or COLUMNS) */
762 char *zContentExprlist; /* NOTE(review): undocumented; name suggests an expression list for reading the content table - confirm */
763 Fts5Tokenizer *pTok; /* Tokenizer handle */
764 fts5_tokenizer *pTokApi; /* Tokenizer method table */
765
766 /* Values loaded from the %_config table */
767 int iCookie; /* Incremented when %_config is modified */
768 int pgsz; /* Approximate page size used in %_data */
769 int nAutomerge; /* 'automerge' setting */
770 int nCrisisMerge; /* Maximum allowed segments per level */
771 int nUsermerge; /* 'usermerge' setting */
772 int nHashSize; /* Bytes of memory for in-memory hash */
773 char *zRank; /* Name of rank function */
774 char *zRankArgs; /* Arguments to rank function */
775
776 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
777 char **pzErrmsg;
778
779 #ifdef SQLITE_DEBUG
780 int bPrefixIndex; /* Debugging only: true to use prefix-indexes */
781 #endif
782 };
783
784 /* Current expected value of %_config table 'version' field */
785 #define FTS5_CURRENT_VERSION 4
786
787 #define FTS5_CONTENT_NORMAL 0
788 #define FTS5_CONTENT_NONE 1
789 #define FTS5_CONTENT_EXTERNAL 2
790
791 #define FTS5_DETAIL_FULL 0
792 #define FTS5_DETAIL_NONE 1
793 #define FTS5_DETAIL_COLUMNS 2
794
795
796
797 static int sqlite3Fts5ConfigParse(
798 Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
799 );
800 static void sqlite3Fts5ConfigFree(Fts5Config*);
801
802 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
803
804 static int sqlite3Fts5Tokenize(
805 Fts5Config *pConfig, /* FTS5 Configuration object */
806 int flags, /* FTS5_TOKENIZE_* flags */
807 const char *pText, int nText, /* Text to tokenize */
808 void *pCtx, /* Context passed to xToken() */
809 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
810 );
811
812 static void sqlite3Fts5Dequote(char *z);
813
814 /* Load the contents of the %_config table */
815 static int sqlite3Fts5ConfigLoad(Fts5Config*, int);
816
817 /* Set the value of a single config attribute */
818 static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
819
820 static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
821
822 /*
823 ** End of interface to code in fts5_config.c.
824 **************************************************************************/
825
826 /**************************************************************************
827 ** Interface to code in fts5_buffer.c.
828 */
829
830 /*
831 ** Buffer object for the incremental building of string data.
832 */
833 typedef struct Fts5Buffer Fts5Buffer;
834 struct Fts5Buffer {
835 u8 *p; /* Buffer data -- presumably heap memory owned by the buffer; confirm in fts5_buffer.c */
836 int n; /* Current size: fts5BufferGrow() tests n+nn against nSpace */
837 int nSpace; /* Limit that n+nn is compared against -- presumably the bytes allocated at p */
838 };
839
840 static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
841 static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
842 static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
843 static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
844 static void sqlite3Fts5BufferFree(Fts5Buffer*);
845 static void sqlite3Fts5BufferZero(Fts5Buffer*);
846 static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
847 static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
848
849 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
850
851 #define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
852 #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c)
853 #define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
854 #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
855 #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
856
/*
** Evaluates to 0 when (pBuf)->n + nn already fits within (pBuf)->nSpace
** (all three compared as u32). Otherwise evaluates to the return value of
** sqlite3Fts5BufferSize(pRc, pBuf, nn + (pBuf)->n).
** Note that pBuf and nn are each expanded more than once, so arguments
** with side effects must not be passed.
*/
857 #define fts5BufferGrow(pRc,pBuf,nn) ( \
858 (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
859 sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
860 )
861
862 /* Write and decode big-endian 32-bit integer values */
863 static void sqlite3Fts5Put32(u8*, int);
864 static int sqlite3Fts5Get32(const u8*);
865
/*
** Split a 64-bit position value into its two halves. Positions are packed
** as (iCol<<32) + iPos (see Fts5PoslistReader.iPos): FTS5_POS2COLUMN()
** yields the upper 32 bits (the column) and FTS5_POS2OFFSET() the lower
** 32 bits (the offset), each converted to int.
**
** The argument and the whole expansion are parenthesized so that a compound
** argument such as (a|b) is shifted/masked as a unit rather than being
** re-associated by operator precedence.
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0xFFFFFFFF))
868
869 typedef struct Fts5PoslistReader Fts5PoslistReader;
870 struct Fts5PoslistReader {
871 /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
872 const u8 *a; /* Position list to iterate through */
873 int n; /* Size of buffer at a[] in bytes */
874 int i; /* Current offset in a[] */
875
876 u8 bFlag; /* For client use (any custom purpose) */
877
878 /* Output variables */
879 u8 bEof; /* Set to true at EOF */
880 i64 iPos; /* (iCol<<32) + iPos */
881 };
882 static int sqlite3Fts5PoslistReaderInit(
883 const u8 *a, int n, /* Poslist buffer to iterate through */
884 Fts5PoslistReader *pIter /* Iterator object to initialize */
885 );
886 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
887
888 typedef struct Fts5PoslistWriter Fts5PoslistWriter;
889 struct Fts5PoslistWriter {
890 i64 iPrev; /* NOTE(review): presumably the last position appended via sqlite3Fts5PoslistWriterAppend() -- confirm in fts5_buffer.c */
891 };
892 static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
893 static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);
894
895 static int sqlite3Fts5PoslistNext64(
896 const u8 *a, int n, /* Buffer containing poslist */
897 int *pi, /* IN/OUT: Offset within a[] */
898 i64 *piOff /* IN/OUT: Current offset */
899 );
900
901 /* Malloc utility */
902 static void *sqlite3Fts5MallocZero(int *pRc, int nByte);
903 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
904
905 /* Character set tests (like isspace(), isalpha() etc.) */
906 static int sqlite3Fts5IsBareword(char t);
907
908
909 /* Bucket of terms object used by the integrity-check in offsets=0 mode. */
910 typedef struct Fts5Termset Fts5Termset;
911 static int sqlite3Fts5TermsetNew(Fts5Termset**);
912 static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
913 static void sqlite3Fts5TermsetFree(Fts5Termset*);
914
915 /*
916 ** End of interface to code in fts5_buffer.c.
917 **************************************************************************/
918
919 /**************************************************************************
920 ** Interface to code in fts5_index.c. fts5_index.c contains code
921 ** to access the data stored in the %_data table.
922 */
923
924 typedef struct Fts5Index Fts5Index;
925 typedef struct Fts5IndexIter Fts5IndexIter;
926
927 struct Fts5IndexIter {
928 i64 iRowid; /* Presumably the rowid of the entry the iterator currently points at -- confirm */
929 const u8 *pData; /* Presumably the data for the current entry -- confirm in fts5_index.c */
930 int nData; /* Presumably the size of pData[] in bytes -- confirm */
931 u8 bEof; /* Value returned by the sqlite3Fts5IterEof() macro */
932 };
933
934 #define sqlite3Fts5IterEof(x) ((x)->bEof)
935
936 /*
937 ** Values used as part of the flags argument passed to IndexQuery().
938 */
939 #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
940 #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
941 #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
942 #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
943
944 /* The following are used internally by the fts5_index.c module. They are
945 ** defined here only to make it easier to avoid clashes with the flags
946 ** above. */
947 #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
948 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020
949
950 /*
951 ** Create/destroy an Fts5Index object.
952 */
953 static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
954 static int sqlite3Fts5IndexClose(Fts5Index *p);
955
956 /*
957 ** Return a simple checksum value based on the arguments.
958 */
959 static u64 sqlite3Fts5IndexEntryCksum(
960 i64 iRowid,
961 int iCol,
962 int iPos,
963 int iIdx,
964 const char *pTerm,
965 int nTerm
966 );
967
968 /*
969 ** Argument p points to a buffer containing utf-8 text that is nByte bytes in
970 ** size. Return the number of bytes in the nChar character prefix of the
971 ** buffer, or 0 if there are fewer than nChar characters in total.
972 */
973 static int sqlite3Fts5IndexCharlenToBytelen(
974 const char *p,
975 int nByte,
976 int nChar
977 );
978
979 /*
980 ** Open a new iterator to iterate through all rowids that match the
981 ** specified token or token prefix.
982 */
983 static int sqlite3Fts5IndexQuery(
984 Fts5Index *p, /* FTS index to query */
985 const char *pToken, int nToken, /* Token (or prefix) to query for */
986 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
987 Fts5Colset *pColset, /* Match these columns only */
988 Fts5IndexIter **ppIter /* OUT: New iterator object */
989 );
990
991 /*
992 ** The various operations on open token or token prefix iterators opened
993 ** using sqlite3Fts5IndexQuery().
994 */
995 static int sqlite3Fts5IterNext(Fts5IndexIter*);
996 static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
997
998 /*
999 ** Close an iterator opened by sqlite3Fts5IndexQuery().
1000 */
1001 static void sqlite3Fts5IterClose(Fts5IndexIter*);
1002
1003 /*
1004 ** This interface is used by the fts5vocab module.
1005 */
1006 static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
1007 static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
1008
1009
1010 /*
1011 ** Insert or remove data to or from the index. Each time a document is
1012 ** added to or removed from the index, this function is called one or more
1013 ** times.
1014 **
1015 ** For an insert, it must be called once for each token in the new document.
1016 ** If the operation is a delete, it must be called (at least) once for each
1017 ** unique token in the document with an iCol value less than zero. The iPos
1018 ** argument is ignored for a delete.
1019 */
1020 static int sqlite3Fts5IndexWrite(
1021 Fts5Index *p, /* Index to write to */
1022 int iCol, /* Column token appears in (-ve -> delete) */
1023 int iPos, /* Position of token within column */
1024 const char *pToken, int nToken /* Token to add or remove to or from index */
1025 );
1026
1027 /*
1028 ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
1029 ** document iDocid.
1030 */
1031 static int sqlite3Fts5IndexBeginWrite(
1032 Fts5Index *p, /* Index to write to */
1033 int bDelete, /* True if current operation is a delete */
1034 i64 iDocid /* Docid to add or remove data from */
1035 );
1036
1037 /*
1038 ** Flush any data stored in the in-memory hash tables to the database.
1039 ** If the bCommit flag is true, also close any open blob handles.
1040 */
1041 static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);
1042
1043 /*
1044 ** Discard any data stored in the in-memory hash tables. Do not write it
1045 ** to the database. Additionally, assume that the contents of the %_data
1046 ** table may have changed on disk. So any in-memory caches of %_data
1047 ** records must be invalidated.
1048 */
1049 static int sqlite3Fts5IndexRollback(Fts5Index *p);
1050
1051 /*
1052 ** Get or set the "averages" values.
1053 */
1054 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
1055 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
1056
1057 /*
1058 ** Functions called by the storage module as part of integrity-check.
1059 */
1060 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
1061
1062 /*
1063 ** Called during virtual module initialization to register UDF
1064 ** fts5_decode() with SQLite
1065 */
1066 static int sqlite3Fts5IndexInit(sqlite3*);
1067
1068 static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
1069
1070 /*
1071 ** Return the total number of entries read from the %_data table by
1072 ** this connection since it was created.
1073 */
1074 static int sqlite3Fts5IndexReads(Fts5Index *p);
1075
1076 static int sqlite3Fts5IndexReinit(Fts5Index *p);
1077 static int sqlite3Fts5IndexOptimize(Fts5Index *p);
1078 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
1079 static int sqlite3Fts5IndexReset(Fts5Index *p);
1080
1081 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
1082
1083 /*
1084 ** End of interface to code in fts5_index.c.
1085 **************************************************************************/
1086
1087 /**************************************************************************
1088 ** Interface to code in fts5_varint.c.
1089 */
1090 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
1091 static int sqlite3Fts5GetVarintLen(u32 iVal);
1092 static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
1093 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
1094
1095 #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
1096 #define fts5GetVarint sqlite3Fts5GetVarint
1097
/*
** Read a varint from a[] at offset iOff into nVal and advance iOff.
** Fast path: the byte at a[iOff] is stored in nVal and iOff is incremented;
** if that byte has bit 0x80 clear, nothing more is done. Otherwise iOff is
** stepped back and fts5GetVarint32() decodes the value into nVal, with its
** return value added to iOff.
**
** NOTE(review): this expands to a bare { } block (not do{}while(0)) and
** evaluates a, iOff and nVal more than once; iOff and nVal must be lvalues.
*/
1098 #define fts5FastGetVarint32(a, iOff, nVal) { \
1099 nVal = (a)[iOff++]; \
1100 if( nVal & 0x80 ){ \
1101 iOff--; \
1102 iOff += fts5GetVarint32(&(a)[iOff], nVal); \
1103 } \
1104 }
1105
1106
1107 /*
1108 ** End of interface to code in fts5_varint.c.
1109 **************************************************************************/
1110
1111
1112 /**************************************************************************
1113 ** Interface to code in fts5.c.
1114 */
1115
1116 static int sqlite3Fts5GetTokenizer(
1117 Fts5Global*,
1118 const char **azArg,
1119 int nArg,
1120 Fts5Tokenizer**,
1121 fts5_tokenizer**,
1122 char **pzErr
1123 );
1124
1125 static Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, Fts5Config **);
1126
1127 /*
1128 ** End of interface to code in fts5.c.
1129 **************************************************************************/
1130
1131 /**************************************************************************
1132 ** Interface to code in fts5_hash.c.
1133 */
1134 typedef struct Fts5Hash Fts5Hash;
1135
1136 /*
1137 ** Create a hash table, free a hash table.
1138 */
1139 static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
1140 static void sqlite3Fts5HashFree(Fts5Hash*);
1141
1142 static int sqlite3Fts5HashWrite(
1143 Fts5Hash*,
1144 i64 iRowid, /* Rowid for this entry */
1145 int iCol, /* Column token appears in (-ve -> delete) */
1146 int iPos, /* Position of token within column */
1147 char bByte,
1148 const char *pToken, int nToken /* Token to add or remove to or from index */
1149 );
1150
1151 /*
1152 ** Empty (but do not delete) a hash table.
1153 */
1154 static void sqlite3Fts5HashClear(Fts5Hash*);
1155
1156 static int sqlite3Fts5HashQuery(
1157 Fts5Hash*, /* Hash table to query */
1158 const char *pTerm, int nTerm, /* Query term */
1159 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
1160 int *pnDoclist /* OUT: Size of doclist in bytes */
1161 );
1162
1163 static int sqlite3Fts5HashScanInit(
1164 Fts5Hash*, /* Hash table to query */
1165 const char *pTerm, int nTerm /* Query prefix */
1166 );
1167 static void sqlite3Fts5HashScanNext(Fts5Hash*);
1168 static int sqlite3Fts5HashScanEof(Fts5Hash*);
1169 static void sqlite3Fts5HashScanEntry(Fts5Hash *,
1170 const char **pzTerm, /* OUT: term (nul-terminated) */
1171 const u8 **ppDoclist, /* OUT: pointer to doclist */
1172 int *pnDoclist /* OUT: size of doclist in bytes */
1173 );
1174
1175
1176 /*
1177 ** End of interface to code in fts5_hash.c.
1178 **************************************************************************/
1179
1180 /**************************************************************************
1181 ** Interface to code in fts5_storage.c. fts5_storage.c contains
1182 ** code to access the data stored in the %_content and %_docsize tables.
1183 */
1184
1185 #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
1186 #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
1187 #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */
1188
1189 typedef struct Fts5Storage Fts5Storage;
1190
1191 static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
1192 static int sqlite3Fts5StorageClose(Fts5Storage *p);
1193 static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
1194
1195 static int sqlite3Fts5DropAll(Fts5Config*);
1196 static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
1197
1198 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
1199 static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
1200 static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);
1201
1202 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p);
1203
1204 static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
1205 static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
1206
1207 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
1208 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
1209 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
1210
1211 static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
1212 static int sqlite3Fts5StorageRollback(Fts5Storage *p);
1213
1214 static int sqlite3Fts5StorageConfigValue(
1215 Fts5Storage *p, const char*, sqlite3_value*, int
1216 );
1217
1218 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
1219 static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
1220 static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
1221 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
1222 static int sqlite3Fts5StorageReset(Fts5Storage *p);
1223
1224 /*
1225 ** End of interface to code in fts5_storage.c.
1226 **************************************************************************/
1227
1228
1229 /**************************************************************************
1230 ** Interface to code in fts5_expr.c.
1231 */
1232 typedef struct Fts5Expr Fts5Expr;
1233 typedef struct Fts5ExprNode Fts5ExprNode;
1234 typedef struct Fts5Parse Fts5Parse;
1235 typedef struct Fts5Token Fts5Token;
1236 typedef struct Fts5ExprPhrase Fts5ExprPhrase;
1237 typedef struct Fts5ExprNearset Fts5ExprNearset;
1238
1239 struct Fts5Token {
1240 const char *p; /* Token text (not NULL terminated) */
1241 int n; /* Size of buffer p in bytes */
1242 };
1243
1244 /* Parse a MATCH expression. */
1245 static int sqlite3Fts5ExprNew(
1246 Fts5Config *pConfig,
1247 const char *zExpr,
1248 Fts5Expr **ppNew,
1249 char **pzErr
1250 );
1251
1252 /*
1253 ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
1254 ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
1255 ** rc = sqlite3Fts5ExprNext(pExpr, iMax)
1256 ** ){
1257 ** // The document with rowid iRowid matches the expression!
1258 ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
1259 ** }
1260 */
1261 static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
1262 static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
1263 static int sqlite3Fts5ExprEof(Fts5Expr*);
1264 static i64 sqlite3Fts5ExprRowid(Fts5Expr*);
1265
1266 static void sqlite3Fts5ExprFree(Fts5Expr*);
1267
1268 /* Called during startup to register a UDF with SQLite */
1269 static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
1270
1271 static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
1272 static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
1273 static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
1274
1275 typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
1276 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
1277 static int sqlite3Fts5ExprPopulatePoslists(
1278 Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
1279 );
1280 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);
1281
1282 static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
1283
1284 static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
1285
1286 /*******************************************
1287 ** The fts5_expr.c API above this point is used by the other hand-written
1288 ** C code in this module. The interfaces below this point are called by
1289 ** the parser code in fts5parse.y. */
1290
1291 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
1292
1293 static Fts5ExprNode *sqlite3Fts5ParseNode(
1294 Fts5Parse *pParse,
1295 int eType,
1296 Fts5ExprNode *pLeft,
1297 Fts5ExprNode *pRight,
1298 Fts5ExprNearset *pNear
1299 );
1300
1301 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
1302 Fts5Parse *pParse,
1303 Fts5ExprNode *pLeft,
1304 Fts5ExprNode *pRight
1305 );
1306
1307 static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
1308 Fts5Parse *pParse,
1309 Fts5ExprPhrase *pPhrase,
1310 Fts5Token *pToken,
1311 int bPrefix
1312 );
1313
1314 static Fts5ExprNearset *sqlite3Fts5ParseNearset(
1315 Fts5Parse*,
1316 Fts5ExprNearset*,
1317 Fts5ExprPhrase*
1318 );
1319
1320 static Fts5Colset *sqlite3Fts5ParseColset(
1321 Fts5Parse*,
1322 Fts5Colset*,
1323 Fts5Token *
1324 );
1325
1326 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
1327 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
1328 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
1329
1330 static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
1331 static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5Colset*);
1332 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
1333 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
1334 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
1335
1336 /*
1337 ** End of interface to code in fts5_expr.c.
1338 **************************************************************************/
1339
1340
1341
1342 /**************************************************************************
1343 ** Interface to code in fts5_aux.c.
1344 */
1345
1346 static int sqlite3Fts5AuxInit(fts5_api*);
1347 /*
1348 ** End of interface to code in fts5_aux.c.
1349 **************************************************************************/
1350
1351 /**************************************************************************
1352 ** Interface to code in fts5_tokenizer.c.
1353 */
1354
1355 static int sqlite3Fts5TokenizerInit(fts5_api*);
1356 /*
1357 ** End of interface to code in fts5_tokenizer.c.
1358 **************************************************************************/
1359
1360 /**************************************************************************
1361 ** Interface to code in fts5_vocab.c.
1362 */
1363
1364 static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
1365
1366 /*
1367 ** End of interface to code in fts5_vocab.c.
1368 **************************************************************************/
1369
1370
1371 /**************************************************************************
1372 ** Interface to automatically generated code in fts5_unicode2.c.
1373 */
1374 static int sqlite3Fts5UnicodeIsalnum(int c);
1375 static int sqlite3Fts5UnicodeIsdiacritic(int c);
1376 static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
1377 /*
1378 ** End of interface to code in fts5_unicode2.c.
1379 **************************************************************************/
1380
1381 #endif
1382
1383 #define FTS5_OR 1
1384 #define FTS5_AND 2
1385 #define FTS5_NOT 3
1386 #define FTS5_TERM 4
1387 #define FTS5_COLON 5
1388 #define FTS5_LP 6
1389 #define FTS5_RP 7
1390 #define FTS5_MINUS 8
1391 #define FTS5_LCP 9
1392 #define FTS5_RCP 10
1393 #define FTS5_STRING 11
1394 #define FTS5_COMMA 12
1395 #define FTS5_PLUS 13
1396 #define FTS5_STAR 14
1397
1398 /*
1399 ** 2000-05-29
1400 **
1401 ** The author disclaims copyright to this source code. In place of
1402 ** a legal notice, here is a blessing:
1403 **
1404 ** May you do good and not evil.
1405 ** May you find forgiveness for yourself and forgive others.
1406 ** May you share freely, never taking more than you give.
1407 **
1408 *************************************************************************
1409 ** Driver template for the LEMON parser generator.
1410 **
1411 ** The "lemon" program processes an LALR(1) input grammar file, then uses
1412 ** this template to construct a parser. The "lemon" program inserts text
1413 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
1414 ** interstitial "-" characters) contained in this template is changed into
1415 ** the value of the %name directive from the grammar. Otherwise, the content
1416 ** of this template is copied straight through into the generated parser
1417 ** source file.
1418 **
1419 ** The following is the concatenation of all %include directives from the
1420 ** input grammar file:
1421 */
1422 /* #include <stdio.h> */
1423 /************ Begin %include sections from the grammar ************************/
1424
1425 /* #include "fts5Int.h" */
1426 /* #include "fts5parse.h" */
1427
1428 /*
1429 ** Disable all error recovery processing in the parser push-down
1430 ** automaton.
1431 */
1432 #define fts5YYNOERRORRECOVERY 1
1433
1434 /*
1435 ** Make fts5yytestcase() the same as testcase()
1436 */
1437 #define fts5yytestcase(X) testcase(X)
1438
1439 /*
1440 ** Indicate that sqlite3ParserFree() will never be called with a null
1441 ** pointer.
1442 */
1443 #define fts5YYPARSEFREENOTNULL 1
1444
1445 /*
1446 ** Alternative datatype for the argument to the malloc() routine passed
1447 ** into sqlite3ParserAlloc(). The default is size_t.
1448 */
1449 #define fts5YYMALLOCARGTYPE u64
1450
1451 /**************** End of %include directives **********************************/
1452 /* These constants specify the various numeric values for terminal symbols
1453 ** in a format understandable to "makeheaders". This section is blank unless
1454 ** "lemon" is run with the "-m" command-line option.
1455 ***************** Begin makeheaders token definitions *************************/
1456 /**************** End makeheaders token definitions ***************************/
1457
1458 /* The next section is a series of control #defines that govern
1459 ** various aspects of the generated parser.
1460 ** fts5YYCODETYPE is the data type used to store the integer codes
1461 ** that represent terminal and non-terminal symbols.
1462 ** "unsigned char" is used if there are fewer than
1463 ** 256 symbols. Larger types otherwise.
1464 ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for
1465 ** any terminal or nonterminal symbol.
1466 ** fts5YYFALLBACK If defined, this indicates that one or more tokens
1467 ** (also known as: "terminal symbols") have fall-back
1468 ** values which should be used if the original symbol
1469 ** would not parse. This permits keywords to sometimes
1470 ** be used as identifiers, for example.
1471 ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers
1472 ** that indicate what to do in response to the next
1473 ** token.
1474 ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal
1475 ** symbols. Background: A "minor type" is a semantic
1476 ** value associated with a terminal or non-terminal
1477 ** symbols. For example, for an "ID" terminal symbol,
1478 ** the minor type might be the name of the identifier.
1479 ** Each non-terminal can have a different minor type.
1480 ** Terminal symbols all have the same minor type, though.
1481 ** This macros defines the minor type for terminal
1482 ** symbols.
1483 ** fts5YYMINORTYPE is the data type used for all minor types.
1484 ** This is typically a union of many types, one of
1485 ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union
1486 ** for terminal symbols is called "fts5yy0".
1487 ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If
1488 ** zero the stack is dynamically sized using realloc()
1489 ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument
1490 ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument
1491 ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser
1492 ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser
1493 ** fts5YYERRORSYMBOL is the code number of the error symbol. If not
1494 ** defined, then do no error processing.
1495 ** fts5YYNSTATE the combined number of states.
1496 ** fts5YYNRULE the number of rules in the grammar
1497 ** fts5YY_MAX_SHIFT Maximum value for shift actions
1498 ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1499 ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1500 ** fts5YY_MIN_REDUCE Minimum value for reduce actions
1501 ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error
1502 ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept
1503 ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op
1504 */
1505 #ifndef INTERFACE
1506 # define INTERFACE 1
1507 #endif
1508 /************* Begin control #defines *****************************************/
1509 #define fts5YYCODETYPE unsigned char
1510 #define fts5YYNOCODE 28
1511 #define fts5YYACTIONTYPE unsigned char
1512 #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token
1513 typedef union {
1514 int fts5yyinit;
1515 sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; /* Minor type for all terminal symbols (an Fts5Token) */
1516 int fts5yy4; /* Non-terminals whose value is an int */
1517 Fts5Colset* fts5yy11; /* Non-terminals whose value is an Fts5Colset* */
1518 Fts5ExprNode* fts5yy24; /* Non-terminals whose value is an Fts5ExprNode* */
1519 Fts5ExprNearset* fts5yy46; /* Non-terminals whose value is an Fts5ExprNearset* */
1520 Fts5ExprPhrase* fts5yy53; /* Non-terminals whose value is an Fts5ExprPhrase* */
1521 } fts5YYMINORTYPE;
1522 #ifndef fts5YYSTACKDEPTH
1523 #define fts5YYSTACKDEPTH 100
1524 #endif
1525 #define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse;
1526 #define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse
1527 #define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse = fts5yypParser->pParse
1528 #define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse = pParse
1529 #define fts5YYNSTATE 29
1530 #define fts5YYNRULE 26
1531 #define fts5YY_MAX_SHIFT 28
1532 #define fts5YY_MIN_SHIFTREDUCE 45
1533 #define fts5YY_MAX_SHIFTREDUCE 70
1534 #define fts5YY_MIN_REDUCE 71
1535 #define fts5YY_MAX_REDUCE 96
1536 #define fts5YY_ERROR_ACTION 97
1537 #define fts5YY_ACCEPT_ACTION 98
1538 #define fts5YY_NO_ACTION 99
1539 /************* End control #defines *******************************************/
1540
1541 /* Define the fts5yytestcase() macro to be a no-op if it is not already defined
1542 ** otherwise.
1543 **
1544 ** Applications can choose to define fts5yytestcase() in the %include section
1545 ** to a macro that can assist in verifying code coverage. For production
1546 ** code the fts5yytestcase() macro should be turned off. But it is useful
1547 ** for testing.
1548 */
1549 #ifndef fts5yytestcase
1550 # define fts5yytestcase(X)
1551 #endif
1552
1553
1554 /* Next are the tables used to determine what action to take based on the
1555 ** current state and lookahead token. These tables are used to implement
1556 ** functions that take a state number and lookahead value and return an
1557 ** action integer.
1558 **
1559 ** Suppose the action integer is N. Then the action is determined as
1560 ** follows
1561 **
1562 ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead
1563 ** token onto the stack and goto state N.
1564 **
1565 ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1566 ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE.
1567 **
1568 ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE
1569 ** and fts5YY_MAX_REDUCE
1570 **
1571 ** N == fts5YY_ERROR_ACTION A syntax error has occurred.
1572 **
1573 ** N == fts5YY_ACCEPT_ACTION The parser accepts its input.
1574 **
1575 ** N == fts5YY_NO_ACTION No such action. Denotes unused
1576 ** slots in the fts5yy_action[] table.
1577 **
1578 ** The action table is constructed as a single large table named fts5yy_action[].
1579 ** Given state S and lookahead X, the action is computed as either:
1580 **
1581 ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ]
1582 ** (B) N = fts5yy_default[S]
1583 **
1584 ** The (A) formula is preferred. The B formula is used instead if:
1585 ** (1) The fts5yy_shift_ofst[S]+X value is out of range, or
1586 ** (2) fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X, or
1587 ** (3) fts5yy_shift_ofst[S] equal fts5YY_SHIFT_USE_DFLT.
1588 ** (Implementation note: fts5YY_SHIFT_USE_DFLT is chosen so that
1589 ** fts5YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X.
1590 ** Hence only tests (1) and (2) need to be evaluated.)
1591 **
1592 ** The formulas above are for computing the action when the lookahead is
1593 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1594 ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of
1595 ** the fts5yy_shift_ofst[] array and fts5YY_REDUCE_USE_DFLT is used in place of
1596 ** fts5YY_SHIFT_USE_DFLT.
1597 **
1598 ** The following are the tables generated in this section:
1599 **
1600 ** fts5yy_action[] A single table containing all actions.
1601 ** fts5yy_lookahead[] A table containing the lookahead for each entry in
1602 ** fts5yy_action. Used to detect hash collisions.
1603 ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for
1604 ** shifting terminals.
1605 ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for
1606 ** shifting non-terminals after a reduce.
1607 ** fts5yy_default[] Default action for each state.
1608 **
1609 *********** Begin parsing tables **********************************************/
1610 #define fts5YY_ACTTAB_COUNT (85)
1611 static const fts5YYACTIONTYPE fts5yy_action[] = {
1612 /* 0 */ 98, 16, 51, 5, 53, 27, 83, 7, 26, 15,
1613 /* 10 */ 51, 5, 53, 27, 13, 69, 26, 48, 51, 5,
1614 /* 20 */ 53, 27, 19, 11, 26, 9, 20, 51, 5, 53,
1615 /* 30 */ 27, 13, 22, 26, 28, 51, 5, 53, 27, 68,
1616 /* 40 */ 1, 26, 19, 11, 17, 9, 52, 10, 53, 27,
1617 /* 50 */ 23, 24, 26, 54, 3, 4, 2, 26, 6, 21,
1618 /* 60 */ 49, 71, 3, 4, 2, 7, 56, 59, 55, 59,
1619 /* 70 */ 4, 2, 12, 69, 58, 60, 18, 67, 62, 69,
1620 /* 80 */ 25, 66, 8, 14, 2,
1621 };
1622 static const fts5YYCODETYPE fts5yy_lookahead[] = {
1623 /* 0 */ 16, 17, 18, 19, 20, 21, 5, 6, 24, 17,
1624 /* 10 */ 18, 19, 20, 21, 11, 14, 24, 17, 18, 19,
1625 /* 20 */ 20, 21, 8, 9, 24, 11, 17, 18, 19, 20,
1626 /* 30 */ 21, 11, 12, 24, 17, 18, 19, 20, 21, 26,
1627 /* 40 */ 6, 24, 8, 9, 22, 11, 18, 11, 20, 21,
1628 /* 50 */ 24, 25, 24, 20, 1, 2, 3, 24, 23, 24,
1629 /* 60 */ 7, 0, 1, 2, 3, 6, 10, 11, 10, 11,
1630 /* 70 */ 2, 3, 9, 14, 11, 11, 22, 26, 7, 14,
1631 /* 80 */ 13, 11, 5, 11, 3,
1632 };
1633 #define fts5YY_SHIFT_USE_DFLT (85)
1634 #define fts5YY_SHIFT_COUNT (28)
1635 #define fts5YY_SHIFT_MIN (0)
1636 #define fts5YY_SHIFT_MAX (81)
1637 static const unsigned char fts5yy_shift_ofst[] = {
1638 /* 0 */ 34, 34, 34, 34, 34, 14, 20, 3, 36, 1,
1639 /* 10 */ 59, 64, 64, 65, 65, 53, 61, 56, 58, 63,
1640 /* 20 */ 68, 67, 70, 67, 71, 72, 67, 77, 81,
1641 };
1642 #define fts5YY_REDUCE_USE_DFLT (-17)
1643 #define fts5YY_REDUCE_COUNT (14)
1644 #define fts5YY_REDUCE_MIN (-16)
1645 #define fts5YY_REDUCE_MAX (54)
1646 static const signed char fts5yy_reduce_ofst[] = {
1647 /* 0 */ -16, -8, 0, 9, 17, 28, 26, 35, 33, 13,
1648 /* 10 */ 13, 22, 54, 13, 51,
1649 };
1650 static const fts5YYACTIONTYPE fts5yy_default[] = {
1651 /* 0 */ 97, 97, 97, 97, 97, 76, 91, 97, 97, 96,
1652 /* 10 */ 96, 97, 97, 96, 96, 97, 97, 97, 97, 97,
1653 /* 20 */ 73, 89, 97, 90, 97, 97, 87, 97, 72,
1654 };
1655 /********** End of lemon-generated parsing tables *****************************/
1656
/* Fallback-token table: maps a terminal symbol to its fallback token.
**
** If the grammar contains a construct such as
**
**      %fallback ID X Y Z.
**
** then ID is the fallback token for X, Y and Z.  When one of X, Y or Z is
** handed to the parser but does not parse, its type is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to let some keywords of a language
** revert to identifiers when the keyword does not apply in the context
** where it appears.
**
** The table is compiled only when fts5YYFALLBACK is defined; it has no
** entries here.
*/
#if defined(fts5YYFALLBACK)
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
1675
1676 /* The following structure represents a single element of the
1677 ** parser's stack. Information stored includes:
1678 **
1679 ** + The state number for the parser at this level of the stack.
1680 **
1681 ** + The value of the token stored at this level of the stack.
1682 ** (In other words, the "major" token.)
1683 **
1684 ** + The semantic value stored at this level of the stack. This is
1685 ** the information used by the action routines in the grammar.
1686 ** It is sometimes called the "minor" token.
1687 **
1688 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1689 ** actually contains the reduce action for the second half of the
1690 ** SHIFTREDUCE.
1691 */
1692 struct fts5yyStackEntry {
1693 fts5YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUC E */
1694 fts5YYCODETYPE major; /* The major token value. This is the code
1695 ** number for the token at this stack level */
1696 fts5YYMINORTYPE minor; /* The user-supplied minor token value. This
1697 ** is the value of the token */
1698 };
1699 typedef struct fts5yyStackEntry fts5yyStackEntry;
1700
1701 /* The state of the parser is completely contained in an instance of
1702 ** the following structure */
1703 struct fts5yyParser {
1704 fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack * /
1705 #ifdef fts5YYTRACKMAXSTACKDEPTH
1706 int fts5yyhwm; /* High-water mark of the stack */
1707 #endif
1708 #ifndef fts5YYNOERRORRECOVERY
1709 int fts5yyerrcnt; /* Shifts left before out of the error */
1710 #endif
1711 sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument * /
1712 #if fts5YYSTACKDEPTH<=0
1713 int fts5yystksz; /* Current side of the stack */
1714 fts5yyStackEntry *fts5yystack; /* The parser's stack */
1715 fts5yyStackEntry fts5yystk0; /* First stack entry */
1716 #else
1717 fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH]; /* The parser's stack */
1718 #endif
1719 };
1720 typedef struct fts5yyParser fts5yyParser;
1721
#ifndef NDEBUG
/* #include <stdio.h> */

/* Trace destination and per-line prefix.  Both are zero while tracing is
** switched off. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;

/*
** Turn parser tracing on by giving a stream to which to write the trace
** and a prompt to preface each trace message.  Tracing is turned off
** by making either argument NULL.
**
** Inputs:
** <ul>
** <li> TraceFILE: a FILE* to which trace output should be written.
**      If NULL, then tracing is turned off.
** <li> zTracePrompt: a prefix string written at the beginning of every
**      line of trace output.  If NULL, then tracing is turned off.
** </ul>
**
** Outputs:
** None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    /* A missing stream or prompt disables tracing: clear both globals. */
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
1753
#ifndef NDEBUG
/* Names of all terminals and nonterminals, indexed by symbol code.
** The table is needed for tracing shifts. */
static const char *const fts5yyTokenName[] = {
  /*  0 */ "$",
  /*  1 */ "OR",
  /*  2 */ "AND",
  /*  3 */ "NOT",
  /*  4 */ "TERM",
  /*  5 */ "COLON",
  /*  6 */ "LP",
  /*  7 */ "RP",
  /*  8 */ "MINUS",
  /*  9 */ "LCP",
  /* 10 */ "RCP",
  /* 11 */ "STRING",
  /* 12 */ "COMMA",
  /* 13 */ "PLUS",
  /* 14 */ "STAR",
  /* 15 */ "error",
  /* 16 */ "input",
  /* 17 */ "expr",
  /* 18 */ "cnearset",
  /* 19 */ "exprlist",
  /* 20 */ "nearset",
  /* 21 */ "colset",
  /* 22 */ "colsetlist",
  /* 23 */ "nearphrases",
  /* 24 */ "phrase",
  /* 25 */ "neardist_opt",
  /* 26 */ "star_opt",
};
#endif /* NDEBUG */
1767
#ifndef NDEBUG
/* Text of every grammar rule, indexed by rule number.  The table is
** needed for tracing reduce actions. */
static const char *const fts5yyRuleName[] = {
  "input ::= expr",                                      /* rule  0 */
  "expr ::= expr AND expr",                              /* rule  1 */
  "expr ::= expr OR expr",                               /* rule  2 */
  "expr ::= expr NOT expr",                              /* rule  3 */
  "expr ::= LP expr RP",                                 /* rule  4 */
  "expr ::= exprlist",                                   /* rule  5 */
  "exprlist ::= cnearset",                               /* rule  6 */
  "exprlist ::= exprlist cnearset",                      /* rule  7 */
  "cnearset ::= nearset",                                /* rule  8 */
  "cnearset ::= colset COLON nearset",                   /* rule  9 */
  "colset ::= MINUS LCP colsetlist RCP",                 /* rule 10 */
  "colset ::= LCP colsetlist RCP",                       /* rule 11 */
  "colset ::= STRING",                                   /* rule 12 */
  "colset ::= MINUS STRING",                             /* rule 13 */
  "colsetlist ::= colsetlist STRING",                    /* rule 14 */
  "colsetlist ::= STRING",                               /* rule 15 */
  "nearset ::= phrase",                                  /* rule 16 */
  "nearset ::= STRING LP nearphrases neardist_opt RP",   /* rule 17 */
  "nearphrases ::= phrase",                              /* rule 18 */
  "nearphrases ::= nearphrases phrase",                  /* rule 19 */
  "neardist_opt ::=",                                    /* rule 20 */
  "neardist_opt ::= COMMA STRING",                       /* rule 21 */
  "phrase ::= phrase PLUS STRING star_opt",              /* rule 22 */
  "phrase ::= STRING star_opt",                          /* rule 23 */
  "star_opt ::= STAR",                                   /* rule 24 */
  "star_opt ::=",                                        /* rule 25 */
};
#endif /* NDEBUG */
1800
1801
1802 #if fts5YYSTACKDEPTH<=0
1803 /*
1804 ** Try to increase the size of the parser stack. Return the number
1805 ** of errors. Return 0 on success.
1806 */
1807 static int fts5yyGrowStack(fts5yyParser *p){
1808 int newSize;
1809 int idx;
1810 fts5yyStackEntry *pNew;
1811
1812 newSize = p->fts5yystksz*2 + 100;
1813 idx = p->fts5yytos ? (int)(p->fts5yytos - p->fts5yystack) : 0;
1814 if( p->fts5yystack==&p->fts5yystk0 ){
1815 pNew = malloc(newSize*sizeof(pNew[0]));
1816 if( pNew ) pNew[0] = p->fts5yystk0;
1817 }else{
1818 pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0]));
1819 }
1820 if( pNew ){
1821 p->fts5yystack = pNew;
1822 p->fts5yytos = &p->fts5yystack[idx];
1823 #ifndef NDEBUG
1824 if( fts5yyTraceFILE ){
1825 fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
1826 fts5yyTracePrompt, p->fts5yystksz, newSize);
1827 }
1828 #endif
1829 p->fts5yystksz = newSize;
1830 }
1831 return pNew==0;
1832 }
1833 #endif
1834
/* Datatype of the argument taken by the memory allocator that is passed
** as the argument to sqlite3Fts5ParserAlloc() below.  It defaults to
** size_t and can be changed by putting an appropriate #define in the
** %include section of the input grammar.
*/
#if !defined(fts5YYMALLOCARGTYPE)
# define fts5YYMALLOCARGTYPE size_t
#endif
1843
1844 /* Initialize a new parser that has already been allocated.
1845 */
1846 static void sqlite3Fts5ParserInit(void *fts5yypParser){
1847 fts5yyParser *pParser = (fts5yyParser*)fts5yypParser;
1848 #ifdef fts5YYTRACKMAXSTACKDEPTH
1849 pParser->fts5yyhwm = 0;
1850 #endif
1851 #if fts5YYSTACKDEPTH<=0
1852 pParser->fts5yytos = NULL;
1853 pParser->fts5yystack = NULL;
1854 pParser->fts5yystksz = 0;
1855 if( fts5yyGrowStack(pParser) ){
1856 pParser->fts5yystack = &pParser->fts5yystk0;
1857 pParser->fts5yystksz = 1;
1858 }
1859 #endif
1860 #ifndef fts5YYNOERRORRECOVERY
1861 pParser->fts5yyerrcnt = -1;
1862 #endif
1863 pParser->fts5yytos = pParser->fts5yystack;
1864 pParser->fts5yystack[0].stateno = 0;
1865 pParser->fts5yystack[0].major = 0;
1866 }
1867
1868 #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
1869 /*
1870 ** This function allocates a new parser.
1871 ** The only argument is a pointer to a function which works like
1872 ** malloc.
1873 **
1874 ** Inputs:
1875 ** A pointer to the function used to allocate memory.
1876 **
1877 ** Outputs:
1878 ** A pointer to a parser. This pointer is used in subsequent calls
1879 ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
1880 */
1881 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE)){
1882 fts5yyParser *pParser;
1883 pParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyPars er) );
1884 if( pParser ) sqlite3Fts5ParserInit(pParser);
1885 return pParser;
1886 }
1887 #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
1888
1889
1890 /* The following function deletes the "minor type" or semantic value
1891 ** associated with a symbol. The symbol can be either a terminal
1892 ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
1893 ** a pointer to the value to be deleted. The code used to do the
1894 ** deletions is derived from the %destructor and/or %token_destructor
1895 ** directives of the input grammar.
1896 */
1897 static void fts5yy_destructor(
1898 fts5yyParser *fts5yypParser, /* The parser */
1899 fts5YYCODETYPE fts5yymajor, /* Type code for object to destroy */
1900 fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */
1901 ){
1902 sqlite3Fts5ParserARG_FETCH;
1903 switch( fts5yymajor ){
1904 /* Here is inserted the actions which take place when a
1905 ** terminal or non-terminal is destroyed. This can happen
1906 ** when the symbol is popped from the stack during a
1907 ** reduce or during error processing or when a parser is
1908 ** being destroyed before it is finished parsing.
1909 **
1910 ** Note: during a reduce, the only symbols destroyed are those
1911 ** which appear on the RHS of the rule, but which are *not* used
1912 ** inside the C code.
1913 */
1914 /********* Begin destructor definitions ***************************************/
1915 case 16: /* input */
1916 {
1917 (void)pParse;
1918 }
1919 break;
1920 case 17: /* expr */
1921 case 18: /* cnearset */
1922 case 19: /* exprlist */
1923 {
1924 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24));
1925 }
1926 break;
1927 case 20: /* nearset */
1928 case 23: /* nearphrases */
1929 {
1930 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46));
1931 }
1932 break;
1933 case 21: /* colset */
1934 case 22: /* colsetlist */
1935 {
1936 sqlite3_free((fts5yypminor->fts5yy11));
1937 }
1938 break;
1939 case 24: /* phrase */
1940 {
1941 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53));
1942 }
1943 break;
1944 /********* End destructor definitions *****************************************/
1945 default: break; /* If no destructor action specified: do nothing */
1946 }
1947 }
1948
1949 /*
1950 ** Pop the parser's stack once.
1951 **
1952 ** If there is a destructor routine associated with the token which
1953 ** is popped from the stack, then call it.
1954 */
1955 static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
1956 fts5yyStackEntry *fts5yytos;
1957 assert( pParser->fts5yytos!=0 );
1958 assert( pParser->fts5yytos > pParser->fts5yystack );
1959 fts5yytos = pParser->fts5yytos--;
1960 #ifndef NDEBUG
1961 if( fts5yyTraceFILE ){
1962 fprintf(fts5yyTraceFILE,"%sPopping %s\n",
1963 fts5yyTracePrompt,
1964 fts5yyTokenName[fts5yytos->major]);
1965 }
1966 #endif
1967 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
1968 }
1969
1970 /*
1971 ** Clear all secondary memory allocations from the parser
1972 */
1973 static void sqlite3Fts5ParserFinalize(void *p){
1974 fts5yyParser *pParser = (fts5yyParser*)p;
1975 while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pPars er);
1976 #if fts5YYSTACKDEPTH<=0
1977 if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack);
1978 #endif
1979 }
1980
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Deallocate and destroy a parser.  Destructors are called for
** all stack elements before shutting the parser down, and the parser
** memory is then handed to freeProc.
**
** If the fts5YYPARSEFREENEVERNULL macro exists (for example because it
** is defined in a %include section of the input grammar) then it is
** assumed that the input pointer is never NULL.  Otherwise a NULL p is
** accepted and nothing is done.
*/
static void sqlite3Fts5ParserFree(
  void *p,                      /* The parser to be deleted */
  void (*freeProc)(void*)       /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( !p ){
    return;
  }
#endif
  sqlite3Fts5ParserFinalize(p);
  freeProc(p);
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2001
/*
** Return the peak depth of the stack for a parser, as recorded in its
** fts5yyhwm field.  Only available when fts5YYTRACKMAXSTACKDEPTH is
** defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
2011
2012 /*
2013 ** Find the appropriate action for a parser given the terminal
2014 ** look-ahead token iLookAhead.
2015 */
2016 static unsigned int fts5yy_find_shift_action(
2017 fts5yyParser *pParser, /* The parser */
2018 fts5YYCODETYPE iLookAhead /* The look-ahead token */
2019 ){
2020 int i;
2021 int stateno = pParser->fts5yytos->stateno;
2022
2023 if( stateno>=fts5YY_MIN_REDUCE ) return stateno;
2024 assert( stateno <= fts5YY_SHIFT_COUNT );
2025 do{
2026 i = fts5yy_shift_ofst[stateno];
2027 assert( iLookAhead!=fts5YYNOCODE );
2028 i += iLookAhead;
2029 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
2030 #ifdef fts5YYFALLBACK
2031 fts5YYCODETYPE iFallback; /* Fallback token */
2032 if( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])
2033 && (iFallback = fts5yyFallback[iLookAhead])!=0 ){
2034 #ifndef NDEBUG
2035 if( fts5yyTraceFILE ){
2036 fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
2037 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFa llback]);
2038 }
2039 #endif
2040 assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
2041 iLookAhead = iFallback;
2042 continue;
2043 }
2044 #endif
2045 #ifdef fts5YYWILDCARD
2046 {
2047 int j = i - iLookAhead + fts5YYWILDCARD;
2048 if(
2049 #if fts5YY_SHIFT_MIN+fts5YYWILDCARD<0
2050 j>=0 &&
2051 #endif
2052 #if fts5YY_SHIFT_MAX+fts5YYWILDCARD>=fts5YY_ACTTAB_COUNT
2053 j<fts5YY_ACTTAB_COUNT &&
2054 #endif
2055 fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0
2056 ){
2057 #ifndef NDEBUG
2058 if( fts5yyTraceFILE ){
2059 fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
2060 fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
2061 fts5yyTokenName[fts5YYWILDCARD]);
2062 }
2063 #endif /* NDEBUG */
2064 return fts5yy_action[j];
2065 }
2066 }
2067 #endif /* fts5YYWILDCARD */
2068 return fts5yy_default[stateno];
2069 }else{
2070 return fts5yy_action[i];
2071 }
2072 }while(1);
2073 }
2074
2075 /*
2076 ** Find the appropriate action for a parser given the non-terminal
2077 ** look-ahead token iLookAhead.
2078 */
2079 static int fts5yy_find_reduce_action(
2080 int stateno, /* Current state number */
2081 fts5YYCODETYPE iLookAhead /* The look-ahead token */
2082 ){
2083 int i;
2084 #ifdef fts5YYERRORSYMBOL
2085 if( stateno>fts5YY_REDUCE_COUNT ){
2086 return fts5yy_default[stateno];
2087 }
2088 #else
2089 assert( stateno<=fts5YY_REDUCE_COUNT );
2090 #endif
2091 i = fts5yy_reduce_ofst[stateno];
2092 assert( i!=fts5YY_REDUCE_USE_DFLT );
2093 assert( iLookAhead!=fts5YYNOCODE );
2094 i += iLookAhead;
2095 #ifdef fts5YYERRORSYMBOL
2096 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
2097 return fts5yy_default[stateno];
2098 }
2099 #else
2100 assert( i>=0 && i<fts5YY_ACTTAB_COUNT );
2101 assert( fts5yy_lookahead[i]==iLookAhead );
2102 #endif
2103 return fts5yy_action[i];
2104 }
2105
2106 /*
2107 ** The following routine is called if the stack overflows.
2108 */
2109 static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
2110 sqlite3Fts5ParserARG_FETCH;
2111 #ifndef NDEBUG
2112 if( fts5yyTraceFILE ){
2113 fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
2114 }
2115 #endif
2116 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parse r_stack(fts5yypParser);
2117 /* Here code is inserted which will execute if the parser
2118 ** stack every overflows */
2119 /******** Begin %stack_overflow code ******************************************/
2120
2121 sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
2122 /******** End %stack_overflow code ********************************************/
2123 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument var */
2124 }
2125
2126 /*
2127 ** Print tracing information for a SHIFT action
2128 */
2129 #ifndef NDEBUG
2130 static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState){
2131 if( fts5yyTraceFILE ){
2132 if( fts5yyNewState<fts5YYNSTATE ){
2133 fprintf(fts5yyTraceFILE,"%sShift '%s', go to state %d\n",
2134 fts5yyTracePrompt,fts5yyTokenName[fts5yypParser->fts5yytos->major],
2135 fts5yyNewState);
2136 }else{
2137 fprintf(fts5yyTraceFILE,"%sShift '%s'\n",
2138 fts5yyTracePrompt,fts5yyTokenName[fts5yypParser->fts5yytos->major]);
2139 }
2140 }
2141 }
2142 #else
2143 # define fts5yyTraceShift(X,Y)
2144 #endif
2145
2146 /*
2147 ** Perform a shift action.
2148 */
2149 static void fts5yy_shift(
2150 fts5yyParser *fts5yypParser, /* The parser to be shifted */
2151 int fts5yyNewState, /* The new state to shift in */
2152 int fts5yyMajor, /* The major token to shift in */
2153 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor /* The minor token to shift in */
2154 ){
2155 fts5yyStackEntry *fts5yytos;
2156 fts5yypParser->fts5yytos++;
2157 #ifdef fts5YYTRACKMAXSTACKDEPTH
2158 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser ->fts5yyhwm ){
2159 fts5yypParser->fts5yyhwm++;
2160 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yyp Parser->fts5yystack) );
2161 }
2162 #endif
2163 #if fts5YYSTACKDEPTH>0
2164 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5YYSTACKDEPTH] ){
2165 fts5yypParser->fts5yytos--;
2166 fts5yyStackOverflow(fts5yypParser);
2167 return;
2168 }
2169 #else
2170 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5y ystksz] ){
2171 if( fts5yyGrowStack(fts5yypParser) ){
2172 fts5yypParser->fts5yytos--;
2173 fts5yyStackOverflow(fts5yypParser);
2174 return;
2175 }
2176 }
2177 #endif
2178 if( fts5yyNewState > fts5YY_MAX_SHIFT ){
2179 fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
2180 }
2181 fts5yytos = fts5yypParser->fts5yytos;
2182 fts5yytos->stateno = (fts5YYACTIONTYPE)fts5yyNewState;
2183 fts5yytos->major = (fts5YYCODETYPE)fts5yyMajor;
2184 fts5yytos->minor.fts5yy0 = fts5yyMinor;
2185 fts5yyTraceShift(fts5yypParser, fts5yyNewState);
2186 }
2187
2188 /* The following table contains information about every rule that
2189 ** is used during the reduce.
2190 */
2191 static const struct {
2192 fts5YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */
2193 unsigned char nrhs; /* Number of right-hand side symbols in the rule */
2194 } fts5yyRuleInfo[] = {
2195 { 16, 1 },
2196 { 17, 3 },
2197 { 17, 3 },
2198 { 17, 3 },
2199 { 17, 3 },
2200 { 17, 1 },
2201 { 19, 1 },
2202 { 19, 2 },
2203 { 18, 1 },
2204 { 18, 3 },
2205 { 21, 4 },
2206 { 21, 3 },
2207 { 21, 1 },
2208 { 21, 2 },
2209 { 22, 2 },
2210 { 22, 1 },
2211 { 20, 1 },
2212 { 20, 5 },
2213 { 23, 1 },
2214 { 23, 2 },
2215 { 25, 0 },
2216 { 25, 2 },
2217 { 24, 4 },
2218 { 24, 2 },
2219 { 26, 1 },
2220 { 26, 0 },
2221 };
2222
2223 static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */
2224
2225 /*
2226 ** Perform a reduce action and the shift that must immediately
2227 ** follow the reduce.
2228 */
2229 static void fts5yy_reduce(
2230 fts5yyParser *fts5yypParser, /* The parser */
2231 unsigned int fts5yyruleno /* Number of the rule by which to reduce */
2232 ){
2233 int fts5yygoto; /* The next state */
2234 int fts5yyact; /* The next action */
2235 fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */
2236 int fts5yysize; /* Amount to pop the stack */
2237 sqlite3Fts5ParserARG_FETCH;
2238 fts5yymsp = fts5yypParser->fts5yytos;
2239 #ifndef NDEBUG
2240 if( fts5yyTraceFILE && fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yy RuleName[0])) ){
2241 fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs;
2242 fprintf(fts5yyTraceFILE, "%sReduce [%s], go to state %d.\n", fts5yyTraceProm pt,
2243 fts5yyRuleName[fts5yyruleno], fts5yymsp[-fts5yysize].stateno);
2244 }
2245 #endif /* NDEBUG */
2246
2247 /* Check that the stack is large enough to grow by a single entry
2248 ** if the RHS of the rule is empty. This ensures that there is room
2249 ** enough on the stack to push the LHS value */
2250 if( fts5yyRuleInfo[fts5yyruleno].nrhs==0 ){
2251 #ifdef fts5YYTRACKMAXSTACKDEPTH
2252 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypPars er->fts5yyhwm ){
2253 fts5yypParser->fts5yyhwm++;
2254 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5y ypParser->fts5yystack));
2255 }
2256 #endif
2257 #if fts5YYSTACKDEPTH>0
2258 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1 ] ){
2259 fts5yyStackOverflow(fts5yypParser);
2260 return;
2261 }
2262 #else
2263 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts 5yystksz-1] ){
2264 if( fts5yyGrowStack(fts5yypParser) ){
2265 fts5yyStackOverflow(fts5yypParser);
2266 return;
2267 }
2268 fts5yymsp = fts5yypParser->fts5yytos;
2269 }
2270 #endif
2271 }
2272
2273 switch( fts5yyruleno ){
2274 /* Beginning here are the reduction cases. A typical example
2275 ** follows:
2276 ** case 0:
2277 ** #line <lineno> <grammarfile>
2278 ** { ... } // User supplied code
2279 ** #line <lineno> <thisfile>
2280 ** break;
2281 */
2282 /********** Begin reduce actions **********************************************/
2283 fts5YYMINORTYPE fts5yylhsminor;
2284 case 0: /* input ::= expr */
2285 { sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
2286 break;
2287 case 1: /* expr ::= expr AND expr */
2288 {
2289 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2] .minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2290 }
2291 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2292 break;
2293 case 2: /* expr ::= expr OR expr */
2294 {
2295 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2]. minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2296 }
2297 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2298 break;
2299 case 3: /* expr ::= expr NOT expr */
2300 {
2301 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2] .minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2302 }
2303 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2304 break;
2305 case 4: /* expr ::= LP expr RP */
2306 {fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
2307 break;
2308 case 5: /* expr ::= exprlist */
2309 case 6: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==6);
2310 {fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
2311 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2312 break;
2313 case 7: /* exprlist ::= exprlist cnearset */
2314 {
2315 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].mi nor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
2316 }
2317 fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2318 break;
2319 case 8: /* cnearset ::= nearset */
2320 {
2321 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5 yymsp[0].minor.fts5yy46);
2322 }
2323 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2324 break;
2325 case 9: /* cnearset ::= colset COLON nearset */
2326 {
2327 sqlite3Fts5ParseSetColset(pParse, fts5yymsp[0].minor.fts5yy46, fts5yymsp[-2].m inor.fts5yy11);
2328 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5 yymsp[0].minor.fts5yy46);
2329 }
2330 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2331 break;
2332 case 10: /* colset ::= MINUS LCP colsetlist RCP */
2333 {
2334 fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yyms p[-1].minor.fts5yy11);
2335 }
2336 break;
2337 case 11: /* colset ::= LCP colsetlist RCP */
2338 { fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
2339 break;
2340 case 12: /* colset ::= STRING */
2341 {
2342 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].mino r.fts5yy0);
2343 }
2344 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2345 break;
2346 case 13: /* colset ::= MINUS STRING */
2347 {
2348 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0] .minor.fts5yy0);
2349 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[ -1].minor.fts5yy11);
2350 }
2351 break;
2352 case 14: /* colsetlist ::= colsetlist STRING */
2353 {
2354 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.f ts5yy11, &fts5yymsp[0].minor.fts5yy0); }
2355 fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2356 break;
2357 case 15: /* colsetlist ::= STRING */
2358 {
2359 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].mino r.fts5yy0);
2360 }
2361 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2362 break;
2363 case 16: /* nearset ::= phrase */
2364 { fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].mino r.fts5yy53); }
2365 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2366 break;
2367 case 17: /* nearset ::= STRING LP nearphrases neardist_opt RP */
2368 {
2369 sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
2370 sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[- 1].minor.fts5yy0);
2371 fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
2372 }
2373 fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2374 break;
2375 case 18: /* nearphrases ::= phrase */
2376 {
2377 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].mino r.fts5yy53);
2378 }
2379 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2380 break;
2381 case 19: /* nearphrases ::= nearphrases phrase */
2382 {
2383 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor. fts5yy46, fts5yymsp[0].minor.fts5yy53);
2384 }
2385 fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2386 break;
2387 case 20: /* neardist_opt ::= */
2388 { fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
2389 break;
2390 case 21: /* neardist_opt ::= COMMA STRING */
2391 { fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
2392 break;
2393 case 22: /* phrase ::= phrase PLUS STRING star_opt */
2394 {
2395 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts 5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
2396 }
2397 fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
2398 break;
2399 case 23: /* phrase ::= STRING star_opt */
2400 {
2401 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor .fts5yy0, fts5yymsp[0].minor.fts5yy4);
2402 }
2403 fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
2404 break;
2405 case 24: /* star_opt ::= STAR */
2406 { fts5yymsp[0].minor.fts5yy4 = 1; }
2407 break;
2408 case 25: /* star_opt ::= */
2409 { fts5yymsp[1].minor.fts5yy4 = 0; }
2410 break;
2411 default:
2412 break;
2413 /********** End reduce actions ************************************************/
2414 };
2415 assert( fts5yyruleno<sizeof(fts5yyRuleInfo)/sizeof(fts5yyRuleInfo[0]) );
2416 fts5yygoto = fts5yyRuleInfo[fts5yyruleno].lhs;
2417 fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs;
2418 fts5yyact = fts5yy_find_reduce_action(fts5yymsp[-fts5yysize].stateno,(fts5YYCO DETYPE)fts5yygoto);
2419 if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
2420 if( fts5yyact>fts5YY_MAX_SHIFT ){
2421 fts5yyact += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
2422 }
2423 fts5yymsp -= fts5yysize-1;
2424 fts5yypParser->fts5yytos = fts5yymsp;
2425 fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
2426 fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
2427 fts5yyTraceShift(fts5yypParser, fts5yyact);
2428 }else{
2429 assert( fts5yyact == fts5YY_ACCEPT_ACTION );
2430 fts5yypParser->fts5yytos -= fts5yysize;
2431 fts5yy_accept(fts5yypParser);
2432 }
2433 }
2434
2435 /*
2436 ** The following code executes when the parse fails
2437 */
2438 #ifndef fts5YYNOERRORRECOVERY
2439 static void fts5yy_parse_failed(
2440 fts5yyParser *fts5yypParser /* The parser */
2441 ){
2442 sqlite3Fts5ParserARG_FETCH;
2443 #ifndef NDEBUG
2444 if( fts5yyTraceFILE ){
2445 fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
2446 }
2447 #endif
2448 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser _stack(fts5yypParser);
2449 /* Here code is inserted which will be executed whenever the
2450 ** parser fails */
2451 /************ Begin %parse_failure code ***************************************/
2452 /************ End %parse_failure code *****************************************/
2453 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v ariable */
2454 }
2455 #endif /* fts5YYNOERRORRECOVERY */
2456
2457 /*
2458 ** The following code executes when a syntax error first occurs.
2459 */
2460 static void fts5yy_syntax_error(
2461 fts5yyParser *fts5yypParser, /* The parser */
2462 int fts5yymajor, /* The major type of the error token */
2463 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The minor type of the er ror token */
2464 ){
2465 sqlite3Fts5ParserARG_FETCH;
2466 #define FTS5TOKEN fts5yyminor
2467 /************ Begin %syntax_error code ****************************************/
2468
2469 UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
2470 sqlite3Fts5ParseError(
2471 pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
2472 );
2473 /************ End %syntax_error code ******************************************/
2474 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v ariable */
2475 }
2476
2477 /*
2478 ** The following is executed when the parser accepts
2479 */
2480 static void fts5yy_accept(
2481 fts5yyParser *fts5yypParser /* The parser */
2482 ){
2483 sqlite3Fts5ParserARG_FETCH;
2484 #ifndef NDEBUG
2485 if( fts5yyTraceFILE ){
2486 fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
2487 }
2488 #endif
2489 #ifndef fts5YYNOERRORRECOVERY
2490 fts5yypParser->fts5yyerrcnt = -1;
2491 #endif
2492 assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
2493 /* Here code is inserted which will be executed whenever the
2494 ** parser accepts */
2495 /*********** Begin %parse_accept code *****************************************/
2496 /*********** End %parse_accept code *******************************************/
2497 sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument v ariable */
2498 }
2499
2500 /* The main parser program.
2501 ** The first argument is a pointer to a structure obtained from
2502 ** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
2503 ** The second argument is the major token number. The third is
2504 ** the minor token. The fourth optional argument is whatever the
2505 ** user wants (and specified in the grammar) and is available for
2506 ** use by the action routines.
2507 **
2508 ** Inputs:
2509 ** <ul>
2510 ** <li> A pointer to the parser (an opaque structure.)
2511 ** <li> The major token number.
2512 ** <li> The minor token number.
2513 ** <li> An option argument of a grammar-specified type.
2514 ** </ul>
2515 **
2516 ** Outputs:
2517 ** None.
2518 */
2519 static void sqlite3Fts5Parser(
2520 void *fts5yyp, /* The parser */
2521 int fts5yymajor, /* The major token code number */
2522 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The value for the token */
2523 sqlite3Fts5ParserARG_PDECL /* Optional %extra_argument parameter */
2524 ){
2525 fts5YYMINORTYPE fts5yyminorunion;
2526 unsigned int fts5yyact; /* The parser action. */
2527 #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
2528 int fts5yyendofinput; /* True if we are at the end of input */
2529 #endif
2530 #ifdef fts5YYERRORSYMBOL
2531 int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */
2532 #endif
2533 fts5yyParser *fts5yypParser; /* The parser */
2534
2535 fts5yypParser = (fts5yyParser*)fts5yyp;
2536 assert( fts5yypParser->fts5yytos!=0 );
2537 #if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
2538 fts5yyendofinput = (fts5yymajor==0);
2539 #endif
2540 sqlite3Fts5ParserARG_STORE;
2541
2542 #ifndef NDEBUG
2543 if( fts5yyTraceFILE ){
2544 fprintf(fts5yyTraceFILE,"%sInput '%s'\n",fts5yyTracePrompt,fts5yyTokenName[f ts5yymajor]);
2545 }
2546 #endif
2547
2548 do{
2549 fts5yyact = fts5yy_find_shift_action(fts5yypParser,(fts5YYCODETYPE)fts5yymaj or);
2550 if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
2551 fts5yy_shift(fts5yypParser,fts5yyact,fts5yymajor,fts5yyminor);
2552 #ifndef fts5YYNOERRORRECOVERY
2553 fts5yypParser->fts5yyerrcnt--;
2554 #endif
2555 fts5yymajor = fts5YYNOCODE;
2556 }else if( fts5yyact <= fts5YY_MAX_REDUCE ){
2557 fts5yy_reduce(fts5yypParser,fts5yyact-fts5YY_MIN_REDUCE);
2558 }else{
2559 assert( fts5yyact == fts5YY_ERROR_ACTION );
2560 fts5yyminorunion.fts5yy0 = fts5yyminor;
2561 #ifdef fts5YYERRORSYMBOL
2562 int fts5yymx;
2563 #endif
2564 #ifndef NDEBUG
2565 if( fts5yyTraceFILE ){
2566 fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
2567 }
2568 #endif
2569 #ifdef fts5YYERRORSYMBOL
2570 /* A syntax error has occurred.
2571 ** The response to an error depends upon whether or not the
2572 ** grammar defines an error token "ERROR".
2573 **
2574 ** This is what we do if the grammar does define ERROR:
2575 **
2576 ** * Call the %syntax_error function.
2577 **
2578 ** * Begin popping the stack until we enter a state where
2579 ** it is legal to shift the error symbol, then shift
2580 ** the error symbol.
2581 **
2582 ** * Set the error count to three.
2583 **
2584 ** * Begin accepting and shifting new tokens. No new error
2585 ** processing will occur until three tokens have been
2586 ** shifted successfully.
2587 **
2588 */
2589 if( fts5yypParser->fts5yyerrcnt<0 ){
2590 fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
2591 }
2592 fts5yymx = fts5yypParser->fts5yytos->major;
2593 if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
2594 #ifndef NDEBUG
2595 if( fts5yyTraceFILE ){
2596 fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
2597 fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
2598 }
2599 #endif
2600 fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yymin orunion);
2601 fts5yymajor = fts5YYNOCODE;
2602 }else{
2603 while( fts5yypParser->fts5yytos >= fts5yypParser->fts5yystack
2604 && fts5yymx != fts5YYERRORSYMBOL
2605 && (fts5yyact = fts5yy_find_reduce_action(
2606 fts5yypParser->fts5yytos->stateno,
2607 fts5YYERRORSYMBOL)) >= fts5YY_MIN_REDUCE
2608 ){
2609 fts5yy_pop_parser_stack(fts5yypParser);
2610 }
2611 if( fts5yypParser->fts5yytos < fts5yypParser->fts5yystack || fts5yymajor ==0 ){
2612 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yymin orunion);
2613 fts5yy_parse_failed(fts5yypParser);
2614 #ifndef fts5YYNOERRORRECOVERY
2615 fts5yypParser->fts5yyerrcnt = -1;
2616 #endif
2617 fts5yymajor = fts5YYNOCODE;
2618 }else if( fts5yymx!=fts5YYERRORSYMBOL ){
2619 fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
2620 }
2621 }
2622 fts5yypParser->fts5yyerrcnt = 3;
2623 fts5yyerrorhit = 1;
2624 #elif defined(fts5YYNOERRORRECOVERY)
2625 /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
2626 ** do any kind of error recovery. Instead, simply invoke the syntax
2627 ** error routine and continue going as if nothing had happened.
2628 **
2629 ** Applications can set this macro (for example inside %include) if
2630 ** they intend to abandon the parse upon the first syntax error seen.
2631 */
2632 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
2633 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorun ion);
2634 fts5yymajor = fts5YYNOCODE;
2635
2636 #else /* fts5YYERRORSYMBOL is not defined */
2637 /* This is what we do if the grammar does not define ERROR:
2638 **
2639 ** * Report an error message, and throw away the input token.
2640 **
2641 ** * If the input token is $, then fail the parse.
2642 **
2643 ** As before, subsequent error messages are suppressed until
2644 ** three input tokens have been successfully shifted.
2645 */
2646 if( fts5yypParser->fts5yyerrcnt<=0 ){
2647 fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
2648 }
2649 fts5yypParser->fts5yyerrcnt = 3;
2650 fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorun ion);
2651 if( fts5yyendofinput ){
2652 fts5yy_parse_failed(fts5yypParser);
2653 #ifndef fts5YYNOERRORRECOVERY
2654 fts5yypParser->fts5yyerrcnt = -1;
2655 #endif
2656 }
2657 fts5yymajor = fts5YYNOCODE;
2658 #endif
2659 }
2660 }while( fts5yymajor!=fts5YYNOCODE && fts5yypParser->fts5yytos>fts5yypParser->f ts5yystack );
2661 #ifndef NDEBUG
2662 if( fts5yyTraceFILE ){
2663 fts5yyStackEntry *i;
2664 char cDiv = '[';
2665 fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
2666 for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
2667 fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
2668 cDiv = ' ';
2669 }
2670 fprintf(fts5yyTraceFILE,"]\n");
2671 }
2672 #endif
2673 return;
2674 }
2675
2676 /*
2677 ** 2014 May 31
2678 **
2679 ** The author disclaims copyright to this source code. In place of
2680 ** a legal notice, here is a blessing:
2681 **
2682 ** May you do good and not evil.
2683 ** May you find forgiveness for yourself and forgive others.
2684 ** May you share freely, never taking more than you give.
2685 **
2686 ******************************************************************************
2687 */
2688
2689
2690 /* #include "fts5Int.h" */
2691 #include <math.h> /* amalgamator: keep */
2692
2693 /*
2694 ** Object used to iterate through all "coalesced phrase instances" in
2695 ** a single column of the current row. If the phrase instances in the
2696 ** column being considered do not overlap, this object simply iterates
2697 ** through them. Or, if they do overlap (share one or more tokens in
2698 ** common), each set of overlapping instances is treated as a single
2699 ** match. See documentation for the highlight() auxiliary function for
2700 ** details.
2701 **
2702 ** Usage is:
2703 **
2704 ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter);
2705 ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
2706 ** rc = fts5CInstIterNext(&iter)
2707 ** ){
2708 ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
2709 ** }
2710 **
2711 */
2712 typedef struct CInstIter CInstIter;
2713 struct CInstIter {
2714 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
2715 Fts5Context *pFts; /* First arg to pass to pApi functions */
2716 int iCol; /* Column to search */
2717 int iInst; /* Next phrase instance index */
2718 int nInst; /* Total number of phrase instances */
2719
2720 /* Output variables */
2721 int iStart; /* First token in coalesced phrase instance */
2722 int iEnd; /* Last token in coalesced phrase instance */
2723 };
2724
2725 /*
2726 ** Advance the iterator to the next coalesced phrase instance. Return
2727 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
2728 */
2729 static int fts5CInstIterNext(CInstIter *pIter){
2730 int rc = SQLITE_OK;
2731 pIter->iStart = -1;
2732 pIter->iEnd = -1;
2733
2734 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
2735 int ip; int ic; int io;
2736 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
2737 if( rc==SQLITE_OK ){
2738 if( ic==pIter->iCol ){
2739 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
2740 if( pIter->iStart<0 ){
2741 pIter->iStart = io;
2742 pIter->iEnd = iEnd;
2743 }else if( io<=pIter->iEnd ){
2744 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
2745 }else{
2746 break;
2747 }
2748 }
2749 pIter->iInst++;
2750 }
2751 }
2752
2753 return rc;
2754 }
2755
2756 /*
2757 ** Initialize the iterator object indicated by the final parameter to
2758 ** iterate through coalesced phrase instances in column iCol.
2759 */
2760 static int fts5CInstIterInit(
2761 const Fts5ExtensionApi *pApi,
2762 Fts5Context *pFts,
2763 int iCol,
2764 CInstIter *pIter
2765 ){
2766 int rc;
2767
2768 memset(pIter, 0, sizeof(CInstIter));
2769 pIter->pApi = pApi;
2770 pIter->pFts = pFts;
2771 pIter->iCol = iCol;
2772 rc = pApi->xInstCount(pFts, &pIter->nInst);
2773
2774 if( rc==SQLITE_OK ){
2775 rc = fts5CInstIterNext(pIter);
2776 }
2777
2778 return rc;
2779 }
2780
2781
2782
2783 /*************************************************************************
2784 ** Start of highlight() implementation.
2785 */
2786 typedef struct HighlightContext HighlightContext;
2787 struct HighlightContext {
2788 CInstIter iter; /* Coalesced Instance Iterator */
2789 int iPos; /* Current token offset in zIn[] */
2790 int iRangeStart; /* First token to include */
2791 int iRangeEnd; /* If non-zero, last token to include */
2792 const char *zOpen; /* Opening highlight */
2793 const char *zClose; /* Closing highlight */
2794 const char *zIn; /* Input text */
2795 int nIn; /* Size of input text in bytes */
2796 int iOff; /* Current offset within zIn[] */
2797 char *zOut; /* Output value */
2798 };
2799
2800 /*
2801 ** Append text to the HighlightContext output string - p->zOut. Argument
2802 ** z points to a buffer containing n bytes of text to append. If n is
2803 ** negative, everything up until the first '\0' is appended to the output.
2804 **
2805 ** If *pRc is set to any value other than SQLITE_OK when this function is
2806 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
2807 ** *pRc is set to an error code before returning.
2808 */
2809 static void fts5HighlightAppend(
2810 int *pRc,
2811 HighlightContext *p,
2812 const char *z, int n
2813 ){
2814 if( *pRc==SQLITE_OK ){
2815 if( n<0 ) n = (int)strlen(z);
2816 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
2817 if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
2818 }
2819 }
2820
2821 /*
2822 ** Tokenizer callback used by implementation of highlight() function.
2823 */
2824 static int fts5HighlightCb(
2825 void *pContext, /* Pointer to HighlightContext object */
2826 int tflags, /* Mask of FTS5_TOKEN_* flags */
2827 const char *pToken, /* Buffer containing token */
2828 int nToken, /* Size of token in bytes */
2829 int iStartOff, /* Start offset of token */
2830 int iEndOff /* End offset of token */
2831 ){
2832 HighlightContext *p = (HighlightContext*)pContext;
2833 int rc = SQLITE_OK;
2834 int iPos;
2835
2836 UNUSED_PARAM2(pToken, nToken);
2837
2838 if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
2839 iPos = p->iPos++;
2840
2841 if( p->iRangeEnd>0 ){
2842 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
2843 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
2844 }
2845
2846 if( iPos==p->iter.iStart ){
2847 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
2848 fts5HighlightAppend(&rc, p, p->zOpen, -1);
2849 p->iOff = iStartOff;
2850 }
2851
2852 if( iPos==p->iter.iEnd ){
2853 if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
2854 fts5HighlightAppend(&rc, p, p->zOpen, -1);
2855 }
2856 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
2857 fts5HighlightAppend(&rc, p, p->zClose, -1);
2858 p->iOff = iEndOff;
2859 if( rc==SQLITE_OK ){
2860 rc = fts5CInstIterNext(&p->iter);
2861 }
2862 }
2863
2864 if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
2865 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
2866 p->iOff = iEndOff;
2867 if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
2868 fts5HighlightAppend(&rc, p, p->zClose, -1);
2869 }
2870 }
2871
2872 return rc;
2873 }
2874
2875 /*
2876 ** Implementation of highlight() function.
2877 */
2878 static void fts5HighlightFunction(
2879 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
2880 Fts5Context *pFts, /* First arg to pass to pApi functions */
2881 sqlite3_context *pCtx, /* Context for returning result/error */
2882 int nVal, /* Number of values in apVal[] array */
2883 sqlite3_value **apVal /* Array of trailing arguments */
2884 ){
2885 HighlightContext ctx;
2886 int rc;
2887 int iCol;
2888
2889 if( nVal!=3 ){
2890 const char *zErr = "wrong number of arguments to function highlight()";
2891 sqlite3_result_error(pCtx, zErr, -1);
2892 return;
2893 }
2894
2895 iCol = sqlite3_value_int(apVal[0]);
2896 memset(&ctx, 0, sizeof(HighlightContext));
2897 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
2898 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
2899 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
2900
2901 if( ctx.zIn ){
2902 if( rc==SQLITE_OK ){
2903 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
2904 }
2905
2906 if( rc==SQLITE_OK ){
2907 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
2908 }
2909 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
2910
2911 if( rc==SQLITE_OK ){
2912 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
2913 }
2914 sqlite3_free(ctx.zOut);
2915 }
2916 if( rc!=SQLITE_OK ){
2917 sqlite3_result_error_code(pCtx, rc);
2918 }
2919 }
2920 /*
2921 ** End of highlight() implementation.
2922 **************************************************************************/
2923
/*
** Context object passed to the fts5SentenceFinderCb() function. It
** accumulates, in aFirst[], the positions of tokens that begin a sentence.
*/
typedef struct Fts5SFinder {
  int iPos;                       /* Position of the next token */
  int nFirstAlloc;                /* Number of slots allocated in aFirst[] */
  int nFirst;                     /* Number of slots of aFirst[] in use */
  int *aFirst;                    /* First token of each sentence */
  const char *zDoc;               /* Text of the document being tokenized */
} Fts5SFinder;
2935
2936 /*
2937 ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
2938 ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
2939 ** error occurs.
2940 */
2941 static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
2942 if( p->nFirstAlloc==p->nFirst ){
2943 int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
2944 int *aNew;
2945
2946 aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int));
2947 if( aNew==0 ) return SQLITE_NOMEM;
2948 p->aFirst = aNew;
2949 p->nFirstAlloc = nNew;
2950 }
2951 p->aFirst[p->nFirst++] = iAdd;
2952 return SQLITE_OK;
2953 }
2954
2955 /*
2956 ** This function is an xTokenize() callback used by the auxiliary snippet()
2957 ** function. Its job is to identify tokens that are the first in a sentence.
2958 ** For each such token, an entry is added to the SFinder.aFirst[] array.
2959 */
2960 static int fts5SentenceFinderCb(
2961 void *pContext, /* Pointer to HighlightContext object */
2962 int tflags, /* Mask of FTS5_TOKEN_* flags */
2963 const char *pToken, /* Buffer containing token */
2964 int nToken, /* Size of token in bytes */
2965 int iStartOff, /* Start offset of token */
2966 int iEndOff /* End offset of token */
2967 ){
2968 int rc = SQLITE_OK;
2969
2970 UNUSED_PARAM2(pToken, nToken);
2971 UNUSED_PARAM(iEndOff);
2972
2973 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
2974 Fts5SFinder *p = (Fts5SFinder*)pContext;
2975 if( p->iPos>0 ){
2976 int i;
2977 char c = 0;
2978 for(i=iStartOff-1; i>=0; i--){
2979 c = p->zDoc[i];
2980 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
2981 }
2982 if( i!=iStartOff-1 && (c=='.' || c==':') ){
2983 rc = fts5SentenceFinderAdd(p, p->iPos);
2984 }
2985 }else{
2986 rc = fts5SentenceFinderAdd(p, 0);
2987 }
2988 p->iPos++;
2989 }
2990 return rc;
2991 }
2992
2993 static int fts5SnippetScore(
2994 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
2995 Fts5Context *pFts, /* First arg to pass to pApi functions */
2996 int nDocsize, /* Size of column in tokens */
2997 unsigned char *aSeen, /* Array with one element per query phrase */
2998 int iCol, /* Column to score */
2999 int iPos, /* Starting offset to score */
3000 int nToken, /* Max tokens per snippet */
3001 int *pnScore, /* OUT: Score */
3002 int *piPos /* OUT: Adjusted offset */
3003 ){
3004 int rc;
3005 int i;
3006 int ip = 0;
3007 int ic = 0;
3008 int iOff = 0;
3009 int iFirst = -1;
3010 int nInst;
3011 int nScore = 0;
3012 int iLast = 0;
3013
3014 rc = pApi->xInstCount(pFts, &nInst);
3015 for(i=0; i<nInst && rc==SQLITE_OK; i++){
3016 rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
3017 if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){
3018 nScore += (aSeen[ip] ? 1 : 1000);
3019 aSeen[ip] = 1;
3020 if( iFirst<0 ) iFirst = iOff;
3021 iLast = iOff + pApi->xPhraseSize(pFts, ip);
3022 }
3023 }
3024
3025 *pnScore = nScore;
3026 if( piPos ){
3027 int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
3028 if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
3029 if( iAdj<0 ) iAdj = 0;
3030 *piPos = iAdj;
3031 }
3032
3033 return rc;
3034 }
3035
3036 /*
3037 ** Implementation of snippet() function.
3038 */
3039 static void fts5SnippetFunction(
3040 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3041 Fts5Context *pFts, /* First arg to pass to pApi functions */
3042 sqlite3_context *pCtx, /* Context for returning result/error */
3043 int nVal, /* Number of values in apVal[] array */
3044 sqlite3_value **apVal /* Array of trailing arguments */
3045 ){
3046 HighlightContext ctx;
3047 int rc = SQLITE_OK; /* Return code */
3048 int iCol; /* 1st argument to snippet() */
3049 const char *zEllips; /* 4th argument to snippet() */
3050 int nToken; /* 5th argument to snippet() */
3051 int nInst = 0; /* Number of instance matches this row */
3052 int i; /* Used to iterate through instances */
3053 int nPhrase; /* Number of phrases in query */
3054 unsigned char *aSeen; /* Array of "seen instance" flags */
3055 int iBestCol; /* Column containing best snippet */
3056 int iBestStart = 0; /* First token of best snippet */
3057 int nBestScore = 0; /* Score of best snippet */
3058 int nColSize = 0; /* Total size of iBestCol in tokens */
3059 Fts5SFinder sFinder; /* Used to find the beginnings of sentences */
3060 int nCol;
3061
3062 if( nVal!=5 ){
3063 const char *zErr = "wrong number of arguments to function snippet()";
3064 sqlite3_result_error(pCtx, zErr, -1);
3065 return;
3066 }
3067
3068 nCol = pApi->xColumnCount(pFts);
3069 memset(&ctx, 0, sizeof(HighlightContext));
3070 iCol = sqlite3_value_int(apVal[0]);
3071 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
3072 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
3073 zEllips = (const char*)sqlite3_value_text(apVal[3]);
3074 nToken = sqlite3_value_int(apVal[4]);
3075
3076 iBestCol = (iCol>=0 ? iCol : 0);
3077 nPhrase = pApi->xPhraseCount(pFts);
3078 aSeen = sqlite3_malloc(nPhrase);
3079 if( aSeen==0 ){
3080 rc = SQLITE_NOMEM;
3081 }
3082 if( rc==SQLITE_OK ){
3083 rc = pApi->xInstCount(pFts, &nInst);
3084 }
3085
3086 memset(&sFinder, 0, sizeof(Fts5SFinder));
3087 for(i=0; i<nCol; i++){
3088 if( iCol<0 || iCol==i ){
3089 int nDoc;
3090 int nDocsize;
3091 int ii;
3092 sFinder.iPos = 0;
3093 sFinder.nFirst = 0;
3094 rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
3095 if( rc!=SQLITE_OK ) break;
3096 rc = pApi->xTokenize(pFts,
3097 sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
3098 );
3099 if( rc!=SQLITE_OK ) break;
3100 rc = pApi->xColumnSize(pFts, i, &nDocsize);
3101 if( rc!=SQLITE_OK ) break;
3102
3103 for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
3104 int ip, ic, io;
3105 int iAdj;
3106 int nScore;
3107 int jj;
3108
3109 rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
3110 if( ic!=i || rc!=SQLITE_OK ) continue;
3111 memset(aSeen, 0, nPhrase);
3112 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3113 io, nToken, &nScore, &iAdj
3114 );
3115 if( rc==SQLITE_OK && nScore>nBestScore ){
3116 nBestScore = nScore;
3117 iBestCol = i;
3118 iBestStart = iAdj;
3119 nColSize = nDocsize;
3120 }
3121
3122 if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
3123 for(jj=0; jj<(sFinder.nFirst-1); jj++){
3124 if( sFinder.aFirst[jj+1]>io ) break;
3125 }
3126
3127 if( sFinder.aFirst[jj]<io ){
3128 memset(aSeen, 0, nPhrase);
3129 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3130 sFinder.aFirst[jj], nToken, &nScore, 0
3131 );
3132
3133 nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
3134 if( rc==SQLITE_OK && nScore>nBestScore ){
3135 nBestScore = nScore;
3136 iBestCol = i;
3137 iBestStart = sFinder.aFirst[jj];
3138 nColSize = nDocsize;
3139 }
3140 }
3141 }
3142 }
3143 }
3144 }
3145
3146 if( rc==SQLITE_OK ){
3147 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
3148 }
3149 if( rc==SQLITE_OK && nColSize==0 ){
3150 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
3151 }
3152 if( ctx.zIn ){
3153 if( rc==SQLITE_OK ){
3154 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
3155 }
3156
3157 ctx.iRangeStart = iBestStart;
3158 ctx.iRangeEnd = iBestStart + nToken - 1;
3159
3160 if( iBestStart>0 ){
3161 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3162 }
3163
3164 /* Advance iterator ctx.iter so that it points to the first coalesced
3165 ** phrase instance at or following position iBestStart. */
3166 while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
3167 rc = fts5CInstIterNext(&ctx.iter);
3168 }
3169
3170 if( rc==SQLITE_OK ){
3171 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
3172 }
3173 if( ctx.iRangeEnd>=(nColSize-1) ){
3174 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3175 }else{
3176 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3177 }
3178 }
3179 if( rc==SQLITE_OK ){
3180 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
3181 }else{
3182 sqlite3_result_error_code(pCtx, rc);
3183 }
3184 sqlite3_free(ctx.zOut);
3185 sqlite3_free(aSeen);
3186 sqlite3_free(sFinder.aFirst);
3187 }
3188
3189 /************************************************************************/
3190
/*
** Per-query data used by bm25(). An instance is allocated and populated by
** fts5Bm25GetData() the first time bm25() is invoked for a query, with both
** arrays stored in the same allocation as the structure itself.
*/
typedef struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in the query */
  double avgdl;                   /* Average row size, in tokens */
  double *aIDF;                   /* nPhrase IDF values, one per phrase */
  double *aFreq;                  /* nPhrase phrase-frequency accumulators */
} Fts5Bm25Data;
3202
3203 /*
3204 ** Callback used by fts5Bm25GetData() to count the number of rows in the
3205 ** table matched by each individual phrase within the query.
3206 */
3207 static int fts5CountCb(
3208 const Fts5ExtensionApi *pApi,
3209 Fts5Context *pFts,
3210 void *pUserData /* Pointer to sqlite3_int64 variable */
3211 ){
3212 sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
3213 UNUSED_PARAM2(pApi, pFts);
3214 (*pn)++;
3215 return SQLITE_OK;
3216 }
3217
3218 /*
3219 ** Set *ppData to point to the Fts5Bm25Data object for the current query.
3220 ** If the object has not already been allocated, allocate and populate it
3221 ** now.
3222 */
3223 static int fts5Bm25GetData(
3224 const Fts5ExtensionApi *pApi,
3225 Fts5Context *pFts,
3226 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */
3227 ){
3228 int rc = SQLITE_OK; /* Return code */
3229 Fts5Bm25Data *p; /* Object to return */
3230
3231 p = pApi->xGetAuxdata(pFts, 0);
3232 if( p==0 ){
3233 int nPhrase; /* Number of phrases in query */
3234 sqlite3_int64 nRow = 0; /* Number of rows in table */
3235 sqlite3_int64 nToken = 0; /* Number of tokens in table */
3236 int nByte; /* Bytes of space to allocate */
3237 int i;
3238
3239 /* Allocate the Fts5Bm25Data object */
3240 nPhrase = pApi->xPhraseCount(pFts);
3241 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
3242 p = (Fts5Bm25Data*)sqlite3_malloc(nByte);
3243 if( p==0 ){
3244 rc = SQLITE_NOMEM;
3245 }else{
3246 memset(p, 0, nByte);
3247 p->nPhrase = nPhrase;
3248 p->aIDF = (double*)&p[1];
3249 p->aFreq = &p->aIDF[nPhrase];
3250 }
3251
3252 /* Calculate the average document length for this FTS5 table */
3253 if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
3254 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
3255 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow;
3256
3257 /* Calculate an IDF for each phrase in the query */
3258 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
3259 sqlite3_int64 nHit = 0;
3260 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
3261 if( rc==SQLITE_OK ){
3262 /* Calculate the IDF (Inverse Document Frequency) for phrase i.
3263 ** This is done using the standard BM25 formula as found on wikipedia:
3264 **
3265 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
3266 **
3267 ** where "N" is the total number of documents in the set and nHit
3268 ** is the number that contain at least one instance of the phrase
3269 ** under consideration.
3270 **
3271 ** The problem with this is that if (N < 2*nHit), the IDF is
3272 ** negative. Which is undesirable. So the mimimum allowable IDF is
3273 ** (1e-6) - roughly the same as a term that appears in just over
3274 ** half of set of 5,000,000 documents. */
3275 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
3276 if( idf<=0.0 ) idf = 1e-6;
3277 p->aIDF[i] = idf;
3278 }
3279 }
3280
3281 if( rc!=SQLITE_OK ){
3282 sqlite3_free(p);
3283 }else{
3284 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
3285 }
3286 if( rc!=SQLITE_OK ) p = 0;
3287 }
3288 *ppData = p;
3289 return rc;
3290 }
3291
3292 /*
3293 ** Implementation of bm25() function.
3294 */
3295 static void fts5Bm25Function(
3296 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3297 Fts5Context *pFts, /* First arg to pass to pApi functions */
3298 sqlite3_context *pCtx, /* Context for returning result/error */
3299 int nVal, /* Number of values in apVal[] array */
3300 sqlite3_value **apVal /* Array of trailing arguments */
3301 ){
3302 const double k1 = 1.2; /* Constant "k1" from BM25 formula */
3303 const double b = 0.75; /* Constant "b" from BM25 formula */
3304 int rc = SQLITE_OK; /* Error code */
3305 double score = 0.0; /* SQL function return value */
3306 Fts5Bm25Data *pData; /* Values allocated/calculated once only */
3307 int i; /* Iterator variable */
3308 int nInst = 0; /* Value returned by xInstCount() */
3309 double D = 0.0; /* Total number of tokens in row */
3310 double *aFreq = 0; /* Array of phrase freq. for current row */
3311
3312 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
3313 ** for each phrase in the query for the current row. */
3314 rc = fts5Bm25GetData(pApi, pFts, &pData);
3315 if( rc==SQLITE_OK ){
3316 aFreq = pData->aFreq;
3317 memset(aFreq, 0, sizeof(double) * pData->nPhrase);
3318 rc = pApi->xInstCount(pFts, &nInst);
3319 }
3320 for(i=0; rc==SQLITE_OK && i<nInst; i++){
3321 int ip; int ic; int io;
3322 rc = pApi->xInst(pFts, i, &ip, &ic, &io);
3323 if( rc==SQLITE_OK ){
3324 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
3325 aFreq[ip] += w;
3326 }
3327 }
3328
3329 /* Figure out the total size of the current row in tokens. */
3330 if( rc==SQLITE_OK ){
3331 int nTok;
3332 rc = pApi->xColumnSize(pFts, -1, &nTok);
3333 D = (double)nTok;
3334 }
3335
3336 /* Determine the BM25 score for the current row. */
3337 for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){
3338 score += pData->aIDF[i] * (
3339 ( aFreq[i] * (k1 + 1.0) ) /
3340 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
3341 );
3342 }
3343
3344 /* If no error has occurred, return the calculated score. Otherwise,
3345 ** throw an SQL exception. */
3346 if( rc==SQLITE_OK ){
3347 sqlite3_result_double(pCtx, -1.0 * score);
3348 }else{
3349 sqlite3_result_error_code(pCtx, rc);
3350 }
3351 }
3352
3353 static int sqlite3Fts5AuxInit(fts5_api *pApi){
3354 struct Builtin {
3355 const char *zFunc; /* Function name (nul-terminated) */
3356 void *pUserData; /* User-data pointer */
3357 fts5_extension_function xFunc;/* Callback function */
3358 void (*xDestroy)(void*); /* Destructor function */
3359 } aBuiltin [] = {
3360 { "snippet", 0, fts5SnippetFunction, 0 },
3361 { "highlight", 0, fts5HighlightFunction, 0 },
3362 { "bm25", 0, fts5Bm25Function, 0 },
3363 };
3364 int rc = SQLITE_OK; /* Return code */
3365 int i; /* To iterate through builtin functions */
3366
3367 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
3368 rc = pApi->xCreateFunction(pApi,
3369 aBuiltin[i].zFunc,
3370 aBuiltin[i].pUserData,
3371 aBuiltin[i].xFunc,
3372 aBuiltin[i].xDestroy
3373 );
3374 }
3375
3376 return rc;
3377 }
3378
3379
3380
3381 /*
3382 ** 2014 May 31
3383 **
3384 ** The author disclaims copyright to this source code. In place of
3385 ** a legal notice, here is a blessing:
3386 **
3387 ** May you do good and not evil.
3388 ** May you find forgiveness for yourself and forgive others.
3389 ** May you share freely, never taking more than you give.
3390 **
3391 ******************************************************************************
3392 */
3393
3394
3395
3396 /* #include "fts5Int.h" */
3397
3398 static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
3399 if( (u32)pBuf->nSpace<nByte ){
3400 u32 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
3401 u8 *pNew;
3402 while( nNew<nByte ){
3403 nNew = nNew * 2;
3404 }
3405 pNew = sqlite3_realloc(pBuf->p, nNew);
3406 if( pNew==0 ){
3407 *pRc = SQLITE_NOMEM;
3408 return 1;
3409 }else{
3410 pBuf->nSpace = nNew;
3411 pBuf->p = pNew;
3412 }
3413 }
3414 return 0;
3415 }
3416
3417
3418 /*
3419 ** Encode value iVal as an SQLite varint and append it to the buffer object
3420 ** pBuf. If an OOM error occurs, set the error code in p.
3421 */
3422 static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
3423 if( fts5BufferGrow(pRc, pBuf, 9) ) return;
3424 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
3425 }
3426
3427 static void sqlite3Fts5Put32(u8 *aBuf, int iVal){
3428 aBuf[0] = (iVal>>24) & 0x00FF;
3429 aBuf[1] = (iVal>>16) & 0x00FF;
3430 aBuf[2] = (iVal>> 8) & 0x00FF;
3431 aBuf[3] = (iVal>> 0) & 0x00FF;
3432 }
3433
3434 static int sqlite3Fts5Get32(const u8 *aBuf){
3435 return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3];
3436 }
3437
3438 /*
3439 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
3440 ** the error code in p. If an error has already occurred when this function
3441 ** is called, it is a no-op.
3442 */
3443 static void sqlite3Fts5BufferAppendBlob(
3444 int *pRc,
3445 Fts5Buffer *pBuf,
3446 u32 nData,
3447 const u8 *pData
3448 ){
3449 assert_nc( *pRc || nData>=0 );
3450 if( fts5BufferGrow(pRc, pBuf, nData) ) return;
3451 memcpy(&pBuf->p[pBuf->n], pData, nData);
3452 pBuf->n += nData;
3453 }
3454
3455 /*
3456 ** Append the nul-terminated string zStr to the buffer pBuf. This function
3457 ** ensures that the byte following the buffer data is set to 0x00, even
3458 ** though this byte is not included in the pBuf->n count.
3459 */
3460 static void sqlite3Fts5BufferAppendString(
3461 int *pRc,
3462 Fts5Buffer *pBuf,
3463 const char *zStr
3464 ){
3465 int nStr = (int)strlen(zStr);
3466 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
3467 pBuf->n--;
3468 }
3469
3470 /*
3471 ** Argument zFmt is a printf() style format string. This function performs
3472 ** the printf() style processing, then appends the results to buffer pBuf.
3473 **
3474 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
3475 ** following the buffer data is set to 0x00, even though this byte is not
3476 ** included in the pBuf->n count.
3477 */
3478 static void sqlite3Fts5BufferAppendPrintf(
3479 int *pRc,
3480 Fts5Buffer *pBuf,
3481 char *zFmt, ...
3482 ){
3483 if( *pRc==SQLITE_OK ){
3484 char *zTmp;
3485 va_list ap;
3486 va_start(ap, zFmt);
3487 zTmp = sqlite3_vmprintf(zFmt, ap);
3488 va_end(ap);
3489
3490 if( zTmp==0 ){
3491 *pRc = SQLITE_NOMEM;
3492 }else{
3493 sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
3494 sqlite3_free(zTmp);
3495 }
3496 }
3497 }
3498
3499 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
3500 char *zRet = 0;
3501 if( *pRc==SQLITE_OK ){
3502 va_list ap;
3503 va_start(ap, zFmt);
3504 zRet = sqlite3_vmprintf(zFmt, ap);
3505 va_end(ap);
3506 if( zRet==0 ){
3507 *pRc = SQLITE_NOMEM;
3508 }
3509 }
3510 return zRet;
3511 }
3512
3513
3514 /*
3515 ** Free any buffer allocated by pBuf. Zero the structure before returning.
3516 */
3517 static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
3518 sqlite3_free(pBuf->p);
3519 memset(pBuf, 0, sizeof(Fts5Buffer));
3520 }
3521
3522 /*
3523 ** Zero the contents of the buffer object. But do not free the associated
3524 ** memory allocation.
3525 */
3526 static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
3527 pBuf->n = 0;
3528 }
3529
3530 /*
3531 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
3532 ** the error code in p. If an error has already occurred when this function
3533 ** is called, it is a no-op.
3534 */
3535 static void sqlite3Fts5BufferSet(
3536 int *pRc,
3537 Fts5Buffer *pBuf,
3538 int nData,
3539 const u8 *pData
3540 ){
3541 pBuf->n = 0;
3542 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
3543 }
3544
3545 static int sqlite3Fts5PoslistNext64(
3546 const u8 *a, int n, /* Buffer containing poslist */
3547 int *pi, /* IN/OUT: Offset within a[] */
3548 i64 *piOff /* IN/OUT: Current offset */
3549 ){
3550 int i = *pi;
3551 if( i>=n ){
3552 /* EOF */
3553 *piOff = -1;
3554 return 1;
3555 }else{
3556 i64 iOff = *piOff;
3557 int iVal;
3558 fts5FastGetVarint32(a, i, iVal);
3559 if( iVal==1 ){
3560 fts5FastGetVarint32(a, i, iVal);
3561 iOff = ((i64)iVal) << 32;
3562 fts5FastGetVarint32(a, i, iVal);
3563 }
3564 *piOff = iOff + (iVal-2);
3565 *pi = i;
3566 return 0;
3567 }
3568 }
3569
3570
3571 /*
3572 ** Advance the iterator object passed as the only argument. Return true
3573 ** if the iterator reaches EOF, or false otherwise.
3574 */
3575 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
3576 if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
3577 pIter->bEof = 1;
3578 }
3579 return pIter->bEof;
3580 }
3581
3582 static int sqlite3Fts5PoslistReaderInit(
3583 const u8 *a, int n, /* Poslist buffer to iterate through */
3584 Fts5PoslistReader *pIter /* Iterator object to initialize */
3585 ){
3586 memset(pIter, 0, sizeof(*pIter));
3587 pIter->a = a;
3588 pIter->n = n;
3589 sqlite3Fts5PoslistReaderNext(pIter);
3590 return pIter->bEof;
3591 }
3592
3593 /*
3594 ** Append position iPos to the position list being accumulated in buffer
3595 ** pBuf, which must be already be large enough to hold the new data.
3596 ** The previous position written to this list is *piPrev. *piPrev is set
3597 ** to iPos before returning.
3598 */
3599 static void sqlite3Fts5PoslistSafeAppend(
3600 Fts5Buffer *pBuf,
3601 i64 *piPrev,
3602 i64 iPos
3603 ){
3604 static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
3605 if( (iPos & colmask) != (*piPrev & colmask) ){
3606 pBuf->p[pBuf->n++] = 1;
3607 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
3608 *piPrev = (iPos & colmask);
3609 }
3610 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
3611 *piPrev = iPos;
3612 }
3613
3614 static int sqlite3Fts5PoslistWriterAppend(
3615 Fts5Buffer *pBuf,
3616 Fts5PoslistWriter *pWriter,
3617 i64 iPos
3618 ){
3619 int rc = 0; /* Initialized only to suppress erroneous warning from Clang */
3620 if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
3621 sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
3622 return SQLITE_OK;
3623 }
3624
3625 static void *sqlite3Fts5MallocZero(int *pRc, int nByte){
3626 void *pRet = 0;
3627 if( *pRc==SQLITE_OK ){
3628 pRet = sqlite3_malloc(nByte);
3629 if( pRet==0 && nByte>0 ){
3630 *pRc = SQLITE_NOMEM;
3631 }else{
3632 memset(pRet, 0, nByte);
3633 }
3634 }
3635 return pRet;
3636 }
3637
3638 /*
3639 ** Return a nul-terminated copy of the string indicated by pIn. If nIn
3640 ** is non-negative, then it is the length of the string in bytes. Otherwise,
3641 ** the length of the string is determined using strlen().
3642 **
3643 ** It is the responsibility of the caller to eventually free the returned
3644 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
3645 */
3646 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
3647 char *zRet = 0;
3648 if( *pRc==SQLITE_OK ){
3649 if( nIn<0 ){
3650 nIn = (int)strlen(pIn);
3651 }
3652 zRet = (char*)sqlite3_malloc(nIn+1);
3653 if( zRet ){
3654 memcpy(zRet, pIn, nIn);
3655 zRet[nIn] = '\0';
3656 }else{
3657 *pRc = SQLITE_NOMEM;
3658 }
3659 }
3660 return zRet;
3661 }
3662
3663
/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters (any byte with the 0x80 bit set),
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "substitute" character (0x1A).
**
** The lookup table is static and const so that it is built once at
** compile time rather than being re-initialized on the stack on every
** call. It is only indexed after the 0x80 test has failed, so the index
** is always in the range 0..127.
*/
static int sqlite3Fts5IsBareword(char t){
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  return (t & 0x80) || aBareword[(int)t];
}
3688
3689
3690 /*************************************************************************
3691 */
/*
** A Fts5Termset is a hash table with 512 buckets. Each bucket is a
** singly linked list of Fts5TermsetEntry objects, one per distinct
** (iIdx, term) pair added with sqlite3Fts5TermsetAdd().
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;

struct Fts5TermsetEntry {
  char *pTerm;                    /* Term text (nTerm bytes) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same bucket */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Bucket heads */
};
3703
3704 static int sqlite3Fts5TermsetNew(Fts5Termset **pp){
3705 int rc = SQLITE_OK;
3706 *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
3707 return rc;
3708 }
3709
3710 static int sqlite3Fts5TermsetAdd(
3711 Fts5Termset *p,
3712 int iIdx,
3713 const char *pTerm, int nTerm,
3714 int *pbPresent
3715 ){
3716 int rc = SQLITE_OK;
3717 *pbPresent = 0;
3718 if( p ){
3719 int i;
3720 u32 hash = 13;
3721 Fts5TermsetEntry *pEntry;
3722
3723 /* Calculate a hash value for this term. This is the same hash checksum
3724 ** used by the fts5_hash.c module. This is not important for correct
3725 ** operation of the module, but is necessary to ensure that some tests
3726 ** designed to produce hash table collisions really do work. */
3727 for(i=nTerm-1; i>=0; i--){
3728 hash = (hash << 3) ^ hash ^ pTerm[i];
3729 }
3730 hash = (hash << 3) ^ hash ^ iIdx;
3731 hash = hash % ArraySize(p->apHash);
3732
3733 for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
3734 if( pEntry->iIdx==iIdx
3735 && pEntry->nTerm==nTerm
3736 && memcmp(pEntry->pTerm, pTerm, nTerm)==0
3737 ){
3738 *pbPresent = 1;
3739 break;
3740 }
3741 }
3742
3743 if( pEntry==0 ){
3744 pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
3745 if( pEntry ){
3746 pEntry->pTerm = (char*)&pEntry[1];
3747 pEntry->nTerm = nTerm;
3748 pEntry->iIdx = iIdx;
3749 memcpy(pEntry->pTerm, pTerm, nTerm);
3750 pEntry->pNext = p->apHash[hash];
3751 p->apHash[hash] = pEntry;
3752 }
3753 }
3754 }
3755
3756 return rc;
3757 }
3758
3759 static void sqlite3Fts5TermsetFree(Fts5Termset *p){
3760 if( p ){
3761 u32 i;
3762 for(i=0; i<ArraySize(p->apHash); i++){
3763 Fts5TermsetEntry *pEntry = p->apHash[i];
3764 while( pEntry ){
3765 Fts5TermsetEntry *pDel = pEntry;
3766 pEntry = pEntry->pNext;
3767 sqlite3_free(pDel);
3768 }
3769 }
3770 sqlite3_free(p);
3771 }
3772 }
3773
3774 /*
3775 ** 2014 Jun 09
3776 **
3777 ** The author disclaims copyright to this source code. In place of
3778 ** a legal notice, here is a blessing:
3779 **
3780 ** May you do good and not evil.
3781 ** May you find forgiveness for yourself and forgive others.
3782 ** May you share freely, never taking more than you give.
3783 **
3784 ******************************************************************************
3785 **
3786 ** This is an SQLite module implementing full-text search.
3787 */
3788
3789
3790 /* #include "fts5Int.h" */
3791
/*
** Default values. Judging by the names these are the defaults for the
** page-size, automerge, usermerge, crisis-merge and hash-size settings;
** the code that consumes them lies outside this section.
*/
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (128*1024)
3800
/*
** Return 1 if x is the space character (0x20), or 0 otherwise. No other
** character is treated as white-space by this function.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
3804
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double quote, single quote, '[' or back-tick - or 0
** otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
3808
/*
** Argument pIn is either NULL or points into a nul-terminated string.
** Return a pointer to the first character at or after pIn for which
** fts5_iswhitespace() is false. If pIn is NULL, NULL is returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* no-op */
  }
  return z;
}
3821
/*
** Argument pIn points into a nul-terminated string. Skip over the run
** of "bareword" characters (see sqlite3Fts5IsBareword()) that starts at
** pIn and return a pointer to the first character following it. If
** *pIn is not itself a bareword character, NULL is returned.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z = pIn;
  while( sqlite3Fts5IsBareword(*z) ){
    z++;
  }
  return (z==pIn) ? 0 : z;
}
3833
/*
** Return 1 if a is one of the ASCII digits '0'..'9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  if( a>'9' ) return 0;
  return 1;
}
3837
3838
3839
/*
** Argument pIn points to the first character of what is expected to be
** an SQL literal within a nul-terminated string. Return a pointer to
** the character following the literal, or NULL if no literal could be
** parsed. The forms recognized are:
**
**   * NULL (any case),
**   * a blob literal x'...' or X'...' containing an even number of
**     hexadecimal digits,
**   * a string literal enclosed in single quotes, within which a quote
**     is escaped by doubling it, and
**   * a number: an optional '+' or '-', a run of digits and an optional
**     fractional part consisting of '.' followed by at least one digit.
**     NULL is returned for this form only if no character at all was
**     consumed.
**
** In the string case the nul-terminator is tested for before the cursor
** is advanced. This means that an unterminated literal - including one
** consisting of just the opening quote - is rejected without ever
** reading past the end of the input.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the leading x' plus the hex digits, so it is
        ** even exactly when the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( 1 ){
        if( *p=='\0' ){
          /* Hit the end of the input before the closing quote. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;     /* That was the closing quote */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
3902
/*
** The first character of the string pointed to by argument z is
** required (by an assert()) to be one of the four open-quote characters
** accepted by fts5_isopenquote().
**
** This function dequotes the string in place: the text between the
** quotes is moved to the start of z, each doubled close-quote character
** is replaced by a single one, and a new nul-terminator is written
** after the result. The close-quote for '[' is ']'; for the other three
** it is the same character as the open-quote.
**
** The return value is the byte offset, within the original string, of
** the character immediately following the close-quote. If the end of
** the string is reached without a close-quote being found, the loop
** simply stops there and the offset of the original nul-terminator is
** returned; this function never returns -1. (The loop condition is
** wrapped in ALWAYS(), which suggests that case is not expected to
** occur - the macro is defined outside this section.)
*/
static int fts5Dequote(char *z){
  char q = z[0];                  /* Close-quote character */
  int iIn = 1;                    /* Read cursor */
  int iOut = 0;                   /* Write cursor */

  assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  if( q=='[' ) q = ']';

  while( ALWAYS(z[iIn]) ){
    if( z[iIn]!=q ){
      /* Ordinary character. Copy it to the output. */
      z[iOut++] = z[iIn++];
    }else if( z[iIn+1]==q ){
      /* Characters iIn and iIn+1 form an escaped quote character. Skip
      ** the input cursor past both and emit a single quote character. */
      iIn += 2;
      z[iOut++] = q;
    }else{
      /* Character iIn was the close quote. */
      iIn++;
      break;
    }
  }

  z[iOut] = '\0';
  return iIn;
}
3947
/*
** If string z begins with a quote character, remove the quoting in
** place using fts5Dequote(). Otherwise this routine is a no-op. The
** first character of z must not be white-space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  if( fts5_isopenquote(z[0]) ){
    fts5Dequote(z);
  }
}
3970
3971
/*
** One entry in a table mapping option names to integer values. Tables
** of these are searched by fts5ConfigSetEnum() and are terminated by an
** entry with zName==0.
*/
typedef struct Fts5Enum Fts5Enum;
struct Fts5Enum {
  const char *zName;              /* Name of the option value */
  int eVal;                       /* Corresponding integer value */
};
3977
3978 static int fts5ConfigSetEnum(
3979 const Fts5Enum *aEnum,
3980 const char *zEnum,
3981 int *peVal
3982 ){
3983 int nEnum = (int)strlen(zEnum);
3984 int i;
3985 int iVal = -1;
3986
3987 for(i=0; aEnum[i].zName; i++){
3988 if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
3989 if( iVal>=0 ) return SQLITE_ERROR;
3990 iVal = aEnum[i].eVal;
3991 }
3992 }
3993
3994 *peVal = iVal;
3995 return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
3996 }
3997
3998 /*
3999 ** Parse a "special" CREATE VIRTUAL TABLE directive and update
4000 ** configuration object pConfig as appropriate.
4001 **
4002 ** If successful, object pConfig is updated and SQLITE_OK returned. If
4003 ** an error occurs, an SQLite error code is returned and an error message
4004 ** may be left in *pzErr. It is the responsibility of the caller to
4005 ** eventually free any such error message using sqlite3_free().
4006 */
4007 static int fts5ConfigParseSpecial(
4008 Fts5Global *pGlobal,
4009 Fts5Config *pConfig, /* Configuration object to update */
4010 const char *zCmd, /* Special command to parse */
4011 const char *zArg, /* Argument to parse */
4012 char **pzErr /* OUT: Error message */
4013 ){
4014 int rc = SQLITE_OK;
4015 int nCmd = (int)strlen(zCmd);
4016 if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
4017 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
4018 const char *p;
4019 int bFirst = 1;
4020 if( pConfig->aPrefix==0 ){
4021 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
4022 if( rc ) return rc;
4023 }
4024
4025 p = zArg;
4026 while( 1 ){
4027 int nPre = 0;
4028
4029 while( p[0]==' ' ) p++;
4030 if( bFirst==0 && p[0]==',' ){
4031 p++;
4032 while( p[0]==' ' ) p++;
4033 }else if( p[0]=='\0' ){
4034 break;
4035 }
4036 if( p[0]<'0' || p[0]>'9' ){
4037 *pzErr = sqlite3_mprintf("malformed prefix=... directive");
4038 rc = SQLITE_ERROR;
4039 break;
4040 }
4041
4042 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
4043 *pzErr = sqlite3_mprintf(
4044 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
4045 );
4046 rc = SQLITE_ERROR;
4047 break;
4048 }
4049
4050 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
4051 nPre = nPre*10 + (p[0] - '0');
4052 p++;
4053 }
4054
4055 if( nPre<=0 || nPre>=1000 ){
4056 *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
4057 rc = SQLITE_ERROR;
4058 break;
4059 }
4060
4061 pConfig->aPrefix[pConfig->nPrefix] = nPre;
4062 pConfig->nPrefix++;
4063 bFirst = 0;
4064 }
4065 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
4066 return rc;
4067 }
4068
4069 if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
4070 const char *p = (const char*)zArg;
4071 int nArg = (int)strlen(zArg) + 1;
4072 char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
4073 char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
4074 char *pSpace = pDel;
4075
4076 if( azArg && pSpace ){
4077 if( pConfig->pTok ){
4078 *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
4079 rc = SQLITE_ERROR;
4080 }else{
4081 for(nArg=0; p && *p; nArg++){
4082 const char *p2 = fts5ConfigSkipWhitespace(p);
4083 if( *p2=='\'' ){
4084 p = fts5ConfigSkipLiteral(p2);
4085 }else{
4086 p = fts5ConfigSkipBareword(p2);
4087 }
4088 if( p ){
4089 memcpy(pSpace, p2, p-p2);
4090 azArg[nArg] = pSpace;
4091 sqlite3Fts5Dequote(pSpace);
4092 pSpace += (p - p2) + 1;
4093 p = fts5ConfigSkipWhitespace(p);
4094 }
4095 }
4096 if( p==0 ){
4097 *pzErr = sqlite3_mprintf("parse error in tokenize directive");
4098 rc = SQLITE_ERROR;
4099 }else{
4100 rc = sqlite3Fts5GetTokenizer(pGlobal,
4101 (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi,
4102 pzErr
4103 );
4104 }
4105 }
4106 }
4107
4108 sqlite3_free(azArg);
4109 sqlite3_free(pDel);
4110 return rc;
4111 }
4112
4113 if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
4114 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
4115 *pzErr = sqlite3_mprintf("multiple content=... directives");
4116 rc = SQLITE_ERROR;
4117 }else{
4118 if( zArg[0] ){
4119 pConfig->eContent = FTS5_CONTENT_EXTERNAL;
4120 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
4121 }else{
4122 pConfig->eContent = FTS5_CONTENT_NONE;
4123 }
4124 }
4125 return rc;
4126 }
4127
4128 if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
4129 if( pConfig->zContentRowid ){
4130 *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
4131 rc = SQLITE_ERROR;
4132 }else{
4133 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
4134 }
4135 return rc;
4136 }
4137
4138 if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
4139 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4140 *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
4141 rc = SQLITE_ERROR;
4142 }else{
4143 pConfig->bColumnsize = (zArg[0]=='1');
4144 }
4145 return rc;
4146 }
4147
4148 if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
4149 const Fts5Enum aDetail[] = {
4150 { "none", FTS5_DETAIL_NONE },
4151 { "full", FTS5_DETAIL_FULL },
4152 { "columns", FTS5_DETAIL_COLUMNS },
4153 { 0, 0 }
4154 };
4155
4156 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
4157 *pzErr = sqlite3_mprintf("malformed detail=... directive");
4158 }
4159 return rc;
4160 }
4161
4162 *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
4163 return SQLITE_ERROR;
4164 }
4165
4166 /*
4167 ** Allocate an instance of the default tokenizer ("simple") at
4168 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
4169 ** code if an error occurs.
4170 */
4171 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
4172 assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
4173 return sqlite3Fts5GetTokenizer(
4174 pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
4175 );
4176 }
4177
4178 /*
4179 ** Gobble up the first bareword or quoted word from the input buffer zIn.
4180 ** Return a pointer to the character immediately following the last in
4181 ** the gobbled word if successful, or a NULL pointer otherwise (failed
4182 ** to find close-quote character).
4183 **
4184 ** Before returning, set pzOut to point to a new buffer containing a
4185 ** nul-terminated, dequoted copy of the gobbled word. If the word was
4186 ** quoted, *pbQuoted is also set to 1 before returning.
4187 **
4188 ** If *pRc is other than SQLITE_OK when this function is called, it is
4189 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
4190 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
4191 ** set if a parse error (failed to find close quote) occurs.
4192 */
4193 static const char *fts5ConfigGobbleWord(
4194 int *pRc, /* IN/OUT: Error code */
4195 const char *zIn, /* Buffer to gobble string/bareword from */
4196 char **pzOut, /* OUT: malloc'd buffer containing str/bw */
4197 int *pbQuoted /* OUT: Set to true if dequoting required */
4198 ){
4199 const char *zRet = 0;
4200
4201 int nIn = (int)strlen(zIn);
4202 char *zOut = sqlite3_malloc(nIn+1);
4203
4204 assert( *pRc==SQLITE_OK );
4205 *pbQuoted = 0;
4206 *pzOut = 0;
4207
4208 if( zOut==0 ){
4209 *pRc = SQLITE_NOMEM;
4210 }else{
4211 memcpy(zOut, zIn, nIn+1);
4212 if( fts5_isopenquote(zOut[0]) ){
4213 int ii = fts5Dequote(zOut);
4214 zRet = &zIn[ii];
4215 *pbQuoted = 1;
4216 }else{
4217 zRet = fts5ConfigSkipBareword(zIn);
4218 if( zRet ){
4219 zOut[zRet-zIn] = '\0';
4220 }
4221 }
4222 }
4223
4224 if( zRet==0 ){
4225 sqlite3_free(zOut);
4226 }else{
4227 *pzOut = zOut;
4228 }
4229
4230 return zRet;
4231 }
4232
4233 static int fts5ConfigParseColumn(
4234 Fts5Config *p,
4235 char *zCol,
4236 char *zArg,
4237 char **pzErr
4238 ){
4239 int rc = SQLITE_OK;
4240 if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
4241 || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
4242 ){
4243 *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
4244 rc = SQLITE_ERROR;
4245 }else if( zArg ){
4246 if( 0==sqlite3_stricmp(zArg, "unindexed") ){
4247 p->abUnindexed[p->nCol] = 1;
4248 }else{
4249 *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
4250 rc = SQLITE_ERROR;
4251 }
4252 }
4253
4254 p->azCol[p->nCol++] = zCol;
4255 return rc;
4256 }
4257
4258 /*
4259 ** Populate the Fts5Config.zContentExprlist string.
4260 */
4261 static int fts5ConfigMakeExprlist(Fts5Config *p){
4262 int i;
4263 int rc = SQLITE_OK;
4264 Fts5Buffer buf = {0, 0, 0};
4265
4266 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
4267 if( p->eContent!=FTS5_CONTENT_NONE ){
4268 for(i=0; i<p->nCol; i++){
4269 if( p->eContent==FTS5_CONTENT_EXTERNAL ){
4270 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
4271 }else{
4272 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
4273 }
4274 }
4275 }
4276
4277 assert( p->zContentExprlist==0 );
4278 p->zContentExprlist = (char*)buf.p;
4279 return rc;
4280 }
4281
4282 /*
4283 ** Arguments nArg/azArg contain the string arguments passed to the xCreate
4284 ** or xConnect method of the virtual table. This function attempts to
4285 ** allocate an instance of Fts5Config containing the results of parsing
4286 ** those arguments.
4287 **
4288 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the
4289 ** new Fts5Config object. If an error occurs, an SQLite error code is
4290 ** returned, *ppOut is set to NULL and an error message may be left in
4291 ** *pzErr. It is the responsibility of the caller to eventually free any
4292 ** such error message using sqlite3_free().
4293 */
4294 static int sqlite3Fts5ConfigParse(
4295 Fts5Global *pGlobal,
4296 sqlite3 *db,
4297 int nArg, /* Number of arguments */
4298 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
4299 Fts5Config **ppOut, /* OUT: Results of parse */
4300 char **pzErr /* OUT: Error message */
4301 ){
4302 int rc = SQLITE_OK; /* Return code */
4303 Fts5Config *pRet; /* New object to return */
4304 int i;
4305 int nByte;
4306
4307 *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
4308 if( pRet==0 ) return SQLITE_NOMEM;
4309 memset(pRet, 0, sizeof(Fts5Config));
4310 pRet->db = db;
4311 pRet->iCookie = -1;
4312
4313 nByte = nArg * (sizeof(char*) + sizeof(u8));
4314 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
4315 pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
4316 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
4317 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
4318 pRet->bColumnsize = 1;
4319 pRet->eDetail = FTS5_DETAIL_FULL;
4320 #ifdef SQLITE_DEBUG
4321 pRet->bPrefixIndex = 1;
4322 #endif
4323 if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
4324 *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
4325 rc = SQLITE_ERROR;
4326 }
4327
4328 for(i=3; rc==SQLITE_OK && i<nArg; i++){
4329 const char *zOrig = azArg[i];
4330 const char *z;
4331 char *zOne = 0;
4332 char *zTwo = 0;
4333 int bOption = 0;
4334 int bMustBeCol = 0;
4335
4336 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
4337 z = fts5ConfigSkipWhitespace(z);
4338 if( z && *z=='=' ){
4339 bOption = 1;
4340 z++;
4341 if( bMustBeCol ) z = 0;
4342 }
4343 z = fts5ConfigSkipWhitespace(z);
4344 if( z && z[0] ){
4345 int bDummy;
4346 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
4347 if( z && z[0] ) z = 0;
4348 }
4349
4350 if( rc==SQLITE_OK ){
4351 if( z==0 ){
4352 *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
4353 rc = SQLITE_ERROR;
4354 }else{
4355 if( bOption ){
4356 rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
4357 }else{
4358 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
4359 zOne = 0;
4360 }
4361 }
4362 }
4363
4364 sqlite3_free(zOne);
4365 sqlite3_free(zTwo);
4366 }
4367
4368 /* If a tokenizer= option was successfully parsed, the tokenizer has
4369 ** already been allocated. Otherwise, allocate an instance of the default
4370 ** tokenizer (unicode61) now. */
4371 if( rc==SQLITE_OK && pRet->pTok==0 ){
4372 rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
4373 }
4374
4375 /* If no zContent option was specified, fill in the default values. */
4376 if( rc==SQLITE_OK && pRet->zContent==0 ){
4377 const char *zTail = 0;
4378 assert( pRet->eContent==FTS5_CONTENT_NORMAL
4379 || pRet->eContent==FTS5_CONTENT_NONE
4380 );
4381 if( pRet->eContent==FTS5_CONTENT_NORMAL ){
4382 zTail = "content";
4383 }else if( pRet->bColumnsize ){
4384 zTail = "docsize";
4385 }
4386
4387 if( zTail ){
4388 pRet->zContent = sqlite3Fts5Mprintf(
4389 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
4390 );
4391 }
4392 }
4393
4394 if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
4395 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
4396 }
4397
4398 /* Formulate the zContentExprlist text */
4399 if( rc==SQLITE_OK ){
4400 rc = fts5ConfigMakeExprlist(pRet);
4401 }
4402
4403 if( rc!=SQLITE_OK ){
4404 sqlite3Fts5ConfigFree(pRet);
4405 *ppOut = 0;
4406 }
4407 return rc;
4408 }
4409
4410 /*
4411 ** Free the configuration object passed as the only argument.
4412 */
4413 static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
4414 if( pConfig ){
4415 int i;
4416 if( pConfig->pTok ){
4417 pConfig->pTokApi->xDelete(pConfig->pTok);
4418 }
4419 sqlite3_free(pConfig->zDb);
4420 sqlite3_free(pConfig->zName);
4421 for(i=0; i<pConfig->nCol; i++){
4422 sqlite3_free(pConfig->azCol[i]);
4423 }
4424 sqlite3_free(pConfig->azCol);
4425 sqlite3_free(pConfig->aPrefix);
4426 sqlite3_free(pConfig->zRank);
4427 sqlite3_free(pConfig->zRankArgs);
4428 sqlite3_free(pConfig->zContent);
4429 sqlite3_free(pConfig->zContentRowid);
4430 sqlite3_free(pConfig->zContentExprlist);
4431 sqlite3_free(pConfig);
4432 }
4433 }
4434
4435 /*
4436 ** Call sqlite3_declare_vtab() based on the contents of the configuration
4437 ** object passed as the only argument. Return SQLITE_OK if successful, or
4438 ** an SQLite error code if an error occurs.
4439 */
4440 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
4441 int i;
4442 int rc = SQLITE_OK;
4443 char *zSql;
4444
4445 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
4446 for(i=0; zSql && i<pConfig->nCol; i++){
4447 const char *zSep = (i==0?"":", ");
4448 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
4449 }
4450 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
4451 zSql, pConfig->zName, FTS5_RANK_NAME
4452 );
4453
4454 assert( zSql || rc==SQLITE_NOMEM );
4455 if( zSql ){
4456 rc = sqlite3_declare_vtab(pConfig->db, zSql);
4457 sqlite3_free(zSql);
4458 }
4459
4460 return rc;
4461 }
4462
4463 /*
4464 ** Tokenize the text passed via the second and third arguments.
4465 **
4466 ** The callback is invoked once for each token in the input text. The
4467 ** arguments passed to it are, in order:
4468 **
4469 ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize()
4470 ** const char *pToken // Pointer to buffer containing token
4471 ** int nToken // Size of token in bytes
4472 ** int iStart // Byte offset of start of token within input text
4473 ** int iEnd // Byte offset of end of token within input text
4474 ** int iPos // Position of token in input (first token is 0)
4475 **
4476 ** If the callback returns a non-zero value the tokenization is abandoned
4477 ** and no further callbacks are issued.
4478 **
4479 ** This function returns SQLITE_OK if successful or an SQLite error code
4480 ** if an error occurs. If the tokenization was abandoned early because
4481 ** the callback returned SQLITE_DONE, this is not an error and this function
4482 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early
4483 ** because the callback returned another non-zero value, it is assumed
4484 ** to be an SQLite error code and returned to the caller.
4485 */
4486 static int sqlite3Fts5Tokenize(
4487 Fts5Config *pConfig, /* FTS5 Configuration object */
4488 int flags, /* FTS5_TOKENIZE_* flags */
4489 const char *pText, int nText, /* Text to tokenize */
4490 void *pCtx, /* Context passed to xToken() */
4491 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
4492 ){
4493 if( pText==0 ) return SQLITE_OK;
4494 return pConfig->pTokApi->xTokenize(
4495 pConfig->pTok, pCtx, flags, pText, nText, xToken
4496 );
4497 }
4498
/*
** pIn should point at the start of a comma-separated list of SQL literals
** that is terminated by a ')' character. Whitespace is permitted around
** each literal.
**
** On success a pointer to the terminating ')' is returned. If the input
** does not have the expected form, NULL is returned.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    /* One list element: [whitespace] literal [whitespace] */
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);

    if( z==0 ) return 0;            /* Not a literal */
    if( *z==')' ) return z;         /* End of list */
    if( *z!=',' ) return 0;         /* Unexpected character */
    z++;                            /* Step over the ',' */
  }
}
4522
4523 /*
4524 ** Parameter zIn contains a rank() function specification. The format of
4525 ** this is:
4526 **
4527 ** + Bareword (function name)
4528 ** + Open parenthesis - "("
4529 ** + Zero or more SQL literals in a comma separated list
4530 ** + Close parenthesis - ")"
4531 */
4532 static int sqlite3Fts5ConfigParseRank(
4533 const char *zIn, /* Input string */
4534 char **pzRank, /* OUT: Rank function name */
4535 char **pzRankArgs /* OUT: Rank function arguments */
4536 ){
4537 const char *p = zIn;
4538 const char *pRank;
4539 char *zRank = 0;
4540 char *zRankArgs = 0;
4541 int rc = SQLITE_OK;
4542
4543 *pzRank = 0;
4544 *pzRankArgs = 0;
4545
4546 if( p==0 ){
4547 rc = SQLITE_ERROR;
4548 }else{
4549 p = fts5ConfigSkipWhitespace(p);
4550 pRank = p;
4551 p = fts5ConfigSkipBareword(p);
4552
4553 if( p ){
4554 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
4555 if( zRank ) memcpy(zRank, pRank, p-pRank);
4556 }else{
4557 rc = SQLITE_ERROR;
4558 }
4559
4560 if( rc==SQLITE_OK ){
4561 p = fts5ConfigSkipWhitespace(p);
4562 if( *p!='(' ) rc = SQLITE_ERROR;
4563 p++;
4564 }
4565 if( rc==SQLITE_OK ){
4566 const char *pArgs;
4567 p = fts5ConfigSkipWhitespace(p);
4568 pArgs = p;
4569 if( *p!=')' ){
4570 p = fts5ConfigSkipArgs(p);
4571 if( p==0 ){
4572 rc = SQLITE_ERROR;
4573 }else{
4574 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
4575 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
4576 }
4577 }
4578 }
4579 }
4580
4581 if( rc!=SQLITE_OK ){
4582 sqlite3_free(zRank);
4583 assert( zRankArgs==0 );
4584 }else{
4585 *pzRank = zRank;
4586 *pzRankArgs = zRankArgs;
4587 }
4588 return rc;
4589 }
4590
4591 static int sqlite3Fts5ConfigSetValue(
4592 Fts5Config *pConfig,
4593 const char *zKey,
4594 sqlite3_value *pVal,
4595 int *pbBadkey
4596 ){
4597 int rc = SQLITE_OK;
4598
4599 if( 0==sqlite3_stricmp(zKey, "pgsz") ){
4600 int pgsz = 0;
4601 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4602 pgsz = sqlite3_value_int(pVal);
4603 }
4604 if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){
4605 *pbBadkey = 1;
4606 }else{
4607 pConfig->pgsz = pgsz;
4608 }
4609 }
4610
4611 else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
4612 int nHashSize = -1;
4613 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4614 nHashSize = sqlite3_value_int(pVal);
4615 }
4616 if( nHashSize<=0 ){
4617 *pbBadkey = 1;
4618 }else{
4619 pConfig->nHashSize = nHashSize;
4620 }
4621 }
4622
4623 else if( 0==sqlite3_stricmp(zKey, "automerge") ){
4624 int nAutomerge = -1;
4625 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4626 nAutomerge = sqlite3_value_int(pVal);
4627 }
4628 if( nAutomerge<0 || nAutomerge>64 ){
4629 *pbBadkey = 1;
4630 }else{
4631 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
4632 pConfig->nAutomerge = nAutomerge;
4633 }
4634 }
4635
4636 else if( 0==sqlite3_stricmp(zKey, "usermerge") ){
4637 int nUsermerge = -1;
4638 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4639 nUsermerge = sqlite3_value_int(pVal);
4640 }
4641 if( nUsermerge<2 || nUsermerge>16 ){
4642 *pbBadkey = 1;
4643 }else{
4644 pConfig->nUsermerge = nUsermerge;
4645 }
4646 }
4647
4648 else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
4649 int nCrisisMerge = -1;
4650 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4651 nCrisisMerge = sqlite3_value_int(pVal);
4652 }
4653 if( nCrisisMerge<0 ){
4654 *pbBadkey = 1;
4655 }else{
4656 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
4657 pConfig->nCrisisMerge = nCrisisMerge;
4658 }
4659 }
4660
4661 else if( 0==sqlite3_stricmp(zKey, "rank") ){
4662 const char *zIn = (const char*)sqlite3_value_text(pVal);
4663 char *zRank;
4664 char *zRankArgs;
4665 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
4666 if( rc==SQLITE_OK ){
4667 sqlite3_free(pConfig->zRank);
4668 sqlite3_free(pConfig->zRankArgs);
4669 pConfig->zRank = zRank;
4670 pConfig->zRankArgs = zRankArgs;
4671 }else if( rc==SQLITE_ERROR ){
4672 rc = SQLITE_OK;
4673 *pbBadkey = 1;
4674 }
4675 }else{
4676 *pbBadkey = 1;
4677 }
4678 return rc;
4679 }
4680
4681 /*
4682 ** Load the contents of the %_config table into memory.
4683 */
4684 static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
4685 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
4686 char *zSql;
4687 sqlite3_stmt *p = 0;
4688 int rc = SQLITE_OK;
4689 int iVersion = 0;
4690
4691 /* Set default values */
4692 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
4693 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
4694 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
4695 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
4696 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
4697
4698 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
4699 if( zSql ){
4700 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
4701 sqlite3_free(zSql);
4702 }
4703
4704 assert( rc==SQLITE_OK || p==0 );
4705 if( rc==SQLITE_OK ){
4706 while( SQLITE_ROW==sqlite3_step(p) ){
4707 const char *zK = (const char*)sqlite3_column_text(p, 0);
4708 sqlite3_value *pVal = sqlite3_column_value(p, 1);
4709 if( 0==sqlite3_stricmp(zK, "version") ){
4710 iVersion = sqlite3_value_int(pVal);
4711 }else{
4712 int bDummy = 0;
4713 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
4714 }
4715 }
4716 rc = sqlite3_finalize(p);
4717 }
4718
4719 if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
4720 rc = SQLITE_ERROR;
4721 if( pConfig->pzErrmsg ){
4722 assert( 0==*pConfig->pzErrmsg );
4723 *pConfig->pzErrmsg = sqlite3_mprintf(
4724 "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
4725 iVersion, FTS5_CURRENT_VERSION
4726 );
4727 }
4728 }
4729
4730 if( rc==SQLITE_OK ){
4731 pConfig->iCookie = iCookie;
4732 }
4733 return rc;
4734 }
4735
4736 /*
4737 ** 2014 May 31
4738 **
4739 ** The author disclaims copyright to this source code. In place of
4740 ** a legal notice, here is a blessing:
4741 **
4742 ** May you do good and not evil.
4743 ** May you find forgiveness for yourself and forgive others.
4744 ** May you share freely, never taking more than you give.
4745 **
4746 ******************************************************************************
4747 **
4748 */
4749
4750
4751
4752 /* #include "fts5Int.h" */
4753 /* #include "fts5parse.h" */
4754
4755 /*
4756 ** All token types in the generated fts5parse.h file are greater than 0.
4757 */
4758 #define FTS5_EOF 0
4759
4760 #define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
4761
4762 typedef struct Fts5ExprTerm Fts5ExprTerm;
4763
4764 /*
4765 ** Functions generated by lemon from fts5parse.y.
4766 */
4767 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
4768 static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
4769 static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
4770 #ifndef NDEBUG
4771 /* #include <stdio.h> */
4772 static void sqlite3Fts5ParserTrace(FILE*, char*);
4773 #endif
4774
4775
4776 struct Fts5Expr {
4777 Fts5Index *pIndex;
4778 Fts5Config *pConfig;
4779 Fts5ExprNode *pRoot;
4780 int bDesc; /* Iterate in descending rowid order */
4781 int nPhrase; /* Number of phrases in expression */
4782 Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */
4783 };
4784
4785 /*
4786 ** eType:
4787 ** Expression node type. Always one of:
4788 **
4789 ** FTS5_AND (nChild, apChild valid)
4790 ** FTS5_OR (nChild, apChild valid)
4791 ** FTS5_NOT (nChild, apChild valid)
4792 ** FTS5_STRING (pNear valid)
4793 ** FTS5_TERM (pNear valid)
4794 */
4795 struct Fts5ExprNode {
4796 int eType; /* Node type */
4797 int bEof; /* True at EOF */
4798 int bNomatch; /* True if entry is not a match */
4799
4800 /* Next method for this node. */
4801 int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
4802
4803 i64 iRowid; /* Current rowid */
4804 Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */
4805
4806 /* Child nodes. For a NOT node, this array always contains 2 entries. For
4807 ** AND or OR nodes, it contains 2 or more entries. */
4808 int nChild; /* Number of child nodes */
4809 Fts5ExprNode *apChild[1]; /* Array of child nodes */
4810 };
4811
4812 #define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)
4813
4814 /*
4815 ** Invoke the xNext method of an Fts5ExprNode object. This macro should be
4816 ** used as if it has the same signature as the xNext() methods themselves.
4817 */
4818 #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
4819
4820 /*
4821 ** An instance of the following structure represents a single search term
4822 ** or term prefix.
4823 */
4824 struct Fts5ExprTerm {
4825 int bPrefix; /* True for a prefix term */
4826 char *zTerm; /* nul-terminated term */
4827 Fts5IndexIter *pIter; /* Iterator for this term */
4828 Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
4829 };
4830
4831 /*
4832 ** A phrase. One or more terms that must appear in a contiguous sequence
4833 ** within a document for it to match.
4834 */
4835 struct Fts5ExprPhrase {
4836 Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */
4837 Fts5Buffer poslist; /* Current position list */
4838 int nTerm; /* Number of entries in aTerm[] */
4839 Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */
4840 };
4841
4842 /*
4843 ** One or more phrases that must appear within a certain token distance of
4844 ** each other within each matching document.
4845 */
4846 struct Fts5ExprNearset {
4847 int nNear; /* NEAR parameter */
4848 Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */
4849 int nPhrase; /* Number of entries in aPhrase[] array */
4850 Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */
4851 };
4852
4853
4854 /*
4855 ** Parse context.
4856 */
4857 struct Fts5Parse {
4858 Fts5Config *pConfig;
4859 char *zErr;
4860 int rc;
4861 int nPhrase; /* Size of apPhrase array */
4862 Fts5ExprPhrase **apPhrase; /* Array of all phrases */
4863 Fts5ExprNode *pExpr; /* Result of a successful parse */
4864 };
4865
4866 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
4867 va_list ap;
4868 va_start(ap, zFmt);
4869 if( pParse->rc==SQLITE_OK ){
4870 pParse->zErr = sqlite3_vmprintf(zFmt, ap);
4871 pParse->rc = SQLITE_ERROR;
4872 }
4873 va_end(ap);
4874 }
4875
/*
** Return 1 if character t is a space, tab, newline or carriage-return,
** or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
4879
4880 /*
4881 ** Read the first token from the nul-terminated string at *pz.
4882 */
4883 static int fts5ExprGetToken(
4884 Fts5Parse *pParse,
4885 const char **pz, /* IN/OUT: Pointer into buffer */
4886 Fts5Token *pToken
4887 ){
4888 const char *z = *pz;
4889 int tok;
4890
4891 /* Skip past any whitespace */
4892 while( fts5ExprIsspace(*z) ) z++;
4893
4894 pToken->p = z;
4895 pToken->n = 1;
4896 switch( *z ){
4897 case '(': tok = FTS5_LP; break;
4898 case ')': tok = FTS5_RP; break;
4899 case '{': tok = FTS5_LCP; break;
4900 case '}': tok = FTS5_RCP; break;
4901 case ':': tok = FTS5_COLON; break;
4902 case ',': tok = FTS5_COMMA; break;
4903 case '+': tok = FTS5_PLUS; break;
4904 case '*': tok = FTS5_STAR; break;
4905 case '-': tok = FTS5_MINUS; break;
4906 case '\0': tok = FTS5_EOF; break;
4907
4908 case '"': {
4909 const char *z2;
4910 tok = FTS5_STRING;
4911
4912 for(z2=&z[1]; 1; z2++){
4913 if( z2[0]=='"' ){
4914 z2++;
4915 if( z2[0]!='"' ) break;
4916 }
4917 if( z2[0]=='\0' ){
4918 sqlite3Fts5ParseError(pParse, "unterminated string");
4919 return FTS5_EOF;
4920 }
4921 }
4922 pToken->n = (z2 - z);
4923 break;
4924 }
4925
4926 default: {
4927 const char *z2;
4928 if( sqlite3Fts5IsBareword(z[0])==0 ){
4929 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
4930 return FTS5_EOF;
4931 }
4932 tok = FTS5_STRING;
4933 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
4934 pToken->n = (z2 - z);
4935 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR;
4936 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
4937 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
4938 break;
4939 }
4940 }
4941
4942 *pz = &pToken->p[pToken->n];
4943 return tok;
4944 }
4945
4946 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); }
4947 static void fts5ParseFree(void *p){ sqlite3_free(p); }
4948
4949 static int sqlite3Fts5ExprNew(
4950 Fts5Config *pConfig, /* FTS5 Configuration */
4951 const char *zExpr, /* Expression text */
4952 Fts5Expr **ppNew,
4953 char **pzErr
4954 ){
4955 Fts5Parse sParse;
4956 Fts5Token token;
4957 const char *z = zExpr;
4958 int t; /* Next token type */
4959 void *pEngine;
4960 Fts5Expr *pNew;
4961
4962 *ppNew = 0;
4963 *pzErr = 0;
4964 memset(&sParse, 0, sizeof(sParse));
4965 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
4966 if( pEngine==0 ){ return SQLITE_NOMEM; }
4967 sParse.pConfig = pConfig;
4968
4969 do {
4970 t = fts5ExprGetToken(&sParse, &z, &token);
4971 sqlite3Fts5Parser(pEngine, t, token, &sParse);
4972 }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
4973 sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
4974
4975 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
4976 if( sParse.rc==SQLITE_OK ){
4977 *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
4978 if( pNew==0 ){
4979 sParse.rc = SQLITE_NOMEM;
4980 sqlite3Fts5ParseNodeFree(sParse.pExpr);
4981 }else{
4982 if( !sParse.pExpr ){
4983 const int nByte = sizeof(Fts5ExprNode);
4984 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
4985 if( pNew->pRoot ){
4986 pNew->pRoot->bEof = 1;
4987 }
4988 }else{
4989 pNew->pRoot = sParse.pExpr;
4990 }
4991 pNew->pIndex = 0;
4992 pNew->pConfig = pConfig;
4993 pNew->apExprPhrase = sParse.apPhrase;
4994 pNew->nPhrase = sParse.nPhrase;
4995 sParse.apPhrase = 0;
4996 }
4997 }else{
4998 sqlite3Fts5ParseNodeFree(sParse.pExpr);
4999 }
5000
5001 sqlite3_free(sParse.apPhrase);
5002 *pzErr = sParse.zErr;
5003 return sParse.rc;
5004 }
5005
5006 /*
5007 ** Free the expression node object passed as the only argument.
5008 */
5009 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
5010 if( p ){
5011 int i;
5012 for(i=0; i<p->nChild; i++){
5013 sqlite3Fts5ParseNodeFree(p->apChild[i]);
5014 }
5015 sqlite3Fts5ParseNearsetFree(p->pNear);
5016 sqlite3_free(p);
5017 }
5018 }
5019
5020 /*
5021 ** Free the expression object passed as the only argument.
5022 */
5023 static void sqlite3Fts5ExprFree(Fts5Expr *p){
5024 if( p ){
5025 sqlite3Fts5ParseNodeFree(p->pRoot);
5026 sqlite3_free(p->apExprPhrase);
5027 sqlite3_free(p);
5028 }
5029 }
5030
5031 /*
5032 ** Argument pTerm must be a synonym iterator. Return the current rowid
5033 ** that it points to.
5034 */
5035 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
5036 i64 iRet = 0;
5037 int bRetValid = 0;
5038 Fts5ExprTerm *p;
5039
5040 assert( pTerm->pSynonym );
5041 assert( bDesc==0 || bDesc==1 );
5042 for(p=pTerm; p; p=p->pSynonym){
5043 if( 0==sqlite3Fts5IterEof(p->pIter) ){
5044 i64 iRowid = p->pIter->iRowid;
5045 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
5046 iRet = iRowid;
5047 bRetValid = 1;
5048 }
5049 }
5050 }
5051
5052 if( pbEof && bRetValid==0 ) *pbEof = 1;
5053 return iRet;
5054 }
5055
5056 /*
5057 ** Argument pTerm must be a synonym iterator.
5058 */
5059 static int fts5ExprSynonymList(
5060 Fts5ExprTerm *pTerm,
5061 i64 iRowid,
5062 Fts5Buffer *pBuf, /* Use this buffer for space if required */
5063 u8 **pa, int *pn
5064 ){
5065 Fts5PoslistReader aStatic[4];
5066 Fts5PoslistReader *aIter = aStatic;
5067 int nIter = 0;
5068 int nAlloc = 4;
5069 int rc = SQLITE_OK;
5070 Fts5ExprTerm *p;
5071
5072 assert( pTerm->pSynonym );
5073 for(p=pTerm; p; p=p->pSynonym){
5074 Fts5IndexIter *pIter = p->pIter;
5075 if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
5076 if( pIter->nData==0 ) continue;
5077 if( nIter==nAlloc ){
5078 int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
5079 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
5080 if( aNew==0 ){
5081 rc = SQLITE_NOMEM;
5082 goto synonym_poslist_out;
5083 }
5084 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
5085 nAlloc = nAlloc*2;
5086 if( aIter!=aStatic ) sqlite3_free(aIter);
5087 aIter = aNew;
5088 }
5089 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
5090 assert( aIter[nIter].bEof==0 );
5091 nIter++;
5092 }
5093 }
5094
5095 if( nIter==1 ){
5096 *pa = (u8*)aIter[0].a;
5097 *pn = aIter[0].n;
5098 }else{
5099 Fts5PoslistWriter writer = {0};
5100 i64 iPrev = -1;
5101 fts5BufferZero(pBuf);
5102 while( 1 ){
5103 int i;
5104 i64 iMin = FTS5_LARGEST_INT64;
5105 for(i=0; i<nIter; i++){
5106 if( aIter[i].bEof==0 ){
5107 if( aIter[i].iPos==iPrev ){
5108 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
5109 }
5110 if( aIter[i].iPos<iMin ){
5111 iMin = aIter[i].iPos;
5112 }
5113 }
5114 }
5115 if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
5116 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
5117 iPrev = iMin;
5118 }
5119 if( rc==SQLITE_OK ){
5120 *pa = pBuf->p;
5121 *pn = pBuf->n;
5122 }
5123 }
5124
5125 synonym_poslist_out:
5126 if( aIter!=aStatic ) sqlite3_free(aIter);
5127 return rc;
5128 }
5129
5130
5131 /*
5132 ** All individual term iterators in pPhrase are guaranteed to be valid and
5133 ** pointing to the same rowid when this function is called. This function
5134 ** checks if the current rowid really is a match, and if so populates
5135 ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
5136 ** is set to true if this is really a match, or false otherwise.
5137 **
5138 ** SQLITE_OK is returned if an error occurs, or an SQLite error code
5139 ** otherwise. It is not considered an error code if the current rowid is
5140 ** not a match.
5141 */
5142 static int fts5ExprPhraseIsMatch(
5143 Fts5ExprNode *pNode, /* Node pPhrase belongs to */
5144 Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
5145 int *pbMatch /* OUT: Set to true if really a match */
5146 ){
5147 Fts5PoslistWriter writer = {0};
5148 Fts5PoslistReader aStatic[4];
5149 Fts5PoslistReader *aIter = aStatic;
5150 int i;
5151 int rc = SQLITE_OK;
5152
5153 fts5BufferZero(&pPhrase->poslist);
5154
5155 /* If the aStatic[] array is not large enough, allocate a large array
5156 ** using sqlite3_malloc(). This approach could be improved upon. */
5157 if( pPhrase->nTerm>ArraySize(aStatic) ){
5158 int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
5159 aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
5160 if( !aIter ) return SQLITE_NOMEM;
5161 }
5162 memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
5163
5164 /* Initialize a term iterator for each term in the phrase */
5165 for(i=0; i<pPhrase->nTerm; i++){
5166 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
5167 int n = 0;
5168 int bFlag = 0;
5169 u8 *a = 0;
5170 if( pTerm->pSynonym ){
5171 Fts5Buffer buf = {0, 0, 0};
5172 rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
5173 if( rc ){
5174 sqlite3_free(a);
5175 goto ismatch_out;
5176 }
5177 if( a==buf.p ) bFlag = 1;
5178 }else{
5179 a = (u8*)pTerm->pIter->pData;
5180 n = pTerm->pIter->nData;
5181 }
5182 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
5183 aIter[i].bFlag = (u8)bFlag;
5184 if( aIter[i].bEof ) goto ismatch_out;
5185 }
5186
5187 while( 1 ){
5188 int bMatch;
5189 i64 iPos = aIter[0].iPos;
5190 do {
5191 bMatch = 1;
5192 for(i=0; i<pPhrase->nTerm; i++){
5193 Fts5PoslistReader *pPos = &aIter[i];
5194 i64 iAdj = iPos + i;
5195 if( pPos->iPos!=iAdj ){
5196 bMatch = 0;
5197 while( pPos->iPos<iAdj ){
5198 if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
5199 }
5200 if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
5201 }
5202 }
5203 }while( bMatch==0 );
5204
5205 /* Append position iPos to the output */
5206 rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
5207 if( rc!=SQLITE_OK ) goto ismatch_out;
5208
5209 for(i=0; i<pPhrase->nTerm; i++){
5210 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
5211 }
5212 }
5213
5214 ismatch_out:
5215 *pbMatch = (pPhrase->poslist.n>0);
5216 for(i=0; i<pPhrase->nTerm; i++){
5217 if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
5218 }
5219 if( aIter!=aStatic ) sqlite3_free(aIter);
5220 return rc;
5221 }
5222
5223 typedef struct Fts5LookaheadReader Fts5LookaheadReader;
5224 struct Fts5LookaheadReader {
5225 const u8 *a; /* Buffer containing position list */
5226 int n; /* Size of buffer a[] in bytes */
5227 int i; /* Current offset in position list */
5228 i64 iPos; /* Current position */
5229 i64 iLookahead; /* Next position */
5230 };
5231
5232 #define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
5233
5234 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
5235 p->iPos = p->iLookahead;
5236 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
5237 p->iLookahead = FTS5_LOOKAHEAD_EOF;
5238 }
5239 return (p->iPos==FTS5_LOOKAHEAD_EOF);
5240 }
5241
5242 static int fts5LookaheadReaderInit(
5243 const u8 *a, int n, /* Buffer to read position list from */
5244 Fts5LookaheadReader *p /* Iterator object to initialize */
5245 ){
5246 memset(p, 0, sizeof(Fts5LookaheadReader));
5247 p->a = a;
5248 p->n = n;
5249 fts5LookaheadReaderNext(p);
5250 return fts5LookaheadReaderNext(p);
5251 }
5252
5253 typedef struct Fts5NearTrimmer Fts5NearTrimmer;
5254 struct Fts5NearTrimmer {
5255 Fts5LookaheadReader reader; /* Input iterator */
5256 Fts5PoslistWriter writer; /* Writer context */
5257 Fts5Buffer *pOut; /* Output poslist */
5258 };
5259
5260 /*
5261 ** The near-set object passed as the first argument contains more than
5262 ** one phrase. All phrases currently point to the same row. The
5263 ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
5264 ** tests if the current row contains instances of each phrase sufficiently
5265 ** close together to meet the NEAR constraint. Non-zero is returned if it
5266 ** does, or zero otherwise.
5267 **
5268 ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
5269 ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
5270 ** occurs within this function (*pRc) is set accordingly before returning.
5271 ** The return value is undefined in both these cases.
5272 **
5273 ** If no error occurs and non-zero (a match) is returned, the position-list
5274 ** of each phrase object is edited to contain only those entries that
5275 ** meet the constraint before returning.
5276 */
5277 static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
5278 Fts5NearTrimmer aStatic[4];
5279 Fts5NearTrimmer *a = aStatic;
5280 Fts5ExprPhrase **apPhrase = pNear->apPhrase;
5281
5282 int i;
5283 int rc = *pRc;
5284 int bMatch;
5285
5286 assert( pNear->nPhrase>1 );
5287
5288 /* If the aStatic[] array is not large enough, allocate a large array
5289 ** using sqlite3_malloc(). This approach could be improved upon. */
5290 if( pNear->nPhrase>ArraySize(aStatic) ){
5291 int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
5292 a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
5293 }else{
5294 memset(aStatic, 0, sizeof(aStatic));
5295 }
5296 if( rc!=SQLITE_OK ){
5297 *pRc = rc;
5298 return 0;
5299 }
5300
5301 /* Initialize a lookahead iterator for each phrase. After passing the
5302 ** buffer and buffer size to the lookaside-reader init function, zero
5303 ** the phrase poslist buffer. The new poslist for the phrase (containing
5304 ** the same entries as the original with some entries removed on account
5305 ** of the NEAR constraint) is written over the original even as it is
5306 ** being read. This is safe as the entries for the new poslist are a
5307 ** subset of the old, so it is not possible for data yet to be read to
5308 ** be overwritten. */
5309 for(i=0; i<pNear->nPhrase; i++){
5310 Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
5311 fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
5312 pPoslist->n = 0;
5313 a[i].pOut = pPoslist;
5314 }
5315
5316 while( 1 ){
5317 int iAdv;
5318 i64 iMin;
5319 i64 iMax;
5320
5321 /* This block advances the phrase iterators until they point to a set of
5322 ** entries that together comprise a match. */
5323 iMax = a[0].reader.iPos;
5324 do {
5325 bMatch = 1;
5326 for(i=0; i<pNear->nPhrase; i++){
5327 Fts5LookaheadReader *pPos = &a[i].reader;
5328 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
5329 if( pPos->iPos<iMin || pPos->iPos>iMax ){
5330 bMatch = 0;
5331 while( pPos->iPos<iMin ){
5332 if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
5333 }
5334 if( pPos->iPos>iMax ) iMax = pPos->iPos;
5335 }
5336 }
5337 }while( bMatch==0 );
5338
5339 /* Add an entry to each output position list */
5340 for(i=0; i<pNear->nPhrase; i++){
5341 i64 iPos = a[i].reader.iPos;
5342 Fts5PoslistWriter *pWriter = &a[i].writer;
5343 if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
5344 sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
5345 }
5346 }
5347
5348 iAdv = 0;
5349 iMin = a[0].reader.iLookahead;
5350 for(i=0; i<pNear->nPhrase; i++){
5351 if( a[i].reader.iLookahead < iMin ){
5352 iMin = a[i].reader.iLookahead;
5353 iAdv = i;
5354 }
5355 }
5356 if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
5357 }
5358
5359 ismatch_out: {
5360 int bRet = a[0].pOut->n>0;
5361 *pRc = rc;
5362 if( a!=aStatic ) sqlite3_free(a);
5363 return bRet;
5364 }
5365 }
5366
5367 /*
5368 ** Advance iterator pIter until it points to a value equal to or laster
5369 ** than the initial value of *piLast. If this means the iterator points
5370 ** to a value laster than *piLast, update *piLast to the new lastest value.
5371 **
5372 ** If the iterator reaches EOF, set *pbEof to true before returning. If
5373 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
5374 ** are set, return a non-zero value. Otherwise, return zero.
5375 */
5376 static int fts5ExprAdvanceto(
5377 Fts5IndexIter *pIter, /* Iterator to advance */
5378 int bDesc, /* True if iterator is "rowid DESC" */
5379 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
5380 int *pRc, /* OUT: Error code */
5381 int *pbEof /* OUT: Set to true if EOF */
5382 ){
5383 i64 iLast = *piLast;
5384 i64 iRowid;
5385
5386 iRowid = pIter->iRowid;
5387 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5388 int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
5389 if( rc || sqlite3Fts5IterEof(pIter) ){
5390 *pRc = rc;
5391 *pbEof = 1;
5392 return 1;
5393 }
5394 iRowid = pIter->iRowid;
5395 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
5396 }
5397 *piLast = iRowid;
5398
5399 return 0;
5400 }
5401
5402 static int fts5ExprSynonymAdvanceto(
5403 Fts5ExprTerm *pTerm, /* Term iterator to advance */
5404 int bDesc, /* True if iterator is "rowid DESC" */
5405 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
5406 int *pRc /* OUT: Error code */
5407 ){
5408 int rc = SQLITE_OK;
5409 i64 iLast = *piLast;
5410 Fts5ExprTerm *p;
5411 int bEof = 0;
5412
5413 for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
5414 if( sqlite3Fts5IterEof(p->pIter)==0 ){
5415 i64 iRowid = p->pIter->iRowid;
5416 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5417 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
5418 }
5419 }
5420 }
5421
5422 if( rc!=SQLITE_OK ){
5423 *pRc = rc;
5424 bEof = 1;
5425 }else{
5426 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
5427 }
5428 return bEof;
5429 }
5430
5431
5432 static int fts5ExprNearTest(
5433 int *pRc,
5434 Fts5Expr *pExpr, /* Expression that pNear is a part of */
5435 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */
5436 ){
5437 Fts5ExprNearset *pNear = pNode->pNear;
5438 int rc = *pRc;
5439
5440 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
5441 Fts5ExprTerm *pTerm;
5442 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
5443 pPhrase->poslist.n = 0;
5444 for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
5445 Fts5IndexIter *pIter = pTerm->pIter;
5446 if( sqlite3Fts5IterEof(pIter)==0 ){
5447 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
5448 pPhrase->poslist.n = 1;
5449 }
5450 }
5451 }
5452 return pPhrase->poslist.n;
5453 }else{
5454 int i;
5455
5456 /* Check that each phrase in the nearset matches the current row.
5457 ** Populate the pPhrase->poslist buffers at the same time. If any
5458 ** phrase is not a match, break out of the loop early. */
5459 for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
5460 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5461 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
5462 int bMatch = 0;
5463 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
5464 if( bMatch==0 ) break;
5465 }else{
5466 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
5467 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
5468 }
5469 }
5470
5471 *pRc = rc;
5472 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
5473 return 1;
5474 }
5475 return 0;
5476 }
5477 }
5478
5479
5480 /*
5481 ** Initialize all term iterators in the pNear object. If any term is found
5482 ** to match no documents at all, return immediately without initializing any
5483 ** further iterators.
5484 **
5485 ** If an error occurs, return an SQLite error code. Otherwise, return
5486 ** SQLITE_OK. It is not considered an error if some term matches zero
5487 ** documents.
5488 */
5489 static int fts5ExprNearInitAll(
5490 Fts5Expr *pExpr,
5491 Fts5ExprNode *pNode
5492 ){
5493 Fts5ExprNearset *pNear = pNode->pNear;
5494 int i;
5495
5496 assert( pNode->bNomatch==0 );
5497 for(i=0; i<pNear->nPhrase; i++){
5498 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5499 if( pPhrase->nTerm==0 ){
5500 pNode->bEof = 1;
5501 return SQLITE_OK;
5502 }else{
5503 int j;
5504 for(j=0; j<pPhrase->nTerm; j++){
5505 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
5506 Fts5ExprTerm *p;
5507 int bHit = 0;
5508
5509 for(p=pTerm; p; p=p->pSynonym){
5510 int rc;
5511 if( p->pIter ){
5512 sqlite3Fts5IterClose(p->pIter);
5513 p->pIter = 0;
5514 }
5515 rc = sqlite3Fts5IndexQuery(
5516 pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
5517 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
5518 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
5519 pNear->pColset,
5520 &p->pIter
5521 );
5522 assert( (rc==SQLITE_OK)==(p->pIter!=0) );
5523 if( rc!=SQLITE_OK ) return rc;
5524 if( 0==sqlite3Fts5IterEof(p->pIter) ){
5525 bHit = 1;
5526 }
5527 }
5528
5529 if( bHit==0 ){
5530 pNode->bEof = 1;
5531 return SQLITE_OK;
5532 }
5533 }
5534 }
5535 }
5536
5537 pNode->bEof = 0;
5538 return SQLITE_OK;
5539 }
5540
5541 /*
5542 ** If pExpr is an ASC iterator, this function returns a value with the
5543 ** same sign as:
5544 **
5545 ** (iLhs - iRhs)
5546 **
5547 ** Otherwise, if this is a DESC iterator, the opposite is returned:
5548 **
5549 ** (iRhs - iLhs)
5550 */
5551 static int fts5RowidCmp(
5552 Fts5Expr *pExpr,
5553 i64 iLhs,
5554 i64 iRhs
5555 ){
5556 assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
5557 if( pExpr->bDesc==0 ){
5558 if( iLhs<iRhs ) return -1;
5559 return (iLhs > iRhs);
5560 }else{
5561 if( iLhs>iRhs ) return -1;
5562 return (iLhs < iRhs);
5563 }
5564 }
5565
5566 static void fts5ExprSetEof(Fts5ExprNode *pNode){
5567 int i;
5568 pNode->bEof = 1;
5569 pNode->bNomatch = 0;
5570 for(i=0; i<pNode->nChild; i++){
5571 fts5ExprSetEof(pNode->apChild[i]);
5572 }
5573 }
5574
5575 static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
5576 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
5577 Fts5ExprNearset *pNear = pNode->pNear;
5578 int i;
5579 for(i=0; i<pNear->nPhrase; i++){
5580 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5581 pPhrase->poslist.n = 0;
5582 }
5583 }else{
5584 int i;
5585 for(i=0; i<pNode->nChild; i++){
5586 fts5ExprNodeZeroPoslist(pNode->apChild[i]);
5587 }
5588 }
5589 }
5590
5591
5592
5593 /*
5594 ** Compare the values currently indicated by the two nodes as follows:
5595 **
5596 ** res = (*p1) - (*p2)
5597 **
5598 ** Nodes that point to values that come later in the iteration order are
5599 ** considered to be larger. Nodes at EOF are the largest of all.
5600 **
5601 ** This means that if the iteration order is ASC, then numerically larger
5602 ** rowids are considered larger. Or if it is the default DESC, numerically
5603 ** smaller rowids are larger.
5604 */
5605 static int fts5NodeCompare(
5606 Fts5Expr *pExpr,
5607 Fts5ExprNode *p1,
5608 Fts5ExprNode *p2
5609 ){
5610 if( p2->bEof ) return -1;
5611 if( p1->bEof ) return +1;
5612 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
5613 }
5614
5615 /*
5616 ** All individual term iterators in pNear are guaranteed to be valid when
5617 ** this function is called. This function checks if all term iterators
5618 ** point to the same rowid, and if not, advances them until they do.
5619 ** If an EOF is reached before this happens, *pbEof is set to true before
5620 ** returning.
5621 **
5622 ** SQLITE_OK is returned if an error occurs, or an SQLite error code
5623 ** otherwise. It is not considered an error code if an iterator reaches
5624 ** EOF.
5625 */
5626 static int fts5ExprNodeTest_STRING(
5627 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
5628 Fts5ExprNode *pNode
5629 ){
5630 Fts5ExprNearset *pNear = pNode->pNear;
5631 Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
5632 int rc = SQLITE_OK;
5633 i64 iLast; /* Lastest rowid any iterator points to */
5634 int i, j; /* Phrase and token index, respectively */
5635 int bMatch; /* True if all terms are at the same rowid */
5636 const int bDesc = pExpr->bDesc;
5637
5638 /* Check that this node should not be FTS5_TERM */
5639 assert( pNear->nPhrase>1
5640 || pNear->apPhrase[0]->nTerm>1
5641 || pNear->apPhrase[0]->aTerm[0].pSynonym
5642 );
5643
5644 /* Initialize iLast, the "lastest" rowid any iterator points to. If the
5645 ** iterator skips through rowids in the default ascending order, this means
5646 ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
5647 ** means the minimum rowid. */
5648 if( pLeft->aTerm[0].pSynonym ){
5649 iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
5650 }else{
5651 iLast = pLeft->aTerm[0].pIter->iRowid;
5652 }
5653
5654 do {
5655 bMatch = 1;
5656 for(i=0; i<pNear->nPhrase; i++){
5657 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5658 for(j=0; j<pPhrase->nTerm; j++){
5659 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
5660 if( pTerm->pSynonym ){
5661 i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
5662 if( iRowid==iLast ) continue;
5663 bMatch = 0;
5664 if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
5665 pNode->bNomatch = 0;
5666 pNode->bEof = 1;
5667 return rc;
5668 }
5669 }else{
5670 Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
5671 if( pIter->iRowid==iLast || pIter->bEof ) continue;
5672 bMatch = 0;
5673 if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
5674 return rc;
5675 }
5676 }
5677 }
5678 }
5679 }while( bMatch==0 );
5680
5681 pNode->iRowid = iLast;
5682 pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
5683 assert( pNode->bEof==0 || pNode->bNomatch==0 );
5684
5685 return rc;
5686 }
5687
5688 /*
5689 ** Advance the first term iterator in the first phrase of pNear. Set output
5690 ** variable *pbEof to true if it reaches EOF or if an error occurs.
5691 **
5692 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5693 ** occurs.
5694 */
5695 static int fts5ExprNodeNext_STRING(
5696 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
5697 Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */
5698 int bFromValid,
5699 i64 iFrom
5700 ){
5701 Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
5702 int rc = SQLITE_OK;
5703
5704 pNode->bNomatch = 0;
5705 if( pTerm->pSynonym ){
5706 int bEof = 1;
5707 Fts5ExprTerm *p;
5708
5709 /* Find the firstest rowid any synonym points to. */
5710 i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
5711
5712 /* Advance each iterator that currently points to iRowid. Or, if iFrom
5713 ** is valid - each iterator that points to a rowid before iFrom. */
5714 for(p=pTerm; p; p=p->pSynonym){
5715 if( sqlite3Fts5IterEof(p->pIter)==0 ){
5716 i64 ii = p->pIter->iRowid;
5717 if( ii==iRowid
5718 || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
5719 ){
5720 if( bFromValid ){
5721 rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
5722 }else{
5723 rc = sqlite3Fts5IterNext(p->pIter);
5724 }
5725 if( rc!=SQLITE_OK ) break;
5726 if( sqlite3Fts5IterEof(p->pIter)==0 ){
5727 bEof = 0;
5728 }
5729 }else{
5730 bEof = 0;
5731 }
5732 }
5733 }
5734
5735 /* Set the EOF flag if either all synonym iterators are at EOF or an
5736 ** error has occurred. */
5737 pNode->bEof = (rc || bEof);
5738 }else{
5739 Fts5IndexIter *pIter = pTerm->pIter;
5740
5741 assert( Fts5NodeIsString(pNode) );
5742 if( bFromValid ){
5743 rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
5744 }else{
5745 rc = sqlite3Fts5IterNext(pIter);
5746 }
5747
5748 pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
5749 }
5750
5751 if( pNode->bEof==0 ){
5752 assert( rc==SQLITE_OK );
5753 rc = fts5ExprNodeTest_STRING(pExpr, pNode);
5754 }
5755
5756 return rc;
5757 }
5758
5759
5760 static int fts5ExprNodeTest_TERM(
5761 Fts5Expr *pExpr, /* Expression that pNear is a part of */
5762 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */
5763 ){
5764 /* As this "NEAR" object is actually a single phrase that consists
5765 ** of a single term only, grab pointers into the poslist managed by the
5766 ** fts5_index.c iterator object. This is much faster than synthesizing
5767 ** a new poslist the way we have to for more complicated phrase or NEAR
5768 ** expressions. */
5769 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
5770 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
5771
5772 assert( pNode->eType==FTS5_TERM );
5773 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
5774 assert( pPhrase->aTerm[0].pSynonym==0 );
5775
5776 pPhrase->poslist.n = pIter->nData;
5777 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
5778 pPhrase->poslist.p = (u8*)pIter->pData;
5779 }
5780 pNode->iRowid = pIter->iRowid;
5781 pNode->bNomatch = (pPhrase->poslist.n==0);
5782 return SQLITE_OK;
5783 }
5784
5785 /*
5786 ** xNext() method for a node of type FTS5_TERM.
5787 */
5788 static int fts5ExprNodeNext_TERM(
5789 Fts5Expr *pExpr,
5790 Fts5ExprNode *pNode,
5791 int bFromValid,
5792 i64 iFrom
5793 ){
5794 int rc;
5795 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
5796
5797 assert( pNode->bEof==0 );
5798 if( bFromValid ){
5799 rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
5800 }else{
5801 rc = sqlite3Fts5IterNext(pIter);
5802 }
5803 if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
5804 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
5805 }else{
5806 pNode->bEof = 1;
5807 pNode->bNomatch = 0;
5808 }
5809 return rc;
5810 }
5811
5812 static void fts5ExprNodeTest_OR(
5813 Fts5Expr *pExpr, /* Expression of which pNode is a part */
5814 Fts5ExprNode *pNode /* Expression node to test */
5815 ){
5816 Fts5ExprNode *pNext = pNode->apChild[0];
5817 int i;
5818
5819 for(i=1; i<pNode->nChild; i++){
5820 Fts5ExprNode *pChild = pNode->apChild[i];
5821 int cmp = fts5NodeCompare(pExpr, pNext, pChild);
5822 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
5823 pNext = pChild;
5824 }
5825 }
5826 pNode->iRowid = pNext->iRowid;
5827 pNode->bEof = pNext->bEof;
5828 pNode->bNomatch = pNext->bNomatch;
5829 }
5830
5831 static int fts5ExprNodeNext_OR(
5832 Fts5Expr *pExpr,
5833 Fts5ExprNode *pNode,
5834 int bFromValid,
5835 i64 iFrom
5836 ){
5837 int i;
5838 i64 iLast = pNode->iRowid;
5839
5840 for(i=0; i<pNode->nChild; i++){
5841 Fts5ExprNode *p1 = pNode->apChild[i];
5842 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
5843 if( p1->bEof==0 ){
5844 if( (p1->iRowid==iLast)
5845 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
5846 ){
5847 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
5848 if( rc!=SQLITE_OK ) return rc;
5849 }
5850 }
5851 }
5852
5853 fts5ExprNodeTest_OR(pExpr, pNode);
5854 return SQLITE_OK;
5855 }
5856
5857 /*
5858 ** Argument pNode is an FTS5_AND node.
5859 */
5860 static int fts5ExprNodeTest_AND(
5861 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
5862 Fts5ExprNode *pAnd /* FTS5_AND node to advance */
5863 ){
5864 int iChild;
5865 i64 iLast = pAnd->iRowid;
5866 int rc = SQLITE_OK;
5867 int bMatch;
5868
5869 assert( pAnd->bEof==0 );
5870 do {
5871 pAnd->bNomatch = 0;
5872 bMatch = 1;
5873 for(iChild=0; iChild<pAnd->nChild; iChild++){
5874 Fts5ExprNode *pChild = pAnd->apChild[iChild];
5875 int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
5876 if( cmp>0 ){
5877 /* Advance pChild until it points to iLast or laster */
5878 rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
5879 if( rc!=SQLITE_OK ) return rc;
5880 }
5881
5882 /* If the child node is now at EOF, so is the parent AND node. Otherwise,
5883 ** the child node is guaranteed to have advanced at least as far as
5884 ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
5885 ** new lastest rowid seen so far. */
5886 assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
5887 if( pChild->bEof ){
5888 fts5ExprSetEof(pAnd);
5889 bMatch = 1;
5890 break;
5891 }else if( iLast!=pChild->iRowid ){
5892 bMatch = 0;
5893 iLast = pChild->iRowid;
5894 }
5895
5896 if( pChild->bNomatch ){
5897 pAnd->bNomatch = 1;
5898 }
5899 }
5900 }while( bMatch==0 );
5901
5902 if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
5903 fts5ExprNodeZeroPoslist(pAnd);
5904 }
5905 pAnd->iRowid = iLast;
5906 return SQLITE_OK;
5907 }
5908
5909 static int fts5ExprNodeNext_AND(
5910 Fts5Expr *pExpr,
5911 Fts5ExprNode *pNode,
5912 int bFromValid,
5913 i64 iFrom
5914 ){
5915 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
5916 if( rc==SQLITE_OK ){
5917 rc = fts5ExprNodeTest_AND(pExpr, pNode);
5918 }
5919 return rc;
5920 }
5921
5922 static int fts5ExprNodeTest_NOT(
5923 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
5924 Fts5ExprNode *pNode /* FTS5_NOT node to advance */
5925 ){
5926 int rc = SQLITE_OK;
5927 Fts5ExprNode *p1 = pNode->apChild[0];
5928 Fts5ExprNode *p2 = pNode->apChild[1];
5929 assert( pNode->nChild==2 );
5930
5931 while( rc==SQLITE_OK && p1->bEof==0 ){
5932 int cmp = fts5NodeCompare(pExpr, p1, p2);
5933 if( cmp>0 ){
5934 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
5935 cmp = fts5NodeCompare(pExpr, p1, p2);
5936 }
5937 assert( rc!=SQLITE_OK || cmp<=0 );
5938 if( cmp || p2->bNomatch ) break;
5939 rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
5940 }
5941 pNode->bEof = p1->bEof;
5942 pNode->bNomatch = p1->bNomatch;
5943 pNode->iRowid = p1->iRowid;
5944 if( p1->bEof ){
5945 fts5ExprNodeZeroPoslist(p2);
5946 }
5947 return rc;
5948 }
5949
5950 static int fts5ExprNodeNext_NOT(
5951 Fts5Expr *pExpr,
5952 Fts5ExprNode *pNode,
5953 int bFromValid,
5954 i64 iFrom
5955 ){
5956 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
5957 if( rc==SQLITE_OK ){
5958 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
5959 }
5960 return rc;
5961 }
5962
5963 /*
5964 ** If pNode currently points to a match, this function returns SQLITE_OK
5965 ** without modifying it. Otherwise, pNode is advanced until it does point
5966 ** to a match or EOF is reached.
5967 */
5968 static int fts5ExprNodeTest(
5969 Fts5Expr *pExpr, /* Expression of which pNode is a part */
5970 Fts5ExprNode *pNode /* Expression node to test */
5971 ){
5972 int rc = SQLITE_OK;
5973 if( pNode->bEof==0 ){
5974 switch( pNode->eType ){
5975
5976 case FTS5_STRING: {
5977 rc = fts5ExprNodeTest_STRING(pExpr, pNode);
5978 break;
5979 }
5980
5981 case FTS5_TERM: {
5982 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
5983 break;
5984 }
5985
5986 case FTS5_AND: {
5987 rc = fts5ExprNodeTest_AND(pExpr, pNode);
5988 break;
5989 }
5990
5991 case FTS5_OR: {
5992 fts5ExprNodeTest_OR(pExpr, pNode);
5993 break;
5994 }
5995
5996 default: assert( pNode->eType==FTS5_NOT ); {
5997 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
5998 break;
5999 }
6000 }
6001 }
6002 return rc;
6003 }
6004
6005
6006 /*
6007 ** Set node pNode, which is part of expression pExpr, to point to the first
6008 ** match. If there are no matches, set the Node.bEof flag to indicate EOF.
6009 **
6010 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
6011 ** It is not an error if there are no matches.
6012 */
6013 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
6014 int rc = SQLITE_OK;
6015 pNode->bEof = 0;
6016 pNode->bNomatch = 0;
6017
6018 if( Fts5NodeIsString(pNode) ){
6019 /* Initialize all term iterators in the NEAR object. */
6020 rc = fts5ExprNearInitAll(pExpr, pNode);
6021 }else if( pNode->xNext==0 ){
6022 pNode->bEof = 1;
6023 }else{
6024 int i;
6025 int nEof = 0;
6026 for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
6027 Fts5ExprNode *pChild = pNode->apChild[i];
6028 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
6029 assert( pChild->bEof==0 || pChild->bEof==1 );
6030 nEof += pChild->bEof;
6031 }
6032 pNode->iRowid = pNode->apChild[0]->iRowid;
6033
6034 switch( pNode->eType ){
6035 case FTS5_AND:
6036 if( nEof>0 ) fts5ExprSetEof(pNode);
6037 break;
6038
6039 case FTS5_OR:
6040 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
6041 break;
6042
6043 default:
6044 assert( pNode->eType==FTS5_NOT );
6045 pNode->bEof = pNode->apChild[0]->bEof;
6046 break;
6047 }
6048 }
6049
6050 if( rc==SQLITE_OK ){
6051 rc = fts5ExprNodeTest(pExpr, pNode);
6052 }
6053 return rc;
6054 }
6055
6056
6057 /*
6058 ** Begin iterating through the set of documents in index pIdx matched by
6059 ** the MATCH expression passed as the first argument. If the "bDesc"
6060 ** parameter is passed a non-zero value, iteration is in descending rowid
6061 ** order. Or, if it is zero, in ascending order.
6062 **
6063 ** If iterating in ascending rowid order (bDesc==0), the first document
6064 ** visited is that with the smallest rowid that is larger than or equal
6065 ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
6066 ** then the first document visited must have a rowid smaller than or
6067 ** equal to iFirst.
6068 **
6069 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6070 ** is not considered an error if the query does not match any documents.
6071 */
6072 static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bD esc){
6073 Fts5ExprNode *pRoot = p->pRoot;
6074 int rc; /* Return code */
6075
6076 p->pIndex = pIdx;
6077 p->bDesc = bDesc;
6078 rc = fts5ExprNodeFirst(p, pRoot);
6079
6080 /* If not at EOF but the current rowid occurs earlier than iFirst in
6081 ** the iteration order, move to document iFirst or later. */
6082 if( rc==SQLITE_OK
6083 && 0==pRoot->bEof
6084 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
6085 ){
6086 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
6087 }
6088
6089 /* If the iterator is not at a real match, skip forward until it is. */
6090 while( pRoot->bNomatch ){
6091 assert( pRoot->bEof==0 && rc==SQLITE_OK );
6092 rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6093 }
6094 return rc;
6095 }
6096
6097 /*
6098 ** Move to the next document
6099 **
6100 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6101 ** is not considered an error if the query does not match any documents.
6102 */
6103 static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
6104 int rc;
6105 Fts5ExprNode *pRoot = p->pRoot;
6106 assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
6107 do {
6108 rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6109 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
6110 }while( pRoot->bNomatch );
6111 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
6112 pRoot->bEof = 1;
6113 }
6114 return rc;
6115 }
6116
6117 static int sqlite3Fts5ExprEof(Fts5Expr *p){
6118 return p->pRoot->bEof;
6119 }
6120
6121 static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
6122 return p->pRoot->iRowid;
6123 }
6124
6125 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
6126 int rc = SQLITE_OK;
6127 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
6128 return rc;
6129 }
6130
6131 /*
6132 ** Free the phrase object passed as the only argument.
6133 */
6134 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
6135 if( pPhrase ){
6136 int i;
6137 for(i=0; i<pPhrase->nTerm; i++){
6138 Fts5ExprTerm *pSyn;
6139 Fts5ExprTerm *pNext;
6140 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
6141 sqlite3_free(pTerm->zTerm);
6142 sqlite3Fts5IterClose(pTerm->pIter);
6143 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
6144 pNext = pSyn->pSynonym;
6145 sqlite3Fts5IterClose(pSyn->pIter);
6146 fts5BufferFree((Fts5Buffer*)&pSyn[1]);
6147 sqlite3_free(pSyn);
6148 }
6149 }
6150 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
6151 sqlite3_free(pPhrase);
6152 }
6153 }
6154
6155 /*
6156 ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
6157 ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
6158 ** appended to it and the results returned.
6159 **
6160 ** If an OOM error occurs, both the pNear and pPhrase objects are freed and
6161 ** NULL returned.
6162 */
6163 static Fts5ExprNearset *sqlite3Fts5ParseNearset(
6164 Fts5Parse *pParse, /* Parse context */
6165 Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
6166 Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
6167 ){
6168 const int SZALLOC = 8;
6169 Fts5ExprNearset *pRet = 0;
6170
6171 if( pParse->rc==SQLITE_OK ){
6172 if( pPhrase==0 ){
6173 return pNear;
6174 }
6175 if( pNear==0 ){
6176 int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
6177 pRet = sqlite3_malloc(nByte);
6178 if( pRet==0 ){
6179 pParse->rc = SQLITE_NOMEM;
6180 }else{
6181 memset(pRet, 0, nByte);
6182 }
6183 }else if( (pNear->nPhrase % SZALLOC)==0 ){
6184 int nNew = pNear->nPhrase + SZALLOC;
6185 int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
6186
6187 pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte);
6188 if( pRet==0 ){
6189 pParse->rc = SQLITE_NOMEM;
6190 }
6191 }else{
6192 pRet = pNear;
6193 }
6194 }
6195
6196 if( pRet==0 ){
6197 assert( pParse->rc!=SQLITE_OK );
6198 sqlite3Fts5ParseNearsetFree(pNear);
6199 sqlite3Fts5ParsePhraseFree(pPhrase);
6200 }else{
6201 if( pRet->nPhrase>0 ){
6202 Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
6203 assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
6204 if( pPhrase->nTerm==0 ){
6205 fts5ExprPhraseFree(pPhrase);
6206 pRet->nPhrase--;
6207 pParse->nPhrase--;
6208 pPhrase = pLast;
6209 }else if( pLast->nTerm==0 ){
6210 fts5ExprPhraseFree(pLast);
6211 pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
6212 pParse->nPhrase--;
6213 pRet->nPhrase--;
6214 }
6215 }
6216 pRet->apPhrase[pRet->nPhrase++] = pPhrase;
6217 }
6218 return pRet;
6219 }
6220
6221 typedef struct TokenCtx TokenCtx;
6222 struct TokenCtx {
6223 Fts5ExprPhrase *pPhrase;
6224 int rc;
6225 };
6226
6227 /*
6228 ** Callback for tokenizing terms used by ParseTerm().
6229 */
6230 static int fts5ParseTokenize(
6231 void *pContext, /* Pointer to Fts5InsertCtx object */
6232 int tflags, /* Mask of FTS5_TOKEN_* flags */
6233 const char *pToken, /* Buffer containing token */
6234 int nToken, /* Size of token in bytes */
6235 int iUnused1, /* Start offset of token */
6236 int iUnused2 /* End offset of token */
6237 ){
6238 int rc = SQLITE_OK;
6239 const int SZALLOC = 8;
6240 TokenCtx *pCtx = (TokenCtx*)pContext;
6241 Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
6242
6243 UNUSED_PARAM2(iUnused1, iUnused2);
6244
6245 /* If an error has already occurred, this is a no-op */
6246 if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
6247 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
6248
6249 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
6250 Fts5ExprTerm *pSyn;
6251 int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
6252 pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
6253 if( pSyn==0 ){
6254 rc = SQLITE_NOMEM;
6255 }else{
6256 memset(pSyn, 0, nByte);
6257 pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
6258 memcpy(pSyn->zTerm, pToken, nToken);
6259 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
6260 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
6261 }
6262 }else{
6263 Fts5ExprTerm *pTerm;
6264 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
6265 Fts5ExprPhrase *pNew;
6266 int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
6267
6268 pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
6269 sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
6270 );
6271 if( pNew==0 ){
6272 rc = SQLITE_NOMEM;
6273 }else{
6274 if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
6275 pCtx->pPhrase = pPhrase = pNew;
6276 pNew->nTerm = nNew - SZALLOC;
6277 }
6278 }
6279
6280 if( rc==SQLITE_OK ){
6281 pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
6282 memset(pTerm, 0, sizeof(Fts5ExprTerm));
6283 pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
6284 }
6285 }
6286
6287 pCtx->rc = rc;
6288 return rc;
6289 }
6290
6291
6292 /*
6293 ** Free the phrase object passed as the only argument.
6294 */
6295 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
6296 fts5ExprPhraseFree(pPhrase);
6297 }
6298
6299 /*
6300 ** Free the phrase object passed as the second argument.
6301 */
6302 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
6303 if( pNear ){
6304 int i;
6305 for(i=0; i<pNear->nPhrase; i++){
6306 fts5ExprPhraseFree(pNear->apPhrase[i]);
6307 }
6308 sqlite3_free(pNear->pColset);
6309 sqlite3_free(pNear);
6310 }
6311 }
6312
6313 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
6314 assert( pParse->pExpr==0 );
6315 pParse->pExpr = p;
6316 }
6317
6318 /*
6319 ** This function is called by the parser to process a string token. The
6320 ** string may or may not be quoted. In any case it is tokenized and a
6321 ** phrase object consisting of all tokens returned.
6322 */
6323 static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
6324 Fts5Parse *pParse, /* Parse context */
6325 Fts5ExprPhrase *pAppend, /* Phrase to append to */
6326 Fts5Token *pToken, /* String to tokenize */
6327 int bPrefix /* True if there is a trailing "*" */
6328 ){
6329 Fts5Config *pConfig = pParse->pConfig;
6330 TokenCtx sCtx; /* Context object passed to callback */
6331 int rc; /* Tokenize return code */
6332 char *z = 0;
6333
6334 memset(&sCtx, 0, sizeof(TokenCtx));
6335 sCtx.pPhrase = pAppend;
6336
6337 rc = fts5ParseStringFromToken(pToken, &z);
6338 if( rc==SQLITE_OK ){
6339 int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
6340 int n;
6341 sqlite3Fts5Dequote(z);
6342 n = (int)strlen(z);
6343 rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
6344 }
6345 sqlite3_free(z);
6346 if( rc || (rc = sCtx.rc) ){
6347 pParse->rc = rc;
6348 fts5ExprPhraseFree(sCtx.pPhrase);
6349 sCtx.pPhrase = 0;
6350 }else{
6351
6352 if( pAppend==0 ){
6353 if( (pParse->nPhrase % 8)==0 ){
6354 int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
6355 Fts5ExprPhrase **apNew;
6356 apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
6357 if( apNew==0 ){
6358 pParse->rc = SQLITE_NOMEM;
6359 fts5ExprPhraseFree(sCtx.pPhrase);
6360 return 0;
6361 }
6362 pParse->apPhrase = apNew;
6363 }
6364 pParse->nPhrase++;
6365 }
6366
6367 if( sCtx.pPhrase==0 ){
6368 /* This happens when parsing a token or quoted phrase that contains
6369 ** no token characters at all. (e.g ... MATCH '""'). */
6370 sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
6371 }else if( sCtx.pPhrase->nTerm ){
6372 sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
6373 }
6374 pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
6375 }
6376
6377 return sCtx.pPhrase;
6378 }
6379
6380 /*
6381 ** Create a new FTS5 expression by cloning phrase iPhrase of the
6382 ** expression passed as the second argument.
6383 */
6384 static int sqlite3Fts5ExprClonePhrase(
6385 Fts5Expr *pExpr,
6386 int iPhrase,
6387 Fts5Expr **ppNew
6388 ){
6389 int rc = SQLITE_OK; /* Return code */
6390 Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
6391 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
6392 TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
6393
6394 pOrig = pExpr->apExprPhrase[iPhrase];
6395 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
6396 if( rc==SQLITE_OK ){
6397 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
6398 sizeof(Fts5ExprPhrase*));
6399 }
6400 if( rc==SQLITE_OK ){
6401 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
6402 sizeof(Fts5ExprNode));
6403 }
6404 if( rc==SQLITE_OK ){
6405 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
6406 sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
6407 }
6408 if( rc==SQLITE_OK ){
6409 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
6410 if( pColsetOrig ){
6411 int nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int);
6412 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
6413 if( pColset ){
6414 memcpy(pColset, pColsetOrig, nByte);
6415 }
6416 pNew->pRoot->pNear->pColset = pColset;
6417 }
6418 }
6419
6420 if( pOrig->nTerm ){
6421 int i; /* Used to iterate through phrase terms */
6422 for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
6423 int tflags = 0;
6424 Fts5ExprTerm *p;
6425 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
6426 const char *zTerm = p->zTerm;
6427 rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
6428 0, 0);
6429 tflags = FTS5_TOKEN_COLOCATED;
6430 }
6431 if( rc==SQLITE_OK ){
6432 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
6433 }
6434 }
6435 }else{
6436 /* This happens when parsing a token or quoted phrase that contains
6437 ** no token characters at all. (e.g ... MATCH '""'). */
6438 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase));
6439 }
6440
6441 if( rc==SQLITE_OK ){
6442 /* All the allocations succeeded. Put the expression object together. */
6443 pNew->pIndex = pExpr->pIndex;
6444 pNew->pConfig = pExpr->pConfig;
6445 pNew->nPhrase = 1;
6446 pNew->apExprPhrase[0] = sCtx.pPhrase;
6447 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
6448 pNew->pRoot->pNear->nPhrase = 1;
6449 sCtx.pPhrase->pNode = pNew->pRoot;
6450
6451 if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
6452 pNew->pRoot->eType = FTS5_TERM;
6453 pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
6454 }else{
6455 pNew->pRoot->eType = FTS5_STRING;
6456 pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
6457 }
6458 }else{
6459 sqlite3Fts5ExprFree(pNew);
6460 fts5ExprPhraseFree(sCtx.pPhrase);
6461 pNew = 0;
6462 }
6463
6464 *ppNew = pNew;
6465 return rc;
6466 }
6467
6468
6469 /*
6470 ** Token pTok has appeared in a MATCH expression where the NEAR operator
6471 ** is expected. If token pTok does not contain "NEAR", store an error
6472 ** in the pParse object.
6473 */
6474 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
6475 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
6476 sqlite3Fts5ParseError(
6477 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
6478 );
6479 }
6480 }
6481
6482 static void sqlite3Fts5ParseSetDistance(
6483 Fts5Parse *pParse,
6484 Fts5ExprNearset *pNear,
6485 Fts5Token *p
6486 ){
6487 if( pNear ){
6488 int nNear = 0;
6489 int i;
6490 if( p->n ){
6491 for(i=0; i<p->n; i++){
6492 char c = (char)p->p[i];
6493 if( c<'0' || c>'9' ){
6494 sqlite3Fts5ParseError(
6495 pParse, "expected integer, got \"%.*s\"", p->n, p->p
6496 );
6497 return;
6498 }
6499 nNear = nNear * 10 + (p->p[i] - '0');
6500 }
6501 }else{
6502 nNear = FTS5_DEFAULT_NEARDIST;
6503 }
6504 pNear->nNear = nNear;
6505 }
6506 }
6507
6508 /*
6509 ** The second argument passed to this function may be NULL, or it may be
6510 ** an existing Fts5Colset object. This function returns a pointer to
6511 ** a new colset object containing the contents of (p) with new value column
6512 ** number iCol appended.
6513 **
6514 ** If an OOM error occurs, store an error code in pParse and return NULL.
6515 ** The old colset object (if any) is not freed in this case.
6516 */
6517 static Fts5Colset *fts5ParseColset(
6518 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
6519 Fts5Colset *p, /* Existing colset object */
6520 int iCol /* New column to add to colset object */
6521 ){
6522 int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
6523 Fts5Colset *pNew; /* New colset object to return */
6524
6525 assert( pParse->rc==SQLITE_OK );
6526 assert( iCol>=0 && iCol<pParse->pConfig->nCol );
6527
6528 pNew = sqlite3_realloc(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
6529 if( pNew==0 ){
6530 pParse->rc = SQLITE_NOMEM;
6531 }else{
6532 int *aiCol = pNew->aiCol;
6533 int i, j;
6534 for(i=0; i<nCol; i++){
6535 if( aiCol[i]==iCol ) return pNew;
6536 if( aiCol[i]>iCol ) break;
6537 }
6538 for(j=nCol; j>i; j--){
6539 aiCol[j] = aiCol[j-1];
6540 }
6541 aiCol[i] = iCol;
6542 pNew->nCol = nCol+1;
6543
6544 #ifndef NDEBUG
6545 /* Check that the array is in order and contains no duplicate entries. */
6546 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
6547 #endif
6548 }
6549
6550 return pNew;
6551 }
6552
6553 /*
6554 ** Allocate and return an Fts5Colset object specifying the inverse of
6555 ** the colset passed as the second argument. Free the colset passed
6556 ** as the second argument before returning.
6557 */
6558 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p ){
6559 Fts5Colset *pRet;
6560 int nCol = pParse->pConfig->nCol;
6561
6562 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
6563 sizeof(Fts5Colset) + sizeof(int)*nCol
6564 );
6565 if( pRet ){
6566 int i;
6567 int iOld = 0;
6568 for(i=0; i<nCol; i++){
6569 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
6570 pRet->aiCol[pRet->nCol++] = i;
6571 }else{
6572 iOld++;
6573 }
6574 }
6575 }
6576
6577 sqlite3_free(p);
6578 return pRet;
6579 }
6580
6581 static Fts5Colset *sqlite3Fts5ParseColset(
6582 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
6583 Fts5Colset *pColset, /* Existing colset object */
6584 Fts5Token *p
6585 ){
6586 Fts5Colset *pRet = 0;
6587 int iCol;
6588 char *z; /* Dequoted copy of token p */
6589
6590 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
6591 if( pParse->rc==SQLITE_OK ){
6592 Fts5Config *pConfig = pParse->pConfig;
6593 sqlite3Fts5Dequote(z);
6594 for(iCol=0; iCol<pConfig->nCol; iCol++){
6595 if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
6596 }
6597 if( iCol==pConfig->nCol ){
6598 sqlite3Fts5ParseError(pParse, "no such column: %s", z);
6599 }else{
6600 pRet = fts5ParseColset(pParse, pColset, iCol);
6601 }
6602 sqlite3_free(z);
6603 }
6604
6605 if( pRet==0 ){
6606 assert( pParse->rc!=SQLITE_OK );
6607 sqlite3_free(pColset);
6608 }
6609
6610 return pRet;
6611 }
6612
6613 static void sqlite3Fts5ParseSetColset(
6614 Fts5Parse *pParse,
6615 Fts5ExprNearset *pNear,
6616 Fts5Colset *pColset
6617 ){
6618 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
6619 pParse->rc = SQLITE_ERROR;
6620 pParse->zErr = sqlite3_mprintf(
6621 "fts5: column queries are not supported (detail=none)"
6622 );
6623 sqlite3_free(pColset);
6624 return;
6625 }
6626
6627 if( pNear ){
6628 pNear->pColset = pColset;
6629 }else{
6630 sqlite3_free(pColset);
6631 }
6632 }
6633
6634 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
6635 switch( pNode->eType ){
6636 case FTS5_STRING: {
6637 Fts5ExprNearset *pNear = pNode->pNear;
6638 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
6639 && pNear->apPhrase[0]->aTerm[0].pSynonym==0
6640 ){
6641 pNode->eType = FTS5_TERM;
6642 pNode->xNext = fts5ExprNodeNext_TERM;
6643 }else{
6644 pNode->xNext = fts5ExprNodeNext_STRING;
6645 }
6646 break;
6647 };
6648
6649 case FTS5_OR: {
6650 pNode->xNext = fts5ExprNodeNext_OR;
6651 break;
6652 };
6653
6654 case FTS5_AND: {
6655 pNode->xNext = fts5ExprNodeNext_AND;
6656 break;
6657 };
6658
6659 default: assert( pNode->eType==FTS5_NOT ); {
6660 pNode->xNext = fts5ExprNodeNext_NOT;
6661 break;
6662 };
6663 }
6664 }
6665
6666 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
6667 if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
6668 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
6669 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
6670 p->nChild += pSub->nChild;
6671 sqlite3_free(pSub);
6672 }else{
6673 p->apChild[p->nChild++] = pSub;
6674 }
6675 }
6676
6677 /*
6678 ** Allocate and return a new expression object. If anything goes wrong (i.e.
6679 ** OOM error), leave an error code in pParse and return NULL.
6680 */
6681 static Fts5ExprNode *sqlite3Fts5ParseNode(
6682 Fts5Parse *pParse, /* Parse context */
6683 int eType, /* FTS5_STRING, AND, OR or NOT */
6684 Fts5ExprNode *pLeft, /* Left hand child expression */
6685 Fts5ExprNode *pRight, /* Right hand child expression */
6686 Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
6687 ){
6688 Fts5ExprNode *pRet = 0;
6689
6690 if( pParse->rc==SQLITE_OK ){
6691 int nChild = 0; /* Number of children of returned node */
6692 int nByte; /* Bytes of space to allocate for this node */
6693
6694 assert( (eType!=FTS5_STRING && !pNear)
6695 || (eType==FTS5_STRING && !pLeft && !pRight)
6696 );
6697 if( eType==FTS5_STRING && pNear==0 ) return 0;
6698 if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
6699 if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
6700
6701 if( eType==FTS5_NOT ){
6702 nChild = 2;
6703 }else if( eType==FTS5_AND || eType==FTS5_OR ){
6704 nChild = 2;
6705 if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
6706 if( pRight->eType==eType ) nChild += pRight->nChild-1;
6707 }
6708
6709 nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
6710 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
6711
6712 if( pRet ){
6713 pRet->eType = eType;
6714 pRet->pNear = pNear;
6715 fts5ExprAssignXNext(pRet);
6716 if( eType==FTS5_STRING ){
6717 int iPhrase;
6718 for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
6719 pNear->apPhrase[iPhrase]->pNode = pRet;
6720 if( pNear->apPhrase[iPhrase]->nTerm==0 ){
6721 pRet->xNext = 0;
6722 pRet->eType = FTS5_EOF;
6723 }
6724 }
6725
6726 if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL
6727 && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm>1)
6728 ){
6729 assert( pParse->rc==SQLITE_OK );
6730 pParse->rc = SQLITE_ERROR;
6731 assert( pParse->zErr==0 );
6732 pParse->zErr = sqlite3_mprintf(
6733 "fts5: %s queries are not supported (detail!=full)",
6734 pNear->nPhrase==1 ? "phrase": "NEAR"
6735 );
6736 sqlite3_free(pRet);
6737 pRet = 0;
6738 }
6739
6740 }else{
6741 fts5ExprAddChildren(pRet, pLeft);
6742 fts5ExprAddChildren(pRet, pRight);
6743 }
6744 }
6745 }
6746
6747 if( pRet==0 ){
6748 assert( pParse->rc!=SQLITE_OK );
6749 sqlite3Fts5ParseNodeFree(pLeft);
6750 sqlite3Fts5ParseNodeFree(pRight);
6751 sqlite3Fts5ParseNearsetFree(pNear);
6752 }
6753 return pRet;
6754 }
6755
6756 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
6757 Fts5Parse *pParse, /* Parse context */
6758 Fts5ExprNode *pLeft, /* Left hand child expression */
6759 Fts5ExprNode *pRight /* Right hand child expression */
6760 ){
6761 Fts5ExprNode *pRet = 0;
6762 Fts5ExprNode *pPrev;
6763
6764 if( pParse->rc ){
6765 sqlite3Fts5ParseNodeFree(pLeft);
6766 sqlite3Fts5ParseNodeFree(pRight);
6767 }else{
6768
6769 assert( pLeft->eType==FTS5_STRING
6770 || pLeft->eType==FTS5_TERM
6771 || pLeft->eType==FTS5_EOF
6772 || pLeft->eType==FTS5_AND
6773 );
6774 assert( pRight->eType==FTS5_STRING
6775 || pRight->eType==FTS5_TERM
6776 || pRight->eType==FTS5_EOF
6777 );
6778
6779 if( pLeft->eType==FTS5_AND ){
6780 pPrev = pLeft->apChild[pLeft->nChild-1];
6781 }else{
6782 pPrev = pLeft;
6783 }
6784 assert( pPrev->eType==FTS5_STRING
6785 || pPrev->eType==FTS5_TERM
6786 || pPrev->eType==FTS5_EOF
6787 );
6788
6789 if( pRight->eType==FTS5_EOF ){
6790 assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
6791 sqlite3Fts5ParseNodeFree(pRight);
6792 pRet = pLeft;
6793 pParse->nPhrase--;
6794 }
6795 else if( pPrev->eType==FTS5_EOF ){
6796 Fts5ExprPhrase **ap;
6797
6798 if( pPrev==pLeft ){
6799 pRet = pRight;
6800 }else{
6801 pLeft->apChild[pLeft->nChild-1] = pRight;
6802 pRet = pLeft;
6803 }
6804
6805 ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
6806 assert( ap[0]==pPrev->pNear->apPhrase[0] );
6807 memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
6808 pParse->nPhrase--;
6809
6810 sqlite3Fts5ParseNodeFree(pPrev);
6811 }
6812 else{
6813 pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
6814 }
6815 }
6816
6817 return pRet;
6818 }
6819
6820 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
6821 int nByte = 0;
6822 Fts5ExprTerm *p;
6823 char *zQuoted;
6824
6825 /* Determine the maximum amount of space required. */
6826 for(p=pTerm; p; p=p->pSynonym){
6827 nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
6828 }
6829 zQuoted = sqlite3_malloc(nByte);
6830
6831 if( zQuoted ){
6832 int i = 0;
6833 for(p=pTerm; p; p=p->pSynonym){
6834 char *zIn = p->zTerm;
6835 zQuoted[i++] = '"';
6836 while( *zIn ){
6837 if( *zIn=='"' ) zQuoted[i++] = '"';
6838 zQuoted[i++] = *zIn++;
6839 }
6840 zQuoted[i++] = '"';
6841 if( p->pSynonym ) zQuoted[i++] = '|';
6842 }
6843 if( pTerm->bPrefix ){
6844 zQuoted[i++] = ' ';
6845 zQuoted[i++] = '*';
6846 }
6847 zQuoted[i++] = '\0';
6848 }
6849 return zQuoted;
6850 }
6851
/*
** Format (zFmt, ...) and append the result to string zApp, returning a
** newly allocated string. zApp is always freed. If zApp is NULL the
** formatted text alone is returned. NULL is returned if an allocation
** fails.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zTail;                    /* Formatted text to append */
  char *zOut;                     /* Value to return */
  va_list ap;

  va_start(ap, zFmt);
  zTail = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zApp==0 || zTail==0 ){
    zOut = zTail;
  }else{
    zOut = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }

  sqlite3_free(zApp);
  return zOut;
}
6866
6867 /*
6868 ** Compose a tcl-readable representation of expression pExpr. Return a
6869 ** pointer to a buffer containing that representation. It is the
6870 ** responsibility of the caller to at some point free the buffer using
6871 ** sqlite3_free().
6872 */
6873 static char *fts5ExprPrintTcl(
6874 Fts5Config *pConfig,
6875 const char *zNearsetCmd,
6876 Fts5ExprNode *pExpr
6877 ){
6878 char *zRet = 0;
6879 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
6880 Fts5ExprNearset *pNear = pExpr->pNear;
6881 int i;
6882 int iTerm;
6883
6884 zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
6885 if( zRet==0 ) return 0;
6886 if( pNear->pColset ){
6887 int *aiCol = pNear->pColset->aiCol;
6888 int nCol = pNear->pColset->nCol;
6889 if( nCol==1 ){
6890 zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
6891 }else{
6892 zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
6893 for(i=1; i<pNear->pColset->nCol; i++){
6894 zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
6895 }
6896 zRet = fts5PrintfAppend(zRet, "} ");
6897 }
6898 if( zRet==0 ) return 0;
6899 }
6900
6901 if( pNear->nPhrase>1 ){
6902 zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
6903 if( zRet==0 ) return 0;
6904 }
6905
6906 zRet = fts5PrintfAppend(zRet, "--");
6907 if( zRet==0 ) return 0;
6908
6909 for(i=0; i<pNear->nPhrase; i++){
6910 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6911
6912 zRet = fts5PrintfAppend(zRet, " {");
6913 for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
6914 char *zTerm = pPhrase->aTerm[iTerm].zTerm;
6915 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
6916 if( pPhrase->aTerm[iTerm].bPrefix ){
6917 zRet = fts5PrintfAppend(zRet, "*");
6918 }
6919 }
6920
6921 if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
6922 if( zRet==0 ) return 0;
6923 }
6924
6925 }else{
6926 char const *zOp = 0;
6927 int i;
6928 switch( pExpr->eType ){
6929 case FTS5_AND: zOp = "AND"; break;
6930 case FTS5_NOT: zOp = "NOT"; break;
6931 default:
6932 assert( pExpr->eType==FTS5_OR );
6933 zOp = "OR";
6934 break;
6935 }
6936
6937 zRet = sqlite3_mprintf("%s", zOp);
6938 for(i=0; zRet && i<pExpr->nChild; i++){
6939 char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
6940 if( !z ){
6941 sqlite3_free(zRet);
6942 zRet = 0;
6943 }else{
6944 zRet = fts5PrintfAppend(zRet, " [%z]", z);
6945 }
6946 }
6947 }
6948
6949 return zRet;
6950 }
6951
6952 static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
6953 char *zRet = 0;
6954 if( pExpr->eType==0 ){
6955 return sqlite3_mprintf("\"\"");
6956 }else
6957 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
6958 Fts5ExprNearset *pNear = pExpr->pNear;
6959 int i;
6960 int iTerm;
6961
6962 if( pNear->pColset ){
6963 int iCol = pNear->pColset->aiCol[0];
6964 zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]);
6965 if( zRet==0 ) return 0;
6966 }
6967
6968 if( pNear->nPhrase>1 ){
6969 zRet = fts5PrintfAppend(zRet, "NEAR(");
6970 if( zRet==0 ) return 0;
6971 }
6972
6973 for(i=0; i<pNear->nPhrase; i++){
6974 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6975 if( i!=0 ){
6976 zRet = fts5PrintfAppend(zRet, " ");
6977 if( zRet==0 ) return 0;
6978 }
6979 for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
6980 char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
6981 if( zTerm ){
6982 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
6983 sqlite3_free(zTerm);
6984 }
6985 if( zTerm==0 || zRet==0 ){
6986 sqlite3_free(zRet);
6987 return 0;
6988 }
6989 }
6990 }
6991
6992 if( pNear->nPhrase>1 ){
6993 zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
6994 if( zRet==0 ) return 0;
6995 }
6996
6997 }else{
6998 char const *zOp = 0;
6999 int i;
7000
7001 switch( pExpr->eType ){
7002 case FTS5_AND: zOp = " AND "; break;
7003 case FTS5_NOT: zOp = " NOT "; break;
7004 default:
7005 assert( pExpr->eType==FTS5_OR );
7006 zOp = " OR ";
7007 break;
7008 }
7009
7010 for(i=0; i<pExpr->nChild; i++){
7011 char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
7012 if( z==0 ){
7013 sqlite3_free(zRet);
7014 zRet = 0;
7015 }else{
7016 int e = pExpr->apChild[i]->eType;
7017 int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
7018 zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
7019 (i==0 ? "" : zOp),
7020 (b?"(":""), z, (b?")":"")
7021 );
7022 }
7023 if( zRet==0 ) break;
7024 }
7025 }
7026
7027 return zRet;
7028 }
7029
7030 /*
7031 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
7032 ** and fts5_expr_tcl() (bTcl!=0).
7033 */
7034 static void fts5ExprFunction(
7035 sqlite3_context *pCtx, /* Function call context */
7036 int nArg, /* Number of args */
7037 sqlite3_value **apVal, /* Function arguments */
7038 int bTcl
7039 ){
7040 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
7041 sqlite3 *db = sqlite3_context_db_handle(pCtx);
7042 const char *zExpr = 0;
7043 char *zErr = 0;
7044 Fts5Expr *pExpr = 0;
7045 int rc;
7046 int i;
7047
7048 const char **azConfig; /* Array of arguments for Fts5Config */
7049 const char *zNearsetCmd = "nearset";
7050 int nConfig; /* Size of azConfig[] */
7051 Fts5Config *pConfig = 0;
7052 int iArg = 1;
7053
7054 if( nArg<1 ){
7055 zErr = sqlite3_mprintf("wrong number of arguments to function %s",
7056 bTcl ? "fts5_expr_tcl" : "fts5_expr"
7057 );
7058 sqlite3_result_error(pCtx, zErr, -1);
7059 sqlite3_free(zErr);
7060 return;
7061 }
7062
7063 if( bTcl && nArg>1 ){
7064 zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
7065 iArg = 2;
7066 }
7067
7068 nConfig = 3 + (nArg-iArg);
7069 azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig);
7070 if( azConfig==0 ){
7071 sqlite3_result_error_nomem(pCtx);
7072 return;
7073 }
7074 azConfig[0] = 0;
7075 azConfig[1] = "main";
7076 azConfig[2] = "tbl";
7077 for(i=3; iArg<nArg; iArg++){
7078 azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]);
7079 }
7080
7081 zExpr = (const char*)sqlite3_value_text(apVal[0]);
7082
7083 rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
7084 if( rc==SQLITE_OK ){
7085 rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr);
7086 }
7087 if( rc==SQLITE_OK ){
7088 char *zText;
7089 if( pExpr->pRoot->xNext==0 ){
7090 zText = sqlite3_mprintf("");
7091 }else if( bTcl ){
7092 zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
7093 }else{
7094 zText = fts5ExprPrint(pConfig, pExpr->pRoot);
7095 }
7096 if( zText==0 ){
7097 rc = SQLITE_NOMEM;
7098 }else{
7099 sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
7100 sqlite3_free(zText);
7101 }
7102 }
7103
7104 if( rc!=SQLITE_OK ){
7105 if( zErr ){
7106 sqlite3_result_error(pCtx, zErr, -1);
7107 sqlite3_free(zErr);
7108 }else{
7109 sqlite3_result_error_code(pCtx, rc);
7110 }
7111 }
7112 sqlite3_free((void *)azConfig);
7113 sqlite3Fts5ConfigFree(pConfig);
7114 sqlite3Fts5ExprFree(pExpr);
7115 }
7116
7117 static void fts5ExprFunctionHr(
7118 sqlite3_context *pCtx, /* Function call context */
7119 int nArg, /* Number of args */
7120 sqlite3_value **apVal /* Function arguments */
7121 ){
7122 fts5ExprFunction(pCtx, nArg, apVal, 0);
7123 }
7124 static void fts5ExprFunctionTcl(
7125 sqlite3_context *pCtx, /* Function call context */
7126 int nArg, /* Number of args */
7127 sqlite3_value **apVal /* Function arguments */
7128 ){
7129 fts5ExprFunction(pCtx, nArg, apVal, 1);
7130 }
7131
7132 /*
7133 ** The implementation of an SQLite user-defined-function that accepts a
7134 ** single integer as an argument. If the integer is an alpha-numeric
7135 ** unicode code point, 1 is returned. Otherwise 0.
7136 */
7137 static void fts5ExprIsAlnum(
7138 sqlite3_context *pCtx, /* Function call context */
7139 int nArg, /* Number of args */
7140 sqlite3_value **apVal /* Function arguments */
7141 ){
7142 int iCode;
7143 if( nArg!=1 ){
7144 sqlite3_result_error(pCtx,
7145 "wrong number of arguments to function fts5_isalnum", -1
7146 );
7147 return;
7148 }
7149 iCode = sqlite3_value_int(apVal[0]);
7150 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode));
7151 }
7152
7153 static void fts5ExprFold(
7154 sqlite3_context *pCtx, /* Function call context */
7155 int nArg, /* Number of args */
7156 sqlite3_value **apVal /* Function arguments */
7157 ){
7158 if( nArg!=1 && nArg!=2 ){
7159 sqlite3_result_error(pCtx,
7160 "wrong number of arguments to function fts5_fold", -1
7161 );
7162 }else{
7163 int iCode;
7164 int bRemoveDiacritics = 0;
7165 iCode = sqlite3_value_int(apVal[0]);
7166 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
7167 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
7168 }
7169 }
7170
7171 /*
7172 ** This is called during initialization to register the fts5_expr() scalar
7173 ** UDF with the SQLite handle passed as the only argument.
7174 */
7175 static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
7176 struct Fts5ExprFunc {
7177 const char *z;
7178 void (*x)(sqlite3_context*,int,sqlite3_value**);
7179 } aFunc[] = {
7180 { "fts5_expr", fts5ExprFunctionHr },
7181 { "fts5_expr_tcl", fts5ExprFunctionTcl },
7182 { "fts5_isalnum", fts5ExprIsAlnum },
7183 { "fts5_fold", fts5ExprFold },
7184 };
7185 int i;
7186 int rc = SQLITE_OK;
7187 void *pCtx = (void*)pGlobal;
7188
7189 for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
7190 struct Fts5ExprFunc *p = &aFunc[i];
7191 rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
7192 }
7193
7194 /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */
7195 #ifndef NDEBUG
7196 (void)sqlite3Fts5ParserTrace;
7197 #endif
7198
7199 return rc;
7200 }
7201
7202 /*
7203 ** Return the number of phrases in expression pExpr.
7204 */
7205 static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
7206 return (pExpr ? pExpr->nPhrase : 0);
7207 }
7208
7209 /*
7210 ** Return the number of terms in the iPhrase'th phrase in pExpr.
7211 */
7212 static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
7213 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
7214 return pExpr->apExprPhrase[iPhrase]->nTerm;
7215 }
7216
7217 /*
7218 ** This function is used to access the current position list for phrase
7219 ** iPhrase.
7220 */
7221 static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
7222 int nRet;
7223 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
7224 Fts5ExprNode *pNode = pPhrase->pNode;
7225 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
7226 *pa = pPhrase->poslist.p;
7227 nRet = pPhrase->poslist.n;
7228 }else{
7229 *pa = 0;
7230 nRet = 0;
7231 }
7232 return nRet;
7233 }
7234
/*
** Per-phrase state used while populating position lists. An array with
** one of these for each phrase in an expression is allocated by
** sqlite3Fts5ExprClearPoslists() and later passed to
** sqlite3Fts5ExprPopulatePoslists().
*/
struct Fts5PoslistPopulator {
  Fts5PoslistWriter writer;       /* Writer used to append to the poslist */
  int bOk;                        /* True if ok to populate */
  int bMiss;                      /* True if phrase is excluded from population */
};
7240
7241 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int b Live){
7242 Fts5PoslistPopulator *pRet;
7243 pRet = sqlite3_malloc(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7244 if( pRet ){
7245 int i;
7246 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7247 for(i=0; i<pExpr->nPhrase; i++){
7248 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
7249 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
7250 assert( pExpr->apExprPhrase[i]->nTerm==1 );
7251 if( bLive &&
7252 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
7253 ){
7254 pRet[i].bMiss = 1;
7255 }else{
7256 pBuf->n = 0;
7257 }
7258 }
7259 }
7260 return pRet;
7261 }
7262
/*
** Context object passed through the tokenizer to
** fts5ExprPopulatePoslistsCb() by sqlite3Fts5ExprPopulatePoslists().
*/
struct Fts5ExprCtx {
  Fts5Expr *pExpr;                    /* Expression being populated */
  Fts5PoslistPopulator *aPopulator;   /* One entry per phrase in pExpr */
  i64 iOff;                           /* Starts at (iCol<<32)-1; incremented
                                      ** for each non-colocated token */
};
typedef struct Fts5ExprCtx Fts5ExprCtx;
7269
7270 /*
7271 ** TODO: Make this more efficient!
7272 */
7273 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
7274 int i;
7275 for(i=0; i<pColset->nCol; i++){
7276 if( pColset->aiCol[i]==iCol ) return 1;
7277 }
7278 return 0;
7279 }
7280
7281 static int fts5ExprPopulatePoslistsCb(
7282 void *pCtx, /* Copy of 2nd argument to xTokenize() */
7283 int tflags, /* Mask of FTS5_TOKEN_* flags */
7284 const char *pToken, /* Pointer to buffer containing token */
7285 int nToken, /* Size of token in bytes */
7286 int iUnused1, /* Byte offset of token within input text */
7287 int iUnused2 /* Byte offset of end of token within input text */
7288 ){
7289 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
7290 Fts5Expr *pExpr = p->pExpr;
7291 int i;
7292
7293 UNUSED_PARAM2(iUnused1, iUnused2);
7294
7295 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
7296 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
7297 for(i=0; i<pExpr->nPhrase; i++){
7298 Fts5ExprTerm *pTerm;
7299 if( p->aPopulator[i].bOk==0 ) continue;
7300 for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
7301 int nTerm = (int)strlen(pTerm->zTerm);
7302 if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
7303 && memcmp(pTerm->zTerm, pToken, nTerm)==0
7304 ){
7305 int rc = sqlite3Fts5PoslistWriterAppend(
7306 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
7307 );
7308 if( rc ) return rc;
7309 break;
7310 }
7311 }
7312 }
7313 return SQLITE_OK;
7314 }
7315
7316 static int sqlite3Fts5ExprPopulatePoslists(
7317 Fts5Config *pConfig,
7318 Fts5Expr *pExpr,
7319 Fts5PoslistPopulator *aPopulator,
7320 int iCol,
7321 const char *z, int n
7322 ){
7323 int i;
7324 Fts5ExprCtx sCtx;
7325 sCtx.pExpr = pExpr;
7326 sCtx.aPopulator = aPopulator;
7327 sCtx.iOff = (((i64)iCol) << 32) - 1;
7328
7329 for(i=0; i<pExpr->nPhrase; i++){
7330 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
7331 Fts5Colset *pColset = pNode->pNear->pColset;
7332 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
7333 || aPopulator[i].bMiss
7334 ){
7335 aPopulator[i].bOk = 0;
7336 }else{
7337 aPopulator[i].bOk = 1;
7338 }
7339 }
7340
7341 return sqlite3Fts5Tokenize(pConfig,
7342 FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
7343 );
7344 }
7345
7346 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
7347 if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
7348 pNode->pNear->apPhrase[0]->poslist.n = 0;
7349 }else{
7350 int i;
7351 for(i=0; i<pNode->nChild; i++){
7352 fts5ExprClearPoslists(pNode->apChild[i]);
7353 }
7354 }
7355 }
7356
7357 static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
7358 pNode->iRowid = iRowid;
7359 pNode->bEof = 0;
7360 switch( pNode->eType ){
7361 case FTS5_TERM:
7362 case FTS5_STRING:
7363 return (pNode->pNear->apPhrase[0]->poslist.n>0);
7364
7365 case FTS5_AND: {
7366 int i;
7367 for(i=0; i<pNode->nChild; i++){
7368 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
7369 fts5ExprClearPoslists(pNode);
7370 return 0;
7371 }
7372 }
7373 break;
7374 }
7375
7376 case FTS5_OR: {
7377 int i;
7378 int bRet = 0;
7379 for(i=0; i<pNode->nChild; i++){
7380 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
7381 bRet = 1;
7382 }
7383 }
7384 return bRet;
7385 }
7386
7387 default: {
7388 assert( pNode->eType==FTS5_NOT );
7389 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
7390 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
7391 ){
7392 fts5ExprClearPoslists(pNode);
7393 return 0;
7394 }
7395 break;
7396 }
7397 }
7398 return 1;
7399 }
7400
7401 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
7402 fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
7403 }
7404
7405 /*
7406 ** This function is only called for detail=columns tables.
7407 */
7408 static int sqlite3Fts5ExprPhraseCollist(
7409 Fts5Expr *pExpr,
7410 int iPhrase,
7411 const u8 **ppCollist,
7412 int *pnCollist
7413 ){
7414 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
7415 Fts5ExprNode *pNode = pPhrase->pNode;
7416 int rc = SQLITE_OK;
7417
7418 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
7419 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
7420
7421 if( pNode->bEof==0
7422 && pNode->iRowid==pExpr->pRoot->iRowid
7423 && pPhrase->poslist.n>0
7424 ){
7425 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
7426 if( pTerm->pSynonym ){
7427 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
7428 rc = fts5ExprSynonymList(
7429 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
7430 );
7431 }else{
7432 *ppCollist = pPhrase->aTerm[0].pIter->pData;
7433 *pnCollist = pPhrase->aTerm[0].pIter->nData;
7434 }
7435 }else{
7436 *ppCollist = 0;
7437 *pnCollist = 0;
7438 }
7439
7440 return rc;
7441 }
7442
7443
7444 /*
7445 ** 2014 August 11
7446 **
7447 ** The author disclaims copyright to this source code. In place of
7448 ** a legal notice, here is a blessing:
7449 **
7450 ** May you do good and not evil.
7451 ** May you find forgiveness for yourself and forgive others.
7452 ** May you share freely, never taking more than you give.
7453 **
7454 ******************************************************************************
7455 **
7456 */
7457
7458
7459
7460 /* #include "fts5Int.h" */
7461
/* Forward declaration. The structure itself is defined further below. */
typedef struct Fts5HashEntry Fts5HashEntry;
7463
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/
7469
7470
/*
** The in-memory hash table object. Entries are stored in chains linked
** by Fts5HashEntry.pHashNext, with the head of each chain in aSlot[].
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
7479
7480 /*
7481 ** Each entry in the hash table is represented by an object of the
7482 ** following type. Each object, its key (zKey[]) and its current data
7483 ** are stored in a single memory allocation. The position list data
7484 ** immediately follows the key data in memory.
7485 **
7486 ** The data that follows the key is in a similar, but not identical format
7487 ** to the doclist data stored in the database. It is:
7488 **
7489 ** * Rowid, as a varint
7490 ** * Position list, without 0x00 terminator.
7491 ** * Size of previous position list and rowid, as a 4 byte
7492 ** big-endian integer.
7493 **
7494 ** iRowidOff:
7495 ** Offset of last rowid written to data area. Relative to first byte of
7496 ** structure.
7497 **
7498 ** nData:
7499 ** Bytes of data written since iRowidOff.
7500 */
/*
** A single hash table entry. The key stored in zKey[] is a single prefix
** byte followed by the nKey bytes of the token and a nul-terminator, so
** the complete key occupies nKey+1 bytes before the terminator.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
  Fts5HashEntry *pScanNext;       /* Next entry in sorted order */

  int nAlloc;                     /* Total size of allocation */
  int iSzPoslist;                 /* Offset of space for 4-byte poslist size */
  int nData;                      /* Total bytes of data (incl. structure) */
  int nKey;                       /* Length of token in zKey[], in bytes. Does
                                  ** not count the leading prefix byte */
  u8 bDel;                        /* Set delete-flag @ iSzPoslist */
  u8 bContent;                    /* Set content-flag (detail=none mode) */
  i16 iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
  char zKey[8];                   /* Nul-terminated entry key */
};
7516
/*
** Size of Fts5HashEntry without the zKey[] array. The 8 subtracted here
** is the declared size of Fts5HashEntry.zKey[]. Key and data bytes are
** stored starting at this offset within each entry's allocation.
*/
#define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
7521
7522
7523
7524 /*
7525 ** Allocate a new hash table.
7526 */
7527 static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte ){
7528 int rc = SQLITE_OK;
7529 Fts5Hash *pNew;
7530
7531 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
7532 if( pNew==0 ){
7533 rc = SQLITE_NOMEM;
7534 }else{
7535 int nByte;
7536 memset(pNew, 0, sizeof(Fts5Hash));
7537 pNew->pnByte = pnByte;
7538 pNew->eDetail = pConfig->eDetail;
7539
7540 pNew->nSlot = 1024;
7541 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
7542 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
7543 if( pNew->aSlot==0 ){
7544 sqlite3_free(pNew);
7545 *ppNew = 0;
7546 rc = SQLITE_NOMEM;
7547 }else{
7548 memset(pNew->aSlot, 0, nByte);
7549 }
7550 }
7551 return rc;
7552 }
7553
7554 /*
7555 ** Free a hash table object.
7556 */
7557 static void sqlite3Fts5HashFree(Fts5Hash *pHash){
7558 if( pHash ){
7559 sqlite3Fts5HashClear(pHash);
7560 sqlite3_free(pHash->aSlot);
7561 sqlite3_free(pHash);
7562 }
7563 }
7564
7565 /*
7566 ** Empty (but do not delete) a hash table.
7567 */
7568 static void sqlite3Fts5HashClear(Fts5Hash *pHash){
7569 int i;
7570 for(i=0; i<pHash->nSlot; i++){
7571 Fts5HashEntry *pNext;
7572 Fts5HashEntry *pSlot;
7573 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
7574 pNext = pSlot->pHashNext;
7575 sqlite3_free(pSlot);
7576 }
7577 }
7578 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
7579 pHash->nEntry = 0;
7580 }
7581
7582 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
7583 int i;
7584 unsigned int h = 13;
7585 for(i=n-1; i>=0; i--){
7586 h = (h << 3) ^ h ^ p[i];
7587 }
7588 return (h % nSlot);
7589 }
7590
7591 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
7592 int i;
7593 unsigned int h = 13;
7594 for(i=n-1; i>=0; i--){
7595 h = (h << 3) ^ h ^ p[i];
7596 }
7597 h = (h << 3) ^ h ^ b;
7598 return (h % nSlot);
7599 }
7600
7601 /*
7602 ** Resize the hash table by doubling the number of slots.
7603 */
7604 static int fts5HashResize(Fts5Hash *pHash){
7605 int nNew = pHash->nSlot*2;
7606 int i;
7607 Fts5HashEntry **apNew;
7608 Fts5HashEntry **apOld = pHash->aSlot;
7609
7610 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
7611 if( !apNew ) return SQLITE_NOMEM;
7612 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
7613
7614 for(i=0; i<pHash->nSlot; i++){
7615 while( apOld[i] ){
7616 int iHash;
7617 Fts5HashEntry *p = apOld[i];
7618 apOld[i] = p->pHashNext;
7619 iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey));
7620 p->pHashNext = apNew[iHash];
7621 apNew[iHash] = p;
7622 }
7623 }
7624
7625 sqlite3_free(apOld);
7626 pHash->nSlot = nNew;
7627 pHash->aSlot = apNew;
7628 return SQLITE_OK;
7629 }
7630
7631 static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){
7632 if( p->iSzPoslist ){
7633 u8 *pPtr = (u8*)p;
7634 if( pHash->eDetail==FTS5_DETAIL_NONE ){
7635 assert( p->nData==p->iSzPoslist );
7636 if( p->bDel ){
7637 pPtr[p->nData++] = 0x00;
7638 if( p->bContent ){
7639 pPtr[p->nData++] = 0x00;
7640 }
7641 }
7642 }else{
7643 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
7644 int nPos = nSz*2 + p->bDel; /* Value of nPos field */
7645
7646 assert( p->bDel==0 || p->bDel==1 );
7647 if( nPos<=127 ){
7648 pPtr[p->iSzPoslist] = (u8)nPos;
7649 }else{
7650 int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
7651 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
7652 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
7653 p->nData += (nByte-1);
7654 }
7655 }
7656
7657 p->iSzPoslist = 0;
7658 p->bDel = 0;
7659 p->bContent = 0;
7660 }
7661 }
7662
7663 /*
7664 ** Add an entry to the in-memory hash table. The key is the concatenation
7665 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
7666 **
7667 ** (bByte || pToken) -> (iRowid,iCol,iPos)
7668 **
7669 ** Or, if iCol is negative, then the value is a delete marker.
7670 */
7671 static int sqlite3Fts5HashWrite(
7672 Fts5Hash *pHash,
7673 i64 iRowid, /* Rowid for this entry */
7674 int iCol, /* Column token appears in (-ve -> delete) */
7675 int iPos, /* Position of token within column */
7676 char bByte, /* First byte of token */
7677 const char *pToken, int nToken /* Token to add or remove to or from index */
7678 ){
7679 unsigned int iHash;
7680 Fts5HashEntry *p;
7681 u8 *pPtr;
7682 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
7683 int bNew; /* If non-delete entry should be written */
7684
7685 bNew = (pHash->eDetail==FTS5_DETAIL_FULL);
7686
7687 /* Attempt to locate an existing hash entry */
7688 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
7689 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
7690 if( p->zKey[0]==bByte
7691 && p->nKey==nToken
7692 && memcmp(&p->zKey[1], pToken, nToken)==0
7693 ){
7694 break;
7695 }
7696 }
7697
7698 /* If an existing hash entry cannot be found, create a new one. */
7699 if( p==0 ){
7700 /* Figure out how much space to allocate */
7701 int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
7702 if( nByte<128 ) nByte = 128;
7703
7704 /* Grow the Fts5Hash.aSlot[] array if necessary. */
7705 if( (pHash->nEntry*2)>=pHash->nSlot ){
7706 int rc = fts5HashResize(pHash);
7707 if( rc!=SQLITE_OK ) return rc;
7708 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
7709 }
7710
7711 /* Allocate new Fts5HashEntry and add it to the hash table. */
7712 p = (Fts5HashEntry*)sqlite3_malloc(nByte);
7713 if( !p ) return SQLITE_NOMEM;
7714 memset(p, 0, FTS5_HASHENTRYSIZE);
7715 p->nAlloc = nByte;
7716 p->zKey[0] = bByte;
7717 memcpy(&p->zKey[1], pToken, nToken);
7718 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
7719 p->nKey = nToken;
7720 p->zKey[nToken+1] = '\0';
7721 p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE;
7722 p->pHashNext = pHash->aSlot[iHash];
7723 pHash->aSlot[iHash] = p;
7724 pHash->nEntry++;
7725
7726 /* Add the first rowid field to the hash-entry */
7727 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
7728 p->iRowid = iRowid;
7729
7730 p->iSzPoslist = p->nData;
7731 if( pHash->eDetail!=FTS5_DETAIL_NONE ){
7732 p->nData += 1;
7733 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
7734 }
7735
7736 nIncr += p->nData;
7737 }else{
7738
7739 /* Appending to an existing hash-entry. Check that there is enough
7740 ** space to append the largest possible new entry. Worst case scenario
7741 ** is:
7742 **
7743 ** + 9 bytes for a new rowid,
7744 ** + 4 byte reserved for the "poslist size" varint.
7745 ** + 1 byte for a "new column" byte,
7746 ** + 3 bytes for a new column number (16-bit max) as a varint,
7747 ** + 5 bytes for the new position offset (32-bit max).
7748 */
7749 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
7750 int nNew = p->nAlloc * 2;
7751 Fts5HashEntry *pNew;
7752 Fts5HashEntry **pp;
7753 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
7754 if( pNew==0 ) return SQLITE_NOMEM;
7755 pNew->nAlloc = nNew;
7756 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
7757 *pp = pNew;
7758 p = pNew;
7759 }
7760 nIncr -= p->nData;
7761 }
7762 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
7763
7764 pPtr = (u8*)p;
7765
7766 /* If this is a new rowid, append the 4-byte size field for the previous
7767 ** entry, and the new rowid for this entry. */
7768 if( iRowid!=p->iRowid ){
7769 fts5HashAddPoslistSize(pHash, p);
7770 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
7771 p->iRowid = iRowid;
7772 bNew = 1;
7773 p->iSzPoslist = p->nData;
7774 if( pHash->eDetail!=FTS5_DETAIL_NONE ){
7775 p->nData += 1;
7776 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
7777 p->iPos = 0;
7778 }
7779 }
7780
7781 if( iCol>=0 ){
7782 if( pHash->eDetail==FTS5_DETAIL_NONE ){
7783 p->bContent = 1;
7784 }else{
7785 /* Append a new column value, if necessary */
7786 assert( iCol>=p->iCol );
7787 if( iCol!=p->iCol ){
7788 if( pHash->eDetail==FTS5_DETAIL_FULL ){
7789 pPtr[p->nData++] = 0x01;
7790 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
7791 p->iCol = (i16)iCol;
7792 p->iPos = 0;
7793 }else{
7794 bNew = 1;
7795 p->iCol = (i16)(iPos = iCol);
7796 }
7797 }
7798
7799 /* Append the new position offset, if necessary */
7800 if( bNew ){
7801 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
7802 p->iPos = iPos;
7803 }
7804 }
7805 }else{
7806 /* This is a delete. Set the delete flag. */
7807 p->bDel = 1;
7808 }
7809
7810 nIncr += p->nData;
7811 *pHash->pnByte += nIncr;
7812 return SQLITE_OK;
7813 }
7814
7815
7816 /*
7817 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
7818 ** each sorted in key order. This function merges the two lists into a
7819 ** single list and returns a pointer to its first element.
7820 */
7821 static Fts5HashEntry *fts5HashEntryMerge(
7822 Fts5HashEntry *pLeft,
7823 Fts5HashEntry *pRight
7824 ){
7825 Fts5HashEntry *p1 = pLeft;
7826 Fts5HashEntry *p2 = pRight;
7827 Fts5HashEntry *pRet = 0;
7828 Fts5HashEntry **ppOut = &pRet;
7829
7830 while( p1 || p2 ){
7831 if( p1==0 ){
7832 *ppOut = p2;
7833 p2 = 0;
7834 }else if( p2==0 ){
7835 *ppOut = p1;
7836 p1 = 0;
7837 }else{
7838 int i = 0;
7839 while( p1->zKey[i]==p2->zKey[i] ) i++;
7840
7841 if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
7842 /* p2 is smaller */
7843 *ppOut = p2;
7844 ppOut = &p2->pScanNext;
7845 p2 = p2->pScanNext;
7846 }else{
7847 /* p1 is smaller */
7848 *ppOut = p1;
7849 ppOut = &p1->pScanNext;
7850 p1 = p1->pScanNext;
7851 }
7852 *ppOut = 0;
7853 }
7854 }
7855
7856 return pRet;
7857 }
7858
7859 /*
7860 ** Extract all tokens from hash table iHash and link them into a list
7861 ** in sorted order. The hash table is cleared before returning. It is
7862 ** the responsibility of the caller to free the elements of the returned
7863 ** list.
7864 */
7865 static int fts5HashEntrySort(
7866 Fts5Hash *pHash,
7867 const char *pTerm, int nTerm, /* Query prefix, if any */
7868 Fts5HashEntry **ppSorted
7869 ){
7870 const int nMergeSlot = 32;
7871 Fts5HashEntry **ap;
7872 Fts5HashEntry *pList;
7873 int iSlot;
7874 int i;
7875
7876 *ppSorted = 0;
7877 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
7878 if( !ap ) return SQLITE_NOMEM;
7879 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
7880
7881 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
7882 Fts5HashEntry *pIter;
7883 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
7884 if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
7885 Fts5HashEntry *pEntry = pIter;
7886 pEntry->pScanNext = 0;
7887 for(i=0; ap[i]; i++){
7888 pEntry = fts5HashEntryMerge(pEntry, ap[i]);
7889 ap[i] = 0;
7890 }
7891 ap[i] = pEntry;
7892 }
7893 }
7894 }
7895
7896 pList = 0;
7897 for(i=0; i<nMergeSlot; i++){
7898 pList = fts5HashEntryMerge(pList, ap[i]);
7899 }
7900
7901 pHash->nEntry = 0;
7902 sqlite3_free(ap);
7903 *ppSorted = pList;
7904 return SQLITE_OK;
7905 }
7906
7907 /*
7908 ** Query the hash table for a doclist associated with term pTerm/nTerm.
7909 */
7910 static int sqlite3Fts5HashQuery(
7911 Fts5Hash *pHash, /* Hash table to query */
7912 const char *pTerm, int nTerm, /* Query term */
7913 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
7914 int *pnDoclist /* OUT: Size of doclist in bytes */
7915 ){
7916 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
7917 Fts5HashEntry *p;
7918
7919 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
7920 if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
7921 }
7922
7923 if( p ){
7924 fts5HashAddPoslistSize(pHash, p);
7925 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
7926 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
7927 }else{
7928 *ppDoclist = 0;
7929 *pnDoclist = 0;
7930 }
7931
7932 return SQLITE_OK;
7933 }
7934
7935 static int sqlite3Fts5HashScanInit(
7936 Fts5Hash *p, /* Hash table to query */
7937 const char *pTerm, int nTerm /* Query prefix */
7938 ){
7939 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
7940 }
7941
7942 static void sqlite3Fts5HashScanNext(Fts5Hash *p){
7943 assert( !sqlite3Fts5HashScanEof(p) );
7944 p->pScan = p->pScan->pScanNext;
7945 }
7946
7947 static int sqlite3Fts5HashScanEof(Fts5Hash *p){
7948 return (p->pScan==0);
7949 }
7950
7951 static void sqlite3Fts5HashScanEntry(
7952 Fts5Hash *pHash,
7953 const char **pzTerm, /* OUT: term (nul-terminated) */
7954 const u8 **ppDoclist, /* OUT: pointer to doclist */
7955 int *pnDoclist /* OUT: size of doclist in bytes */
7956 ){
7957 Fts5HashEntry *p;
7958 if( (p = pHash->pScan) ){
7959 int nTerm = (int)strlen(p->zKey);
7960 fts5HashAddPoslistSize(pHash, p);
7961 *pzTerm = p->zKey;
7962 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
7963 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
7964 }else{
7965 *pzTerm = 0;
7966 *ppDoclist = 0;
7967 *pnDoclist = 0;
7968 }
7969 }
7970
7971
7972 /*
7973 ** 2014 May 31
7974 **
7975 ** The author disclaims copyright to this source code. In place of
7976 ** a legal notice, here is a blessing:
7977 **
7978 ** May you do good and not evil.
7979 ** May you find forgiveness for yourself and forgive others.
7980 ** May you share freely, never taking more than you give.
7981 **
7982 ******************************************************************************
7983 **
7984 ** Low level access to the FTS index stored in the database file. The
7985 ** routines in this file implement all read and write access to the
7986 ** %_data table. Other parts of the system access this functionality via
7987 ** the interface defined in fts5Int.h.
7988 */
7989
7990
7991 /* #include "fts5Int.h" */
7992
7993 /*
7994 ** Overview:
7995 **
7996 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
7997 ** As well as the main term index, there may be up to 31 prefix indexes.
7998 ** The format is similar to FTS3/4, except that:
7999 **
8000 ** * all segment b-tree leaf data is stored in fixed size page records
8001 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
8002 ** taken to ensure it is possible to iterate in either direction through
8003 ** the entries in a doclist, or to seek to a specific entry within a
8004 ** doclist, without loading it into memory.
8005 **
8006 ** * large doclists that span many pages have associated "doclist index"
8007 ** records that contain a copy of the first rowid on each page spanned by
8008 ** the doclist. This is used to speed up seek operations, and merges of
8009 ** large doclists with very small doclists.
8010 **
8011 ** * extra fields in the "structure record" record the state of ongoing
8012 ** incremental merge operations.
8013 **
8014 */
8015
8016
/*
** Work-unit sizes, the doclist-index threshold and the main-index prefix
** character, followed by a compile-time check that the configured maximum
** number of prefix indexes does not exceed 31.
*/
8017 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
8018 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
8019
8020 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
8021
8022 #define FTS5_MAIN_PREFIX '0' /* NOTE(review): presumably the key prefix byte of the main term index - confirm */
8023
8024 #if FTS5_MAX_PREFIX_INDEXES > 31
8025 # error "FTS5_MAX_PREFIX_INDEXES is too large"
8026 #endif
8027
8028 /*
8029 ** Details:
8030 **
8031 ** The %_data table managed by this module,
8032 **
8033 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
8034 **
8035 ** , contains the following 5 types of records. See the comments surrounding
8036 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
8037 ** assigned to each of them.
8038 **
8039 ** 1. Structure Records:
8040 **
8041 ** The set of segments that make up an index - the index structure - are
8042 ** recorded in a single record within the %_data table. The record consists
8043 ** of a single 32-bit configuration cookie value followed by a list of
8044 ** SQLite varints. If the FTS table features more than one index (because
8045 ** there are one or more prefix indexes), it is guaranteed that all share
8046 ** the same cookie value.
8047 **
8048 ** Immediately following the configuration cookie, the record begins with
8049 ** three varints:
8050 **
8051 ** + number of levels,
8052 ** + total number of segments on all levels,
8053 ** + value of write counter.
8054 **
8055 ** Then, for each level from 0 to nMax:
8056 **
8057 ** + number of input segments in ongoing merge.
8058 ** + total number of segments in level.
8059 ** + for each segment from oldest to newest:
8060 ** + segment id (always > 0)
8061 ** + first leaf page number (often 1, always greater than 0)
8062 ** + final leaf page number
8063 **
8064 ** 2. The Averages Record:
8065 **
8066 ** A single record within the %_data table. The data is a list of varints.
8067 ** The first value is the number of rows in the index. Then, for each column
8068 ** from left to right, the total number of tokens in the column for all
8069 ** rows of the table.
8070 **
8071 ** 3. Segment leaves:
8072 **
8073 ** TERM/DOCLIST FORMAT:
8074 **
8075 ** Most of each segment leaf is taken up by term/doclist data. The
8076 ** general format of term/doclist, starting with the first term
8077 ** on the leaf page, is:
8078 **
8079 ** varint : size of first term
8080 ** blob: first term data
8081 ** doclist: first doclist
8082 ** zero-or-more {
8083 ** varint: number of bytes in common with previous term
8084 ** varint: number of bytes of new term data (nNew)
8085 ** blob: nNew bytes of new term data
8086 ** doclist: next doclist
8087 ** }
8088 **
8089 ** doclist format:
8090 **
8091 ** varint: first rowid
8092 ** poslist: first poslist
8093 ** zero-or-more {
8094 ** varint: rowid delta (always > 0)
8095 ** poslist: next poslist
8096 ** }
8097 **
8098 ** poslist format:
8099 **
8100 ** varint: size of poslist in bytes multiplied by 2, not including
8101 ** this field. Plus 1 if this entry carries the "delete" flag.
8102 ** collist: collist for column 0
8103 ** zero-or-more {
8104 ** 0x01 byte
8105 ** varint: column number (I)
8106 ** collist: collist for column I
8107 ** }
8108 **
8109 ** collist format:
8110 **
8111 ** varint: first offset + 2
8112 ** zero-or-more {
8113 ** varint: offset delta + 2
8114 ** }
8115 **
8116 ** PAGE FORMAT
8117 **
8118 ** Each leaf page begins with a 4-byte header containing 2 16-bit
8119 ** unsigned integer fields in big-endian format. They are:
8120 **
8121 ** * The byte offset of the first rowid on the page, if it exists
8122 ** and occurs before the first term (otherwise 0).
8123 **
8124 ** * The byte offset of the start of the page footer. If the page
8125 ** footer is 0 bytes in size, then this field is the same as the
8126 ** size of the leaf page in bytes.
8127 **
8128 ** The page footer consists of a single varint for each term located
8129 ** on the page. Each varint is the byte offset of the current term
8130 ** within the page, delta-compressed against the previous value. In
8131 ** other words, the first varint in the footer is the byte offset of
8132 ** the first term, the second is the byte offset of the second less that
8133 ** of the first, and so on.
8134 **
8135 ** The term/doclist format described above is accurate if the entire
8136 ** term/doclist data fits on a single leaf page. If this is not the case,
8137 ** the format is changed in two ways:
8138 **
8139 ** + if the first rowid on a page occurs before the first term, it
8140 ** is stored as a literal value:
8141 **
8142 ** varint: first rowid
8143 **
8144 ** + the first term on each page is stored in the same way as the
8145 ** very first term of the segment:
8146 **
8147 ** varint : size of first term
8148 ** blob: first term data
8149 **
8150 ** 5. Segment doclist indexes:
8151 **
8152 ** Doclist indexes are themselves b-trees, however they usually consist of
8153 ** a single leaf record only. The format of each doclist index leaf page
8154 ** is:
8155 **
8156 ** * Flags byte. Bits are:
8157 ** 0x01: Clear if leaf is also the root page, otherwise set.
8158 **
8159 ** * Page number of fts index leaf page. As a varint.
8160 **
8161 ** * First rowid on page indicated by previous field. As a varint.
8162 **
8163 ** * A list of varints, one for each subsequent termless page. A
8164 ** positive delta if the termless page contains at least one rowid,
8165 ** or an 0x00 byte otherwise.
8166 **
8167 ** Internal doclist index nodes are:
8168 **
8169 ** * Flags byte. Bits are:
8170 ** 0x01: Clear for root page, otherwise set.
8171 **
8172 ** * Page number of first child page. As a varint.
8173 **
8174 ** * Copy of first rowid on page indicated by previous field. As a varint.
8175 **
8176 ** * A list of delta-encoded varints - the first rowid on each subsequent
8177 ** child page.
8178 **
8179 */
8180
8181 /*
8182 ** Rowids for the averages and structure records in the %_data table.
** Both are small fixed values; segment leaves and doclist-index pages
** instead use the rowids computed by the fts5_dri() macro.
8183 */
8184 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
8185 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
8186
8187 /*
8188 ** Macros determining the rowids used by segment leaves and dlidx leaves
8189 ** and nodes. All nodes and leaves are stored in the %_data table with large
8190 ** positive rowids.
8191 **
8192 ** Each segment has a unique non-zero 16-bit id.
8193 **
8194 ** The rowid for each segment leaf is found by passing the segment id and
8195 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
8196 ** sequentially starting from 1.
8197 */
8198 #define FTS5_DATA_ID_B 16 /* Width of segment id field: ids up to 65535 */
8199 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
8200 #define FTS5_DATA_HEIGHT_B 5 /* Width of dlidx height field: values 0..31 */
8201 #define FTS5_DATA_PAGE_B 31 /* Width of page number field: up to 2147483647 */
8202
/*
** fts5_dri() packs its four arguments into one i64, from most to least
** significant:
**
**     segid  : shifted left by 37 (31+5+1) bits
**     dlidx  : bit 36
**     height : bits 31..35
**     pgno   : bits 0..30
**
** The fields are combined with '+', so each argument must fit within its
** field for the result to decode unambiguously.
*/
8203 #define fts5_dri(segid, dlidx, height, pgno) ( \
8204 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
8205 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
8206 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
8207 ((i64)(pgno)) \
8208 )
8209
/* Rowid of a segment leaf: dlidx flag and height are both zero. */
8210 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
/* Rowid of a doclist-index page: dlidx flag set, height as given. */
8211 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
8212
8213 /*
8214 ** Maximum segments permitted in a single index
** NOTE(review): the code that enforces this limit is not in this part of
** the file - confirm where it is checked.
8215 */
8216 #define FTS5_MAX_SEGMENT 2000
8217
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. Compiled only when SQLITE_DEBUG is defined.
** NOTE(review): presumably FTS5_CORRUPT expands to a call to this function
** in debug builds so that a breakpoint can be set here - confirm against
** the definition of FTS5_CORRUPT.
**
** The parameter list is written as (void) so that the definition is a
** proper prototype rather than an old-style unspecified-argument
** declaration.
*/
static int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
8221
8222
8223 /*
8224 ** Each time a blob is read from the %_data table, it is padded with this
8225 ** many zero bytes. This makes it easier to decode the various record formats
8226 ** without overreading if the records are corrupt.
8227 */
8228 #define FTS5_DATA_ZERO_PADDING 8 /* NOTE(review): not referenced by fts5DataRead() - confirm where this is used */
8229 #define FTS5_DATA_PADDING 20 /* Extra bytes fts5DataRead() allocates after each record */
8230
/*
** Forward typedefs for the structure types defined further down, so that
** the definitions can refer to one another by their short names.
*/
8231 typedef struct Fts5Data Fts5Data;
8232 typedef struct Fts5DlidxIter Fts5DlidxIter;
8233 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
8234 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
8235 typedef struct Fts5Iter Fts5Iter;
8236 typedef struct Fts5PageWriter Fts5PageWriter;
8237 typedef struct Fts5SegIter Fts5SegIter;
8238 typedef struct Fts5DoclistIter Fts5DoclistIter;
8239 typedef struct Fts5SegWriter Fts5SegWriter;
8240 typedef struct Fts5Structure Fts5Structure;
8241 typedef struct Fts5StructureLevel Fts5StructureLevel;
8242 typedef struct Fts5StructureSegment Fts5StructureSegment;
8243
/*
** A record read from the %_data table. fts5DataRead() allocates this
** structure and the record bytes as a single block, with p pointing just
** past the structure itself, so fts5DataRelease() frees both with one call
** to sqlite3_free().
*/
8244 struct Fts5Data {
8245 u8 *p; /* Pointer to buffer containing record */
8246 int nn; /* Size of record in bytes */
8247 int szLeaf; /* Size of leaf without page-index; fts5DataRead() sets this
               ** from the 16-bit big-endian value at byte offset 2 */
8248 };
8249
8250 /*
8251 ** One object per %_data table.
**
** The rc field is a sticky error code: fts5DataRead(), fts5DataWrite() and
** fts5IndexPrepareStmt() do no database work once it is not SQLITE_OK.
8252 */
8253 struct Fts5Index {
8254 Fts5Config *pConfig; /* Virtual table configuration */
8255 char *zDataTbl; /* Name of %_data table */
8256 int nWorkUnit; /* Leaf pages in a "unit" of work */
8257
8258 /*
8259 ** Variables related to the accumulation of tokens and doclists within the
8260 ** in-memory hash tables before they are flushed to disk.
8261 */
8262 Fts5Hash *pHash; /* Hash table for in-memory data */
8263 int nPendingData; /* Current bytes of pending data */
8264 i64 iWriteRowid; /* Rowid for current doc being written */
8265 int bDelete; /* Current write is a delete */
8266
8267 /* Error state. */
8268 int rc; /* Current error code */
8269
8270 /* State used by the fts5DataXXX() functions. */
8271 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
8272 sqlite3_stmt *pWriter; /* "REPLACE INTO ... %_data(id, block) VALUES(?,?)" */
8273 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
8274 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
8275 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
8276 sqlite3_stmt *pIdxSelect; /* NOTE(review): presumably a SELECT on %_idx - confirm */
8277 int nRead; /* Total number of blocks read */
8278
8279 sqlite3_stmt *pDataVersion; /* NOTE(review): presumably reads data_version - confirm */
8280 i64 iStructVersion; /* data_version when pStruct read */
8281 Fts5Structure *pStruct; /* Current db structure (or NULL) */
8282 };
8283
/*
** State for iterating over the entries of a doclist held in a memory
** buffer. The current entry is exposed through the output variables.
*/
8284 struct Fts5DoclistIter {
8285 u8 *aEof; /* Pointer to 1 byte past end of doclist */
8286
8287 /* Output variables. aPoslist==0 at EOF */
8288 i64 iRowid; /* Rowid of current entry */
8289 u8 *aPoslist; /* Position list of current entry, or NULL at EOF */
8290 int nPoslist; /* NOTE(review): presumably size of aPoslist in bytes - confirm */
8291 int nSize; /* NOTE(review): presumably size of the poslist-size field - confirm */
8292 };
8293
8294 /*
8295 ** The contents of the "structure" record for each index are represented
8296 ** using an Fts5Structure record in memory. Which uses instances of the
8297 ** other Fts5StructureXXX types as components.
**
** Fts5StructureSegment describes one segment: its id and the range of
** leaf page numbers it occupies.
8298 */
8299 struct Fts5StructureSegment {
8300 int iSegid; /* Segment id */
8301 int pgnoFirst; /* First leaf page number in segment */
8302 int pgnoLast; /* Last leaf page number in segment */
8303 };
/* One level of an index structure: the segments on that level. */
8304 struct Fts5StructureLevel {
8305 int nMerge; /* Number of segments in incr-merge */
8306 int nSeg; /* Total number of segments on level */
8307 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
8308 };
/*
** In-memory form of a structure record: a reference-counted header
** followed by an array of levels.
*/
8309 struct Fts5Structure {
8310 int nRef; /* Object reference count */
8311 u64 nWriteCounter; /* Total leaves written to level 0 */
8312 int nSegment; /* Total segments in this structure */
8313 int nLevel; /* Number of levels in this index */
8314 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects. Declared with
                                ** one element; NOTE(review): presumably the
                                ** object is over-allocated - confirm at the
                                ** allocation site */
8315 };
8316
8317 /*
8318 ** An object of type Fts5SegWriter is used to write to segments.
**
** Fts5PageWriter, embedded in Fts5SegWriter as its "writer" member, holds
** the buffers for the leaf page currently being assembled.
8319 */
8320 struct Fts5PageWriter {
8321 int pgno; /* Page number for this page */
8322 int iPrevPgidx; /* Previous value written into pgidx */
8323 Fts5Buffer buf; /* Buffer containing leaf data */
8324 Fts5Buffer pgidx; /* Buffer containing page-index */
8325 Fts5Buffer term; /* Buffer containing previous term on page */
8326 };
/*
** State for one doclist-index page being written. Fts5SegWriter keeps an
** array of these in its aDlidx member.
*/
8327 struct Fts5DlidxWriter {
8328 int pgno; /* Page number for this page */
8329 int bPrevValid; /* True if iPrev is valid */
8330 i64 iPrev; /* Previous rowid value written to page */
8331 Fts5Buffer buf; /* Buffer containing page data */
8332 };
/*
** State for writing one segment: the leaf page under construction, flags
** tracking whether the next rowid/term is the first of its kind, the
** doclist-index writers, and the pending %_idx table entry.
*/
8333 struct Fts5SegWriter {
8334 int iSegid; /* Segid to write to */
8335 Fts5PageWriter writer; /* PageWriter object */
8336 i64 iPrevRowid; /* Previous rowid written to current leaf */
8337 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
8338 u8 bFirstRowidInPage; /* True if next rowid is first in page */
8339 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
8340 u8 bFirstTermInPage; /* True if next term will be first in leaf */
8341 int nLeafWritten; /* Number of leaf pages written */
8342 int nEmpty; /* Number of contiguous term-less nodes */
8343
8344 int nDlidx; /* Allocated size of aDlidx[] array */
8345 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
8346
8347 /* Values to insert into the %_idx table */
8348 Fts5Buffer btterm; /* Next term to insert into %_idx table */
8349 int iBtPage; /* Page number corresponding to btterm */
8350 };
8351
/*
** Result of one comparison in the Fts5Iter merge state. Fts5Iter.aFirst[]
** is an array of these.
*/
8352 typedef struct Fts5CResult Fts5CResult;
8353 struct Fts5CResult {
8354 u16 iFirst; /* aSeg[] index of the iterator that sorts first */
8355 u8 bTermEq; /* True if the terms are equal */
8356 };
8357
8358 /*
8359 ** Object for iterating through a single segment, visiting each term/rowid
8360 ** pair in the segment.
8361 **
8362 ** pSeg:
8363 ** The segment to iterate through.
8364 **
8365 ** iLeafPgno:
8366 ** Current leaf page number within segment.
8367 **
8368 ** iLeafOffset:
8369 ** Byte offset within the current leaf that is the first byte of the
8370 ** position list data (one byte past the position-list size field).
8371 ** rowid field of the current entry. Usually this is the size field of the
8372 ** position list data. The exception is if the rowid for the current entry
8373 ** is the last thing on the leaf page.
8374 **
8375 ** pLeaf:
8376 ** Buffer containing current leaf page data. Set to NULL at EOF.
8377 **
8378 ** iTermLeafPgno, iTermLeafOffset:
8379 ** Leaf page number containing the last term read from the segment. And
8380 ** the offset immediately following the term data.
8381 **
8382 ** flags:
8383 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
8384 **
8385 ** FTS5_SEGITER_ONETERM:
8386 ** If set, set the iterator to point to EOF after the current doclist
8387 ** has been exhausted. Do not proceed to the next term in the segment.
8388 **
8389 ** FTS5_SEGITER_REVERSE:
8390 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
8391 ** it is set, iterate through rowid in descending order instead of the
8392 ** default ascending order.
8393 **
8394 ** iRowidOffset/nRowidOffset/aRowidOffset:
8395 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
8396 **
8397 ** For each rowid on the page corresponding to the current term, the
8398 ** corresponding aRowidOffset[] entry is set to the byte offset of the
8399 ** start of the "position-list-size" field within the page.
8400 **
8401 ** iTermIdx:
8402 ** Index of current term on iTermLeafPgno.
8403 */
/*
** Iterator over the term/rowid pairs of a single segment. The individual
** fields are described in the comment block preceding this definition.
*/
8404 struct Fts5SegIter {
8405 Fts5StructureSegment *pSeg; /* Segment to iterate through */
8406 int flags; /* Mask of configuration flags (FTS5_SEGITER_XXX) */
8407 int iLeafPgno; /* Current leaf page number */
8408 Fts5Data *pLeaf; /* Current leaf data */
8409 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
8410 int iLeafOffset; /* Byte offset within current leaf */
8411
8412 /* Next method */
8413 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
8414
8415 /* The page and offset from which the current term was read. The offset
8416 ** is the offset of the first rowid in the current doclist. */
8417 int iTermLeafPgno;
8418 int iTermLeafOffset;
8419
8420 int iPgidxOff; /* Next offset in pgidx */
8421 int iEndofDoclist; /* NOTE(review): presumably the leaf offset at which the
                     ** current doclist ends - confirm */
8422
8423 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
8424 int iRowidOffset; /* Current entry in aRowidOffset[] */
8425 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
8426 int *aRowidOffset; /* Array of offset to rowid fields */
8427
8428 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
8429
8430 /* Variables populated based on current entry. */
8431 Fts5Buffer term; /* Current term */
8432 i64 iRowid; /* Current rowid */
8433 int nPos; /* Number of bytes in current position list */
8434 u8 bDel; /* True if the delete flag is set */
8435 };
8436
8437 /*
8438 ** Argument is a pointer to an Fts5Data structure that contains a
8439 ** leaf page.
**
** Asserts that the szLeaf field either equals the record size (nn) or
** equals the 16-bit big-endian value stored at byte offset 2 of the page.
8440 */
8441 #define ASSERT_SZLEAF_OK(x) assert( \
8442 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
8443 )
8444
/* Bit values for Fts5SegIter.flags. */
8445 #define FTS5_SEGITER_ONETERM 0x01 /* Go to EOF once the current doclist is exhausted */
8446 #define FTS5_SEGITER_REVERSE 0x02 /* Visit rowids in descending order */
8447
8448 /*
8449 ** Argument is a pointer to an Fts5Data structure that contains a leaf
8450 ** page. This macro evaluates to true if the leaf contains no terms, or
8451 ** false if it contains at least one term.
**
** The test is szLeaf>=nn, i.e. no bytes follow the leaf data in the
** record.
8452 */
8453 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
8454
/*
** Read a 16-bit big-endian value from the page at byte offset
** szLeaf + 2*i.
*/
8455 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
8456
/*
** Read the first header field of the leaf page: the 16-bit big-endian
** value stored in bytes 0 and 1.
*/
8457 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
8458
8459 /*
8460 ** Object for iterating through the merged results of one or more segments,
8461 ** visiting each term/rowid pair in the merged data.
8462 **
8463 ** nSeg is always a power of two greater than or equal to the number of
8464 ** segments that this object is merging data from. Both the aSeg[] and
8465 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
8466 ** with zeroed objects - these are handled as if they were iterators opened
8467 ** on empty segments.
8468 **
8469 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
8470 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
8471 ** comparison in this context is the index of the iterator that currently
8472 ** points to the smaller term/rowid combination. Iterators at EOF are
8473 ** considered to be greater than all other iterators.
8474 **
8475 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
8476 ** the smallest key overall. aFirst[0] is unused.
8477 **
8478 ** poslist:
8479 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
8480 ** There is no way to tell if this is populated or not.
8481 */
/*
** Iterator that merges the output of several Fts5SegIter objects. See the
** comment block preceding this definition for how aSeg[] and aFirst[] are
** organised.
*/
8482 struct Fts5Iter {
8483 Fts5IndexIter base; /* Base class containing output vars */
8484
8485 Fts5Index *pIndex; /* Index that owns this iterator */
8486 Fts5Structure *pStruct; /* Database structure for this iterator */
8487 Fts5Buffer poslist; /* Buffer containing current poslist */
8488 Fts5Colset *pColset; /* Restrict matches to these columns */
8489
8490 /* Invoked to set output variables. */
8491 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
8492
8493 int nSeg; /* Size of aSeg[] array */
8494 int bRev; /* True to iterate in reverse order */
8495 u8 bSkipEmpty; /* True to skip deleted entries */
8496
8497 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
8498 Fts5CResult *aFirst; /* Current merge state (see above) */
8499 Fts5SegIter aSeg[1]; /* Array of segment iterators. Declared with one
                        ** element; NOTE(review): presumably over-allocated to
                        ** nSeg entries - confirm at the allocation site */
8500 };
8501
8502
8503 /*
8504 ** An instance of the following type is used to iterate through the contents
8505 ** of a doclist-index record.
8506 **
8507 ** pData:
8508 ** Record containing the doclist-index data.
8509 **
8510 ** bEof:
8511 ** Set to true once iterator has reached EOF.
8512 **
8513 ** iOff:
8514 ** Set to the current offset within record pData.
8515 */
/*
** Iteration state for one level of a doclist index. Fts5DlidxIter holds an
** array of these in its aLvl member.
*/
8516 struct Fts5DlidxLvl {
8517 Fts5Data *pData; /* Data for current page of this level */
8518 int iOff; /* Current offset into pData */
8519 int bEof; /* At EOF already */
8520 int iFirstOff; /* Used by reverse iterators */
8521
8522 /* Output variables */
8523 int iLeafPgno; /* Page number of current leaf page */
8524 i64 iRowid; /* First rowid on leaf iLeafPgno */
8525 };
/* Iterator over a doclist index, made up of one Fts5DlidxLvl per level. */
8526 struct Fts5DlidxIter {
8527 int nLvl; /* NOTE(review): presumably the number of entries in aLvl[] - confirm */
8528 int iSegid; /* NOTE(review): presumably id of the segment being read - confirm */
8529 Fts5DlidxLvl aLvl[1]; /* Per-level state; declared with one element */
8530 };
8531
8532 static void fts5PutU16(u8 *aOut, u16 iVal){
8533 aOut[0] = (iVal>>8);
8534 aOut[1] = (iVal&0xFF);
8535 }
8536
8537 static u16 fts5GetU16(const u8 *aIn){
8538 return ((u16)aIn[0] << 8) + aIn[1];
8539 }
8540
8541 /*
8542 ** Allocate and return a buffer at least nByte bytes in size.
8543 **
8544 ** If an OOM error is encountered, return NULL and set the error code in
8545 ** the Fts5Index handle passed as the first argument.
8546 */
8547 static void *fts5IdxMalloc(Fts5Index *p, int nByte){
8548 return sqlite3Fts5MallocZero(&p->rc, nByte);
8549 }
8550
/*
** Compare buffer pLeft with the nRight byte blob at pRight.
**
** The common leading bytes are compared with memcmp(). If they differ,
** the memcmp() result is returned. Otherwise the difference in lengths is
** returned, so a value that is a prefix of the other sorts first:
**
**     negative : pLeft is smaller than pRight
**     zero     : the two are equal
**     positive : pRight is smaller than pLeft
**
** Only compiled when SQLITE_DEBUG is defined.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nShared = MIN(pLeft->n, nRight);
  int cmp = memcmp(pLeft->p, pRight, nShared);
  if( cmp!=0 ) return cmp;
  return pLeft->n - nRight;
}
#endif
8569
8570 /*
8571 ** Compare the contents of the two buffers using memcmp(). If one buffer
8572 ** is a prefix of the other, it is considered the lesser.
8573 **
8574 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
8575 ** +ve if pRight is smaller than pLeft. In other words:
8576 **
8577 ** res = *pLeft - *pRight
8578 */
8579 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
8580 int nCmp = MIN(pLeft->n, pRight->n);
8581 int res = memcmp(pLeft->p, pRight->p, nCmp);
8582 return (res==0 ? (pLeft->n - pRight->n) : res);
8583 }
8584
8585 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
8586 int ret;
8587 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
8588 return ret;
8589 }
8590
8591 /*
8592 ** Close the read-only blob handle, if it is open.
8593 */
8594 static void fts5CloseReader(Fts5Index *p){
8595 if( p->pReader ){
8596 sqlite3_blob *pReader = p->pReader;
8597 p->pReader = 0;
8598 sqlite3_blob_close(pReader);
8599 }
8600 }
8601
8602
8603 /*
8604 ** Retrieve a record from the %_data table.
8605 **
8606 ** If an error occurs, NULL is returned and an error left in the
8607 ** Fts5Index object.
8608 */
8609 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
8610 Fts5Data *pRet = 0;
8611 if( p->rc==SQLITE_OK ){
8612 int rc = SQLITE_OK;
8613
8614 if( p->pReader ){
8615 /* This call may return SQLITE_ABORT if there has been a savepoint
8616 ** rollback since it was last used. In this case a new blob handle
8617 ** is required. */
8618 sqlite3_blob *pBlob = p->pReader;
8619 p->pReader = 0;
8620 rc = sqlite3_blob_reopen(pBlob, iRowid);
8621 assert( p->pReader==0 );
8622 p->pReader = pBlob;
8623 if( rc!=SQLITE_OK ){
8624 fts5CloseReader(p);
8625 }
8626 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
8627 }
8628
8629 /* If the blob handle is not open at this point, open it and seek
8630 ** to the requested entry. */
8631 if( p->pReader==0 && rc==SQLITE_OK ){
8632 Fts5Config *pConfig = p->pConfig;
8633 rc = sqlite3_blob_open(pConfig->db,
8634 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
8635 );
8636 }
8637
8638 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
8639 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
8640 ** All the reasons those functions might return SQLITE_ERROR - missing
8641 ** table, missing row, non-blob/text in block column - indicate
8642 ** backing store corruption. */
8643 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
8644
8645 if( rc==SQLITE_OK ){
8646 u8 *aOut = 0; /* Read blob data into this buffer */
8647 int nByte = sqlite3_blob_bytes(p->pReader);
8648 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
8649 pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
8650 if( pRet ){
8651 pRet->nn = nByte;
8652 aOut = pRet->p = (u8*)&pRet[1];
8653 }else{
8654 rc = SQLITE_NOMEM;
8655 }
8656
8657 if( rc==SQLITE_OK ){
8658 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
8659 }
8660 if( rc!=SQLITE_OK ){
8661 sqlite3_free(pRet);
8662 pRet = 0;
8663 }else{
8664 /* TODO1: Fix this */
8665 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
8666 }
8667 }
8668 p->rc = rc;
8669 p->nRead++;
8670 }
8671
8672 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
8673 return pRet;
8674 }
8675
8676 /*
8677 ** Release a reference to data record returned by an earlier call to
8678 ** fts5DataRead().
8679 */
8680 static void fts5DataRelease(Fts5Data *pData){
8681 sqlite3_free(pData);
8682 }
8683
8684 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
8685 Fts5Data *pRet = fts5DataRead(p, iRowid);
8686 if( pRet ){
8687 if( pRet->szLeaf>pRet->nn ){
8688 p->rc = FTS5_CORRUPT;
8689 fts5DataRelease(pRet);
8690 pRet = 0;
8691 }
8692 }
8693 return pRet;
8694 }
8695
8696 static int fts5IndexPrepareStmt(
8697 Fts5Index *p,
8698 sqlite3_stmt **ppStmt,
8699 char *zSql
8700 ){
8701 if( p->rc==SQLITE_OK ){
8702 if( zSql ){
8703 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
8704 }else{
8705 p->rc = SQLITE_NOMEM;
8706 }
8707 }
8708 sqlite3_free(zSql);
8709 return p->rc;
8710 }
8711
8712
8713 /*
8714 ** INSERT OR REPLACE a record into the %_data table.
8715 */
8716 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
8717 if( p->rc!=SQLITE_OK ) return;
8718
8719 if( p->pWriter==0 ){
8720 Fts5Config *pConfig = p->pConfig;
8721 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
8722 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
8723 pConfig->zDb, pConfig->zName
8724 ));
8725 if( p->rc ) return;
8726 }
8727
8728 sqlite3_bind_int64(p->pWriter, 1, iRowid);
8729 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
8730 sqlite3_step(p->pWriter);
8731 p->rc = sqlite3_reset(p->pWriter);
8732 }
8733
8734 /*
8735 ** Execute the following SQL:
8736 **
8737 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
8738 */
8739 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
8740 if( p->rc!=SQLITE_OK ) return;
8741
8742 if( p->pDeleter==0 ){
8743 int rc;
8744 Fts5Config *pConfig = p->pConfig;
8745 char *zSql = sqlite3_mprintf(
8746 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
8747 pConfig->zDb, pConfig->zName
8748 );
8749 if( zSql==0 ){
8750 rc = SQLITE_NOMEM;
8751 }else{
8752 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
8753 sqlite3_free(zSql);
8754 }
8755 if( rc!=SQLITE_OK ){
8756 p->rc = rc;
8757 return;
8758 }
8759 }
8760
8761 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
8762 sqlite3_bind_int64(p->pDeleter, 2, iLast);
8763 sqlite3_step(p->pDeleter);
8764 p->rc = sqlite3_reset(p->pDeleter);
8765 }
8766
8767 /*
8768 ** Remove all records associated with segment iSegid.
8769 */
8770 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
8771 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
8772 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
8773 fts5DataDelete(p, iFirst, iLast);
8774 if( p->pIdxDeleter==0 ){
8775 Fts5Config *pConfig = p->pConfig;
8776 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
8777 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
8778 pConfig->zDb, pConfig->zName
8779 ));
8780 }
8781 if( p->rc==SQLITE_OK ){
8782 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
8783 sqlite3_step(p->pIdxDeleter);
8784 p->rc = sqlite3_reset(p->pIdxDeleter);
8785 }
8786 }
8787
8788 /*
8789 ** Release a reference to an Fts5Structure object returned by an earlier
8790 ** call to fts5StructureRead() or fts5StructureDecode().
8791 */
8792 static void fts5StructureRelease(Fts5Structure *pStruct){
8793 if( pStruct && 0>=(--pStruct->nRef) ){
8794 int i;
8795 assert( pStruct->nRef==0 );
8796 for(i=0; i<pStruct->nLevel; i++){
8797 sqlite3_free(pStruct->aLevel[i].aSeg);
8798 }
8799 sqlite3_free(pStruct);
8800 }
8801 }
8802
8803 static void fts5StructureRef(Fts5Structure *pStruct){
8804 pStruct->nRef++;
8805 }
8806
8807 /*
8808 ** Deserialize and return the structure record currently stored in serialized
8809 ** form within buffer pData/nData.
8810 **
8811 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
8812 ** are over-allocated by one slot. This allows the structure contents
8813 ** to be more easily edited.
8814 **
8815 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
8816 ** returned. Otherwise, *ppOut is set to point to the new object and
8817 ** SQLITE_OK returned.
8818 */
8819 static int fts5StructureDecode(
8820 const u8 *pData, /* Buffer containing serialized structure */
8821 int nData, /* Size of buffer pData in bytes */
8822 int *piCookie, /* Configuration cookie value */
8823 Fts5Structure **ppOut /* OUT: Deserialized object */
8824 ){
8825 int rc = SQLITE_OK;
8826 int i = 0;
8827 int iLvl;
8828 int nLevel = 0;
8829 int nSegment = 0;
8830 int nByte; /* Bytes of space to allocate at pRet */
8831 Fts5Structure *pRet = 0; /* Structure object to return */
8832
8833 /* Grab the cookie value */
8834 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
8835 i = 4;
8836
8837 /* Read the total number of levels and segments from the start of the
8838 ** structure record. */
8839 i += fts5GetVarint32(&pData[i], nLevel);
8840 i += fts5GetVarint32(&pData[i], nSegment);
8841 nByte = (
8842 sizeof(Fts5Structure) + /* Main structure */
8843 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
8844 );
8845 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
8846
8847 if( pRet ){
8848 pRet->nRef = 1;
8849 pRet->nLevel = nLevel;
8850 pRet->nSegment = nSegment;
8851 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
8852
8853 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
8854 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
8855 int nTotal = 0;
8856 int iSeg;
8857
8858 if( i>=nData ){
8859 rc = FTS5_CORRUPT;
8860 }else{
8861 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
8862 i += fts5GetVarint32(&pData[i], nTotal);
8863 assert( nTotal>=pLvl->nMerge );
8864 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
8865 nTotal * sizeof(Fts5StructureSegment)
8866 );
8867 }
8868
8869 if( rc==SQLITE_OK ){
8870 pLvl->nSeg = nTotal;
8871 for(iSeg=0; iSeg<nTotal; iSeg++){
8872 if( i>=nData ){
8873 rc = FTS5_CORRUPT;
8874 break;
8875 }
8876 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
8877 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
8878 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
8879 }
8880 }
8881 }
8882 if( rc!=SQLITE_OK ){
8883 fts5StructureRelease(pRet);
8884 pRet = 0;
8885 }
8886 }
8887
8888 *ppOut = pRet;
8889 return rc;
8890 }
8891
8892 /*
8893 **
8894 */
8895 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
8896 if( *pRc==SQLITE_OK ){
8897 Fts5Structure *pStruct = *ppStruct;
8898 int nLevel = pStruct->nLevel;
8899 int nByte = (
8900 sizeof(Fts5Structure) + /* Main structure */
8901 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
8902 );
8903
8904 pStruct = sqlite3_realloc(pStruct, nByte);
8905 if( pStruct ){
8906 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
8907 pStruct->nLevel++;
8908 *ppStruct = pStruct;
8909 }else{
8910 *pRc = SQLITE_NOMEM;
8911 }
8912 }
8913 }
8914
8915 /*
8916 ** Extend level iLvl so that there is room for at least nExtra more
8917 ** segments.
8918 */
8919 static void fts5StructureExtendLevel(
8920 int *pRc,
8921 Fts5Structure *pStruct,
8922 int iLvl,
8923 int nExtra,
8924 int bInsert
8925 ){
8926 if( *pRc==SQLITE_OK ){
8927 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
8928 Fts5StructureSegment *aNew;
8929 int nByte;
8930
8931 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
8932 aNew = sqlite3_realloc(pLvl->aSeg, nByte);
8933 if( aNew ){
8934 if( bInsert==0 ){
8935 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
8936 }else{
8937 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
8938 memmove(&aNew[nExtra], aNew, nMove);
8939 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
8940 }
8941 pLvl->aSeg = aNew;
8942 }else{
8943 *pRc = SQLITE_NOMEM;
8944 }
8945 }
8946 }
8947
8948 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
8949 Fts5Structure *pRet = 0;
8950 Fts5Config *pConfig = p->pConfig;
8951 int iCookie; /* Configuration cookie */
8952 Fts5Data *pData;
8953
8954 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
8955 if( p->rc==SQLITE_OK ){
8956 /* TODO: Do we need this if the leaf-index is appended? Probably... */
8957 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
8958 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
8959 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
8960 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
8961 }
8962 fts5DataRelease(pData);
8963 if( p->rc!=SQLITE_OK ){
8964 fts5StructureRelease(pRet);
8965 pRet = 0;
8966 }
8967 }
8968
8969 return pRet;
8970 }
8971
8972 static i64 fts5IndexDataVersion(Fts5Index *p){
8973 i64 iVersion = 0;
8974
8975 if( p->rc==SQLITE_OK ){
8976 if( p->pDataVersion==0 ){
8977 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
8978 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
8979 );
8980 if( p->rc ) return 0;
8981 }
8982
8983 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
8984 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
8985 }
8986 p->rc = sqlite3_reset(p->pDataVersion);
8987 }
8988
8989 return iVersion;
8990 }
8991
8992 /*
8993 ** Read, deserialize and return the structure record.
8994 **
8995 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
8996 ** are over-allocated as described for function fts5StructureDecode()
8997 ** above.
8998 **
8999 ** If an error occurs, NULL is returned and an error code left in the
9000 ** Fts5Index handle. If an error has already occurred when this function
9001 ** is called, it is a no-op.
9002 */
9003 static Fts5Structure *fts5StructureRead(Fts5Index *p){
9004
9005 if( p->pStruct==0 ){
9006 p->iStructVersion = fts5IndexDataVersion(p);
9007 if( p->rc==SQLITE_OK ){
9008 p->pStruct = fts5StructureReadUncached(p);
9009 }
9010 }
9011
9012 #if 0
9013 else{
9014 Fts5Structure *pTest = fts5StructureReadUncached(p);
9015 if( pTest ){
9016 int i, j;
9017 assert_nc( p->pStruct->nSegment==pTest->nSegment );
9018 assert_nc( p->pStruct->nLevel==pTest->nLevel );
9019 for(i=0; i<pTest->nLevel; i++){
9020 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
9021 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
9022 for(j=0; j<pTest->aLevel[i].nSeg; j++){
9023 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
9024 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
9025 assert_nc( p1->iSegid==p2->iSegid );
9026 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
9027 assert_nc( p1->pgnoLast==p2->pgnoLast );
9028 }
9029 }
9030 fts5StructureRelease(pTest);
9031 }
9032 }
9033 #endif
9034
9035 if( p->rc!=SQLITE_OK ) return 0;
9036 assert( p->iStructVersion!=0 );
9037 assert( p->pStruct!=0 );
9038 fts5StructureRef(p->pStruct);
9039 return p->pStruct;
9040 }
9041
9042 static void fts5StructureInvalidate(Fts5Index *p){
9043 if( p->pStruct ){
9044 fts5StructureRelease(p->pStruct);
9045 p->pStruct = 0;
9046 }
9047 }
9048
/*
** Return the sum of the nSeg values of all levels of index structure
** pStruct, or zero if pStruct is NULL. This function is only compiled
** when SQLITE_DEBUG is defined, and is only used in assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Total number of segments */
  int i;                          /* Used to iterate through levels */

  if( pStruct==0 ) return 0;
  for(i=0; i<pStruct->nLevel; i++){
    nTotal += pStruct->aLevel[i].nSeg;
  }
  return nTotal;
}
#endif
9066
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have ensured that the buffer has enough free space;
** this is only checked by an assert().
**
** The macro arguments are parenthesized wherever they are expanded so that
** arbitrary expressions may be passed. Note that nBlob is still evaluated
** more than once, so it must not have side effects. The expansion is a
** brace-enclosed block rather than a do/while so that existing call sites
** are unaffected.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {      \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );            \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));          \
  (pBuf)->n += (nBlob);                                     \
}

/*
** Append varint iVal to buffer pBuf without growing it. As above, the
** caller is responsible for having reserved enough space; an assert()
** verifies this after the write.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                      \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));   \
  assert( (pBuf)->nSpace>=(pBuf)->n );                                \
}
9077
9078
9079 /*
9080 ** Serialize and store the "structure" record.
9081 **
9082 ** If an error occurs, leave an error code in the Fts5Index object. If an
9083 ** error has already occurred, this function is a no-op.
9084 */
9085 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
9086 if( p->rc==SQLITE_OK ){
9087 Fts5Buffer buf; /* Buffer to serialize record into */
9088 int iLvl; /* Used to iterate through levels */
9089 int iCookie; /* Cookie value to store */
9090
9091 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
9092 memset(&buf, 0, sizeof(Fts5Buffer));
9093
9094 /* Append the current configuration cookie */
9095 iCookie = p->pConfig->iCookie;
9096 if( iCookie<0 ) iCookie = 0;
9097
9098 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
9099 sqlite3Fts5Put32(buf.p, iCookie);
9100 buf.n = 4;
9101 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
9102 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
9103 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
9104 }
9105
9106 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
9107 int iSeg; /* Used to iterate through segments */
9108 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
9109 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
9110 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
9111 assert( pLvl->nMerge<=pLvl->nSeg );
9112
9113 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
9114 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
9115 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
9116 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
9117 }
9118 }
9119
9120 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
9121 fts5BufferFree(&buf);
9122 }
9123 }
9124
/*
** Debugging aid, disabled by default. With the condition below enabled,
** fts5PrintStructure() renders pStruct with fts5DebugStructure() and
** prints it to stdout prefixed by zCaption. Otherwise it is a macro that
** expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  Fts5Buffer buf;
  int rc = SQLITE_OK;

  memset(&buf, 0, sizeof(Fts5Buffer));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
9139
9140 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
9141 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
9142 }
9143
9144 /*
9145 ** Return a copy of index structure pStruct. Except, promote as many
9146 ** segments as possible to level iPromote. If an OOM occurs, NULL is
9147 ** returned.
9148 */
9149 static void fts5StructurePromoteTo(
9150 Fts5Index *p,
9151 int iPromote,
9152 int szPromote,
9153 Fts5Structure *pStruct
9154 ){
9155 int il, is;
9156 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
9157
9158 if( pOut->nMerge==0 ){
9159 for(il=iPromote+1; il<pStruct->nLevel; il++){
9160 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
9161 if( pLvl->nMerge ) return;
9162 for(is=pLvl->nSeg-1; is>=0; is--){
9163 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
9164 if( sz>szPromote ) return;
9165 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
9166 if( p->rc ) return;
9167 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
9168 pOut->nSeg++;
9169 pLvl->nSeg--;
9170 }
9171 }
9172 }
9173 }
9174
9175 /*
9176 ** A new segment has just been written to level iLvl of index structure
9177 ** pStruct. This function determines if any segments should be promoted
9178 ** as a result. Segments are promoted in two scenarios:
9179 **
9180 ** a) If the segment just written is smaller than one or more segments
9181 ** within the previous populated level, it is promoted to the previous
9182 ** populated level.
9183 **
9184 ** b) If the segment just written is larger than the newest segment on
9185 ** the next populated level, then that segment, and any other adjacent
9186 ** segments that are also smaller than the one just written, are
9187 ** promoted.
9188 **
9189 ** If one or more segments are promoted, the structure object is updated
9190 ** to reflect this.
9191 */
9192 static void fts5StructurePromote(
9193 Fts5Index *p, /* FTS5 backend object */
9194 int iLvl, /* Index level just updated */
9195 Fts5Structure *pStruct /* Index structure */
9196 ){
9197 if( p->rc==SQLITE_OK ){
9198 int iTst;
9199 int iPromote = -1;
9200 int szPromote = 0; /* Promote anything this size or smaller */
9201 Fts5StructureSegment *pSeg; /* Segment just written */
9202 int szSeg; /* Size of segment just written */
9203 int nSeg = pStruct->aLevel[iLvl].nSeg;
9204
9205 if( nSeg==0 ) return;
9206 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
9207 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
9208
9209 /* Check for condition (a) */
9210 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
9211 if( iTst>=0 ){
9212 int i;
9213 int szMax = 0;
9214 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
9215 assert( pTst->nMerge==0 );
9216 for(i=0; i<pTst->nSeg; i++){
9217 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
9218 if( sz>szMax ) szMax = sz;
9219 }
9220 if( szMax>=szSeg ){
9221 /* Condition (a) is true. Promote the newest segment on level
9222 ** iLvl to level iTst. */
9223 iPromote = iTst;
9224 szPromote = szMax;
9225 }
9226 }
9227
9228 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
9229 ** is a no-op if it is not. */
9230 if( iPromote<0 ){
9231 iPromote = iLvl;
9232 szPromote = szSeg;
9233 }
9234 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
9235 }
9236 }
9237
9238
9239 /*
9240 ** Advance the iterator passed as the only argument. If the end of the
9241 ** doclist-index page is reached, return non-zero.
9242 */
9243 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
9244 Fts5Data *pData = pLvl->pData;
9245
9246 if( pLvl->iOff==0 ){
9247 assert( pLvl->bEof==0 );
9248 pLvl->iOff = 1;
9249 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
9250 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
9251 pLvl->iFirstOff = pLvl->iOff;
9252 }else{
9253 int iOff;
9254 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
9255 if( pData->p[iOff] ) break;
9256 }
9257
9258 if( iOff<pData->nn ){
9259 i64 iVal;
9260 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
9261 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
9262 pLvl->iRowid += iVal;
9263 pLvl->iOff = iOff;
9264 }else{
9265 pLvl->bEof = 1;
9266 }
9267 }
9268
9269 return pLvl->bEof;
9270 }
9271
9272 /*
9273 ** Advance the iterator passed as the only argument.
9274 */
9275 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
9276 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
9277
9278 assert( iLvl<pIter->nLvl );
9279 if( fts5DlidxLvlNext(pLvl) ){
9280 if( (iLvl+1) < pIter->nLvl ){
9281 fts5DlidxIterNextR(p, pIter, iLvl+1);
9282 if( pLvl[1].bEof==0 ){
9283 fts5DataRelease(pLvl->pData);
9284 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
9285 pLvl->pData = fts5DataRead(p,
9286 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
9287 );
9288 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
9289 }
9290 }
9291 }
9292
9293 return pIter->aLvl[0].bEof;
9294 }
9295 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
9296 return fts5DlidxIterNextR(p, pIter, 0);
9297 }
9298
9299 /*
9300 ** The iterator passed as the first argument has the following fields set
9301 ** as follows. This function sets up the rest of the iterator so that it
9302 ** points to the first rowid in the doclist-index.
9303 **
9304 ** pData:
9305 ** pointer to doclist-index record,
9306 **
9307 ** When this function is called pIter->iLeafPgno is the page number the
9308 ** doclist is associated with (the one featuring the term).
9309 */
9310 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
9311 int i;
9312 for(i=0; i<pIter->nLvl; i++){
9313 fts5DlidxLvlNext(&pIter->aLvl[i]);
9314 }
9315 return pIter->aLvl[0].bEof;
9316 }
9317
9318
9319 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
9320 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
9321 }
9322
9323 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
9324 int i;
9325
9326 /* Advance each level to the last entry on the last page */
9327 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
9328 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
9329 while( fts5DlidxLvlNext(pLvl)==0 );
9330 pLvl->bEof = 0;
9331
9332 if( i>0 ){
9333 Fts5DlidxLvl *pChild = &pLvl[-1];
9334 fts5DataRelease(pChild->pData);
9335 memset(pChild, 0, sizeof(Fts5DlidxLvl));
9336 pChild->pData = fts5DataRead(p,
9337 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
9338 );
9339 }
9340 }
9341 }
9342
9343 /*
9344 ** Move the iterator passed as the only argument to the previous entry.
9345 */
9346 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
9347 int iOff = pLvl->iOff;
9348
9349 assert( pLvl->bEof==0 );
9350 if( iOff<=pLvl->iFirstOff ){
9351 pLvl->bEof = 1;
9352 }else{
9353 u8 *a = pLvl->pData->p;
9354 i64 iVal;
9355 int iLimit;
9356 int ii;
9357 int nZero = 0;
9358
9359 /* Currently iOff points to the first byte of a varint. This block
9360 ** decrements iOff until it points to the first byte of the previous
9361 ** varint. Taking care not to read any memory locations that occur
9362 ** before the buffer in memory. */
9363 iLimit = (iOff>9 ? iOff-9 : 0);
9364 for(iOff--; iOff>iLimit; iOff--){
9365 if( (a[iOff-1] & 0x80)==0 ) break;
9366 }
9367
9368 fts5GetVarint(&a[iOff], (u64*)&iVal);
9369 pLvl->iRowid -= iVal;
9370 pLvl->iLeafPgno--;
9371
9372 /* Skip backwards past any 0x00 varints. */
9373 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
9374 nZero++;
9375 }
9376 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
9377 /* The byte immediately before the last 0x00 byte has the 0x80 bit
9378 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
9379 ** bytes before a[ii]. */
9380 int bZero = 0; /* True if last 0x00 counts */
9381 if( (ii-8)>=pLvl->iFirstOff ){
9382 int j;
9383 for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
9384 bZero = (j>8);
9385 }
9386 if( bZero==0 ) nZero--;
9387 }
9388 pLvl->iLeafPgno -= nZero;
9389 pLvl->iOff = iOff - nZero;
9390 }
9391
9392 return pLvl->bEof;
9393 }
9394
9395 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
9396 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
9397
9398 assert( iLvl<pIter->nLvl );
9399 if( fts5DlidxLvlPrev(pLvl) ){
9400 if( (iLvl+1) < pIter->nLvl ){
9401 fts5DlidxIterPrevR(p, pIter, iLvl+1);
9402 if( pLvl[1].bEof==0 ){
9403 fts5DataRelease(pLvl->pData);
9404 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
9405 pLvl->pData = fts5DataRead(p,
9406 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
9407 );
9408 if( pLvl->pData ){
9409 while( fts5DlidxLvlNext(pLvl)==0 );
9410 pLvl->bEof = 0;
9411 }
9412 }
9413 }
9414 }
9415
9416 return pIter->aLvl[0].bEof;
9417 }
9418 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
9419 return fts5DlidxIterPrevR(p, pIter, 0);
9420 }
9421
9422 /*
9423 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
9424 */
9425 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
9426 if( pIter ){
9427 int i;
9428 for(i=0; i<pIter->nLvl; i++){
9429 fts5DataRelease(pIter->aLvl[i].pData);
9430 }
9431 sqlite3_free(pIter);
9432 }
9433 }
9434
9435 static Fts5DlidxIter *fts5DlidxIterInit(
9436 Fts5Index *p, /* Fts5 Backend to iterate within */
9437 int bRev, /* True for ORDER BY ASC */
9438 int iSegid, /* Segment id */
9439 int iLeafPg /* Leaf page number to load dlidx for */
9440 ){
9441 Fts5DlidxIter *pIter = 0;
9442 int i;
9443 int bDone = 0;
9444
9445 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
9446 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
9447 Fts5DlidxIter *pNew;
9448
9449 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
9450 if( pNew==0 ){
9451 p->rc = SQLITE_NOMEM;
9452 }else{
9453 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
9454 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
9455 pIter = pNew;
9456 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
9457 pLvl->pData = fts5DataRead(p, iRowid);
9458 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
9459 bDone = 1;
9460 }
9461 pIter->nLvl = i+1;
9462 }
9463 }
9464
9465 if( p->rc==SQLITE_OK ){
9466 pIter->iSegid = iSegid;
9467 if( bRev==0 ){
9468 fts5DlidxIterFirst(pIter);
9469 }else{
9470 fts5DlidxIterLast(p, pIter);
9471 }
9472 }
9473
9474 if( p->rc!=SQLITE_OK ){
9475 fts5DlidxIterFree(pIter);
9476 pIter = 0;
9477 }
9478
9479 return pIter;
9480 }
9481
9482 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
9483 return pIter->aLvl[0].iRowid;
9484 }
9485 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
9486 return pIter->aLvl[0].iLeafPgno;
9487 }
9488
9489 /*
9490 ** Load the next leaf page into the segment iterator.
9491 */
9492 static void fts5SegIterNextPage(
9493 Fts5Index *p, /* FTS5 backend object */
9494 Fts5SegIter *pIter /* Iterator to advance to next page */
9495 ){
9496 Fts5Data *pLeaf;
9497 Fts5StructureSegment *pSeg = pIter->pSeg;
9498 fts5DataRelease(pIter->pLeaf);
9499 pIter->iLeafPgno++;
9500 if( pIter->pNextLeaf ){
9501 pIter->pLeaf = pIter->pNextLeaf;
9502 pIter->pNextLeaf = 0;
9503 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
9504 pIter->pLeaf = fts5LeafRead(p,
9505 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
9506 );
9507 }else{
9508 pIter->pLeaf = 0;
9509 }
9510 pLeaf = pIter->pLeaf;
9511
9512 if( pLeaf ){
9513 pIter->iPgidxOff = pLeaf->szLeaf;
9514 if( fts5LeafIsTermless(pLeaf) ){
9515 pIter->iEndofDoclist = pLeaf->nn+1;
9516 }else{
9517 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
9518 pIter->iEndofDoclist
9519 );
9520 }
9521 }
9522 }
9523
9524 /*
9525 ** Argument p points to a buffer containing a varint to be interpreted as a
9526 ** position list size field. Read the varint and return the number of bytes
9527 ** read. Before returning, set *pnSz to the number of bytes in the position
9528 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
9529 */
9530 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
9531 int nSz;
9532 int n = 0;
9533 fts5FastGetVarint32(p, n, nSz);
9534 assert_nc( nSz>=0 );
9535 *pnSz = nSz/2;
9536 *pbDel = nSz & 0x0001;
9537 return n;
9538 }
9539
9540 /*
9541 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
9542 ** position-list size field. Read the value of the field and store it
9543 ** in the following variables:
9544 **
9545 ** Fts5SegIter.nPos
9546 ** Fts5SegIter.bDel
9547 **
9548 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
9549 ** position list content (if any).
9550 */
9551 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
9552 if( p->rc==SQLITE_OK ){
9553 int iOff = pIter->iLeafOffset; /* Offset to read at */
9554 ASSERT_SZLEAF_OK(pIter->pLeaf);
9555 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
9556 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
9557 pIter->bDel = 0;
9558 pIter->nPos = 1;
9559 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
9560 pIter->bDel = 1;
9561 iOff++;
9562 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
9563 pIter->nPos = 1;
9564 iOff++;
9565 }else{
9566 pIter->nPos = 0;
9567 }
9568 }
9569 }else{
9570 int nSz;
9571 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
9572 pIter->bDel = (nSz & 0x0001);
9573 pIter->nPos = nSz>>1;
9574 assert_nc( pIter->nPos>=0 );
9575 }
9576 pIter->iLeafOffset = iOff;
9577 }
9578 }
9579
9580 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
9581 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
9582 int iOff = pIter->iLeafOffset;
9583
9584 ASSERT_SZLEAF_OK(pIter->pLeaf);
9585 if( iOff>=pIter->pLeaf->szLeaf ){
9586 fts5SegIterNextPage(p, pIter);
9587 if( pIter->pLeaf==0 ){
9588 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
9589 return;
9590 }
9591 iOff = 4;
9592 a = pIter->pLeaf->p;
9593 }
9594 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
9595 pIter->iLeafOffset = iOff;
9596 }
9597
9598 /*
9599 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
9600 ** "nSuffix" field of a term. Function parameter nKeep contains the value
9601 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
9602 ** the first term in the segment).
9603 **
9604 ** This function populates:
9605 **
9606 ** Fts5SegIter.term
9607 ** Fts5SegIter.rowid
9608 **
9609 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
9610 ** the first position list. The position list belonging to document
9611 ** (Fts5SegIter.iRowid).
9612 */
9613 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
9614 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
9615 int iOff = pIter->iLeafOffset; /* Offset to read at */
9616 int nNew; /* Bytes of new data */
9617
9618 iOff += fts5GetVarint32(&a[iOff], nNew);
9619 if( iOff+nNew>pIter->pLeaf->nn ){
9620 p->rc = FTS5_CORRUPT;
9621 return;
9622 }
9623 pIter->term.n = nKeep;
9624 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
9625 iOff += nNew;
9626 pIter->iTermLeafOffset = iOff;
9627 pIter->iTermLeafPgno = pIter->iLeafPgno;
9628 pIter->iLeafOffset = iOff;
9629
9630 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
9631 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
9632 }else{
9633 int nExtra;
9634 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
9635 pIter->iEndofDoclist += nExtra;
9636 }
9637
9638 fts5SegIterLoadRowid(p, pIter);
9639 }
9640
9641 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
9642 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
9643 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
9644
9645 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
9646 if( pIter->flags & FTS5_SEGITER_REVERSE ){
9647 pIter->xNext = fts5SegIterNext_Reverse;
9648 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
9649 pIter->xNext = fts5SegIterNext_None;
9650 }else{
9651 pIter->xNext = fts5SegIterNext;
9652 }
9653 }
9654
9655 /*
9656 ** Initialize the iterator object pIter to iterate through the entries in
9657 ** segment pSeg. The iterator is left pointing to the first entry when
9658 ** this function returns.
9659 **
9660 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
9661 ** an error has already occurred when this function is called, it is a no-op.
9662 */
9663 static void fts5SegIterInit(
9664 Fts5Index *p, /* FTS index object */
9665 Fts5StructureSegment *pSeg, /* Description of segment */
9666 Fts5SegIter *pIter /* Object to populate */
9667 ){
9668 if( pSeg->pgnoFirst==0 ){
9669 /* This happens if the segment is being used as an input to an incremental
9670 ** merge and all data has already been "trimmed". See function
9671 ** fts5TrimSegments() for details. In this case leave the iterator empty.
9672 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
9673 ** at EOF already. */
9674 assert( pIter->pLeaf==0 );
9675 return;
9676 }
9677
9678 if( p->rc==SQLITE_OK ){
9679 memset(pIter, 0, sizeof(*pIter));
9680 fts5SegIterSetNext(p, pIter);
9681 pIter->pSeg = pSeg;
9682 pIter->iLeafPgno = pSeg->pgnoFirst-1;
9683 fts5SegIterNextPage(p, pIter);
9684 }
9685
9686 if( p->rc==SQLITE_OK ){
9687 pIter->iLeafOffset = 4;
9688 assert_nc( pIter->pLeaf->nn>4 );
9689 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
9690 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
9691 fts5SegIterLoadTerm(p, pIter, 0);
9692 fts5SegIterLoadNPos(p, pIter);
9693 }
9694 }
9695
/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** The iterator is in an unusual state when this function is called: the
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
** the position-list size field for the first relevant rowid on the page.
** Fts5SegIter.rowid is set, but nPos and bDel are not.
**
** This function advances the iterator so that it points to the last
** relevant rowid on the page and, if necessary, initializes the
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
**
** While scanning forward, the value of iLeafOffset for each rowid that is
** passed over is appended to aRowidOffset[], so that the entries can later
** be revisited in reverse order. If growing that array fails, Fts5Index.rc
** is set to SQLITE_NOMEM and the scan stops early.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int eDetail = p->pConfig->eDetail;
  int n = pIter->pLeaf->szLeaf;       /* Scan limit within the page */
  int i = pIter->iLeafOffset;         /* Current read offset */
  u8 *a = pIter->pLeaf->p;            /* Page data */
  int iRowidOffset = 0;               /* Entries used in aRowidOffset[] */

  /* Do not scan past the end of the current doclist. */
  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( 1 ){
    i64 iDelta = 0;

    if( eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: no position-list size field is decoded here. Instead,
      ** step over at most two consecutive 0x00 bytes at the current offset.
      ** NOTE(review): the original comment here was just "todo"; presumably
      ** these bytes are the per-rowid markers used by detail=none - confirm
      ** against the doclist format. */
      if( i<n && a[i]==0 ){
        i++;
        if( i<n && a[i]==0 ) i++;
      }
    }else{
      /* Skip the position-list size field and the position list itself. */
      int nPos;
      int bDummy;
      i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
      i += nPos;
    }

    /* No further rowid before the limit: the iterator is already on the
    ** last relevant entry. */
    if( i>=n ) break;

    /* Read the delta to the next rowid and apply it. */
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    pIter->iRowid += iDelta;

    /* If necessary, grow the pIter->aRowidOffset[] array. */
    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
      pIter->aRowidOffset = aNew;
      pIter->nRowidOffset = nNew;
    }

    /* Remember where the entry being left behind started, then move on. */
    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
  fts5SegIterLoadNPos(p, pIter);
}
9760
/*
** Move reverse iterator pIter (flags FTS5_SEGITER_REVERSE and
** FTS5_SEGITER_ONETERM must both be set) from its current leaf page to an
** earlier leaf page of the same doclist.
**
** The current leaf is released. Pages are then read in decreasing page
** number order, stopping at page pIter->iTermLeafPgno, until one is found
** that holds a rowid for the doclist:
**
**   * on page iTermLeafPgno itself the doclist starts at
**     pIter->iTermLeafOffset, provided that offset lies inside the page;
**   * on any other page it starts at the page's first-rowid offset, if the
**     page has one.
**
** If such a page is found, its first rowid is read and
** fts5SegIterReverseInitPage() is called to position the iterator on the
** last rowid of the page. Otherwise pIter->pLeaf is left set to 0, which
** callers treat as EOF. Read errors are left in Fts5Index.rc.
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      /* iTermLeafOffset may be equal to szLeaf if the term is the last
      ** thing on the page - i.e. the first rowid is on the following page.
      ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        assert( pIter->pLeaf==0 );
        if( pIter->iTermLeafOffset<pNew->szLeaf ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        /* A first-rowid offset of zero means the page holds no rowid; such
        ** a page is skipped. */
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        /* Page accepted: read its first rowid and stop searching. */
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        /* Page not usable: drop it and keep walking backwards. */
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    /* nn+1 is used as the end-of-doclist offset here; presumably this is so
    ** that fts5SegIterReverseInitPage() scans up to szLeaf without being
    ** cut short (TODO confirm that nn>=szLeaf always holds). */
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
    fts5SegIterReverseInitPage(p, pIter);
  }
}
9810
9811 /*
9812 ** Return true if the iterator passed as the second argument currently
9813 ** points to a delete marker. A delete marker is an entry with a 0 byte
9814 ** position-list.
9815 */
9816 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
9817 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
9818 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
9819 }
9820
9821 /*
9822 ** Advance iterator pIter to the next entry.
9823 **
9824 ** This version of fts5SegIterNext() is only used by reverse iterators.
9825 */
9826 static void fts5SegIterNext_Reverse(
9827 Fts5Index *p, /* FTS5 backend object */
9828 Fts5SegIter *pIter, /* Iterator to advance */
9829 int *pbUnused /* Unused */
9830 ){
9831 assert( pIter->flags & FTS5_SEGITER_REVERSE );
9832 assert( pIter->pNextLeaf==0 );
9833 UNUSED_PARAM(pbUnused);
9834
9835 if( pIter->iRowidOffset>0 ){
9836 u8 *a = pIter->pLeaf->p;
9837 int iOff;
9838 i64 iDelta;
9839
9840 pIter->iRowidOffset--;
9841 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
9842 fts5SegIterLoadNPos(p, pIter);
9843 iOff = pIter->iLeafOffset;
9844 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
9845 iOff += pIter->nPos;
9846 }
9847 fts5GetVarint(&a[iOff], (u64*)&iDelta);
9848 pIter->iRowid -= iDelta;
9849 }else{
9850 fts5SegIterReverseNewPage(p, pIter);
9851 }
9852 }
9853
/*
** Advance iterator pIter to the next entry.
**
** This version of fts5SegIterNext() is only used if detail=none and the
** iterator is not a reverse direction iterator.
**
** The caller must ensure that no error has occurred (p->rc==SQLITE_OK is
** asserted). If the iterator moves on to a new term and pbNewTerm is not
** NULL, *pbNewTerm is set to 1. On EOF the current leaf is released and
** pIter->pLeaf is set to 0.
*/
static void fts5SegIterNext_None(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  int iOff;

  assert( p->rc==SQLITE_OK );
  assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
  assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  iOff = pIter->iLeafOffset;

  /* Next entry is on the next page */
  if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( p->rc || pIter->pLeaf==0 ) return;
    /* The rowid is reset to zero before the first delta of the new page is
    ** added to it; reading starts at byte offset 4. */
    pIter->iRowid = 0;
    iOff = 4;
  }

  if( iOff<pIter->iEndofDoclist ){
    /* Next entry is on the current page */
    i64 iDelta;
    iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
    pIter->iLeafOffset = iOff;
    pIter->iRowid += iDelta;
  }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
    /* End of the current doclist and the iterator is allowed to move on to
    ** the next term. */
    if( pIter->pSeg ){
      /* Iterating an on-disk segment. Unless this is the first term on the
      ** page, the term is preceded by a varint that is read into nKeep and
      ** passed on to fts5SegIterLoadTerm(). */
      int nKeep = 0;
      if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
        iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
      }
      pIter->iLeafOffset = iOff;
      fts5SegIterLoadTerm(p, pIter, nKeep);
    }else{
      /* No segment: the entries come from the hash table scan on p->pHash.
      ** Advance the scan and point the existing Fts5Data object at the next
      ** entry's doclist. */
      const u8 *pList = 0;
      const char *zTerm = 0;
      int nList;
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
      if( pList==0 ) goto next_none_eof;
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList;
      sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
    }

    if( pbNewTerm ) *pbNewTerm = 1;
  }else{
    /* Single-term iterator that has reached the end of its doclist. */
    goto next_none_eof;
  }

  fts5SegIterLoadNPos(p, pIter);

  return;
 next_none_eof:
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}
9923
9924
/*
** Advance iterator pIter to the next entry.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
** is not considered an error if the iterator reaches EOF. If an error has
** already occurred when this function is called, it is a no-op.
**
** This version is never used with detail=none tables (asserted below). If
** pbNewTerm is not NULL it must point to a zero value on entry; it is set
** to 1 if the iterator moves on to a new term. On EOF, pIter->pLeaf is
** set to 0.
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  Fts5Data *pLeaf = pIter->pLeaf;
  int iOff;
  int bNewTerm = 0;               /* True if a new term was reached */
  int nKeep = 0;                  /* Passed to fts5SegIterLoadTerm() */
  u8 *a;
  int n;

  assert( pbNewTerm==0 || *pbNewTerm==0 );
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Search for the end of the position list within the current page. */
  a = pLeaf->p;
  n = pLeaf->szLeaf;

  ASSERT_SZLEAF_OK(pLeaf);
  iOff = pIter->iLeafOffset + pIter->nPos;

  if( iOff<n ){
    /* The next entry is on the current page. */
    assert_nc( iOff<=pIter->iEndofDoclist );
    if( iOff>=pIter->iEndofDoclist ){
      /* End of the doclist: what follows is a new term. Unless it is the
      ** first term on the page it is preceded by a varint read into nKeep. */
      bNewTerm = 1;
      if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }else{
      /* Same doclist: read the rowid delta and apply it. */
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      pIter->iRowid += iDelta;
      assert_nc( iDelta>0 );
    }
    pIter->iLeafOffset = iOff;

  }else if( pIter->pSeg==0 ){
    /* No segment: the entries come from the hash table scan on p->pHash.
    ** A single-term iterator never advances the scan, so pList stays NULL
    ** and the iterator goes to EOF. */
    const u8 *pList = 0;
    const char *zTerm = 0;
    int nList = 0;
    assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
    if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
    }
    if( pList==0 ){
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }else{
      /* Point the existing Fts5Data object at the next entry's doclist.
      ** pbNewTerm is dereferenced unconditionally here; the assert() above
      ** guarantees it is non-NULL on this path. */
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList+1;
      sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
          (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
      *pbNewTerm = 1;
    }
  }else{
    iOff = 0;
    /* Next entry is not on the current page */
    while( iOff==0 ){
      fts5SegIterNextPage(p, pIter);
      pLeaf = pIter->pLeaf;
      if( pLeaf==0 ) break;
      ASSERT_SZLEAF_OK(pLeaf);
      if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
        /* The page has a rowid belonging to the current doclist: read it. */
        iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;

        /* If data follows the first szLeaf bytes of the page, its first
        ** varint gives the end of the current doclist on this page. */
        if( pLeaf->nn>pLeaf->szLeaf ){
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
          );
        }
      }
      else if( pLeaf->nn>pLeaf->szLeaf ){
        /* No usable rowid but the page has at least one term: the iterator
        ** moves to the first term on the page. */
        pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
            &pLeaf->p[pLeaf->szLeaf], iOff
        );
        pIter->iLeafOffset = iOff;
        pIter->iEndofDoclist = iOff;
        bNewTerm = 1;
      }
      assert_nc( iOff<pLeaf->szLeaf );
      if( iOff>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
        return;
      }
    }
  }

  /* Check if the iterator is now at EOF. If so, return early. */
  if( pIter->pLeaf ){
    if( bNewTerm ){
      if( pIter->flags & FTS5_SEGITER_ONETERM ){
        /* A single-term iterator is at EOF once its term is exhausted. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        fts5SegIterLoadTerm(p, pIter, nKeep);
        fts5SegIterLoadNPos(p, pIter);
        if( pbNewTerm ) *pbNewTerm = 1;
      }
    }else{
      /* The following could be done by calling fts5SegIterLoadNPos(). But
      ** this block is particularly performance critical, so equivalent
      ** code is inlined: the size varint is decoded directly, its low bit
      ** is the delete flag and the remaining bits are the position-list
      ** size.
      **
      ** NOTE(review): an earlier version of this comment said the code had
      ** been switched back to calling fts5SegIterLoadNPos() in order to
      ** support detail=none. The code below is the inlined form, and this
      ** function asserts eDetail!=FTS5_DETAIL_NONE on entry.
      */
      int nSz;
      assert( p->rc==SQLITE_OK );
      assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
      fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
  }
}
10055
/*
** SWAPVAL(T, a, b): exchange the values of a and b, which must both be
** modifiable lvalues of type T. Each argument is evaluated more than once,
** so arguments must not have side effects. The expansion declares a local
** named "tmp"; do not pass an expression that refers to a variable of
** that name.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=(a); (a)=(b); (b)=tmp; }

/*
** fts5IndexSkipVarint(a, iOff): advance integer lvalue iOff past the varint
** that starts at a[iOff]. Bytes are consumed while their 0x80 bit is set,
** up to a maximum of 9 bytes in total. iOff is evaluated more than once
** and is modified in place. The expansion declares a local named "iEnd".
**
** Macro parameters are parenthesized so that expression arguments (for
** example "aBuf+1") expand with the intended precedence.
*/
#define fts5IndexSkipVarint(a, iOff) {              \
  int iEnd = (iOff)+9;                              \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<iEnd );   \
}
10062
/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that it iterates in reverse order
** through the doclist.
**
** To do so it locates the leaf page that holds the last rowid of the
** doclist - using the doclist-index iterator pIter->pDlidx if there is
** one, or by scanning forward through the following leaf pages otherwise -
** and then calls fts5SegIterReverseInitPage() to position the iterator on
** the last rowid of that page.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;            /* Page holding the last rowid, if not current */
  int pgnoLast = 0;               /* Page number of pLast */

  if( pDlidx ){
    /* The doclist-index iterator names the page to load directly. */
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  }else{
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
    ** position-list content for the current rowid. Back it up so that it
    ** points to the start of the position-list size field. */
    int iPoslist;
    if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
      iPoslist = pIter->iTermLeafOffset;
    }else{
      iPoslist = 4;
    }
    /* iPoslist is the offset of the first rowid varint; skipping that
    ** varint leaves it on the position-list size field. */
    fts5IndexSkipVarint(pLeaf->p, iPoslist);
    pIter->iLeafOffset = iPoslist;

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is. */
    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid. */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, bTermless;
          iRowid = fts5LeafFirstRowidOff(pNew);
          bTermless = fts5LeafIsTermless(pNew);
          if( iRowid ){
            /* This page holds a rowid, so it becomes the new candidate.
            ** After the swap pNew refers to the previous candidate (or
            ** NULL), which is what gets released below. */
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          /* A page that contains a term ends the current doclist. */
          if( bTermless==0 ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){
    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    /* The doclist ends at the first term on the page, if there is one. */
    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}
10147
10148 /*
10149 ** Iterator pIter currently points to the first rowid of a doclist.
10150 ** There is a doclist-index associated with the final term on the current
10151 ** page. If the current term is the last term on the page, load the
10152 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
10153 */
10154 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
10155 int iSeg = pIter->pSeg->iSegid;
10156 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
10157 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
10158
10159 assert( pIter->flags & FTS5_SEGITER_ONETERM );
10160 assert( pIter->pDlidx==0 );
10161
10162 /* Check if the current doclist ends on this page. If it does, return
10163 ** early without loading the doclist-index (as it belongs to a different
10164 ** term. */
10165 if( pIter->iTermLeafPgno==pIter->iLeafPgno
10166 && pIter->iEndofDoclist<pLeaf->szLeaf
10167 ){
10168 return;
10169 }
10170
10171 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
10172 }
10173
/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
** If the specified term is found on the page, then the iterator is left
** pointing to it. If argument bGe is zero and the term is not found,
** the iterator is left pointing at EOF.
**
** If bGe is non-zero and the specified term is not found, then the
** iterator is left pointing to the smallest term in the segment that
** is larger than the specified term, even if this term is not on the
** current page.
**
** Terms on the page are stored as (nKeep, nNew, suffix): nKeep is the
** number of leading bytes shared with the previous term and nNew the size
** of the suffix that follows. The search keeps track of how many leading
** bytes of pTerm are known to match the term being examined (nMatch), so
** that only suffix bytes ever need to be compared.
**
** The caller must ensure that no error has occurred (asserted). If the
** page is found to be malformed, Fts5Index.rc is set to FTS5_CORRUPT.
*/
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;                       /* Current read offset within the page */
  const u8 *a = pIter->pLeaf->p;
  int szLeaf = pIter->pLeaf->szLeaf;
  int n = pIter->pLeaf->nn;

  int nMatch = 0;                 /* Leading bytes of pTerm matched so far */
  int nKeep = 0;                  /* Prefix bytes shared with previous term */
  int nNew = 0;                   /* Size of current term's suffix */
  int iTermOff;                   /* Offset of current term on the page */
  int iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;             /* True if all terms on page were visited */

  assert( p->rc==SQLITE_OK );

  /* The page index starts at offset szLeaf. Its first varint is the offset
  ** of the first term on the page. */
  iPgidx = szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;
  if( iOff>n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){

    /* Figure out how many new bytes are in this term */
    fts5FastGetVarint32(a, iOff, nNew);

    /* This term shares fewer bytes with its predecessor than pTerm did, and
    ** the search has not ended yet, so pTerm is not on this page. */
    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      /* The suffix starts exactly where the match with pTerm left off.
      ** Extend the match as far as possible. */
      int nCmp;
      int i;
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        /* All of pTerm matched. It is an exact hit only if the suffix was
        ** consumed entirely as well; otherwise this term is longer than,
        ** and therefore larger than, pTerm. */
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        /* First differing byte is larger: this term sorts after pTerm. */
        goto search_failed;
      }
    }

    /* No more entries in the page index: every term has been visited. */
    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;
    }

    /* Move to the next term. The page index stores the distance from the
    ** previous term offset; nKeep is borrowed as a temporary for it. */
    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    if( iOff>=n ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    /* Read the nKeep field of the next term. */
    fts5FastGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    /* Exact match required but not found: iterator goes to EOF. */
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    /* All terms on this page are smaller than pTerm. Move forward to the
    ** first term found on a following page, skipping pages that contain
    ** no terms. */
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iPgidx = pIter->pLeaf->szLeaf;
        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          /* The first term on a page is stored in full, so nKeep is 0. */
          nKeep = 0;
          iTermOff = iOff;
          n = pIter->pLeaf->nn;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:

  /* At this point iOff is the offset of the term suffix and nKeep/nNew
  ** describe the term. The doclist begins right after the suffix. */
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  /* Rebuild the full term: nKeep bytes taken from pTerm followed by the
  ** nNew suffix bytes stored on the page. */
  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  /* The doclist ends where the next term on the page begins, or past the
  ** end of the page if this is the last term on it. */
  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}
10310
10311 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
10312 if( p->pIdxSelect==0 ){
10313 Fts5Config *pConfig = p->pConfig;
10314 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
10315 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
10316 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
10317 pConfig->zDb, pConfig->zName
10318 ));
10319 }
10320 return p->pIdxSelect;
10321 }
10322
10323 /*
10324 ** Initialize the object pIter to point to term pTerm/nTerm within segment
10325 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
10326 **
10327 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
10328 ** an error has already occurred when this function is called, it is a no-op.
10329 */
10330 static void fts5SegIterSeekInit(
10331 Fts5Index *p, /* FTS5 backend */
10332 const u8 *pTerm, int nTerm, /* Term to seek to */
10333 int flags, /* Mask of FTS5INDEX_XXX flags */
10334 Fts5StructureSegment *pSeg, /* Description of segment */
10335 Fts5SegIter *pIter /* Object to populate */
10336 ){
10337 int iPg = 1;
10338 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
10339 int bDlidx = 0; /* True if there is a doclist-index */
10340 sqlite3_stmt *pIdxSelect = 0;
10341
10342 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
10343 assert( pTerm && nTerm );
10344 memset(pIter, 0, sizeof(*pIter));
10345 pIter->pSeg = pSeg;
10346
10347 /* This block sets stack variable iPg to the leaf page number that may
10348 ** contain term (pTerm/nTerm), if it is present in the segment. */
10349 pIdxSelect = fts5IdxSelectStmt(p);
10350 if( p->rc ) return;
10351 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
10352 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
10353 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
10354 i64 val = sqlite3_column_int(pIdxSelect, 0);
10355 iPg = (int)(val>>1);
10356 bDlidx = (val & 0x0001);
10357 }
10358 p->rc = sqlite3_reset(pIdxSelect);
10359
10360 if( iPg<pSeg->pgnoFirst ){
10361 iPg = pSeg->pgnoFirst;
10362 bDlidx = 0;
10363 }
10364
10365 pIter->iLeafPgno = iPg - 1;
10366 fts5SegIterNextPage(p, pIter);
10367
10368 if( pIter->pLeaf ){
10369 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
10370 }
10371
10372 if( p->rc==SQLITE_OK && bGe==0 ){
10373 pIter->flags |= FTS5_SEGITER_ONETERM;
10374 if( pIter->pLeaf ){
10375 if( flags & FTS5INDEX_QUERY_DESC ){
10376 pIter->flags |= FTS5_SEGITER_REVERSE;
10377 }
10378 if( bDlidx ){
10379 fts5SegIterLoadDlidx(p, pIter);
10380 }
10381 if( flags & FTS5INDEX_QUERY_DESC ){
10382 fts5SegIterReverse(p, pIter);
10383 }
10384 }
10385 }
10386
10387 fts5SegIterSetNext(p, pIter);
10388
10389 /* Either:
10390 **
10391 ** 1) an error has occurred, or
10392 ** 2) the iterator points to EOF, or
10393 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
10394 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
10395 ** to an entry with a term greater than or equal to (pTerm/nTerm).
10396 */
10397 assert( p->rc!=SQLITE_OK /* 1 */
10398 || pIter->pLeaf==0 /* 2 */
10399 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
10400 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
10401 );
10402 }
10403
/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the
** iterator is set to EOF.
**
** If pTerm is NULL or FTS5INDEX_QUERY_SCAN is set in flags, a hash-table
** scan is started instead and the iterator is left on the first entry the
** scan returns. Otherwise the iterator is restricted to the single term
** (FTS5_SEGITER_ONETERM). FTS5INDEX_QUERY_DESC requests reverse order.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code.
** The caller must ensure that no error has occurred before calling this
** function and that the hash table exists (both are asserted).
*/
static void fts5SegIterHashInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5SegIter *pIter              /* Object to populate */
){
  const u8 *pList = 0;            /* Doclist of the entry found, if any */
  int nList = 0;                  /* Size of pList in bytes */
  const u8 *z = 0;                /* Term of the entry found */
  int n = 0;                      /* Size of z in bytes */

  assert( p->pHash );
  assert( p->rc==SQLITE_OK );

  if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
    /* Scan: the term comes back from the hash table as a nul-terminated
    ** string, so its length is measured with strlen(). */
    p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
    n = (z ? (int)strlen((const char*)z) : 0);
  }else{
    /* Exact lookup of a single term. */
    pIter->flags |= FTS5_SEGITER_ONETERM;
    sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
    z = pTerm;
    n = nTerm;
  }

  if( pList ){
    /* Wrap the doclist in an Fts5Data object so that it can be iterated
    ** like a leaf page. Only the Fts5Data header is allocated here; its
    ** data pointer refers directly to pList. If the allocation fails the
    ** function returns early (presumably fts5IdxMalloc() has set p->rc). */
    Fts5Data *pLeaf;
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
    pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
    if( pLeaf==0 ) return;
    pLeaf->p = (u8*)pList;
    pLeaf->nn = pLeaf->szLeaf = nList;
    pIter->pLeaf = pLeaf;
    /* The doclist begins with the first rowid, stored as a varint. */
    pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
    pIter->iEndofDoclist = pLeaf->nn;

    if( flags & FTS5INDEX_QUERY_DESC ){
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }else{
      fts5SegIterLoadNPos(p, pIter);
    }
  }

  fts5SegIterSetNext(p, pIter);
}
10458
10459 /*
10460 ** Zero the iterator passed as the only argument.
10461 */
10462 static void fts5SegIterClear(Fts5SegIter *pIter){
10463 fts5BufferFree(&pIter->term);
10464 fts5DataRelease(pIter->pLeaf);
10465 fts5DataRelease(pIter->pNextLeaf);
10466 fts5DlidxIterFree(pIter->pDlidx);
10467 sqlite3_free(pIter->aRowidOffset);
10468 memset(pIter, 0, sizeof(Fts5SegIter));
10469 }
10470
10471 #ifdef SQLITE_DEBUG
10472
10473 /*
10474 ** This function is used as part of the big assert() procedure implemented by
10475 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
10476 ** in *pRes is the correct result of comparing the current positions of the
10477 ** two iterators.
10478 */
10479 static void fts5AssertComparisonResult(
10480 Fts5Iter *pIter,
10481 Fts5SegIter *p1,
10482 Fts5SegIter *p2,
10483 Fts5CResult *pRes
10484 ){
10485 int i1 = p1 - pIter->aSeg;
10486 int i2 = p2 - pIter->aSeg;
10487
10488 if( p1->pLeaf || p2->pLeaf ){
10489 if( p1->pLeaf==0 ){
10490 assert( pRes->iFirst==i2 );
10491 }else if( p2->pLeaf==0 ){
10492 assert( pRes->iFirst==i1 );
10493 }else{
10494 int nMin = MIN(p1->term.n, p2->term.n);
10495 int res = memcmp(p1->term.p, p2->term.p, nMin);
10496 if( res==0 ) res = p1->term.n - p2->term.n;
10497
10498 if( res==0 ){
10499 assert( pRes->bTermEq==1 );
10500 assert( p1->iRowid!=p2->iRowid );
10501 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
10502 }else{
10503 assert( pRes->bTermEq==0 );
10504 }
10505
10506 if( res<0 ){
10507 assert( pRes->iFirst==i1 );
10508 }else{
10509 assert( pRes->iFirst==i2 );
10510 }
10511 }
10512 }
10513 }
10514
/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert()
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
**
** Nothing is checked if an error has already occurred (p->rc!=SQLITE_OK).
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  if( p->rc==SQLITE_OK ){
    /* aFirst[1] names the segment iterator that currently comes first. */
    Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int i;

    /* The multi-iterator is at EOF if and only if its first segment
    ** iterator is. */
    assert( (pFirst->pLeaf==0)==pIter->base.bEof );

    /* Check that pIter->iSwitchRowid is set correctly. */
    for(i=0; i<pIter->nSeg; i++){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      assert( p1==pFirst
           || p1->pLeaf==0
           || fts5BufferCompare(&pFirst->term, &p1->term)
           || p1->iRowid==pIter->iSwitchRowid
           || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
      );
    }

    /* Check the aFirst[] entries that compare adjacent pairs of segment
    ** iterators: entry (nSeg+i)/2 holds the result for aSeg[i], aSeg[i+1]. */
    for(i=0; i<pIter->nSeg; i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      Fts5SegIter *p2 = &pIter->aSeg[i+1];
      Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }

    /* Check the aFirst[] entries that compare the winners of two other
    ** entries: entry i holds the result for entries i*2 and i*2+1.
    ** NOTE(review): the loop advances by 2, so only odd values of i are
    ** verified - confirm whether skipping the even entries is intended. */
    for(i=1; i<(pIter->nSeg / 2); i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
      Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
      Fts5CResult *pRes = &pIter->aFirst[i];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }
  }
}
10554 #else
10555 # define fts5AssertMultiIterSetup(x,y)
10556 #endif
10557
10558 /*
10559 ** Do the comparison necessary to populate pIter->aFirst[iOut].
10560 **
10561 ** If the returned value is non-zero, then it is the index of an entry
10562 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
10563 ** to a key that is a duplicate of another, higher priority,
10564 ** segment-iterator in the pSeg->aSeg[] array.
10565 */
10566 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
10567 int i1; /* Index of left-hand Fts5SegIter */
10568 int i2; /* Index of right-hand Fts5SegIter */
10569 int iRes;
10570 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
10571 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
10572 Fts5CResult *pRes = &pIter->aFirst[iOut];
10573
10574 assert( iOut<pIter->nSeg && iOut>0 );
10575 assert( pIter->bRev==0 || pIter->bRev==1 );
10576
10577 if( iOut>=(pIter->nSeg/2) ){
10578 i1 = (iOut - pIter->nSeg/2) * 2;
10579 i2 = i1 + 1;
10580 }else{
10581 i1 = pIter->aFirst[iOut*2].iFirst;
10582 i2 = pIter->aFirst[iOut*2+1].iFirst;
10583 }
10584 p1 = &pIter->aSeg[i1];
10585 p2 = &pIter->aSeg[i2];
10586
10587 pRes->bTermEq = 0;
10588 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
10589 iRes = i2;
10590 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
10591 iRes = i1;
10592 }else{
10593 int res = fts5BufferCompare(&p1->term, &p2->term);
10594 if( res==0 ){
10595 assert( i2>i1 );
10596 assert( i2!=0 );
10597 pRes->bTermEq = 1;
10598 if( p1->iRowid==p2->iRowid ){
10599 p1->bDel = p2->bDel;
10600 return i2;
10601 }
10602 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
10603 }
10604 assert( res!=0 );
10605 if( res<0 ){
10606 iRes = i1;
10607 }else{
10608 iRes = i2;
10609 }
10610 }
10611
10612 pRes->iFirst = (u16)iRes;
10613 return 0;
10614 }
10615
10616 /*
10617 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
10618 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
10619 */
10620 static void fts5SegIterGotoPage(
10621 Fts5Index *p, /* FTS5 backend object */
10622 Fts5SegIter *pIter, /* Iterator to advance */
10623 int iLeafPgno
10624 ){
10625 assert( iLeafPgno>pIter->iLeafPgno );
10626
10627 if( iLeafPgno>pIter->pSeg->pgnoLast ){
10628 p->rc = FTS5_CORRUPT;
10629 }else{
10630 fts5DataRelease(pIter->pNextLeaf);
10631 pIter->pNextLeaf = 0;
10632 pIter->iLeafPgno = iLeafPgno-1;
10633 fts5SegIterNextPage(p, pIter);
10634 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
10635
10636 if( p->rc==SQLITE_OK ){
10637 int iOff;
10638 u8 *a = pIter->pLeaf->p;
10639 int n = pIter->pLeaf->szLeaf;
10640
10641 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
10642 if( iOff<4 || iOff>=n ){
10643 p->rc = FTS5_CORRUPT;
10644 }else{
10645 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
10646 pIter->iLeafOffset = iOff;
10647 fts5SegIterLoadNPos(p, pIter);
10648 }
10649 }
10650 }
10651 }
10652
/*
** Advance the iterator passed as the second argument until it is at or
** past rowid iMatch. Regardless of the value of iMatch, the iterator is
** always advanced at least once.
**
** "Past" depends on the direction of the iterator: a forward iterator
** stops at the first rowid that is greater than or equal to iMatch, a
** reverse iterator at the first rowid that is less than or equal to it.
**
** The iterator must be a single-term iterator (FTS5_SEGITER_ONETERM) that
** has a doclist-index and is not at EOF. The doclist-index is used to jump
** directly to the relevant leaf page where possible.
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;                  /* True if xNext() must still be called */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    /* Step the doclist-index forward while its rowid is smaller than
    ** iMatch, remembering the page number of the last entry passed. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      /* Jumping to a later page already moves the iterator, so the first
      ** call to xNext() below is skipped. */
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    /* Step the doclist-index backwards while its rowid is larger than
    ** iMatch. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      /* fts5SegIterReverseNewPage() decrements iLeafPgno before reading,
      ** so set it to one more than the page that is wanted. */
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  /* Step entry by entry until the iterator reaches iMatch, hits EOF or an
  ** error occurs. */
  do{
    if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }while( p->rc==SQLITE_OK );
}
10707
10708
10709 /*
10710 ** Free the iterator object passed as the second argument.
10711 */
10712 static void fts5MultiIterFree(Fts5Iter *pIter){
10713 if( pIter ){
10714 int i;
10715 for(i=0; i<pIter->nSeg; i++){
10716 fts5SegIterClear(&pIter->aSeg[i]);
10717 }
10718 fts5StructureRelease(pIter->pStruct);
10719 fts5BufferFree(&pIter->poslist);
10720 sqlite3_free(pIter);
10721 }
10722 }
10723
10724 static void fts5MultiIterAdvanced(
10725 Fts5Index *p, /* FTS5 backend to iterate within */
10726 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
10727 int iChanged, /* Index of sub-iterator just advanced */
10728 int iMinset /* Minimum entry in aFirst[] to set */
10729 ){
10730 int i;
10731 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
10732 int iEq;
10733 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
10734 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
10735 assert( p->rc==SQLITE_OK );
10736 pSeg->xNext(p, pSeg, 0);
10737 i = pIter->nSeg + iEq;
10738 }
10739 }
10740 }
10741
10742 /*
10743 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
10744 ** points to the same term though - just a different rowid. This function
10745 ** attempts to update the contents of the pIter->aFirst[] accordingly.
10746 ** If it does so successfully, 0 is returned. Otherwise 1.
10747 **
10748 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
10749 ** on the iterator instead. That function does the same as this one, except
10750 ** that it deals with more complicated cases as well.
10751 */
10752 static int fts5MultiIterAdvanceRowid(
10753 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
10754 int iChanged, /* Index of sub-iterator just advanced */
10755 Fts5SegIter **ppFirst
10756 ){
10757 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
10758
10759 if( pNew->iRowid==pIter->iSwitchRowid
10760 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
10761 ){
10762 int i;
10763 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
10764 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
10765 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
10766 Fts5CResult *pRes = &pIter->aFirst[i];
10767
10768 assert( pNew->pLeaf );
10769 assert( pRes->bTermEq==0 || pOther->pLeaf );
10770
10771 if( pRes->bTermEq ){
10772 if( pNew->iRowid==pOther->iRowid ){
10773 return 1;
10774 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
10775 pIter->iSwitchRowid = pOther->iRowid;
10776 pNew = pOther;
10777 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
10778 pIter->iSwitchRowid = pOther->iRowid;
10779 }
10780 }
10781 pRes->iFirst = (u16)(pNew - pIter->aSeg);
10782 if( i==1 ) break;
10783
10784 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
10785 }
10786 }
10787
10788 *ppFirst = pNew;
10789 return 0;
10790 }
10791
10792 /*
10793 ** Set the pIter->bEof variable based on the state of the sub-iterators.
10794 */
10795 static void fts5MultiIterSetEof(Fts5Iter *pIter){
10796 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
10797 pIter->base.bEof = pSeg->pLeaf==0;
10798 pIter->iSwitchRowid = pSeg->iRowid;
10799 }
10800
10801 /*
10802 ** Move the iterator to the next entry.
10803 **
10804 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
10805 ** considered an error if the iterator reaches EOF, or if it is already at
10806 ** EOF when this function is called.
10807 */
10808 static void fts5MultiIterNext(
10809 Fts5Index *p,
10810 Fts5Iter *pIter,
10811 int bFrom, /* True if argument iFrom is valid */
10812 i64 iFrom /* Advance at least as far as this */
10813 ){
10814 int bUseFrom = bFrom;
10815 assert( pIter->base.bEof==0 );
10816 while( p->rc==SQLITE_OK ){
10817 int iFirst = pIter->aFirst[1].iFirst;
10818 int bNewTerm = 0;
10819 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
10820 assert( p->rc==SQLITE_OK );
10821 if( bUseFrom && pSeg->pDlidx ){
10822 fts5SegIterNextFrom(p, pSeg, iFrom);
10823 }else{
10824 pSeg->xNext(p, pSeg, &bNewTerm);
10825 }
10826
10827 if( pSeg->pLeaf==0 || bNewTerm
10828 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
10829 ){
10830 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
10831 fts5MultiIterSetEof(pIter);
10832 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
10833 if( pSeg->pLeaf==0 ) return;
10834 }
10835
10836 fts5AssertMultiIterSetup(p, pIter);
10837 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
10838 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
10839 pIter->xSetOutputs(pIter, pSeg);
10840 return;
10841 }
10842 bUseFrom = 0;
10843 }
10844 }
10845
10846 static void fts5MultiIterNext2(
10847 Fts5Index *p,
10848 Fts5Iter *pIter,
10849 int *pbNewTerm /* OUT: True if *might* be new term */
10850 ){
10851 assert( pIter->bSkipEmpty );
10852 if( p->rc==SQLITE_OK ){
10853 do {
10854 int iFirst = pIter->aFirst[1].iFirst;
10855 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
10856 int bNewTerm = 0;
10857
10858 assert( p->rc==SQLITE_OK );
10859 pSeg->xNext(p, pSeg, &bNewTerm);
10860 if( pSeg->pLeaf==0 || bNewTerm
10861 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
10862 ){
10863 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
10864 fts5MultiIterSetEof(pIter);
10865 *pbNewTerm = 1;
10866 }else{
10867 *pbNewTerm = 0;
10868 }
10869 fts5AssertMultiIterSetup(p, pIter);
10870
10871 }while( fts5MultiIterIsEmpty(p, pIter) );
10872 }
10873 }
10874
10875 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
10876 UNUSED_PARAM2(pUnused1, pUnused2);
10877 }
10878
10879 static Fts5Iter *fts5MultiIterAlloc(
10880 Fts5Index *p, /* FTS5 backend to iterate within */
10881 int nSeg
10882 ){
10883 Fts5Iter *pNew;
10884 int nSlot; /* Power of two >= nSeg */
10885
10886 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
10887 pNew = fts5IdxMalloc(p,
10888 sizeof(Fts5Iter) + /* pNew */
10889 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
10890 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
10891 );
10892 if( pNew ){
10893 pNew->nSeg = nSlot;
10894 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
10895 pNew->pIndex = p;
10896 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
10897 }
10898 return pNew;
10899 }
10900
10901 static void fts5PoslistCallback(
10902 Fts5Index *pUnused,
10903 void *pContext,
10904 const u8 *pChunk, int nChunk
10905 ){
10906 UNUSED_PARAM(pUnused);
10907 assert_nc( nChunk>=0 );
10908 if( nChunk>0 ){
10909 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
10910 }
10911 }
10912
10913 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
10914 struct PoslistCallbackCtx {
10915 Fts5Buffer *pBuf; /* Append to this buffer */
10916 Fts5Colset *pColset; /* Restrict matches to this column */
10917 int eState; /* See above */
10918 };
10919
10920 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
10921 struct PoslistOffsetsCtx {
10922 Fts5Buffer *pBuf; /* Append to this buffer */
10923 Fts5Colset *pColset; /* Restrict matches to this column */
10924 int iRead;
10925 int iWrite;
10926 };
10927
10928 /*
10929 ** TODO: Make this more efficient!
10930 */
10931 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
10932 int i;
10933 for(i=0; i<pColset->nCol; i++){
10934 if( pColset->aiCol[i]==iCol ) return 1;
10935 }
10936 return 0;
10937 }
10938
10939 static void fts5PoslistOffsetsCallback(
10940 Fts5Index *pUnused,
10941 void *pContext,
10942 const u8 *pChunk, int nChunk
10943 ){
10944 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
10945 UNUSED_PARAM(pUnused);
10946 assert_nc( nChunk>=0 );
10947 if( nChunk>0 ){
10948 int i = 0;
10949 while( i<nChunk ){
10950 int iVal;
10951 i += fts5GetVarint32(&pChunk[i], iVal);
10952 iVal += pCtx->iRead - 2;
10953 pCtx->iRead = iVal;
10954 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
10955 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
10956 pCtx->iWrite = iVal;
10957 }
10958 }
10959 }
10960 }
10961
10962 static void fts5PoslistFilterCallback(
10963 Fts5Index *pUnused,
10964 void *pContext,
10965 const u8 *pChunk, int nChunk
10966 ){
10967 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
10968 UNUSED_PARAM(pUnused);
10969 assert_nc( nChunk>=0 );
10970 if( nChunk>0 ){
10971 /* Search through to find the first varint with value 1. This is the
10972 ** start of the next columns hits. */
10973 int i = 0;
10974 int iStart = 0;
10975
10976 if( pCtx->eState==2 ){
10977 int iCol;
10978 fts5FastGetVarint32(pChunk, i, iCol);
10979 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
10980 pCtx->eState = 1;
10981 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
10982 }else{
10983 pCtx->eState = 0;
10984 }
10985 }
10986
10987 do {
10988 while( i<nChunk && pChunk[i]!=0x01 ){
10989 while( pChunk[i] & 0x80 ) i++;
10990 i++;
10991 }
10992 if( pCtx->eState ){
10993 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
10994 }
10995 if( i<nChunk ){
10996 int iCol;
10997 iStart = i;
10998 i++;
10999 if( i>=nChunk ){
11000 pCtx->eState = 2;
11001 }else{
11002 fts5FastGetVarint32(pChunk, i, iCol);
11003 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
11004 if( pCtx->eState ){
11005 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
11006 iStart = i;
11007 }
11008 }
11009 }
11010 }while( i<nChunk );
11011 }
11012 }
11013
11014 static void fts5ChunkIterate(
11015 Fts5Index *p, /* Index object */
11016 Fts5SegIter *pSeg, /* Poslist of this iterator */
11017 void *pCtx, /* Context pointer for xChunk callback */
11018 void (*xChunk)(Fts5Index*, void*, const u8*, int)
11019 ){
11020 int nRem = pSeg->nPos; /* Number of bytes still to come */
11021 Fts5Data *pData = 0;
11022 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
11023 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
11024 int pgno = pSeg->iLeafPgno;
11025 int pgnoSave = 0;
11026
11027 /* This function does notmwork with detail=none databases. */
11028 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
11029
11030 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
11031 pgnoSave = pgno+1;
11032 }
11033
11034 while( 1 ){
11035 xChunk(p, pCtx, pChunk, nChunk);
11036 nRem -= nChunk;
11037 fts5DataRelease(pData);
11038 if( nRem<=0 ){
11039 break;
11040 }else{
11041 pgno++;
11042 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
11043 if( pData==0 ) break;
11044 pChunk = &pData->p[4];
11045 nChunk = MIN(nRem, pData->szLeaf - 4);
11046 if( pgno==pgnoSave ){
11047 assert( pSeg->pNextLeaf==0 );
11048 pSeg->pNextLeaf = pData;
11049 pData = 0;
11050 }
11051 }
11052 }
11053 }
11054
11055 /*
11056 ** Iterator pIter currently points to a valid entry (not EOF). This
11057 ** function appends the position list data for the current entry to
11058 ** buffer pBuf. It does not make a copy of the position-list size
11059 ** field.
11060 */
11061 static void fts5SegiterPoslist(
11062 Fts5Index *p,
11063 Fts5SegIter *pSeg,
11064 Fts5Colset *pColset,
11065 Fts5Buffer *pBuf
11066 ){
11067 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
11068 if( pColset==0 ){
11069 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
11070 }else{
11071 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
11072 PoslistCallbackCtx sCtx;
11073 sCtx.pBuf = pBuf;
11074 sCtx.pColset = pColset;
11075 sCtx.eState = fts5IndexColsetTest(pColset, 0);
11076 assert( sCtx.eState==0 || sCtx.eState==1 );
11077 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
11078 }else{
11079 PoslistOffsetsCtx sCtx;
11080 memset(&sCtx, 0, sizeof(sCtx));
11081 sCtx.pBuf = pBuf;
11082 sCtx.pColset = pColset;
11083 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
11084 }
11085 }
11086 }
11087 }
11088
11089 /*
11090 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
11091 ** the position list contains entries for column iCol, then (*pa) is set
11092 ** to point to the sub-position-list for that column and the number of
11093 ** bytes in it returned. Or, if the argument position list does not
11094 ** contain any entries for column iCol, return 0.
11095 */
11096 static int fts5IndexExtractCol(
11097 const u8 **pa, /* IN/OUT: Pointer to poslist */
11098 int n, /* IN: Size of poslist in bytes */
11099 int iCol /* Column to extract from poslist */
11100 ){
11101 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
11102 const u8 *p = *pa;
11103 const u8 *pEnd = &p[n]; /* One byte past end of position list */
11104
11105 while( iCol>iCurrent ){
11106 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
11107 ** not part of a varint. Note that it is not possible for a negative
11108 ** or extremely large varint to occur within an uncorrupted position
11109 ** list. So the last byte of each varint may be assumed to have a clear
11110 ** 0x80 bit. */
11111 while( *p!=0x01 ){
11112 while( *p++ & 0x80 );
11113 if( p>=pEnd ) return 0;
11114 }
11115 *pa = p++;
11116 iCurrent = *p++;
11117 if( iCurrent & 0x80 ){
11118 p--;
11119 p += fts5GetVarint32(p, iCurrent);
11120 }
11121 }
11122 if( iCol!=iCurrent ) return 0;
11123
11124 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
11125 ** not part of a varint */
11126 while( p<pEnd && *p!=0x01 ){
11127 while( *p++ & 0x80 );
11128 }
11129
11130 return p - (*pa);
11131 }
11132
11133 static int fts5IndexExtractColset (
11134 Fts5Colset *pColset, /* Colset to filter on */
11135 const u8 *pPos, int nPos, /* Position list */
11136 Fts5Buffer *pBuf /* Output buffer */
11137 ){
11138 int rc = SQLITE_OK;
11139 int i;
11140
11141 fts5BufferZero(pBuf);
11142 for(i=0; i<pColset->nCol; i++){
11143 const u8 *pSub = pPos;
11144 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
11145 if( nSub ){
11146 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
11147 }
11148 }
11149 return rc;
11150 }
11151
11152 /*
11153 ** xSetOutputs callback used by detail=none tables.
11154 */
11155 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
11156 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
11157 pIter->base.iRowid = pSeg->iRowid;
11158 pIter->base.nData = pSeg->nPos;
11159 }
11160
11161 /*
11162 ** xSetOutputs callback used by detail=full and detail=col tables when no
11163 ** column filters are specified.
11164 */
11165 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
11166 pIter->base.iRowid = pSeg->iRowid;
11167 pIter->base.nData = pSeg->nPos;
11168
11169 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
11170 assert( pIter->pColset==0 );
11171
11172 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
11173 /* All data is stored on the current page. Populate the output
11174 ** variables to point into the body of the page object. */
11175 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
11176 }else{
11177 /* The data is distributed over two or more pages. Copy it into the
11178 ** Fts5Iter.poslist buffer and then set the output pointer to point
11179 ** to this buffer. */
11180 fts5BufferZero(&pIter->poslist);
11181 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
11182 pIter->base.pData = pIter->poslist.p;
11183 }
11184 }
11185
11186 /*
11187 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
11188 ** against no columns at all).
11189 */
11190 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
11191 UNUSED_PARAM(pSeg);
11192 pIter->base.nData = 0;
11193 }
11194
11195 /*
11196 ** xSetOutputs callback used by detail=col when there is a column filter
11197 ** and there are 100 or more columns. Also called as a fallback from
11198 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
11199 */
11200 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
11201 fts5BufferZero(&pIter->poslist);
11202 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
11203 pIter->base.iRowid = pSeg->iRowid;
11204 pIter->base.pData = pIter->poslist.p;
11205 pIter->base.nData = pIter->poslist.n;
11206 }
11207
11208 /*
11209 ** xSetOutputs callback used when:
11210 **
11211 ** * detail=col,
11212 ** * there is a column filter, and
11213 ** * the table contains 100 or fewer columns.
11214 **
11215 ** The last point is to ensure all column numbers are stored as
11216 ** single-byte varints.
11217 */
11218 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
11219
11220 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
11221 assert( pIter->pColset );
11222
11223 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
11224 fts5IterSetOutputs_Col(pIter, pSeg);
11225 }else{
11226 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
11227 u8 *pEnd = (u8*)&a[pSeg->nPos];
11228 int iPrev = 0;
11229 int *aiCol = pIter->pColset->aiCol;
11230 int *aiColEnd = &aiCol[pIter->pColset->nCol];
11231
11232 u8 *aOut = pIter->poslist.p;
11233 int iPrevOut = 0;
11234
11235 pIter->base.iRowid = pSeg->iRowid;
11236
11237 while( a<pEnd ){
11238 iPrev += (int)a++[0] - 2;
11239 while( *aiCol<iPrev ){
11240 aiCol++;
11241 if( aiCol==aiColEnd ) goto setoutputs_col_out;
11242 }
11243 if( *aiCol==iPrev ){
11244 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
11245 iPrevOut = iPrev;
11246 }
11247 }
11248
11249 setoutputs_col_out:
11250 pIter->base.pData = pIter->poslist.p;
11251 pIter->base.nData = aOut - pIter->poslist.p;
11252 }
11253 }
11254
11255 /*
11256 ** xSetOutputs callback used by detail=full when there is a column filter.
11257 */
11258 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
11259 Fts5Colset *pColset = pIter->pColset;
11260 pIter->base.iRowid = pSeg->iRowid;
11261
11262 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
11263 assert( pColset );
11264
11265 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
11266 /* All data is stored on the current page. Populate the output
11267 ** variables to point into the body of the page object. */
11268 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
11269 if( pColset->nCol==1 ){
11270 pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
11271 pIter->base.pData = a;
11272 }else{
11273 fts5BufferZero(&pIter->poslist);
11274 fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist);
11275 pIter->base.pData = pIter->poslist.p;
11276 pIter->base.nData = pIter->poslist.n;
11277 }
11278 }else{
11279 /* The data is distributed over two or more pages. Copy it into the
11280 ** Fts5Iter.poslist buffer and then set the output pointer to point
11281 ** to this buffer. */
11282 fts5BufferZero(&pIter->poslist);
11283 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
11284 pIter->base.pData = pIter->poslist.p;
11285 pIter->base.nData = pIter->poslist.n;
11286 }
11287 }
11288
11289 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
11290 if( *pRc==SQLITE_OK ){
11291 Fts5Config *pConfig = pIter->pIndex->pConfig;
11292 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
11293 pIter->xSetOutputs = fts5IterSetOutputs_None;
11294 }
11295
11296 else if( pIter->pColset==0 ){
11297 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
11298 }
11299
11300 else if( pIter->pColset->nCol==0 ){
11301 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
11302 }
11303
11304 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
11305 pIter->xSetOutputs = fts5IterSetOutputs_Full;
11306 }
11307
11308 else{
11309 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
11310 if( pConfig->nCol<=100 ){
11311 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
11312 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
11313 }else{
11314 pIter->xSetOutputs = fts5IterSetOutputs_Col;
11315 }
11316 }
11317 }
11318 }
11319
11320
11321 /*
11322 ** Allocate a new Fts5Iter object.
11323 **
11324 ** The new object will be used to iterate through data in structure pStruct.
11325 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
11326 ** is zero or greater, data from the first nSegment segments on level iLevel
11327 ** is merged.
11328 **
11329 ** The iterator initially points to the first term/rowid entry in the
11330 ** iterated data.
11331 */
11332 static void fts5MultiIterNew(
11333 Fts5Index *p, /* FTS5 backend to iterate within */
11334 Fts5Structure *pStruct, /* Structure of specific index */
11335 int flags, /* FTS5INDEX_QUERY_XXX flags */
11336 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
11337 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
11338 int iLevel, /* Level to iterate (-1 for all) */
11339 int nSegment, /* Number of segments to merge (iLevel>=0) */
11340 Fts5Iter **ppOut /* New object */
11341 ){
11342 int nSeg = 0; /* Number of segment-iters in use */
11343 int iIter = 0; /* */
11344 int iSeg; /* Used to iterate through segments */
11345 Fts5StructureLevel *pLvl;
11346 Fts5Iter *pNew;
11347
11348 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
11349
11350 /* Allocate space for the new multi-seg-iterator. */
11351 if( p->rc==SQLITE_OK ){
11352 if( iLevel<0 ){
11353 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
11354 nSeg = pStruct->nSegment;
11355 nSeg += (p->pHash ? 1 : 0);
11356 }else{
11357 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
11358 }
11359 }
11360 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
11361 if( pNew==0 ) return;
11362 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
11363 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
11364 pNew->pStruct = pStruct;
11365 pNew->pColset = pColset;
11366 fts5StructureRef(pStruct);
11367 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
11368 fts5IterSetOutputCb(&p->rc, pNew);
11369 }
11370
11371 /* Initialize each of the component segment iterators. */
11372 if( p->rc==SQLITE_OK ){
11373 if( iLevel<0 ){
11374 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
11375 if( p->pHash ){
11376 /* Add a segment iterator for the current contents of the hash table. */
11377 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
11378 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
11379 }
11380 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
11381 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
11382 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
11383 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
11384 if( pTerm==0 ){
11385 fts5SegIterInit(p, pSeg, pIter);
11386 }else{
11387 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
11388 }
11389 }
11390 }
11391 }else{
11392 pLvl = &pStruct->aLevel[iLevel];
11393 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
11394 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
11395 }
11396 }
11397 assert( iIter==nSeg );
11398 }
11399
11400 /* If the above was successful, each component iterators now points
11401 ** to the first entry in its segment. In this case initialize the
11402 ** aFirst[] array. Or, if an error has occurred, free the iterator
11403 ** object and set the output variable to NULL. */
11404 if( p->rc==SQLITE_OK ){
11405 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
11406 int iEq;
11407 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
11408 Fts5SegIter *pSeg = &pNew->aSeg[iEq];
11409 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
11410 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
11411 }
11412 }
11413 fts5MultiIterSetEof(pNew);
11414 fts5AssertMultiIterSetup(p, pNew);
11415
11416 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
11417 fts5MultiIterNext(p, pNew, 0, 0);
11418 }else if( pNew->base.bEof==0 ){
11419 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
11420 pNew->xSetOutputs(pNew, pSeg);
11421 }
11422
11423 }else{
11424 fts5MultiIterFree(pNew);
11425 *ppOut = 0;
11426 }
11427 }
11428
11429 /*
11430 ** Create an Fts5Iter that iterates through the doclist provided
11431 ** as the second argument.
11432 */
11433 static void fts5MultiIterNew2(
11434 Fts5Index *p, /* FTS5 backend to iterate within */
11435 Fts5Data *pData, /* Doclist to iterate through */
11436 int bDesc, /* True for descending rowid order */
11437 Fts5Iter **ppOut /* New object */
11438 ){
11439 Fts5Iter *pNew;
11440 pNew = fts5MultiIterAlloc(p, 2);
11441 if( pNew ){
11442 Fts5SegIter *pIter = &pNew->aSeg[1];
11443
11444 pIter->flags = FTS5_SEGITER_ONETERM;
11445 if( pData->szLeaf>0 ){
11446 pIter->pLeaf = pData;
11447 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
11448 pIter->iEndofDoclist = pData->nn;
11449 pNew->aFirst[1].iFirst = 1;
11450 if( bDesc ){
11451 pNew->bRev = 1;
11452 pIter->flags |= FTS5_SEGITER_REVERSE;
11453 fts5SegIterReverseInitPage(p, pIter);
11454 }else{
11455 fts5SegIterLoadNPos(p, pIter);
11456 }
11457 pData = 0;
11458 }else{
11459 pNew->base.bEof = 1;
11460 }
11461 fts5SegIterSetNext(p, pIter);
11462
11463 *ppOut = pNew;
11464 }
11465
11466 fts5DataRelease(pData);
11467 }
11468
11469 /*
11470 ** Return true if the iterator is at EOF or if an error has occurred.
11471 ** False otherwise.
11472 */
11473 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
11474 assert( p->rc
11475 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
11476 );
11477 return (p->rc || pIter->base.bEof);
11478 }
11479
11480 /*
11481 ** Return the rowid of the entry that the iterator currently points
11482 ** to. If the iterator points to EOF when this function is called the
11483 ** results are undefined.
11484 */
11485 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
11486 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
11487 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
11488 }
11489
11490 /*
11491 ** Move the iterator to the next entry at or following iMatch.
11492 */
11493 static void fts5MultiIterNextFrom(
11494 Fts5Index *p,
11495 Fts5Iter *pIter,
11496 i64 iMatch
11497 ){
11498 while( 1 ){
11499 i64 iRowid;
11500 fts5MultiIterNext(p, pIter, 1, iMatch);
11501 if( fts5MultiIterEof(p, pIter) ) break;
11502 iRowid = fts5MultiIterRowid(pIter);
11503 if( pIter->bRev==0 && iRowid>=iMatch ) break;
11504 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
11505 }
11506 }
11507
11508 /*
11509 ** Return a pointer to a buffer containing the term associated with the
11510 ** entry that the iterator currently points to.
11511 */
11512 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
11513 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
11514 *pn = p->term.n;
11515 return p->term.p;
11516 }
11517
11518 /*
11519 ** Allocate a new segment-id for the structure pStruct. The new segment
11520 ** id must be between 1 and 65335 inclusive, and must not be used by
11521 ** any currently existing segment. If a free segment id cannot be found,
11522 ** SQLITE_FULL is returned.
11523 **
11524 ** If an error has already occurred, this function is a no-op. 0 is
11525 ** returned in this case.
11526 */
11527 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
11528 int iSegid = 0;
11529
11530 if( p->rc==SQLITE_OK ){
11531 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
11532 p->rc = SQLITE_FULL;
11533 }else{
11534 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
11535 ** array is 63 elements, or 252 bytes, in size. */
11536 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
11537 int iLvl, iSeg;
11538 int i;
11539 u32 mask;
11540 memset(aUsed, 0, sizeof(aUsed));
11541 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
11542 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
11543 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
11544 if( iId<=FTS5_MAX_SEGMENT ){
11545 aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
11546 }
11547 }
11548 }
11549
11550 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
11551 mask = aUsed[i];
11552 for(iSegid=0; mask & (1 << iSegid); iSegid++);
11553 iSegid += 1 + i*32;
11554
11555 #ifdef SQLITE_DEBUG
11556 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
11557 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
11558 assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
11559 }
11560 }
11561 assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
11562
11563 {
11564 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
11565 if( p->rc==SQLITE_OK ){
11566 u8 aBlob[2] = {0xff, 0xff};
11567 sqlite3_bind_int(pIdxSelect, 1, iSegid);
11568 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
11569 assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
11570 p->rc = sqlite3_reset(pIdxSelect);
11571 }
11572 }
11573 #endif
11574 }
11575 }
11576
11577 return iSegid;
11578 }
11579
11580 /*
11581 ** Discard all data currently cached in the hash-tables.
11582 */
11583 static void fts5IndexDiscardData(Fts5Index *p){
11584 assert( p->pHash || p->nPendingData==0 );
11585 if( p->pHash ){
11586 sqlite3Fts5HashClear(p->pHash);
11587 p->nPendingData = 0;
11588 }
11589 }
11590
11591 /*
11592 ** Return the size of the prefix, in bytes, that buffer
11593 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
11594 **
11595 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
11596 ** than buffer (pOld/nOld).
11597 */
11598 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
11599 int i;
11600 for(i=0; i<nOld; i++){
11601 if( pOld[i]!=pNew[i] ) break;
11602 }
11603 return i;
11604 }
11605
11606 static void fts5WriteDlidxClear(
11607 Fts5Index *p,
11608 Fts5SegWriter *pWriter,
11609 int bFlush /* If true, write dlidx to disk */
11610 ){
11611 int i;
11612 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
11613 for(i=0; i<pWriter->nDlidx; i++){
11614 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
11615 if( pDlidx->buf.n==0 ) break;
11616 if( bFlush ){
11617 assert( pDlidx->pgno!=0 );
11618 fts5DataWrite(p,
11619 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
11620 pDlidx->buf.p, pDlidx->buf.n
11621 );
11622 }
11623 sqlite3Fts5BufferZero(&pDlidx->buf);
11624 pDlidx->bPrevValid = 0;
11625 }
11626 }
11627
11628 /*
11629 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
11630 ** Any new array elements are zeroed before returning.
11631 */
11632 static int fts5WriteDlidxGrow(
11633 Fts5Index *p,
11634 Fts5SegWriter *pWriter,
11635 int nLvl
11636 ){
11637 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
11638 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
11639 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
11640 );
11641 if( aDlidx==0 ){
11642 p->rc = SQLITE_NOMEM;
11643 }else{
11644 int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
11645 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
11646 pWriter->aDlidx = aDlidx;
11647 pWriter->nDlidx = nLvl;
11648 }
11649 }
11650 return p->rc;
11651 }
11652
11653 /*
11654 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
11655 ** enough, flush it to disk and return 1. Otherwise discard it and return
11656 ** zero.
11657 */
11658 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
11659 int bFlag = 0;
11660
11661 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
11662 ** to the database, also write the doclist-index to disk. */
11663 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
11664 bFlag = 1;
11665 }
11666 fts5WriteDlidxClear(p, pWriter, bFlag);
11667 pWriter->nEmpty = 0;
11668 return bFlag;
11669 }
11670
11671 /*
11672 ** This function is called whenever processing of the doclist for the
11673 ** last term on leaf page (pWriter->iBtPage) is completed.
11674 **
11675 ** The doclist-index for that term is currently stored in-memory within the
11676 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
11677 ** writes it out to disk. Or, if it is too small to bother with, discards
11678 ** it.
11679 **
11680 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
11681 */
11682 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
11683 int bFlag;
11684
11685 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
11686 if( pWriter->iBtPage==0 ) return;
11687 bFlag = fts5WriteFlushDlidx(p, pWriter);
11688
11689 if( p->rc==SQLITE_OK ){
11690 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
11691 /* The following was already done in fts5WriteInit(): */
11692 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
11693 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
11694 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
11695 sqlite3_step(p->pIdxWriter);
11696 p->rc = sqlite3_reset(p->pIdxWriter);
11697 }
11698 pWriter->iBtPage = 0;
11699 }
11700
11701 /*
11702 ** This is called once for each leaf page except the first that contains
11703 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
11704 ** is larger than all terms written to earlier leaves, and equal to or
11705 ** smaller than the first term on the new leaf.
11706 **
11707 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
11708 ** has already occurred when this function is called, it is a no-op.
11709 */
11710 static void fts5WriteBtreeTerm(
11711 Fts5Index *p, /* FTS5 backend object */
11712 Fts5SegWriter *pWriter, /* Writer object */
11713 int nTerm, const u8 *pTerm /* First term on new page */
11714 ){
11715 fts5WriteFlushBtree(p, pWriter);
11716 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
11717 pWriter->iBtPage = pWriter->writer.pgno;
11718 }
11719
11720 /*
11721 ** This function is called when flushing a leaf page that contains no
11722 ** terms at all to disk.
11723 */
11724 static void fts5WriteBtreeNoTerm(
11725 Fts5Index *p, /* FTS5 backend object */
11726 Fts5SegWriter *pWriter /* Writer object */
11727 ){
11728 /* If there were no rowids on the leaf page either and the doclist-index
11729 ** has already been started, append an 0x00 byte to it. */
11730 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
11731 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
11732 assert( pDlidx->bPrevValid );
11733 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
11734 }
11735
11736 /* Increment the "number of sequential leaves without a term" counter. */
11737 pWriter->nEmpty++;
11738 }
11739
11740 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
11741 i64 iRowid;
11742 int iOff;
11743
11744 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
11745 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
11746 return iRowid;
11747 }
11748
11749 /*
11750 ** Rowid iRowid has just been appended to the current leaf page. It is the
11751 ** first on the page. This function appends an appropriate entry to the current
11752 ** doclist-index.
11753 */
11754 static void fts5WriteDlidxAppend(
11755 Fts5Index *p,
11756 Fts5SegWriter *pWriter,
11757 i64 iRowid
11758 ){
11759 int i;
11760 int bDone = 0;
11761
11762 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
11763 i64 iVal;
11764 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
11765
11766 if( pDlidx->buf.n>=p->pConfig->pgsz ){
11767 /* The current doclist-index page is full. Write it to disk and push
11768 ** a copy of iRowid (which will become the first rowid on the next
11769 ** doclist-index leaf page) up into the next level of the b-tree
11770 ** hierarchy. If the node being flushed is currently the root node,
11771 ** also push its first rowid upwards. */
11772 pDlidx->buf.p[0] = 0x01; /* Not the root node */
11773 fts5DataWrite(p,
11774 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
11775 pDlidx->buf.p, pDlidx->buf.n
11776 );
11777 fts5WriteDlidxGrow(p, pWriter, i+2);
11778 pDlidx = &pWriter->aDlidx[i];
11779 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
11780 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
11781
11782 /* This was the root node. Push its first rowid up to the new root. */
11783 pDlidx[1].pgno = pDlidx->pgno;
11784 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
11785 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
11786 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
11787 pDlidx[1].bPrevValid = 1;
11788 pDlidx[1].iPrev = iFirst;
11789 }
11790
11791 sqlite3Fts5BufferZero(&pDlidx->buf);
11792 pDlidx->bPrevValid = 0;
11793 pDlidx->pgno++;
11794 }else{
11795 bDone = 1;
11796 }
11797
11798 if( pDlidx->bPrevValid ){
11799 iVal = iRowid - pDlidx->iPrev;
11800 }else{
11801 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
11802 assert( pDlidx->buf.n==0 );
11803 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
11804 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
11805 iVal = iRowid;
11806 }
11807
11808 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
11809 pDlidx->bPrevValid = 1;
11810 pDlidx->iPrev = iRowid;
11811 }
11812 }
11813
11814 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
11815 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
11816 Fts5PageWriter *pPage = &pWriter->writer;
11817 i64 iRowid;
11818
11819 static int nCall = 0;
11820 nCall++;
11821
11822 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
11823
11824 /* Set the szLeaf header field. */
11825 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
11826 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
11827
11828 if( pWriter->bFirstTermInPage ){
11829 /* No term was written to this page. */
11830 assert( pPage->pgidx.n==0 );
11831 fts5WriteBtreeNoTerm(p, pWriter);
11832 }else{
11833 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
11834 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
11835 }
11836
11837 /* Write the page out to disk */
11838 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
11839 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
11840
11841 /* Initialize the next page. */
11842 fts5BufferZero(&pPage->buf);
11843 fts5BufferZero(&pPage->pgidx);
11844 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
11845 pPage->iPrevPgidx = 0;
11846 pPage->pgno++;
11847
11848 /* Increase the leaves written counter */
11849 pWriter->nLeafWritten++;
11850
11851 /* The new leaf holds no terms or rowids */
11852 pWriter->bFirstTermInPage = 1;
11853 pWriter->bFirstRowidInPage = 1;
11854 }
11855
11856 /*
11857 ** Append term pTerm/nTerm to the segment being written by the writer passed
11858 ** as the second argument.
11859 **
11860 ** If an error occurs, set the Fts5Index.rc error code. If an error has
11861 ** already occurred, this function is a no-op.
11862 */
11863 static void fts5WriteAppendTerm(
11864 Fts5Index *p,
11865 Fts5SegWriter *pWriter,
11866 int nTerm, const u8 *pTerm
11867 ){
11868 int nPrefix; /* Bytes of prefix compression for term */
11869 Fts5PageWriter *pPage = &pWriter->writer;
11870 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
11871
11872 assert( p->rc==SQLITE_OK );
11873 assert( pPage->buf.n>=4 );
11874 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
11875
11876 /* If the current leaf page is full, flush it to disk. */
11877 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
11878 if( pPage->buf.n>4 ){
11879 fts5WriteFlushLeaf(p, pWriter);
11880 }
11881 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
11882 }
11883
11884 /* TODO1: Updating pgidx here. */
11885 pPgidx->n += sqlite3Fts5PutVarint(
11886 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
11887 );
11888 pPage->iPrevPgidx = pPage->buf.n;
11889 #if 0
11890 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
11891 pPgidx->n += 2;
11892 #endif
11893
11894 if( pWriter->bFirstTermInPage ){
11895 nPrefix = 0;
11896 if( pPage->pgno!=1 ){
11897 /* This is the first term on a leaf that is not the leftmost leaf in
11898 ** the segment b-tree. In this case it is necessary to add a term to
11899 ** the b-tree hierarchy that is (a) larger than the largest term
11900 ** already written to the segment and (b) smaller than or equal to
11901 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
11902 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
11903 ** previous term.
11904 **
11905 ** Usually, the previous term is available in pPage->term. The exception
11906 ** is if this is the first term written in an incremental-merge step.
11907 ** In this case the previous term is not available, so just write a
11908 ** copy of (pTerm/nTerm) into the parent node. This is slightly
11909 ** inefficient, but still correct. */
11910 int n = nTerm;
11911 if( pPage->term.n ){
11912 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
11913 }
11914 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
11915 pPage = &pWriter->writer;
11916 }
11917 }else{
11918 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
11919 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
11920 }
11921
11922 /* Append the number of bytes of new data, then the term data itself
11923 ** to the page. */
11924 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
11925 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
11926
11927 /* Update the Fts5PageWriter.term field. */
11928 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
11929 pWriter->bFirstTermInPage = 0;
11930
11931 pWriter->bFirstRowidInPage = 0;
11932 pWriter->bFirstRowidInDoclist = 1;
11933
11934 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
11935 pWriter->aDlidx[0].pgno = pPage->pgno;
11936 }
11937
11938 /*
11939 ** Append a rowid and position-list size field to the writers output.
11940 */
11941 static void fts5WriteAppendRowid(
11942 Fts5Index *p,
11943 Fts5SegWriter *pWriter,
11944 i64 iRowid
11945 ){
11946 if( p->rc==SQLITE_OK ){
11947 Fts5PageWriter *pPage = &pWriter->writer;
11948
11949 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
11950 fts5WriteFlushLeaf(p, pWriter);
11951 }
11952
11953 /* If this is to be the first rowid written to the page, set the
11954 ** rowid-pointer in the page-header. Also append a value to the dlidx
11955 ** buffer, in case a doclist-index is required. */
11956 if( pWriter->bFirstRowidInPage ){
11957 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
11958 fts5WriteDlidxAppend(p, pWriter, iRowid);
11959 }
11960
11961 /* Write the rowid. */
11962 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
11963 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
11964 }else{
11965 assert( p->rc || iRowid>pWriter->iPrevRowid );
11966 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
11967 }
11968 pWriter->iPrevRowid = iRowid;
11969 pWriter->bFirstRowidInDoclist = 0;
11970 pWriter->bFirstRowidInPage = 0;
11971 }
11972 }
11973
11974 static void fts5WriteAppendPoslistData(
11975 Fts5Index *p,
11976 Fts5SegWriter *pWriter,
11977 const u8 *aData,
11978 int nData
11979 ){
11980 Fts5PageWriter *pPage = &pWriter->writer;
11981 const u8 *a = aData;
11982 int n = nData;
11983
11984 assert( p->pConfig->pgsz>0 );
11985 while( p->rc==SQLITE_OK
11986 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
11987 ){
11988 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
11989 int nCopy = 0;
11990 while( nCopy<nReq ){
11991 i64 dummy;
11992 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
11993 }
11994 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
11995 a += nCopy;
11996 n -= nCopy;
11997 fts5WriteFlushLeaf(p, pWriter);
11998 }
11999 if( n>0 ){
12000 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
12001 }
12002 }
12003
12004 /*
12005 ** Flush any data cached by the writer object to the database. Free any
12006 ** allocations associated with the writer.
12007 */
12008 static void fts5WriteFinish(
12009 Fts5Index *p,
12010 Fts5SegWriter *pWriter, /* Writer object */
12011 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
12012 ){
12013 int i;
12014 Fts5PageWriter *pLeaf = &pWriter->writer;
12015 if( p->rc==SQLITE_OK ){
12016 assert( pLeaf->pgno>=1 );
12017 if( pLeaf->buf.n>4 ){
12018 fts5WriteFlushLeaf(p, pWriter);
12019 }
12020 *pnLeaf = pLeaf->pgno-1;
12021 if( pLeaf->pgno>1 ){
12022 fts5WriteFlushBtree(p, pWriter);
12023 }
12024 }
12025 fts5BufferFree(&pLeaf->term);
12026 fts5BufferFree(&pLeaf->buf);
12027 fts5BufferFree(&pLeaf->pgidx);
12028 fts5BufferFree(&pWriter->btterm);
12029
12030 for(i=0; i<pWriter->nDlidx; i++){
12031 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
12032 }
12033 sqlite3_free(pWriter->aDlidx);
12034 }
12035
12036 static void fts5WriteInit(
12037 Fts5Index *p,
12038 Fts5SegWriter *pWriter,
12039 int iSegid
12040 ){
12041 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
12042
12043 memset(pWriter, 0, sizeof(Fts5SegWriter));
12044 pWriter->iSegid = iSegid;
12045
12046 fts5WriteDlidxGrow(p, pWriter, 1);
12047 pWriter->writer.pgno = 1;
12048 pWriter->bFirstTermInPage = 1;
12049 pWriter->iBtPage = 1;
12050
12051 assert( pWriter->writer.buf.n==0 );
12052 assert( pWriter->writer.pgidx.n==0 );
12053
12054 /* Grow the two buffers to pgsz + padding bytes in size. */
12055 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
12056 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
12057
12058 if( p->pIdxWriter==0 ){
12059 Fts5Config *pConfig = p->pConfig;
12060 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
12061 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
12062 pConfig->zDb, pConfig->zName
12063 ));
12064 }
12065
12066 if( p->rc==SQLITE_OK ){
12067 /* Initialize the 4-byte leaf-page header to 0x00. */
12068 memset(pWriter->writer.buf.p, 0, 4);
12069 pWriter->writer.buf.n = 4;
12070
12071 /* Bind the current output segment id to the index-writer. This is an
12072 ** optimization over binding the same value over and over as rows are
12073 ** inserted into %_idx by the current writer. */
12074 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
12075 }
12076 }
12077
12078 /*
12079 ** Iterator pIter was used to iterate through the input segments of on an
12080 ** incremental merge operation. This function is called if the incremental
12081 ** merge step has finished but the input has not been completely exhausted.
12082 */
12083 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
12084 int i;
12085 Fts5Buffer buf;
12086 memset(&buf, 0, sizeof(Fts5Buffer));
12087 for(i=0; i<pIter->nSeg; i++){
12088 Fts5SegIter *pSeg = &pIter->aSeg[i];
12089 if( pSeg->pSeg==0 ){
12090 /* no-op */
12091 }else if( pSeg->pLeaf==0 ){
12092 /* All keys from this input segment have been transfered to the output.
12093 ** Set both the first and last page-numbers to 0 to indicate that the
12094 ** segment is now empty. */
12095 pSeg->pSeg->pgnoLast = 0;
12096 pSeg->pSeg->pgnoFirst = 0;
12097 }else{
12098 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
12099 i64 iLeafRowid;
12100 Fts5Data *pData;
12101 int iId = pSeg->pSeg->iSegid;
12102 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
12103
12104 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
12105 pData = fts5DataRead(p, iLeafRowid);
12106 if( pData ){
12107 fts5BufferZero(&buf);
12108 fts5BufferGrow(&p->rc, &buf, pData->nn);
12109 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
12110 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
12111 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
12112 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
12113 if( p->rc==SQLITE_OK ){
12114 /* Set the szLeaf field */
12115 fts5PutU16(&buf.p[2], (u16)buf.n);
12116 }
12117
12118 /* Set up the new page-index array */
12119 fts5BufferAppendVarint(&p->rc, &buf, 4);
12120 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
12121 && pSeg->iEndofDoclist<pData->szLeaf
12122 ){
12123 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
12124 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
12125 fts5BufferAppendBlob(&p->rc, &buf,
12126 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
12127 );
12128 }
12129
12130 fts5DataRelease(pData);
12131 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
12132 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
12133 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
12134 }
12135 }
12136 }
12137 fts5BufferFree(&buf);
12138 }
12139
12140 static void fts5MergeChunkCallback(
12141 Fts5Index *p,
12142 void *pCtx,
12143 const u8 *pChunk, int nChunk
12144 ){
12145 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
12146 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
12147 }
12148
12149 /*
12150 **
12151 */
12152 static void fts5IndexMergeLevel(
12153 Fts5Index *p, /* FTS5 backend object */
12154 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
12155 int iLvl, /* Level to read input from */
12156 int *pnRem /* Write up to this many output leaves */
12157 ){
12158 Fts5Structure *pStruct = *ppStruct;
12159 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
12160 Fts5StructureLevel *pLvlOut;
12161 Fts5Iter *pIter = 0; /* Iterator to read input data */
12162 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
12163 int nInput; /* Number of input segments */
12164 Fts5SegWriter writer; /* Writer object */
12165 Fts5StructureSegment *pSeg; /* Output segment */
12166 Fts5Buffer term;
12167 int bOldest; /* True if the output segment is the oldest */
12168 int eDetail = p->pConfig->eDetail;
12169 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
12170
12171 assert( iLvl<pStruct->nLevel );
12172 assert( pLvl->nMerge<=pLvl->nSeg );
12173
12174 memset(&writer, 0, sizeof(Fts5SegWriter));
12175 memset(&term, 0, sizeof(Fts5Buffer));
12176 if( pLvl->nMerge ){
12177 pLvlOut = &pStruct->aLevel[iLvl+1];
12178 assert( pLvlOut->nSeg>0 );
12179 nInput = pLvl->nMerge;
12180 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
12181
12182 fts5WriteInit(p, &writer, pSeg->iSegid);
12183 writer.writer.pgno = pSeg->pgnoLast+1;
12184 writer.iBtPage = 0;
12185 }else{
12186 int iSegid = fts5AllocateSegid(p, pStruct);
12187
12188 /* Extend the Fts5Structure object as required to ensure the output
12189 ** segment exists. */
12190 if( iLvl==pStruct->nLevel-1 ){
12191 fts5StructureAddLevel(&p->rc, ppStruct);
12192 pStruct = *ppStruct;
12193 }
12194 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
12195 if( p->rc ) return;
12196 pLvl = &pStruct->aLevel[iLvl];
12197 pLvlOut = &pStruct->aLevel[iLvl+1];
12198
12199 fts5WriteInit(p, &writer, iSegid);
12200
12201 /* Add the new segment to the output level */
12202 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
12203 pLvlOut->nSeg++;
12204 pSeg->pgnoFirst = 1;
12205 pSeg->iSegid = iSegid;
12206 pStruct->nSegment++;
12207
12208 /* Read input from all segments in the input level */
12209 nInput = pLvl->nSeg;
12210 }
12211 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
12212
12213 assert( iLvl>=0 );
12214 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
12215 fts5MultiIterEof(p, pIter)==0;
12216 fts5MultiIterNext(p, pIter, 0, 0)
12217 ){
12218 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
12219 int nPos; /* position-list size field value */
12220 int nTerm;
12221 const u8 *pTerm;
12222
12223 /* Check for key annihilation. */
12224 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
12225
12226 pTerm = fts5MultiIterTerm(pIter, &nTerm);
12227 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
12228 if( pnRem && writer.nLeafWritten>nRem ){
12229 break;
12230 }
12231
12232 /* This is a new term. Append a term to the output segment. */
12233 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
12234 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
12235 }
12236
12237 /* Append the rowid to the output */
12238 /* WRITEPOSLISTSIZE */
12239 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
12240
12241 if( eDetail==FTS5_DETAIL_NONE ){
12242 if( pSegIter->bDel ){
12243 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
12244 if( pSegIter->nPos>0 ){
12245 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
12246 }
12247 }
12248 }else{
12249 /* Append the position-list data to the output */
12250 nPos = pSegIter->nPos*2 + pSegIter->bDel;
12251 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
12252 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
12253 }
12254 }
12255
12256 /* Flush the last leaf page to disk. Set the output segment b-tree height
12257 ** and last leaf page number at the same time. */
12258 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
12259
12260 if( fts5MultiIterEof(p, pIter) ){
12261 int i;
12262
12263 /* Remove the redundant segments from the %_data table */
12264 for(i=0; i<nInput; i++){
12265 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
12266 }
12267
12268 /* Remove the redundant segments from the input level */
12269 if( pLvl->nSeg!=nInput ){
12270 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
12271 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
12272 }
12273 pStruct->nSegment -= nInput;
12274 pLvl->nSeg -= nInput;
12275 pLvl->nMerge = 0;
12276 if( pSeg->pgnoLast==0 ){
12277 pLvlOut->nSeg--;
12278 pStruct->nSegment--;
12279 }
12280 }else{
12281 assert( pSeg->pgnoLast>0 );
12282 fts5TrimSegments(p, pIter);
12283 pLvl->nMerge = nInput;
12284 }
12285
12286 fts5MultiIterFree(pIter);
12287 fts5BufferFree(&term);
12288 if( pnRem ) *pnRem -= writer.nLeafWritten;
12289 }
12290
12291 /*
12292 ** Do up to nPg pages of automerge work on the index.
12293 **
12294 ** Return true if any changes were actually made, or false otherwise.
12295 */
12296 static int fts5IndexMerge(
12297 Fts5Index *p, /* FTS5 backend object */
12298 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
12299 int nPg, /* Pages of work to do */
12300 int nMin /* Minimum number of segments to merge */
12301 ){
12302 int nRem = nPg;
12303 int bRet = 0;
12304 Fts5Structure *pStruct = *ppStruct;
12305 while( nRem>0 && p->rc==SQLITE_OK ){
12306 int iLvl; /* To iterate through levels */
12307 int iBestLvl = 0; /* Level offering the most input segments */
12308 int nBest = 0; /* Number of input segments on best level */
12309
12310 /* Set iBestLvl to the level to read input segments from. */
12311 assert( pStruct->nLevel>0 );
12312 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
12313 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
12314 if( pLvl->nMerge ){
12315 if( pLvl->nMerge>nBest ){
12316 iBestLvl = iLvl;
12317 nBest = pLvl->nMerge;
12318 }
12319 break;
12320 }
12321 if( pLvl->nSeg>nBest ){
12322 nBest = pLvl->nSeg;
12323 iBestLvl = iLvl;
12324 }
12325 }
12326
12327 /* If nBest is still 0, then the index must be empty. */
12328 #ifdef SQLITE_DEBUG
12329 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
12330 assert( pStruct->aLevel[iLvl].nSeg==0 );
12331 }
12332 #endif
12333
12334 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
12335 break;
12336 }
12337 bRet = 1;
12338 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
12339 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
12340 fts5StructurePromote(p, iBestLvl+1, pStruct);
12341 }
12342 }
12343 *ppStruct = pStruct;
12344 return bRet;
12345 }
12346
12347 /*
12348 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
12349 ** segment. This function updates the write-counter accordingly and, if
12350 ** necessary, performs incremental merge work.
12351 **
12352 ** If an error occurs, set the Fts5Index.rc error code. If an error has
12353 ** already occurred, this function is a no-op.
12354 */
12355 static void fts5IndexAutomerge(
12356 Fts5Index *p, /* FTS5 backend object */
12357 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
12358 int nLeaf /* Number of output leaves just written */
12359 ){
12360 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
12361 Fts5Structure *pStruct = *ppStruct;
12362 u64 nWrite; /* Initial value of write-counter */
12363 int nWork; /* Number of work-quanta to perform */
12364 int nRem; /* Number of leaf pages left to write */
12365
12366 /* Update the write-counter. While doing so, set nWork. */
12367 nWrite = pStruct->nWriteCounter;
12368 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
12369 pStruct->nWriteCounter += nLeaf;
12370 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
12371
12372 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
12373 }
12374 }
12375
12376 static void fts5IndexCrisismerge(
12377 Fts5Index *p, /* FTS5 backend object */
12378 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
12379 ){
12380 const int nCrisis = p->pConfig->nCrisisMerge;
12381 Fts5Structure *pStruct = *ppStruct;
12382 int iLvl = 0;
12383
12384 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
12385 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
12386 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
12387 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
12388 fts5StructurePromote(p, iLvl+1, pStruct);
12389 iLvl++;
12390 }
12391 *ppStruct = pStruct;
12392 }
12393
12394 static int fts5IndexReturn(Fts5Index *p){
12395 int rc = p->rc;
12396 p->rc = SQLITE_OK;
12397 return rc;
12398 }
12399
12400 typedef struct Fts5FlushCtx Fts5FlushCtx;
12401 struct Fts5FlushCtx {
12402 Fts5Index *pIdx;
12403 Fts5SegWriter writer;
12404 };
12405
12406 /*
12407 ** Buffer aBuf[] contains a list of varints, all small enough to fit
12408 ** in a 32-bit integer. Return the size of the largest prefix of this
12409 ** list nMax bytes or less in size.
12410 */
12411 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
12412 int ret;
12413 u32 dummy;
12414 ret = fts5GetVarint32(aBuf, dummy);
12415 if( ret<nMax ){
12416 while( 1 ){
12417 int i = fts5GetVarint32(&aBuf[ret], dummy);
12418 if( (ret + i) > nMax ) break;
12419 ret += i;
12420 }
12421 }
12422 return ret;
12423 }
12424
12425 /*
12426 ** Flush the contents of in-memory hash table iHash to a new level-0
12427 ** segment on disk. Also update the corresponding structure record.
12428 **
12429 ** If an error occurs, set the Fts5Index.rc error code. If an error has
12430 ** already occurred, this function is a no-op.
12431 */
12432 static void fts5FlushOneHash(Fts5Index *p){
12433 Fts5Hash *pHash = p->pHash;
12434 Fts5Structure *pStruct;
12435 int iSegid;
12436 int pgnoLast = 0; /* Last leaf page number in segment */
12437
12438 /* Obtain a reference to the index structure and allocate a new segment-id
12439 ** for the new level-0 segment. */
12440 pStruct = fts5StructureRead(p);
12441 iSegid = fts5AllocateSegid(p, pStruct);
12442 fts5StructureInvalidate(p);
12443
12444 if( iSegid ){
12445 const int pgsz = p->pConfig->pgsz;
12446 int eDetail = p->pConfig->eDetail;
12447 Fts5StructureSegment *pSeg; /* New segment within pStruct */
12448 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
12449 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
12450
12451 Fts5SegWriter writer;
12452 fts5WriteInit(p, &writer, iSegid);
12453
12454 pBuf = &writer.writer.buf;
12455 pPgidx = &writer.writer.pgidx;
12456
12457 /* fts5WriteInit() should have initialized the buffers to (most likely)
12458 ** the maximum space required. */
12459 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
12460 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
12461
12462 /* Begin scanning through hash table entries. This loop runs once for each
12463 ** term/doclist currently stored within the hash table. */
12464 if( p->rc==SQLITE_OK ){
12465 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
12466 }
12467 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
12468 const char *zTerm; /* Buffer containing term */
12469 const u8 *pDoclist; /* Pointer to doclist for this term */
12470 int nDoclist; /* Size of doclist in bytes */
12471
12472 /* Write the term for this entry to disk. */
12473 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
12474 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
12475
12476 assert( writer.bFirstRowidInPage==0 );
12477 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
12478 /* The entire doclist will fit on the current leaf. */
12479 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
12480 }else{
12481 i64 iRowid = 0;
12482 i64 iDelta = 0;
12483 int iOff = 0;
12484
12485 /* The entire doclist will not fit on this leaf. The following
12486 ** loop iterates through the poslists that make up the current
12487 ** doclist. */
12488 while( p->rc==SQLITE_OK && iOff<nDoclist ){
12489 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
12490 iRowid += iDelta;
12491
12492 if( writer.bFirstRowidInPage ){
12493 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
12494 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
12495 writer.bFirstRowidInPage = 0;
12496 fts5WriteDlidxAppend(p, &writer, iRowid);
12497 }else{
12498 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
12499 }
12500 assert( pBuf->n<=pBuf->nSpace );
12501
12502 if( eDetail==FTS5_DETAIL_NONE ){
12503 if( iOff<nDoclist && pDoclist[iOff]==0 ){
12504 pBuf->p[pBuf->n++] = 0;
12505 iOff++;
12506 if( iOff<nDoclist && pDoclist[iOff]==0 ){
12507 pBuf->p[pBuf->n++] = 0;
12508 iOff++;
12509 }
12510 }
12511 if( (pBuf->n + pPgidx->n)>=pgsz ){
12512 fts5WriteFlushLeaf(p, &writer);
12513 }
12514 }else{
12515 int bDummy;
12516 int nPos;
12517 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
12518 nCopy += nPos;
12519 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
12520 /* The entire poslist will fit on the current leaf. So copy
12521 ** it in one go. */
12522 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
12523 }else{
12524 /* The entire poslist will not fit on this leaf. So it needs
12525 ** to be broken into sections. The only qualification being
12526 ** that each varint must be stored contiguously. */
12527 const u8 *pPoslist = &pDoclist[iOff];
12528 int iPos = 0;
12529 while( p->rc==SQLITE_OK ){
12530 int nSpace = pgsz - pBuf->n - pPgidx->n;
12531 int n = 0;
12532 if( (nCopy - iPos)<=nSpace ){
12533 n = nCopy - iPos;
12534 }else{
12535 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
12536 }
12537 assert( n>0 );
12538 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
12539 iPos += n;
12540 if( (pBuf->n + pPgidx->n)>=pgsz ){
12541 fts5WriteFlushLeaf(p, &writer);
12542 }
12543 if( iPos>=nCopy ) break;
12544 }
12545 }
12546 iOff += nCopy;
12547 }
12548 }
12549 }
12550
12551 /* TODO2: Doclist terminator written here. */
12552 /* pBuf->p[pBuf->n++] = '\0'; */
12553 assert( pBuf->n<=pBuf->nSpace );
12554 sqlite3Fts5HashScanNext(pHash);
12555 }
12556 sqlite3Fts5HashClear(pHash);
12557 fts5WriteFinish(p, &writer, &pgnoLast);
12558
12559 /* Update the Fts5Structure. It is written back to the database by the
12560 ** fts5StructureRelease() call below. */
12561 if( pStruct->nLevel==0 ){
12562 fts5StructureAddLevel(&p->rc, &pStruct);
12563 }
12564 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
12565 if( p->rc==SQLITE_OK ){
12566 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
12567 pSeg->iSegid = iSegid;
12568 pSeg->pgnoFirst = 1;
12569 pSeg->pgnoLast = pgnoLast;
12570 pStruct->nSegment++;
12571 }
12572 fts5StructurePromote(p, 0, pStruct);
12573 }
12574
12575 fts5IndexAutomerge(p, &pStruct, pgnoLast);
12576 fts5IndexCrisismerge(p, &pStruct);
12577 fts5StructureWrite(p, pStruct);
12578 fts5StructureRelease(pStruct);
12579 }
12580
12581 /*
12582 ** Flush any data stored in the in-memory hash tables to the database.
12583 */
12584 static void fts5IndexFlush(Fts5Index *p){
12585 /* Unless it is empty, flush the hash table to disk */
12586 if( p->nPendingData ){
12587 assert( p->pHash );
12588 p->nPendingData = 0;
12589 fts5FlushOneHash(p);
12590 }
12591 }
12592
12593 static Fts5Structure *fts5IndexOptimizeStruct(
12594 Fts5Index *p,
12595 Fts5Structure *pStruct
12596 ){
12597 Fts5Structure *pNew = 0;
12598 int nByte = sizeof(Fts5Structure);
12599 int nSeg = pStruct->nSegment;
12600 int i;
12601
12602 /* Figure out if this structure requires optimization. A structure does
12603 ** not require optimization if either:
12604 **
12605 ** + it consists of fewer than two segments, or
12606 ** + all segments are on the same level, or
12607 ** + all segments except one are currently inputs to a merge operation.
12608 **
12609 ** In the first case, return NULL. In the second, increment the ref-count
12610 ** on *pStruct and return a copy of the pointer to it.
12611 */
12612 if( nSeg<2 ) return 0;
12613 for(i=0; i<pStruct->nLevel; i++){
12614 int nThis = pStruct->aLevel[i].nSeg;
12615 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
12616 fts5StructureRef(pStruct);
12617 return pStruct;
12618 }
12619 assert( pStruct->aLevel[i].nMerge<=nThis );
12620 }
12621
12622 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
12623 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
12624
12625 if( pNew ){
12626 Fts5StructureLevel *pLvl;
12627 nByte = nSeg * sizeof(Fts5StructureSegment);
12628 pNew->nLevel = pStruct->nLevel+1;
12629 pNew->nRef = 1;
12630 pNew->nWriteCounter = pStruct->nWriteCounter;
12631 pLvl = &pNew->aLevel[pStruct->nLevel];
12632 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
12633 if( pLvl->aSeg ){
12634 int iLvl, iSeg;
12635 int iSegOut = 0;
12636 /* Iterate through all segments, from oldest to newest. Add them to
12637 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
12638 ** segment in the data structure. */
12639 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
12640 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
12641 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
12642 iSegOut++;
12643 }
12644 }
12645 pNew->nSegment = pLvl->nSeg = nSeg;
12646 }else{
12647 sqlite3_free(pNew);
12648 pNew = 0;
12649 }
12650 }
12651
12652 return pNew;
12653 }
12654
12655 static int sqlite3Fts5IndexOptimize(Fts5Index *p){
12656 Fts5Structure *pStruct;
12657 Fts5Structure *pNew = 0;
12658
12659 assert( p->rc==SQLITE_OK );
12660 fts5IndexFlush(p);
12661 pStruct = fts5StructureRead(p);
12662 fts5StructureInvalidate(p);
12663
12664 if( pStruct ){
12665 pNew = fts5IndexOptimizeStruct(p, pStruct);
12666 }
12667 fts5StructureRelease(pStruct);
12668
12669 assert( pNew==0 || pNew->nSegment>0 );
12670 if( pNew ){
12671 int iLvl;
12672 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
12673 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
12674 int nRem = FTS5_OPT_WORK_UNIT;
12675 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
12676 }
12677
12678 fts5StructureWrite(p, pNew);
12679 fts5StructureRelease(pNew);
12680 }
12681
12682 return fts5IndexReturn(p);
12683 }
12684
12685 /*
12686 ** This is called to implement the special "VALUES('merge', $nMerge)"
12687 ** INSERT command.
12688 */
12689 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
12690 Fts5Structure *pStruct = fts5StructureRead(p);
12691 if( pStruct ){
12692 int nMin = p->pConfig->nUsermerge;
12693 fts5StructureInvalidate(p);
12694 if( nMerge<0 ){
12695 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
12696 fts5StructureRelease(pStruct);
12697 pStruct = pNew;
12698 nMin = 2;
12699 nMerge = nMerge*-1;
12700 }
12701 if( pStruct && pStruct->nLevel ){
12702 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
12703 fts5StructureWrite(p, pStruct);
12704 }
12705 }
12706 fts5StructureRelease(pStruct);
12707 }
12708 return fts5IndexReturn(p);
12709 }
12710
12711 static void fts5AppendRowid(
12712 Fts5Index *p,
12713 i64 iDelta,
12714 Fts5Iter *pUnused,
12715 Fts5Buffer *pBuf
12716 ){
12717 UNUSED_PARAM(pUnused);
12718 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
12719 }
12720
12721 static void fts5AppendPoslist(
12722 Fts5Index *p,
12723 i64 iDelta,
12724 Fts5Iter *pMulti,
12725 Fts5Buffer *pBuf
12726 ){
12727 int nData = pMulti->base.nData;
12728 assert( nData>0 );
12729 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){
12730 fts5BufferSafeAppendVarint(pBuf, iDelta);
12731 fts5BufferSafeAppendVarint(pBuf, nData*2);
12732 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
12733 }
12734 }
12735
12736
12737 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
12738 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
12739
12740 assert( pIter->aPoslist );
12741 if( p>=pIter->aEof ){
12742 pIter->aPoslist = 0;
12743 }else{
12744 i64 iDelta;
12745
12746 p += fts5GetVarint(p, (u64*)&iDelta);
12747 pIter->iRowid += iDelta;
12748
12749 /* Read position list size */
12750 if( p[0] & 0x80 ){
12751 int nPos;
12752 pIter->nSize = fts5GetVarint32(p, nPos);
12753 pIter->nPoslist = (nPos>>1);
12754 }else{
12755 pIter->nPoslist = ((int)(p[0])) >> 1;
12756 pIter->nSize = 1;
12757 }
12758
12759 pIter->aPoslist = p;
12760 }
12761 }
12762
12763 static void fts5DoclistIterInit(
12764 Fts5Buffer *pBuf,
12765 Fts5DoclistIter *pIter
12766 ){
12767 memset(pIter, 0, sizeof(*pIter));
12768 pIter->aPoslist = pBuf->p;
12769 pIter->aEof = &pBuf->p[pBuf->n];
12770 fts5DoclistIterNext(pIter);
12771 }
12772
#if 0
/*
** Disabled function form of the fts5MergeAppendDocid() macro defined
** below.
**
** Append rowid iRowid to buffer pBuf, encoded as a varint containing the
** difference between iRowid and *piLastRowid. Then set *piLastRowid to
** iRowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append rowid iRowid to buffer pBuf as a varint delta against
** iLastRowid, then update iLastRowid (which must be an lvalue) to
** iRowid.
**
** Note that arguments iLastRowid and iRowid are each evaluated more than
** once, so they must not have side effects.
**
** The body is wrapped in do{...}while(0) so that an invocation followed
** by a semi-colon is a single statement in every context, including as
** the body of an "if" that has an "else" clause.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) do {        \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                       \
  fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid));     \
  (iLastRowid) = (iRowid);                                         \
}while(0)
12796
12797 /*
12798 ** Swap the contents of buffer *p1 with that of *p2.
12799 */
12800 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
12801 Fts5Buffer tmp = *p1;
12802 *p1 = *p2;
12803 *p2 = tmp;
12804 }
12805
12806 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
12807 int i = *piOff;
12808 if( i>=pBuf->n ){
12809 *piOff = -1;
12810 }else{
12811 u64 iVal;
12812 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
12813 *piRowid += iVal;
12814 }
12815 }
12816
12817 /*
12818 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
12819 ** In this case the buffers consist of a delta-encoded list of rowids only.
12820 */
12821 static void fts5MergeRowidLists(
12822 Fts5Index *p, /* FTS5 backend object */
12823 Fts5Buffer *p1, /* First list to merge */
12824 Fts5Buffer *p2 /* Second list to merge */
12825 ){
12826 int i1 = 0;
12827 int i2 = 0;
12828 i64 iRowid1 = 0;
12829 i64 iRowid2 = 0;
12830 i64 iOut = 0;
12831
12832 Fts5Buffer out;
12833 memset(&out, 0, sizeof(out));
12834 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
12835 if( p->rc ) return;
12836
12837 fts5NextRowid(p1, &i1, &iRowid1);
12838 fts5NextRowid(p2, &i2, &iRowid2);
12839 while( i1>=0 || i2>=0 ){
12840 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
12841 assert( iOut==0 || iRowid1>iOut );
12842 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
12843 iOut = iRowid1;
12844 fts5NextRowid(p1, &i1, &iRowid1);
12845 }else{
12846 assert( iOut==0 || iRowid2>iOut );
12847 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
12848 iOut = iRowid2;
12849 if( i1>=0 && iRowid1==iRowid2 ){
12850 fts5NextRowid(p1, &i1, &iRowid1);
12851 }
12852 fts5NextRowid(p2, &i2, &iRowid2);
12853 }
12854 }
12855
12856 fts5BufferSwap(&out, p1);
12857 fts5BufferFree(&out);
12858 }
12859
12860 /*
12861 ** Buffers p1 and p2 contain doclists. This function merges the content
12862 ** of the two doclists together and sets buffer p1 to the result before
12863 ** returning.
12864 **
12865 ** If an error occurs, an error code is left in p->rc. If an error has
12866 ** already occurred, this function is a no-op.
12867 */
12868 static void fts5MergePrefixLists(
12869 Fts5Index *p, /* FTS5 backend object */
12870 Fts5Buffer *p1, /* First list to merge */
12871 Fts5Buffer *p2 /* Second list to merge */
12872 ){
12873 if( p2->n ){
12874 i64 iLastRowid = 0;
12875 Fts5DoclistIter i1;
12876 Fts5DoclistIter i2;
12877 Fts5Buffer out = {0, 0, 0};
12878 Fts5Buffer tmp = {0, 0, 0};
12879
12880 if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
12881 fts5DoclistIterInit(p1, &i1);
12882 fts5DoclistIterInit(p2, &i2);
12883
12884 while( 1 ){
12885 if( i1.iRowid<i2.iRowid ){
12886 /* Copy entry from i1 */
12887 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
12888 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
12889 fts5DoclistIterNext(&i1);
12890 if( i1.aPoslist==0 ) break;
12891 }
12892 else if( i2.iRowid!=i1.iRowid ){
12893 /* Copy entry from i2 */
12894 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
12895 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
12896 fts5DoclistIterNext(&i2);
12897 if( i2.aPoslist==0 ) break;
12898 }
12899 else{
12900 /* Merge the two position lists. */
12901 i64 iPos1 = 0;
12902 i64 iPos2 = 0;
12903 int iOff1 = 0;
12904 int iOff2 = 0;
12905 u8 *a1 = &i1.aPoslist[i1.nSize];
12906 u8 *a2 = &i2.aPoslist[i2.nSize];
12907
12908 i64 iPrev = 0;
12909 Fts5PoslistWriter writer;
12910 memset(&writer, 0, sizeof(writer));
12911
12912 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
12913 fts5BufferZero(&tmp);
12914 sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
12915 if( p->rc ) break;
12916
12917 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
12918 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
12919 assert( iPos1>=0 && iPos2>=0 );
12920
12921 if( iPos1<iPos2 ){
12922 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
12923 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
12924 }else{
12925 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
12926 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
12927 }
12928
12929 if( iPos1>=0 && iPos2>=0 ){
12930 while( 1 ){
12931 if( iPos1<iPos2 ){
12932 if( iPos1!=iPrev ){
12933 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
12934 }
12935 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
12936 if( iPos1<0 ) break;
12937 }else{
12938 assert( iPos2!=iPrev );
12939 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
12940 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
12941 if( iPos2<0 ) break;
12942 }
12943 }
12944 }
12945
12946 if( iPos1>=0 ){
12947 if( iPos1!=iPrev ){
12948 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
12949 }
12950 fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
12951 }else{
12952 assert( iPos2>=0 && iPos2!=iPrev );
12953 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
12954 fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
12955 }
12956
12957 /* WRITEPOSLISTSIZE */
12958 fts5BufferSafeAppendVarint(&out, tmp.n * 2);
12959 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
12960 fts5DoclistIterNext(&i1);
12961 fts5DoclistIterNext(&i2);
12962 if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
12963 }
12964 }
12965
12966 if( i1.aPoslist ){
12967 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
12968 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
12969 }
12970 else if( i2.aPoslist ){
12971 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
12972 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
12973 }
12974
12975 fts5BufferSet(&p->rc, p1, out.n, out.p);
12976 fts5BufferFree(&tmp);
12977 fts5BufferFree(&out);
12978 }
12979 }
12980
12981 static void fts5SetupPrefixIter(
12982 Fts5Index *p, /* Index to read from */
12983 int bDesc, /* True for "ORDER BY rowid DESC" */
12984 const u8 *pToken, /* Buffer containing prefix to match */
12985 int nToken, /* Size of buffer pToken in bytes */
12986 Fts5Colset *pColset, /* Restrict matches to these columns */
12987 Fts5Iter **ppIter /* OUT: New iterator */
12988 ){
12989 Fts5Structure *pStruct;
12990 Fts5Buffer *aBuf;
12991 const int nBuf = 32;
12992
12993 void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
12994 void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
12995 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
12996 xMerge = fts5MergeRowidLists;
12997 xAppend = fts5AppendRowid;
12998 }else{
12999 xMerge = fts5MergePrefixLists;
13000 xAppend = fts5AppendPoslist;
13001 }
13002
13003 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
13004 pStruct = fts5StructureRead(p);
13005
13006 if( aBuf && pStruct ){
13007 const int flags = FTS5INDEX_QUERY_SCAN
13008 | FTS5INDEX_QUERY_SKIPEMPTY
13009 | FTS5INDEX_QUERY_NOOUTPUT;
13010 int i;
13011 i64 iLastRowid = 0;
13012 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
13013 Fts5Data *pData;
13014 Fts5Buffer doclist;
13015 int bNewTerm = 1;
13016
13017 memset(&doclist, 0, sizeof(doclist));
13018 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
13019 fts5IterSetOutputCb(&p->rc, p1);
13020 for( /* no-op */ ;
13021 fts5MultiIterEof(p, p1)==0;
13022 fts5MultiIterNext2(p, p1, &bNewTerm)
13023 ){
13024 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
13025 int nTerm = pSeg->term.n;
13026 const u8 *pTerm = pSeg->term.p;
13027 p1->xSetOutputs(p1, pSeg);
13028
13029 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
13030 if( bNewTerm ){
13031 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
13032 }
13033
13034 if( p1->base.nData==0 ) continue;
13035
13036 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
13037 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
13038 assert( i<nBuf );
13039 if( aBuf[i].n==0 ){
13040 fts5BufferSwap(&doclist, &aBuf[i]);
13041 fts5BufferZero(&doclist);
13042 }else{
13043 xMerge(p, &doclist, &aBuf[i]);
13044 fts5BufferZero(&aBuf[i]);
13045 }
13046 }
13047 iLastRowid = 0;
13048 }
13049
13050 xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
13051 iLastRowid = p1->base.iRowid;
13052 }
13053
13054 for(i=0; i<nBuf; i++){
13055 if( p->rc==SQLITE_OK ){
13056 xMerge(p, &doclist, &aBuf[i]);
13057 }
13058 fts5BufferFree(&aBuf[i]);
13059 }
13060 fts5MultiIterFree(p1);
13061
13062 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
13063 if( pData ){
13064 pData->p = (u8*)&pData[1];
13065 pData->nn = pData->szLeaf = doclist.n;
13066 memcpy(pData->p, doclist.p, doclist.n);
13067 fts5MultiIterNew2(p, pData, bDesc, ppIter);
13068 }
13069 fts5BufferFree(&doclist);
13070 }
13071
13072 fts5StructureRelease(pStruct);
13073 sqlite3_free(aBuf);
13074 }
13075
13076
13077 /*
13078 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
13079 ** to the document with rowid iRowid.
13080 */
13081 static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
13082 assert( p->rc==SQLITE_OK );
13083
13084 /* Allocate the hash table if it has not already been allocated */
13085 if( p->pHash==0 ){
13086 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
13087 }
13088
13089 /* Flush the hash table to disk if required */
13090 if( iRowid<p->iWriteRowid
13091 || (iRowid==p->iWriteRowid && p->bDelete==0)
13092 || (p->nPendingData > p->pConfig->nHashSize)
13093 ){
13094 fts5IndexFlush(p);
13095 }
13096
13097 p->iWriteRowid = iRowid;
13098 p->bDelete = bDelete;
13099 return fts5IndexReturn(p);
13100 }
13101
13102 /*
13103 ** Commit data to disk.
13104 */
13105 static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
13106 assert( p->rc==SQLITE_OK );
13107 fts5IndexFlush(p);
13108 if( bCommit ) fts5CloseReader(p);
13109 return fts5IndexReturn(p);
13110 }
13111
13112 /*
13113 ** Discard any data stored in the in-memory hash tables. Do not write it
13114 ** to the database. Additionally, assume that the contents of the %_data
13115 ** table may have changed on disk. So any in-memory caches of %_data
13116 ** records must be invalidated.
13117 */
13118 static int sqlite3Fts5IndexRollback(Fts5Index *p){
13119 fts5CloseReader(p);
13120 fts5IndexDiscardData(p);
13121 fts5StructureInvalidate(p);
13122 /* assert( p->rc==SQLITE_OK ); */
13123 return SQLITE_OK;
13124 }
13125
13126 /*
13127 ** The %_data table is completely empty when this function is called. This
13128 ** function populates it with the initial structure objects for each index,
13129 ** and the initial version of the "averages" record (a zero-byte blob).
13130 */
13131 static int sqlite3Fts5IndexReinit(Fts5Index *p){
13132 Fts5Structure s;
13133 fts5StructureInvalidate(p);
13134 memset(&s, 0, sizeof(Fts5Structure));
13135 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
13136 fts5StructureWrite(p, &s);
13137 return fts5IndexReturn(p);
13138 }
13139
13140 /*
13141 ** Open a new Fts5Index handle. If the bCreate argument is true, create
13142 ** and initialize the underlying %_data table.
13143 **
13144 ** If successful, set *pp to point to the new object and return SQLITE_OK.
13145 ** Otherwise, set *pp to NULL and return an SQLite error code.
13146 */
13147 static int sqlite3Fts5IndexOpen(
13148 Fts5Config *pConfig,
13149 int bCreate,
13150 Fts5Index **pp,
13151 char **pzErr
13152 ){
13153 int rc = SQLITE_OK;
13154 Fts5Index *p; /* New object */
13155
13156 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
13157 if( rc==SQLITE_OK ){
13158 p->pConfig = pConfig;
13159 p->nWorkUnit = FTS5_WORK_UNIT;
13160 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
13161 if( p->zDataTbl && bCreate ){
13162 rc = sqlite3Fts5CreateTable(
13163 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
13164 );
13165 if( rc==SQLITE_OK ){
13166 rc = sqlite3Fts5CreateTable(pConfig, "idx",
13167 "segid, term, pgno, PRIMARY KEY(segid, term)",
13168 1, pzErr
13169 );
13170 }
13171 if( rc==SQLITE_OK ){
13172 rc = sqlite3Fts5IndexReinit(p);
13173 }
13174 }
13175 }
13176
13177 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
13178 if( rc ){
13179 sqlite3Fts5IndexClose(p);
13180 *pp = 0;
13181 }
13182 return rc;
13183 }
13184
13185 /*
13186 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
13187 */
13188 static int sqlite3Fts5IndexClose(Fts5Index *p){
13189 int rc = SQLITE_OK;
13190 if( p ){
13191 assert( p->pReader==0 );
13192 fts5StructureInvalidate(p);
13193 sqlite3_finalize(p->pWriter);
13194 sqlite3_finalize(p->pDeleter);
13195 sqlite3_finalize(p->pIdxWriter);
13196 sqlite3_finalize(p->pIdxDeleter);
13197 sqlite3_finalize(p->pIdxSelect);
13198 sqlite3_finalize(p->pDataVersion);
13199 sqlite3Fts5HashFree(p->pHash);
13200 sqlite3_free(p->zDataTbl);
13201 sqlite3_free(p);
13202 }
13203 return rc;
13204 }
13205
/*
** Argument p points to a buffer containing utf-8 text that is nByte
** bytes in size. Return the number of bytes in the nChar character
** prefix of the buffer, or 0 if there are less than nChar characters in
** total.
**
** Characters are delimited in the same way as by fts5IndexCharlen(): a
** byte with a value of 0xC0 or greater begins a character that includes
** all continuation bytes (0x80..0xBF) that immediately follow it. Any
** other byte is a character by itself.
**
** The buffer is not required to be nul-terminated. No byte at or past
** offset nByte is ever read, and the value returned is never greater
** than nByte.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Skip continuation bytes, taking care to stop at the end of the
      ** buffer even if the text beyond it looks like a continuation. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}
13226
/*
** Buffer pIn contains nIn bytes of utf-8 text. Return the number of
** characters it contains.
**
** A byte with a value of 0xC0 or greater, along with any continuation
** bytes (0x80..0xBF) that immediately follow it, counts as a single
** character. Any other byte counts as a character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  const unsigned char *z = (const unsigned char*)pIn;
  int iByte = 0;                  /* Offset of next byte to read */
  int nCount;                     /* Number of characters so far */

  for(nCount=0; iByte<nIn; nCount++){
    unsigned char c = z[iByte++];
    if( c<0xc0 ) continue;

    /* Multi-byte character. Skip over the continuation bytes. */
    while( iByte<nIn && (z[iByte] & 0xc0)==0x80 ){
      iByte++;
    }
  }
  return nCount;
}
13242
13243 /*
13244 ** Insert or remove data to or from the index. Each time a document is
13245 ** added to or removed from the index, this function is called one or more
13246 ** times.
13247 **
13248 ** For an insert, it must be called once for each token in the new document.
13249 ** If the operation is a delete, it must be called (at least) once for each
13250 ** unique token in the document with an iCol value less than zero. The iPos
13251 ** argument is ignored for a delete.
13252 */
13253 static int sqlite3Fts5IndexWrite(
13254 Fts5Index *p, /* Index to write to */
13255 int iCol, /* Column token appears in (-ve -> delete) */
13256 int iPos, /* Position of token within column */
13257 const char *pToken, int nToken /* Token to add or remove to or from index */
13258 ){
13259 int i; /* Used to iterate through indexes */
13260 int rc = SQLITE_OK; /* Return code */
13261 Fts5Config *pConfig = p->pConfig;
13262
13263 assert( p->rc==SQLITE_OK );
13264 assert( (iCol<0)==p->bDelete );
13265
13266 /* Add the entry to the main terms index. */
13267 rc = sqlite3Fts5HashWrite(
13268 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
13269 );
13270
13271 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
13272 const int nChar = pConfig->aPrefix[i];
13273 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
13274 if( nByte ){
13275 rc = sqlite3Fts5HashWrite(p->pHash,
13276 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
13277 nByte
13278 );
13279 }
13280 }
13281
13282 return rc;
13283 }
13284
13285 /*
13286 ** Open a new iterator to iterate though all rowid that match the
13287 ** specified token or token prefix.
13288 */
13289 static int sqlite3Fts5IndexQuery(
13290 Fts5Index *p, /* FTS index to query */
13291 const char *pToken, int nToken, /* Token (or prefix) to query for */
13292 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
13293 Fts5Colset *pColset, /* Match these columns only */
13294 Fts5IndexIter **ppIter /* OUT: New iterator object */
13295 ){
13296 Fts5Config *pConfig = p->pConfig;
13297 Fts5Iter *pRet = 0;
13298 Fts5Buffer buf = {0, 0, 0};
13299
13300 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
13301 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
13302
13303 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
13304 int iIdx = 0; /* Index to search */
13305 memcpy(&buf.p[1], pToken, nToken);
13306
13307 /* Figure out which index to search and set iIdx accordingly. If this
13308 ** is a prefix query for which there is no prefix index, set iIdx to
13309 ** greater than pConfig->nPrefix to indicate that the query will be
13310 ** satisfied by scanning multiple terms in the main index.
13311 **
13312 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
13313 ** prefix-query. Instead of using a prefix-index (if one exists),
13314 ** evaluate the prefix query using the main FTS index. This is used
13315 ** for internal sanity checking by the integrity-check in debug
13316 ** mode only. */
13317 #ifdef SQLITE_DEBUG
13318 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
13319 assert( flags & FTS5INDEX_QUERY_PREFIX );
13320 iIdx = 1+pConfig->nPrefix;
13321 }else
13322 #endif
13323 if( flags & FTS5INDEX_QUERY_PREFIX ){
13324 int nChar = fts5IndexCharlen(pToken, nToken);
13325 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
13326 if( pConfig->aPrefix[iIdx-1]==nChar ) break;
13327 }
13328 }
13329
13330 if( iIdx<=pConfig->nPrefix ){
13331 /* Straight index lookup */
13332 Fts5Structure *pStruct = fts5StructureRead(p);
13333 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
13334 if( pStruct ){
13335 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
13336 pColset, buf.p, nToken+1, -1, 0, &pRet
13337 );
13338 fts5StructureRelease(pStruct);
13339 }
13340 }else{
13341 /* Scan multiple terms in the main index */
13342 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
13343 buf.p[0] = FTS5_MAIN_PREFIX;
13344 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
13345 assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
13346 fts5IterSetOutputCb(&p->rc, pRet);
13347 if( p->rc==SQLITE_OK ){
13348 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
13349 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
13350 }
13351 }
13352
13353 if( p->rc ){
13354 sqlite3Fts5IterClose(&pRet->base);
13355 pRet = 0;
13356 fts5CloseReader(p);
13357 }
13358
13359 *ppIter = &pRet->base;
13360 sqlite3Fts5BufferFree(&buf);
13361 }
13362 return fts5IndexReturn(p);
13363 }
13364
13365 /*
13366 ** Return true if the iterator passed as the only argument is at EOF.
13367 */
13368 /*
13369 ** Move to the next matching rowid.
13370 */
13371 static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
13372 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
13373 assert( pIter->pIndex->rc==SQLITE_OK );
13374 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
13375 return fts5IndexReturn(pIter->pIndex);
13376 }
13377
13378 /*
13379 ** Move to the next matching term/rowid. Used by the fts5vocab module.
13380 */
13381 static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
13382 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
13383 Fts5Index *p = pIter->pIndex;
13384
13385 assert( pIter->pIndex->rc==SQLITE_OK );
13386
13387 fts5MultiIterNext(p, pIter, 0, 0);
13388 if( p->rc==SQLITE_OK ){
13389 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
13390 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
13391 fts5DataRelease(pSeg->pLeaf);
13392 pSeg->pLeaf = 0;
13393 pIter->base.bEof = 1;
13394 }
13395 }
13396
13397 return fts5IndexReturn(pIter->pIndex);
13398 }
13399
13400 /*
13401 ** Move to the next matching rowid that occurs at or after iMatch. The
13402 ** definition of "at or after" depends on whether this iterator iterates
13403 ** in ascending or descending rowid order.
13404 */
13405 static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
13406 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
13407 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
13408 return fts5IndexReturn(pIter->pIndex);
13409 }
13410
13411 /*
13412 ** Return the current term.
13413 */
13414 static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
13415 int n;
13416 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
13417 *pn = n-1;
13418 return &z[1];
13419 }
13420
13421 /*
13422 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
13423 */
13424 static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
13425 if( pIndexIter ){
13426 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
13427 Fts5Index *pIndex = pIter->pIndex;
13428 fts5MultiIterFree(pIter);
13429 fts5CloseReader(pIndex);
13430 }
13431 }
13432
13433 /*
13434 ** Read and decode the "averages" record from the database.
13435 **
13436 ** Parameter anSize must point to an array of size nCol, where nCol is
13437 ** the number of user defined columns in the FTS table.
13438 */
13439 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
13440 int nCol = p->pConfig->nCol;
13441 Fts5Data *pData;
13442
13443 *pnRow = 0;
13444 memset(anSize, 0, sizeof(i64) * nCol);
13445 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
13446 if( p->rc==SQLITE_OK && pData->nn ){
13447 int i = 0;
13448 int iCol;
13449 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
13450 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
13451 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
13452 }
13453 }
13454
13455 fts5DataRelease(pData);
13456 return fts5IndexReturn(p);
13457 }
13458
13459 /*
13460 ** Replace the current "averages" record with the contents of the buffer
13461 ** supplied as the second argument.
13462 */
13463 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData) {
13464 assert( p->rc==SQLITE_OK );
13465 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
13466 return fts5IndexReturn(p);
13467 }
13468
13469 /*
13470 ** Return the total number of blocks this module has read from the %_data
13471 ** table since it was created.
13472 */
13473 static int sqlite3Fts5IndexReads(Fts5Index *p){
13474 return p->nRead;
13475 }
13476
13477 /*
13478 ** Set the 32-bit cookie value stored at the start of all structure
13479 ** records to the value passed as the second argument.
13480 **
13481 ** Return SQLITE_OK if successful, or an SQLite error code if an error
13482 ** occurs.
13483 */
13484 static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
13485 int rc; /* Return code */
13486 Fts5Config *pConfig = p->pConfig; /* Configuration object */
13487 u8 aCookie[4]; /* Binary representation of iNew */
13488 sqlite3_blob *pBlob = 0;
13489
13490 assert( p->rc==SQLITE_OK );
13491 sqlite3Fts5Put32(aCookie, iNew);
13492
13493 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
13494 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
13495 );
13496 if( rc==SQLITE_OK ){
13497 sqlite3_blob_write(pBlob, aCookie, 4, 0);
13498 rc = sqlite3_blob_close(pBlob);
13499 }
13500
13501 return rc;
13502 }
13503
13504 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
13505 Fts5Structure *pStruct;
13506 pStruct = fts5StructureRead(p);
13507 fts5StructureRelease(pStruct);
13508 return fts5IndexReturn(p);
13509 }
13510
13511
13512 /*************************************************************************
13513 **************************************************************************
13514 ** Below this point is the implementation of the integrity-check
13515 ** functionality.
13516 */
13517
13518 /*
13519 ** Return a simple checksum value based on the arguments.
13520 */
13521 static u64 sqlite3Fts5IndexEntryCksum(
13522 i64 iRowid,
13523 int iCol,
13524 int iPos,
13525 int iIdx,
13526 const char *pTerm,
13527 int nTerm
13528 ){
13529 int i;
13530 u64 ret = iRowid;
13531 ret += (ret<<3) + iCol;
13532 ret += (ret<<3) + iPos;
13533 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
13534 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
13535 return ret;
13536 }
13537
13538 #ifdef SQLITE_DEBUG
13539 /*
13540 ** This function is purely an internal test. It does not contribute to
13541 ** FTS functionality, or even the integrity-check, in any way.
13542 **
13543 ** Instead, it tests that the same set of pgno/rowid combinations are
13544 ** visited regardless of whether the doclist-index identified by parameters
13545 ** iSegid/iLeaf is iterated in forwards or reverse order.
13546 */
13547 static void fts5TestDlidxReverse(
13548 Fts5Index *p,
13549 int iSegid, /* Segment id to load from */
13550 int iLeaf /* Load doclist-index for this leaf */
13551 ){
13552 Fts5DlidxIter *pDlidx = 0;
13553 u64 cksum1 = 13;
13554 u64 cksum2 = 13;
13555
13556 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
13557 fts5DlidxIterEof(p, pDlidx)==0;
13558 fts5DlidxIterNext(p, pDlidx)
13559 ){
13560 i64 iRowid = fts5DlidxIterRowid(pDlidx);
13561 int pgno = fts5DlidxIterPgno(pDlidx);
13562 assert( pgno>iLeaf );
13563 cksum1 += iRowid + ((i64)pgno<<32);
13564 }
13565 fts5DlidxIterFree(pDlidx);
13566 pDlidx = 0;
13567
13568 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
13569 fts5DlidxIterEof(p, pDlidx)==0;
13570 fts5DlidxIterPrev(p, pDlidx)
13571 ){
13572 i64 iRowid = fts5DlidxIterRowid(pDlidx);
13573 int pgno = fts5DlidxIterPgno(pDlidx);
13574 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
13575 cksum2 += iRowid + ((i64)pgno<<32);
13576 }
13577 fts5DlidxIterFree(pDlidx);
13578 pDlidx = 0;
13579
13580 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
13581 }
13582
13583 static int fts5QueryCksum(
13584 Fts5Index *p, /* Fts5 index object */
13585 int iIdx,
13586 const char *z, /* Index key to query for */
13587 int n, /* Size of index key in bytes */
13588 int flags, /* Flags for Fts5IndexQuery */
13589 u64 *pCksum /* IN/OUT: Checksum value */
13590 ){
13591 int eDetail = p->pConfig->eDetail;
13592 u64 cksum = *pCksum;
13593 Fts5IndexIter *pIter = 0;
13594 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
13595
13596 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
13597 i64 rowid = pIter->iRowid;
13598
13599 if( eDetail==FTS5_DETAIL_NONE ){
13600 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
13601 }else{
13602 Fts5PoslistReader sReader;
13603 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
13604 sReader.bEof==0;
13605 sqlite3Fts5PoslistReaderNext(&sReader)
13606 ){
13607 int iCol = FTS5_POS2COLUMN(sReader.iPos);
13608 int iOff = FTS5_POS2OFFSET(sReader.iPos);
13609 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
13610 }
13611 }
13612 if( rc==SQLITE_OK ){
13613 rc = sqlite3Fts5IterNext(pIter);
13614 }
13615 }
13616 sqlite3Fts5IterClose(pIter);
13617
13618 *pCksum = cksum;
13619 return rc;
13620 }
13621
13622
13623 /*
13624 ** This function is also purely an internal test. It does not contribute to
13625 ** FTS functionality, or even the integrity-check, in any way.
13626 */
13627 static void fts5TestTerm(
13628 Fts5Index *p,
13629 Fts5Buffer *pPrev, /* Previous term */
13630 const char *z, int n, /* Possibly new term to test */
13631 u64 expected,
13632 u64 *pCksum
13633 ){
13634 int rc = p->rc;
13635 if( pPrev->n==0 ){
13636 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
13637 }else
13638 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
13639 u64 cksum3 = *pCksum;
13640 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
13641 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
13642 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
13643 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
13644 u64 ck1 = 0;
13645 u64 ck2 = 0;
13646
13647 /* Check that the results returned for ASC and DESC queries are
13648 ** the same. If not, call this corruption. */
13649 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
13650 if( rc==SQLITE_OK ){
13651 int f = flags|FTS5INDEX_QUERY_DESC;
13652 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
13653 }
13654 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
13655
13656 /* If this is a prefix query, check that the results returned if the
13657 ** the index is disabled are the same. In both ASC and DESC order.
13658 **
13659 ** This check may only be performed if the hash table is empty. This
13660 ** is because the hash table only supports a single scan query at
13661 ** a time, and the multi-iter loop from which this function is called
13662 ** is already performing such a scan. */
13663 if( p->nPendingData==0 ){
13664 if( iIdx>0 && rc==SQLITE_OK ){
13665 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
13666 ck2 = 0;
13667 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
13668 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
13669 }
13670 if( iIdx>0 && rc==SQLITE_OK ){
13671 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
13672 ck2 = 0;
13673 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
13674 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
13675 }
13676 }
13677
13678 cksum3 ^= ck1;
13679 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
13680
13681 if( rc==SQLITE_OK && cksum3!=expected ){
13682 rc = FTS5_CORRUPT;
13683 }
13684 *pCksum = cksum3;
13685 }
13686 p->rc = rc;
13687 }
13688
13689 #else
/* No-op stand-ins: in this branch of the enclosing conditional the two
** internal test hooks expand to nothing, so their call sites compile away. */
13690 # define fts5TestDlidxReverse(x,y,z)
13691 # define fts5TestTerm(u,v,w,x,y,z)
13692 #endif
13693
13694 /*
13695 ** Check that:
13696 **
13697 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
13698 ** contain zero terms.
13699 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
13700 ** contain zero rowids.
13701 */
13702 static void fts5IndexIntegrityCheckEmpty(
13703 Fts5Index *p,
13704 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
13705 int iFirst,
13706 int iNoRowid,
13707 int iLast
13708 ){
13709 int i;
13710
13711 /* Now check that the iter.nEmpty leaves following the current leaf
13712 ** (a) exist and (b) contain no terms. */
13713 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
13714 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
13715 if( pLeaf ){
13716 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
13717 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
13718 }
13719 fts5DataRelease(pLeaf);
13720 }
13721 }
13722
13723 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
13724 int iTermOff = 0;
13725 int ii;
13726
13727 Fts5Buffer buf1 = {0,0,0};
13728 Fts5Buffer buf2 = {0,0,0};
13729
13730 ii = pLeaf->szLeaf;
13731 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
13732 int res;
13733 int iOff;
13734 int nIncr;
13735
13736 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
13737 iTermOff += nIncr;
13738 iOff = iTermOff;
13739
13740 if( iOff>=pLeaf->szLeaf ){
13741 p->rc = FTS5_CORRUPT;
13742 }else if( iTermOff==nIncr ){
13743 int nByte;
13744 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
13745 if( (iOff+nByte)>pLeaf->szLeaf ){
13746 p->rc = FTS5_CORRUPT;
13747 }else{
13748 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
13749 }
13750 }else{
13751 int nKeep, nByte;
13752 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
13753 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
13754 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
13755 p->rc = FTS5_CORRUPT;
13756 }else{
13757 buf1.n = nKeep;
13758 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
13759 }
13760
13761 if( p->rc==SQLITE_OK ){
13762 res = fts5BufferCompare(&buf1, &buf2);
13763 if( res<=0 ) p->rc = FTS5_CORRUPT;
13764 }
13765 }
13766 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
13767 }
13768
13769 fts5BufferFree(&buf1);
13770 fts5BufferFree(&buf2);
13771 }
13772
/*
** Check the internal consistency of one segment, pSeg.
**
** The rows of the %_idx table belonging to this segment are read with an
** SQL statement. For each row the referenced leaf page is loaded and
** checked against the row's split-key, its page-index is verified, and
** the leaves between the previous row's leaf and this one are checked for
** emptiness. If the row's doclist-index flag is set, the doclist index is
** walked and compared with the leaves it refers to.
**
** Nothing is returned. Any problem is recorded in p->rc (FTS5_CORRUPT for
** failed checks, otherwise whatever error a helper or sqlite3_finalize()
** produced). A segment whose pgnoFirst is 0 is skipped entirely.
*/
13773 static void fts5IndexIntegrityCheckSegment(
13774 Fts5Index *p, /* FTS5 backend object */
13775 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
13776 ){
13777 Fts5Config *pConfig = p->pConfig;
13778 sqlite3_stmt *pStmt = 0;
13779 int rc2;
/* iIdxPrevLeaf: leaf page of the previous %_idx row visited (starts one
** before the first page). iDlidxPrevLeaf: pages numbered above this value
** are required, by the emptiness check below, to hold no rowids. */
13780 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
13781 int iDlidxPrevLeaf = pSeg->pgnoLast;
13782
13783 if( pSeg->pgnoFirst==0 ) return;
13784
13785 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
13786 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
13787 pConfig->zDb, pConfig->zName, pSeg->iSegid
13788 ));
13789
13790 /* Iterate through the b-tree hierarchy. */
13791 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
13792 i64 iRow; /* Rowid for this leaf */
13793 Fts5Data *pLeaf; /* Data for this leaf */
13794
/* Column 2 is the stored pgno shifted right one bit (the leaf page number);
** column 3 is its low bit, used below as the doclist-index flag. */
13795 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
13796 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
13797 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
13798 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
13799
13800 /* If the leaf in question has already been trimmed from the segment,
13801 ** ignore this b-tree entry. Otherwise, load it into memory. */
13802 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
13803 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
13804 pLeaf = fts5LeafRead(p, iRow);
13805 if( pLeaf==0 ) break;
13806
13807 /* Check that the leaf contains at least one term, and that it is equal
13808 ** to or larger than the split-key in zIdxTerm. Also check that if there
13809 ** is also a rowid pointer within the leaf page header, it points to a
13810 ** location before the term. */
13811 if( pLeaf->nn<=pLeaf->szLeaf ){
13812 p->rc = FTS5_CORRUPT;
13813 }else{
13814 int iOff; /* Offset of first term on leaf */
13815 int iRowidOff; /* Offset of first rowid on leaf */
13816 int nTerm; /* Size of term on leaf in bytes */
13817 int res; /* Comparison of term and split-key */
13818
13819 iOff = fts5LeafFirstTermOff(pLeaf);
13820 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
13821 if( iRowidOff>=iOff ){
13822 p->rc = FTS5_CORRUPT;
13823 }else{
13824 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
13825 res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
13826 if( res==0 ) res = nTerm - nIdxTerm;
13827 if( res<0 ) p->rc = FTS5_CORRUPT;
13828 }
13829
13830 fts5IntegrityCheckPgidx(p, pLeaf);
13831 }
13832 fts5DataRelease(pLeaf);
13833 if( p->rc ) break;
13834
13835 /* Check that the leaves lying between the previous %_idx entry and this
13836 ** one (a) exist and (b) contain no terms. Those numbered above
** iDlidxPrevLeaf must also have a zero first-rowid offset. */
13837 fts5IndexIntegrityCheckEmpty(
13838 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
13839 );
13840 if( p->rc ) break;
13841
13842 /* If there is a doclist-index, check that it looks right. */
13843 if( bIdxDlidx ){
13844 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
13845 int iPrevLeaf = iIdxLeaf;
13846 int iSegid = pSeg->iSegid;
13847 int iPg = 0;
13848 i64 iKey;
13849
13850 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
13851 fts5DlidxIterEof(p, pDlidx)==0;
13852 fts5DlidxIterNext(p, pDlidx)
13853 ){
13854
13855 /* Check any rowid-less pages that occur before the current leaf. */
13856 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
13857 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
13858 pLeaf = fts5DataRead(p, iKey);
13859 if( pLeaf ){
13860 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
13861 fts5DataRelease(pLeaf);
13862 }
13863 }
13864 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
13865
13866 /* Check that the leaf page indicated by the iterator really does
13867 ** contain the rowid suggested by the same. */
13868 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
13869 pLeaf = fts5DataRead(p, iKey);
13870 if( pLeaf ){
13871 i64 iRowid;
13872 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
13873 ASSERT_SZLEAF_OK(pLeaf);
13874 if( iRowidOff>=pLeaf->szLeaf ){
13875 p->rc = FTS5_CORRUPT;
13876 }else{
13877 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
13878 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
13879 }
13880 fts5DataRelease(pLeaf);
13881 }
13882 }
13883
/* iPg holds the value left by the last run of the inner page loop above
** (0 if that loop never ran). It becomes the rowid-free boundary used by
** the emptiness check on the next %_idx row. */
13884 iDlidxPrevLeaf = iPg;
13885 fts5DlidxIterFree(pDlidx);
13886 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
13887 }else{
13888 iDlidxPrevLeaf = pSeg->pgnoLast;
13889 /* TODO: Check there is no doclist index */
13890 }
13891
13892 iIdxPrevLeaf = iIdxLeaf;
13893 }
13894
/* Finalize the statement. Its result code is reported only if no earlier
** error has already been stored in p->rc. */
13895 rc2 = sqlite3_finalize(pStmt);
13896 if( p->rc==SQLITE_OK ) p->rc = rc2;
13897
13898 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
13899 #if 0
13900 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
13901 p->rc = FTS5_CORRUPT;
13902 }
13903 #endif
13904 }
13905
13906
13907 /*
13908 ** Run internal checks to ensure that the FTS index (a) is internally
13909 ** consistent and (b) contains entries for which the XOR of the checksums
13910 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
13911 **
13912 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
13913 ** checksum does not match. Return SQLITE_OK if all checks pass without
13914 ** error, or some other SQLite error code if another error (e.g. OOM)
13915 ** occurs.
13916 */
13917 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
13918 int eDetail = p->pConfig->eDetail;
13919 u64 cksum2 = 0; /* Checksum based on contents of indexes */
13920 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
13921 Fts5Iter *pIter; /* Used to iterate through entire index */
13922 Fts5Structure *pStruct; /* Index structure */
13923
13924 #ifdef SQLITE_DEBUG
13925 /* Used by extra internal tests only run if NDEBUG is not defined */
13926 u64 cksum3 = 0; /* Checksum based on contents of indexes */
13927 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
13928 #endif
13929 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
13930
13931 /* Load the FTS index structure */
13932 pStruct = fts5StructureRead(p);
13933
13934 /* Check that the internal nodes of each segment match the leaves */
13935 if( pStruct ){
13936 int iLvl, iSeg;
13937 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
13938 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
13939 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
13940 fts5IndexIntegrityCheckSegment(p, pSeg);
13941 }
13942 }
13943 }
13944
13945 /* The cksum argument passed to this function is a checksum calculated
13946 ** based on all expected entries in the FTS index (including prefix index
13947 ** entries). This block checks that a checksum calculated based on the
13948 ** actual contents of FTS index is identical.
13949 **
13950 ** Two versions of the same checksum are calculated. The first (stack
13951 ** variable cksum2) based on entries extracted from the full-text index
13952 ** while doing a linear scan of each individual index in turn.
13953 **
13954 ** As each term visited by the linear scans, a separate query for the
13955 ** same term is performed. cksum3 is calculated based on the entries
13956 ** extracted by these queries.
13957 */
13958 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
13959 fts5MultiIterEof(p, pIter)==0;
13960 fts5MultiIterNext(p, pIter, 0, 0)
13961 ){
13962 int n; /* Size of term in bytes */
13963 i64 iPos = 0; /* Position read from poslist */
13964 int iOff = 0; /* Offset within poslist */
13965 i64 iRowid = fts5MultiIterRowid(pIter);
13966 char *z = (char*)fts5MultiIterTerm(pIter, &n);
13967
13968 /* If this is a new term, query for it. Update cksum3 with the results. */
13969 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
13970
13971 if( eDetail==FTS5_DETAIL_NONE ){
13972 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
13973 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
13974 }
13975 }else{
13976 poslist.n = 0;
13977 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
13978 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
13979 int iCol = FTS5_POS2COLUMN(iPos);
13980 int iTokOff = FTS5_POS2OFFSET(iPos);
13981 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
13982 }
13983 }
13984 }
13985 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
13986
13987 fts5MultiIterFree(pIter);
13988 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
13989
13990 fts5StructureRelease(pStruct);
13991 #ifdef SQLITE_DEBUG
13992 fts5BufferFree(&term);
13993 #endif
13994 fts5BufferFree(&poslist);
13995 return fts5IndexReturn(p);
13996 }
13997
13998 /*************************************************************************
13999 **************************************************************************
14000 ** Below this point is the implementation of the fts5_decode() scalar
14001 ** function only.
14002 */
14003
14004 /*
14005 ** Decode a segment-data rowid from the %_data table. This function is
14006 ** the opposite of macro FTS5_SEGMENT_ROWID().
14007 */
14008 static void fts5DecodeRowid(
14009 i64 iRowid, /* Rowid from %_data table */
14010 int *piSegid, /* OUT: Segment id */
14011 int *pbDlidx, /* OUT: Dlidx flag */
14012 int *piHeight, /* OUT: Height */
14013 int *piPgno /* OUT: Page number */
14014 ){
14015 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
14016 iRowid >>= FTS5_DATA_PAGE_B;
14017
14018 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
14019 iRowid >>= FTS5_DATA_HEIGHT_B;
14020
14021 *pbDlidx = (int)(iRowid & 0x0001);
14022 iRowid >>= FTS5_DATA_DLI_B;
14023
14024 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
14025 }
14026
14027 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
14028 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
14029 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
14030
14031 if( iSegid==0 ){
14032 if( iKey==FTS5_AVERAGES_ROWID ){
14033 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
14034 }else{
14035 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
14036 }
14037 }
14038 else{
14039 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
14040 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
14041 );
14042 }
14043 }
14044
14045 static void fts5DebugStructure(
14046 int *pRc, /* IN/OUT: error code */
14047 Fts5Buffer *pBuf,
14048 Fts5Structure *p
14049 ){
14050 int iLvl, iSeg; /* Iterate through levels, segments */
14051
14052 for(iLvl=0; iLvl<p->nLevel; iLvl++){
14053 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
14054 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
14055 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
14056 );
14057 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
14058 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
14059 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
14060 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
14061 );
14062 }
14063 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
14064 }
14065 }
14066
14067 /*
14068 ** This is part of the fts5_decode() debugging aid.
14069 **
14070 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
14071 ** function appends a human-readable representation of the same object
14072 ** to the buffer passed as the second argument.
14073 */
14074 static void fts5DecodeStructure(
14075 int *pRc, /* IN/OUT: error code */
14076 Fts5Buffer *pBuf,
14077 const u8 *pBlob, int nBlob
14078 ){
14079 int rc; /* Return code */
14080 Fts5Structure *p = 0; /* Decoded structure object */
14081
14082 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
14083 if( rc!=SQLITE_OK ){
14084 *pRc = rc;
14085 return;
14086 }
14087
14088 fts5DebugStructure(pRc, pBuf, p);
14089 fts5StructureRelease(p);
14090 }
14091
14092 /*
14093 ** This is part of the fts5_decode() debugging aid.
14094 **
14095 ** Arguments pBlob/nBlob contain an "averages" record. This function
14096 ** appends a human-readable representation of record to the buffer passed
14097 ** as the second argument.
14098 */
14099 static void fts5DecodeAverages(
14100 int *pRc, /* IN/OUT: error code */
14101 Fts5Buffer *pBuf,
14102 const u8 *pBlob, int nBlob
14103 ){
14104 int i = 0;
14105 const char *zSpace = "";
14106
14107 while( i<nBlob ){
14108 u64 iVal;
14109 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
14110 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
14111 zSpace = " ";
14112 }
14113 }
14114
14115 /*
14116 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
14117 ** each varint and append its string representation to buffer pBuf. Return
14118 ** after either the input buffer is exhausted or a 0 value is read.
14119 **
14120 ** The return value is the number of bytes read from the input buffer.
14121 */
14122 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
14123 int iOff = 0;
14124 while( iOff<n ){
14125 int iVal;
14126 iOff += fts5GetVarint32(&a[iOff], iVal);
14127 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
14128 }
14129 return iOff;
14130 }
14131
14132 /*
14133 ** The start of buffer (a/n) contains the start of a doclist. The doclist
14134 ** may or may not finish within the buffer. This function appends a text
14135 ** representation of the part of the doclist that is present to buffer
14136 ** pBuf.
14137 **
14138 ** The return value is the number of bytes read from the input buffer.
14139 */
14140 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
14141 i64 iDocid = 0;
14142 int iOff = 0;
14143
14144 if( n>0 ){
14145 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
14146 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
14147 }
14148 while( iOff<n ){
14149 int nPos;
14150 int bDel;
14151 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
14152 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
14153 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
14154 if( iOff<n ){
14155 i64 iDelta;
14156 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
14157 iDocid += iDelta;
14158 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
14159 }
14160 }
14161
14162 return iOff;
14163 }
14164
14165 /*
14166 ** This function is part of the fts5_decode() debugging function. It is
14167 ** only ever used with detail=none tables.
14168 **
14169 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
14170 ** tables. This function appends a human-readable version of that list to
14171 ** buffer pBuf.
14172 **
14173 ** If *pRc is other than SQLITE_OK when this function is called, it is a
14174 ** no-op. If an OOM or other error occurs within this function, *pRc is
14175 ** set to an SQLite error code before returning. The final state of buffer
14176 ** pBuf is undefined in this case.
14177 */
14178 static void fts5DecodeRowidList(
14179 int *pRc, /* IN/OUT: Error code */
14180 Fts5Buffer *pBuf, /* Buffer to append text to */
14181 const u8 *pData, int nData /* Data to decode list-of-rowids from */
14182 ){
14183 int i = 0;
14184 i64 iRowid = 0;
14185
14186 while( i<nData ){
14187 const char *zApp = "";
14188 u64 iVal;
14189 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
14190 iRowid += iVal;
14191
14192 if( i<nData && pData[i]==0x00 ){
14193 i++;
14194 if( i<nData && pData[i]==0x00 ){
14195 i++;
14196 zApp = "+";
14197 }else{
14198 zApp = "*";
14199 }
14200 }
14201
14202 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
14203 }
14204 }
14205
14206 /*
14207 ** The implementation of user-defined scalar function fts5_decode().
14208 */
14209 static void fts5DecodeFunction(
14210 sqlite3_context *pCtx, /* Function call context */
14211 int nArg, /* Number of args (always 2) */
14212 sqlite3_value **apVal /* Function arguments */
14213 ){
14214 i64 iRowid; /* Rowid for record being decoded */
14215 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
14216 const u8 *aBlob; int n; /* Record to decode */
14217 u8 *a = 0;
14218 Fts5Buffer s; /* Build up text to return here */
14219 int rc = SQLITE_OK; /* Return code */
14220 int nSpace = 0;
14221 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
14222
14223 assert( nArg==2 );
14224 UNUSED_PARAM(nArg);
14225 memset(&s, 0, sizeof(Fts5Buffer));
14226 iRowid = sqlite3_value_int64(apVal[0]);
14227
14228 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
14229 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
14230 ** buffer overreads even if the record is corrupt. */
14231 n = sqlite3_value_bytes(apVal[1]);
14232 aBlob = sqlite3_value_blob(apVal[1]);
14233 nSpace = n + FTS5_DATA_ZERO_PADDING;
14234 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
14235 if( a==0 ) goto decode_out;
14236 memcpy(a, aBlob, n);
14237
14238
14239 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
14240
14241 fts5DebugRowid(&rc, &s, iRowid);
14242 if( bDlidx ){
14243 Fts5Data dlidx;
14244 Fts5DlidxLvl lvl;
14245
14246 dlidx.p = a;
14247 dlidx.nn = n;
14248
14249 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
14250 lvl.pData = &dlidx;
14251 lvl.iLeafPgno = iPgno;
14252
14253 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
14254 sqlite3Fts5BufferAppendPrintf(&rc, &s,
14255 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
14256 );
14257 }
14258 }else if( iSegid==0 ){
14259 if( iRowid==FTS5_AVERAGES_ROWID ){
14260 fts5DecodeAverages(&rc, &s, a, n);
14261 }else{
14262 fts5DecodeStructure(&rc, &s, a, n);
14263 }
14264 }else if( eDetailNone ){
14265 Fts5Buffer term; /* Current term read from page */
14266 int szLeaf;
14267 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
14268 int iTermOff;
14269 int nKeep = 0;
14270 int iOff;
14271
14272 memset(&term, 0, sizeof(Fts5Buffer));
14273
14274 /* Decode any entries that occur before the first term. */
14275 if( szLeaf<n ){
14276 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
14277 }else{
14278 iTermOff = szLeaf;
14279 }
14280 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
14281
14282 iOff = iTermOff;
14283 while( iOff<szLeaf ){
14284 int nAppend;
14285
14286 /* Read the term data for the next term*/
14287 iOff += fts5GetVarint32(&a[iOff], nAppend);
14288 term.n = nKeep;
14289 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
14290 sqlite3Fts5BufferAppendPrintf(
14291 &rc, &s, " term=%.*s", term.n, (const char*)term.p
14292 );
14293 iOff += nAppend;
14294
14295 /* Figure out where the doclist for this term ends */
14296 if( iPgidxOff<n ){
14297 int nIncr;
14298 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
14299 iTermOff += nIncr;
14300 }else{
14301 iTermOff = szLeaf;
14302 }
14303
14304 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
14305 iOff = iTermOff;
14306 if( iOff<szLeaf ){
14307 iOff += fts5GetVarint32(&a[iOff], nKeep);
14308 }
14309 }
14310
14311 fts5BufferFree(&term);
14312 }else{
14313 Fts5Buffer term; /* Current term read from page */
14314 int szLeaf; /* Offset of pgidx in a[] */
14315 int iPgidxOff;
14316 int iPgidxPrev = 0; /* Previous value read from pgidx */
14317 int iTermOff = 0;
14318 int iRowidOff = 0;
14319 int iOff;
14320 int nDoclist;
14321
14322 memset(&term, 0, sizeof(Fts5Buffer));
14323
14324 if( n<4 ){
14325 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
14326 goto decode_out;
14327 }else{
14328 iRowidOff = fts5GetU16(&a[0]);
14329 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
14330 if( iPgidxOff<n ){
14331 fts5GetVarint32(&a[iPgidxOff], iTermOff);
14332 }
14333 }
14334
14335 /* Decode the position list tail at the start of the page */
14336 if( iRowidOff!=0 ){
14337 iOff = iRowidOff;
14338 }else if( iTermOff!=0 ){
14339 iOff = iTermOff;
14340 }else{
14341 iOff = szLeaf;
14342 }
14343 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
14344
14345 /* Decode any more doclist data that appears on the page before the
14346 ** first term. */
14347 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
14348 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
14349
14350 while( iPgidxOff<n ){
14351 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
14352 int nByte; /* Bytes of data */
14353 int iEnd;
14354
14355 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
14356 iPgidxPrev += nByte;
14357 iOff = iPgidxPrev;
14358
14359 if( iPgidxOff<n ){
14360 fts5GetVarint32(&a[iPgidxOff], nByte);
14361 iEnd = iPgidxPrev + nByte;
14362 }else{
14363 iEnd = szLeaf;
14364 }
14365
14366 if( bFirst==0 ){
14367 iOff += fts5GetVarint32(&a[iOff], nByte);
14368 term.n = nByte;
14369 }
14370 iOff += fts5GetVarint32(&a[iOff], nByte);
14371 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
14372 iOff += nByte;
14373
14374 sqlite3Fts5BufferAppendPrintf(
14375 &rc, &s, " term=%.*s", term.n, (const char*)term.p
14376 );
14377 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
14378 }
14379
14380 fts5BufferFree(&term);
14381 }
14382
14383 decode_out:
14384 sqlite3_free(a);
14385 if( rc==SQLITE_OK ){
14386 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
14387 }else{
14388 sqlite3_result_error_code(pCtx, rc);
14389 }
14390 fts5BufferFree(&s);
14391 }
14392
14393 /*
14394 ** The implementation of user-defined scalar function fts5_rowid().
14395 */
14396 static void fts5RowidFunction(
14397 sqlite3_context *pCtx, /* Function call context */
14398 int nArg, /* Number of args (always 2) */
14399 sqlite3_value **apVal /* Function arguments */
14400 ){
14401 const char *zArg;
14402 if( nArg==0 ){
14403 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
14404 }else{
14405 zArg = (const char*)sqlite3_value_text(apVal[0]);
14406 if( 0==sqlite3_stricmp(zArg, "segment") ){
14407 i64 iRowid;
14408 int segid, pgno;
14409 if( nArg!=3 ){
14410 sqlite3_result_error(pCtx,
14411 "should be: fts5_rowid('segment', segid, pgno))", -1
14412 );
14413 }else{
14414 segid = sqlite3_value_int(apVal[1]);
14415 pgno = sqlite3_value_int(apVal[2]);
14416 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
14417 sqlite3_result_int64(pCtx, iRowid);
14418 }
14419 }else{
14420 sqlite3_result_error(pCtx,
14421 "first arg to fts5_rowid() must be 'segment'" , -1
14422 );
14423 }
14424 }
14425 }
14426
14427 /*
14428 ** This is called as part of registering the FTS5 module with database
14429 ** connection db. It registers several user-defined scalar functions useful
14430 ** with FTS5.
14431 **
14432 ** If successful, SQLITE_OK is returned. If an error occurs, some other
14433 ** SQLite error code is returned instead.
14434 */
14435 static int sqlite3Fts5IndexInit(sqlite3 *db){
14436 int rc = sqlite3_create_function(
14437 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
14438 );
14439
14440 if( rc==SQLITE_OK ){
14441 rc = sqlite3_create_function(
14442 db, "fts5_decode_none", 2,
14443 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
14444 );
14445 }
14446
14447 if( rc==SQLITE_OK ){
14448 rc = sqlite3_create_function(
14449 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
14450 );
14451 }
14452 return rc;
14453 }
14454
14455
14456 static int sqlite3Fts5IndexReset(Fts5Index *p){
14457 assert( p->pStruct==0 || p->iStructVersion!=0 );
14458 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
14459 fts5StructureInvalidate(p);
14460 }
14461 return fts5IndexReturn(p);
14462 }
14463
14464 /*
14465 ** 2014 Jun 09
14466 **
14467 ** The author disclaims copyright to this source code. In place of
14468 ** a legal notice, here is a blessing:
14469 **
14470 ** May you do good and not evil.
14471 ** May you find forgiveness for yourself and forgive others.
14472 ** May you share freely, never taking more than you give.
14473 **
14474 ******************************************************************************
14475 **
14476 ** This is an SQLite module implementing full-text search.
14477 */
14478
14479
14480 /* #include "fts5Int.h" */
14481
14482 /*
14483 ** This variable is set to false when running tests for which the on disk
14484 ** structures should not be corrupt. Otherwise, true. If it is false, extra
14485 ** assert() conditions in the fts5 code are activated - conditions that are
14486 ** only true if it is guaranteed that the fts5 database is not corrupt.
14487 */
14488 SQLITE_API int sqlite3_fts5_may_be_corrupt = 1; /* Initialized to true: corruption is assumed possible */
14489
14490
/* Forward typedefs, so that the struct definitions that follow can refer
** to these types by pointer before the types themselves are defined. */
14491 typedef struct Fts5Auxdata Fts5Auxdata;
14492 typedef struct Fts5Auxiliary Fts5Auxiliary;
14493 typedef struct Fts5Cursor Fts5Cursor;
14494 typedef struct Fts5Sorter Fts5Sorter;
14495 typedef struct Fts5Table Fts5Table;
14496 typedef struct Fts5TokenizerModule Fts5TokenizerModule;
14497
14498 /*
14499 ** NOTES ON TRANSACTIONS:
14500 **
14501 ** SQLite invokes the following virtual table methods as transactions are
14502 ** opened and closed by the user:
14503 **
14504 ** xBegin(): Start of a new transaction.
14505 ** xSync(): Initial part of two-phase commit.
14506 ** xCommit(): Final part of two-phase commit.
14507 ** xRollback(): Rollback the transaction.
14508 **
14509 ** Anything that is required as part of a commit that may fail is performed
14510 ** in the xSync() callback. Current versions of SQLite ignore any errors
14511 ** returned by xCommit().
14512 **
14513 ** And as sub-transactions are opened/closed:
14514 **
14515 ** xSavepoint(int S): Open savepoint S.
14516 ** xRelease(int S): Commit and close savepoint S.
14517 ** xRollbackTo(int S): Rollback to start of savepoint S.
14518 **
14519 ** During a write-transaction the fts5_index.c module may cache some data
14520 ** in-memory. It is flushed to disk whenever xSync(), xRelease() or
14521 ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
14522 ** is called.
14523 **
14524 ** Additionally, if SQLITE_DEBUG is defined, an instance of the following
14525 ** structure is used to record the current transaction state. This information
14526 ** is not required, but it is used in the assert() statements executed by
14527 ** function fts5CheckTransactionState() (see below).
14528 */
/* Transaction-state record. Fts5Table embeds one instance (member "ts")
** when SQLITE_DEBUG is defined. */
14529 struct Fts5TransactionState {
14530 int eState; /* 0==closed, 1==open, 2==synced */
14531 int iSavepoint; /* Number of open savepoints (0 -> none) */
14532 };
14533
14534 /*
14535 ** A single object of this type is allocated when the FTS5 module is
14536 ** registered with a database handle. It is used to store pointers to
14537 ** all registered FTS5 extensions - tokenizers and auxiliary functions.
**
** Members pAux, pTok and pCsr are each the head of a separate list
** (auxiliary functions, tokenizer modules and open cursors respectively).
14538 */
14539 struct Fts5Global {
14540 fts5_api api; /* User visible part of object (see fts5.h) */
14541 sqlite3 *db; /* Associated database connection */
14542 i64 iNextId; /* Used to allocate unique cursor ids */
14543 Fts5Auxiliary *pAux; /* First in list of all aux. functions */
14544 Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */
14545 Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */
14546 Fts5Cursor *pCsr; /* First in list of all open cursors */
14547 };
14548
14549 /*
14550 ** Each auxiliary function registered with the FTS5 module is represented
14551 ** by an object of the following type. All such objects are stored as part
14552 ** of the Fts5Global.pAux list.
14553 */
14554 struct Fts5Auxiliary {
14555 Fts5Global *pGlobal; /* Global context for this function */
14556 char *zFunc; /* Function name (nul-terminated) */
14557 void *pUserData; /* User-data pointer */
14558 fts5_extension_function xFunc; /* Callback function */
14559 void (*xDestroy)(void*); /* Destructor function; presumably invoked on pUserData - confirm at the call site */
14560 Fts5Auxiliary *pNext; /* Next registered auxiliary function (link in the Fts5Global.pAux list) */
14561 };
14562
14563 /*
14564 ** Each tokenizer module registered with the FTS5 module is represented
14565 ** by an object of the following type. All such objects are stored as part
14566 ** of the Fts5Global.pTok list.
14567 */
14568 struct Fts5TokenizerModule {
14569 char *zName; /* Name of tokenizer */
14570 void *pUserData; /* User pointer passed to xCreate() */
14571 fts5_tokenizer x; /* Tokenizer functions */
14572 void (*xDestroy)(void*); /* Destructor function */
14573 Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
14574 };
14575
14576 /*
14577 ** Virtual-table object.
14578 */
14579 struct Fts5Table {
14580 sqlite3_vtab base; /* Base class used by SQLite core */
14581 Fts5Config *pConfig; /* Virtual table configuration */
14582 Fts5Index *pIndex; /* Full-text index */
14583 Fts5Storage *pStorage; /* Document store */
14584 Fts5Global *pGlobal; /* Global (connection wide) data */
14585 Fts5Cursor *pSortCsr; /* Sort data from this cursor */
14586 #ifdef SQLITE_DEBUG
14587 struct Fts5TransactionState ts;
14588 #endif
14589 };
14590
14591 struct Fts5MatchPhrase {
14592 Fts5Buffer *pPoslist; /* Pointer to current poslist */
14593 int nTerm; /* Size of phrase in terms */
14594 };
14595
14596 /*
14597 ** pStmt:
14598 ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
14599 **
14600 ** aIdx[]:
14601 ** There is one entry in the aIdx[] array for each phrase in the query,
14602 ** the value of which is the offset within aPoslist[] following the last
14603 ** byte of the position list for the corresponding phrase.
14604 */
14605 struct Fts5Sorter {
14606 sqlite3_stmt *pStmt;
14607 i64 iRowid; /* Current rowid */
14608 const u8 *aPoslist; /* Position lists for current row */
14609 int nIdx; /* Number of entries in aIdx[] */
14610 int aIdx[1]; /* Offsets into aPoslist for current row */
14611 };
14612
14613
14614 /*
14615 ** Virtual-table cursor object.
14616 **
14617 ** iSpecial:
14618 ** If this is a 'special' query (refer to function fts5SpecialMatch()),
14619 ** then this variable contains the result of the query.
14620 **
14621 ** iFirstRowid, iLastRowid:
14622 ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
14623 ** cursor iterates in ascending order of rowids, iFirstRowid is the lower
14624 ** limit of rowids to return, and iLastRowid the upper. In other words, the
14625 ** WHERE clause in the user's query might have been:
14626 **
14627 ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
14628 **
14629 ** If the cursor iterates in descending order of rowid, iFirstRowid
14630 ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid
14631 ** the lower.
14632 */
14633 struct Fts5Cursor {
14634 sqlite3_vtab_cursor base; /* Base class used by SQLite core */
14635 Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */
14636 int *aColumnSize; /* Values for xColumnSize() */
14637 i64 iCsrId; /* Cursor id */
14638
14639 /* Zero from this point onwards on cursor reset */
14640 int ePlan; /* FTS5_PLAN_XXX value */
14641 int bDesc; /* True for "ORDER BY rowid DESC" queries */
14642 i64 iFirstRowid; /* Return no rowids earlier than this */
14643 i64 iLastRowid; /* Return no rowids later than this */
14644 sqlite3_stmt *pStmt; /* Statement used to read %_content */
14645 Fts5Expr *pExpr; /* Expression for MATCH queries */
14646 Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */
14647 int csrflags; /* Mask of cursor flags (see below) */
14648 i64 iSpecial; /* Result of special query */
14649
14650 /* "rank" function. Populated on demand from vtab.xColumn(). */
14651 char *zRank; /* Custom rank function */
14652 char *zRankArgs; /* Custom rank function args */
14653 Fts5Auxiliary *pRank; /* Rank callback (or NULL) */
14654 int nRankArg; /* Number of trailing arguments for rank() */
14655 sqlite3_value **apRankArg; /* Array of trailing arguments */
14656 sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */
14657
14658 /* Auxiliary data storage */
14659 Fts5Auxiliary *pAux; /* Currently executing extension function */
14660 Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
14661
14662 /* Cache used by auxiliary functions xInst() and xInstCount() */
14663 Fts5PoslistReader *aInstIter; /* One for each phrase */
14664 int nInstAlloc; /* Size of aInst[] array (entries / 3) */
14665 int nInstCount; /* Number of phrase instances */
14666 int *aInst; /* 3 integers per phrase instance */
14667 };
14668
/*
** Bit values combined by xBestIndex() into the "idxNum" value that is
** later handed to xFilter(). The first group records which WHERE clause
** terms are in use; the second group describes the ORDER BY clause.
*/
#define FTS5_BI_MATCH        0x0001    /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002    /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004    /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008    /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010    /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020    /* ORDER BY rank */
#define FTS5_BI_ORDER_ROWID  0x0040    /* ORDER BY rowid */
#define FTS5_BI_ORDER_DESC   0x0080    /* ORDER BY term is DESC */
14682
/*
** Bit values stored in Fts5Cursor.csrflags.
**
** NOTE(review): fts5NextMethod() hands the return value of
** sqlite3Fts5ExprEof() directly to CsrFlagSet(), so FTS5CSR_EOF presumably
** has to stay equal to the "true" value that function returns - confirm
** before renumbering.
*/
#define FTS5CSR_EOF               0x01
#define FTS5CSR_REQUIRE_CONTENT   0x02
#define FTS5CSR_REQUIRE_DOCSIZE   0x04
#define FTS5CSR_REQUIRE_INST      0x08
#define FTS5CSR_FREE_ZRANK        0x10
#define FTS5CSR_REQUIRE_RESEEK    0x20
#define FTS5CSR_REQUIRE_POSLIST   0x40
14693
/*
** BitFlagAllTest(x,y) is true if every bit of mask y is set in x.
** BitFlagTest(x,y) is true if at least one bit of mask y is set in x.
*/
#define BitFlagAllTest(x,y) ((y)==((x) & (y)))
#define BitFlagTest(x,y)    (0!=((x) & (y)))
14696
14697
/*
** Helper macros that set, clear and test bits in Fts5Cursor.csrflags.
** CsrFlagTest() evaluates to the masked bits themselves (not 0 or 1).
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
14704
14705 struct Fts5Auxdata {
14706 Fts5Auxiliary *pAux; /* Extension to which this belongs */
14707 void *pPtr; /* Pointer value */
14708 void(*xDelete)(void*); /* Destructor */
14709 Fts5Auxdata *pNext; /* Next object in linked list */
14710 };
14711
#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
/*
** Debug builds only: assert() that transaction operation op (one of the
** FTS5_XXX values above) is legal given the state recorded in p->ts, then
** update that state. iSavepoint is consulted only for the SAVEPOINT,
** RELEASE and ROLLBACKTO operations. Unrecognized op values are ignored.
**
** When SQLITE_DEBUG is not defined this is a macro that expands to
** nothing.
*/
static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){
  struct Fts5TransactionState *pTs = &p->ts;

  if( op==FTS5_BEGIN ){
    assert( pTs->eState==0 );
    pTs->eState = 1;
    pTs->iSavepoint = -1;
  }else if( op==FTS5_SYNC ){
    assert( pTs->eState==1 );
    pTs->eState = 2;
  }else if( op==FTS5_COMMIT ){
    assert( pTs->eState==2 );
    pTs->eState = 0;
  }else if( op==FTS5_ROLLBACK ){
    /* A rollback is permitted in any of the three states */
    assert( pTs->eState==1 || pTs->eState==2 || pTs->eState==0 );
    pTs->eState = 0;
  }else if( op==FTS5_SAVEPOINT ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint>pTs->iSavepoint );
    pTs->iSavepoint = iSavepoint;
  }else if( op==FTS5_RELEASE ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint<=pTs->iSavepoint );
    pTs->iSavepoint = iSavepoint-1;
  }else if( op==FTS5_ROLLBACKTO ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint<=pTs->iSavepoint );
    pTs->iSavepoint = iSavepoint;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
14768
14769 /*
14770 ** Return true if pTab is a contentless table.
14771 */
14772 static int fts5IsContentless(Fts5Table *pTab){
14773 return pTab->pConfig->eContent==FTS5_CONTENT_NONE;
14774 }
14775
14776 /*
14777 ** Delete a virtual table handle allocated by fts5InitVtab().
14778 */
14779 static void fts5FreeVtab(Fts5Table *pTab){
14780 if( pTab ){
14781 sqlite3Fts5IndexClose(pTab->pIndex);
14782 sqlite3Fts5StorageClose(pTab->pStorage);
14783 sqlite3Fts5ConfigFree(pTab->pConfig);
14784 sqlite3_free(pTab);
14785 }
14786 }
14787
14788 /*
14789 ** The xDisconnect() virtual table method.
14790 */
14791 static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
14792 fts5FreeVtab((Fts5Table*)pVtab);
14793 return SQLITE_OK;
14794 }
14795
14796 /*
14797 ** The xDestroy() virtual table method.
14798 */
14799 static int fts5DestroyMethod(sqlite3_vtab *pVtab){
14800 Fts5Table *pTab = (Fts5Table*)pVtab;
14801 int rc = sqlite3Fts5DropAll(pTab->pConfig);
14802 if( rc==SQLITE_OK ){
14803 fts5FreeVtab((Fts5Table*)pVtab);
14804 }
14805 return rc;
14806 }
14807
14808 /*
14809 ** This function is the implementation of both the xConnect and xCreate
14810 ** methods of the FTS3 virtual table.
14811 **
14812 ** The argv[] array contains the following:
14813 **
14814 ** argv[0] -> module name ("fts5")
14815 ** argv[1] -> database name
14816 ** argv[2] -> table name
14817 ** argv[...] -> "column name" and other module argument fields.
14818 */
14819 static int fts5InitVtab(
14820 int bCreate, /* True for xCreate, false for xConnect */
14821 sqlite3 *db, /* The SQLite database connection */
14822 void *pAux, /* Hash table containing tokenizers */
14823 int argc, /* Number of elements in argv array */
14824 const char * const *argv, /* xCreate/xConnect argument array */
14825 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
14826 char **pzErr /* Write any error message here */
14827 ){
14828 Fts5Global *pGlobal = (Fts5Global*)pAux;
14829 const char **azConfig = (const char**)argv;
14830 int rc = SQLITE_OK; /* Return code */
14831 Fts5Config *pConfig = 0; /* Results of parsing argc/argv */
14832 Fts5Table *pTab = 0; /* New virtual table object */
14833
14834 /* Allocate the new vtab object and parse the configuration */
14835 pTab = (Fts5Table*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Table));
14836 if( rc==SQLITE_OK ){
14837 rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
14838 assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
14839 }
14840 if( rc==SQLITE_OK ){
14841 pTab->pConfig = pConfig;
14842 pTab->pGlobal = pGlobal;
14843 }
14844
14845 /* Open the index sub-system */
14846 if( rc==SQLITE_OK ){
14847 rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr);
14848 }
14849
14850 /* Open the storage sub-system */
14851 if( rc==SQLITE_OK ){
14852 rc = sqlite3Fts5StorageOpen(
14853 pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr
14854 );
14855 }
14856
14857 /* Call sqlite3_declare_vtab() */
14858 if( rc==SQLITE_OK ){
14859 rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
14860 }
14861
14862 /* Load the initial configuration */
14863 if( rc==SQLITE_OK ){
14864 assert( pConfig->pzErrmsg==0 );
14865 pConfig->pzErrmsg = pzErr;
14866 rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
14867 sqlite3Fts5IndexRollback(pTab->pIndex);
14868 pConfig->pzErrmsg = 0;
14869 }
14870
14871 if( rc!=SQLITE_OK ){
14872 fts5FreeVtab(pTab);
14873 pTab = 0;
14874 }else if( bCreate ){
14875 fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
14876 }
14877 *ppVTab = (sqlite3_vtab*)pTab;
14878 return rc;
14879 }
14880
14881 /*
14882 ** The xConnect() and xCreate() methods for the virtual table. All the
14883 ** work is done in function fts5InitVtab().
14884 */
14885 static int fts5ConnectMethod(
14886 sqlite3 *db, /* Database connection */
14887 void *pAux, /* Pointer to tokenizer hash table */
14888 int argc, /* Number of elements in argv array */
14889 const char * const *argv, /* xCreate/xConnect argument array */
14890 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
14891 char **pzErr /* OUT: sqlite3_malloc'd error message */
14892 ){
14893 return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
14894 }
14895 static int fts5CreateMethod(
14896 sqlite3 *db, /* Database connection */
14897 void *pAux, /* Pointer to tokenizer hash table */
14898 int argc, /* Number of elements in argv array */
14899 const char * const *argv, /* xCreate/xConnect argument array */
14900 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
14901 char **pzErr /* OUT: sqlite3_malloc'd error message */
14902 ){
14903 return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
14904 }
14905
/*
** The query plans a cursor may use (values of Fts5Cursor.ePlan).
**
** The numbering is significant: fts5NextMethod() tests "ePlan<3" as a
** shorthand for "FTS5_PLAN_MATCH or FTS5_PLAN_SOURCE".
*/
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* Source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* Internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
14915
14916 /*
14917 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
14918 ** extension is currently being used by a version of SQLite too old to
14919 ** support index-info flags. In that case this function is a no-op.
14920 */
14921 static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
14922 #if SQLITE_VERSION_NUMBER>=3008012
14923 #ifndef SQLITE_CORE
14924 if( sqlite3_libversion_number()>=3008012 )
14925 #endif
14926 {
14927 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
14928 }
14929 #endif
14930 }
14931
14932 /*
14933 ** Implementation of the xBestIndex method for FTS5 tables. Within the
14934 ** WHERE constraint, it searches for the following:
14935 **
14936 ** 1. A MATCH constraint against the special column.
14937 ** 2. A MATCH constraint against the "rank" column.
14938 ** 3. An == constraint against the rowid column.
14939 ** 4. A < or <= constraint against the rowid column.
14940 ** 5. A > or >= constraint against the rowid column.
14941 **
14942 ** Within the ORDER BY, either:
14943 **
14944 ** 5. ORDER BY rank [ASC|DESC]
14945 ** 6. ORDER BY rowid [ASC|DESC]
14946 **
14947 ** Costs are assigned as follows:
14948 **
14949 ** a) If an unusable MATCH operator is present in the WHERE clause, the
14950 ** cost is unconditionally set to 1e50 (a really big number).
14951 **
14952 ** a) If a MATCH operator is present, the cost depends on the other
14953 ** constraints also present. As follows:
14954 **
14955 ** * No other constraints: cost=1000.0
14956 ** * One rowid range constraint: cost=750.0
14957 ** * Both rowid range constraints: cost=500.0
14958 ** * An == rowid constraint: cost=100.0
14959 **
14960 ** b) Otherwise, if there is no MATCH:
14961 **
14962 ** * No other constraints: cost=1000000.0
14963 ** * One rowid range constraint: cost=750000.0
14964 ** * Both rowid range constraints: cost=250000.0
14965 ** * An == rowid constraint: cost=10.0
14966 **
14967 ** Costs are not modified by the ORDER BY clause.
14968 */
14969 static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
14970 Fts5Table *pTab = (Fts5Table*)pVTab;
14971 Fts5Config *pConfig = pTab->pConfig;
14972 int idxFlags = 0; /* Parameter passed through to xFilter() */
14973 int bHasMatch;
14974 int iNext;
14975 int i;
14976
14977 struct Constraint {
14978 int op; /* Mask against sqlite3_index_constraint.op */
14979 int fts5op; /* FTS5 mask for idxFlags */
14980 int iCol; /* 0==rowid, 1==tbl, 2==rank */
14981 int omit; /* True to omit this if found */
14982 int iConsIndex; /* Index in pInfo->aConstraint[] */
14983 } aConstraint[] = {
14984 {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ,
14985 FTS5_BI_MATCH, 1, 1, -1},
14986 {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ,
14987 FTS5_BI_RANK, 2, 1, -1},
14988 {SQLITE_INDEX_CONSTRAINT_EQ, FTS5_BI_ROWID_EQ, 0, 0, -1},
14989 {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE,
14990 FTS5_BI_ROWID_LE, 0, 0, -1},
14991 {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE,
14992 FTS5_BI_ROWID_GE, 0, 0, -1},
14993 };
14994
14995 int aColMap[3];
14996 aColMap[0] = -1;
14997 aColMap[1] = pConfig->nCol;
14998 aColMap[2] = pConfig->nCol+1;
14999
15000 /* Set idxFlags flags for all WHERE clause terms that will be used. */
15001 for(i=0; i<pInfo->nConstraint; i++){
15002 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
15003 int j;
15004 for(j=0; j<ArraySize(aConstraint); j++){
15005 struct Constraint *pC = &aConstraint[j];
15006 if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){
15007 if( p->usable ){
15008 pC->iConsIndex = i;
15009 idxFlags |= pC->fts5op;
15010 }else if( j==0 ){
15011 /* As there exists an unusable MATCH constraint this is an
15012 ** unusable plan. Set a prohibitively high cost. */
15013 pInfo->estimatedCost = 1e50;
15014 return SQLITE_OK;
15015 }
15016 }
15017 }
15018 }
15019
15020 /* Set idxFlags flags for the ORDER BY clause */
15021 if( pInfo->nOrderBy==1 ){
15022 int iSort = pInfo->aOrderBy[0].iColumn;
15023 if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){
15024 idxFlags |= FTS5_BI_ORDER_RANK;
15025 }else if( iSort==-1 ){
15026 idxFlags |= FTS5_BI_ORDER_ROWID;
15027 }
15028 if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
15029 pInfo->orderByConsumed = 1;
15030 if( pInfo->aOrderBy[0].desc ){
15031 idxFlags |= FTS5_BI_ORDER_DESC;
15032 }
15033 }
15034 }
15035
15036 /* Calculate the estimated cost based on the flags set in idxFlags. */
15037 bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH);
15038 if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){
15039 pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0;
15040 if( bHasMatch==0 ) fts5SetUniqueFlag(pInfo);
15041 }else if( BitFlagAllTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
15042 pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0;
15043 }else if( BitFlagTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
15044 pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
15045 }else{
15046 pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
15047 }
15048
15049 /* Assign argvIndex values to each constraint in use. */
15050 iNext = 1;
15051 for(i=0; i<ArraySize(aConstraint); i++){
15052 struct Constraint *pC = &aConstraint[i];
15053 if( pC->iConsIndex>=0 ){
15054 pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++;
15055 pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit;
15056 }
15057 }
15058
15059 pInfo->idxNum = idxFlags;
15060 return SQLITE_OK;
15061 }
15062
15063 static int fts5NewTransaction(Fts5Table *pTab){
15064 Fts5Cursor *pCsr;
15065 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
15066 if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK;
15067 }
15068 return sqlite3Fts5StorageReset(pTab->pStorage);
15069 }
15070
15071 /*
15072 ** Implementation of xOpen method.
15073 */
15074 static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
15075 Fts5Table *pTab = (Fts5Table*)pVTab;
15076 Fts5Config *pConfig = pTab->pConfig;
15077 Fts5Cursor *pCsr = 0; /* New cursor object */
15078 int nByte; /* Bytes of space to allocate */
15079 int rc; /* Return code */
15080
15081 rc = fts5NewTransaction(pTab);
15082 if( rc==SQLITE_OK ){
15083 nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
15084 pCsr = (Fts5Cursor*)sqlite3_malloc(nByte);
15085 if( pCsr ){
15086 Fts5Global *pGlobal = pTab->pGlobal;
15087 memset(pCsr, 0, nByte);
15088 pCsr->aColumnSize = (int*)&pCsr[1];
15089 pCsr->pNext = pGlobal->pCsr;
15090 pGlobal->pCsr = pCsr;
15091 pCsr->iCsrId = ++pGlobal->iNextId;
15092 }else{
15093 rc = SQLITE_NOMEM;
15094 }
15095 }
15096 *ppCsr = (sqlite3_vtab_cursor*)pCsr;
15097 return rc;
15098 }
15099
15100 static int fts5StmtType(Fts5Cursor *pCsr){
15101 if( pCsr->ePlan==FTS5_PLAN_SCAN ){
15102 return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC;
15103 }
15104 return FTS5_STMT_LOOKUP;
15105 }
15106
15107 /*
15108 ** This function is called after the cursor passed as the only argument
15109 ** is moved to point at a different row. It clears all cached data
15110 ** specific to the previous row stored by the cursor object.
15111 */
15112 static void fts5CsrNewrow(Fts5Cursor *pCsr){
15113 CsrFlagSet(pCsr,
15114 FTS5CSR_REQUIRE_CONTENT
15115 | FTS5CSR_REQUIRE_DOCSIZE
15116 | FTS5CSR_REQUIRE_INST
15117 | FTS5CSR_REQUIRE_POSLIST
15118 );
15119 }
15120
15121 static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
15122 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
15123 Fts5Auxdata *pData;
15124 Fts5Auxdata *pNext;
15125
15126 sqlite3_free(pCsr->aInstIter);
15127 sqlite3_free(pCsr->aInst);
15128 if( pCsr->pStmt ){
15129 int eStmt = fts5StmtType(pCsr);
15130 sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
15131 }
15132 if( pCsr->pSorter ){
15133 Fts5Sorter *pSorter = pCsr->pSorter;
15134 sqlite3_finalize(pSorter->pStmt);
15135 sqlite3_free(pSorter);
15136 }
15137
15138 if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
15139 sqlite3Fts5ExprFree(pCsr->pExpr);
15140 }
15141
15142 for(pData=pCsr->pAuxdata; pData; pData=pNext){
15143 pNext = pData->pNext;
15144 if( pData->xDelete ) pData->xDelete(pData->pPtr);
15145 sqlite3_free(pData);
15146 }
15147
15148 sqlite3_finalize(pCsr->pRankArgStmt);
15149 sqlite3_free(pCsr->apRankArg);
15150
15151 if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
15152 sqlite3_free(pCsr->zRank);
15153 sqlite3_free(pCsr->zRankArgs);
15154 }
15155
15156 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr));
15157 }
15158
15159
15160 /*
15161 ** Close the cursor. For additional information see the documentation
15162 ** on the xClose method of the virtual table interface.
15163 */
15164 static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
15165 if( pCursor ){
15166 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
15167 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
15168 Fts5Cursor **pp;
15169
15170 fts5FreeCursorComponents(pCsr);
15171 /* Remove the cursor from the Fts5Global.pCsr list */
15172 for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
15173 *pp = pCsr->pNext;
15174
15175 sqlite3_free(pCsr);
15176 }
15177 return SQLITE_OK;
15178 }
15179
15180 static int fts5SorterNext(Fts5Cursor *pCsr){
15181 Fts5Sorter *pSorter = pCsr->pSorter;
15182 int rc;
15183
15184 rc = sqlite3_step(pSorter->pStmt);
15185 if( rc==SQLITE_DONE ){
15186 rc = SQLITE_OK;
15187 CsrFlagSet(pCsr, FTS5CSR_EOF);
15188 }else if( rc==SQLITE_ROW ){
15189 const u8 *a;
15190 const u8 *aBlob;
15191 int nBlob;
15192 int i;
15193 int iOff = 0;
15194 rc = SQLITE_OK;
15195
15196 pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
15197 nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
15198 aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);
15199
15200 /* nBlob==0 in detail=none mode. */
15201 if( nBlob>0 ){
15202 for(i=0; i<(pSorter->nIdx-1); i++){
15203 int iVal;
15204 a += fts5GetVarint32(a, iVal);
15205 iOff += iVal;
15206 pSorter->aIdx[i] = iOff;
15207 }
15208 pSorter->aIdx[i] = &aBlob[nBlob] - a;
15209 pSorter->aPoslist = a;
15210 }
15211
15212 fts5CsrNewrow(pCsr);
15213 }
15214
15215 return rc;
15216 }
15217
15218
15219 /*
15220 ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
15221 ** open on table pTab.
15222 */
15223 static void fts5TripCursors(Fts5Table *pTab){
15224 Fts5Cursor *pCsr;
15225 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
15226 if( pCsr->ePlan==FTS5_PLAN_MATCH
15227 && pCsr->base.pVtab==(sqlite3_vtab*)pTab
15228 ){
15229 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
15230 }
15231 }
15232 }
15233
15234 /*
15235 ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
15236 ** argument, close and reopen all Fts5IndexIter iterators that the cursor
15237 ** is using. Then attempt to move the cursor to a rowid equal to or laster
15238 ** (in the cursors sort order - ASC or DESC) than the current rowid.
15239 **
15240 ** If the new rowid is not equal to the old, set output parameter *pbSkip
15241 ** to 1 before returning. Otherwise, leave it unchanged.
15242 **
15243 ** Return SQLITE_OK if successful or if no reseek was required, or an
15244 ** error code if an error occurred.
15245 */
15246 static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
15247 int rc = SQLITE_OK;
15248 assert( *pbSkip==0 );
15249 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){
15250 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
15251 int bDesc = pCsr->bDesc;
15252 i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);
15253
15254 rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iRowid, bDesc);
15255 if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
15256 *pbSkip = 1;
15257 }
15258
15259 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
15260 fts5CsrNewrow(pCsr);
15261 if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
15262 CsrFlagSet(pCsr, FTS5CSR_EOF);
15263 *pbSkip = 1;
15264 }
15265 }
15266 return rc;
15267 }
15268
15269
15270 /*
15271 ** Advance the cursor to the next row in the table that matches the
15272 ** search criteria.
15273 **
15274 ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
15275 ** even if we reach end-of-file. The fts5EofMethod() will be called
15276 ** subsequently to determine whether or not an EOF was hit.
15277 */
15278 static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
15279 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
15280 int rc;
15281
15282 assert( (pCsr->ePlan<3)==
15283 (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)
15284 );
15285 assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
15286
15287 if( pCsr->ePlan<3 ){
15288 int bSkip = 0;
15289 if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
15290 rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
15291 CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr));
15292 fts5CsrNewrow(pCsr);
15293 }else{
15294 switch( pCsr->ePlan ){
15295 case FTS5_PLAN_SPECIAL: {
15296 CsrFlagSet(pCsr, FTS5CSR_EOF);
15297 rc = SQLITE_OK;
15298 break;
15299 }
15300
15301 case FTS5_PLAN_SORTED_MATCH: {
15302 rc = fts5SorterNext(pCsr);
15303 break;
15304 }
15305
15306 default:
15307 rc = sqlite3_step(pCsr->pStmt);
15308 if( rc!=SQLITE_ROW ){
15309 CsrFlagSet(pCsr, FTS5CSR_EOF);
15310 rc = sqlite3_reset(pCsr->pStmt);
15311 }else{
15312 rc = SQLITE_OK;
15313 }
15314 break;
15315 }
15316 }
15317
15318 return rc;
15319 }
15320
15321
15322 static int fts5PrepareStatement(
15323 sqlite3_stmt **ppStmt,
15324 Fts5Config *pConfig,
15325 const char *zFmt,
15326 ...
15327 ){
15328 sqlite3_stmt *pRet = 0;
15329 int rc;
15330 char *zSql;
15331 va_list ap;
15332
15333 va_start(ap, zFmt);
15334 zSql = sqlite3_vmprintf(zFmt, ap);
15335 if( zSql==0 ){
15336 rc = SQLITE_NOMEM;
15337 }else{
15338 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pRet, 0);
15339 if( rc!=SQLITE_OK ){
15340 *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
15341 }
15342 sqlite3_free(zSql);
15343 }
15344
15345 va_end(ap);
15346 *ppStmt = pRet;
15347 return rc;
15348 }
15349
15350 static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
15351 Fts5Config *pConfig = pTab->pConfig;
15352 Fts5Sorter *pSorter;
15353 int nPhrase;
15354 int nByte;
15355 int rc;
15356 const char *zRank = pCsr->zRank;
15357 const char *zRankArgs = pCsr->zRankArgs;
15358
15359 nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
15360 nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1);
15361 pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
15362 if( pSorter==0 ) return SQLITE_NOMEM;
15363 memset(pSorter, 0, nByte);
15364 pSorter->nIdx = nPhrase;
15365
15366 /* TODO: It would be better to have some system for reusing statement
15367 ** handles here, rather than preparing a new one for each query. But that
15368 ** is not possible as SQLite reference counts the virtual table objects.
15369 ** And since the statement required here reads from this very virtual
15370 ** table, saving it creates a circular reference.
15371 **
15372 ** If SQLite a built-in statement cache, this wouldn't be a problem. */
15373 rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
15374 "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
15375 pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
15376 (zRankArgs ? ", " : ""),
15377 (zRankArgs ? zRankArgs : ""),
15378 bDesc ? "DESC" : "ASC"
15379 );
15380
15381 pCsr->pSorter = pSorter;
15382 if( rc==SQLITE_OK ){
15383 assert( pTab->pSortCsr==0 );
15384 pTab->pSortCsr = pCsr;
15385 rc = fts5SorterNext(pCsr);
15386 pTab->pSortCsr = 0;
15387 }
15388
15389 if( rc!=SQLITE_OK ){
15390 sqlite3_finalize(pSorter->pStmt);
15391 sqlite3_free(pSorter);
15392 pCsr->pSorter = 0;
15393 }
15394
15395 return rc;
15396 }
15397
15398 static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
15399 int rc;
15400 Fts5Expr *pExpr = pCsr->pExpr;
15401 rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, pCsr->iFirstRowid, bDesc);
15402 if( sqlite3Fts5ExprEof(pExpr) ){
15403 CsrFlagSet(pCsr, FTS5CSR_EOF);
15404 }
15405 fts5CsrNewrow(pCsr);
15406 return rc;
15407 }
15408
15409 /*
15410 ** Process a "special" query. A special query is identified as one with a
15411 ** MATCH expression that begins with a '*' character. The remainder of
15412 ** the text passed to the MATCH operator are used as the special query
15413 ** parameters.
15414 */
15415 static int fts5SpecialMatch(
15416 Fts5Table *pTab,
15417 Fts5Cursor *pCsr,
15418 const char *zQuery
15419 ){
15420 int rc = SQLITE_OK; /* Return code */
15421 const char *z = zQuery; /* Special query text */
15422 int n; /* Number of bytes in text at z */
15423
15424 while( z[0]==' ' ) z++;
15425 for(n=0; z[n] && z[n]!=' '; n++);
15426
15427 assert( pTab->base.zErrMsg==0 );
15428 pCsr->ePlan = FTS5_PLAN_SPECIAL;
15429
15430 if( 0==sqlite3_strnicmp("reads", z, n) ){
15431 pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex);
15432 }
15433 else if( 0==sqlite3_strnicmp("id", z, n) ){
15434 pCsr->iSpecial = pCsr->iCsrId;
15435 }
15436 else{
15437 /* An unrecognized directive. Return an error message. */
15438 pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z);
15439 rc = SQLITE_ERROR;
15440 }
15441
15442 return rc;
15443 }
15444
15445 /*
15446 ** Search for an auxiliary function named zName that can be used with table
15447 ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
15448 ** structure. Otherwise, if no such function exists, return NULL.
15449 */
15450 static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){
15451 Fts5Auxiliary *pAux;
15452
15453 for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
15454 if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
15455 }
15456
15457 /* No function of the specified name was found. Return 0. */
15458 return 0;
15459 }
15460
15461
15462 static int fts5FindRankFunction(Fts5Cursor *pCsr){
15463 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
15464 Fts5Config *pConfig = pTab->pConfig;
15465 int rc = SQLITE_OK;
15466 Fts5Auxiliary *pAux = 0;
15467 const char *zRank = pCsr->zRank;
15468 const char *zRankArgs = pCsr->zRankArgs;
15469
15470 if( zRankArgs ){
15471 char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
15472 if( zSql ){
15473 sqlite3_stmt *pStmt = 0;
15474 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
15475 sqlite3_free(zSql);
15476 assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
15477 if( rc==SQLITE_OK ){
15478 if( SQLITE_ROW==sqlite3_step(pStmt) ){
15479 int nByte;
15480 pCsr->nRankArg = sqlite3_column_count(pStmt);
15481 nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
15482 pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
15483 if( rc==SQLITE_OK ){
15484 int i;
15485 for(i=0; i<pCsr->nRankArg; i++){
15486 pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
15487 }
15488 }
15489 pCsr->pRankArgStmt = pStmt;
15490 }else{
15491 rc = sqlite3_finalize(pStmt);
15492 assert( rc!=SQLITE_OK );
15493 }
15494 }
15495 }
15496 }
15497
15498 if( rc==SQLITE_OK ){
15499 pAux = fts5FindAuxiliary(pTab, zRank);
15500 if( pAux==0 ){
15501 assert( pTab->base.zErrMsg==0 );
15502 pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
15503 rc = SQLITE_ERROR;
15504 }
15505 }
15506
15507 pCsr->pRank = pAux;
15508 return rc;
15509 }
15510
15511
15512 static int fts5CursorParseRank(
15513 Fts5Config *pConfig,
15514 Fts5Cursor *pCsr,
15515 sqlite3_value *pRank
15516 ){
15517 int rc = SQLITE_OK;
15518 if( pRank ){
15519 const char *z = (const char*)sqlite3_value_text(pRank);
15520 char *zRank = 0;
15521 char *zRankArgs = 0;
15522
15523 if( z==0 ){
15524 if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR;
15525 }else{
15526 rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
15527 }
15528 if( rc==SQLITE_OK ){
15529 pCsr->zRank = zRank;
15530 pCsr->zRankArgs = zRankArgs;
15531 CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
15532 }else if( rc==SQLITE_ERROR ){
15533 pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
15534 "parse error in rank function: %s", z
15535 );
15536 }
15537 }else{
15538 if( pConfig->zRank ){
15539 pCsr->zRank = (char*)pConfig->zRank;
15540 pCsr->zRankArgs = (char*)pConfig->zRankArgs;
15541 }else{
15542 pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
15543 pCsr->zRankArgs = 0;
15544 }
15545 }
15546 return rc;
15547 }
15548
15549 static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
15550 if( pVal ){
15551 int eType = sqlite3_value_numeric_type(pVal);
15552 if( eType==SQLITE_INTEGER ){
15553 return sqlite3_value_int64(pVal);
15554 }
15555 }
15556 return iDefault;
15557 }
15558
15559 /*
15560 ** This is the xFilter interface for the virtual table. See
15561 ** the virtual table xFilter method documentation for additional
15562 ** information.
15563 **
15564 ** There are three possible query strategies:
15565 **
15566 ** 1. Full-text search using a MATCH operator.
15567 ** 2. A by-rowid lookup.
15568 ** 3. A full-table scan.
15569 */
15570 static int fts5FilterMethod(
15571 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
15572 int idxNum, /* Strategy index */
15573 const char *zUnused, /* Unused */
15574 int nVal, /* Number of elements in apVal */
15575 sqlite3_value **apVal /* Arguments for the indexing scheme */
15576 ){
15577 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
15578 Fts5Config *pConfig = pTab->pConfig;
15579 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
15580 int rc = SQLITE_OK; /* Error code */
15581 int iVal = 0; /* Counter for apVal[] */
15582 int bDesc; /* True if ORDER BY [rank|rowid] DESC */
15583 int bOrderByRank; /* True if ORDER BY rank */
15584 sqlite3_value *pMatch = 0; /* <tbl> MATCH ? expression (or NULL) */
15585 sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */
15586 sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */
15587 sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */
15588 sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */
15589 char **pzErrmsg = pConfig->pzErrmsg;
15590
15591 UNUSED_PARAM(zUnused);
15592 UNUSED_PARAM(nVal);
15593
15594 if( pCsr->ePlan ){
15595 fts5FreeCursorComponents(pCsr);
15596 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
15597 }
15598
15599 assert( pCsr->pStmt==0 );
15600 assert( pCsr->pExpr==0 );
15601 assert( pCsr->csrflags==0 );
15602 assert( pCsr->pRank==0 );
15603 assert( pCsr->zRank==0 );
15604 assert( pCsr->zRankArgs==0 );
15605
15606 assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg );
15607 pConfig->pzErrmsg = &pTab->base.zErrMsg;
15608
15609 /* Decode the arguments passed through to this function.
15610 **
15611 ** Note: The following set of if(...) statements must be in the same
15612 ** order as the corresponding entries in the struct at the top of
15613 ** fts5BestIndexMethod(). */
15614 if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++];
15615 if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++];
15616 if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++];
15617 if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++];
15618 if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++];
15619 assert( iVal==nVal );
15620 bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
15621 pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);
15622
15623 /* Set the cursor upper and lower rowid limits. Only some strategies
15624 ** actually use them. This is ok, as the xBestIndex() method leaves the
15625 ** sqlite3_index_constraint.omit flag clear for range constraints
15626 ** on the rowid field. */
15627 if( pRowidEq ){
15628 pRowidLe = pRowidGe = pRowidEq;
15629 }
15630 if( bDesc ){
15631 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
15632 pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
15633 }else{
15634 pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
15635 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
15636 }
15637
15638 if( pTab->pSortCsr ){
15639 /* If pSortCsr is non-NULL, then this call is being made as part of
15640 ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
15641 ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
15642 ** return results to the user for this query. The current cursor
15643 ** (pCursor) is used to execute the query issued by function
15644 ** fts5CursorFirstSorted() above. */
15645 assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
15646 assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 );
15647 assert( pCsr->iLastRowid==LARGEST_INT64 );
15648 assert( pCsr->iFirstRowid==SMALLEST_INT64 );
15649 pCsr->ePlan = FTS5_PLAN_SOURCE;
15650 pCsr->pExpr = pTab->pSortCsr->pExpr;
15651 rc = fts5CursorFirst(pTab, pCsr, bDesc);
15652 }else if( pMatch ){
15653 const char *zExpr = (const char*)sqlite3_value_text(apVal[0]);
15654 if( zExpr==0 ) zExpr = "";
15655
15656 rc = fts5CursorParseRank(pConfig, pCsr, pRank);
15657 if( rc==SQLITE_OK ){
15658 if( zExpr[0]=='*' ){
15659 /* The user has issued a query of the form "MATCH '*...'". This
15660 ** indicates that the MATCH expression is not a full text query,
15661 ** but a request for an internal parameter. */
15662 rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
15663 }else{
15664 char **pzErr = &pTab->base.zErrMsg;
15665 rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr);
15666 if( rc==SQLITE_OK ){
15667 if( bOrderByRank ){
15668 pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
15669 rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
15670 }else{
15671 pCsr->ePlan = FTS5_PLAN_MATCH;
15672 rc = fts5CursorFirst(pTab, pCsr, bDesc);
15673 }
15674 }
15675 }
15676 }
15677 }else if( pConfig->zContent==0 ){
15678 *pConfig->pzErrmsg = sqlite3_mprintf(
15679 "%s: table does not support scanning", pConfig->zName
15680 );
15681 rc = SQLITE_ERROR;
15682 }else{
15683 /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
15684 ** by rowid (ePlan==FTS5_PLAN_ROWID). */
15685 pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
15686 rc = sqlite3Fts5StorageStmt(
15687 pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg
15688 );
15689 if( rc==SQLITE_OK ){
15690 if( pCsr->ePlan==FTS5_PLAN_ROWID ){
15691 sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
15692 }else{
15693 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
15694 sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
15695 }
15696 rc = fts5NextMethod(pCursor);
15697 }
15698 }
15699
15700 pConfig->pzErrmsg = pzErrmsg;
15701 return rc;
15702 }
15703
15704 /*
15705 ** This is the xEof method of the virtual table. SQLite calls this
15706 ** routine to find out if it has reached the end of a result set.
15707 */
15708 static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
15709 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
15710 return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0);
15711 }
15712
15713 /*
15714 ** Return the rowid that the cursor currently points to.
15715 */
15716 static i64 fts5CursorRowid(Fts5Cursor *pCsr){
15717 assert( pCsr->ePlan==FTS5_PLAN_MATCH
15718 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
15719 || pCsr->ePlan==FTS5_PLAN_SOURCE
15720 );
15721 if( pCsr->pSorter ){
15722 return pCsr->pSorter->iRowid;
15723 }else{
15724 return sqlite3Fts5ExprRowid(pCsr->pExpr);
15725 }
15726 }
15727
15728 /*
15729 ** This is the xRowid method. The SQLite core calls this routine to
15730 ** retrieve the rowid for the current row of the result set. fts5
15731 ** exposes %_content.rowid as the rowid for the virtual table. The
15732 ** rowid should be written to *pRowid.
15733 */
15734 static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
15735 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
15736 int ePlan = pCsr->ePlan;
15737
15738 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
15739 switch( ePlan ){
15740 case FTS5_PLAN_SPECIAL:
15741 *pRowid = 0;
15742 break;
15743
15744 case FTS5_PLAN_SOURCE:
15745 case FTS5_PLAN_MATCH:
15746 case FTS5_PLAN_SORTED_MATCH:
15747 *pRowid = fts5CursorRowid(pCsr);
15748 break;
15749
15750 default:
15751 *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
15752 break;
15753 }
15754
15755 return SQLITE_OK;
15756 }
15757
15758 /*
15759 ** If the cursor requires seeking (bSeekRequired flag is set), seek it.
15760 ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise.
15761 **
15762 ** If argument bErrormsg is true and an error occurs, an error message may
15763 ** be left in sqlite3_vtab.zErrMsg.
15764 */
15765 static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
15766 int rc = SQLITE_OK;
15767
15768 /* If the cursor does not yet have a statement handle, obtain one now. */
15769 if( pCsr->pStmt==0 ){
15770 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
15771 int eStmt = fts5StmtType(pCsr);
15772 rc = sqlite3Fts5StorageStmt(
15773 pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->base.zErrMsg:0)
15774 );
15775 assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 );
15776 assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) );
15777 }
15778
15779 if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){
15780 assert( pCsr->pExpr );
15781 sqlite3_reset(pCsr->pStmt);
15782 sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
15783 rc = sqlite3_step(pCsr->pStmt);
15784 if( rc==SQLITE_ROW ){
15785 rc = SQLITE_OK;
15786 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
15787 }else{
15788 rc = sqlite3_reset(pCsr->pStmt);
15789 if( rc==SQLITE_OK ){
15790 rc = FTS5_CORRUPT;
15791 }
15792 }
15793 }
15794 return rc;
15795 }
15796
15797 static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){
15798 va_list ap; /* ... printf arguments */
15799 va_start(ap, zFormat);
15800 assert( p->base.zErrMsg==0 );
15801 p->base.zErrMsg = sqlite3_vmprintf(zFormat, ap);
15802 va_end(ap);
15803 }
15804
15805 /*
15806 ** This function is called to handle an FTS INSERT command. In other words,
15807 ** an INSERT statement of the form:
15808 **
15809 ** INSERT INTO fts(fts) VALUES($pCmd)
15810 ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
15811 **
15812 ** Argument pVal is the value assigned to column "fts" by the INSERT
15813 ** statement. This function returns SQLITE_OK if successful, or an SQLite
15814 ** error code if an error occurs.
15815 **
15816 ** The commands implemented by this function are documented in the "Special
15817 ** INSERT Directives" section of the documentation. It should be updated if
15818 ** more commands are added to this function.
15819 */
15820 static int fts5SpecialInsert(
15821 Fts5Table *pTab, /* Fts5 table object */
15822 const char *zCmd, /* Text inserted into table-name column */
15823 sqlite3_value *pVal /* Value inserted into rank column */
15824 ){
15825 Fts5Config *pConfig = pTab->pConfig;
15826 int rc = SQLITE_OK;
15827 int bError = 0;
15828
15829 if( 0==sqlite3_stricmp("delete-all", zCmd) ){
15830 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
15831 fts5SetVtabError(pTab,
15832 "'delete-all' may only be used with a "
15833 "contentless or external content fts5 table"
15834 );
15835 rc = SQLITE_ERROR;
15836 }else{
15837 rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage);
15838 }
15839 }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){
15840 if( pConfig->eContent==FTS5_CONTENT_NONE ){
15841 fts5SetVtabError(pTab,
15842 "'rebuild' may not be used with a contentless fts5 table"
15843 );
15844 rc = SQLITE_ERROR;
15845 }else{
15846 rc = sqlite3Fts5StorageRebuild(pTab->pStorage);
15847 }
15848 }else if( 0==sqlite3_stricmp("optimize", zCmd) ){
15849 rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
15850 }else if( 0==sqlite3_stricmp("merge", zCmd) ){
15851 int nMerge = sqlite3_value_int(pVal);
15852 rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
15853 }else if( 0==sqlite3_stricmp("integrity-check", zCmd) ){
15854 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);
15855 #ifdef SQLITE_DEBUG
15856 }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){
15857 pConfig->bPrefixIndex = sqlite3_value_int(pVal);
15858 #endif
15859 }else{
15860 rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
15861 if( rc==SQLITE_OK ){
15862 rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, zCmd, pVal, &bError);
15863 }
15864 if( rc==SQLITE_OK ){
15865 if( bError ){
15866 rc = SQLITE_ERROR;
15867 }else{
15868 rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0);
15869 }
15870 }
15871 }
15872 return rc;
15873 }
15874
15875 static int fts5SpecialDelete(
15876 Fts5Table *pTab,
15877 sqlite3_value **apVal
15878 ){
15879 int rc = SQLITE_OK;
15880 int eType1 = sqlite3_value_type(apVal[1]);
15881 if( eType1==SQLITE_INTEGER ){
15882 sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]);
15883 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]);
15884 }
15885 return rc;
15886 }
15887
15888 static void fts5StorageInsert(
15889 int *pRc,
15890 Fts5Table *pTab,
15891 sqlite3_value **apVal,
15892 i64 *piRowid
15893 ){
15894 int rc = *pRc;
15895 if( rc==SQLITE_OK ){
15896 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid);
15897 }
15898 if( rc==SQLITE_OK ){
15899 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid);
15900 }
15901 *pRc = rc;
15902 }
15903
15904 /*
15905 ** This function is the implementation of the xUpdate callback used by
15906 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
15907 ** inserted, updated or deleted.
15908 **
15909 ** A delete specifies a single argument - the rowid of the row to remove.
15910 **
15911 ** Update and insert operations pass:
15912 **
15913 ** 1. The "old" rowid, or NULL.
15914 ** 2. The "new" rowid.
15915 ** 3. Values for each of the nCol matchable columns.
15916 ** 4. Values for the two hidden columns (<tablename> and "rank").
15917 */
15918 static int fts5UpdateMethod(
15919 sqlite3_vtab *pVtab, /* Virtual table handle */
15920 int nArg, /* Size of argument array */
15921 sqlite3_value **apVal, /* Array of arguments */
15922 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
15923 ){
15924 Fts5Table *pTab = (Fts5Table*)pVtab;
15925 Fts5Config *pConfig = pTab->pConfig;
15926 int eType0; /* value_type() of apVal[0] */
15927 int rc = SQLITE_OK; /* Return code */
15928
15929 /* A transaction must be open when this is called. */
15930 assert( pTab->ts.eState==1 );
15931
15932 assert( pVtab->zErrMsg==0 );
15933 assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
15934 assert( nArg==1
15935 || sqlite3_value_type(apVal[1])==SQLITE_INTEGER
15936 || sqlite3_value_type(apVal[1])==SQLITE_NULL
15937 );
15938 assert( pTab->pConfig->pzErrmsg==0 );
15939 pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;
15940
15941 /* Put any active cursors into REQUIRE_SEEK state. */
15942 fts5TripCursors(pTab);
15943
15944 eType0 = sqlite3_value_type(apVal[0]);
15945 if( eType0==SQLITE_NULL
15946 && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
15947 ){
15948 /* A "special" INSERT op. These are handled separately. */
15949 const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
15950 if( pConfig->eContent!=FTS5_CONTENT_NORMAL
15951 && 0==sqlite3_stricmp("delete", z)
15952 ){
15953 rc = fts5SpecialDelete(pTab, apVal);
15954 }else{
15955 rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
15956 }
15957 }else{
15958 /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
15959 ** any conflict on the rowid value must be detected before any
15960 ** modifications are made to the database file. There are 4 cases:
15961 **
15962 ** 1) DELETE
15963 ** 2) UPDATE (rowid not modified)
15964 ** 3) UPDATE (rowid modified)
15965 ** 4) INSERT
15966 **
15967 ** Cases 3 and 4 may violate the rowid constraint.
15968 */
15969 int eConflict = SQLITE_ABORT;
15970 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
15971 eConflict = sqlite3_vtab_on_conflict(pConfig->db);
15972 }
15973
15974 assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
15975 assert( nArg!=1 || eType0==SQLITE_INTEGER );
15976
15977 /* Filter out attempts to run UPDATE or DELETE on contentless tables.
15978 ** This is not suported. */
15979 if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
15980 pTab->base.zErrMsg = sqlite3_mprintf(
15981 "cannot %s contentless fts5 table: %s",
15982 (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
15983 );
15984 rc = SQLITE_ERROR;
15985 }
15986
15987 /* DELETE */
15988 else if( nArg==1 ){
15989 i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */
15990 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
15991 }
15992
15993 /* INSERT */
15994 else if( eType0!=SQLITE_INTEGER ){
15995 /* If this is a REPLACE, first remove the current entry (if any) */
15996 if( eConflict==SQLITE_REPLACE
15997 && sqlite3_value_type(apVal[1])==SQLITE_INTEGER
15998 ){
15999 i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */
16000 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
16001 }
16002 fts5StorageInsert(&rc, pTab, apVal, pRowid);
16003 }
16004
16005 /* UPDATE */
16006 else{
16007 i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */
16008 i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */
16009 if( iOld!=iNew ){
16010 if( eConflict==SQLITE_REPLACE ){
16011 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
16012 if( rc==SQLITE_OK ){
16013 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
16014 }
16015 fts5StorageInsert(&rc, pTab, apVal, pRowid);
16016 }else{
16017 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
16018 if( rc==SQLITE_OK ){
16019 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
16020 }
16021 if( rc==SQLITE_OK ){
16022 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *pRowid);
16023 }
16024 }
16025 }else{
16026 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
16027 fts5StorageInsert(&rc, pTab, apVal, pRowid);
16028 }
16029 }
16030 }
16031
16032 pTab->pConfig->pzErrmsg = 0;
16033 return rc;
16034 }
16035
16036 /*
16037 ** Implementation of xSync() method.
16038 */
16039 static int fts5SyncMethod(sqlite3_vtab *pVtab){
16040 int rc;
16041 Fts5Table *pTab = (Fts5Table*)pVtab;
16042 fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
16043 pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;
16044 fts5TripCursors(pTab);
16045 rc = sqlite3Fts5StorageSync(pTab->pStorage, 1);
16046 pTab->pConfig->pzErrmsg = 0;
16047 return rc;
16048 }
16049
16050 /*
16051 ** Implementation of xBegin() method.
16052 */
16053 static int fts5BeginMethod(sqlite3_vtab *pVtab){
16054 fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0);
16055 fts5NewTransaction((Fts5Table*)pVtab);
16056 return SQLITE_OK;
16057 }
16058
16059 /*
16060 ** Implementation of xCommit() method. This is a no-op. The contents of
16061 ** the pending-terms hash-table have already been flushed into the database
16062 ** by fts5SyncMethod().
16063 */
16064 static int fts5CommitMethod(sqlite3_vtab *pVtab){
16065 UNUSED_PARAM(pVtab); /* Call below is a no-op for NDEBUG builds */
16066 fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_COMMIT, 0);
16067 return SQLITE_OK;
16068 }
16069
16070 /*
16071 ** Implementation of xRollback(). Discard the contents of the pending-terms
16072 ** hash-table. Any changes made to the database are reverted by SQLite.
16073 */
16074 static int fts5RollbackMethod(sqlite3_vtab *pVtab){
16075 int rc;
16076 Fts5Table *pTab = (Fts5Table*)pVtab;
16077 fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
16078 rc = sqlite3Fts5StorageRollback(pTab->pStorage);
16079 return rc;
16080 }
16081
16082 static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*);
16083
16084 static void *fts5ApiUserData(Fts5Context *pCtx){
16085 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16086 return pCsr->pAux->pUserData;
16087 }
16088
16089 static int fts5ApiColumnCount(Fts5Context *pCtx){
16090 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16091 return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
16092 }
16093
16094 static int fts5ApiColumnTotalSize(
16095 Fts5Context *pCtx,
16096 int iCol,
16097 sqlite3_int64 *pnToken
16098 ){
16099 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16100 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
16101 return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
16102 }
16103
16104 static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
16105 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16106 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
16107 return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
16108 }
16109
16110 static int fts5ApiTokenize(
16111 Fts5Context *pCtx,
16112 const char *pText, int nText,
16113 void *pUserData,
16114 int (*xToken)(void*, int, const char*, int, int, int)
16115 ){
16116 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16117 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
16118 return sqlite3Fts5Tokenize(
16119 pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
16120 );
16121 }
16122
16123 static int fts5ApiPhraseCount(Fts5Context *pCtx){
16124 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16125 return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
16126 }
16127
16128 static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
16129 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16130 return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
16131 }
16132
16133 static int fts5ApiColumnText(
16134 Fts5Context *pCtx,
16135 int iCol,
16136 const char **pz,
16137 int *pn
16138 ){
16139 int rc = SQLITE_OK;
16140 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16141 if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){
16142 *pz = 0;
16143 *pn = 0;
16144 }else{
16145 rc = fts5SeekCursor(pCsr, 0);
16146 if( rc==SQLITE_OK ){
16147 *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
16148 *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
16149 }
16150 }
16151 return rc;
16152 }
16153
16154 static int fts5CsrPoslist(
16155 Fts5Cursor *pCsr,
16156 int iPhrase,
16157 const u8 **pa,
16158 int *pn
16159 ){
16160 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
16161 int rc = SQLITE_OK;
16162 int bLive = (pCsr->pSorter==0);
16163
16164 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){
16165
16166 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
16167 Fts5PoslistPopulator *aPopulator;
16168 int i;
16169 aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
16170 if( aPopulator==0 ) rc = SQLITE_NOMEM;
16171 for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){
16172 int n; const char *z;
16173 rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n);
16174 if( rc==SQLITE_OK ){
16175 rc = sqlite3Fts5ExprPopulatePoslists(
16176 pConfig, pCsr->pExpr, aPopulator, i, z, n
16177 );
16178 }
16179 }
16180 sqlite3_free(aPopulator);
16181
16182 if( pCsr->pSorter ){
16183 sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid);
16184 }
16185 }
16186 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST);
16187 }
16188
16189 if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){
16190 Fts5Sorter *pSorter = pCsr->pSorter;
16191 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
16192 *pn = pSorter->aIdx[iPhrase] - i1;
16193 *pa = &pSorter->aPoslist[i1];
16194 }else{
16195 *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
16196 }
16197
16198 return rc;
16199 }
16200
16201 /*
16202 ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
16203 ** correctly for the current view. Return SQLITE_OK if successful, or an
16204 ** SQLite error code otherwise.
16205 */
16206 static int fts5CacheInstArray(Fts5Cursor *pCsr){
16207 int rc = SQLITE_OK;
16208 Fts5PoslistReader *aIter; /* One iterator for each phrase */
16209 int nIter; /* Number of iterators/phrases */
16210
16211 nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
16212 if( pCsr->aInstIter==0 ){
16213 int nByte = sizeof(Fts5PoslistReader) * nIter;
16214 pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
16215 }
16216 aIter = pCsr->aInstIter;
16217
16218 if( aIter ){
16219 int nInst = 0; /* Number instances seen so far */
16220 int i;
16221
16222 /* Initialize all iterators */
16223 for(i=0; i<nIter && rc==SQLITE_OK; i++){
16224 const u8 *a;
16225 int n;
16226 rc = fts5CsrPoslist(pCsr, i, &a, &n);
16227 if( rc==SQLITE_OK ){
16228 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
16229 }
16230 }
16231
16232 if( rc==SQLITE_OK ){
16233 while( 1 ){
16234 int *aInst;
16235 int iBest = -1;
16236 for(i=0; i<nIter; i++){
16237 if( (aIter[i].bEof==0)
16238 && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
16239 ){
16240 iBest = i;
16241 }
16242 }
16243 if( iBest<0 ) break;
16244
16245 nInst++;
16246 if( nInst>=pCsr->nInstAlloc ){
16247 pCsr->nInstAlloc = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
16248 aInst = (int*)sqlite3_realloc(
16249 pCsr->aInst, pCsr->nInstAlloc*sizeof(int)*3
16250 );
16251 if( aInst ){
16252 pCsr->aInst = aInst;
16253 }else{
16254 rc = SQLITE_NOMEM;
16255 break;
16256 }
16257 }
16258
16259 aInst = &pCsr->aInst[3 * (nInst-1)];
16260 aInst[0] = iBest;
16261 aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
16262 aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
16263 sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
16264 }
16265 }
16266
16267 pCsr->nInstCount = nInst;
16268 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST);
16269 }
16270 return rc;
16271 }
16272
16273 static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
16274 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16275 int rc = SQLITE_OK;
16276 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
16277 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
16278 *pnInst = pCsr->nInstCount;
16279 }
16280 return rc;
16281 }
16282
16283 static int fts5ApiInst(
16284 Fts5Context *pCtx,
16285 int iIdx,
16286 int *piPhrase,
16287 int *piCol,
16288 int *piOff
16289 ){
16290 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16291 int rc = SQLITE_OK;
16292 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
16293 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr))
16294 ){
16295 if( iIdx<0 || iIdx>=pCsr->nInstCount ){
16296 rc = SQLITE_RANGE;
16297 #if 0
16298 }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
16299 *piPhrase = pCsr->aInst[iIdx*3];
16300 *piCol = pCsr->aInst[iIdx*3 + 2];
16301 *piOff = -1;
16302 #endif
16303 }else{
16304 *piPhrase = pCsr->aInst[iIdx*3];
16305 *piCol = pCsr->aInst[iIdx*3 + 1];
16306 *piOff = pCsr->aInst[iIdx*3 + 2];
16307 }
16308 }
16309 return rc;
16310 }
16311
16312 static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
16313 return fts5CursorRowid((Fts5Cursor*)pCtx);
16314 }
16315
16316 static int fts5ColumnSizeCb(
16317 void *pContext, /* Pointer to int */
16318 int tflags,
16319 const char *pUnused, /* Buffer containing token */
16320 int nUnused, /* Size of token in bytes */
16321 int iUnused1, /* Start offset of token */
16322 int iUnused2 /* End offset of token */
16323 ){
16324 int *pCnt = (int*)pContext;
16325 UNUSED_PARAM2(pUnused, nUnused);
16326 UNUSED_PARAM2(iUnused1, iUnused2);
16327 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
16328 (*pCnt)++;
16329 }
16330 return SQLITE_OK;
16331 }
16332
16333 static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
16334 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16335 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
16336 Fts5Config *pConfig = pTab->pConfig;
16337 int rc = SQLITE_OK;
16338
16339 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){
16340 if( pConfig->bColumnsize ){
16341 i64 iRowid = fts5CursorRowid(pCsr);
16342 rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize);
16343 }else if( pConfig->zContent==0 ){
16344 int i;
16345 for(i=0; i<pConfig->nCol; i++){
16346 if( pConfig->abUnindexed[i]==0 ){
16347 pCsr->aColumnSize[i] = -1;
16348 }
16349 }
16350 }else{
16351 int i;
16352 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
16353 if( pConfig->abUnindexed[i]==0 ){
16354 const char *z; int n;
16355 void *p = (void*)(&pCsr->aColumnSize[i]);
16356 pCsr->aColumnSize[i] = 0;
16357 rc = fts5ApiColumnText(pCtx, i, &z, &n);
16358 if( rc==SQLITE_OK ){
16359 rc = sqlite3Fts5Tokenize(
16360 pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
16361 );
16362 }
16363 }
16364 }
16365 }
16366 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
16367 }
16368 if( iCol<0 ){
16369 int i;
16370 *pnToken = 0;
16371 for(i=0; i<pConfig->nCol; i++){
16372 *pnToken += pCsr->aColumnSize[i];
16373 }
16374 }else if( iCol<pConfig->nCol ){
16375 *pnToken = pCsr->aColumnSize[iCol];
16376 }else{
16377 *pnToken = 0;
16378 rc = SQLITE_RANGE;
16379 }
16380 return rc;
16381 }
16382
16383 /*
16384 ** Implementation of the xSetAuxdata() method.
16385 */
16386 static int fts5ApiSetAuxdata(
16387 Fts5Context *pCtx, /* Fts5 context */
16388 void *pPtr, /* Pointer to save as auxdata */
16389 void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */
16390 ){
16391 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16392 Fts5Auxdata *pData;
16393
16394 /* Search through the cursors list of Fts5Auxdata objects for one that
16395 ** corresponds to the currently executing auxiliary function. */
16396 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
16397 if( pData->pAux==pCsr->pAux ) break;
16398 }
16399
16400 if( pData ){
16401 if( pData->xDelete ){
16402 pData->xDelete(pData->pPtr);
16403 }
16404 }else{
16405 int rc = SQLITE_OK;
16406 pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
16407 if( pData==0 ){
16408 if( xDelete ) xDelete(pPtr);
16409 return rc;
16410 }
16411 pData->pAux = pCsr->pAux;
16412 pData->pNext = pCsr->pAuxdata;
16413 pCsr->pAuxdata = pData;
16414 }
16415
16416 pData->xDelete = xDelete;
16417 pData->pPtr = pPtr;
16418 return SQLITE_OK;
16419 }
16420
16421 static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
16422 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16423 Fts5Auxdata *pData;
16424 void *pRet = 0;
16425
16426 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
16427 if( pData->pAux==pCsr->pAux ) break;
16428 }
16429
16430 if( pData ){
16431 pRet = pData->pPtr;
16432 if( bClear ){
16433 pData->pPtr = 0;
16434 pData->xDelete = 0;
16435 }
16436 }
16437
16438 return pRet;
16439 }
16440
16441 static void fts5ApiPhraseNext(
16442 Fts5Context *pUnused,
16443 Fts5PhraseIter *pIter,
16444 int *piCol, int *piOff
16445 ){
16446 UNUSED_PARAM(pUnused);
16447 if( pIter->a>=pIter->b ){
16448 *piCol = -1;
16449 *piOff = -1;
16450 }else{
16451 int iVal;
16452 pIter->a += fts5GetVarint32(pIter->a, iVal);
16453 if( iVal==1 ){
16454 pIter->a += fts5GetVarint32(pIter->a, iVal);
16455 *piCol = iVal;
16456 *piOff = 0;
16457 pIter->a += fts5GetVarint32(pIter->a, iVal);
16458 }
16459 *piOff += (iVal-2);
16460 }
16461 }
16462
16463 static int fts5ApiPhraseFirst(
16464 Fts5Context *pCtx,
16465 int iPhrase,
16466 Fts5PhraseIter *pIter,
16467 int *piCol, int *piOff
16468 ){
16469 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16470 int n;
16471 int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
16472 if( rc==SQLITE_OK ){
16473 pIter->b = &pIter->a[n];
16474 *piCol = 0;
16475 *piOff = 0;
16476 fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
16477 }
16478 return rc;
16479 }
16480
16481 static void fts5ApiPhraseNextColumn(
16482 Fts5Context *pCtx,
16483 Fts5PhraseIter *pIter,
16484 int *piCol
16485 ){
16486 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16487 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
16488
16489 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
16490 if( pIter->a>=pIter->b ){
16491 *piCol = -1;
16492 }else{
16493 int iIncr;
16494 pIter->a += fts5GetVarint32(&pIter->a[0], iIncr);
16495 *piCol += (iIncr-2);
16496 }
16497 }else{
16498 while( 1 ){
16499 int dummy;
16500 if( pIter->a>=pIter->b ){
16501 *piCol = -1;
16502 return;
16503 }
16504 if( pIter->a[0]==0x01 ) break;
16505 pIter->a += fts5GetVarint32(pIter->a, dummy);
16506 }
16507 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
16508 }
16509 }
16510
16511 static int fts5ApiPhraseFirstColumn(
16512 Fts5Context *pCtx,
16513 int iPhrase,
16514 Fts5PhraseIter *pIter,
16515 int *piCol
16516 ){
16517 int rc = SQLITE_OK;
16518 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16519 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
16520
16521 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
16522 Fts5Sorter *pSorter = pCsr->pSorter;
16523 int n;
16524 if( pSorter ){
16525 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
16526 n = pSorter->aIdx[iPhrase] - i1;
16527 pIter->a = &pSorter->aPoslist[i1];
16528 }else{
16529 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);
16530 }
16531 if( rc==SQLITE_OK ){
16532 pIter->b = &pIter->a[n];
16533 *piCol = 0;
16534 fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
16535 }
16536 }else{
16537 int n;
16538 rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
16539 if( rc==SQLITE_OK ){
16540 pIter->b = &pIter->a[n];
16541 if( n<=0 ){
16542 *piCol = -1;
16543 }else if( pIter->a[0]==0x01 ){
16544 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
16545 }else{
16546 *piCol = 0;
16547 }
16548 }
16549 }
16550
16551 return rc;
16552 }
16553
16554
/* Forward declaration: fts5ApiQueryPhrase() is defined after sFts5Api
** because its body passes &sFts5Api to the user callback. */
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);

/*
** The table of extension API methods. A pointer to this object is passed
** to auxiliary functions by fts5ApiInvoke() and to xQueryPhrase callbacks
** by fts5ApiQueryPhrase(). This is a positional initializer, so the
** entries must stay in the order in which Fts5ExtensionApi declares its
** members (the declaration is not visible in this part of the file).
*/
static const Fts5ExtensionApi sFts5Api = {
  2,                            /* iVersion */
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
  fts5ApiPhraseFirst,
  fts5ApiPhraseNext,
  fts5ApiPhraseFirstColumn,
  fts5ApiPhraseNextColumn,
};
16581
16582 /*
16583 ** Implementation of API function xQueryPhrase().
16584 */
16585 static int fts5ApiQueryPhrase(
16586 Fts5Context *pCtx,
16587 int iPhrase,
16588 void *pUserData,
16589 int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
16590 ){
16591 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
16592 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
16593 int rc;
16594 Fts5Cursor *pNew = 0;
16595
16596 rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
16597 if( rc==SQLITE_OK ){
16598 pNew->ePlan = FTS5_PLAN_MATCH;
16599 pNew->iFirstRowid = SMALLEST_INT64;
16600 pNew->iLastRowid = LARGEST_INT64;
16601 pNew->base.pVtab = (sqlite3_vtab*)pTab;
16602 rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr);
16603 }
16604
16605 if( rc==SQLITE_OK ){
16606 for(rc = fts5CursorFirst(pTab, pNew, 0);
16607 rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
16608 rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
16609 ){
16610 rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
16611 if( rc!=SQLITE_OK ){
16612 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
16613 break;
16614 }
16615 }
16616 }
16617
16618 fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
16619 return rc;
16620 }
16621
16622 static void fts5ApiInvoke(
16623 Fts5Auxiliary *pAux,
16624 Fts5Cursor *pCsr,
16625 sqlite3_context *context,
16626 int argc,
16627 sqlite3_value **argv
16628 ){
16629 assert( pCsr->pAux==0 );
16630 pCsr->pAux = pAux;
16631 pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
16632 pCsr->pAux = 0;
16633 }
16634
16635 static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
16636 Fts5Cursor *pCsr;
16637 for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
16638 if( pCsr->iCsrId==iCsrId ) break;
16639 }
16640 return pCsr;
16641 }
16642
16643 static void fts5ApiCallback(
16644 sqlite3_context *context,
16645 int argc,
16646 sqlite3_value **argv
16647 ){
16648
16649 Fts5Auxiliary *pAux;
16650 Fts5Cursor *pCsr;
16651 i64 iCsrId;
16652
16653 assert( argc>=1 );
16654 pAux = (Fts5Auxiliary*)sqlite3_user_data(context);
16655 iCsrId = sqlite3_value_int64(argv[0]);
16656
16657 pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
16658 if( pCsr==0 ){
16659 char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
16660 sqlite3_result_error(context, zErr, -1);
16661 sqlite3_free(zErr);
16662 }else{
16663 fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
16664 }
16665 }
16666
16667
16668 /*
16669 ** Given cursor id iId, return a pointer to the corresponding Fts5Index
16670 ** object. Or NULL If the cursor id does not exist.
16671 **
16672 ** If successful, set *ppConfig to point to the associated config object
16673 ** before returning.
16674 */
16675 static Fts5Index *sqlite3Fts5IndexFromCsrid(
16676 Fts5Global *pGlobal, /* FTS5 global context for db handle */
16677 i64 iCsrId, /* Id of cursor to find */
16678 Fts5Config **ppConfig /* OUT: Configuration object */
16679 ){
16680 Fts5Cursor *pCsr;
16681 Fts5Table *pTab;
16682
16683 pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
16684 pTab = (Fts5Table*)pCsr->base.pVtab;
16685 *ppConfig = pTab->pConfig;
16686
16687 return pTab->pIndex;
16688 }
16689
16690 /*
16691 ** Return a "position-list blob" corresponding to the current position of
16692 ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
16693 ** the current position-list for each phrase in the query associated with
16694 ** cursor pCsr.
16695 **
16696 ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
16697 ** the number of phrases in the query. Following the varints are the
16698 ** concatenated position lists for each phrase, in order.
16699 **
16700 ** The first varint (if it exists) contains the size of the position list
16701 ** for phrase 0. The second (same disclaimer) contains the size of position
16702 ** list 1. And so on. There is no size field for the final position list,
16703 ** as it can be derived from the total size of the blob.
16704 */
16705 static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
16706 int i;
16707 int rc = SQLITE_OK;
16708 int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
16709 Fts5Buffer val;
16710
16711 memset(&val, 0, sizeof(Fts5Buffer));
16712 switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
16713 case FTS5_DETAIL_FULL:
16714
16715 /* Append the varints */
16716 for(i=0; i<(nPhrase-1); i++){
16717 const u8 *dummy;
16718 int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
16719 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
16720 }
16721
16722 /* Append the position lists */
16723 for(i=0; i<nPhrase; i++){
16724 const u8 *pPoslist;
16725 int nPoslist;
16726 nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
16727 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
16728 }
16729 break;
16730
16731 case FTS5_DETAIL_COLUMNS:
16732
16733 /* Append the varints */
16734 for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){
16735 const u8 *dummy;
16736 int nByte;
16737 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
16738 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
16739 }
16740
16741 /* Append the position lists */
16742 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
16743 const u8 *pPoslist;
16744 int nPoslist;
16745 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
16746 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
16747 }
16748 break;
16749
16750 default:
16751 break;
16752 }
16753
16754 sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
16755 return rc;
16756 }
16757
16758 /*
16759 ** This is the xColumn method, called by SQLite to request a value from
16760 ** the row that the supplied cursor currently points to.
16761 */
16762 static int fts5ColumnMethod(
16763 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
16764 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
16765 int iCol /* Index of column to read value from */
16766 ){
16767 Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
16768 Fts5Config *pConfig = pTab->pConfig;
16769 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16770 int rc = SQLITE_OK;
16771
16772 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
16773
16774 if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
16775 if( iCol==pConfig->nCol ){
16776 sqlite3_result_int64(pCtx, pCsr->iSpecial);
16777 }
16778 }else
16779
16780 if( iCol==pConfig->nCol ){
16781 /* User is requesting the value of the special column with the same name
16782 ** as the table. Return the cursor integer id number. This value is only
16783 ** useful in that it may be passed as the first argument to an FTS5
16784 ** auxiliary function. */
16785 sqlite3_result_int64(pCtx, pCsr->iCsrId);
16786 }else if( iCol==pConfig->nCol+1 ){
16787
16788 /* The value of the "rank" column. */
16789 if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
16790 fts5PoslistBlob(pCtx, pCsr);
16791 }else if(
16792 pCsr->ePlan==FTS5_PLAN_MATCH
16793 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
16794 ){
16795 if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
16796 fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
16797 }
16798 }
16799 }else if( !fts5IsContentless(pTab) ){
16800 rc = fts5SeekCursor(pCsr, 1);
16801 if( rc==SQLITE_OK ){
16802 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
16803 }
16804 }
16805 return rc;
16806 }
16807
16808
16809 /*
16810 ** This routine implements the xFindFunction method for the FTS3
16811 ** virtual table.
16812 */
16813 static int fts5FindFunctionMethod(
16814 sqlite3_vtab *pVtab, /* Virtual table handle */
16815 int nUnused, /* Number of SQL function arguments */
16816 const char *zName, /* Name of SQL function */
16817 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
16818 void **ppArg /* OUT: User data for *pxFunc */
16819 ){
16820 Fts5Table *pTab = (Fts5Table*)pVtab;
16821 Fts5Auxiliary *pAux;
16822
16823 UNUSED_PARAM(nUnused);
16824 pAux = fts5FindAuxiliary(pTab, zName);
16825 if( pAux ){
16826 *pxFunc = fts5ApiCallback;
16827 *ppArg = (void*)pAux;
16828 return 1;
16829 }
16830
16831 /* No function of the specified name was found. Return 0. */
16832 return 0;
16833 }
16834
16835 /*
16836 ** Implementation of FTS5 xRename method. Rename an fts5 table.
16837 */
16838 static int fts5RenameMethod(
16839 sqlite3_vtab *pVtab, /* Virtual table handle */
16840 const char *zName /* New name of table */
16841 ){
16842 Fts5Table *pTab = (Fts5Table*)pVtab;
16843 return sqlite3Fts5StorageRename(pTab->pStorage, zName);
16844 }
16845
16846 /*
16847 ** The xSavepoint() method.
16848 **
16849 ** Flush the contents of the pending-terms table to disk.
16850 */
16851 static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
16852 Fts5Table *pTab = (Fts5Table*)pVtab;
16853 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
16854 fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint);
16855 fts5TripCursors(pTab);
16856 return sqlite3Fts5StorageSync(pTab->pStorage, 0);
16857 }
16858
16859 /*
16860 ** The xRelease() method.
16861 **
16862 ** This is a no-op.
16863 */
16864 static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
16865 Fts5Table *pTab = (Fts5Table*)pVtab;
16866 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
16867 fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint);
16868 fts5TripCursors(pTab);
16869 return sqlite3Fts5StorageSync(pTab->pStorage, 0);
16870 }
16871
16872 /*
16873 ** The xRollbackTo() method.
16874 **
16875 ** Discard the contents of the pending terms table.
16876 */
16877 static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
16878 Fts5Table *pTab = (Fts5Table*)pVtab;
16879 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
16880 fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
16881 fts5TripCursors(pTab);
16882 return sqlite3Fts5StorageRollback(pTab->pStorage);
16883 }
16884
16885 /*
16886 ** Register a new auxiliary function with global context pGlobal.
16887 */
16888 static int fts5CreateAux(
16889 fts5_api *pApi, /* Global context (one per db handle) */
16890 const char *zName, /* Name of new function */
16891 void *pUserData, /* User data for aux. function */
16892 fts5_extension_function xFunc, /* Aux. function implementation */
16893 void(*xDestroy)(void*) /* Destructor for pUserData */
16894 ){
16895 Fts5Global *pGlobal = (Fts5Global*)pApi;
16896 int rc = sqlite3_overload_function(pGlobal->db, zName, -1);
16897 if( rc==SQLITE_OK ){
16898 Fts5Auxiliary *pAux;
16899 int nName; /* Size of zName in bytes, including \0 */
16900 int nByte; /* Bytes of space to allocate */
16901
16902 nName = (int)strlen(zName) + 1;
16903 nByte = sizeof(Fts5Auxiliary) + nName;
16904 pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte);
16905 if( pAux ){
16906 memset(pAux, 0, nByte);
16907 pAux->zFunc = (char*)&pAux[1];
16908 memcpy(pAux->zFunc, zName, nName);
16909 pAux->pGlobal = pGlobal;
16910 pAux->pUserData = pUserData;
16911 pAux->xFunc = xFunc;
16912 pAux->xDestroy = xDestroy;
16913 pAux->pNext = pGlobal->pAux;
16914 pGlobal->pAux = pAux;
16915 }else{
16916 rc = SQLITE_NOMEM;
16917 }
16918 }
16919
16920 return rc;
16921 }
16922
16923 /*
16924 ** Register a new tokenizer. This is the implementation of the
16925 ** fts5_api.xCreateTokenizer() method.
16926 */
16927 static int fts5CreateTokenizer(
16928 fts5_api *pApi, /* Global context (one per db handle) */
16929 const char *zName, /* Name of new function */
16930 void *pUserData, /* User data for aux. function */
16931 fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
16932 void(*xDestroy)(void*) /* Destructor for pUserData */
16933 ){
16934 Fts5Global *pGlobal = (Fts5Global*)pApi;
16935 Fts5TokenizerModule *pNew;
16936 int nName; /* Size of zName and its \0 terminator */
16937 int nByte; /* Bytes of space to allocate */
16938 int rc = SQLITE_OK;
16939
16940 nName = (int)strlen(zName) + 1;
16941 nByte = sizeof(Fts5TokenizerModule) + nName;
16942 pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte);
16943 if( pNew ){
16944 memset(pNew, 0, nByte);
16945 pNew->zName = (char*)&pNew[1];
16946 memcpy(pNew->zName, zName, nName);
16947 pNew->pUserData = pUserData;
16948 pNew->x = *pTokenizer;
16949 pNew->xDestroy = xDestroy;
16950 pNew->pNext = pGlobal->pTok;
16951 pGlobal->pTok = pNew;
16952 if( pNew->pNext==0 ){
16953 pGlobal->pDfltTok = pNew;
16954 }
16955 }else{
16956 rc = SQLITE_NOMEM;
16957 }
16958
16959 return rc;
16960 }
16961
16962 static Fts5TokenizerModule *fts5LocateTokenizer(
16963 Fts5Global *pGlobal,
16964 const char *zName
16965 ){
16966 Fts5TokenizerModule *pMod = 0;
16967
16968 if( zName==0 ){
16969 pMod = pGlobal->pDfltTok;
16970 }else{
16971 for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
16972 if( sqlite3_stricmp(zName, pMod->zName)==0 ) break;
16973 }
16974 }
16975
16976 return pMod;
16977 }
16978
16979 /*
16980 ** Find a tokenizer. This is the implementation of the
16981 ** fts5_api.xFindTokenizer() method.
16982 */
16983 static int fts5FindTokenizer(
16984 fts5_api *pApi, /* Global context (one per db handle) */
16985 const char *zName, /* Name of new function */
16986 void **ppUserData,
16987 fts5_tokenizer *pTokenizer /* Populate this object */
16988 ){
16989 int rc = SQLITE_OK;
16990 Fts5TokenizerModule *pMod;
16991
16992 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
16993 if( pMod ){
16994 *pTokenizer = pMod->x;
16995 *ppUserData = pMod->pUserData;
16996 }else{
16997 memset(pTokenizer, 0, sizeof(fts5_tokenizer));
16998 rc = SQLITE_ERROR;
16999 }
17000
17001 return rc;
17002 }
17003
17004 static int sqlite3Fts5GetTokenizer(
17005 Fts5Global *pGlobal,
17006 const char **azArg,
17007 int nArg,
17008 Fts5Tokenizer **ppTok,
17009 fts5_tokenizer **ppTokApi,
17010 char **pzErr
17011 ){
17012 Fts5TokenizerModule *pMod;
17013 int rc = SQLITE_OK;
17014
17015 pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
17016 if( pMod==0 ){
17017 assert( nArg>0 );
17018 rc = SQLITE_ERROR;
17019 *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
17020 }else{
17021 rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok);
17022 *ppTokApi = &pMod->x;
17023 if( rc!=SQLITE_OK && pzErr ){
17024 *pzErr = sqlite3_mprintf("error in tokenizer constructor");
17025 }
17026 }
17027
17028 if( rc!=SQLITE_OK ){
17029 *ppTokApi = 0;
17030 *ppTok = 0;
17031 }
17032
17033 return rc;
17034 }
17035
17036 static void fts5ModuleDestroy(void *pCtx){
17037 Fts5TokenizerModule *pTok, *pNextTok;
17038 Fts5Auxiliary *pAux, *pNextAux;
17039 Fts5Global *pGlobal = (Fts5Global*)pCtx;
17040
17041 for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){
17042 pNextAux = pAux->pNext;
17043 if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
17044 sqlite3_free(pAux);
17045 }
17046
17047 for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){
17048 pNextTok = pTok->pNext;
17049 if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
17050 sqlite3_free(pTok);
17051 }
17052
17053 sqlite3_free(pGlobal);
17054 }
17055
17056 static void fts5Fts5Func(
17057 sqlite3_context *pCtx, /* Function call context */
17058 int nArg, /* Number of args */
17059 sqlite3_value **apUnused /* Function arguments */
17060 ){
17061 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
17062 char buf[8];
17063 UNUSED_PARAM2(nArg, apUnused);
17064 assert( nArg==0 );
17065 assert( sizeof(buf)>=sizeof(pGlobal) );
17066 memcpy(buf, (void*)&pGlobal, sizeof(pGlobal));
17067 sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT);
17068 }
17069
17070 /*
17071 ** Implementation of fts5_source_id() function.
17072 */
17073 static void fts5SourceIdFunc(
17074 sqlite3_context *pCtx, /* Function call context */
17075 int nArg, /* Number of args */
17076 sqlite3_value **apUnused /* Function arguments */
17077 ){
17078 assert( nArg==0 );
17079 UNUSED_PARAM2(nArg, apUnused);
17080 sqlite3_result_text(pCtx, "fts5: 2017-02-13 16:02:40 ada05cfa86ad7f5645450ac7a 2a21c9aa6e57d2c", -1, SQLITE_TRANSIENT);
17081 }
17082
/*
** Register the fts5 module and its supporting SQL functions with database
** handle db.
**
** An Fts5Global object is allocated and registered as the client data of
** the "fts5" virtual table module, with fts5ModuleDestroy() as its
** destructor. The index, expression, auxiliary-function, tokenizer and
** vocab sub-systems are then initialised in that order, followed by the
** SQL functions fts5() and fts5_source_id(). Each step runs only if every
** earlier step returned SQLITE_OK.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the global object cannot
** be allocated, or the error code of the first step that failed.
*/
static int fts5Init(sqlite3 *db){
  /* Positional initializer: entries follow the order of the method slots
  ** named in the comments. */
  static const sqlite3_module fts5Mod = {
    /* iVersion      */ 2,
    /* xCreate       */ fts5CreateMethod,
    /* xConnect      */ fts5ConnectMethod,
    /* xBestIndex    */ fts5BestIndexMethod,
    /* xDisconnect   */ fts5DisconnectMethod,
    /* xDestroy      */ fts5DestroyMethod,
    /* xOpen         */ fts5OpenMethod,
    /* xClose        */ fts5CloseMethod,
    /* xFilter       */ fts5FilterMethod,
    /* xNext         */ fts5NextMethod,
    /* xEof          */ fts5EofMethod,
    /* xColumn       */ fts5ColumnMethod,
    /* xRowid        */ fts5RowidMethod,
    /* xUpdate       */ fts5UpdateMethod,
    /* xBegin        */ fts5BeginMethod,
    /* xSync         */ fts5SyncMethod,
    /* xCommit       */ fts5CommitMethod,
    /* xRollback     */ fts5RollbackMethod,
    /* xFindFunction */ fts5FindFunctionMethod,
    /* xRename       */ fts5RenameMethod,
    /* xSavepoint    */ fts5SavepointMethod,
    /* xRelease      */ fts5ReleaseMethod,
    /* xRollbackTo   */ fts5RollbackToMethod,
  };

  int rc;
  Fts5Global *pGlobal = 0;

  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 2;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    /* NOTE(review): pGlobal is never freed in this function, so ownership
    ** presumably passes to SQLite here (released via fts5ModuleDestroy),
    ** including when registration fails - confirm against the
    ** sqlite3_create_module_v2() documentation. */
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db);
    if( rc==SQLITE_OK ){
      /* fts5(): returns the Fts5Global pointer as a blob. */
      rc = sqlite3_create_function(
          db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0
      );
    }
    if( rc==SQLITE_OK ){
      /* fts5_source_id(): returns the source version string. */
      rc = sqlite3_create_function(
          db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0
      );
    }
  }

  /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
  ** fts5_test_mi.c is compiled and linked into the executable. And call
  ** its entry point to enable the matchinfo() demo.  */
#ifdef SQLITE_FTS5_ENABLE_TEST_MI
  if( rc==SQLITE_OK ){
    extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*);
    rc = sqlite3Fts5TestRegisterMatchinfo(db);
  }
#endif

  return rc;
}
17154
17155 /*
17156 ** The following functions are used to register the module with SQLite. If
17157 ** this module is being built as part of the SQLite core (SQLITE_CORE is
17158 ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
17159 **
17160 ** Or, if this module is being built as a loadable extension,
17161 ** sqlite3Fts5Init() is omitted and the two standard entry points
17162 ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead.
17163 */
17164 #ifndef SQLITE_CORE
17165 #ifdef _WIN32
17166 __declspec(dllexport)
17167 #endif
17168 SQLITE_API int sqlite3_fts_init(
17169 sqlite3 *db,
17170 char **pzErrMsg,
17171 const sqlite3_api_routines *pApi
17172 ){
17173 SQLITE_EXTENSION_INIT2(pApi);
17174 (void)pzErrMsg; /* Unused parameter */
17175 return fts5Init(db);
17176 }
17177
17178 #ifdef _WIN32
17179 __declspec(dllexport)
17180 #endif
17181 SQLITE_API int sqlite3_fts5_init(
17182 sqlite3 *db,
17183 char **pzErrMsg,
17184 const sqlite3_api_routines *pApi
17185 ){
17186 SQLITE_EXTENSION_INIT2(pApi);
17187 (void)pzErrMsg; /* Unused parameter */
17188 return fts5Init(db);
17189 }
17190 #else
17191 SQLITE_PRIVATE int sqlite3Fts5Init(sqlite3 *db){
17192 return fts5Init(db);
17193 }
17194 #endif
17195
17196 /*
17197 ** 2014 May 31
17198 **
17199 ** The author disclaims copyright to this source code. In place of
17200 ** a legal notice, here is a blessing:
17201 **
17202 ** May you do good and not evil.
17203 ** May you find forgiveness for yourself and forgive others.
17204 ** May you share freely, never taking more than you give.
17205 **
17206 ******************************************************************************
17207 **
17208 */
17209
17210
17211
17212 /* #include "fts5Int.h" */
17213
/*
** Storage-layer handle for one FTS5 table, created by
** sqlite3Fts5StorageOpen() and destroyed by sqlite3Fts5StorageClose().
** The aTotalSize[] array lives in the same allocation, directly after
** the structure.
*/
struct Fts5Storage {
  Fts5Config *pConfig;            /* Configuration passed to StorageOpen() */
  Fts5Index *pIndex;              /* Index handle passed to StorageOpen() */
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Cached statements, by FTS5_STMT_XXX */
};
17222
17223
/*
** Statement ids, used as indexes into Fts5Storage.aStmt[] and into the
** azStmt[] table in fts5StorageGetStmt(). The first three are defined
** elsewhere (not visible in this part of the file); the checks below pin
** their values so that they stay aligned with the ids defined here.
*/
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7
#define FTS5_STMT_LOOKUP_DOCSIZE  8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10
17242
/*
** Obtain the prepared statement identified by eStmt (an FTS5_STMT_XXX
** constant). The statement is compiled the first time it is requested and
** cached in p->aStmt[]; later requests reuse the cached handle. In every
** case *ppStmt is set to the cached entry and sqlite3_reset() is called
** on it before returning.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs. If compiling the statement fails and pzErrMsg is not NULL,
** *pzErrMsg is set to a copy of the database error message obtained from
** sqlite3_mprintf().
*/
static int fts5StorageGetStmt(
  Fts5Storage *p,                 /* Storage handle */
  int eStmt,                      /* FTS5_STMT_XXX constant */
  sqlite3_stmt **ppStmt,          /* OUT: Prepared statement handle */
  char **pzErrMsg                 /* OUT: Error message (if any) */
){
  int rc = SQLITE_OK;

  /* If there is no %_docsize table, there should be no requests for
  ** statements to operate on it.  */
  assert( p->pConfig->bColumnsize || (
        eStmt!=FTS5_STMT_REPLACE_DOCSIZE
     && eStmt!=FTS5_STMT_DELETE_DOCSIZE
     && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE
  ));

  assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
  if( p->aStmt[eStmt]==0 ){
    /* SQL templates, indexed by FTS5_STMT_XXX value. */
    const char *azStmt[] = {
      "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
      "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
      "SELECT %s FROM %s T WHERE T.%Q=?",               /* LOOKUP  */

      "INSERT INTO %Q.'%q_content' VALUES(%s)",         /* INSERT_CONTENT  */
      "REPLACE INTO %Q.'%q_content' VALUES(%s)",        /* REPLACE_CONTENT */
      "DELETE FROM %Q.'%q_content' WHERE id=?",         /* DELETE_CONTENT  */
      "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",       /* REPLACE_DOCSIZE  */
      "DELETE FROM %Q.'%q_docsize' WHERE id=?",         /* DELETE_DOCSIZE  */

      "SELECT sz FROM %Q.'%q_docsize' WHERE id=?",      /* LOOKUP_DOCSIZE  */

      "REPLACE INTO %Q.'%q_config' VALUES(?,?)",        /* REPLACE_CONFIG */
      "SELECT %s FROM %s AS T",                         /* SCAN */
    };
    Fts5Config *pC = p->pConfig;
    char *zSql = 0;

    /* Expand the template; the arguments differ per statement type. */
    switch( eStmt ){
      case FTS5_STMT_SCAN:
        zSql = sqlite3_mprintf(azStmt[eStmt],
            pC->zContentExprlist, pC->zContent
        );
        break;

      case FTS5_STMT_SCAN_ASC:
      case FTS5_STMT_SCAN_DESC:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist,
            pC->zContent, pC->zContentRowid, pC->zContentRowid,
            pC->zContentRowid
        );
        break;

      case FTS5_STMT_LOOKUP:
        zSql = sqlite3_mprintf(azStmt[eStmt],
            pC->zContentExprlist, pC->zContent, pC->zContentRowid
        );
        break;

      case FTS5_STMT_INSERT_CONTENT:
      case FTS5_STMT_REPLACE_CONTENT: {
        /* One bound parameter per user column, plus one more. */
        int nCol = pC->nCol + 1;
        char *zBind;
        int i;

        /* Build the "?,?,...,?" list for the VALUES clause. */
        zBind = sqlite3_malloc(1 + nCol*2);
        if( zBind ){
          for(i=0; i<nCol; i++){
            zBind[i*2] = '?';
            zBind[i*2 + 1] = ',';
          }
          /* Overwrite the trailing comma with the terminator. */
          zBind[i*2-1] = '\0';
          zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
          sqlite3_free(zBind);
        }
        break;
      }

      default:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
        break;
    }

    /* zSql is NULL if any allocation above failed. */
    if( zSql==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0);
      sqlite3_free(zSql);
      if( rc!=SQLITE_OK && pzErrMsg ){
        *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
      }
    }
  }

  *ppStmt = p->aStmt[eStmt];
  sqlite3_reset(*ppStmt);
  return rc;
}
17346
17347
17348 static int fts5ExecPrintf(
17349 sqlite3 *db,
17350 char **pzErr,
17351 const char *zFormat,
17352 ...
17353 ){
17354 int rc;
17355 va_list ap; /* ... printf arguments */
17356 char *zSql;
17357
17358 va_start(ap, zFormat);
17359 zSql = sqlite3_vmprintf(zFormat, ap);
17360
17361 if( zSql==0 ){
17362 rc = SQLITE_NOMEM;
17363 }else{
17364 rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
17365 sqlite3_free(zSql);
17366 }
17367
17368 va_end(ap);
17369 return rc;
17370 }
17371
17372 /*
17373 ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
17374 ** code otherwise.
17375 */
17376 static int sqlite3Fts5DropAll(Fts5Config *pConfig){
17377 int rc = fts5ExecPrintf(pConfig->db, 0,
17378 "DROP TABLE IF EXISTS %Q.'%q_data';"
17379 "DROP TABLE IF EXISTS %Q.'%q_idx';"
17380 "DROP TABLE IF EXISTS %Q.'%q_config';",
17381 pConfig->zDb, pConfig->zName,
17382 pConfig->zDb, pConfig->zName,
17383 pConfig->zDb, pConfig->zName
17384 );
17385 if( rc==SQLITE_OK && pConfig->bColumnsize ){
17386 rc = fts5ExecPrintf(pConfig->db, 0,
17387 "DROP TABLE IF EXISTS %Q.'%q_docsize';",
17388 pConfig->zDb, pConfig->zName
17389 );
17390 }
17391 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
17392 rc = fts5ExecPrintf(pConfig->db, 0,
17393 "DROP TABLE IF EXISTS %Q.'%q_content';",
17394 pConfig->zDb, pConfig->zName
17395 );
17396 }
17397 return rc;
17398 }
17399
17400 static void fts5StorageRenameOne(
17401 Fts5Config *pConfig, /* Current FTS5 configuration */
17402 int *pRc, /* IN/OUT: Error code */
17403 const char *zTail, /* Tail of table name e.g. "data", "config" */
17404 const char *zName /* New name of FTS5 table */
17405 ){
17406 if( *pRc==SQLITE_OK ){
17407 *pRc = fts5ExecPrintf(pConfig->db, 0,
17408 "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
17409 pConfig->zDb, pConfig->zName, zTail, zName, zTail
17410 );
17411 }
17412 }
17413
17414 static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
17415 Fts5Config *pConfig = pStorage->pConfig;
17416 int rc = sqlite3Fts5StorageSync(pStorage, 1);
17417
17418 fts5StorageRenameOne(pConfig, &rc, "data", zName);
17419 fts5StorageRenameOne(pConfig, &rc, "idx", zName);
17420 fts5StorageRenameOne(pConfig, &rc, "config", zName);
17421 if( pConfig->bColumnsize ){
17422 fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
17423 }
17424 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17425 fts5StorageRenameOne(pConfig, &rc, "content", zName);
17426 }
17427 return rc;
17428 }
17429
17430 /*
17431 ** Create the shadow table named zPost, with definition zDefn. Return
17432 ** SQLITE_OK if successful, or an SQLite error code otherwise.
17433 */
17434 static int sqlite3Fts5CreateTable(
17435 Fts5Config *pConfig, /* FTS5 configuration */
17436 const char *zPost, /* Shadow table to create (e.g. "content") */
17437 const char *zDefn, /* Columns etc. for shadow table */
17438 int bWithout, /* True for without rowid */
17439 char **pzErr /* OUT: Error message */
17440 ){
17441 int rc;
17442 char *zErr = 0;
17443
17444 rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
17445 pConfig->zDb, pConfig->zName, zPost, zDefn,
17446 #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID
17447 bWithout?" WITHOUT ROWID":
17448 #endif
17449 ""
17450 );
17451 if( zErr ){
17452 *pzErr = sqlite3_mprintf(
17453 "fts5: error creating shadow table %q_%s: %s",
17454 pConfig->zName, zPost, zErr
17455 );
17456 sqlite3_free(zErr);
17457 }
17458
17459 return rc;
17460 }
17461
17462 /*
17463 ** Open a new Fts5Index handle. If the bCreate argument is true, create
17464 ** and initialize the underlying tables
17465 **
17466 ** If successful, set *pp to point to the new object and return SQLITE_OK.
17467 ** Otherwise, set *pp to NULL and return an SQLite error code.
17468 */
17469 static int sqlite3Fts5StorageOpen(
17470 Fts5Config *pConfig,
17471 Fts5Index *pIndex,
17472 int bCreate,
17473 Fts5Storage **pp,
17474 char **pzErr /* OUT: Error message */
17475 ){
17476 int rc = SQLITE_OK;
17477 Fts5Storage *p; /* New object */
17478 int nByte; /* Bytes of space to allocate */
17479
17480 nByte = sizeof(Fts5Storage) /* Fts5Storage object */
17481 + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */
17482 *pp = p = (Fts5Storage*)sqlite3_malloc(nByte);
17483 if( !p ) return SQLITE_NOMEM;
17484
17485 memset(p, 0, nByte);
17486 p->aTotalSize = (i64*)&p[1];
17487 p->pConfig = pConfig;
17488 p->pIndex = pIndex;
17489
17490 if( bCreate ){
17491 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17492 int nDefn = 32 + pConfig->nCol*10;
17493 char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10);
17494 if( zDefn==0 ){
17495 rc = SQLITE_NOMEM;
17496 }else{
17497 int i;
17498 int iOff;
17499 sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
17500 iOff = (int)strlen(zDefn);
17501 for(i=0; i<pConfig->nCol; i++){
17502 sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
17503 iOff += (int)strlen(&zDefn[iOff]);
17504 }
17505 rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
17506 }
17507 sqlite3_free(zDefn);
17508 }
17509
17510 if( rc==SQLITE_OK && pConfig->bColumnsize ){
17511 rc = sqlite3Fts5CreateTable(
17512 pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr
17513 );
17514 }
17515 if( rc==SQLITE_OK ){
17516 rc = sqlite3Fts5CreateTable(
17517 pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
17518 );
17519 }
17520 if( rc==SQLITE_OK ){
17521 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
17522 }
17523 }
17524
17525 if( rc ){
17526 sqlite3Fts5StorageClose(p);
17527 *pp = 0;
17528 }
17529 return rc;
17530 }
17531
17532 /*
17533 ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
17534 */
17535 static int sqlite3Fts5StorageClose(Fts5Storage *p){
17536 int rc = SQLITE_OK;
17537 if( p ){
17538 int i;
17539
17540 /* Finalize all SQL statements */
17541 for(i=0; i<ArraySize(p->aStmt); i++){
17542 sqlite3_finalize(p->aStmt[i]);
17543 }
17544
17545 sqlite3_free(p);
17546 }
17547 return rc;
17548 }
17549
/*
** Context object handed to fts5StorageInsertCallback() (as its pContext
** argument) while a column value is being tokenized.
*/
typedef struct Fts5InsertCtx Fts5InsertCtx;
struct Fts5InsertCtx {
  Fts5Storage *pStorage;          /* Storage handle; its pIndex is written to */
  int iCol;                       /* Column number passed to the index */
  int szCol;                      /* Size of column value in tokens */
};
17556
17557 /*
17558 ** Tokenization callback used when inserting tokens into the FTS index.
17559 */
17560 static int fts5StorageInsertCallback(
17561 void *pContext, /* Pointer to Fts5InsertCtx object */
17562 int tflags,
17563 const char *pToken, /* Buffer containing token */
17564 int nToken, /* Size of token in bytes */
17565 int iUnused1, /* Start offset of token */
17566 int iUnused2 /* End offset of token */
17567 ){
17568 Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
17569 Fts5Index *pIdx = pCtx->pStorage->pIndex;
17570 UNUSED_PARAM2(iUnused1, iUnused2);
17571 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
17572 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
17573 pCtx->szCol++;
17574 }
17575 return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
17576 }
17577
17578 /*
17579 ** If a row with rowid iDel is present in the %_content table, add the
17580 ** delete-markers to the FTS index necessary to delete it. Do not actually
17581 ** remove the %_content row at this time though.
17582 */
17583 static int fts5StorageDeleteFromIndex(
17584 Fts5Storage *p,
17585 i64 iDel,
17586 sqlite3_value **apVal
17587 ){
17588 Fts5Config *pConfig = p->pConfig;
17589 sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */
17590 int rc; /* Return code */
17591 int rc2; /* sqlite3_reset() return code */
17592 int iCol;
17593 Fts5InsertCtx ctx;
17594
17595 if( apVal==0 ){
17596 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
17597 if( rc!=SQLITE_OK ) return rc;
17598 sqlite3_bind_int64(pSeek, 1, iDel);
17599 if( sqlite3_step(pSeek)!=SQLITE_ROW ){
17600 return sqlite3_reset(pSeek);
17601 }
17602 }
17603
17604 ctx.pStorage = p;
17605 ctx.iCol = -1;
17606 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel);
17607 for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
17608 if( pConfig->abUnindexed[iCol-1]==0 ){
17609 const char *zText;
17610 int nText;
17611 if( pSeek ){
17612 zText = (const char*)sqlite3_column_text(pSeek, iCol);
17613 nText = sqlite3_column_bytes(pSeek, iCol);
17614 }else{
17615 zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
17616 nText = sqlite3_value_bytes(apVal[iCol-1]);
17617 }
17618 ctx.szCol = 0;
17619 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
17620 zText, nText, (void*)&ctx, fts5StorageInsertCallback
17621 );
17622 p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
17623 }
17624 }
17625 p->nTotalRow--;
17626
17627 rc2 = sqlite3_reset(pSeek);
17628 if( rc==SQLITE_OK ) rc = rc2;
17629 return rc;
17630 }
17631
17632
17633 /*
17634 ** Insert a record into the %_docsize table. Specifically, do:
17635 **
17636 ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
17637 **
17638 ** If there is no %_docsize table (as happens if the columnsize=0 option
17639 ** is specified when the FTS5 table is created), this function is a no-op.
17640 */
17641 static int fts5StorageInsertDocsize(
17642 Fts5Storage *p, /* Storage module to write to */
17643 i64 iRowid, /* id value */
17644 Fts5Buffer *pBuf /* sz value */
17645 ){
17646 int rc = SQLITE_OK;
17647 if( p->pConfig->bColumnsize ){
17648 sqlite3_stmt *pReplace = 0;
17649 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
17650 if( rc==SQLITE_OK ){
17651 sqlite3_bind_int64(pReplace, 1, iRowid);
17652 sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
17653 sqlite3_step(pReplace);
17654 rc = sqlite3_reset(pReplace);
17655 }
17656 }
17657 return rc;
17658 }
17659
17660 /*
17661 ** Load the contents of the "averages" record from disk into the
17662 ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
17663 ** argument bCache is true, set the p->bTotalsValid flag to indicate
17664 ** that the contents of aTotalSize[] and nTotalRow are valid until
17665 ** further notice.
17666 **
17667 ** Return SQLITE_OK if successful, or an SQLite error code if an error
17668 ** occurs.
17669 */
17670 static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
17671 int rc = SQLITE_OK;
17672 if( p->bTotalsValid==0 ){
17673 rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
17674 p->bTotalsValid = bCache;
17675 }
17676 return rc;
17677 }
17678
17679 /*
17680 ** Store the current contents of the p->nTotalRow and p->aTotalSize[]
17681 ** variables in the "averages" record on disk.
17682 **
17683 ** Return SQLITE_OK if successful, or an SQLite error code if an error
17684 ** occurs.
17685 */
17686 static int fts5StorageSaveTotals(Fts5Storage *p){
17687 int nCol = p->pConfig->nCol;
17688 int i;
17689 Fts5Buffer buf;
17690 int rc = SQLITE_OK;
17691 memset(&buf, 0, sizeof(buf));
17692
17693 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow);
17694 for(i=0; i<nCol; i++){
17695 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]);
17696 }
17697 if( rc==SQLITE_OK ){
17698 rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n);
17699 }
17700 sqlite3_free(buf.p);
17701
17702 return rc;
17703 }
17704
17705 /*
17706 ** Remove a row from the FTS table.
17707 */
17708 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **ap Val){
17709 Fts5Config *pConfig = p->pConfig;
17710 int rc;
17711 sqlite3_stmt *pDel = 0;
17712
17713 assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 );
17714 rc = fts5StorageLoadTotals(p, 1);
17715
17716 /* Delete the index records */
17717 if( rc==SQLITE_OK ){
17718 rc = fts5StorageDeleteFromIndex(p, iDel, apVal);
17719 }
17720
17721 /* Delete the %_docsize record */
17722 if( rc==SQLITE_OK && pConfig->bColumnsize ){
17723 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0);
17724 if( rc==SQLITE_OK ){
17725 sqlite3_bind_int64(pDel, 1, iDel);
17726 sqlite3_step(pDel);
17727 rc = sqlite3_reset(pDel);
17728 }
17729 }
17730
17731 /* Delete the %_content record */
17732 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17733 if( rc==SQLITE_OK ){
17734 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0);
17735 }
17736 if( rc==SQLITE_OK ){
17737 sqlite3_bind_int64(pDel, 1, iDel);
17738 sqlite3_step(pDel);
17739 rc = sqlite3_reset(pDel);
17740 }
17741 }
17742
17743 /* Write the averages record */
17744 if( rc==SQLITE_OK ){
17745 rc = fts5StorageSaveTotals(p);
17746 }
17747
17748 return rc;
17749 }
17750
17751 /*
17752 ** Delete all entries in the FTS5 index.
17753 */
17754 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
17755 Fts5Config *pConfig = p->pConfig;
17756 int rc;
17757
17758 /* Delete the contents of the %_data and %_docsize tables. */
17759 rc = fts5ExecPrintf(pConfig->db, 0,
17760 "DELETE FROM %Q.'%q_data';"
17761 "DELETE FROM %Q.'%q_idx';",
17762 pConfig->zDb, pConfig->zName,
17763 pConfig->zDb, pConfig->zName
17764 );
17765 if( rc==SQLITE_OK && pConfig->bColumnsize ){
17766 rc = fts5ExecPrintf(pConfig->db, 0,
17767 "DELETE FROM %Q.'%q_docsize';",
17768 pConfig->zDb, pConfig->zName
17769 );
17770 }
17771
17772 /* Reinitialize the %_data table. This call creates the initial structure
17773 ** and averages records. */
17774 if( rc==SQLITE_OK ){
17775 rc = sqlite3Fts5IndexReinit(p->pIndex);
17776 }
17777 if( rc==SQLITE_OK ){
17778 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
17779 }
17780 return rc;
17781 }
17782
17783 static int sqlite3Fts5StorageRebuild(Fts5Storage *p){
17784 Fts5Buffer buf = {0,0,0};
17785 Fts5Config *pConfig = p->pConfig;
17786 sqlite3_stmt *pScan = 0;
17787 Fts5InsertCtx ctx;
17788 int rc;
17789
17790 memset(&ctx, 0, sizeof(Fts5InsertCtx));
17791 ctx.pStorage = p;
17792 rc = sqlite3Fts5StorageDeleteAll(p);
17793 if( rc==SQLITE_OK ){
17794 rc = fts5StorageLoadTotals(p, 1);
17795 }
17796
17797 if( rc==SQLITE_OK ){
17798 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
17799 }
17800
17801 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){
17802 i64 iRowid = sqlite3_column_int64(pScan, 0);
17803
17804 sqlite3Fts5BufferZero(&buf);
17805 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
17806 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
17807 ctx.szCol = 0;
17808 if( pConfig->abUnindexed[ctx.iCol]==0 ){
17809 rc = sqlite3Fts5Tokenize(pConfig,
17810 FTS5_TOKENIZE_DOCUMENT,
17811 (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
17812 sqlite3_column_bytes(pScan, ctx.iCol+1),
17813 (void*)&ctx,
17814 fts5StorageInsertCallback
17815 );
17816 }
17817 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
17818 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
17819 }
17820 p->nTotalRow++;
17821
17822 if( rc==SQLITE_OK ){
17823 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
17824 }
17825 }
17826 sqlite3_free(buf.p);
17827
17828 /* Write the averages record */
17829 if( rc==SQLITE_OK ){
17830 rc = fts5StorageSaveTotals(p);
17831 }
17832 return rc;
17833 }
17834
17835 static int sqlite3Fts5StorageOptimize(Fts5Storage *p){
17836 return sqlite3Fts5IndexOptimize(p->pIndex);
17837 }
17838
17839 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
17840 return sqlite3Fts5IndexMerge(p->pIndex, nMerge);
17841 }
17842
17843 static int sqlite3Fts5StorageReset(Fts5Storage *p){
17844 return sqlite3Fts5IndexReset(p->pIndex);
17845 }
17846
17847 /*
17848 ** Allocate a new rowid. This is used for "external content" tables when
17849 ** a NULL value is inserted into the rowid column. The new rowid is allocated
17850 ** by inserting a dummy row into the %_docsize table. The dummy will be
17851 ** overwritten later.
17852 **
17853 ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
17854 ** this case the user is required to provide a rowid explicitly.
17855 */
17856 static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
17857 int rc = SQLITE_MISMATCH;
17858 if( p->pConfig->bColumnsize ){
17859 sqlite3_stmt *pReplace = 0;
17860 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
17861 if( rc==SQLITE_OK ){
17862 sqlite3_bind_null(pReplace, 1);
17863 sqlite3_bind_null(pReplace, 2);
17864 sqlite3_step(pReplace);
17865 rc = sqlite3_reset(pReplace);
17866 }
17867 if( rc==SQLITE_OK ){
17868 *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
17869 }
17870 }
17871 return rc;
17872 }
17873
17874 /*
17875 ** Insert a new row into the FTS content table.
17876 */
17877 static int sqlite3Fts5StorageContentInsert(
17878 Fts5Storage *p,
17879 sqlite3_value **apVal,
17880 i64 *piRowid
17881 ){
17882 Fts5Config *pConfig = p->pConfig;
17883 int rc = SQLITE_OK;
17884
17885 /* Insert the new row into the %_content table. */
17886 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
17887 if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
17888 *piRowid = sqlite3_value_int64(apVal[1]);
17889 }else{
17890 rc = fts5StorageNewRowid(p, piRowid);
17891 }
17892 }else{
17893 sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */
17894 int i; /* Counter variable */
17895 rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
17896 for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
17897 rc = sqlite3_bind_value(pInsert, i, apVal[i]);
17898 }
17899 if( rc==SQLITE_OK ){
17900 sqlite3_step(pInsert);
17901 rc = sqlite3_reset(pInsert);
17902 }
17903 *piRowid = sqlite3_last_insert_rowid(pConfig->db);
17904 }
17905
17906 return rc;
17907 }
17908
17909 /*
17910 ** Insert new entries into the FTS index and %_docsize table.
17911 */
17912 static int sqlite3Fts5StorageIndexInsert(
17913 Fts5Storage *p,
17914 sqlite3_value **apVal,
17915 i64 iRowid
17916 ){
17917 Fts5Config *pConfig = p->pConfig;
17918 int rc = SQLITE_OK; /* Return code */
17919 Fts5InsertCtx ctx; /* Tokenization callback context object */
17920 Fts5Buffer buf; /* Buffer used to build up %_docsize blob */
17921
17922 memset(&buf, 0, sizeof(Fts5Buffer));
17923 ctx.pStorage = p;
17924 rc = fts5StorageLoadTotals(p, 1);
17925
17926 if( rc==SQLITE_OK ){
17927 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
17928 }
17929 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
17930 ctx.szCol = 0;
17931 if( pConfig->abUnindexed[ctx.iCol]==0 ){
17932 rc = sqlite3Fts5Tokenize(pConfig,
17933 FTS5_TOKENIZE_DOCUMENT,
17934 (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
17935 sqlite3_value_bytes(apVal[ctx.iCol+2]),
17936 (void*)&ctx,
17937 fts5StorageInsertCallback
17938 );
17939 }
17940 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
17941 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
17942 }
17943 p->nTotalRow++;
17944
17945 /* Write the %_docsize record */
17946 if( rc==SQLITE_OK ){
17947 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
17948 }
17949 sqlite3_free(buf.p);
17950
17951 /* Write the averages record */
17952 if( rc==SQLITE_OK ){
17953 rc = fts5StorageSaveTotals(p);
17954 }
17955
17956 return rc;
17957 }
17958
17959 static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
17960 Fts5Config *pConfig = p->pConfig;
17961 char *zSql;
17962 int rc;
17963
17964 zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'",
17965 pConfig->zDb, pConfig->zName, zSuffix
17966 );
17967 if( zSql==0 ){
17968 rc = SQLITE_NOMEM;
17969 }else{
17970 sqlite3_stmt *pCnt = 0;
17971 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
17972 if( rc==SQLITE_OK ){
17973 if( SQLITE_ROW==sqlite3_step(pCnt) ){
17974 *pnRow = sqlite3_column_int64(pCnt, 0);
17975 }
17976 rc = sqlite3_finalize(pCnt);
17977 }
17978 }
17979
17980 sqlite3_free(zSql);
17981 return rc;
17982 }
17983
/*
** Context object used by sqlite3Fts5StorageIntegrity(). A pointer to it is
** passed to fts5StorageIntegrityCallback() for every token.
*/
typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
struct Fts5IntegrityCtx {
  i64 iRowid;                     /* Rowid of the row being tokenized */
  int iCol;                       /* Column being tokenized */
  int szCol;                      /* Tokens counted so far in this column */
  u64 cksum;                      /* Running XOR of index entry checksums */
  Fts5Termset *pTermset;          /* Terms already seen, or NULL */
  Fts5Config *pConfig;            /* Configuration of the table */
};
17996
17997
17998 /*
17999 ** Tokenization callback used by integrity check.
18000 */
18001 static int fts5StorageIntegrityCallback(
18002 void *pContext, /* Pointer to Fts5IntegrityCtx object */
18003 int tflags,
18004 const char *pToken, /* Buffer containing token */
18005 int nToken, /* Size of token in bytes */
18006 int iUnused1, /* Start offset of token */
18007 int iUnused2 /* End offset of token */
18008 ){
18009 Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
18010 Fts5Termset *pTermset = pCtx->pTermset;
18011 int bPresent;
18012 int ii;
18013 int rc = SQLITE_OK;
18014 int iPos;
18015 int iCol;
18016
18017 UNUSED_PARAM2(iUnused1, iUnused2);
18018 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
18019
18020 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
18021 pCtx->szCol++;
18022 }
18023
18024 switch( pCtx->pConfig->eDetail ){
18025 case FTS5_DETAIL_FULL:
18026 iPos = pCtx->szCol-1;
18027 iCol = pCtx->iCol;
18028 break;
18029
18030 case FTS5_DETAIL_COLUMNS:
18031 iPos = pCtx->iCol;
18032 iCol = 0;
18033 break;
18034
18035 default:
18036 assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE );
18037 iPos = 0;
18038 iCol = 0;
18039 break;
18040 }
18041
18042 rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
18043 if( rc==SQLITE_OK && bPresent==0 ){
18044 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
18045 pCtx->iRowid, iCol, iPos, 0, pToken, nToken
18046 );
18047 }
18048
18049 for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
18050 const int nChar = pCtx->pConfig->aPrefix[ii];
18051 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
18052 if( nByte ){
18053 rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
18054 if( bPresent==0 ){
18055 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
18056 pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
18057 );
18058 }
18059 }
18060 }
18061
18062 return rc;
18063 }
18064
/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or FTS5_CORRUPT if not. Return
** some other SQLite error code if an error occurs while attempting to
** determine this.
**
** The check proceeds in four stages, each skipped once rc is no longer
** SQLITE_OK:
**
**   1. Scan every row, tokenizing each indexed column to accumulate the
**      expected index checksum and per-column token totals, and compare
**      the per-row column sizes against the %_docsize record.
**   2. Compare the accumulated totals against the stored totals record.
**   3. Compare the row counts of %_content and %_docsize against the
**      stored total row count.
**   4. Hand the expected checksum to the index module for verification.
*/
static int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
  Fts5Config *pConfig = p->pConfig;
  int rc;                         /* Return code */
  int *aColSize;                  /* Array of size pConfig->nCol */
  i64 *aTotalSize;                /* Array of size pConfig->nCol */
  Fts5IntegrityCtx ctx;
  sqlite3_stmt *pScan;

  memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
  ctx.pConfig = p->pConfig;

  /* A single allocation holds aTotalSize[] followed by aColSize[]. Only
  ** the aTotalSize[] part is zeroed; aColSize[] is populated by
  ** sqlite3Fts5StorageDocsize() before it is read. */
  aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64)));
  if( !aTotalSize ) return SQLITE_NOMEM;
  aColSize = (int*)&aTotalSize[pConfig->nCol];
  memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);

  /* Generate the expected index checksum based on the contents of the
  ** %_content table. This block stores the checksum in ctx.cksum. */
  rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    while( SQLITE_ROW==sqlite3_step(pScan) ){
      int i;
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;
      if( pConfig->bColumnsize ){
        rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
      }
      /* With detail=none a single termset covers the whole row */
      if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){
        rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
      }
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        /* With detail=columns a fresh termset is used for each column */
        if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
          rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
        }
        if( rc==SQLITE_OK ){
          rc = sqlite3Fts5Tokenize(pConfig, 
              FTS5_TOKENIZE_DOCUMENT,
              (const char*)sqlite3_column_text(pScan, i+1),
              sqlite3_column_bytes(pScan, i+1),
              (void*)&ctx,
              fts5StorageIntegrityCallback
          );
        }
        /* The token count must agree with the %_docsize entry */
        if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
          rc = FTS5_CORRUPT;
        }
        aTotalSize[i] += ctx.szCol;
        if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
          sqlite3Fts5TermsetFree(ctx.pTermset);
          ctx.pTermset = 0;
        }
      }
      sqlite3Fts5TermsetFree(ctx.pTermset);
      ctx.pTermset = 0;

      if( rc!=SQLITE_OK ) break;
    }
    /* An error from the scan itself is reported only if nothing else
    ** has already failed. */
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  /* Test that the "totals" (sometimes called "averages") record looks Ok */
  if( rc==SQLITE_OK ){
    int i;
    rc = fts5StorageLoadTotals(p, 0);
    for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
      if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
    }
  }

  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow = 0;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    i64 nRow = 0;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
  ** inspecting the index itself.  */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);
  }

  sqlite3_free(aTotalSize);
  return rc;
}
18167
18168 /*
18169 ** Obtain an SQLite statement handle that may be used to read data from the
18170 ** %_content table.
18171 */
18172 static int sqlite3Fts5StorageStmt(
18173 Fts5Storage *p,
18174 int eStmt,
18175 sqlite3_stmt **pp,
18176 char **pzErrMsg
18177 ){
18178 int rc;
18179 assert( eStmt==FTS5_STMT_SCAN_ASC
18180 || eStmt==FTS5_STMT_SCAN_DESC
18181 || eStmt==FTS5_STMT_LOOKUP
18182 );
18183 rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
18184 if( rc==SQLITE_OK ){
18185 assert( p->aStmt[eStmt]==*pp );
18186 p->aStmt[eStmt] = 0;
18187 }
18188 return rc;
18189 }
18190
18191 /*
18192 ** Release an SQLite statement handle obtained via an earlier call to
18193 ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function
18194 ** must match that passed to the sqlite3Fts5StorageStmt() call.
18195 */
18196 static void sqlite3Fts5StorageStmtRelease(
18197 Fts5Storage *p,
18198 int eStmt,
18199 sqlite3_stmt *pStmt
18200 ){
18201 assert( eStmt==FTS5_STMT_SCAN_ASC
18202 || eStmt==FTS5_STMT_SCAN_DESC
18203 || eStmt==FTS5_STMT_LOOKUP
18204 );
18205 if( p->aStmt[eStmt]==0 ){
18206 sqlite3_reset(pStmt);
18207 p->aStmt[eStmt] = pStmt;
18208 }else{
18209 sqlite3_finalize(pStmt);
18210 }
18211 }
18212
18213 static int fts5StorageDecodeSizeArray(
18214 int *aCol, int nCol, /* Array to populate */
18215 const u8 *aBlob, int nBlob /* Record to read varints from */
18216 ){
18217 int i;
18218 int iOff = 0;
18219 for(i=0; i<nCol; i++){
18220 if( iOff>=nBlob ) return 1;
18221 iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]);
18222 }
18223 return (iOff!=nBlob);
18224 }
18225
18226 /*
18227 ** Argument aCol points to an array of integers containing one entry for
18228 ** each table column. This function reads the %_docsize record for the
18229 ** specified rowid and populates aCol[] with the results.
18230 **
18231 ** An SQLite error code is returned if an error occurs, or SQLITE_OK
18232 ** otherwise.
18233 */
18234 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
18235 int nCol = p->pConfig->nCol; /* Number of user columns in table */
18236 sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */
18237 int rc; /* Return Code */
18238
18239 assert( p->pConfig->bColumnsize );
18240 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
18241 if( rc==SQLITE_OK ){
18242 int bCorrupt = 1;
18243 sqlite3_bind_int64(pLookup, 1, iRowid);
18244 if( SQLITE_ROW==sqlite3_step(pLookup) ){
18245 const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
18246 int nBlob = sqlite3_column_bytes(pLookup, 0);
18247 if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
18248 bCorrupt = 0;
18249 }
18250 }
18251 rc = sqlite3_reset(pLookup);
18252 if( bCorrupt && rc==SQLITE_OK ){
18253 rc = FTS5_CORRUPT;
18254 }
18255 }
18256
18257 return rc;
18258 }
18259
18260 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
18261 int rc = fts5StorageLoadTotals(p, 0);
18262 if( rc==SQLITE_OK ){
18263 *pnToken = 0;
18264 if( iCol<0 ){
18265 int i;
18266 for(i=0; i<p->pConfig->nCol; i++){
18267 *pnToken += p->aTotalSize[i];
18268 }
18269 }else if( iCol<p->pConfig->nCol ){
18270 *pnToken = p->aTotalSize[iCol];
18271 }else{
18272 rc = SQLITE_RANGE;
18273 }
18274 }
18275 return rc;
18276 }
18277
18278 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
18279 int rc = fts5StorageLoadTotals(p, 0);
18280 if( rc==SQLITE_OK ){
18281 *pnRow = p->nTotalRow;
18282 }
18283 return rc;
18284 }
18285
18286 /*
18287 ** Flush any data currently held in-memory to disk.
18288 */
18289 static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){
18290 if( bCommit && p->bTotalsValid ){
18291 int rc = fts5StorageSaveTotals(p);
18292 p->bTotalsValid = 0;
18293 if( rc!=SQLITE_OK ) return rc;
18294 }
18295 return sqlite3Fts5IndexSync(p->pIndex, bCommit);
18296 }
18297
18298 static int sqlite3Fts5StorageRollback(Fts5Storage *p){
18299 p->bTotalsValid = 0;
18300 return sqlite3Fts5IndexRollback(p->pIndex);
18301 }
18302
18303 static int sqlite3Fts5StorageConfigValue(
18304 Fts5Storage *p,
18305 const char *z,
18306 sqlite3_value *pVal,
18307 int iVal
18308 ){
18309 sqlite3_stmt *pReplace = 0;
18310 int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0);
18311 if( rc==SQLITE_OK ){
18312 sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC);
18313 if( pVal ){
18314 sqlite3_bind_value(pReplace, 2, pVal);
18315 }else{
18316 sqlite3_bind_int(pReplace, 2, iVal);
18317 }
18318 sqlite3_step(pReplace);
18319 rc = sqlite3_reset(pReplace);
18320 }
18321 if( rc==SQLITE_OK && pVal ){
18322 int iNew = p->pConfig->iCookie + 1;
18323 rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
18324 if( rc==SQLITE_OK ){
18325 p->pConfig->iCookie = iNew;
18326 }
18327 }
18328 return rc;
18329 }
18330
18331 /*
18332 ** 2014 May 31
18333 **
18334 ** The author disclaims copyright to this source code. In place of
18335 ** a legal notice, here is a blessing:
18336 **
18337 ** May you do good and not evil.
18338 ** May you find forgiveness for yourself and forgive others.
18339 ** May you share freely, never taking more than you give.
18340 **
18341 ******************************************************************************
18342 */
18343
18344
18345 /* #include "fts5Int.h" */
18346
18347 /**************************************************************************
18348 ** Start of ascii tokenizer implementation.
18349 */
18350
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
**
** The table is indexed by byte value (0x00..0x7F). An entry is 1 for the
** digits '0'..'9' and the letters 'A'..'Z' and 'a'..'z', and 0 for every
** other value. It is copied into each new tokenizer as its initial
** aTokenChar[] table.
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F */
};
18365
/*
** An "ascii" tokenizer object. It is allocated by fts5AsciiCreate() and
** released by fts5AsciiDelete().
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];  /* Non-zero for ASCII token characters */
};
18370
18371 static void fts5AsciiAddExceptions(
18372 AsciiTokenizer *p,
18373 const char *zArg,
18374 int bTokenChars
18375 ){
18376 int i;
18377 for(i=0; zArg[i]; i++){
18378 if( (zArg[i] & 0x80)==0 ){
18379 p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars;
18380 }
18381 }
18382 }
18383
18384 /*
18385 ** Delete a "ascii" tokenizer.
18386 */
18387 static void fts5AsciiDelete(Fts5Tokenizer *p){
18388 sqlite3_free(p);
18389 }
18390
18391 /*
18392 ** Create an "ascii" tokenizer.
18393 */
18394 static int fts5AsciiCreate(
18395 void *pUnused,
18396 const char **azArg, int nArg,
18397 Fts5Tokenizer **ppOut
18398 ){
18399 int rc = SQLITE_OK;
18400 AsciiTokenizer *p = 0;
18401 UNUSED_PARAM(pUnused);
18402 if( nArg%2 ){
18403 rc = SQLITE_ERROR;
18404 }else{
18405 p = sqlite3_malloc(sizeof(AsciiTokenizer));
18406 if( p==0 ){
18407 rc = SQLITE_NOMEM;
18408 }else{
18409 int i;
18410 memset(p, 0, sizeof(AsciiTokenizer));
18411 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
18412 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
18413 const char *zArg = azArg[i+1];
18414 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
18415 fts5AsciiAddExceptions(p, zArg, 1);
18416 }else
18417 if( 0==sqlite3_stricmp(azArg[i], "separators") ){
18418 fts5AsciiAddExceptions(p, zArg, 0);
18419 }else{
18420 rc = SQLITE_ERROR;
18421 }
18422 }
18423 if( rc!=SQLITE_OK ){
18424 fts5AsciiDelete((Fts5Tokenizer*)p);
18425 p = 0;
18426 }
18427 }
18428 }
18429
18430 *ppOut = (Fts5Tokenizer*)p;
18431 return rc;
18432 }
18433
18434
/*
** Copy nByte bytes from aIn[] to aOut[], converting the ASCII upper-case
** letters 'A'..'Z' to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int i = 0;
  while( i<nByte ){
    char c = aIn[i];
    if( c>='A' && c<='Z' ){
      c += 32;
    }
    aOut[i] = c;
    i++;
  }
}
18443
18444 /*
18445 ** Tokenize some text using the ascii tokenizer.
18446 */
18447 static int fts5AsciiTokenize(
18448 Fts5Tokenizer *pTokenizer,
18449 void *pCtx,
18450 int iUnused,
18451 const char *pText, int nText,
18452 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
18453 ){
18454 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
18455 int rc = SQLITE_OK;
18456 int ie;
18457 int is = 0;
18458
18459 char aFold[64];
18460 int nFold = sizeof(aFold);
18461 char *pFold = aFold;
18462 unsigned char *a = p->aTokenChar;
18463
18464 UNUSED_PARAM(iUnused);
18465
18466 while( is<nText && rc==SQLITE_OK ){
18467 int nByte;
18468
18469 /* Skip any leading divider characters. */
18470 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
18471 is++;
18472 }
18473 if( is==nText ) break;
18474
18475 /* Count the token characters */
18476 ie = is+1;
18477 while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
18478 ie++;
18479 }
18480
18481 /* Fold to lower case */
18482 nByte = ie-is;
18483 if( nByte>nFold ){
18484 if( pFold!=aFold ) sqlite3_free(pFold);
18485 pFold = sqlite3_malloc(nByte*2);
18486 if( pFold==0 ){
18487 rc = SQLITE_NOMEM;
18488 break;
18489 }
18490 nFold = nByte*2;
18491 }
18492 asciiFold(pFold, &pText[is], nByte);
18493
18494 /* Invoke the token callback */
18495 rc = xToken(pCtx, 0, pFold, nByte, is, ie);
18496 is = ie+1;
18497 }
18498
18499 if( pFold!=aFold ) sqlite3_free(pFold);
18500 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
18501 return rc;
18502 }
18503
18504 /**************************************************************************
18505 ** Start of unicode61 tokenizer implementation.
18506 */
18507
18508
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
**
** READ_UTF8(zIn, zTerm, c) reads one byte from zIn into c and advances
** zIn. If that byte is 0xc0 or greater, c is replaced by the matching
** sqlite3Utf8Trans1[] entry and each following byte of the form 10xxxxxx
** (up to, but not including, zTerm) contributes six more bits. A result
** below 0x80, in the range 0xD800..0xDFFF, or equal to 0xFFFE or 0xFFFF
** is replaced by 0xFFFD.
**
** WRITE_UTF8(zOut, c) writes the value c to zOut as a one, two, three or
** four byte sequence, advancing zOut past the bytes written.
*/
#ifndef SQLITE_AMALGAMATION

/* Lookup table indexed by (first byte - 0xc0); used by READ_UTF8 to obtain
** the initial value of the character being decoded. */
static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */
18561
/*
** A "unicode61" tokenizer object.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int bRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Exception codepoints, in ascending order */
};
18571
18572 static int fts5UnicodeAddExceptions(
18573 Unicode61Tokenizer *p, /* Tokenizer object */
18574 const char *z, /* Characters to treat as exceptions */
18575 int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */
18576 ){
18577 int rc = SQLITE_OK;
18578 int n = (int)strlen(z);
18579 int *aNew;
18580
18581 if( n>0 ){
18582 aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
18583 if( aNew ){
18584 int nNew = p->nException;
18585 const unsigned char *zCsr = (const unsigned char*)z;
18586 const unsigned char *zTerm = (const unsigned char*)&z[n];
18587 while( zCsr<zTerm ){
18588 int iCode;
18589 int bToken;
18590 READ_UTF8(zCsr, zTerm, iCode);
18591 if( iCode<128 ){
18592 p->aTokenChar[iCode] = (unsigned char)bTokenChars;
18593 }else{
18594 bToken = sqlite3Fts5UnicodeIsalnum(iCode);
18595 assert( (bToken==0 || bToken==1) );
18596 assert( (bTokenChars==0 || bTokenChars==1) );
18597 if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
18598 int i;
18599 for(i=0; i<nNew; i++){
18600 if( aNew[i]>iCode ) break;
18601 }
18602 memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
18603 aNew[i] = iCode;
18604 nNew++;
18605 }
18606 }
18607 }
18608 p->aiException = aNew;
18609 p->nException = nNew;
18610 }else{
18611 rc = SQLITE_NOMEM;
18612 }
18613 }
18614
18615 return rc;
18616 }
18617
18618 /*
18619 ** Return true if the p->aiException[] array contains the value iCode.
18620 */
18621 static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
18622 if( p->nException>0 ){
18623 int *a = p->aiException;
18624 int iLo = 0;
18625 int iHi = p->nException-1;
18626
18627 while( iHi>=iLo ){
18628 int iTest = (iHi + iLo) / 2;
18629 if( iCode==a[iTest] ){
18630 return 1;
18631 }else if( iCode>a[iTest] ){
18632 iLo = iTest+1;
18633 }else{
18634 iHi = iTest-1;
18635 }
18636 }
18637 }
18638
18639 return 0;
18640 }
18641
18642 /*
18643 ** Delete a "unicode61" tokenizer.
18644 */
18645 static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
18646 if( pTok ){
18647 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
18648 sqlite3_free(p->aiException);
18649 sqlite3_free(p->aFold);
18650 sqlite3_free(p);
18651 }
18652 return;
18653 }
18654
18655 /*
18656 ** Create a "unicode61" tokenizer.
18657 */
18658 static int fts5UnicodeCreate(
18659 void *pUnused,
18660 const char **azArg, int nArg,
18661 Fts5Tokenizer **ppOut
18662 ){
18663 int rc = SQLITE_OK; /* Return code */
18664 Unicode61Tokenizer *p = 0; /* New tokenizer object */
18665
18666 UNUSED_PARAM(pUnused);
18667
18668 if( nArg%2 ){
18669 rc = SQLITE_ERROR;
18670 }else{
18671 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
18672 if( p ){
18673 int i;
18674 memset(p, 0, sizeof(Unicode61Tokenizer));
18675 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
18676 p->bRemoveDiacritic = 1;
18677 p->nFold = 64;
18678 p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
18679 if( p->aFold==0 ){
18680 rc = SQLITE_NOMEM;
18681 }
18682 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
18683 const char *zArg = azArg[i+1];
18684 if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
18685 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
18686 rc = SQLITE_ERROR;
18687 }
18688 p->bRemoveDiacritic = (zArg[0]=='1');
18689 }else
18690 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
18691 rc = fts5UnicodeAddExceptions(p, zArg, 1);
18692 }else
18693 if( 0==sqlite3_stricmp(azArg[i], "separators") ){
18694 rc = fts5UnicodeAddExceptions(p, zArg, 0);
18695 }else{
18696 rc = SQLITE_ERROR;
18697 }
18698 }
18699 }else{
18700 rc = SQLITE_NOMEM;
18701 }
18702 if( rc!=SQLITE_OK ){
18703 fts5UnicodeDelete((Fts5Tokenizer*)p);
18704 p = 0;
18705 }
18706 *ppOut = (Fts5Tokenizer*)p;
18707 }
18708 return rc;
18709 }
18710
18711 /*
18712 ** Return true if, for the purposes of tokenizing with the tokenizer
18713 ** passed as the first argument, codepoint iCode is considered a token
18714 ** character (not a separator).
18715 */
18716 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
18717 assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
18718 return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
18719 }
18720
18721 static int fts5UnicodeTokenize(
18722 Fts5Tokenizer *pTokenizer,
18723 void *pCtx,
18724 int iUnused,
18725 const char *pText, int nText,
18726 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
18727 ){
18728 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
18729 int rc = SQLITE_OK;
18730 unsigned char *a = p->aTokenChar;
18731
18732 unsigned char *zTerm = (unsigned char*)&pText[nText];
18733 unsigned char *zCsr = (unsigned char *)pText;
18734
18735 /* Output buffer */
18736 char *aFold = p->aFold;
18737 int nFold = p->nFold;
18738 const char *pEnd = &aFold[nFold-6];
18739
18740 UNUSED_PARAM(iUnused);
18741
18742 /* Each iteration of this loop gobbles up a contiguous run of separators,
18743 ** then the next token. */
18744 while( rc==SQLITE_OK ){
18745 int iCode; /* non-ASCII codepoint read from input */
18746 char *zOut = aFold;
18747 int is;
18748 int ie;
18749
18750 /* Skip any separator characters. */
18751 while( 1 ){
18752 if( zCsr>=zTerm ) goto tokenize_done;
18753 if( *zCsr & 0x80 ) {
18754 /* A character outside of the ascii range. Skip past it if it is
18755 ** a separator character. Or break out of the loop if it is not. */
18756 is = zCsr - (unsigned char*)pText;
18757 READ_UTF8(zCsr, zTerm, iCode);
18758 if( fts5UnicodeIsAlnum(p, iCode) ){
18759 goto non_ascii_tokenchar;
18760 }
18761 }else{
18762 if( a[*zCsr] ){
18763 is = zCsr - (unsigned char*)pText;
18764 goto ascii_tokenchar;
18765 }
18766 zCsr++;
18767 }
18768 }
18769
18770 /* Run through the tokenchars. Fold them into the output buffer along
18771 ** the way. */
18772 while( zCsr<zTerm ){
18773
18774 /* Grow the output buffer so that there is sufficient space to fit the
18775 ** largest possible utf-8 character. */
18776 if( zOut>pEnd ){
18777 aFold = sqlite3_malloc(nFold*2);
18778 if( aFold==0 ){
18779 rc = SQLITE_NOMEM;
18780 goto tokenize_done;
18781 }
18782 zOut = &aFold[zOut - p->aFold];
18783 memcpy(aFold, p->aFold, nFold);
18784 sqlite3_free(p->aFold);
18785 p->aFold = aFold;
18786 p->nFold = nFold = nFold*2;
18787 pEnd = &aFold[nFold-6];
18788 }
18789
18790 if( *zCsr & 0x80 ){
18791 /* An non-ascii-range character. Fold it into the output buffer if
18792 ** it is a token character, or break out of the loop if it is not. */
18793 READ_UTF8(zCsr, zTerm, iCode);
18794 if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
18795 non_ascii_tokenchar:
18796 iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
18797 if( iCode ) WRITE_UTF8(zOut, iCode);
18798 }else{
18799 break;
18800 }
18801 }else if( a[*zCsr]==0 ){
18802 /* An ascii-range separator character. End of token. */
18803 break;
18804 }else{
18805 ascii_tokenchar:
18806 if( *zCsr>='A' && *zCsr<='Z' ){
18807 *zOut++ = *zCsr + 32;
18808 }else{
18809 *zOut++ = *zCsr;
18810 }
18811 zCsr++;
18812 }
18813 ie = zCsr - (unsigned char*)pText;
18814 }
18815
18816 /* Invoke the token callback */
18817 rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
18818 }
18819
18820 tokenize_done:
18821 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
18822 return rc;
18823 }
18824
18825 /**************************************************************************
18826 ** Start of porter stemmer implementation.
18827 */
18828
18829 /* Any tokens larger than this (in bytes) are passed through without
18830 ** stemming. */
18831 #define FTS5_PORTER_MAX_TOKEN 64
18832
18833 typedef struct PorterTokenizer PorterTokenizer;
18834 struct PorterTokenizer {
18835 fts5_tokenizer tokenizer; /* Parent tokenizer module */
18836 Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */
18837 char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
18838 };
18839
18840 /*
18841 ** Delete a "porter" tokenizer.
18842 */
18843 static void fts5PorterDelete(Fts5Tokenizer *pTok){
18844 if( pTok ){
18845 PorterTokenizer *p = (PorterTokenizer*)pTok;
18846 if( p->pTokenizer ){
18847 p->tokenizer.xDelete(p->pTokenizer);
18848 }
18849 sqlite3_free(p);
18850 }
18851 }
18852
18853 /*
18854 ** Create a "porter" tokenizer.
18855 */
18856 static int fts5PorterCreate(
18857 void *pCtx,
18858 const char **azArg, int nArg,
18859 Fts5Tokenizer **ppOut
18860 ){
18861 fts5_api *pApi = (fts5_api*)pCtx;
18862 int rc = SQLITE_OK;
18863 PorterTokenizer *pRet;
18864 void *pUserdata = 0;
18865 const char *zBase = "unicode61";
18866
18867 if( nArg>0 ){
18868 zBase = azArg[0];
18869 }
18870
18871 pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
18872 if( pRet ){
18873 memset(pRet, 0, sizeof(PorterTokenizer));
18874 rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer);
18875 }else{
18876 rc = SQLITE_NOMEM;
18877 }
18878 if( rc==SQLITE_OK ){
18879 int nArg2 = (nArg>0 ? nArg-1 : 0);
18880 const char **azArg2 = (nArg2 ? &azArg[1] : 0);
18881 rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer);
18882 }
18883
18884 if( rc!=SQLITE_OK ){
18885 fts5PorterDelete((Fts5Tokenizer*)pRet);
18886 pRet = 0;
18887 }
18888 *ppOut = (Fts5Tokenizer*)pRet;
18889 return rc;
18890 }
18891
/*
** Context handed to fts5PorterCb() while the parent tokenizer runs.
*/
typedef struct PorterContext {
  void *pCtx;                     /* Context pointer for xToken */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Output callback */
  char *aBuf;                     /* Scratch buffer used for stemming */
} PorterContext;
18898
/*
** One suffix-rewriting rule: replace zSuffix with zOutput, optionally only
** when xCond accepts the remaining stem.
*/
typedef struct PorterRule {
  const char *zSuffix;            /* Suffix to match */
  int nSuffix;                    /* Size of zSuffix in bytes */
  int (*xCond)(char *zStem, int nStem);   /* Condition on the stem, or NULL */
  const char *zOutput;            /* Replacement text */
  int nOutput;                    /* Size of zOutput in bytes */
} PorterRule;
18907
#if 0
/*
** Disabled table-driven rule application. Find the first rule in aRule[]
** (terminated by an entry with a NULL zSuffix) whose suffix matches the end
** of aBuf/*pnBuf. If its condition is absent or accepts the stem, rewrite
** the suffix in place, update *pnBuf and return the index of the rule.
** Return -1 if no rule was applied.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int nBuf = *pnBuf;
  PorterRule *pRule = aRule;
  int nStem;

  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }
  if( pRule->zSuffix==0 ) return -1;

  nStem = nBuf - pRule->nSuffix;
  if( pRule->xCond!=0 && !pRule->xCond(aBuf, nStem) ) return -1;

  memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
  *pnBuf = nStem + pRule->nOutput;
  return pRule - aRule;
}
#endif
18933
/*
** Return 1 if c is one of the vowels a, e, i, o or u, or if c is 'y' and
** bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel!=0;
    default:
      return 0;
  }
}
18939
/*
** Scan zStem/nStem for a vowel that is followed, possibly after further
** vowels, by a consonant. Return the offset just past that consonant, or 0
** if there is no such sequence.
**
** bPrevCons says whether the character before zStem[0] counts as a
** consonant. A 'y' is treated as a vowel only when the character before
** it was a consonant.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bCons = bPrevCons;

  /* Scan for a vowel */
  while( iPos<nStem ){
    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
    if( bCons==0 ) break;
    iPos++;
  }

  /* Scan for a consonant */
  iPos++;
  while( iPos<nStem ){
    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
    if( bCons ) return iPos+1;
    iPos++;
  }

  return 0;
}
18955
/* porter rule condition: (m > 0)
**
** True if the stem contains at least one vowel-consonant sequence.
*/
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
18960
/* porter rule condition: (m > 1)
**
** True if the stem contains at least two vowel-consonant sequences.
*/
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
18970
/* porter rule condition: (m = 1)
**
** True if the stem contains exactly one vowel-consonant sequence.
*/
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
18980
/* porter rule condition: (*o)
**
** True if the stem ends consonant-vowel-consonant and the final character
** is not 'w', 'x' or 'y'. The caller must pass nStem>0, since the last
** character of the stem is examined unconditionally.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }else{
    int i;
    int mask = 0;                 /* Consonant flags of the last 3 chars */
    int bCons = 0;
    for(i=0; i<nStem; i++){
      bCons = !fts5PorterIsVowel(zStem[i], bCons);
      assert( bCons==0 || bCons==1 );
      /* Only the three most recent flags are ever tested, so discard the
      ** rest. Letting mask grow without bound would overflow a signed int
      ** once the stem exceeds 31 characters. */
      mask = ((mask << 1) + bCons) & 0x0007;
    }
    return (mask==0x0005);
  }
}
18997
/* porter rule condition: (m > 1 and (*S or *T))
**
** True if the stem ends in 's' or 't' and fts5Porter_MGt1() holds for it.
** nStem must be greater than zero.
*/
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
19004
/* porter rule condition: (*v*)
**
** True if the stem contains a vowel. A 'y' counts as a vowel anywhere
** except in the first position.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos = 0;
  while( iPos<nStem ){
    if( fts5PorterIsVowel(zStem[iPos], iPos>0) ) return 1;
    iPos++;
  }
  return 0;
}
19015
19016
19017 /**************************************************************************
19018 ***************************************************************************
19019 ** GENERATED CODE STARTS HERE (mkportersteps.tcl)
19020 */
19021
/*
** Porter stemmer step 4. If the token in aBuf/(*pnBuf) ends with one of the
** suffixes below and the condition on the remaining stem holds
** (fts5Porter_MGt1(), or fts5Porter_MGt1_and_S_or_T() for "ion"), remove
** the suffix by reducing *pnBuf. Within a case, only the first matching
** suffix is considered, whether or not its condition holds.
**
** Always returns 0.
**
** NOTE(review): the surrounding banner says this function is generated by
** mkportersteps.tcl; the nBuf<2 guard below presumably needs to be added to
** that script as well.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The switch below inspects aBuf[nBuf-2]. Every rule requires nBuf>2, so
  ** returning early for shorter tokens changes nothing except that it
  ** avoids reading before the start of the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
19154
19155
/*
** Porter stemmer step 1b, second part. If the token in aBuf/(*pnBuf) is
** longer than two bytes and ends in "at", "bl" or "iz", append an 'e'
** (giving "ate", "ble" or "ize"), increment *pnBuf and return 1.
** Otherwise leave the token untouched and return 0.
**
** The buffer must have room for one byte beyond the current token.
**
** NOTE(review): the surrounding banner says this function is generated by
** mkportersteps.tcl; the nBuf<2 guard below presumably needs to be added to
** that script as well.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The switch below inspects aBuf[nBuf-2]. Every rule requires nBuf>2, so
  ** returning early for shorter tokens changes nothing except that it
  ** avoids reading before the start of the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'b':
      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

  }
  return ret;
}
19188
19189
/*
** Porter stemmer step 2. If the token in aBuf/(*pnBuf) ends with one of the
** suffixes below and fts5Porter_MGt0() holds for the remaining stem,
** overwrite the suffix with its replacement and update *pnBuf. Within a
** case, only the first matching suffix is considered, whether or not its
** condition holds.
**
** Always returns 0.
**
** NOTE(review): the surrounding banner says this function is generated by
** mkportersteps.tcl; the nBuf<2 guard below presumably needs to be added to
** that script as well.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The switch below inspects aBuf[nBuf-2]. Every rule requires nBuf>2, so
  ** returning early for shorter tokens changes nothing except that it
  ** avoids reading before the start of the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ate", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "tion", 4);
          *pnBuf = nBuf - 6 + 4;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ence", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ance", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }
      break;

    case 'e':
      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ize", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'g':
      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "log", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'l':
      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ble", 3);
          *pnBuf = nBuf - 3 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "al", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ent", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "e", 1);
          *pnBuf = nBuf - 3 + 1;
        }
      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ous", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }
      break;

    case 'o':
      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ize", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ate", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ate", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 's':
      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ive", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ful", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ous", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ive", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "ble", 3);
          *pnBuf = nBuf - 6 + 3;
        }
      }
      break;

  }
  return ret;
}
19335
19336
/*
** Porter stemmer step 3. If the token in aBuf/(*pnBuf) ends with one of the
** suffixes below and fts5Porter_MGt0() holds for the remaining stem,
** replace the suffix (or remove it, for "ness", "ful" and "ative") and
** update *pnBuf. Within a case, only the first matching suffix is
** considered, whether or not its condition holds.
**
** Always returns 0.
**
** NOTE(review): the surrounding banner says this function is generated by
** mkportersteps.tcl; the nBuf<2 guard below presumably needs to be added to
** that script as well.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The switch below inspects aBuf[nBuf-2]. Every rule requires nBuf>2, so
  ** returning early for shorter tokens changes nothing except that it
  ** avoids reading before the start of the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

  }
  return ret;
}
19401
19402
/*
** Porter stemmer step 1b, first part, applied to the token in
** aBuf/(*pnBuf):
**
**   "eed"  ->  "ee"   if fts5Porter_MGt0() holds for the stem; returns 0
**   "ed"   ->  ""     if the stem contains a vowel; returns 1
**   "ing"  ->  ""     if the stem contains a vowel; returns 1
**
** A token ending in "eed" is never tested against "ed". Returns 0 if
** nothing was removed.
**
** NOTE(review): the surrounding banner says this function is generated by
** mkportersteps.tcl; the nBuf<2 guard below presumably needs to be added to
** that script as well.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* The switch below inspects aBuf[nBuf-2]. Every rule requires nBuf>2, so
  ** returning early for shorter tokens changes nothing except that it
  ** avoids reading before the start of the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'e':
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ee", 2);
          *pnBuf = nBuf - 3 + 2;
        }
      }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;

  }
  return ret;
}
19434
19435 /*
19436 ** GENERATED CODE ENDS HERE (mkportersteps.tcl)
19437 ***************************************************************************
19438 **************************************************************************/
19439
/*
** Porter stemmer step 1a: plural removal. For a token ending in 's':
**
**   "sses" (more than 4 bytes) or "ies" (more than 3 bytes): drop 2 bytes
**   any other token ending in "es":                          drop 1 byte
**   ending in "ss":                                          unchanged
**   any other token ending in "s":                           drop 1 byte
**
** Only *pnBuf is modified. The token must be at least two bytes long.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;
  const char *zEnd = &aBuf[nBuf];
  int nDrop = 1;

  if( zEnd[-1]!='s' ) return;
  if( zEnd[-2]=='s' ) return;

  if( zEnd[-2]=='e' ){
    int bSses = (nBuf>4 && zEnd[-4]=='s' && zEnd[-3]=='s');
    int bIes = (nBuf>3 && zEnd[-3]=='i');
    if( bSses || bIes ) nDrop = 2;
  }
  *pnBuf = nBuf - nDrop;
}
19457
19458 static int fts5PorterCb(
19459 void *pCtx,
19460 int tflags,
19461 const char *pToken,
19462 int nToken,
19463 int iStart,
19464 int iEnd
19465 ){
19466 PorterContext *p = (PorterContext*)pCtx;
19467
19468 char *aBuf;
19469 int nBuf;
19470
19471 if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
19472 aBuf = p->aBuf;
19473 nBuf = nToken;
19474 memcpy(aBuf, pToken, nBuf);
19475
19476 /* Step 1. */
19477 fts5PorterStep1A(aBuf, &nBuf);
19478 if( fts5PorterStep1B(aBuf, &nBuf) ){
19479 if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
19480 char c = aBuf[nBuf-1];
19481 if( fts5PorterIsVowel(c, 0)==0
19482 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2]
19483 ){
19484 nBuf--;
19485 }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
19486 aBuf[nBuf++] = 'e';
19487 }
19488 }
19489 }
19490
19491 /* Step 1C. */
19492 if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
19493 aBuf[nBuf-1] = 'i';
19494 }
19495
19496 /* Steps 2 through 4. */
19497 fts5PorterStep2(aBuf, &nBuf);
19498 fts5PorterStep3(aBuf, &nBuf);
19499 fts5PorterStep4(aBuf, &nBuf);
19500
19501 /* Step 5a. */
19502 assert( nBuf>0 );
19503 if( aBuf[nBuf-1]=='e' ){
19504 if( fts5Porter_MGt1(aBuf, nBuf-1)
19505 || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
19506 ){
19507 nBuf--;
19508 }
19509 }
19510
19511 /* Step 5b. */
19512 if( nBuf>1 && aBuf[nBuf-1]=='l'
19513 && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1)
19514 ){
19515 nBuf--;
19516 }
19517
19518 return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
19519
19520 pass_through:
19521 return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
19522 }
19523
19524 /*
19525 ** Tokenize using the porter tokenizer.
19526 */
19527 static int fts5PorterTokenize(
19528 Fts5Tokenizer *pTokenizer,
19529 void *pCtx,
19530 int flags,
19531 const char *pText, int nText,
19532 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
19533 ){
19534 PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
19535 PorterContext sCtx;
19536 sCtx.xToken = xToken;
19537 sCtx.pCtx = pCtx;
19538 sCtx.aBuf = p->aBuf;
19539 return p->tokenizer.xTokenize(
19540 p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
19541 );
19542 }
19543
19544 /*
19545 ** Register all built-in tokenizers with FTS5.
19546 */
19547 static int sqlite3Fts5TokenizerInit(fts5_api *pApi){
19548 struct BuiltinTokenizer {
19549 const char *zName;
19550 fts5_tokenizer x;
19551 } aBuiltin[] = {
19552 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
19553 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
19554 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
19555 };
19556
19557 int rc = SQLITE_OK; /* Return code */
19558 int i; /* To iterate through builtin functions */
19559
19560 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
19561 rc = pApi->xCreateTokenizer(pApi,
19562 aBuiltin[i].zName,
19563 (void*)pApi,
19564 &aBuiltin[i].x,
19565 0
19566 );
19567 }
19568
19569 return rc;
19570 }
19571
19572
19573
19574 /*
19575 ** 2012 May 25
19576 **
19577 ** The author disclaims copyright to this source code. In place of
19578 ** a legal notice, here is a blessing:
19579 **
19580 ** May you do good and not evil.
19581 ** May you find forgiveness for yourself and forgive others.
19582 ** May you share freely, never taking more than you give.
19583 **
19584 ******************************************************************************
19585 */
19586
19587 /*
19588 ** DO NOT EDIT THIS MACHINE GENERATED FILE.
19589 */
19590
19591
19592 /* #include <assert.h> */
19593
19594 /*
19595 ** Return true if the argument corresponds to a unicode codepoint
19596 ** classified as either a letter or a number. Otherwise false.
19597 **
19598 ** The results are undefined if the value passed to this function
19599 ** is less than zero.
19600 */
19601 static int sqlite3Fts5UnicodeIsalnum(int c){
19602 /* Each unsigned integer in the following array corresponds to a contiguous
19603 ** range of unicode codepoints that are not either letters or numbers (i.e.
19604 ** codepoints for which this function should return 0).
19605 **
19606 ** The most significant 22 bits in each 32-bit value contain the first
19607 ** codepoint in the range. The least significant 10 bits are used to store
19608 ** the size of the range (always at least 1). In other words, the value
19609 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint
19610 ** C. It is not possible to represent a range larger than 1023 codepoints
19611 ** using this format.
19612 */
19613 static const unsigned int aEntry[] = {
19614 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
19615 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
19616 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
19617 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
19618 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
19619 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
19620 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
19621 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
19622 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
19623 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
19624 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
19625 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
19626 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
19627 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
19628 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
19629 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
19630 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
19631 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
19632 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
19633 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
19634 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
19635 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
19636 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
19637 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
19638 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
19639 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
19640 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
19641 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
19642 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
19643 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
19644 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
19645 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
19646 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
19647 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
19648 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
19649 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
19650 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
19651 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
19652 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
19653 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
19654 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
19655 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
19656 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
19657 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
19658 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
19659 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
19660 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
19661 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
19662 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
19663 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
19664 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
19665 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
19666 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
19667 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
19668 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
19669 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
19670 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
19671 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
19672 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
19673 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
19674 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
19675 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
19676 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
19677 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
19678 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
19679 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
19680 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
19681 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
19682 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
19683 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
19684 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
19685 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
19686 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
19687 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
19688 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
19689 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
19690 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
19691 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
19692 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
19693 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
19694 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
19695 0x380400F0,
19696 };
19697 static const unsigned int aAscii[4] = {
19698 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
19699 };
19700
19701 if( (unsigned int)c<128 ){
19702 return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
19703 }else if( (unsigned int)c<(1<<22) ){
19704 unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
19705 int iRes = 0;
19706 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
19707 int iLo = 0;
19708 while( iHi>=iLo ){
19709 int iTest = (iHi + iLo) / 2;
19710 if( key >= aEntry[iTest] ){
19711 iRes = iTest;
19712 iLo = iTest+1;
19713 }else{
19714 iHi = iTest-1;
19715 }
19716 }
19717 assert( aEntry[0]<key );
19718 assert( key>=aEntry[iRes] );
19719 return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
19720 }
19721 return 1;
19722 }
19723
19724
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Each aDia[] entry packs one range of codepoints: the upper 13 bits
** hold the first codepoint of the range and the low 3 bits hold the
** number of further codepoints the range covers. aChar[i] is the
** replacement returned for any codepoint inside range i. Codepoints
** that fall outside every range are returned unchanged.
*/
static int fts5_remove_diacritic(int c){
  /* The tables are read-only, so keep them in static storage instead of
  ** re-initializing two stack arrays on every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Setting the low 3 bits makes the key compare >= every entry whose
  ** first codepoint is <= c, whatever that entry's range length is. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last entry that is <= key. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* c is only replaced if it lies inside the range found above. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
19777
19778
/*
** Return true (non-zero) if the argument interpreted as a unicode
** codepoint is a diacritical modifier character, or zero otherwise.
**
** Only codepoints 768..817 are considered. mask0 holds one bit for each
** of the codepoints 768..799 and mask1 one bit for each of 800..817; a
** set bit marks the corresponding codepoint as a diacritic.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  const unsigned int mask0 = 0x08029FDF;
  const unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  /* Shift an unsigned 1: (c-768) may be 31, and left-shifting a signed
  ** 1 into the sign bit is undefined behaviour. */
  if( c < 768+32 ){
    return (int)(mask0 & (1u << (c-768)));
  }
  return (int)(mask1 & (1u << (c-768-32)));
}
19791
19792
/*
** Treat c as a unicode codepoint and return its case-folded form: if c
** is an upper case character with a lower case equivalent, the lower
** case codepoint is returned, otherwise c itself.
**
** For codepoints in the range 128..65535, if bRemoveDiacritic is
** non-zero the folded value is additionally passed through
** fts5_remove_diacritic().
**
** The results are undefined if c is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Folding rules. Each entry covers nRange codepoints beginning at
  ** iCode.
  **
  ** When the least significant bit of flags is clear the rule applies to
  ** every codepoint in the range. When it is set, the rule applies only
  ** to every second codepoint - those with the same parity as iCode.
  **
  ** The remaining 7 bits of flags (flags>>1) select an entry of aiOff[].
  ** A codepoint C that needs folding maps to
  ** ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The table is generated from the CaseFolding.txt file distributed
  ** as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule;
  int nEntry = sizeof(aEntry)/sizeof(aEntry[0]);
  int iFirst = 0;              /* Entries before iFirst have iCode<=c */
  int iEnd = nEntry;           /* Entries from iEnd onwards have iCode>c */
  int iRule;
  int folded = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII range: only 'A'..'Z' are folded. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? (c + ('a' - 'A')) : c;
  }

  /* Outside the 16-bit range only codepoints 66560..66599 are folded. */
  if( c>=65536 ){
    return (c>=66560 && c<66600) ? (c + 40) : c;
  }

  /* 128..65535: locate the last rule whose iCode is <= c. */
  assert( c>aEntry[0].iCode );
  while( iFirst<iEnd ){
    int iMid = (iFirst + iEnd) / 2;
    if( c>=aEntry[iMid].iCode ){
      iFirst = iMid+1;
    }else{
      iEnd = iMid;
    }
  }
  iRule = iFirst-1;
  assert( iRule>=0 && c>=aEntry[iRule].iCode );
  pRule = &aEntry[iRule];

  /* Apply the rule if c is inside its range and, for "every second
  ** codepoint" rules, has the same parity as the start of the range. */
  if( c<(pRule->iCode + pRule->nRange)
   && 0==(0x01 & pRule->flags & (pRule->iCode ^ c))
  ){
    folded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
    assert( folded>0 );
  }

  if( bRemoveDiacritic ){
    folded = fts5_remove_diacritic(folded);
  }
  return folded;
}
19934
19935 /*
19936 ** 2015 May 30
19937 **
19938 ** The author disclaims copyright to this source code. In place of
19939 ** a legal notice, here is a blessing:
19940 **
19941 ** May you do good and not evil.
19942 ** May you find forgiveness for yourself and forgive others.
19943 ** May you share freely, never taking more than you give.
19944 **
19945 ******************************************************************************
19946 **
19947 ** Routines for varint serialization and deserialization.
19948 */
19949
19950
19951 /* #include "fts5Int.h" */
19952
19953 /*
19954 ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
19955 ** Except, this version does handle the single byte case that the core
19956 ** version depends on being handled before its function is called.
19957 */
19958 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
19959 u32 a,b;
19960
19961 /* The 1-byte case. Overwhelmingly the most common. */
19962 a = *p;
19963 /* a: p0 (unmasked) */
19964 if (!(a&0x80))
19965 {
19966 /* Values between 0 and 127 */
19967 *v = a;
19968 return 1;
19969 }
19970
19971 /* The 2-byte case */
19972 p++;
19973 b = *p;
19974 /* b: p1 (unmasked) */
19975 if (!(b&0x80))
19976 {
19977 /* Values between 128 and 16383 */
19978 a &= 0x7f;
19979 a = a<<7;
19980 *v = a | b;
19981 return 2;
19982 }
19983
19984 /* The 3-byte case */
19985 p++;
19986 a = a<<14;
19987 a |= *p;
19988 /* a: p0<<14 | p2 (unmasked) */
19989 if (!(a&0x80))
19990 {
19991 /* Values between 16384 and 2097151 */
19992 a &= (0x7f<<14)|(0x7f);
19993 b &= 0x7f;
19994 b = b<<7;
19995 *v = a | b;
19996 return 3;
19997 }
19998
19999 /* A 32-bit varint is used to store size information in btrees.
20000 ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
20001 ** A 3-byte varint is sufficient, for example, to record the size
20002 ** of a 1048569-byte BLOB or string.
20003 **
20004 ** We only unroll the first 1-, 2-, and 3- byte cases. The very
20005 ** rare larger cases can be handled by the slower 64-bit varint
20006 ** routine.
20007 */
20008 {
20009 u64 v64;
20010 u8 n;
20011 p -= 2;
20012 n = sqlite3Fts5GetVarint(p, &v64);
20013 *v = (u32)v64;
20014 assert( n>3 && n<=9 );
20015 return n;
20016 }
20017 }
20018
20019
/*
** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
**
** SLOT_4_2_0 A mask for (0xf<<28) | SLOT_2_0. Only the low 4 bits of
** the 7-bit group shifted to bit 28 fit in a 32-bit word.
**
** sqlite3Fts5GetVarint() asserts that both constants match these
** expressions.
*/
#define SLOT_2_0 0x001fc07f
#define SLOT_4_2_0 0xf01fc07f
20031
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read. The value is stored in *v.
**
** Between 1 and 9 bytes are consumed. Each of the first eight bytes
** contributes its low 7 bits and decoding stops at the first of them
** whose 0x80 bit is clear. If all eight have that bit set, a ninth byte
** is read and contributes all 8 of its bits.
**
** The implementation is fully unrolled. Alternate input bytes are
** accumulated in the 32-bit variables a and b, 14 bits apart, so that
** two 7-bit groups can be combined per shift; s holds the bits that end
** up in the upper 32 bits of the result. The running comments give the
** contents of each variable in terms of the input bytes p0..p8.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these cause they were (effectively) done above in calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* Ninth byte: all 8 bits of p8 are data, hence the shifts by 15 and 8
  ** below instead of 14 and 7. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at p8, so p[-4] is p4. Its upper data bits belong in
  ** the high 32-bit word of the result. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}
20194
20195 /*
20196 ** The variable-length integer encoding is as follows:
20197 **
20198 ** KEY:
20199 ** A = 0xxxxxxx 7 bits of data and one flag bit
20200 ** B = 1xxxxxxx 7 bits of data and one flag bit
20201 ** C = xxxxxxxx 8 bits of data
20202 **
20203 ** 7 bits - A
20204 ** 14 bits - BA
20205 ** 21 bits - BBA
20206 ** 28 bits - BBBA
20207 ** 35 bits - BBBBA
20208 ** 42 bits - BBBBBA
20209 ** 49 bits - BBBBBBA
20210 ** 56 bits - BBBBBBBA
20211 ** 64 bits - BBBBBBBBC
20212 */
20213
20214 #ifdef SQLITE_NOINLINE
20215 # define FTS5_NOINLINE SQLITE_NOINLINE
20216 #else
20217 # define FTS5_NOINLINE
20218 #endif
20219
20220 /*
20221 ** Write a 64-bit variable-length integer to memory starting at p[0].
20222 ** The length of data write will be between 1 and 9 bytes. The number
20223 ** of bytes written is returned.
20224 **
20225 ** A variable-length integer consists of the lower 7 bits of each byte
20226 ** for all bytes that have the 8th bit set and one byte with the 8th
20227 ** bit clear. Except, if we get to the 9th byte, it stores the full
20228 ** 8 bits and is the last byte.
20229 */
20230 static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
20231 int i, j, n;
20232 u8 buf[10];
20233 if( v & (((u64)0xff000000)<<32) ){
20234 p[8] = (u8)v;
20235 v >>= 8;
20236 for(i=7; i>=0; i--){
20237 p[i] = (u8)((v & 0x7f) | 0x80);
20238 v >>= 7;
20239 }
20240 return 9;
20241 }
20242 n = 0;
20243 do{
20244 buf[n++] = (u8)((v & 0x7f) | 0x80);
20245 v >>= 7;
20246 }while( v!=0 );
20247 buf[0] &= 0x7f;
20248 assert( n<=9 );
20249 for(i=0, j=n-1; j>=0; j--, i++){
20250 p[i] = buf[j];
20251 }
20252 return n;
20253 }
20254
20255 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
20256 if( v<=0x7f ){
20257 p[0] = v&0x7f;
20258 return 1;
20259 }
20260 if( v<=0x3fff ){
20261 p[0] = ((v>>7)&0x7f)|0x80;
20262 p[1] = v&0x7f;
20263 return 2;
20264 }
20265 return fts5PutVarint64(p,v);
20266 }
20267
20268
20269 static int sqlite3Fts5GetVarintLen(u32 iVal){
20270 #if 0
20271 if( iVal<(1 << 7 ) ) return 1;
20272 #endif
20273 assert( iVal>=(1 << 7) );
20274 if( iVal<(1 << 14) ) return 2;
20275 if( iVal<(1 << 21) ) return 3;
20276 if( iVal<(1 << 28) ) return 4;
20277 return 5;
20278 }
20279
20280
20281 /*
20282 ** 2015 May 08
20283 **
20284 ** The author disclaims copyright to this source code. In place of
20285 ** a legal notice, here is a blessing:
20286 **
20287 ** May you do good and not evil.
20288 ** May you find forgiveness for yourself and forgive others.
20289 ** May you share freely, never taking more than you give.
20290 **
20291 ******************************************************************************
20292 **
20293 ** This is an SQLite virtual table module implementing direct access to an
20294 ** existing FTS5 index. The module may create several different types of
20295 ** tables:
20296 **
20297 ** col:
20298 ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
20299 **
20300 ** One row for each term/column combination. The value of $doc is set to
20301 ** the number of fts5 rows that contain at least one instance of term
20302 ** $term within column $col. Field $cnt is set to the total number of
20303 ** instances of term $term in column $col (in any row of the fts5 table).
20304 **
20305 ** row:
20306 ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
20307 **
20308 ** One row for each term in the database. The value of $doc is set to
20309 ** the number of fts5 rows that contain at least one instance of term
20310 ** $term. Field $cnt is set to the total number of instances of term
20311 ** $term in the database.
20312 */
20313
20314
20315 /* #include "fts5Int.h" */
20316
20317
20318 typedef struct Fts5VocabTable Fts5VocabTable;
20319 typedef struct Fts5VocabCursor Fts5VocabCursor;
20320
/*
** An fts5vocab virtual table object. zFts5Tbl and zFts5Db point into
** extra space allocated immediately after the structure by
** fts5VocabInitVtab(), so freeing the structure releases them too.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;              /* Base class. Must be first */
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL or ROW */
};
20329
/*
** A cursor open on an fts5vocab table. The aCnt[] and aDoc[] arrays
** each hold pConfig->nCol entries and live in extra space allocated
** directly after the structure by fts5VocabOpenMethod().
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;       /* Base class. Must be first */
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Index *pIndex;              /* Associated FTS5 index */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */

  int nLeTerm;                    /* Size of zLeTerm in bytes */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  Fts5Config *pConfig;            /* Fts5 table configuration */
  int iCol;                       /* Index into aCnt[]/aDoc[] of current column */
  i64 *aCnt;                      /* nCol values; presumably per-column $cnt */
  i64 *aDoc;                      /* nCol values; presumably per-column $doc */

  /* Output values used by 'row' and 'col' tables */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */
};
20351
20352 #define FTS5_VOCAB_COL 0
20353 #define FTS5_VOCAB_ROW 1
20354
20355 #define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt"
20356 #define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt"
20357
20358 /*
20359 ** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
20360 */
20361 #define FTS5_VOCAB_TERM_EQ 0x01
20362 #define FTS5_VOCAB_TERM_GE 0x02
20363 #define FTS5_VOCAB_TERM_LE 0x04
20364
20365
20366 /*
20367 ** Translate a string containing an fts5vocab table type to an
20368 ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
20369 ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
20370 ** and return SQLITE_ERROR.
20371 */
20372 static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
20373 int rc = SQLITE_OK;
20374 char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
20375 if( rc==SQLITE_OK ){
20376 sqlite3Fts5Dequote(zCopy);
20377 if( sqlite3_stricmp(zCopy, "col")==0 ){
20378 *peType = FTS5_VOCAB_COL;
20379 }else
20380
20381 if( sqlite3_stricmp(zCopy, "row")==0 ){
20382 *peType = FTS5_VOCAB_ROW;
20383 }else
20384 {
20385 *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
20386 rc = SQLITE_ERROR;
20387 }
20388 sqlite3_free(zCopy);
20389 }
20390
20391 return rc;
20392 }
20393
20394
20395 /*
20396 ** The xDisconnect() virtual table method.
20397 */
20398 static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
20399 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
20400 sqlite3_free(pTab);
20401 return SQLITE_OK;
20402 }
20403
20404 /*
20405 ** The xDestroy() virtual table method.
20406 */
20407 static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
20408 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
20409 sqlite3_free(pTab);
20410 return SQLITE_OK;
20411 }
20412
20413 /*
20414 ** This function is the implementation of both the xConnect and xCreate
20415 ** methods of the FTS3 virtual table.
20416 **
20417 ** The argv[] array contains the following:
20418 **
20419 ** argv[0] -> module name ("fts5vocab")
20420 ** argv[1] -> database name
20421 ** argv[2] -> table name
20422 **
20423 ** then:
20424 **
20425 ** argv[3] -> name of fts5 table
20426 ** argv[4] -> type of fts5vocab table
20427 **
20428 ** or, for tables in the TEMP schema only.
20429 **
20430 ** argv[3] -> name of fts5 tables database
20431 ** argv[4] -> name of fts5 table
20432 ** argv[5] -> type of fts5vocab table
20433 */
20434 static int fts5VocabInitVtab(
20435 sqlite3 *db, /* The SQLite database connection */
20436 void *pAux, /* Pointer to Fts5Global object */
20437 int argc, /* Number of elements in argv array */
20438 const char * const *argv, /* xCreate/xConnect argument array */
20439 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
20440 char **pzErr /* Write any error message here */
20441 ){
20442 const char *azSchema[] = {
20443 "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
20444 "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")"
20445 };
20446
20447 Fts5VocabTable *pRet = 0;
20448 int rc = SQLITE_OK; /* Return code */
20449 int bDb;
20450
20451 bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
20452
20453 if( argc!=5 && bDb==0 ){
20454 *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
20455 rc = SQLITE_ERROR;
20456 }else{
20457 int nByte; /* Bytes of space to allocate */
20458 const char *zDb = bDb ? argv[3] : argv[1];
20459 const char *zTab = bDb ? argv[4] : argv[3];
20460 const char *zType = bDb ? argv[5] : argv[4];
20461 int nDb = (int)strlen(zDb)+1;
20462 int nTab = (int)strlen(zTab)+1;
20463 int eType = 0;
20464
20465 rc = fts5VocabTableType(zType, pzErr, &eType);
20466 if( rc==SQLITE_OK ){
20467 assert( eType>=0 && eType<ArraySize(azSchema) );
20468 rc = sqlite3_declare_vtab(db, azSchema[eType]);
20469 }
20470
20471 nByte = sizeof(Fts5VocabTable) + nDb + nTab;
20472 pRet = sqlite3Fts5MallocZero(&rc, nByte);
20473 if( pRet ){
20474 pRet->pGlobal = (Fts5Global*)pAux;
20475 pRet->eType = eType;
20476 pRet->db = db;
20477 pRet->zFts5Tbl = (char*)&pRet[1];
20478 pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
20479 memcpy(pRet->zFts5Tbl, zTab, nTab);
20480 memcpy(pRet->zFts5Db, zDb, nDb);
20481 sqlite3Fts5Dequote(pRet->zFts5Tbl);
20482 sqlite3Fts5Dequote(pRet->zFts5Db);
20483 }
20484 }
20485
20486 *ppVTab = (sqlite3_vtab*)pRet;
20487 return rc;
20488 }
20489
20490
20491 /*
20492 ** The xConnect() and xCreate() methods for the virtual table. All the
20493 ** work is done in function fts5VocabInitVtab().
20494 */
20495 static int fts5VocabConnectMethod(
20496 sqlite3 *db, /* Database connection */
20497 void *pAux, /* Pointer to tokenizer hash table */
20498 int argc, /* Number of elements in argv array */
20499 const char * const *argv, /* xCreate/xConnect argument array */
20500 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
20501 char **pzErr /* OUT: sqlite3_malloc'd error message */
20502 ){
20503 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
20504 }
20505 static int fts5VocabCreateMethod(
20506 sqlite3 *db, /* Database connection */
20507 void *pAux, /* Pointer to tokenizer hash table */
20508 int argc, /* Number of elements in argv array */
20509 const char * const *argv, /* xCreate/xConnect argument array */
20510 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
20511 char **pzErr /* OUT: sqlite3_malloc'd error message */
20512 ){
20513 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
20514 }
20515
20516 /*
20517 ** Implementation of the xBestIndex method.
20518 */
20519 static int fts5VocabBestIndexMethod(
20520 sqlite3_vtab *pUnused,
20521 sqlite3_index_info *pInfo
20522 ){
20523 int i;
20524 int iTermEq = -1;
20525 int iTermGe = -1;
20526 int iTermLe = -1;
20527 int idxNum = 0;
20528 int nArg = 0;
20529
20530 UNUSED_PARAM(pUnused);
20531
20532 for(i=0; i<pInfo->nConstraint; i++){
20533 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
20534 if( p->usable==0 ) continue;
20535 if( p->iColumn==0 ){ /* term column */
20536 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i;
20537 if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i;
20538 if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i;
20539 if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i;
20540 if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i;
20541 }
20542 }
20543
20544 if( iTermEq>=0 ){
20545 idxNum |= FTS5_VOCAB_TERM_EQ;
20546 pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
20547 pInfo->estimatedCost = 100;
20548 }else{
20549 pInfo->estimatedCost = 1000000;
20550 if( iTermGe>=0 ){
20551 idxNum |= FTS5_VOCAB_TERM_GE;
20552 pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
20553 pInfo->estimatedCost = pInfo->estimatedCost / 2;
20554 }
20555 if( iTermLe>=0 ){
20556 idxNum |= FTS5_VOCAB_TERM_LE;
20557 pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
20558 pInfo->estimatedCost = pInfo->estimatedCost / 2;
20559 }
20560 }
20561
20562 /* This virtual table always delivers results in ascending order of
20563 ** the "term" column (column 0). So if the user has requested this
20564 ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
20565 ** sqlite3_index_info.orderByConsumed flag to tell the core the results
20566 ** are already in sorted order. */
20567 if( pInfo->nOrderBy==1
20568 && pInfo->aOrderBy[0].iColumn==0
20569 && pInfo->aOrderBy[0].desc==0
20570 ){
20571 pInfo->orderByConsumed = 1;
20572 }
20573
20574 pInfo->idxNum = idxNum;
20575 return SQLITE_OK;
20576 }
20577
20578 /*
20579 ** Implementation of xOpen method.
20580 */
20581 static int fts5VocabOpenMethod(
20582 sqlite3_vtab *pVTab,
20583 sqlite3_vtab_cursor **ppCsr
20584 ){
20585 Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
20586 Fts5Index *pIndex = 0;
20587 Fts5Config *pConfig = 0;
20588 Fts5VocabCursor *pCsr = 0;
20589 int rc = SQLITE_OK;
20590 sqlite3_stmt *pStmt = 0;
20591 char *zSql = 0;
20592
20593 zSql = sqlite3Fts5Mprintf(&rc,
20594 "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
20595 pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
20596 );
20597 if( zSql ){
20598 rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
20599 }
20600 sqlite3_free(zSql);
20601 assert( rc==SQLITE_OK || pStmt==0 );
20602 if( rc==SQLITE_ERROR ) rc = SQLITE_OK;
20603
20604 if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
20605 i64 iId = sqlite3_column_int64(pStmt, 0);
20606 pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &pConfig);
20607 }
20608
20609 if( rc==SQLITE_OK && pIndex==0 ){
20610 rc = sqlite3_finalize(pStmt);
20611 pStmt = 0;
20612 if( rc==SQLITE_OK ){
20613 pVTab->zErrMsg = sqlite3_mprintf(
20614 "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
20615 );
20616 rc = SQLITE_ERROR;
20617 }
20618 }
20619
20620 if( rc==SQLITE_OK ){
20621 int nByte = pConfig->nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor);
20622 pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
20623 }
20624
20625 if( pCsr ){
20626 pCsr->pIndex = pIndex;
20627 pCsr->pStmt = pStmt;
20628 pCsr->pConfig = pConfig;
20629 pCsr->aCnt = (i64*)&pCsr[1];
20630 pCsr->aDoc = &pCsr->aCnt[pConfig->nCol];
20631 }else{
20632 sqlite3_finalize(pStmt);
20633 }
20634
20635 *ppCsr = (sqlite3_vtab_cursor*)pCsr;
20636 return rc;
20637 }
20638
20639 static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
20640 pCsr->rowid = 0;
20641 sqlite3Fts5IterClose(pCsr->pIter);
20642 pCsr->pIter = 0;
20643 sqlite3_free(pCsr->zLeTerm);
20644 pCsr->nLeTerm = -1;
20645 pCsr->zLeTerm = 0;
20646 }
20647
20648 /*
20649 ** Close the cursor. For additional information see the documentation
20650 ** on the xClose method of the virtual table interface.
20651 */
20652 static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
20653 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20654 fts5VocabResetCursor(pCsr);
20655 sqlite3Fts5BufferFree(&pCsr->term);
20656 sqlite3_finalize(pCsr->pStmt);
20657 sqlite3_free(pCsr);
20658 return SQLITE_OK;
20659 }
20660
20661
20662 /*
20663 ** Advance the cursor to the next row in the table.
20664 */
20665 static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
20666 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20667 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
20668 int rc = SQLITE_OK;
20669 int nCol = pCsr->pConfig->nCol;
20670
20671 pCsr->rowid++;
20672
20673 if( pTab->eType==FTS5_VOCAB_COL ){
20674 for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
20675 if( pCsr->aDoc[pCsr->iCol] ) break;
20676 }
20677 }
20678
20679 if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){
20680 if( sqlite3Fts5IterEof(pCsr->pIter) ){
20681 pCsr->bEof = 1;
20682 }else{
20683 const char *zTerm;
20684 int nTerm;
20685
20686 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
20687 if( pCsr->nLeTerm>=0 ){
20688 int nCmp = MIN(nTerm, pCsr->nLeTerm);
20689 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
20690 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
20691 pCsr->bEof = 1;
20692 return SQLITE_OK;
20693 }
20694 }
20695
20696 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
20697 memset(pCsr->aCnt, 0, nCol * sizeof(i64));
20698 memset(pCsr->aDoc, 0, nCol * sizeof(i64));
20699 pCsr->iCol = 0;
20700
20701 assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
20702 while( rc==SQLITE_OK ){
20703 const u8 *pPos; int nPos; /* Position list */
20704 i64 iPos = 0; /* 64-bit position read from poslist */
20705 int iOff = 0; /* Current offset within position list */
20706
20707 pPos = pCsr->pIter->pData;
20708 nPos = pCsr->pIter->nData;
20709 switch( pCsr->pConfig->eDetail ){
20710 case FTS5_DETAIL_FULL:
20711 pPos = pCsr->pIter->pData;
20712 nPos = pCsr->pIter->nData;
20713 if( pTab->eType==FTS5_VOCAB_ROW ){
20714 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
20715 pCsr->aCnt[0]++;
20716 }
20717 pCsr->aDoc[0]++;
20718 }else{
20719 int iCol = -1;
20720 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
20721 int ii = FTS5_POS2COLUMN(iPos);
20722 pCsr->aCnt[ii]++;
20723 if( iCol!=ii ){
20724 if( ii>=nCol ){
20725 rc = FTS5_CORRUPT;
20726 break;
20727 }
20728 pCsr->aDoc[ii]++;
20729 iCol = ii;
20730 }
20731 }
20732 }
20733 break;
20734
20735 case FTS5_DETAIL_COLUMNS:
20736 if( pTab->eType==FTS5_VOCAB_ROW ){
20737 pCsr->aDoc[0]++;
20738 }else{
20739 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
20740 assert_nc( iPos>=0 && iPos<nCol );
20741 if( iPos>=nCol ){
20742 rc = FTS5_CORRUPT;
20743 break;
20744 }
20745 pCsr->aDoc[iPos]++;
20746 }
20747 }
20748 break;
20749
20750 default:
20751 assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE );
20752 pCsr->aDoc[0]++;
20753 break;
20754 }
20755
20756 if( rc==SQLITE_OK ){
20757 rc = sqlite3Fts5IterNextScan(pCsr->pIter);
20758 }
20759
20760 if( rc==SQLITE_OK ){
20761 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
20762 if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ){
20763 break;
20764 }
20765 if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
20766 }
20767 }
20768 }
20769 }
20770
20771 if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
20772 while( pCsr->aDoc[pCsr->iCol]==0 ) pCsr->iCol++;
20773 assert( pCsr->iCol<pCsr->pConfig->nCol );
20774 }
20775 return rc;
20776 }
20777
20778 /*
20779 ** This is the xFilter implementation for the virtual table.
20780 */
20781 static int fts5VocabFilterMethod(
20782 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
20783 int idxNum, /* Strategy index */
20784 const char *zUnused, /* Unused */
20785 int nUnused, /* Number of elements in apVal */
20786 sqlite3_value **apVal /* Arguments for the indexing scheme */
20787 ){
20788 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20789 int rc = SQLITE_OK;
20790
20791 int iVal = 0;
20792 int f = FTS5INDEX_QUERY_SCAN;
20793 const char *zTerm = 0;
20794 int nTerm = 0;
20795
20796 sqlite3_value *pEq = 0;
20797 sqlite3_value *pGe = 0;
20798 sqlite3_value *pLe = 0;
20799
20800 UNUSED_PARAM2(zUnused, nUnused);
20801
20802 fts5VocabResetCursor(pCsr);
20803 if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
20804 if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
20805 if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
20806
20807 if( pEq ){
20808 zTerm = (const char *)sqlite3_value_text(pEq);
20809 nTerm = sqlite3_value_bytes(pEq);
20810 f = 0;
20811 }else{
20812 if( pGe ){
20813 zTerm = (const char *)sqlite3_value_text(pGe);
20814 nTerm = sqlite3_value_bytes(pGe);
20815 }
20816 if( pLe ){
20817 const char *zCopy = (const char *)sqlite3_value_text(pLe);
20818 pCsr->nLeTerm = sqlite3_value_bytes(pLe);
20819 pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1);
20820 if( pCsr->zLeTerm==0 ){
20821 rc = SQLITE_NOMEM;
20822 }else{
20823 memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
20824 }
20825 }
20826 }
20827
20828
20829 if( rc==SQLITE_OK ){
20830 rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
20831 }
20832 if( rc==SQLITE_OK ){
20833 rc = fts5VocabNextMethod(pCursor);
20834 }
20835
20836 return rc;
20837 }
20838
20839 /*
20840 ** This is the xEof method of the virtual table. SQLite calls this
20841 ** routine to find out if it has reached the end of a result set.
20842 */
20843 static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
20844 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20845 return pCsr->bEof;
20846 }
20847
20848 static int fts5VocabColumnMethod(
20849 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
20850 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
20851 int iCol /* Index of column to read value from */
20852 ){
20853 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20854 int eDetail = pCsr->pConfig->eDetail;
20855 int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
20856 i64 iVal = 0;
20857
20858 if( iCol==0 ){
20859 sqlite3_result_text(
20860 pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
20861 );
20862 }else if( eType==FTS5_VOCAB_COL ){
20863 assert( iCol==1 || iCol==2 || iCol==3 );
20864 if( iCol==1 ){
20865 if( eDetail!=FTS5_DETAIL_NONE ){
20866 const char *z = pCsr->pConfig->azCol[pCsr->iCol];
20867 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
20868 }
20869 }else if( iCol==2 ){
20870 iVal = pCsr->aDoc[pCsr->iCol];
20871 }else{
20872 iVal = pCsr->aCnt[pCsr->iCol];
20873 }
20874 }else{
20875 assert( iCol==1 || iCol==2 );
20876 if( iCol==1 ){
20877 iVal = pCsr->aDoc[0];
20878 }else{
20879 iVal = pCsr->aCnt[0];
20880 }
20881 }
20882
20883 if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);
20884 return SQLITE_OK;
20885 }
20886
20887 /*
20888 ** This is the xRowid method. The SQLite core calls this routine to
20889 ** retrieve the rowid for the current row of the result set. The
20890 ** rowid should be written to *pRowid.
20891 */
20892 static int fts5VocabRowidMethod(
20893 sqlite3_vtab_cursor *pCursor,
20894 sqlite_int64 *pRowid
20895 ){
20896 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
20897 *pRowid = pCsr->rowid;
20898 return SQLITE_OK;
20899 }
20900
20901 static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
20902 static const sqlite3_module fts5Vocab = {
20903 /* iVersion */ 2,
20904 /* xCreate */ fts5VocabCreateMethod,
20905 /* xConnect */ fts5VocabConnectMethod,
20906 /* xBestIndex */ fts5VocabBestIndexMethod,
20907 /* xDisconnect */ fts5VocabDisconnectMethod,
20908 /* xDestroy */ fts5VocabDestroyMethod,
20909 /* xOpen */ fts5VocabOpenMethod,
20910 /* xClose */ fts5VocabCloseMethod,
20911 /* xFilter */ fts5VocabFilterMethod,
20912 /* xNext */ fts5VocabNextMethod,
20913 /* xEof */ fts5VocabEofMethod,
20914 /* xColumn */ fts5VocabColumnMethod,
20915 /* xRowid */ fts5VocabRowidMethod,
20916 /* xUpdate */ 0,
20917 /* xBegin */ 0,
20918 /* xSync */ 0,
20919 /* xCommit */ 0,
20920 /* xRollback */ 0,
20921 /* xFindFunction */ 0,
20922 /* xRename */ 0,
20923 /* xSavepoint */ 0,
20924 /* xRelease */ 0,
20925 /* xRollbackTo */ 0,
20926 };
20927 void *p = (void*)pGlobal;
20928
20929 return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
20930 }
20931
20932
20933
20934
20935
20936 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */
20937
20938 /************** End of fts5.c ************************************************/
OLDNEW
« no previous file with comments | « third_party/sqlite/amalgamation/sqlite3.08.c ('k') | third_party/sqlite/split.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698