Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(160)

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_index.c

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
17 */
18
19
20 #include "fts5Int.h"
21
22 /*
23 ** Overview:
24 **
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
28 **
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
34 **
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
39 **
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
42 **
43 */
44
45
46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
48
49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
50
51 #define FTS5_MAIN_PREFIX '0'
52
53 #if FTS5_MAX_PREFIX_INDEXES > 31
54 # error "FTS5_MAX_PREFIX_INDEXES is too large"
55 #endif
56
57 /*
58 ** Details:
59 **
60 ** The %_data table managed by this module,
61 **
62 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
63 **
64 ** , contains the following 5 types of records. See the comments surrounding
65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
66 ** assigned to each fo them.
67 **
68 ** 1. Structure Records:
69 **
70 ** The set of segments that make up an index - the index structure - are
71 ** recorded in a single record within the %_data table. The record consists
72 ** of a single 32-bit configuration cookie value followed by a list of
73 ** SQLite varints. If the FTS table features more than one index (because
74 ** there are one or more prefix indexes), it is guaranteed that all share
75 ** the same cookie value.
76 **
77 ** Immediately following the configuration cookie, the record begins with
78 ** three varints:
79 **
80 ** + number of levels,
81 ** + total number of segments on all levels,
82 ** + value of write counter.
83 **
84 ** Then, for each level from 0 to nMax:
85 **
86 ** + number of input segments in ongoing merge.
87 ** + total number of segments in level.
88 ** + for each segment from oldest to newest:
89 ** + segment id (always > 0)
90 ** + first leaf page number (often 1, always greater than 0)
91 ** + final leaf page number
92 **
93 ** 2. The Averages Record:
94 **
95 ** A single record within the %_data table. The data is a list of varints.
96 ** The first value is the number of rows in the index. Then, for each column
97 ** from left to right, the total number of tokens in the column for all
98 ** rows of the table.
99 **
100 ** 3. Segment leaves:
101 **
102 ** TERM/DOCLIST FORMAT:
103 **
104 ** Most of each segment leaf is taken up by term/doclist data. The
105 ** general format of term/doclist, starting with the first term
106 ** on the leaf page, is:
107 **
108 ** varint : size of first term
109 ** blob: first term data
110 ** doclist: first doclist
111 ** zero-or-more {
112 ** varint: number of bytes in common with previous term
113 ** varint: number of bytes of new term data (nNew)
114 ** blob: nNew bytes of new term data
115 ** doclist: next doclist
116 ** }
117 **
118 ** doclist format:
119 **
120 ** varint: first rowid
121 ** poslist: first poslist
122 ** zero-or-more {
123 ** varint: rowid delta (always > 0)
124 ** poslist: next poslist
125 ** }
126 **
127 ** poslist format:
128 **
129 ** varint: size of poslist in bytes multiplied by 2, not including
130 ** this field. Plus 1 if this entry carries the "delete" flag.
131 ** collist: collist for column 0
132 ** zero-or-more {
133 ** 0x01 byte
134 ** varint: column number (I)
135 ** collist: collist for column I
136 ** }
137 **
138 ** collist format:
139 **
140 ** varint: first offset + 2
141 ** zero-or-more {
142 ** varint: offset delta + 2
143 ** }
144 **
145 ** PAGE FORMAT
146 **
147 ** Each leaf page begins with a 4-byte header containing 2 16-bit
148 ** unsigned integer fields in big-endian format. They are:
149 **
150 ** * The byte offset of the first rowid on the page, if it exists
151 ** and occurs before the first term (otherwise 0).
152 **
153 ** * The byte offset of the start of the page footer. If the page
154 ** footer is 0 bytes in size, then this field is the same as the
155 ** size of the leaf page in bytes.
156 **
157 ** The page footer consists of a single varint for each term located
158 ** on the page. Each varint is the byte offset of the current term
159 ** within the page, delta-compressed against the previous value. In
160 ** other words, the first varint in the footer is the byte offset of
161 ** the first term, the second is the byte offset of the second less that
162 ** of the first, and so on.
163 **
164 ** The term/doclist format described above is accurate if the entire
165 ** term/doclist data fits on a single leaf page. If this is not the case,
166 ** the format is changed in two ways:
167 **
168 ** + if the first rowid on a page occurs before the first term, it
169 ** is stored as a literal value:
170 **
171 ** varint: first rowid
172 **
173 ** + the first term on each page is stored in the same way as the
174 ** very first term of the segment:
175 **
176 ** varint : size of first term
177 ** blob: first term data
178 **
179 ** 5. Segment doclist indexes:
180 **
181 ** Doclist indexes are themselves b-trees, however they usually consist of
182 ** a single leaf record only. The format of each doclist index leaf page
183 ** is:
184 **
185 ** * Flags byte. Bits are:
186 ** 0x01: Clear if leaf is also the root page, otherwise set.
187 **
188 ** * Page number of fts index leaf page. As a varint.
189 **
190 ** * First rowid on page indicated by previous field. As a varint.
191 **
192 ** * A list of varints, one for each subsequent termless page. A
193 ** positive delta if the termless page contains at least one rowid,
194 ** or an 0x00 byte otherwise.
195 **
196 ** Internal doclist index nodes are:
197 **
198 ** * Flags byte. Bits are:
199 ** 0x01: Clear for root page, otherwise set.
200 **
201 ** * Page number of first child page. As a varint.
202 **
203 ** * Copy of first rowid on page indicated by previous field. As a varint.
204 **
205 ** * A list of delta-encoded varints - the first rowid on each subsequent
206 ** child page.
207 **
208 */
209
210 /*
211 ** Rowids for the averages and structure records in the %_data table.
212 */
213 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
214 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
215
216 /*
217 ** Macros determining the rowids used by segment leaves and dlidx leaves
218 ** and nodes. All nodes and leaves are stored in the %_data table with large
219 ** positive rowids.
220 **
221 ** Each segment has a unique non-zero 16-bit id.
222 **
223 ** The rowid for each segment leaf is found by passing the segment id and
224 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
225 ** sequentially starting from 1.
226 */
227 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
228 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
229 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
230 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
231
232 #define fts5_dri(segid, dlidx, height, pgno) ( \
233 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
234 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
235 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
236 ((i64)(pgno)) \
237 )
238
239 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
240 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
241
242 /*
243 ** Maximum segments permitted in a single index
244 */
245 #define FTS5_MAX_SEGMENT 2000
246
247 #ifdef SQLITE_DEBUG
248 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
249 #endif
250
251
252 /*
253 ** Each time a blob is read from the %_data table, it is padded with this
254 ** many zero bytes. This makes it easier to decode the various record formats
255 ** without overreading if the records are corrupt.
256 */
257 #define FTS5_DATA_ZERO_PADDING 8
258 #define FTS5_DATA_PADDING 20
259
260 typedef struct Fts5Data Fts5Data;
261 typedef struct Fts5DlidxIter Fts5DlidxIter;
262 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
263 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
264 typedef struct Fts5PageWriter Fts5PageWriter;
265 typedef struct Fts5SegIter Fts5SegIter;
266 typedef struct Fts5DoclistIter Fts5DoclistIter;
267 typedef struct Fts5SegWriter Fts5SegWriter;
268 typedef struct Fts5Structure Fts5Structure;
269 typedef struct Fts5StructureLevel Fts5StructureLevel;
270 typedef struct Fts5StructureSegment Fts5StructureSegment;
271
272 struct Fts5Data {
273 u8 *p; /* Pointer to buffer containing record */
274 int nn; /* Size of record in bytes */
275 int szLeaf; /* Size of leaf without page-index */
276 };
277
278 /*
279 ** One object per %_data table.
280 */
281 struct Fts5Index {
282 Fts5Config *pConfig; /* Virtual table configuration */
283 char *zDataTbl; /* Name of %_data table */
284 int nWorkUnit; /* Leaf pages in a "unit" of work */
285
286 /*
287 ** Variables related to the accumulation of tokens and doclists within the
288 ** in-memory hash tables before they are flushed to disk.
289 */
290 Fts5Hash *pHash; /* Hash table for in-memory data */
291 int nPendingData; /* Current bytes of pending data */
292 i64 iWriteRowid; /* Rowid for current doc being written */
293 int bDelete; /* Current write is a delete */
294
295 /* Error state. */
296 int rc; /* Current error code */
297
298 /* State used by the fts5DataXXX() functions. */
299 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
300 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
301 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
302 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
303 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */
304 sqlite3_stmt *pIdxSelect;
305 int nRead; /* Total number of blocks read */
306 };
307
308 struct Fts5DoclistIter {
309 u8 *aEof; /* Pointer to 1 byte past end of doclist */
310
311 /* Output variables. aPoslist==0 at EOF */
312 i64 iRowid;
313 u8 *aPoslist;
314 int nPoslist;
315 int nSize;
316 };
317
318 /*
319 ** The contents of the "structure" record for each index are represented
320 ** using an Fts5Structure record in memory. Which uses instances of the
321 ** other Fts5StructureXXX types as components.
322 */
323 struct Fts5StructureSegment {
324 int iSegid; /* Segment id */
325 int pgnoFirst; /* First leaf page number in segment */
326 int pgnoLast; /* Last leaf page number in segment */
327 };
328 struct Fts5StructureLevel {
329 int nMerge; /* Number of segments in incr-merge */
330 int nSeg; /* Total number of segments on level */
331 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
332 };
333 struct Fts5Structure {
334 int nRef; /* Object reference count */
335 u64 nWriteCounter; /* Total leaves written to level 0 */
336 int nSegment; /* Total segments in this structure */
337 int nLevel; /* Number of levels in this index */
338 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
339 };
340
341 /*
342 ** An object of type Fts5SegWriter is used to write to segments.
343 */
344 struct Fts5PageWriter {
345 int pgno; /* Page number for this page */
346 int iPrevPgidx; /* Previous value written into pgidx */
347 Fts5Buffer buf; /* Buffer containing leaf data */
348 Fts5Buffer pgidx; /* Buffer containing page-index */
349 Fts5Buffer term; /* Buffer containing previous term on page */
350 };
351 struct Fts5DlidxWriter {
352 int pgno; /* Page number for this page */
353 int bPrevValid; /* True if iPrev is valid */
354 i64 iPrev; /* Previous rowid value written to page */
355 Fts5Buffer buf; /* Buffer containing page data */
356 };
357 struct Fts5SegWriter {
358 int iSegid; /* Segid to write to */
359 Fts5PageWriter writer; /* PageWriter object */
360 i64 iPrevRowid; /* Previous rowid written to current leaf */
361 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
362 u8 bFirstRowidInPage; /* True if next rowid is first in page */
363 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
364 u8 bFirstTermInPage; /* True if next term will be first in leaf */
365 int nLeafWritten; /* Number of leaf pages written */
366 int nEmpty; /* Number of contiguous term-less nodes */
367
368 int nDlidx; /* Allocated size of aDlidx[] array */
369 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
370
371 /* Values to insert into the %_idx table */
372 Fts5Buffer btterm; /* Next term to insert into %_idx table */
373 int iBtPage; /* Page number corresponding to btterm */
374 };
375
376 typedef struct Fts5CResult Fts5CResult;
377 struct Fts5CResult {
378 u16 iFirst; /* aSeg[] index of firstest iterator */
379 u8 bTermEq; /* True if the terms are equal */
380 };
381
382 /*
383 ** Object for iterating through a single segment, visiting each term/rowid
384 ** pair in the segment.
385 **
386 ** pSeg:
387 ** The segment to iterate through.
388 **
389 ** iLeafPgno:
390 ** Current leaf page number within segment.
391 **
392 ** iLeafOffset:
393 ** Byte offset within the current leaf that is the first byte of the
394 ** position list data (one byte passed the position-list size field).
395 ** rowid field of the current entry. Usually this is the size field of the
396 ** position list data. The exception is if the rowid for the current entry
397 ** is the last thing on the leaf page.
398 **
399 ** pLeaf:
400 ** Buffer containing current leaf page data. Set to NULL at EOF.
401 **
402 ** iTermLeafPgno, iTermLeafOffset:
403 ** Leaf page number containing the last term read from the segment. And
404 ** the offset immediately following the term data.
405 **
406 ** flags:
407 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
408 **
409 ** FTS5_SEGITER_ONETERM:
410 ** If set, set the iterator to point to EOF after the current doclist
411 ** has been exhausted. Do not proceed to the next term in the segment.
412 **
413 ** FTS5_SEGITER_REVERSE:
414 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
415 ** it is set, iterate through rowid in descending order instead of the
416 ** default ascending order.
417 **
418 ** iRowidOffset/nRowidOffset/aRowidOffset:
419 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
420 **
421 ** For each rowid on the page corresponding to the current term, the
422 ** corresponding aRowidOffset[] entry is set to the byte offset of the
423 ** start of the "position-list-size" field within the page.
424 **
425 ** iTermIdx:
426 ** Index of current term on iTermLeafPgno.
427 */
428 struct Fts5SegIter {
429 Fts5StructureSegment *pSeg; /* Segment to iterate through */
430 int flags; /* Mask of configuration flags */
431 int iLeafPgno; /* Current leaf page number */
432 Fts5Data *pLeaf; /* Current leaf data */
433 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
434 int iLeafOffset; /* Byte offset within current leaf */
435
436 /* The page and offset from which the current term was read. The offset
437 ** is the offset of the first rowid in the current doclist. */
438 int iTermLeafPgno;
439 int iTermLeafOffset;
440
441 int iPgidxOff; /* Next offset in pgidx */
442 int iEndofDoclist;
443
444 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
445 int iRowidOffset; /* Current entry in aRowidOffset[] */
446 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
447 int *aRowidOffset; /* Array of offset to rowid fields */
448
449 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
450
451 /* Variables populated based on current entry. */
452 Fts5Buffer term; /* Current term */
453 i64 iRowid; /* Current rowid */
454 int nPos; /* Number of bytes in current position list */
455 int bDel; /* True if the delete flag is set */
456 };
457
458 /*
459 ** Argument is a pointer to an Fts5Data structure that contains a
460 ** leaf page.
461 */
462 #define ASSERT_SZLEAF_OK(x) assert( \
463 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
464 )
465
466 #define FTS5_SEGITER_ONETERM 0x01
467 #define FTS5_SEGITER_REVERSE 0x02
468
469
470 /*
471 ** Argument is a pointer to an Fts5Data structure that contains a leaf
472 ** page. This macro evaluates to true if the leaf contains no terms, or
473 ** false if it contains at least one term.
474 */
475 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
476
477 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
478
479 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
480
481 /*
482 ** Object for iterating through the merged results of one or more segments,
483 ** visiting each term/rowid pair in the merged data.
484 **
485 ** nSeg is always a power of two greater than or equal to the number of
486 ** segments that this object is merging data from. Both the aSeg[] and
487 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
488 ** with zeroed objects - these are handled as if they were iterators opened
489 ** on empty segments.
490 **
491 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
492 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
493 ** comparison in this context is the index of the iterator that currently
494 ** points to the smaller term/rowid combination. Iterators at EOF are
495 ** considered to be greater than all other iterators.
496 **
497 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
498 ** the smallest key overall. aFirst[0] is unused.
499 **
500 ** poslist:
501 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
502 ** There is no way to tell if this is populated or not.
503 */
504 struct Fts5IndexIter {
505 Fts5Index *pIndex; /* Index that owns this iterator */
506 Fts5Structure *pStruct; /* Database structure for this iterator */
507 Fts5Buffer poslist; /* Buffer containing current poslist */
508
509 int nSeg; /* Size of aSeg[] array */
510 int bRev; /* True to iterate in reverse order */
511 u8 bSkipEmpty; /* True to skip deleted entries */
512 u8 bEof; /* True at EOF */
513 u8 bFiltered; /* True if column-filter already applied */
514
515 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
516 Fts5CResult *aFirst; /* Current merge state (see above) */
517 Fts5SegIter aSeg[1]; /* Array of segment iterators */
518 };
519
520
521 /*
522 ** An instance of the following type is used to iterate through the contents
523 ** of a doclist-index record.
524 **
525 ** pData:
526 ** Record containing the doclist-index data.
527 **
528 ** bEof:
529 ** Set to true once iterator has reached EOF.
530 **
531 ** iOff:
532 ** Set to the current offset within record pData.
533 */
534 struct Fts5DlidxLvl {
535 Fts5Data *pData; /* Data for current page of this level */
536 int iOff; /* Current offset into pData */
537 int bEof; /* At EOF already */
538 int iFirstOff; /* Used by reverse iterators */
539
540 /* Output variables */
541 int iLeafPgno; /* Page number of current leaf page */
542 i64 iRowid; /* First rowid on leaf iLeafPgno */
543 };
544 struct Fts5DlidxIter {
545 int nLvl;
546 int iSegid;
547 Fts5DlidxLvl aLvl[1];
548 };
549
550 static void fts5PutU16(u8 *aOut, u16 iVal){
551 aOut[0] = (iVal>>8);
552 aOut[1] = (iVal&0xFF);
553 }
554
555 static u16 fts5GetU16(const u8 *aIn){
556 return ((u16)aIn[0] << 8) + aIn[1];
557 }
558
559 /*
560 ** Allocate and return a buffer at least nByte bytes in size.
561 **
562 ** If an OOM error is encountered, return NULL and set the error code in
563 ** the Fts5Index handle passed as the first argument.
564 */
565 static void *fts5IdxMalloc(Fts5Index *p, int nByte){
566 return sqlite3Fts5MallocZero(&p->rc, nByte);
567 }
568
569 /*
570 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
571 **
572 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
573 ** +ve if pRight is smaller than pLeft. In other words:
574 **
575 ** res = *pLeft - *pRight
576 */
577 #ifdef SQLITE_DEBUG
578 static int fts5BufferCompareBlob(
579 Fts5Buffer *pLeft, /* Left hand side of comparison */
580 const u8 *pRight, int nRight /* Right hand side of comparison */
581 ){
582 int nCmp = MIN(pLeft->n, nRight);
583 int res = memcmp(pLeft->p, pRight, nCmp);
584 return (res==0 ? (pLeft->n - nRight) : res);
585 }
586 #endif
587
588 /*
589 ** Compare the contents of the two buffers using memcmp(). If one buffer
590 ** is a prefix of the other, it is considered the lesser.
591 **
592 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
593 ** +ve if pRight is smaller than pLeft. In other words:
594 **
595 ** res = *pLeft - *pRight
596 */
597 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
598 int nCmp = MIN(pLeft->n, pRight->n);
599 int res = memcmp(pLeft->p, pRight->p, nCmp);
600 return (res==0 ? (pLeft->n - pRight->n) : res);
601 }
602
603 #ifdef SQLITE_DEBUG
604 static int fts5BlobCompare(
605 const u8 *pLeft, int nLeft,
606 const u8 *pRight, int nRight
607 ){
608 int nCmp = MIN(nLeft, nRight);
609 int res = memcmp(pLeft, pRight, nCmp);
610 return (res==0 ? (nLeft - nRight) : res);
611 }
612 #endif
613
614 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
615 int ret;
616 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
617 return ret;
618 }
619
620 /*
621 ** Close the read-only blob handle, if it is open.
622 */
623 static void fts5CloseReader(Fts5Index *p){
624 if( p->pReader ){
625 sqlite3_blob *pReader = p->pReader;
626 p->pReader = 0;
627 sqlite3_blob_close(pReader);
628 }
629 }
630
631
632 /*
633 ** Retrieve a record from the %_data table.
634 **
635 ** If an error occurs, NULL is returned and an error left in the
636 ** Fts5Index object.
637 */
638 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
639 Fts5Data *pRet = 0;
640 if( p->rc==SQLITE_OK ){
641 int rc = SQLITE_OK;
642
643 if( p->pReader ){
644 /* This call may return SQLITE_ABORT if there has been a savepoint
645 ** rollback since it was last used. In this case a new blob handle
646 ** is required. */
647 sqlite3_blob *pBlob = p->pReader;
648 p->pReader = 0;
649 rc = sqlite3_blob_reopen(pBlob, iRowid);
650 assert( p->pReader==0 );
651 p->pReader = pBlob;
652 if( rc!=SQLITE_OK ){
653 fts5CloseReader(p);
654 }
655 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
656 }
657
658 /* If the blob handle is not open at this point, open it and seek
659 ** to the requested entry. */
660 if( p->pReader==0 && rc==SQLITE_OK ){
661 Fts5Config *pConfig = p->pConfig;
662 rc = sqlite3_blob_open(pConfig->db,
663 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
664 );
665 }
666
667 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
668 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
669 ** All the reasons those functions might return SQLITE_ERROR - missing
670 ** table, missing row, non-blob/text in block column - indicate
671 ** backing store corruption. */
672 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
673
674 if( rc==SQLITE_OK ){
675 u8 *aOut = 0; /* Read blob data into this buffer */
676 int nByte = sqlite3_blob_bytes(p->pReader);
677 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
678 pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
679 if( pRet ){
680 pRet->nn = nByte;
681 aOut = pRet->p = (u8*)&pRet[1];
682 }else{
683 rc = SQLITE_NOMEM;
684 }
685
686 if( rc==SQLITE_OK ){
687 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
688 }
689 if( rc!=SQLITE_OK ){
690 sqlite3_free(pRet);
691 pRet = 0;
692 }else{
693 /* TODO1: Fix this */
694 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
695 }
696 }
697 p->rc = rc;
698 p->nRead++;
699 }
700
701 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
702 return pRet;
703 }
704
705 /*
706 ** Release a reference to data record returned by an earlier call to
707 ** fts5DataRead().
708 */
709 static void fts5DataRelease(Fts5Data *pData){
710 sqlite3_free(pData);
711 }
712
713 static int fts5IndexPrepareStmt(
714 Fts5Index *p,
715 sqlite3_stmt **ppStmt,
716 char *zSql
717 ){
718 if( p->rc==SQLITE_OK ){
719 if( zSql ){
720 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
721 }else{
722 p->rc = SQLITE_NOMEM;
723 }
724 }
725 sqlite3_free(zSql);
726 return p->rc;
727 }
728
729
730 /*
731 ** INSERT OR REPLACE a record into the %_data table.
732 */
733 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
734 if( p->rc!=SQLITE_OK ) return;
735
736 if( p->pWriter==0 ){
737 Fts5Config *pConfig = p->pConfig;
738 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
739 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
740 pConfig->zDb, pConfig->zName
741 ));
742 if( p->rc ) return;
743 }
744
745 sqlite3_bind_int64(p->pWriter, 1, iRowid);
746 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
747 sqlite3_step(p->pWriter);
748 p->rc = sqlite3_reset(p->pWriter);
749 }
750
751 /*
752 ** Execute the following SQL:
753 **
754 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
755 */
756 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
757 if( p->rc!=SQLITE_OK ) return;
758
759 if( p->pDeleter==0 ){
760 int rc;
761 Fts5Config *pConfig = p->pConfig;
762 char *zSql = sqlite3_mprintf(
763 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
764 pConfig->zDb, pConfig->zName
765 );
766 if( zSql==0 ){
767 rc = SQLITE_NOMEM;
768 }else{
769 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
770 sqlite3_free(zSql);
771 }
772 if( rc!=SQLITE_OK ){
773 p->rc = rc;
774 return;
775 }
776 }
777
778 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
779 sqlite3_bind_int64(p->pDeleter, 2, iLast);
780 sqlite3_step(p->pDeleter);
781 p->rc = sqlite3_reset(p->pDeleter);
782 }
783
784 /*
785 ** Remove all records associated with segment iSegid.
786 */
787 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
788 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
789 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
790 fts5DataDelete(p, iFirst, iLast);
791 if( p->pIdxDeleter==0 ){
792 Fts5Config *pConfig = p->pConfig;
793 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
794 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
795 pConfig->zDb, pConfig->zName
796 ));
797 }
798 if( p->rc==SQLITE_OK ){
799 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
800 sqlite3_step(p->pIdxDeleter);
801 p->rc = sqlite3_reset(p->pIdxDeleter);
802 }
803 }
804
805 /*
806 ** Release a reference to an Fts5Structure object returned by an earlier
807 ** call to fts5StructureRead() or fts5StructureDecode().
808 */
809 static void fts5StructureRelease(Fts5Structure *pStruct){
810 if( pStruct && 0>=(--pStruct->nRef) ){
811 int i;
812 assert( pStruct->nRef==0 );
813 for(i=0; i<pStruct->nLevel; i++){
814 sqlite3_free(pStruct->aLevel[i].aSeg);
815 }
816 sqlite3_free(pStruct);
817 }
818 }
819
820 static void fts5StructureRef(Fts5Structure *pStruct){
821 pStruct->nRef++;
822 }
823
824 /*
825 ** Deserialize and return the structure record currently stored in serialized
826 ** form within buffer pData/nData.
827 **
828 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
829 ** are over-allocated by one slot. This allows the structure contents
830 ** to be more easily edited.
831 **
832 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
833 ** returned. Otherwise, *ppOut is set to point to the new object and
834 ** SQLITE_OK returned.
835 */
836 static int fts5StructureDecode(
837 const u8 *pData, /* Buffer containing serialized structure */
838 int nData, /* Size of buffer pData in bytes */
839 int *piCookie, /* Configuration cookie value */
840 Fts5Structure **ppOut /* OUT: Deserialized object */
841 ){
842 int rc = SQLITE_OK;
843 int i = 0;
844 int iLvl;
845 int nLevel = 0;
846 int nSegment = 0;
847 int nByte; /* Bytes of space to allocate at pRet */
848 Fts5Structure *pRet = 0; /* Structure object to return */
849
850 /* Grab the cookie value */
851 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
852 i = 4;
853
854 /* Read the total number of levels and segments from the start of the
855 ** structure record. */
856 i += fts5GetVarint32(&pData[i], nLevel);
857 i += fts5GetVarint32(&pData[i], nSegment);
858 nByte = (
859 sizeof(Fts5Structure) + /* Main structure */
860 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
861 );
862 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
863
864 if( pRet ){
865 pRet->nRef = 1;
866 pRet->nLevel = nLevel;
867 pRet->nSegment = nSegment;
868 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
869
870 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
871 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
872 int nTotal;
873 int iSeg;
874
875 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
876 i += fts5GetVarint32(&pData[i], nTotal);
877 assert( nTotal>=pLvl->nMerge );
878 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
879 nTotal * sizeof(Fts5StructureSegment)
880 );
881
882 if( rc==SQLITE_OK ){
883 pLvl->nSeg = nTotal;
884 for(iSeg=0; iSeg<nTotal; iSeg++){
885 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
886 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
887 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
888 }
889 }else{
890 fts5StructureRelease(pRet);
891 pRet = 0;
892 }
893 }
894 }
895
896 *ppOut = pRet;
897 return rc;
898 }
899
900 /*
901 **
902 */
903 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
904 if( *pRc==SQLITE_OK ){
905 Fts5Structure *pStruct = *ppStruct;
906 int nLevel = pStruct->nLevel;
907 int nByte = (
908 sizeof(Fts5Structure) + /* Main structure */
909 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
910 );
911
912 pStruct = sqlite3_realloc(pStruct, nByte);
913 if( pStruct ){
914 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
915 pStruct->nLevel++;
916 *ppStruct = pStruct;
917 }else{
918 *pRc = SQLITE_NOMEM;
919 }
920 }
921 }
922
923 /*
924 ** Extend level iLvl so that there is room for at least nExtra more
925 ** segments.
926 */
927 static void fts5StructureExtendLevel(
928 int *pRc,
929 Fts5Structure *pStruct,
930 int iLvl,
931 int nExtra,
932 int bInsert
933 ){
934 if( *pRc==SQLITE_OK ){
935 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
936 Fts5StructureSegment *aNew;
937 int nByte;
938
939 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
940 aNew = sqlite3_realloc(pLvl->aSeg, nByte);
941 if( aNew ){
942 if( bInsert==0 ){
943 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
944 }else{
945 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
946 memmove(&aNew[nExtra], aNew, nMove);
947 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
948 }
949 pLvl->aSeg = aNew;
950 }else{
951 *pRc = SQLITE_NOMEM;
952 }
953 }
954 }
955
956 /*
957 ** Read, deserialize and return the structure record.
958 **
959 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
960 ** are over-allocated as described for function fts5StructureDecode()
961 ** above.
962 **
963 ** If an error occurs, NULL is returned and an error code left in the
964 ** Fts5Index handle. If an error has already occurred when this function
965 ** is called, it is a no-op.
966 */
967 static Fts5Structure *fts5StructureRead(Fts5Index *p){
968 Fts5Config *pConfig = p->pConfig;
969 Fts5Structure *pRet = 0; /* Object to return */
970 int iCookie; /* Configuration cookie */
971 Fts5Data *pData;
972
973 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
974 if( p->rc ) return 0;
975 /* TODO: Do we need this if the leaf-index is appended? Probably... */
976 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
977 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
978 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
979 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
980 }
981
982 fts5DataRelease(pData);
983 if( p->rc!=SQLITE_OK ){
984 fts5StructureRelease(pRet);
985 pRet = 0;
986 }
987 return pRet;
988 }
989
990 /*
991 ** Return the total number of segments in index structure pStruct. This
992 ** function is only ever used as part of assert() conditions.
993 */
994 #ifdef SQLITE_DEBUG
995 static int fts5StructureCountSegments(Fts5Structure *pStruct){
996 int nSegment = 0; /* Total number of segments */
997 if( pStruct ){
998 int iLvl; /* Used to iterate through levels */
999 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1000 nSegment += pStruct->aLevel[iLvl].nSeg;
1001 }
1002 }
1003
1004 return nSegment;
1005 }
1006 #endif
1007
1008 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
1009 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
1010 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
1011 (pBuf)->n += nBlob; \
1012 }
1013
1014 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \
1015 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
1016 assert( (pBuf)->nSpace>=(pBuf)->n ); \
1017 }
1018
1019
1020 /*
1021 ** Serialize and store the "structure" record.
1022 **
1023 ** If an error occurs, leave an error code in the Fts5Index object. If an
1024 ** error has already occurred, this function is a no-op.
1025 */
1026 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1027 if( p->rc==SQLITE_OK ){
1028 Fts5Buffer buf; /* Buffer to serialize record into */
1029 int iLvl; /* Used to iterate through levels */
1030 int iCookie; /* Cookie value to store */
1031
1032 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1033 memset(&buf, 0, sizeof(Fts5Buffer));
1034
1035 /* Append the current configuration cookie */
1036 iCookie = p->pConfig->iCookie;
1037 if( iCookie<0 ) iCookie = 0;
1038
1039 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1040 sqlite3Fts5Put32(buf.p, iCookie);
1041 buf.n = 4;
1042 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1043 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1044 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1045 }
1046
1047 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1048 int iSeg; /* Used to iterate through segments */
1049 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1050 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1051 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1052 assert( pLvl->nMerge<=pLvl->nSeg );
1053
1054 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1055 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1056 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1057 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1058 }
1059 }
1060
1061 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1062 fts5BufferFree(&buf);
1063 }
1064 }
1065
1066 #if 0
1067 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
1068 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
1069 int rc = SQLITE_OK;
1070 Fts5Buffer buf;
1071 memset(&buf, 0, sizeof(buf));
1072 fts5DebugStructure(&rc, &buf, pStruct);
1073 fprintf(stdout, "%s: %s\n", zCaption, buf.p);
1074 fflush(stdout);
1075 fts5BufferFree(&buf);
1076 }
1077 #else
1078 # define fts5PrintStructure(x,y)
1079 #endif
1080
1081 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
1082 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
1083 }
1084
1085 /*
1086 ** Return a copy of index structure pStruct. Except, promote as many
1087 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1088 ** returned.
1089 */
1090 static void fts5StructurePromoteTo(
1091 Fts5Index *p,
1092 int iPromote,
1093 int szPromote,
1094 Fts5Structure *pStruct
1095 ){
1096 int il, is;
1097 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1098
1099 if( pOut->nMerge==0 ){
1100 for(il=iPromote+1; il<pStruct->nLevel; il++){
1101 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1102 if( pLvl->nMerge ) return;
1103 for(is=pLvl->nSeg-1; is>=0; is--){
1104 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1105 if( sz>szPromote ) return;
1106 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1107 if( p->rc ) return;
1108 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1109 pOut->nSeg++;
1110 pLvl->nSeg--;
1111 }
1112 }
1113 }
1114 }
1115
1116 /*
1117 ** A new segment has just been written to level iLvl of index structure
1118 ** pStruct. This function determines if any segments should be promoted
1119 ** as a result. Segments are promoted in two scenarios:
1120 **
1121 ** a) If the segment just written is smaller than one or more segments
1122 ** within the previous populated level, it is promoted to the previous
1123 ** populated level.
1124 **
1125 ** b) If the segment just written is larger than the newest segment on
1126 ** the next populated level, then that segment, and any other adjacent
1127 ** segments that are also smaller than the one just written, are
1128 ** promoted.
1129 **
1130 ** If one or more segments are promoted, the structure object is updated
1131 ** to reflect this.
1132 */
1133 static void fts5StructurePromote(
1134 Fts5Index *p, /* FTS5 backend object */
1135 int iLvl, /* Index level just updated */
1136 Fts5Structure *pStruct /* Index structure */
1137 ){
1138 if( p->rc==SQLITE_OK ){
1139 int iTst;
1140 int iPromote = -1;
1141 int szPromote = 0; /* Promote anything this size or smaller */
1142 Fts5StructureSegment *pSeg; /* Segment just written */
1143 int szSeg; /* Size of segment just written */
1144 int nSeg = pStruct->aLevel[iLvl].nSeg;
1145
1146 if( nSeg==0 ) return;
1147 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1148 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1149
1150 /* Check for condition (a) */
1151 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1152 if( iTst>=0 ){
1153 int i;
1154 int szMax = 0;
1155 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1156 assert( pTst->nMerge==0 );
1157 for(i=0; i<pTst->nSeg; i++){
1158 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1159 if( sz>szMax ) szMax = sz;
1160 }
1161 if( szMax>=szSeg ){
1162 /* Condition (a) is true. Promote the newest segment on level
1163 ** iLvl to level iTst. */
1164 iPromote = iTst;
1165 szPromote = szMax;
1166 }
1167 }
1168
1169 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1170 ** is a no-op if it is not. */
1171 if( iPromote<0 ){
1172 iPromote = iLvl;
1173 szPromote = szSeg;
1174 }
1175 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1176 }
1177 }
1178
1179
1180 /*
1181 ** Advance the iterator passed as the only argument. If the end of the
1182 ** doclist-index page is reached, return non-zero.
1183 */
1184 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1185 Fts5Data *pData = pLvl->pData;
1186
1187 if( pLvl->iOff==0 ){
1188 assert( pLvl->bEof==0 );
1189 pLvl->iOff = 1;
1190 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1191 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1192 pLvl->iFirstOff = pLvl->iOff;
1193 }else{
1194 int iOff;
1195 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1196 if( pData->p[iOff] ) break;
1197 }
1198
1199 if( iOff<pData->nn ){
1200 i64 iVal;
1201 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1202 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1203 pLvl->iRowid += iVal;
1204 pLvl->iOff = iOff;
1205 }else{
1206 pLvl->bEof = 1;
1207 }
1208 }
1209
1210 return pLvl->bEof;
1211 }
1212
1213 /*
1214 ** Advance the iterator passed as the only argument.
1215 */
1216 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1217 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1218
1219 assert( iLvl<pIter->nLvl );
1220 if( fts5DlidxLvlNext(pLvl) ){
1221 if( (iLvl+1) < pIter->nLvl ){
1222 fts5DlidxIterNextR(p, pIter, iLvl+1);
1223 if( pLvl[1].bEof==0 ){
1224 fts5DataRelease(pLvl->pData);
1225 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1226 pLvl->pData = fts5DataRead(p,
1227 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1228 );
1229 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1230 }
1231 }
1232 }
1233
1234 return pIter->aLvl[0].bEof;
1235 }
1236 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
1237 return fts5DlidxIterNextR(p, pIter, 0);
1238 }
1239
1240 /*
1241 ** The iterator passed as the first argument has the following fields set
1242 ** as follows. This function sets up the rest of the iterator so that it
1243 ** points to the first rowid in the doclist-index.
1244 **
1245 ** pData:
1246 ** pointer to doclist-index record,
1247 **
1248 ** When this function is called pIter->iLeafPgno is the page number the
1249 ** doclist is associated with (the one featuring the term).
1250 */
1251 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1252 int i;
1253 for(i=0; i<pIter->nLvl; i++){
1254 fts5DlidxLvlNext(&pIter->aLvl[i]);
1255 }
1256 return pIter->aLvl[0].bEof;
1257 }
1258
1259
1260 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
1261 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
1262 }
1263
1264 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
1265 int i;
1266
1267 /* Advance each level to the last entry on the last page */
1268 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
1269 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
1270 while( fts5DlidxLvlNext(pLvl)==0 );
1271 pLvl->bEof = 0;
1272
1273 if( i>0 ){
1274 Fts5DlidxLvl *pChild = &pLvl[-1];
1275 fts5DataRelease(pChild->pData);
1276 memset(pChild, 0, sizeof(Fts5DlidxLvl));
1277 pChild->pData = fts5DataRead(p,
1278 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
1279 );
1280 }
1281 }
1282 }
1283
1284 /*
1285 ** Move the iterator passed as the only argument to the previous entry.
1286 */
1287 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1288 int iOff = pLvl->iOff;
1289
1290 assert( pLvl->bEof==0 );
1291 if( iOff<=pLvl->iFirstOff ){
1292 pLvl->bEof = 1;
1293 }else{
1294 u8 *a = pLvl->pData->p;
1295 i64 iVal;
1296 int iLimit;
1297 int ii;
1298 int nZero = 0;
1299
1300 /* Currently iOff points to the first byte of a varint. This block
1301 ** decrements iOff until it points to the first byte of the previous
1302 ** varint. Taking care not to read any memory locations that occur
1303 ** before the buffer in memory. */
1304 iLimit = (iOff>9 ? iOff-9 : 0);
1305 for(iOff--; iOff>iLimit; iOff--){
1306 if( (a[iOff-1] & 0x80)==0 ) break;
1307 }
1308
1309 fts5GetVarint(&a[iOff], (u64*)&iVal);
1310 pLvl->iRowid -= iVal;
1311 pLvl->iLeafPgno--;
1312
1313 /* Skip backwards past any 0x00 varints. */
1314 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
1315 nZero++;
1316 }
1317 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
1318 /* The byte immediately before the last 0x00 byte has the 0x80 bit
1319 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
1320 ** bytes before a[ii]. */
1321 int bZero = 0; /* True if last 0x00 counts */
1322 if( (ii-8)>=pLvl->iFirstOff ){
1323 int j;
1324 for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
1325 bZero = (j>8);
1326 }
1327 if( bZero==0 ) nZero--;
1328 }
1329 pLvl->iLeafPgno -= nZero;
1330 pLvl->iOff = iOff - nZero;
1331 }
1332
1333 return pLvl->bEof;
1334 }
1335
1336 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1337 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1338
1339 assert( iLvl<pIter->nLvl );
1340 if( fts5DlidxLvlPrev(pLvl) ){
1341 if( (iLvl+1) < pIter->nLvl ){
1342 fts5DlidxIterPrevR(p, pIter, iLvl+1);
1343 if( pLvl[1].bEof==0 ){
1344 fts5DataRelease(pLvl->pData);
1345 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1346 pLvl->pData = fts5DataRead(p,
1347 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1348 );
1349 if( pLvl->pData ){
1350 while( fts5DlidxLvlNext(pLvl)==0 );
1351 pLvl->bEof = 0;
1352 }
1353 }
1354 }
1355 }
1356
1357 return pIter->aLvl[0].bEof;
1358 }
1359 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
1360 return fts5DlidxIterPrevR(p, pIter, 0);
1361 }
1362
1363 /*
1364 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1365 */
1366 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1367 if( pIter ){
1368 int i;
1369 for(i=0; i<pIter->nLvl; i++){
1370 fts5DataRelease(pIter->aLvl[i].pData);
1371 }
1372 sqlite3_free(pIter);
1373 }
1374 }
1375
1376 static Fts5DlidxIter *fts5DlidxIterInit(
1377 Fts5Index *p, /* Fts5 Backend to iterate within */
1378 int bRev, /* True for ORDER BY ASC */
1379 int iSegid, /* Segment id */
1380 int iLeafPg /* Leaf page number to load dlidx for */
1381 ){
1382 Fts5DlidxIter *pIter = 0;
1383 int i;
1384 int bDone = 0;
1385
1386 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1387 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1388 Fts5DlidxIter *pNew;
1389
1390 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
1391 if( pNew==0 ){
1392 p->rc = SQLITE_NOMEM;
1393 }else{
1394 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1395 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1396 pIter = pNew;
1397 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1398 pLvl->pData = fts5DataRead(p, iRowid);
1399 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1400 bDone = 1;
1401 }
1402 pIter->nLvl = i+1;
1403 }
1404 }
1405
1406 if( p->rc==SQLITE_OK ){
1407 pIter->iSegid = iSegid;
1408 if( bRev==0 ){
1409 fts5DlidxIterFirst(pIter);
1410 }else{
1411 fts5DlidxIterLast(p, pIter);
1412 }
1413 }
1414
1415 if( p->rc!=SQLITE_OK ){
1416 fts5DlidxIterFree(pIter);
1417 pIter = 0;
1418 }
1419
1420 return pIter;
1421 }
1422
1423 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1424 return pIter->aLvl[0].iRowid;
1425 }
1426 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
1427 return pIter->aLvl[0].iLeafPgno;
1428 }
1429
1430 /*
1431 ** Load the next leaf page into the segment iterator.
1432 */
1433 static void fts5SegIterNextPage(
1434 Fts5Index *p, /* FTS5 backend object */
1435 Fts5SegIter *pIter /* Iterator to advance to next page */
1436 ){
1437 Fts5Data *pLeaf;
1438 Fts5StructureSegment *pSeg = pIter->pSeg;
1439 fts5DataRelease(pIter->pLeaf);
1440 pIter->iLeafPgno++;
1441 if( pIter->pNextLeaf ){
1442 pIter->pLeaf = pIter->pNextLeaf;
1443 pIter->pNextLeaf = 0;
1444 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1445 pIter->pLeaf = fts5DataRead(p,
1446 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1447 );
1448 }else{
1449 pIter->pLeaf = 0;
1450 }
1451 pLeaf = pIter->pLeaf;
1452
1453 if( pLeaf ){
1454 pIter->iPgidxOff = pLeaf->szLeaf;
1455 if( fts5LeafIsTermless(pLeaf) ){
1456 pIter->iEndofDoclist = pLeaf->nn+1;
1457 }else{
1458 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1459 pIter->iEndofDoclist
1460 );
1461 }
1462 }
1463 }
1464
1465 /*
1466 ** Argument p points to a buffer containing a varint to be interpreted as a
1467 ** position list size field. Read the varint and return the number of bytes
1468 ** read. Before returning, set *pnSz to the number of bytes in the position
1469 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1470 */
1471 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1472 int nSz;
1473 int n = 0;
1474 fts5FastGetVarint32(p, n, nSz);
1475 assert_nc( nSz>=0 );
1476 *pnSz = nSz/2;
1477 *pbDel = nSz & 0x0001;
1478 return n;
1479 }
1480
1481 /*
1482 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1483 ** position-list size field. Read the value of the field and store it
1484 ** in the following variables:
1485 **
1486 ** Fts5SegIter.nPos
1487 ** Fts5SegIter.bDel
1488 **
1489 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1490 ** position list content (if any).
1491 */
1492 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1493 if( p->rc==SQLITE_OK ){
1494 int iOff = pIter->iLeafOffset; /* Offset to read at */
1495 int nSz;
1496 ASSERT_SZLEAF_OK(pIter->pLeaf);
1497 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1498 pIter->bDel = (nSz & 0x0001);
1499 pIter->nPos = nSz>>1;
1500 pIter->iLeafOffset = iOff;
1501 assert_nc( pIter->nPos>=0 );
1502 }
1503 }
1504
1505 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1506 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1507 int iOff = pIter->iLeafOffset;
1508
1509 ASSERT_SZLEAF_OK(pIter->pLeaf);
1510 if( iOff>=pIter->pLeaf->szLeaf ){
1511 fts5SegIterNextPage(p, pIter);
1512 if( pIter->pLeaf==0 ){
1513 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
1514 return;
1515 }
1516 iOff = 4;
1517 a = pIter->pLeaf->p;
1518 }
1519 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
1520 pIter->iLeafOffset = iOff;
1521 }
1522
1523 /*
1524 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1525 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1526 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1527 ** the first term in the segment).
1528 **
1529 ** This function populates:
1530 **
1531 ** Fts5SegIter.term
1532 ** Fts5SegIter.rowid
1533 **
1534 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1535 ** the first position list. The position list belonging to document
1536 ** (Fts5SegIter.iRowid).
1537 */
1538 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1539 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1540 int iOff = pIter->iLeafOffset; /* Offset to read at */
1541 int nNew; /* Bytes of new data */
1542
1543 iOff += fts5GetVarint32(&a[iOff], nNew);
1544 pIter->term.n = nKeep;
1545 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1546 iOff += nNew;
1547 pIter->iTermLeafOffset = iOff;
1548 pIter->iTermLeafPgno = pIter->iLeafPgno;
1549 pIter->iLeafOffset = iOff;
1550
1551 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1552 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1553 }else{
1554 int nExtra;
1555 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1556 pIter->iEndofDoclist += nExtra;
1557 }
1558
1559 fts5SegIterLoadRowid(p, pIter);
1560 }
1561
1562 /*
1563 ** Initialize the iterator object pIter to iterate through the entries in
1564 ** segment pSeg. The iterator is left pointing to the first entry when
1565 ** this function returns.
1566 **
1567 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1568 ** an error has already occurred when this function is called, it is a no-op.
1569 */
1570 static void fts5SegIterInit(
1571 Fts5Index *p, /* FTS index object */
1572 Fts5StructureSegment *pSeg, /* Description of segment */
1573 Fts5SegIter *pIter /* Object to populate */
1574 ){
1575 if( pSeg->pgnoFirst==0 ){
1576 /* This happens if the segment is being used as an input to an incremental
1577 ** merge and all data has already been "trimmed". See function
1578 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1579 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1580 ** at EOF already. */
1581 assert( pIter->pLeaf==0 );
1582 return;
1583 }
1584
1585 if( p->rc==SQLITE_OK ){
1586 memset(pIter, 0, sizeof(*pIter));
1587 pIter->pSeg = pSeg;
1588 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1589 fts5SegIterNextPage(p, pIter);
1590 }
1591
1592 if( p->rc==SQLITE_OK ){
1593 pIter->iLeafOffset = 4;
1594 assert_nc( pIter->pLeaf->nn>4 );
1595 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1596 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1597 fts5SegIterLoadTerm(p, pIter, 0);
1598 fts5SegIterLoadNPos(p, pIter);
1599 }
1600 }
1601
1602 /*
1603 ** This function is only ever called on iterators created by calls to
1604 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1605 **
1606 ** The iterator is in an unusual state when this function is called: the
1607 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1608 ** the position-list size field for the first relevant rowid on the page.
1609 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1610 **
1611 ** This function advances the iterator so that it points to the last
1612 ** relevant rowid on the page and, if necessary, initializes the
1613 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1614 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1615 ** byte of the position list content associated with said rowid.
1616 */
1617 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1618 int n = pIter->pLeaf->szLeaf;
1619 int i = pIter->iLeafOffset;
1620 u8 *a = pIter->pLeaf->p;
1621 int iRowidOffset = 0;
1622
1623 if( n>pIter->iEndofDoclist ){
1624 n = pIter->iEndofDoclist;
1625 }
1626
1627 ASSERT_SZLEAF_OK(pIter->pLeaf);
1628 while( 1 ){
1629 i64 iDelta = 0;
1630 int nPos;
1631 int bDummy;
1632
1633 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1634 i += nPos;
1635 if( i>=n ) break;
1636 i += fts5GetVarint(&a[i], (u64*)&iDelta);
1637 pIter->iRowid += iDelta;
1638
1639 if( iRowidOffset>=pIter->nRowidOffset ){
1640 int nNew = pIter->nRowidOffset + 8;
1641 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
1642 if( aNew==0 ){
1643 p->rc = SQLITE_NOMEM;
1644 break;
1645 }
1646 pIter->aRowidOffset = aNew;
1647 pIter->nRowidOffset = nNew;
1648 }
1649
1650 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1651 pIter->iLeafOffset = i;
1652 }
1653 pIter->iRowidOffset = iRowidOffset;
1654 fts5SegIterLoadNPos(p, pIter);
1655 }
1656
1657 /*
1658 **
1659 */
1660 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1661 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1662 assert( pIter->flags & FTS5_SEGITER_ONETERM );
1663
1664 fts5DataRelease(pIter->pLeaf);
1665 pIter->pLeaf = 0;
1666 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1667 Fts5Data *pNew;
1668 pIter->iLeafPgno--;
1669 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1670 pIter->pSeg->iSegid, pIter->iLeafPgno
1671 ));
1672 if( pNew ){
1673 /* iTermLeafOffset may be equal to szLeaf if the term is the last
1674 ** thing on the page - i.e. the first rowid is on the following page.
1675 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1676 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1677 assert( pIter->pLeaf==0 );
1678 if( pIter->iTermLeafOffset<pNew->szLeaf ){
1679 pIter->pLeaf = pNew;
1680 pIter->iLeafOffset = pIter->iTermLeafOffset;
1681 }
1682 }else{
1683 int iRowidOff;
1684 iRowidOff = fts5LeafFirstRowidOff(pNew);
1685 if( iRowidOff ){
1686 pIter->pLeaf = pNew;
1687 pIter->iLeafOffset = iRowidOff;
1688 }
1689 }
1690
1691 if( pIter->pLeaf ){
1692 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1693 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1694 break;
1695 }else{
1696 fts5DataRelease(pNew);
1697 }
1698 }
1699 }
1700
1701 if( pIter->pLeaf ){
1702 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1703 fts5SegIterReverseInitPage(p, pIter);
1704 }
1705 }
1706
1707 /*
1708 ** Return true if the iterator passed as the second argument currently
1709 ** points to a delete marker. A delete marker is an entry with a 0 byte
1710 ** position-list.
1711 */
1712 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){
1713 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1714 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1715 }
1716
1717 /*
1718 ** Advance iterator pIter to the next entry.
1719 **
1720 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
1721 ** is not considered an error if the iterator reaches EOF. If an error has
1722 ** already occurred when this function is called, it is a no-op.
1723 */
1724 static void fts5SegIterNext(
1725 Fts5Index *p, /* FTS5 backend object */
1726 Fts5SegIter *pIter, /* Iterator to advance */
1727 int *pbNewTerm /* OUT: Set for new term */
1728 ){
1729 assert( pbNewTerm==0 || *pbNewTerm==0 );
1730 if( p->rc==SQLITE_OK ){
1731 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1732 assert( pIter->pNextLeaf==0 );
1733 if( pIter->iRowidOffset>0 ){
1734 u8 *a = pIter->pLeaf->p;
1735 int iOff;
1736 int nPos;
1737 int bDummy;
1738 i64 iDelta;
1739
1740 pIter->iRowidOffset--;
1741 pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
1742 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
1743 iOff += nPos;
1744 fts5GetVarint(&a[iOff], (u64*)&iDelta);
1745 pIter->iRowid -= iDelta;
1746 fts5SegIterLoadNPos(p, pIter);
1747 }else{
1748 fts5SegIterReverseNewPage(p, pIter);
1749 }
1750 }else{
1751 Fts5Data *pLeaf = pIter->pLeaf;
1752 int iOff;
1753 int bNewTerm = 0;
1754 int nKeep = 0;
1755
1756 /* Search for the end of the position list within the current page. */
1757 u8 *a = pLeaf->p;
1758 int n = pLeaf->szLeaf;
1759
1760 ASSERT_SZLEAF_OK(pLeaf);
1761 iOff = pIter->iLeafOffset + pIter->nPos;
1762
1763 if( iOff<n ){
1764 /* The next entry is on the current page. */
1765 assert_nc( iOff<=pIter->iEndofDoclist );
1766 if( iOff>=pIter->iEndofDoclist ){
1767 bNewTerm = 1;
1768 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
1769 iOff += fts5GetVarint32(&a[iOff], nKeep);
1770 }
1771 }else{
1772 u64 iDelta;
1773 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
1774 pIter->iRowid += iDelta;
1775 assert_nc( iDelta>0 );
1776 }
1777 pIter->iLeafOffset = iOff;
1778
1779 }else if( pIter->pSeg==0 ){
1780 const u8 *pList = 0;
1781 const char *zTerm = 0;
1782 int nList = 0;
1783 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
1784 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
1785 sqlite3Fts5HashScanNext(p->pHash);
1786 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1787 }
1788 if( pList==0 ){
1789 fts5DataRelease(pIter->pLeaf);
1790 pIter->pLeaf = 0;
1791 }else{
1792 pIter->pLeaf->p = (u8*)pList;
1793 pIter->pLeaf->nn = nList;
1794 pIter->pLeaf->szLeaf = nList;
1795 pIter->iEndofDoclist = nList+1;
1796 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
1797 (u8*)zTerm);
1798 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1799 *pbNewTerm = 1;
1800 }
1801 }else{
1802 iOff = 0;
1803 /* Next entry is not on the current page */
1804 while( iOff==0 ){
1805 fts5SegIterNextPage(p, pIter);
1806 pLeaf = pIter->pLeaf;
1807 if( pLeaf==0 ) break;
1808 ASSERT_SZLEAF_OK(pLeaf);
1809 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
1810 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
1811 pIter->iLeafOffset = iOff;
1812
1813 if( pLeaf->nn>pLeaf->szLeaf ){
1814 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
1815 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
1816 );
1817 }
1818
1819 }
1820 else if( pLeaf->nn>pLeaf->szLeaf ){
1821 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
1822 &pLeaf->p[pLeaf->szLeaf], iOff
1823 );
1824 pIter->iLeafOffset = iOff;
1825 pIter->iEndofDoclist = iOff;
1826 bNewTerm = 1;
1827 }
1828 if( iOff>=pLeaf->szLeaf ){
1829 p->rc = FTS5_CORRUPT;
1830 return;
1831 }
1832 }
1833 }
1834
1835 /* Check if the iterator is now at EOF. If so, return early. */
1836 if( pIter->pLeaf ){
1837 if( bNewTerm ){
1838 if( pIter->flags & FTS5_SEGITER_ONETERM ){
1839 fts5DataRelease(pIter->pLeaf);
1840 pIter->pLeaf = 0;
1841 }else{
1842 fts5SegIterLoadTerm(p, pIter, nKeep);
1843 fts5SegIterLoadNPos(p, pIter);
1844 if( pbNewTerm ) *pbNewTerm = 1;
1845 }
1846 }else{
1847 /* The following could be done by calling fts5SegIterLoadNPos(). But
1848 ** this block is particularly performance critical, so equivalent
1849 ** code is inlined. */
1850 int nSz;
1851 assert( p->rc==SQLITE_OK );
1852 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
1853 pIter->bDel = (nSz & 0x0001);
1854 pIter->nPos = nSz>>1;
1855 assert_nc( pIter->nPos>=0 );
1856 }
1857 }
1858 }
1859 }
1860 }
1861
1862 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
1863
1864 /*
1865 ** Iterator pIter currently points to the first rowid in a doclist. This
1866 ** function sets the iterator up so that iterates in reverse order through
1867 ** the doclist.
1868 */
1869 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
1870 Fts5DlidxIter *pDlidx = pIter->pDlidx;
1871 Fts5Data *pLast = 0;
1872 int pgnoLast = 0;
1873
1874 if( pDlidx ){
1875 int iSegid = pIter->pSeg->iSegid;
1876 pgnoLast = fts5DlidxIterPgno(pDlidx);
1877 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
1878 }else{
1879 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
1880
1881 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
1882 ** position-list content for the current rowid. Back it up so that it
1883 ** points to the start of the position-list size field. */
1884 pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
1885
1886 /* If this condition is true then the largest rowid for the current
1887 ** term may not be stored on the current page. So search forward to
1888 ** see where said rowid really is. */
1889 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
1890 int pgno;
1891 Fts5StructureSegment *pSeg = pIter->pSeg;
1892
1893 /* The last rowid in the doclist may not be on the current page. Search
1894 ** forward to find the page containing the last rowid. */
1895 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
1896 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
1897 Fts5Data *pNew = fts5DataRead(p, iAbs);
1898 if( pNew ){
1899 int iRowid, bTermless;
1900 iRowid = fts5LeafFirstRowidOff(pNew);
1901 bTermless = fts5LeafIsTermless(pNew);
1902 if( iRowid ){
1903 SWAPVAL(Fts5Data*, pNew, pLast);
1904 pgnoLast = pgno;
1905 }
1906 fts5DataRelease(pNew);
1907 if( bTermless==0 ) break;
1908 }
1909 }
1910 }
1911 }
1912
1913 /* If pLast is NULL at this point, then the last rowid for this doclist
1914 ** lies on the page currently indicated by the iterator. In this case
1915 ** pIter->iLeafOffset is already set to point to the position-list size
1916 ** field associated with the first relevant rowid on the page.
1917 **
1918 ** Or, if pLast is non-NULL, then it is the page that contains the last
1919 ** rowid. In this case configure the iterator so that it points to the
1920 ** first rowid on this page.
1921 */
1922 if( pLast ){
1923 int iOff;
1924 fts5DataRelease(pIter->pLeaf);
1925 pIter->pLeaf = pLast;
1926 pIter->iLeafPgno = pgnoLast;
1927 iOff = fts5LeafFirstRowidOff(pLast);
1928 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
1929 pIter->iLeafOffset = iOff;
1930
1931 if( fts5LeafIsTermless(pLast) ){
1932 pIter->iEndofDoclist = pLast->nn+1;
1933 }else{
1934 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
1935 }
1936
1937 }
1938
1939 fts5SegIterReverseInitPage(p, pIter);
1940 }
1941
1942 /*
1943 ** Iterator pIter currently points to the first rowid of a doclist.
1944 ** There is a doclist-index associated with the final term on the current
1945 ** page. If the current term is the last term on the page, load the
1946 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
1947 */
1948 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
1949 int iSeg = pIter->pSeg->iSegid;
1950 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
1951 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
1952
1953 assert( pIter->flags & FTS5_SEGITER_ONETERM );
1954 assert( pIter->pDlidx==0 );
1955
1956 /* Check if the current doclist ends on this page. If it does, return
1957 ** early without loading the doclist-index (as it belongs to a different
1958 ** term. */
1959 if( pIter->iTermLeafPgno==pIter->iLeafPgno
1960 && pIter->iEndofDoclist<pLeaf->szLeaf
1961 ){
1962 return;
1963 }
1964
1965 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
1966 }
1967
1968 #define fts5IndexSkipVarint(a, iOff) { \
1969 int iEnd = iOff+9; \
1970 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
1971 }
1972
1973 /*
1974 ** The iterator object passed as the second argument currently contains
1975 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
1976 ** function searches the leaf page for a term matching (pTerm/nTerm).
1977 **
1978 ** If the specified term is found on the page, then the iterator is left
1979 ** pointing to it. If argument bGe is zero and the term is not found,
1980 ** the iterator is left pointing at EOF.
1981 **
1982 ** If bGe is non-zero and the specified term is not found, then the
1983 ** iterator is left pointing to the smallest term in the segment that
1984 ** is larger than the specified term, even if this term is not on the
1985 ** current page.
1986 */
1987 static void fts5LeafSeek(
1988 Fts5Index *p, /* Leave any error code here */
1989 int bGe, /* True for a >= search */
1990 Fts5SegIter *pIter, /* Iterator to seek */
1991 const u8 *pTerm, int nTerm /* Term to search for */
1992 ){
1993 int iOff;
1994 const u8 *a = pIter->pLeaf->p;
1995 int szLeaf = pIter->pLeaf->szLeaf;
1996 int n = pIter->pLeaf->nn;
1997
1998 int nMatch = 0;
1999 int nKeep = 0;
2000 int nNew = 0;
2001 int iTermOff;
2002 int iPgidx; /* Current offset in pgidx */
2003 int bEndOfPage = 0;
2004
2005 assert( p->rc==SQLITE_OK );
2006
2007 iPgidx = szLeaf;
2008 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2009 iOff = iTermOff;
2010
2011 while( 1 ){
2012
2013 /* Figure out how many new bytes are in this term */
2014 fts5FastGetVarint32(a, iOff, nNew);
2015 if( nKeep<nMatch ){
2016 goto search_failed;
2017 }
2018
2019 assert( nKeep>=nMatch );
2020 if( nKeep==nMatch ){
2021 int nCmp;
2022 int i;
2023 nCmp = MIN(nNew, nTerm-nMatch);
2024 for(i=0; i<nCmp; i++){
2025 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2026 }
2027 nMatch += i;
2028
2029 if( nTerm==nMatch ){
2030 if( i==nNew ){
2031 goto search_success;
2032 }else{
2033 goto search_failed;
2034 }
2035 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2036 goto search_failed;
2037 }
2038 }
2039
2040 if( iPgidx>=n ){
2041 bEndOfPage = 1;
2042 break;
2043 }
2044
2045 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2046 iTermOff += nKeep;
2047 iOff = iTermOff;
2048
2049 /* Read the nKeep field of the next term. */
2050 fts5FastGetVarint32(a, iOff, nKeep);
2051 }
2052
2053 search_failed:
2054 if( bGe==0 ){
2055 fts5DataRelease(pIter->pLeaf);
2056 pIter->pLeaf = 0;
2057 return;
2058 }else if( bEndOfPage ){
2059 do {
2060 fts5SegIterNextPage(p, pIter);
2061 if( pIter->pLeaf==0 ) return;
2062 a = pIter->pLeaf->p;
2063 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2064 iPgidx = pIter->pLeaf->szLeaf;
2065 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2066 if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
2067 p->rc = FTS5_CORRUPT;
2068 }else{
2069 nKeep = 0;
2070 iTermOff = iOff;
2071 n = pIter->pLeaf->nn;
2072 iOff += fts5GetVarint32(&a[iOff], nNew);
2073 break;
2074 }
2075 }
2076 }while( 1 );
2077 }
2078
2079 search_success:
2080
2081 pIter->iLeafOffset = iOff + nNew;
2082 pIter->iTermLeafOffset = pIter->iLeafOffset;
2083 pIter->iTermLeafPgno = pIter->iLeafPgno;
2084
2085 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2086 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2087
2088 if( iPgidx>=n ){
2089 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2090 }else{
2091 int nExtra;
2092 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2093 pIter->iEndofDoclist = iTermOff + nExtra;
2094 }
2095 pIter->iPgidxOff = iPgidx;
2096
2097 fts5SegIterLoadRowid(p, pIter);
2098 fts5SegIterLoadNPos(p, pIter);
2099 }
2100
2101 /*
2102 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2103 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2104 **
2105 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2106 ** an error has already occurred when this function is called, it is a no-op.
2107 */
2108 static void fts5SegIterSeekInit(
2109 Fts5Index *p, /* FTS5 backend */
2110 Fts5Buffer *pBuf, /* Buffer to use for loading pages */
2111 const u8 *pTerm, int nTerm, /* Term to seek to */
2112 int flags, /* Mask of FTS5INDEX_XXX flags */
2113 Fts5StructureSegment *pSeg, /* Description of segment */
2114 Fts5SegIter *pIter /* Object to populate */
2115 ){
2116 int iPg = 1;
2117 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2118 int bDlidx = 0; /* True if there is a doclist-index */
2119
2120 static int nCall = 0;
2121 nCall++;
2122
2123 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2124 assert( pTerm && nTerm );
2125 memset(pIter, 0, sizeof(*pIter));
2126 pIter->pSeg = pSeg;
2127
2128 /* This block sets stack variable iPg to the leaf page number that may
2129 ** contain term (pTerm/nTerm), if it is present in the segment. */
2130 if( p->pIdxSelect==0 ){
2131 Fts5Config *pConfig = p->pConfig;
2132 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2133 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2134 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2135 pConfig->zDb, pConfig->zName
2136 ));
2137 }
2138 if( p->rc ) return;
2139 sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
2140 sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2141 if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
2142 i64 val = sqlite3_column_int(p->pIdxSelect, 0);
2143 iPg = (int)(val>>1);
2144 bDlidx = (val & 0x0001);
2145 }
2146 p->rc = sqlite3_reset(p->pIdxSelect);
2147
2148 if( iPg<pSeg->pgnoFirst ){
2149 iPg = pSeg->pgnoFirst;
2150 bDlidx = 0;
2151 }
2152
2153 pIter->iLeafPgno = iPg - 1;
2154 fts5SegIterNextPage(p, pIter);
2155
2156 if( pIter->pLeaf ){
2157 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2158 }
2159
2160 if( p->rc==SQLITE_OK && bGe==0 ){
2161 pIter->flags |= FTS5_SEGITER_ONETERM;
2162 if( pIter->pLeaf ){
2163 if( flags & FTS5INDEX_QUERY_DESC ){
2164 pIter->flags |= FTS5_SEGITER_REVERSE;
2165 }
2166 if( bDlidx ){
2167 fts5SegIterLoadDlidx(p, pIter);
2168 }
2169 if( flags & FTS5INDEX_QUERY_DESC ){
2170 fts5SegIterReverse(p, pIter);
2171 }
2172 }
2173 }
2174
2175 /* Either:
2176 **
2177 ** 1) an error has occurred, or
2178 ** 2) the iterator points to EOF, or
2179 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2180 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2181 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2182 */
2183 assert( p->rc!=SQLITE_OK /* 1 */
2184 || pIter->pLeaf==0 /* 2 */
2185 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2186 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2187 );
2188 }
2189
2190 /*
2191 ** Initialize the object pIter to point to term pTerm/nTerm within the
2192 ** in-memory hash table. If there is no such term in the hash-table, the
2193 ** iterator is set to EOF.
2194 **
2195 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2196 ** an error has already occurred when this function is called, it is a no-op.
2197 */
2198 static void fts5SegIterHashInit(
2199 Fts5Index *p, /* FTS5 backend */
2200 const u8 *pTerm, int nTerm, /* Term to seek to */
2201 int flags, /* Mask of FTS5INDEX_XXX flags */
2202 Fts5SegIter *pIter /* Object to populate */
2203 ){
2204 const u8 *pList = 0;
2205 int nList = 0;
2206 const u8 *z = 0;
2207 int n = 0;
2208
2209 assert( p->pHash );
2210 assert( p->rc==SQLITE_OK );
2211
2212 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2213 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2214 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2215 n = (z ? (int)strlen((const char*)z) : 0);
2216 }else{
2217 pIter->flags |= FTS5_SEGITER_ONETERM;
2218 sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
2219 z = pTerm;
2220 n = nTerm;
2221 }
2222
2223 if( pList ){
2224 Fts5Data *pLeaf;
2225 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2226 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2227 if( pLeaf==0 ) return;
2228 pLeaf->p = (u8*)pList;
2229 pLeaf->nn = pLeaf->szLeaf = nList;
2230 pIter->pLeaf = pLeaf;
2231 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2232 pIter->iEndofDoclist = pLeaf->nn+1;
2233
2234 if( flags & FTS5INDEX_QUERY_DESC ){
2235 pIter->flags |= FTS5_SEGITER_REVERSE;
2236 fts5SegIterReverseInitPage(p, pIter);
2237 }else{
2238 fts5SegIterLoadNPos(p, pIter);
2239 }
2240 }
2241 }
2242
2243 /*
2244 ** Zero the iterator passed as the only argument.
2245 */
2246 static void fts5SegIterClear(Fts5SegIter *pIter){
2247 fts5BufferFree(&pIter->term);
2248 fts5DataRelease(pIter->pLeaf);
2249 fts5DataRelease(pIter->pNextLeaf);
2250 fts5DlidxIterFree(pIter->pDlidx);
2251 sqlite3_free(pIter->aRowidOffset);
2252 memset(pIter, 0, sizeof(Fts5SegIter));
2253 }
2254
2255 #ifdef SQLITE_DEBUG
2256
2257 /*
2258 ** This function is used as part of the big assert() procedure implemented by
2259 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2260 ** in *pRes is the correct result of comparing the current positions of the
2261 ** two iterators.
2262 */
2263 static void fts5AssertComparisonResult(
2264 Fts5IndexIter *pIter,
2265 Fts5SegIter *p1,
2266 Fts5SegIter *p2,
2267 Fts5CResult *pRes
2268 ){
2269 int i1 = p1 - pIter->aSeg;
2270 int i2 = p2 - pIter->aSeg;
2271
2272 if( p1->pLeaf || p2->pLeaf ){
2273 if( p1->pLeaf==0 ){
2274 assert( pRes->iFirst==i2 );
2275 }else if( p2->pLeaf==0 ){
2276 assert( pRes->iFirst==i1 );
2277 }else{
2278 int nMin = MIN(p1->term.n, p2->term.n);
2279 int res = memcmp(p1->term.p, p2->term.p, nMin);
2280 if( res==0 ) res = p1->term.n - p2->term.n;
2281
2282 if( res==0 ){
2283 assert( pRes->bTermEq==1 );
2284 assert( p1->iRowid!=p2->iRowid );
2285 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2286 }else{
2287 assert( pRes->bTermEq==0 );
2288 }
2289
2290 if( res<0 ){
2291 assert( pRes->iFirst==i1 );
2292 }else{
2293 assert( pRes->iFirst==i2 );
2294 }
2295 }
2296 }
2297 }
2298
2299 /*
2300 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2301 ** is compiled. In that case, this function is essentially an assert()
2302 ** statement used to verify that the contents of the pIter->aFirst[] array
2303 ** are correct.
2304 */
2305 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){
2306 if( p->rc==SQLITE_OK ){
2307 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2308 int i;
2309
2310 assert( (pFirst->pLeaf==0)==pIter->bEof );
2311
2312 /* Check that pIter->iSwitchRowid is set correctly. */
2313 for(i=0; i<pIter->nSeg; i++){
2314 Fts5SegIter *p1 = &pIter->aSeg[i];
2315 assert( p1==pFirst
2316 || p1->pLeaf==0
2317 || fts5BufferCompare(&pFirst->term, &p1->term)
2318 || p1->iRowid==pIter->iSwitchRowid
2319 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2320 );
2321 }
2322
2323 for(i=0; i<pIter->nSeg; i+=2){
2324 Fts5SegIter *p1 = &pIter->aSeg[i];
2325 Fts5SegIter *p2 = &pIter->aSeg[i+1];
2326 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2327 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2328 }
2329
2330 for(i=1; i<(pIter->nSeg / 2); i+=2){
2331 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2332 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2333 Fts5CResult *pRes = &pIter->aFirst[i];
2334 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2335 }
2336 }
2337 }
2338 #else
2339 # define fts5AssertMultiIterSetup(x,y)
2340 #endif
2341
2342 /*
2343 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2344 **
2345 ** If the returned value is non-zero, then it is the index of an entry
2346 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2347 ** to a key that is a duplicate of another, higher priority,
2348 ** segment-iterator in the pSeg->aSeg[] array.
2349 */
2350 static int fts5MultiIterDoCompare(Fts5IndexIter *pIter, int iOut){
2351 int i1; /* Index of left-hand Fts5SegIter */
2352 int i2; /* Index of right-hand Fts5SegIter */
2353 int iRes;
2354 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2355 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2356 Fts5CResult *pRes = &pIter->aFirst[iOut];
2357
2358 assert( iOut<pIter->nSeg && iOut>0 );
2359 assert( pIter->bRev==0 || pIter->bRev==1 );
2360
2361 if( iOut>=(pIter->nSeg/2) ){
2362 i1 = (iOut - pIter->nSeg/2) * 2;
2363 i2 = i1 + 1;
2364 }else{
2365 i1 = pIter->aFirst[iOut*2].iFirst;
2366 i2 = pIter->aFirst[iOut*2+1].iFirst;
2367 }
2368 p1 = &pIter->aSeg[i1];
2369 p2 = &pIter->aSeg[i2];
2370
2371 pRes->bTermEq = 0;
2372 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
2373 iRes = i2;
2374 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
2375 iRes = i1;
2376 }else{
2377 int res = fts5BufferCompare(&p1->term, &p2->term);
2378 if( res==0 ){
2379 assert( i2>i1 );
2380 assert( i2!=0 );
2381 pRes->bTermEq = 1;
2382 if( p1->iRowid==p2->iRowid ){
2383 p1->bDel = p2->bDel;
2384 return i2;
2385 }
2386 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2387 }
2388 assert( res!=0 );
2389 if( res<0 ){
2390 iRes = i1;
2391 }else{
2392 iRes = i2;
2393 }
2394 }
2395
2396 pRes->iFirst = (u16)iRes;
2397 return 0;
2398 }
2399
2400 /*
2401 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2402 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2403 */
2404 static void fts5SegIterGotoPage(
2405 Fts5Index *p, /* FTS5 backend object */
2406 Fts5SegIter *pIter, /* Iterator to advance */
2407 int iLeafPgno
2408 ){
2409 assert( iLeafPgno>pIter->iLeafPgno );
2410
2411 if( iLeafPgno>pIter->pSeg->pgnoLast ){
2412 p->rc = FTS5_CORRUPT;
2413 }else{
2414 fts5DataRelease(pIter->pNextLeaf);
2415 pIter->pNextLeaf = 0;
2416 pIter->iLeafPgno = iLeafPgno-1;
2417 fts5SegIterNextPage(p, pIter);
2418 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2419
2420 if( p->rc==SQLITE_OK ){
2421 int iOff;
2422 u8 *a = pIter->pLeaf->p;
2423 int n = pIter->pLeaf->szLeaf;
2424
2425 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2426 if( iOff<4 || iOff>=n ){
2427 p->rc = FTS5_CORRUPT;
2428 }else{
2429 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2430 pIter->iLeafOffset = iOff;
2431 fts5SegIterLoadNPos(p, pIter);
2432 }
2433 }
2434 }
2435 }
2436
2437 /*
2438 ** Advance the iterator passed as the second argument until it is at or
2439 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2440 ** always advanced at least once.
2441 */
2442 static void fts5SegIterNextFrom(
2443 Fts5Index *p, /* FTS5 backend object */
2444 Fts5SegIter *pIter, /* Iterator to advance */
2445 i64 iMatch /* Advance iterator at least this far */
2446 ){
2447 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2448 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2449 int iLeafPgno = pIter->iLeafPgno;
2450 int bMove = 1;
2451
2452 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2453 assert( pIter->pDlidx );
2454 assert( pIter->pLeaf );
2455
2456 if( bRev==0 ){
2457 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2458 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2459 fts5DlidxIterNext(p, pDlidx);
2460 }
2461 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2462 if( iLeafPgno>pIter->iLeafPgno ){
2463 fts5SegIterGotoPage(p, pIter, iLeafPgno);
2464 bMove = 0;
2465 }
2466 }else{
2467 assert( pIter->pNextLeaf==0 );
2468 assert( iMatch<pIter->iRowid );
2469 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2470 fts5DlidxIterPrev(p, pDlidx);
2471 }
2472 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2473
2474 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2475
2476 if( iLeafPgno<pIter->iLeafPgno ){
2477 pIter->iLeafPgno = iLeafPgno+1;
2478 fts5SegIterReverseNewPage(p, pIter);
2479 bMove = 0;
2480 }
2481 }
2482
2483 do{
2484 if( bMove ) fts5SegIterNext(p, pIter, 0);
2485 if( pIter->pLeaf==0 ) break;
2486 if( bRev==0 && pIter->iRowid>=iMatch ) break;
2487 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2488 bMove = 1;
2489 }while( p->rc==SQLITE_OK );
2490 }
2491
2492
2493 /*
2494 ** Free the iterator object passed as the second argument.
2495 */
2496 static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
2497 if( pIter ){
2498 int i;
2499 for(i=0; i<pIter->nSeg; i++){
2500 fts5SegIterClear(&pIter->aSeg[i]);
2501 }
2502 fts5StructureRelease(pIter->pStruct);
2503 fts5BufferFree(&pIter->poslist);
2504 sqlite3_free(pIter);
2505 }
2506 }
2507
2508 static void fts5MultiIterAdvanced(
2509 Fts5Index *p, /* FTS5 backend to iterate within */
2510 Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */
2511 int iChanged, /* Index of sub-iterator just advanced */
2512 int iMinset /* Minimum entry in aFirst[] to set */
2513 ){
2514 int i;
2515 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
2516 int iEq;
2517 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
2518 fts5SegIterNext(p, &pIter->aSeg[iEq], 0);
2519 i = pIter->nSeg + iEq;
2520 }
2521 }
2522 }
2523
2524 /*
2525 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2526 ** points to the same term though - just a different rowid. This function
2527 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2528 ** If it does so successfully, 0 is returned. Otherwise 1.
2529 **
2530 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2531 ** on the iterator instead. That function does the same as this one, except
2532 ** that it deals with more complicated cases as well.
2533 */
2534 static int fts5MultiIterAdvanceRowid(
2535 Fts5Index *p, /* FTS5 backend to iterate within */
2536 Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */
2537 int iChanged /* Index of sub-iterator just advanced */
2538 ){
2539 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2540
2541 if( pNew->iRowid==pIter->iSwitchRowid
2542 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2543 ){
2544 int i;
2545 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2546 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2547 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2548 Fts5CResult *pRes = &pIter->aFirst[i];
2549
2550 assert( pNew->pLeaf );
2551 assert( pRes->bTermEq==0 || pOther->pLeaf );
2552
2553 if( pRes->bTermEq ){
2554 if( pNew->iRowid==pOther->iRowid ){
2555 return 1;
2556 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2557 pIter->iSwitchRowid = pOther->iRowid;
2558 pNew = pOther;
2559 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2560 pIter->iSwitchRowid = pOther->iRowid;
2561 }
2562 }
2563 pRes->iFirst = (u16)(pNew - pIter->aSeg);
2564 if( i==1 ) break;
2565
2566 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2567 }
2568 }
2569
2570 return 0;
2571 }
2572
2573 /*
2574 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2575 */
2576 static void fts5MultiIterSetEof(Fts5IndexIter *pIter){
2577 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2578 pIter->bEof = pSeg->pLeaf==0;
2579 pIter->iSwitchRowid = pSeg->iRowid;
2580 }
2581
2582 /*
2583 ** Move the iterator to the next entry.
2584 **
2585 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2586 ** considered an error if the iterator reaches EOF, or if it is already at
2587 ** EOF when this function is called.
2588 */
2589 static void fts5MultiIterNext(
2590 Fts5Index *p,
2591 Fts5IndexIter *pIter,
2592 int bFrom, /* True if argument iFrom is valid */
2593 i64 iFrom /* Advance at least as far as this */
2594 ){
2595 if( p->rc==SQLITE_OK ){
2596 int bUseFrom = bFrom;
2597 do {
2598 int iFirst = pIter->aFirst[1].iFirst;
2599 int bNewTerm = 0;
2600 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2601 assert( p->rc==SQLITE_OK );
2602 if( bUseFrom && pSeg->pDlidx ){
2603 fts5SegIterNextFrom(p, pSeg, iFrom);
2604 }else{
2605 fts5SegIterNext(p, pSeg, &bNewTerm);
2606 }
2607
2608 if( pSeg->pLeaf==0 || bNewTerm
2609 || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
2610 ){
2611 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2612 fts5MultiIterSetEof(pIter);
2613 }
2614 fts5AssertMultiIterSetup(p, pIter);
2615
2616 bUseFrom = 0;
2617 }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
2618 }
2619 }
2620
2621 static void fts5MultiIterNext2(
2622 Fts5Index *p,
2623 Fts5IndexIter *pIter,
2624 int *pbNewTerm /* OUT: True if *might* be new term */
2625 ){
2626 assert( pIter->bSkipEmpty );
2627 if( p->rc==SQLITE_OK ){
2628 do {
2629 int iFirst = pIter->aFirst[1].iFirst;
2630 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2631 int bNewTerm = 0;
2632
2633 fts5SegIterNext(p, pSeg, &bNewTerm);
2634 if( pSeg->pLeaf==0 || bNewTerm
2635 || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
2636 ){
2637 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2638 fts5MultiIterSetEof(pIter);
2639 *pbNewTerm = 1;
2640 }else{
2641 *pbNewTerm = 0;
2642 }
2643 fts5AssertMultiIterSetup(p, pIter);
2644
2645 }while( fts5MultiIterIsEmpty(p, pIter) );
2646 }
2647 }
2648
2649
2650 static Fts5IndexIter *fts5MultiIterAlloc(
2651 Fts5Index *p, /* FTS5 backend to iterate within */
2652 int nSeg
2653 ){
2654 Fts5IndexIter *pNew;
2655 int nSlot; /* Power of two >= nSeg */
2656
2657 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2658 pNew = fts5IdxMalloc(p,
2659 sizeof(Fts5IndexIter) + /* pNew */
2660 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
2661 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
2662 );
2663 if( pNew ){
2664 pNew->nSeg = nSlot;
2665 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2666 pNew->pIndex = p;
2667 }
2668 return pNew;
2669 }
2670
2671 /*
2672 ** Allocate a new Fts5IndexIter object.
2673 **
2674 ** The new object will be used to iterate through data in structure pStruct.
2675 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
2676 ** is zero or greater, data from the first nSegment segments on level iLevel
2677 ** is merged.
2678 **
2679 ** The iterator initially points to the first term/rowid entry in the
2680 ** iterated data.
2681 */
2682 static void fts5MultiIterNew(
2683 Fts5Index *p, /* FTS5 backend to iterate within */
2684 Fts5Structure *pStruct, /* Structure of specific index */
2685 int bSkipEmpty, /* True to ignore delete-keys */
2686 int flags, /* FTS5INDEX_QUERY_XXX flags */
2687 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
2688 int iLevel, /* Level to iterate (-1 for all) */
2689 int nSegment, /* Number of segments to merge (iLevel>=0) */
2690 Fts5IndexIter **ppOut /* New object */
2691 ){
2692 int nSeg = 0; /* Number of segment-iters in use */
2693 int iIter = 0; /* */
2694 int iSeg; /* Used to iterate through segments */
2695 Fts5Buffer buf = {0,0,0}; /* Buffer used by fts5SegIterSeekInit() */
2696 Fts5StructureLevel *pLvl;
2697 Fts5IndexIter *pNew;
2698
2699 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
2700
2701 /* Allocate space for the new multi-seg-iterator. */
2702 if( p->rc==SQLITE_OK ){
2703 if( iLevel<0 ){
2704 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
2705 nSeg = pStruct->nSegment;
2706 nSeg += (p->pHash ? 1 : 0);
2707 }else{
2708 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
2709 }
2710 }
2711 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
2712 if( pNew==0 ) return;
2713 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
2714 pNew->bSkipEmpty = (u8)bSkipEmpty;
2715 pNew->pStruct = pStruct;
2716 fts5StructureRef(pStruct);
2717
2718 /* Initialize each of the component segment iterators. */
2719 if( iLevel<0 ){
2720 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
2721 if( p->pHash ){
2722 /* Add a segment iterator for the current contents of the hash table. */
2723 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
2724 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
2725 }
2726 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
2727 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
2728 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
2729 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
2730 if( pTerm==0 ){
2731 fts5SegIterInit(p, pSeg, pIter);
2732 }else{
2733 fts5SegIterSeekInit(p, &buf, pTerm, nTerm, flags, pSeg, pIter);
2734 }
2735 }
2736 }
2737 }else{
2738 pLvl = &pStruct->aLevel[iLevel];
2739 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
2740 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
2741 }
2742 }
2743 assert( iIter==nSeg );
2744
2745 /* If the above was successful, each component iterators now points
2746 ** to the first entry in its segment. In this case initialize the
2747 ** aFirst[] array. Or, if an error has occurred, free the iterator
2748 ** object and set the output variable to NULL. */
2749 if( p->rc==SQLITE_OK ){
2750 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
2751 int iEq;
2752 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
2753 fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
2754 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
2755 }
2756 }
2757 fts5MultiIterSetEof(pNew);
2758 fts5AssertMultiIterSetup(p, pNew);
2759
2760 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
2761 fts5MultiIterNext(p, pNew, 0, 0);
2762 }
2763 }else{
2764 fts5MultiIterFree(p, pNew);
2765 *ppOut = 0;
2766 }
2767 fts5BufferFree(&buf);
2768 }
2769
2770 /*
2771 ** Create an Fts5IndexIter that iterates through the doclist provided
2772 ** as the second argument.
2773 */
2774 static void fts5MultiIterNew2(
2775 Fts5Index *p, /* FTS5 backend to iterate within */
2776 Fts5Data *pData, /* Doclist to iterate through */
2777 int bDesc, /* True for descending rowid order */
2778 Fts5IndexIter **ppOut /* New object */
2779 ){
2780 Fts5IndexIter *pNew;
2781 pNew = fts5MultiIterAlloc(p, 2);
2782 if( pNew ){
2783 Fts5SegIter *pIter = &pNew->aSeg[1];
2784
2785 pNew->bFiltered = 1;
2786 pIter->flags = FTS5_SEGITER_ONETERM;
2787 if( pData->szLeaf>0 ){
2788 pIter->pLeaf = pData;
2789 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
2790 pIter->iEndofDoclist = pData->nn;
2791 pNew->aFirst[1].iFirst = 1;
2792 if( bDesc ){
2793 pNew->bRev = 1;
2794 pIter->flags |= FTS5_SEGITER_REVERSE;
2795 fts5SegIterReverseInitPage(p, pIter);
2796 }else{
2797 fts5SegIterLoadNPos(p, pIter);
2798 }
2799 pData = 0;
2800 }else{
2801 pNew->bEof = 1;
2802 }
2803
2804 *ppOut = pNew;
2805 }
2806
2807 fts5DataRelease(pData);
2808 }
2809
2810 /*
2811 ** Return true if the iterator is at EOF or if an error has occurred.
2812 ** False otherwise.
2813 */
2814 static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){
2815 assert( p->rc
2816 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof
2817 );
2818 return (p->rc || pIter->bEof);
2819 }
2820
2821 /*
2822 ** Return the rowid of the entry that the iterator currently points
2823 ** to. If the iterator points to EOF when this function is called the
2824 ** results are undefined.
2825 */
2826 static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){
2827 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
2828 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
2829 }
2830
2831 /*
2832 ** Move the iterator to the next entry at or following iMatch.
2833 */
2834 static void fts5MultiIterNextFrom(
2835 Fts5Index *p,
2836 Fts5IndexIter *pIter,
2837 i64 iMatch
2838 ){
2839 while( 1 ){
2840 i64 iRowid;
2841 fts5MultiIterNext(p, pIter, 1, iMatch);
2842 if( fts5MultiIterEof(p, pIter) ) break;
2843 iRowid = fts5MultiIterRowid(pIter);
2844 if( pIter->bRev==0 && iRowid>=iMatch ) break;
2845 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
2846 }
2847 }
2848
2849 /*
2850 ** Return a pointer to a buffer containing the term associated with the
2851 ** entry that the iterator currently points to.
2852 */
2853 static const u8 *fts5MultiIterTerm(Fts5IndexIter *pIter, int *pn){
2854 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2855 *pn = p->term.n;
2856 return p->term.p;
2857 }
2858
2859 static void fts5ChunkIterate(
2860 Fts5Index *p, /* Index object */
2861 Fts5SegIter *pSeg, /* Poslist of this iterator */
2862 void *pCtx, /* Context pointer for xChunk callback */
2863 void (*xChunk)(Fts5Index*, void*, const u8*, int)
2864 ){
2865 int nRem = pSeg->nPos; /* Number of bytes still to come */
2866 Fts5Data *pData = 0;
2867 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
2868 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
2869 int pgno = pSeg->iLeafPgno;
2870 int pgnoSave = 0;
2871
2872 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
2873 pgnoSave = pgno+1;
2874 }
2875
2876 while( 1 ){
2877 xChunk(p, pCtx, pChunk, nChunk);
2878 nRem -= nChunk;
2879 fts5DataRelease(pData);
2880 if( nRem<=0 ){
2881 break;
2882 }else{
2883 pgno++;
2884 pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
2885 if( pData==0 ) break;
2886 pChunk = &pData->p[4];
2887 nChunk = MIN(nRem, pData->szLeaf - 4);
2888 if( pgno==pgnoSave ){
2889 assert( pSeg->pNextLeaf==0 );
2890 pSeg->pNextLeaf = pData;
2891 pData = 0;
2892 }
2893 }
2894 }
2895 }
2896
2897
2898
2899 /*
2900 ** Allocate a new segment-id for the structure pStruct. The new segment
2901 ** id must be between 1 and 65335 inclusive, and must not be used by
2902 ** any currently existing segment. If a free segment id cannot be found,
2903 ** SQLITE_FULL is returned.
2904 **
2905 ** If an error has already occurred, this function is a no-op. 0 is
2906 ** returned in this case.
2907 */
2908 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
2909 int iSegid = 0;
2910
2911 if( p->rc==SQLITE_OK ){
2912 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
2913 p->rc = SQLITE_FULL;
2914 }else{
2915 while( iSegid==0 ){
2916 int iLvl, iSeg;
2917 sqlite3_randomness(sizeof(u32), (void*)&iSegid);
2918 iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1);
2919 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
2920 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
2921 if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
2922 iSegid = 0;
2923 }
2924 }
2925 }
2926 }
2927 }
2928 }
2929
2930 return iSegid;
2931 }
2932
2933 /*
2934 ** Discard all data currently cached in the hash-tables.
2935 */
2936 static void fts5IndexDiscardData(Fts5Index *p){
2937 assert( p->pHash || p->nPendingData==0 );
2938 if( p->pHash ){
2939 sqlite3Fts5HashClear(p->pHash);
2940 p->nPendingData = 0;
2941 }
2942 }
2943
2944 /*
2945 ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
2946 ** with buffer (nOld/pOld).
2947 */
2948 static int fts5PrefixCompress(
2949 int nOld, const u8 *pOld,
2950 int nNew, const u8 *pNew
2951 ){
2952 int i;
2953 assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
2954 for(i=0; i<nOld; i++){
2955 if( pOld[i]!=pNew[i] ) break;
2956 }
2957 return i;
2958 }
2959
2960 static void fts5WriteDlidxClear(
2961 Fts5Index *p,
2962 Fts5SegWriter *pWriter,
2963 int bFlush /* If true, write dlidx to disk */
2964 ){
2965 int i;
2966 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
2967 for(i=0; i<pWriter->nDlidx; i++){
2968 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
2969 if( pDlidx->buf.n==0 ) break;
2970 if( bFlush ){
2971 assert( pDlidx->pgno!=0 );
2972 fts5DataWrite(p,
2973 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
2974 pDlidx->buf.p, pDlidx->buf.n
2975 );
2976 }
2977 sqlite3Fts5BufferZero(&pDlidx->buf);
2978 pDlidx->bPrevValid = 0;
2979 }
2980 }
2981
2982 /*
2983 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
2984 ** Any new array elements are zeroed before returning.
2985 */
2986 static int fts5WriteDlidxGrow(
2987 Fts5Index *p,
2988 Fts5SegWriter *pWriter,
2989 int nLvl
2990 ){
2991 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
2992 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
2993 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
2994 );
2995 if( aDlidx==0 ){
2996 p->rc = SQLITE_NOMEM;
2997 }else{
2998 int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
2999 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3000 pWriter->aDlidx = aDlidx;
3001 pWriter->nDlidx = nLvl;
3002 }
3003 }
3004 return p->rc;
3005 }
3006
3007 /*
3008 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3009 ** enough, flush it to disk and return 1. Otherwise discard it and return
3010 ** zero.
3011 */
3012 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3013 int bFlag = 0;
3014
3015 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3016 ** to the database, also write the doclist-index to disk. */
3017 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3018 bFlag = 1;
3019 }
3020 fts5WriteDlidxClear(p, pWriter, bFlag);
3021 pWriter->nEmpty = 0;
3022 return bFlag;
3023 }
3024
3025 /*
3026 ** This function is called whenever processing of the doclist for the
3027 ** last term on leaf page (pWriter->iBtPage) is completed.
3028 **
3029 ** The doclist-index for that term is currently stored in-memory within the
3030 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3031 ** writes it out to disk. Or, if it is too small to bother with, discards
3032 ** it.
3033 **
3034 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3035 */
3036 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3037 int bFlag;
3038
3039 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3040 if( pWriter->iBtPage==0 ) return;
3041 bFlag = fts5WriteFlushDlidx(p, pWriter);
3042
3043 if( p->rc==SQLITE_OK ){
3044 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3045 /* The following was already done in fts5WriteInit(): */
3046 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3047 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3048 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3049 sqlite3_step(p->pIdxWriter);
3050 p->rc = sqlite3_reset(p->pIdxWriter);
3051 }
3052 pWriter->iBtPage = 0;
3053 }
3054
3055 /*
3056 ** This is called once for each leaf page except the first that contains
3057 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3058 ** is larger than all terms written to earlier leaves, and equal to or
3059 ** smaller than the first term on the new leaf.
3060 **
3061 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3062 ** has already occurred when this function is called, it is a no-op.
3063 */
3064 static void fts5WriteBtreeTerm(
3065 Fts5Index *p, /* FTS5 backend object */
3066 Fts5SegWriter *pWriter, /* Writer object */
3067 int nTerm, const u8 *pTerm /* First term on new page */
3068 ){
3069 fts5WriteFlushBtree(p, pWriter);
3070 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
3071 pWriter->iBtPage = pWriter->writer.pgno;
3072 }
3073
3074 /*
3075 ** This function is called when flushing a leaf page that contains no
3076 ** terms at all to disk.
3077 */
3078 static void fts5WriteBtreeNoTerm(
3079 Fts5Index *p, /* FTS5 backend object */
3080 Fts5SegWriter *pWriter /* Writer object */
3081 ){
3082 /* If there were no rowids on the leaf page either and the doclist-index
3083 ** has already been started, append an 0x00 byte to it. */
3084 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3085 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3086 assert( pDlidx->bPrevValid );
3087 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3088 }
3089
3090 /* Increment the "number of sequential leaves without a term" counter. */
3091 pWriter->nEmpty++;
3092 }
3093
3094 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3095 i64 iRowid;
3096 int iOff;
3097
3098 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3099 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3100 return iRowid;
3101 }
3102
3103 /*
3104 ** Rowid iRowid has just been appended to the current leaf page. It is the
3105 ** first on the page. This function appends an appropriate entry to the current
3106 ** doclist-index.
3107 */
3108 static void fts5WriteDlidxAppend(
3109 Fts5Index *p,
3110 Fts5SegWriter *pWriter,
3111 i64 iRowid
3112 ){
3113 int i;
3114 int bDone = 0;
3115
3116 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3117 i64 iVal;
3118 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3119
3120 if( pDlidx->buf.n>=p->pConfig->pgsz ){
3121 /* The current doclist-index page is full. Write it to disk and push
3122 ** a copy of iRowid (which will become the first rowid on the next
3123 ** doclist-index leaf page) up into the next level of the b-tree
3124 ** hierarchy. If the node being flushed is currently the root node,
3125 ** also push its first rowid upwards. */
3126 pDlidx->buf.p[0] = 0x01; /* Not the root node */
3127 fts5DataWrite(p,
3128 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3129 pDlidx->buf.p, pDlidx->buf.n
3130 );
3131 fts5WriteDlidxGrow(p, pWriter, i+2);
3132 pDlidx = &pWriter->aDlidx[i];
3133 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3134 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3135
3136 /* This was the root node. Push its first rowid up to the new root. */
3137 pDlidx[1].pgno = pDlidx->pgno;
3138 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3139 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3140 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3141 pDlidx[1].bPrevValid = 1;
3142 pDlidx[1].iPrev = iFirst;
3143 }
3144
3145 sqlite3Fts5BufferZero(&pDlidx->buf);
3146 pDlidx->bPrevValid = 0;
3147 pDlidx->pgno++;
3148 }else{
3149 bDone = 1;
3150 }
3151
3152 if( pDlidx->bPrevValid ){
3153 iVal = iRowid - pDlidx->iPrev;
3154 }else{
3155 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3156 assert( pDlidx->buf.n==0 );
3157 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3158 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3159 iVal = iRowid;
3160 }
3161
3162 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3163 pDlidx->bPrevValid = 1;
3164 pDlidx->iPrev = iRowid;
3165 }
3166 }
3167
3168 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
3169 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
3170 Fts5PageWriter *pPage = &pWriter->writer;
3171 i64 iRowid;
3172
3173 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
3174
3175 /* Set the szLeaf header field. */
3176 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
3177 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
3178
3179 if( pWriter->bFirstTermInPage ){
3180 /* No term was written to this page. */
3181 assert( pPage->pgidx.n==0 );
3182 fts5WriteBtreeNoTerm(p, pWriter);
3183 }else{
3184 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
3185 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
3186 }
3187
3188 /* Write the page out to disk */
3189 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
3190 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
3191
3192 /* Initialize the next page. */
3193 fts5BufferZero(&pPage->buf);
3194 fts5BufferZero(&pPage->pgidx);
3195 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
3196 pPage->iPrevPgidx = 0;
3197 pPage->pgno++;
3198
3199 /* Increase the leaves written counter */
3200 pWriter->nLeafWritten++;
3201
3202 /* The new leaf holds no terms or rowids */
3203 pWriter->bFirstTermInPage = 1;
3204 pWriter->bFirstRowidInPage = 1;
3205 }
3206
3207 /*
3208 ** Append term pTerm/nTerm to the segment being written by the writer passed
3209 ** as the second argument.
3210 **
3211 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3212 ** already occurred, this function is a no-op.
3213 */
3214 static void fts5WriteAppendTerm(
3215 Fts5Index *p,
3216 Fts5SegWriter *pWriter,
3217 int nTerm, const u8 *pTerm
3218 ){
3219 int nPrefix; /* Bytes of prefix compression for term */
3220 Fts5PageWriter *pPage = &pWriter->writer;
3221 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3222
3223 assert( p->rc==SQLITE_OK );
3224 assert( pPage->buf.n>=4 );
3225 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
3226
3227 /* If the current leaf page is full, flush it to disk. */
3228 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
3229 if( pPage->buf.n>4 ){
3230 fts5WriteFlushLeaf(p, pWriter);
3231 }
3232 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
3233 }
3234
3235 /* TODO1: Updating pgidx here. */
3236 pPgidx->n += sqlite3Fts5PutVarint(
3237 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
3238 );
3239 pPage->iPrevPgidx = pPage->buf.n;
3240 #if 0
3241 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
3242 pPgidx->n += 2;
3243 #endif
3244
3245 if( pWriter->bFirstTermInPage ){
3246 nPrefix = 0;
3247 if( pPage->pgno!=1 ){
3248 /* This is the first term on a leaf that is not the leftmost leaf in
3249 ** the segment b-tree. In this case it is necessary to add a term to
3250 ** the b-tree hierarchy that is (a) larger than the largest term
3251 ** already written to the segment and (b) smaller than or equal to
3252 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
3253 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
3254 ** previous term.
3255 **
3256 ** Usually, the previous term is available in pPage->term. The exception
3257 ** is if this is the first term written in an incremental-merge step.
3258 ** In this case the previous term is not available, so just write a
3259 ** copy of (pTerm/nTerm) into the parent node. This is slightly
3260 ** inefficient, but still correct. */
3261 int n = nTerm;
3262 if( pPage->term.n ){
3263 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
3264 }
3265 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
3266 pPage = &pWriter->writer;
3267 }
3268 }else{
3269 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
3270 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
3271 }
3272
3273 /* Append the number of bytes of new data, then the term data itself
3274 ** to the page. */
3275 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
3276 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
3277
3278 /* Update the Fts5PageWriter.term field. */
3279 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
3280 pWriter->bFirstTermInPage = 0;
3281
3282 pWriter->bFirstRowidInPage = 0;
3283 pWriter->bFirstRowidInDoclist = 1;
3284
3285 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
3286 pWriter->aDlidx[0].pgno = pPage->pgno;
3287 }
3288
3289 /*
3290 ** Append a rowid and position-list size field to the writers output.
3291 */
3292 static void fts5WriteAppendRowid(
3293 Fts5Index *p,
3294 Fts5SegWriter *pWriter,
3295 i64 iRowid,
3296 int nPos
3297 ){
3298 if( p->rc==SQLITE_OK ){
3299 Fts5PageWriter *pPage = &pWriter->writer;
3300
3301 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
3302 fts5WriteFlushLeaf(p, pWriter);
3303 }
3304
3305 /* If this is to be the first rowid written to the page, set the
3306 ** rowid-pointer in the page-header. Also append a value to the dlidx
3307 ** buffer, in case a doclist-index is required. */
3308 if( pWriter->bFirstRowidInPage ){
3309 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
3310 fts5WriteDlidxAppend(p, pWriter, iRowid);
3311 }
3312
3313 /* Write the rowid. */
3314 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
3315 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
3316 }else{
3317 assert( p->rc || iRowid>pWriter->iPrevRowid );
3318 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
3319 }
3320 pWriter->iPrevRowid = iRowid;
3321 pWriter->bFirstRowidInDoclist = 0;
3322 pWriter->bFirstRowidInPage = 0;
3323
3324 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
3325 }
3326 }
3327
3328 static void fts5WriteAppendPoslistData(
3329 Fts5Index *p,
3330 Fts5SegWriter *pWriter,
3331 const u8 *aData,
3332 int nData
3333 ){
3334 Fts5PageWriter *pPage = &pWriter->writer;
3335 const u8 *a = aData;
3336 int n = nData;
3337
3338 assert( p->pConfig->pgsz>0 );
3339 while( p->rc==SQLITE_OK
3340 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
3341 ){
3342 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
3343 int nCopy = 0;
3344 while( nCopy<nReq ){
3345 i64 dummy;
3346 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
3347 }
3348 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
3349 a += nCopy;
3350 n -= nCopy;
3351 fts5WriteFlushLeaf(p, pWriter);
3352 }
3353 if( n>0 ){
3354 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
3355 }
3356 }
3357
3358 /*
3359 ** Flush any data cached by the writer object to the database. Free any
3360 ** allocations associated with the writer.
3361 */
3362 static void fts5WriteFinish(
3363 Fts5Index *p,
3364 Fts5SegWriter *pWriter, /* Writer object */
3365 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
3366 ){
3367 int i;
3368 Fts5PageWriter *pLeaf = &pWriter->writer;
3369 if( p->rc==SQLITE_OK ){
3370 assert( pLeaf->pgno>=1 );
3371 if( pLeaf->buf.n>4 ){
3372 fts5WriteFlushLeaf(p, pWriter);
3373 }
3374 *pnLeaf = pLeaf->pgno-1;
3375 fts5WriteFlushBtree(p, pWriter);
3376 }
3377 fts5BufferFree(&pLeaf->term);
3378 fts5BufferFree(&pLeaf->buf);
3379 fts5BufferFree(&pLeaf->pgidx);
3380 fts5BufferFree(&pWriter->btterm);
3381
3382 for(i=0; i<pWriter->nDlidx; i++){
3383 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
3384 }
3385 sqlite3_free(pWriter->aDlidx);
3386 }
3387
3388 static void fts5WriteInit(
3389 Fts5Index *p,
3390 Fts5SegWriter *pWriter,
3391 int iSegid
3392 ){
3393 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
3394
3395 memset(pWriter, 0, sizeof(Fts5SegWriter));
3396 pWriter->iSegid = iSegid;
3397
3398 fts5WriteDlidxGrow(p, pWriter, 1);
3399 pWriter->writer.pgno = 1;
3400 pWriter->bFirstTermInPage = 1;
3401 pWriter->iBtPage = 1;
3402
3403 assert( pWriter->writer.buf.n==0 );
3404 assert( pWriter->writer.pgidx.n==0 );
3405
3406 /* Grow the two buffers to pgsz + padding bytes in size. */
3407 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
3408 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
3409
3410 if( p->pIdxWriter==0 ){
3411 Fts5Config *pConfig = p->pConfig;
3412 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
3413 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
3414 pConfig->zDb, pConfig->zName
3415 ));
3416 }
3417
3418 if( p->rc==SQLITE_OK ){
3419 /* Initialize the 4-byte leaf-page header to 0x00. */
3420 memset(pWriter->writer.buf.p, 0, 4);
3421 pWriter->writer.buf.n = 4;
3422
3423 /* Bind the current output segment id to the index-writer. This is an
3424 ** optimization over binding the same value over and over as rows are
3425 ** inserted into %_idx by the current writer. */
3426 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
3427 }
3428 }
3429
3430 /*
3431 ** Iterator pIter was used to iterate through the input segments of on an
3432 ** incremental merge operation. This function is called if the incremental
3433 ** merge step has finished but the input has not been completely exhausted.
3434 */
3435 static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
3436 int i;
3437 Fts5Buffer buf;
3438 memset(&buf, 0, sizeof(Fts5Buffer));
3439 for(i=0; i<pIter->nSeg; i++){
3440 Fts5SegIter *pSeg = &pIter->aSeg[i];
3441 if( pSeg->pSeg==0 ){
3442 /* no-op */
3443 }else if( pSeg->pLeaf==0 ){
3444 /* All keys from this input segment have been transfered to the output.
3445 ** Set both the first and last page-numbers to 0 to indicate that the
3446 ** segment is now empty. */
3447 pSeg->pSeg->pgnoLast = 0;
3448 pSeg->pSeg->pgnoFirst = 0;
3449 }else{
3450 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
3451 i64 iLeafRowid;
3452 Fts5Data *pData;
3453 int iId = pSeg->pSeg->iSegid;
3454 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
3455
3456 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
3457 pData = fts5DataRead(p, iLeafRowid);
3458 if( pData ){
3459 fts5BufferZero(&buf);
3460 fts5BufferGrow(&p->rc, &buf, pData->nn);
3461 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
3462 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
3463 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
3464 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
3465 if( p->rc==SQLITE_OK ){
3466 /* Set the szLeaf field */
3467 fts5PutU16(&buf.p[2], (u16)buf.n);
3468 }
3469
3470 /* Set up the new page-index array */
3471 fts5BufferAppendVarint(&p->rc, &buf, 4);
3472 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
3473 && pSeg->iEndofDoclist<pData->szLeaf
3474 ){
3475 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
3476 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
3477 fts5BufferAppendBlob(&p->rc, &buf,
3478 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
3479 );
3480 }
3481
3482 fts5DataRelease(pData);
3483 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
3484 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
3485 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
3486 }
3487 }
3488 }
3489 fts5BufferFree(&buf);
3490 }
3491
3492 static void fts5MergeChunkCallback(
3493 Fts5Index *p,
3494 void *pCtx,
3495 const u8 *pChunk, int nChunk
3496 ){
3497 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
3498 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
3499 }
3500
3501 /*
3502 **
3503 */
3504 static void fts5IndexMergeLevel(
3505 Fts5Index *p, /* FTS5 backend object */
3506 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
3507 int iLvl, /* Level to read input from */
3508 int *pnRem /* Write up to this many output leaves */
3509 ){
3510 Fts5Structure *pStruct = *ppStruct;
3511 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
3512 Fts5StructureLevel *pLvlOut;
3513 Fts5IndexIter *pIter = 0; /* Iterator to read input data */
3514 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
3515 int nInput; /* Number of input segments */
3516 Fts5SegWriter writer; /* Writer object */
3517 Fts5StructureSegment *pSeg; /* Output segment */
3518 Fts5Buffer term;
3519 int bOldest; /* True if the output segment is the oldest */
3520
3521 assert( iLvl<pStruct->nLevel );
3522 assert( pLvl->nMerge<=pLvl->nSeg );
3523
3524 memset(&writer, 0, sizeof(Fts5SegWriter));
3525 memset(&term, 0, sizeof(Fts5Buffer));
3526 if( pLvl->nMerge ){
3527 pLvlOut = &pStruct->aLevel[iLvl+1];
3528 assert( pLvlOut->nSeg>0 );
3529 nInput = pLvl->nMerge;
3530 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
3531
3532 fts5WriteInit(p, &writer, pSeg->iSegid);
3533 writer.writer.pgno = pSeg->pgnoLast+1;
3534 writer.iBtPage = 0;
3535 }else{
3536 int iSegid = fts5AllocateSegid(p, pStruct);
3537
3538 /* Extend the Fts5Structure object as required to ensure the output
3539 ** segment exists. */
3540 if( iLvl==pStruct->nLevel-1 ){
3541 fts5StructureAddLevel(&p->rc, ppStruct);
3542 pStruct = *ppStruct;
3543 }
3544 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
3545 if( p->rc ) return;
3546 pLvl = &pStruct->aLevel[iLvl];
3547 pLvlOut = &pStruct->aLevel[iLvl+1];
3548
3549 fts5WriteInit(p, &writer, iSegid);
3550
3551 /* Add the new segment to the output level */
3552 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
3553 pLvlOut->nSeg++;
3554 pSeg->pgnoFirst = 1;
3555 pSeg->iSegid = iSegid;
3556 pStruct->nSegment++;
3557
3558 /* Read input from all segments in the input level */
3559 nInput = pLvl->nSeg;
3560 }
3561 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
3562
3563 assert( iLvl>=0 );
3564 for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
3565 fts5MultiIterEof(p, pIter)==0;
3566 fts5MultiIterNext(p, pIter, 0, 0)
3567 ){
3568 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3569 int nPos; /* position-list size field value */
3570 int nTerm;
3571 const u8 *pTerm;
3572
3573 /* Check for key annihilation. */
3574 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
3575
3576 pTerm = fts5MultiIterTerm(pIter, &nTerm);
3577 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
3578 if( pnRem && writer.nLeafWritten>nRem ){
3579 break;
3580 }
3581
3582 /* This is a new term. Append a term to the output segment. */
3583 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
3584 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
3585 }
3586
3587 /* Append the rowid to the output */
3588 /* WRITEPOSLISTSIZE */
3589 nPos = pSegIter->nPos*2 + pSegIter->bDel;
3590 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);
3591
3592 /* Append the position-list data to the output */
3593 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
3594 }
3595
3596 /* Flush the last leaf page to disk. Set the output segment b-tree height
3597 ** and last leaf page number at the same time. */
3598 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
3599
3600 if( fts5MultiIterEof(p, pIter) ){
3601 int i;
3602
3603 /* Remove the redundant segments from the %_data table */
3604 for(i=0; i<nInput; i++){
3605 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
3606 }
3607
3608 /* Remove the redundant segments from the input level */
3609 if( pLvl->nSeg!=nInput ){
3610 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
3611 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
3612 }
3613 pStruct->nSegment -= nInput;
3614 pLvl->nSeg -= nInput;
3615 pLvl->nMerge = 0;
3616 if( pSeg->pgnoLast==0 ){
3617 pLvlOut->nSeg--;
3618 pStruct->nSegment--;
3619 }
3620 }else{
3621 assert( pSeg->pgnoLast>0 );
3622 fts5TrimSegments(p, pIter);
3623 pLvl->nMerge = nInput;
3624 }
3625
3626 fts5MultiIterFree(p, pIter);
3627 fts5BufferFree(&term);
3628 if( pnRem ) *pnRem -= writer.nLeafWritten;
3629 }
3630
3631 /*
3632 ** Do up to nPg pages of automerge work on the index.
3633 */
3634 static void fts5IndexMerge(
3635 Fts5Index *p, /* FTS5 backend object */
3636 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
3637 int nPg /* Pages of work to do */
3638 ){
3639 int nRem = nPg;
3640 Fts5Structure *pStruct = *ppStruct;
3641 while( nRem>0 && p->rc==SQLITE_OK ){
3642 int iLvl; /* To iterate through levels */
3643 int iBestLvl = 0; /* Level offering the most input segments */
3644 int nBest = 0; /* Number of input segments on best level */
3645
3646 /* Set iBestLvl to the level to read input segments from. */
3647 assert( pStruct->nLevel>0 );
3648 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3649 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
3650 if( pLvl->nMerge ){
3651 if( pLvl->nMerge>nBest ){
3652 iBestLvl = iLvl;
3653 nBest = pLvl->nMerge;
3654 }
3655 break;
3656 }
3657 if( pLvl->nSeg>nBest ){
3658 nBest = pLvl->nSeg;
3659 iBestLvl = iLvl;
3660 }
3661 }
3662
3663 /* If nBest is still 0, then the index must be empty. */
3664 #ifdef SQLITE_DEBUG
3665 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
3666 assert( pStruct->aLevel[iLvl].nSeg==0 );
3667 }
3668 #endif
3669
3670 if( nBest<p->pConfig->nAutomerge
3671 && pStruct->aLevel[iBestLvl].nMerge==0
3672 ){
3673 break;
3674 }
3675 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
3676 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
3677 fts5StructurePromote(p, iBestLvl+1, pStruct);
3678 }
3679 }
3680 *ppStruct = pStruct;
3681 }
3682
3683 /*
3684 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
3685 ** segment. This function updates the write-counter accordingly and, if
3686 ** necessary, performs incremental merge work.
3687 **
3688 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3689 ** already occurred, this function is a no-op.
3690 */
3691 static void fts5IndexAutomerge(
3692 Fts5Index *p, /* FTS5 backend object */
3693 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
3694 int nLeaf /* Number of output leaves just written */
3695 ){
3696 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
3697 Fts5Structure *pStruct = *ppStruct;
3698 u64 nWrite; /* Initial value of write-counter */
3699 int nWork; /* Number of work-quanta to perform */
3700 int nRem; /* Number of leaf pages left to write */
3701
3702 /* Update the write-counter. While doing so, set nWork. */
3703 nWrite = pStruct->nWriteCounter;
3704 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
3705 pStruct->nWriteCounter += nLeaf;
3706 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
3707
3708 fts5IndexMerge(p, ppStruct, nRem);
3709 }
3710 }
3711
3712 static void fts5IndexCrisismerge(
3713 Fts5Index *p, /* FTS5 backend object */
3714 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
3715 ){
3716 const int nCrisis = p->pConfig->nCrisisMerge;
3717 Fts5Structure *pStruct = *ppStruct;
3718 int iLvl = 0;
3719
3720 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
3721 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
3722 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
3723 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
3724 fts5StructurePromote(p, iLvl+1, pStruct);
3725 iLvl++;
3726 }
3727 *ppStruct = pStruct;
3728 }
3729
3730 static int fts5IndexReturn(Fts5Index *p){
3731 int rc = p->rc;
3732 p->rc = SQLITE_OK;
3733 return rc;
3734 }
3735
3736 typedef struct Fts5FlushCtx Fts5FlushCtx;
3737 struct Fts5FlushCtx {
3738 Fts5Index *pIdx;
3739 Fts5SegWriter writer;
3740 };
3741
3742 /*
3743 ** Buffer aBuf[] contains a list of varints, all small enough to fit
3744 ** in a 32-bit integer. Return the size of the largest prefix of this
3745 ** list nMax bytes or less in size.
3746 */
3747 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
3748 int ret;
3749 u32 dummy;
3750 ret = fts5GetVarint32(aBuf, dummy);
3751 if( ret<nMax ){
3752 while( 1 ){
3753 int i = fts5GetVarint32(&aBuf[ret], dummy);
3754 if( (ret + i) > nMax ) break;
3755 ret += i;
3756 }
3757 }
3758 return ret;
3759 }
3760
3761 /*
3762 ** Flush the contents of in-memory hash table iHash to a new level-0
3763 ** segment on disk. Also update the corresponding structure record.
3764 **
3765 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3766 ** already occurred, this function is a no-op.
3767 */
3768 static void fts5FlushOneHash(Fts5Index *p){
3769 Fts5Hash *pHash = p->pHash;
3770 Fts5Structure *pStruct;
3771 int iSegid;
3772 int pgnoLast = 0; /* Last leaf page number in segment */
3773
3774 /* Obtain a reference to the index structure and allocate a new segment-id
3775 ** for the new level-0 segment. */
3776 pStruct = fts5StructureRead(p);
3777 iSegid = fts5AllocateSegid(p, pStruct);
3778
3779 if( iSegid ){
3780 const int pgsz = p->pConfig->pgsz;
3781
3782 Fts5StructureSegment *pSeg; /* New segment within pStruct */
3783 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
3784 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
3785
3786 Fts5SegWriter writer;
3787 fts5WriteInit(p, &writer, iSegid);
3788
3789 pBuf = &writer.writer.buf;
3790 pPgidx = &writer.writer.pgidx;
3791
3792 /* fts5WriteInit() should have initialized the buffers to (most likely)
3793 ** the maximum space required. */
3794 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
3795 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
3796
3797 /* Begin scanning through hash table entries. This loop runs once for each
3798 ** term/doclist currently stored within the hash table. */
3799 if( p->rc==SQLITE_OK ){
3800 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
3801 }
3802 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
3803 const char *zTerm; /* Buffer containing term */
3804 const u8 *pDoclist; /* Pointer to doclist for this term */
3805 int nDoclist; /* Size of doclist in bytes */
3806
3807 /* Write the term for this entry to disk. */
3808 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
3809 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
3810
3811 assert( writer.bFirstRowidInPage==0 );
3812 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
3813 /* The entire doclist will fit on the current leaf. */
3814 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
3815 }else{
3816 i64 iRowid = 0;
3817 i64 iDelta = 0;
3818 int iOff = 0;
3819
3820 /* The entire doclist will not fit on this leaf. The following
3821 ** loop iterates through the poslists that make up the current
3822 ** doclist. */
3823 while( p->rc==SQLITE_OK && iOff<nDoclist ){
3824 int nPos;
3825 int nCopy;
3826 int bDummy;
3827 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
3828 nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
3829 nCopy += nPos;
3830 iRowid += iDelta;
3831
3832 if( writer.bFirstRowidInPage ){
3833 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
3834 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
3835 writer.bFirstRowidInPage = 0;
3836 fts5WriteDlidxAppend(p, &writer, iRowid);
3837 }else{
3838 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
3839 }
3840 assert( pBuf->n<=pBuf->nSpace );
3841
3842 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
3843 /* The entire poslist will fit on the current leaf. So copy
3844 ** it in one go. */
3845 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
3846 }else{
3847 /* The entire poslist will not fit on this leaf. So it needs
3848 ** to be broken into sections. The only qualification being
3849 ** that each varint must be stored contiguously. */
3850 const u8 *pPoslist = &pDoclist[iOff];
3851 int iPos = 0;
3852 while( p->rc==SQLITE_OK ){
3853 int nSpace = pgsz - pBuf->n - pPgidx->n;
3854 int n = 0;
3855 if( (nCopy - iPos)<=nSpace ){
3856 n = nCopy - iPos;
3857 }else{
3858 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
3859 }
3860 assert( n>0 );
3861 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
3862 iPos += n;
3863 if( (pBuf->n + pPgidx->n)>=pgsz ){
3864 fts5WriteFlushLeaf(p, &writer);
3865 }
3866 if( iPos>=nCopy ) break;
3867 }
3868 }
3869 iOff += nCopy;
3870 }
3871 }
3872
3873 /* TODO2: Doclist terminator written here. */
3874 /* pBuf->p[pBuf->n++] = '\0'; */
3875 assert( pBuf->n<=pBuf->nSpace );
3876 sqlite3Fts5HashScanNext(pHash);
3877 }
3878 sqlite3Fts5HashClear(pHash);
3879 fts5WriteFinish(p, &writer, &pgnoLast);
3880
3881 /* Update the Fts5Structure. It is written back to the database by the
3882 ** fts5StructureRelease() call below. */
3883 if( pStruct->nLevel==0 ){
3884 fts5StructureAddLevel(&p->rc, &pStruct);
3885 }
3886 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
3887 if( p->rc==SQLITE_OK ){
3888 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
3889 pSeg->iSegid = iSegid;
3890 pSeg->pgnoFirst = 1;
3891 pSeg->pgnoLast = pgnoLast;
3892 pStruct->nSegment++;
3893 }
3894 fts5StructurePromote(p, 0, pStruct);
3895 }
3896
3897 fts5IndexAutomerge(p, &pStruct, pgnoLast);
3898 fts5IndexCrisismerge(p, &pStruct);
3899 fts5StructureWrite(p, pStruct);
3900 fts5StructureRelease(pStruct);
3901 }
3902
3903 /*
3904 ** Flush any data stored in the in-memory hash tables to the database.
3905 */
3906 static void fts5IndexFlush(Fts5Index *p){
3907 /* Unless it is empty, flush the hash table to disk */
3908 if( p->nPendingData ){
3909 assert( p->pHash );
3910 p->nPendingData = 0;
3911 fts5FlushOneHash(p);
3912 }
3913 }
3914
3915
3916 int sqlite3Fts5IndexOptimize(Fts5Index *p){
3917 Fts5Structure *pStruct;
3918 Fts5Structure *pNew = 0;
3919 int nSeg = 0;
3920
3921 assert( p->rc==SQLITE_OK );
3922 fts5IndexFlush(p);
3923 pStruct = fts5StructureRead(p);
3924
3925 if( pStruct ){
3926 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3927 nSeg = pStruct->nSegment;
3928 if( nSeg>1 ){
3929 int nByte = sizeof(Fts5Structure);
3930 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
3931 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
3932 }
3933 }
3934 if( pNew ){
3935 Fts5StructureLevel *pLvl;
3936 int nByte = nSeg * sizeof(Fts5StructureSegment);
3937 pNew->nLevel = pStruct->nLevel+1;
3938 pNew->nRef = 1;
3939 pNew->nWriteCounter = pStruct->nWriteCounter;
3940 pLvl = &pNew->aLevel[pStruct->nLevel];
3941 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
3942 if( pLvl->aSeg ){
3943 int iLvl, iSeg;
3944 int iSegOut = 0;
3945 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3946 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3947 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
3948 iSegOut++;
3949 }
3950 }
3951 pNew->nSegment = pLvl->nSeg = nSeg;
3952 }else{
3953 sqlite3_free(pNew);
3954 pNew = 0;
3955 }
3956 }
3957
3958 if( pNew ){
3959 int iLvl = pNew->nLevel-1;
3960 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
3961 int nRem = FTS5_OPT_WORK_UNIT;
3962 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
3963 }
3964
3965 fts5StructureWrite(p, pNew);
3966 fts5StructureRelease(pNew);
3967 }
3968
3969 fts5StructureRelease(pStruct);
3970 return fts5IndexReturn(p);
3971 }
3972
3973 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
3974 Fts5Structure *pStruct;
3975
3976 pStruct = fts5StructureRead(p);
3977 if( pStruct && pStruct->nLevel ){
3978 fts5IndexMerge(p, &pStruct, nMerge);
3979 fts5StructureWrite(p, pStruct);
3980 }
3981 fts5StructureRelease(pStruct);
3982
3983 return fts5IndexReturn(p);
3984 }
3985
3986 static void fts5PoslistCallback(
3987 Fts5Index *p,
3988 void *pContext,
3989 const u8 *pChunk, int nChunk
3990 ){
3991 assert_nc( nChunk>=0 );
3992 if( nChunk>0 ){
3993 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
3994 }
3995 }
3996
3997 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3998 struct PoslistCallbackCtx {
3999 Fts5Buffer *pBuf; /* Append to this buffer */
4000 Fts5Colset *pColset; /* Restrict matches to this column */
4001 int eState; /* See above */
4002 };
4003
4004 /*
4005 ** TODO: Make this more efficient!
4006 */
4007 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
4008 int i;
4009 for(i=0; i<pColset->nCol; i++){
4010 if( pColset->aiCol[i]==iCol ) return 1;
4011 }
4012 return 0;
4013 }
4014
4015 static void fts5PoslistFilterCallback(
4016 Fts5Index *p,
4017 void *pContext,
4018 const u8 *pChunk, int nChunk
4019 ){
4020 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
4021 assert_nc( nChunk>=0 );
4022 if( nChunk>0 ){
4023 /* Search through to find the first varint with value 1. This is the
4024 ** start of the next columns hits. */
4025 int i = 0;
4026 int iStart = 0;
4027
4028 if( pCtx->eState==2 ){
4029 int iCol;
4030 fts5FastGetVarint32(pChunk, i, iCol);
4031 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
4032 pCtx->eState = 1;
4033 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
4034 }else{
4035 pCtx->eState = 0;
4036 }
4037 }
4038
4039 do {
4040 while( i<nChunk && pChunk[i]!=0x01 ){
4041 while( pChunk[i] & 0x80 ) i++;
4042 i++;
4043 }
4044 if( pCtx->eState ){
4045 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
4046 }
4047 if( i<nChunk ){
4048 int iCol;
4049 iStart = i;
4050 i++;
4051 if( i>=nChunk ){
4052 pCtx->eState = 2;
4053 }else{
4054 fts5FastGetVarint32(pChunk, i, iCol);
4055 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
4056 if( pCtx->eState ){
4057 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
4058 iStart = i;
4059 }
4060 }
4061 }
4062 }while( i<nChunk );
4063 }
4064 }
4065
4066 /*
4067 ** Iterator pIter currently points to a valid entry (not EOF). This
4068 ** function appends the position list data for the current entry to
4069 ** buffer pBuf. It does not make a copy of the position-list size
4070 ** field.
4071 */
4072 static void fts5SegiterPoslist(
4073 Fts5Index *p,
4074 Fts5SegIter *pSeg,
4075 Fts5Colset *pColset,
4076 Fts5Buffer *pBuf
4077 ){
4078 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
4079 if( pColset==0 ){
4080 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
4081 }else{
4082 PoslistCallbackCtx sCtx;
4083 sCtx.pBuf = pBuf;
4084 sCtx.pColset = pColset;
4085 sCtx.eState = fts5IndexColsetTest(pColset, 0);
4086 assert( sCtx.eState==0 || sCtx.eState==1 );
4087 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
4088 }
4089 }
4090 }
4091
4092 /*
4093 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
4094 ** the position list contains entries for column iCol, then (*pa) is set
4095 ** to point to the sub-position-list for that column and the number of
4096 ** bytes in it returned. Or, if the argument position list does not
4097 ** contain any entries for column iCol, return 0.
4098 */
4099 static int fts5IndexExtractCol(
4100 const u8 **pa, /* IN/OUT: Pointer to poslist */
4101 int n, /* IN: Size of poslist in bytes */
4102 int iCol /* Column to extract from poslist */
4103 ){
4104 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
4105 const u8 *p = *pa;
4106 const u8 *pEnd = &p[n]; /* One byte past end of position list */
4107 u8 prev = 0;
4108
4109 while( iCol>iCurrent ){
4110 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
4111 ** not part of a varint */
4112 while( (prev & 0x80) || *p!=0x01 ){
4113 prev = *p++;
4114 if( p==pEnd ) return 0;
4115 }
4116 *pa = p++;
4117 p += fts5GetVarint32(p, iCurrent);
4118 }
4119 if( iCol!=iCurrent ) return 0;
4120
4121 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
4122 ** not part of a varint */
4123 assert( (prev & 0x80)==0 );
4124 while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
4125 prev = *p++;
4126 }
4127 return p - (*pa);
4128 }
4129
4130
4131 /*
4132 ** Iterator pMulti currently points to a valid entry (not EOF). This
4133 ** function appends the following to buffer pBuf:
4134 **
4135 ** * The varint iDelta, and
4136 ** * the position list that currently points to, including the size field.
4137 **
4138 ** If argument pColset is NULL, then the position list is filtered according
4139 ** to pColset before being appended to the buffer. If this means there are
4140 ** no entries in the position list, nothing is appended to the buffer (not
4141 ** even iDelta).
4142 **
4143 ** If an error occurs, an error code is left in p->rc.
4144 */
4145 static int fts5AppendPoslist(
4146 Fts5Index *p,
4147 i64 iDelta,
4148 Fts5IndexIter *pMulti,
4149 Fts5Colset *pColset,
4150 Fts5Buffer *pBuf
4151 ){
4152 if( p->rc==SQLITE_OK ){
4153 Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
4154 assert( fts5MultiIterEof(p, pMulti)==0 );
4155 assert( pSeg->nPos>0 );
4156 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){
4157
4158 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf
4159 && (pColset==0 || pColset->nCol==1)
4160 ){
4161 const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
4162 int nPos;
4163 if( pColset ){
4164 nPos = fts5IndexExtractCol(&pPos, pSeg->nPos, pColset->aiCol[0]);
4165 if( nPos==0 ) return 1;
4166 }else{
4167 nPos = pSeg->nPos;
4168 }
4169 assert( nPos>0 );
4170 fts5BufferSafeAppendVarint(pBuf, iDelta);
4171 fts5BufferSafeAppendVarint(pBuf, nPos*2);
4172 fts5BufferSafeAppendBlob(pBuf, pPos, nPos);
4173 }else{
4174 int iSv1;
4175 int iSv2;
4176 int iData;
4177
4178 /* Append iDelta */
4179 iSv1 = pBuf->n;
4180 fts5BufferSafeAppendVarint(pBuf, iDelta);
4181
4182 /* WRITEPOSLISTSIZE */
4183 iSv2 = pBuf->n;
4184 fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
4185 iData = pBuf->n;
4186
4187 fts5SegiterPoslist(p, pSeg, pColset, pBuf);
4188
4189 if( pColset ){
4190 int nActual = pBuf->n - iData;
4191 if( nActual!=pSeg->nPos ){
4192 if( nActual==0 ){
4193 pBuf->n = iSv1;
4194 return 1;
4195 }else{
4196 int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
4197 while( iSv2<(iData-nReq) ){ pBuf->p[iSv2++] = 0x80; }
4198 sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2);
4199 }
4200 }
4201 }
4202 }
4203
4204 }
4205 }
4206
4207 return 0;
4208 }
4209
4210 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
4211 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
4212
4213 assert( pIter->aPoslist );
4214 if( p>=pIter->aEof ){
4215 pIter->aPoslist = 0;
4216 }else{
4217 i64 iDelta;
4218
4219 p += fts5GetVarint(p, (u64*)&iDelta);
4220 pIter->iRowid += iDelta;
4221
4222 /* Read position list size */
4223 if( p[0] & 0x80 ){
4224 int nPos;
4225 pIter->nSize = fts5GetVarint32(p, nPos);
4226 pIter->nPoslist = (nPos>>1);
4227 }else{
4228 pIter->nPoslist = ((int)(p[0])) >> 1;
4229 pIter->nSize = 1;
4230 }
4231
4232 pIter->aPoslist = p;
4233 }
4234 }
4235
4236 static void fts5DoclistIterInit(
4237 Fts5Buffer *pBuf,
4238 Fts5DoclistIter *pIter
4239 ){
4240 memset(pIter, 0, sizeof(*pIter));
4241 pIter->aPoslist = pBuf->p;
4242 pIter->aEof = &pBuf->p[pBuf->n];
4243 fts5DoclistIterNext(pIter);
4244 }
4245
4246 #if 0
4247 /*
4248 ** Append a doclist to buffer pBuf.
4249 **
4250 ** This function assumes that space within the buffer has already been
4251 ** allocated.
4252 */
4253 static void fts5MergeAppendDocid(
4254 Fts5Buffer *pBuf, /* Buffer to write to */
4255 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
4256 i64 iRowid /* Rowid to append */
4257 ){
4258 assert( pBuf->n!=0 || (*piLastRowid)==0 );
4259 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
4260 *piLastRowid = iRowid;
4261 }
4262 #endif
4263
4264 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
4265 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
4266 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
4267 (iLastRowid) = (iRowid); \
4268 }
4269
4270 /*
4271 ** Buffers p1 and p2 contain doclists. This function merges the content
4272 ** of the two doclists together and sets buffer p1 to the result before
4273 ** returning.
4274 **
4275 ** If an error occurs, an error code is left in p->rc. If an error has
4276 ** already occurred, this function is a no-op.
4277 */
4278 static void fts5MergePrefixLists(
4279 Fts5Index *p, /* FTS5 backend object */
4280 Fts5Buffer *p1, /* First list to merge */
4281 Fts5Buffer *p2 /* Second list to merge */
4282 ){
4283 if( p2->n ){
4284 i64 iLastRowid = 0;
4285 Fts5DoclistIter i1;
4286 Fts5DoclistIter i2;
4287 Fts5Buffer out;
4288 Fts5Buffer tmp;
4289 memset(&out, 0, sizeof(out));
4290 memset(&tmp, 0, sizeof(tmp));
4291
4292 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4293 fts5DoclistIterInit(p1, &i1);
4294 fts5DoclistIterInit(p2, &i2);
4295 while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){
4296 if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){
4297 /* Copy entry from i1 */
4298 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
4299 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
4300 fts5DoclistIterNext(&i1);
4301 }
4302 else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
4303 /* Copy entry from i2 */
4304 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4305 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
4306 fts5DoclistIterNext(&i2);
4307 }
4308 else{
4309 i64 iPos1 = 0;
4310 i64 iPos2 = 0;
4311 int iOff1 = 0;
4312 int iOff2 = 0;
4313 u8 *a1 = &i1.aPoslist[i1.nSize];
4314 u8 *a2 = &i2.aPoslist[i2.nSize];
4315
4316 Fts5PoslistWriter writer;
4317 memset(&writer, 0, sizeof(writer));
4318
4319 /* Merge the two position lists. */
4320 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4321 fts5BufferZero(&tmp);
4322
4323 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4324 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4325
4326 while( p->rc==SQLITE_OK && (iPos1>=0 || iPos2>=0) ){
4327 i64 iNew;
4328 if( iPos2<0 || (iPos1>=0 && iPos1<iPos2) ){
4329 iNew = iPos1;
4330 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4331 }else{
4332 iNew = iPos2;
4333 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4334 if( iPos1==iPos2 ){
4335 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1);
4336 }
4337 }
4338 p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
4339 }
4340
4341 /* WRITEPOSLISTSIZE */
4342 fts5BufferSafeAppendVarint(&out, tmp.n * 2);
4343 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
4344 fts5DoclistIterNext(&i1);
4345 fts5DoclistIterNext(&i2);
4346 }
4347 }
4348
4349 fts5BufferSet(&p->rc, p1, out.n, out.p);
4350 fts5BufferFree(&tmp);
4351 fts5BufferFree(&out);
4352 }
4353 }
4354
4355 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4356 Fts5Buffer tmp = *p1;
4357 *p1 = *p2;
4358 *p2 = tmp;
4359 }
4360
4361 static void fts5SetupPrefixIter(
4362 Fts5Index *p, /* Index to read from */
4363 int bDesc, /* True for "ORDER BY rowid DESC" */
4364 const u8 *pToken, /* Buffer containing prefix to match */
4365 int nToken, /* Size of buffer pToken in bytes */
4366 Fts5Colset *pColset, /* Restrict matches to these columns */
4367 Fts5IndexIter **ppIter /* OUT: New iterator */
4368 ){
4369 Fts5Structure *pStruct;
4370 Fts5Buffer *aBuf;
4371 const int nBuf = 32;
4372
4373 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
4374 pStruct = fts5StructureRead(p);
4375
4376 if( aBuf && pStruct ){
4377 const int flags = FTS5INDEX_QUERY_SCAN;
4378 int i;
4379 i64 iLastRowid = 0;
4380 Fts5IndexIter *p1 = 0; /* Iterator used to gather data from index */
4381 Fts5Data *pData;
4382 Fts5Buffer doclist;
4383 int bNewTerm = 1;
4384
4385 memset(&doclist, 0, sizeof(doclist));
4386 for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
4387 fts5MultiIterEof(p, p1)==0;
4388 fts5MultiIterNext2(p, p1, &bNewTerm)
4389 ){
4390 i64 iRowid = fts5MultiIterRowid(p1);
4391 int nTerm;
4392 const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm);
4393 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
4394 if( bNewTerm ){
4395 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
4396 }
4397
4398 if( doclist.n>0 && iRowid<=iLastRowid ){
4399 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
4400 assert( i<nBuf );
4401 if( aBuf[i].n==0 ){
4402 fts5BufferSwap(&doclist, &aBuf[i]);
4403 fts5BufferZero(&doclist);
4404 }else{
4405 fts5MergePrefixLists(p, &doclist, &aBuf[i]);
4406 fts5BufferZero(&aBuf[i]);
4407 }
4408 }
4409 iLastRowid = 0;
4410 }
4411
4412 if( !fts5AppendPoslist(p, iRowid-iLastRowid, p1, pColset, &doclist) ){
4413 iLastRowid = iRowid;
4414 }
4415 }
4416
4417 for(i=0; i<nBuf; i++){
4418 if( p->rc==SQLITE_OK ){
4419 fts5MergePrefixLists(p, &doclist, &aBuf[i]);
4420 }
4421 fts5BufferFree(&aBuf[i]);
4422 }
4423 fts5MultiIterFree(p, p1);
4424
4425 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
4426 if( pData ){
4427 pData->p = (u8*)&pData[1];
4428 pData->nn = pData->szLeaf = doclist.n;
4429 memcpy(pData->p, doclist.p, doclist.n);
4430 fts5MultiIterNew2(p, pData, bDesc, ppIter);
4431 }
4432 fts5BufferFree(&doclist);
4433 }
4434
4435 fts5StructureRelease(pStruct);
4436 sqlite3_free(aBuf);
4437 }
4438
4439
4440 /*
4441 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
4442 ** to the document with rowid iRowid.
4443 */
4444 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
4445 assert( p->rc==SQLITE_OK );
4446
4447 /* Allocate the hash table if it has not already been allocated */
4448 if( p->pHash==0 ){
4449 p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData);
4450 }
4451
4452 /* Flush the hash table to disk if required */
4453 if( iRowid<p->iWriteRowid
4454 || (iRowid==p->iWriteRowid && p->bDelete==0)
4455 || (p->nPendingData > p->pConfig->nHashSize)
4456 ){
4457 fts5IndexFlush(p);
4458 }
4459
4460 p->iWriteRowid = iRowid;
4461 p->bDelete = bDelete;
4462 return fts5IndexReturn(p);
4463 }
4464
4465 /*
4466 ** Commit data to disk.
4467 */
4468 int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
4469 assert( p->rc==SQLITE_OK );
4470 fts5IndexFlush(p);
4471 if( bCommit ) fts5CloseReader(p);
4472 return fts5IndexReturn(p);
4473 }
4474
4475 /*
4476 ** Discard any data stored in the in-memory hash tables. Do not write it
4477 ** to the database. Additionally, assume that the contents of the %_data
4478 ** table may have changed on disk. So any in-memory caches of %_data
4479 ** records must be invalidated.
4480 */
4481 int sqlite3Fts5IndexRollback(Fts5Index *p){
4482 fts5CloseReader(p);
4483 fts5IndexDiscardData(p);
4484 assert( p->rc==SQLITE_OK );
4485 return SQLITE_OK;
4486 }
4487
4488 /*
4489 ** The %_data table is completely empty when this function is called. This
4490 ** function populates it with the initial structure objects for each index,
4491 ** and the initial version of the "averages" record (a zero-byte blob).
4492 */
4493 int sqlite3Fts5IndexReinit(Fts5Index *p){
4494 Fts5Structure s;
4495 memset(&s, 0, sizeof(Fts5Structure));
4496 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
4497 fts5StructureWrite(p, &s);
4498 return fts5IndexReturn(p);
4499 }
4500
4501 /*
4502 ** Open a new Fts5Index handle. If the bCreate argument is true, create
4503 ** and initialize the underlying %_data table.
4504 **
4505 ** If successful, set *pp to point to the new object and return SQLITE_OK.
4506 ** Otherwise, set *pp to NULL and return an SQLite error code.
4507 */
4508 int sqlite3Fts5IndexOpen(
4509 Fts5Config *pConfig,
4510 int bCreate,
4511 Fts5Index **pp,
4512 char **pzErr
4513 ){
4514 int rc = SQLITE_OK;
4515 Fts5Index *p; /* New object */
4516
4517 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
4518 if( rc==SQLITE_OK ){
4519 p->pConfig = pConfig;
4520 p->nWorkUnit = FTS5_WORK_UNIT;
4521 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
4522 if( p->zDataTbl && bCreate ){
4523 rc = sqlite3Fts5CreateTable(
4524 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
4525 );
4526 if( rc==SQLITE_OK ){
4527 rc = sqlite3Fts5CreateTable(pConfig, "idx",
4528 "segid, term, pgno, PRIMARY KEY(segid, term)",
4529 1, pzErr
4530 );
4531 }
4532 if( rc==SQLITE_OK ){
4533 rc = sqlite3Fts5IndexReinit(p);
4534 }
4535 }
4536 }
4537
4538 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
4539 if( rc ){
4540 sqlite3Fts5IndexClose(p);
4541 *pp = 0;
4542 }
4543 return rc;
4544 }
4545
4546 /*
4547 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
4548 */
4549 int sqlite3Fts5IndexClose(Fts5Index *p){
4550 int rc = SQLITE_OK;
4551 if( p ){
4552 assert( p->pReader==0 );
4553 sqlite3_finalize(p->pWriter);
4554 sqlite3_finalize(p->pDeleter);
4555 sqlite3_finalize(p->pIdxWriter);
4556 sqlite3_finalize(p->pIdxDeleter);
4557 sqlite3_finalize(p->pIdxSelect);
4558 sqlite3Fts5HashFree(p->pHash);
4559 sqlite3_free(p->zDataTbl);
4560 sqlite3_free(p);
4561 }
4562 return rc;
4563 }
4564
4565 /*
4566 ** Argument p points to a buffer containing utf-8 text that is n bytes in
4567 ** size. Return the number of bytes in the nChar character prefix of the
4568 ** buffer, or 0 if there are less than nChar characters in total.
4569 */
4570 static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
4571 int n = 0;
4572 int i;
4573 for(i=0; i<nChar; i++){
4574 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
4575 if( (unsigned char)p[n++]>=0xc0 ){
4576 while( (p[n] & 0xc0)==0x80 ) n++;
4577 }
4578 }
4579 return n;
4580 }
4581
4582 /*
4583 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
4584 ** unicode characters in the string.
4585 */
4586 static int fts5IndexCharlen(const char *pIn, int nIn){
4587 int nChar = 0;
4588 int i = 0;
4589 while( i<nIn ){
4590 if( (unsigned char)pIn[i++]>=0xc0 ){
4591 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
4592 }
4593 nChar++;
4594 }
4595 return nChar;
4596 }
4597
4598 /*
4599 ** Insert or remove data to or from the index. Each time a document is
4600 ** added to or removed from the index, this function is called one or more
4601 ** times.
4602 **
4603 ** For an insert, it must be called once for each token in the new document.
4604 ** If the operation is a delete, it must be called (at least) once for each
4605 ** unique token in the document with an iCol value less than zero. The iPos
4606 ** argument is ignored for a delete.
4607 */
4608 int sqlite3Fts5IndexWrite(
4609 Fts5Index *p, /* Index to write to */
4610 int iCol, /* Column token appears in (-ve -> delete) */
4611 int iPos, /* Position of token within column */
4612 const char *pToken, int nToken /* Token to add or remove to or from index */
4613 ){
4614 int i; /* Used to iterate through indexes */
4615 int rc = SQLITE_OK; /* Return code */
4616 Fts5Config *pConfig = p->pConfig;
4617
4618 assert( p->rc==SQLITE_OK );
4619 assert( (iCol<0)==p->bDelete );
4620
4621 /* Add the entry to the main terms index. */
4622 rc = sqlite3Fts5HashWrite(
4623 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
4624 );
4625
4626 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
4627 int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
4628 if( nByte ){
4629 rc = sqlite3Fts5HashWrite(p->pHash,
4630 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
4631 nByte
4632 );
4633 }
4634 }
4635
4636 return rc;
4637 }
4638
4639 /*
4640 ** Open a new iterator to iterate though all rowid that match the
4641 ** specified token or token prefix.
4642 */
4643 int sqlite3Fts5IndexQuery(
4644 Fts5Index *p, /* FTS index to query */
4645 const char *pToken, int nToken, /* Token (or prefix) to query for */
4646 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
4647 Fts5Colset *pColset, /* Match these columns only */
4648 Fts5IndexIter **ppIter /* OUT: New iterator object */
4649 ){
4650 Fts5Config *pConfig = p->pConfig;
4651 Fts5IndexIter *pRet = 0;
4652 int iIdx = 0;
4653 Fts5Buffer buf = {0, 0, 0};
4654
4655 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
4656 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
4657
4658 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
4659 memcpy(&buf.p[1], pToken, nToken);
4660
4661 #ifdef SQLITE_DEBUG
4662 /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
4663 ** prefix-query. Instead of using a prefix-index (if one exists),
4664 ** evaluate the prefix query using the main FTS index. This is used
4665 ** for internal sanity checking by the integrity-check in debug
4666 ** mode only. */
4667 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
4668 assert( flags & FTS5INDEX_QUERY_PREFIX );
4669 iIdx = 1+pConfig->nPrefix;
4670 }else
4671 #endif
4672 if( flags & FTS5INDEX_QUERY_PREFIX ){
4673 int nChar = fts5IndexCharlen(pToken, nToken);
4674 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
4675 if( pConfig->aPrefix[iIdx-1]==nChar ) break;
4676 }
4677 }
4678
4679 if( iIdx<=pConfig->nPrefix ){
4680 Fts5Structure *pStruct = fts5StructureRead(p);
4681 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
4682 if( pStruct ){
4683 fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet);
4684 fts5StructureRelease(pStruct);
4685 }
4686 }else{
4687 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
4688 buf.p[0] = FTS5_MAIN_PREFIX;
4689 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
4690 }
4691
4692 if( p->rc ){
4693 sqlite3Fts5IterClose(pRet);
4694 pRet = 0;
4695 fts5CloseReader(p);
4696 }
4697 *ppIter = pRet;
4698 sqlite3Fts5BufferFree(&buf);
4699 }
4700 return fts5IndexReturn(p);
4701 }
4702
4703 /*
4704 ** Return true if the iterator passed as the only argument is at EOF.
4705 */
4706 int sqlite3Fts5IterEof(Fts5IndexIter *pIter){
4707 assert( pIter->pIndex->rc==SQLITE_OK );
4708 return pIter->bEof;
4709 }
4710
4711 /*
4712 ** Move to the next matching rowid.
4713 */
4714 int sqlite3Fts5IterNext(Fts5IndexIter *pIter){
4715 assert( pIter->pIndex->rc==SQLITE_OK );
4716 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
4717 return fts5IndexReturn(pIter->pIndex);
4718 }
4719
4720 /*
4721 ** Move to the next matching term/rowid. Used by the fts5vocab module.
4722 */
4723 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
4724 Fts5Index *p = pIter->pIndex;
4725
4726 assert( pIter->pIndex->rc==SQLITE_OK );
4727
4728 fts5MultiIterNext(p, pIter, 0, 0);
4729 if( p->rc==SQLITE_OK ){
4730 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4731 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
4732 fts5DataRelease(pSeg->pLeaf);
4733 pSeg->pLeaf = 0;
4734 pIter->bEof = 1;
4735 }
4736 }
4737
4738 return fts5IndexReturn(pIter->pIndex);
4739 }
4740
4741 /*
4742 ** Move to the next matching rowid that occurs at or after iMatch. The
4743 ** definition of "at or after" depends on whether this iterator iterates
4744 ** in ascending or descending rowid order.
4745 */
4746 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){
4747 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
4748 return fts5IndexReturn(pIter->pIndex);
4749 }
4750
4751 /*
4752 ** Return the current rowid.
4753 */
4754 i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
4755 return fts5MultiIterRowid(pIter);
4756 }
4757
4758 /*
4759 ** Return the current term.
4760 */
4761 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
4762 int n;
4763 const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
4764 *pn = n-1;
4765 return &z[1];
4766 }
4767
4768
4769 static int fts5IndexExtractColset (
4770 Fts5Colset *pColset, /* Colset to filter on */
4771 const u8 *pPos, int nPos, /* Position list */
4772 Fts5Buffer *pBuf /* Output buffer */
4773 ){
4774 int rc = SQLITE_OK;
4775 int i;
4776
4777 fts5BufferZero(pBuf);
4778 for(i=0; i<pColset->nCol; i++){
4779 const u8 *pSub = pPos;
4780 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
4781 if( nSub ){
4782 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
4783 }
4784 }
4785 return rc;
4786 }
4787
4788
4789 /*
4790 ** Return a pointer to a buffer containing a copy of the position list for
4791 ** the current entry. Output variable *pn is set to the size of the buffer
4792 ** in bytes before returning.
4793 **
4794 ** The returned position list does not include the "number of bytes" varint
4795 ** field that starts the position list on disk.
4796 */
4797 int sqlite3Fts5IterPoslist(
4798 Fts5IndexIter *pIter,
4799 Fts5Colset *pColset, /* Column filter (or NULL) */
4800 const u8 **pp, /* OUT: Pointer to position-list data */
4801 int *pn, /* OUT: Size of position-list in bytes */
4802 i64 *piRowid /* OUT: Current rowid */
4803 ){
4804 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4805 assert( pIter->pIndex->rc==SQLITE_OK );
4806 *piRowid = pSeg->iRowid;
4807 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
4808 u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
4809 if( pColset==0 || pIter->bFiltered ){
4810 *pn = pSeg->nPos;
4811 *pp = pPos;
4812 }else if( pColset->nCol==1 ){
4813 *pp = pPos;
4814 *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
4815 }else{
4816 fts5BufferZero(&pIter->poslist);
4817 fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
4818 *pp = pIter->poslist.p;
4819 *pn = pIter->poslist.n;
4820 }
4821 }else{
4822 fts5BufferZero(&pIter->poslist);
4823 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
4824 *pp = pIter->poslist.p;
4825 *pn = pIter->poslist.n;
4826 }
4827 return fts5IndexReturn(pIter->pIndex);
4828 }
4829
4830 /*
4831 ** This function is similar to sqlite3Fts5IterPoslist(), except that it
4832 ** copies the position list into the buffer supplied as the second
4833 ** argument.
4834 */
4835 int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
4836 Fts5Index *p = pIter->pIndex;
4837 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4838 assert( p->rc==SQLITE_OK );
4839 fts5BufferZero(pBuf);
4840 fts5SegiterPoslist(p, pSeg, 0, pBuf);
4841 return fts5IndexReturn(p);
4842 }
4843
4844 /*
4845 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
4846 */
4847 void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
4848 if( pIter ){
4849 Fts5Index *pIndex = pIter->pIndex;
4850 fts5MultiIterFree(pIter->pIndex, pIter);
4851 fts5CloseReader(pIndex);
4852 }
4853 }
4854
4855 /*
4856 ** Read and decode the "averages" record from the database.
4857 **
4858 ** Parameter anSize must point to an array of size nCol, where nCol is
4859 ** the number of user defined columns in the FTS table.
4860 */
4861 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
4862 int nCol = p->pConfig->nCol;
4863 Fts5Data *pData;
4864
4865 *pnRow = 0;
4866 memset(anSize, 0, sizeof(i64) * nCol);
4867 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
4868 if( p->rc==SQLITE_OK && pData->nn ){
4869 int i = 0;
4870 int iCol;
4871 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
4872 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
4873 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
4874 }
4875 }
4876
4877 fts5DataRelease(pData);
4878 return fts5IndexReturn(p);
4879 }
4880
4881 /*
4882 ** Replace the current "averages" record with the contents of the buffer
4883 ** supplied as the second argument.
4884 */
4885 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
4886 assert( p->rc==SQLITE_OK );
4887 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
4888 return fts5IndexReturn(p);
4889 }
4890
4891 /*
4892 ** Return the total number of blocks this module has read from the %_data
4893 ** table since it was created.
4894 */
4895 int sqlite3Fts5IndexReads(Fts5Index *p){
4896 return p->nRead;
4897 }
4898
4899 /*
4900 ** Set the 32-bit cookie value stored at the start of all structure
4901 ** records to the value passed as the second argument.
4902 **
4903 ** Return SQLITE_OK if successful, or an SQLite error code if an error
4904 ** occurs.
4905 */
4906 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
4907 int rc; /* Return code */
4908 Fts5Config *pConfig = p->pConfig; /* Configuration object */
4909 u8 aCookie[4]; /* Binary representation of iNew */
4910 sqlite3_blob *pBlob = 0;
4911
4912 assert( p->rc==SQLITE_OK );
4913 sqlite3Fts5Put32(aCookie, iNew);
4914
4915 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
4916 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
4917 );
4918 if( rc==SQLITE_OK ){
4919 sqlite3_blob_write(pBlob, aCookie, 4, 0);
4920 rc = sqlite3_blob_close(pBlob);
4921 }
4922
4923 return rc;
4924 }
4925
4926 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
4927 Fts5Structure *pStruct;
4928 pStruct = fts5StructureRead(p);
4929 fts5StructureRelease(pStruct);
4930 return fts5IndexReturn(p);
4931 }
4932
4933
4934 /*************************************************************************
4935 **************************************************************************
4936 ** Below this point is the implementation of the integrity-check
4937 ** functionality.
4938 */
4939
4940 /*
4941 ** Return a simple checksum value based on the arguments.
4942 */
4943 static u64 fts5IndexEntryCksum(
4944 i64 iRowid,
4945 int iCol,
4946 int iPos,
4947 int iIdx,
4948 const char *pTerm,
4949 int nTerm
4950 ){
4951 int i;
4952 u64 ret = iRowid;
4953 ret += (ret<<3) + iCol;
4954 ret += (ret<<3) + iPos;
4955 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
4956 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
4957 return ret;
4958 }
4959
4960 #ifdef SQLITE_DEBUG
4961 /*
4962 ** This function is purely an internal test. It does not contribute to
4963 ** FTS functionality, or even the integrity-check, in any way.
4964 **
4965 ** Instead, it tests that the same set of pgno/rowid combinations are
4966 ** visited regardless of whether the doclist-index identified by parameters
4967 ** iSegid/iLeaf is iterated in forwards or reverse order.
4968 */
4969 static void fts5TestDlidxReverse(
4970 Fts5Index *p,
4971 int iSegid, /* Segment id to load from */
4972 int iLeaf /* Load doclist-index for this leaf */
4973 ){
4974 Fts5DlidxIter *pDlidx = 0;
4975 u64 cksum1 = 13;
4976 u64 cksum2 = 13;
4977
4978 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
4979 fts5DlidxIterEof(p, pDlidx)==0;
4980 fts5DlidxIterNext(p, pDlidx)
4981 ){
4982 i64 iRowid = fts5DlidxIterRowid(pDlidx);
4983 int pgno = fts5DlidxIterPgno(pDlidx);
4984 assert( pgno>iLeaf );
4985 cksum1 += iRowid + ((i64)pgno<<32);
4986 }
4987 fts5DlidxIterFree(pDlidx);
4988 pDlidx = 0;
4989
4990 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
4991 fts5DlidxIterEof(p, pDlidx)==0;
4992 fts5DlidxIterPrev(p, pDlidx)
4993 ){
4994 i64 iRowid = fts5DlidxIterRowid(pDlidx);
4995 int pgno = fts5DlidxIterPgno(pDlidx);
4996 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
4997 cksum2 += iRowid + ((i64)pgno<<32);
4998 }
4999 fts5DlidxIterFree(pDlidx);
5000 pDlidx = 0;
5001
5002 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5003 }
5004
5005 static int fts5QueryCksum(
5006 Fts5Index *p, /* Fts5 index object */
5007 int iIdx,
5008 const char *z, /* Index key to query for */
5009 int n, /* Size of index key in bytes */
5010 int flags, /* Flags for Fts5IndexQuery */
5011 u64 *pCksum /* IN/OUT: Checksum value */
5012 ){
5013 u64 cksum = *pCksum;
5014 Fts5IndexIter *pIdxIter = 0;
5015 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);
5016
5017 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
5018 i64 dummy;
5019 const u8 *pPos;
5020 int nPos;
5021 i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
5022 rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy);
5023 if( rc==SQLITE_OK ){
5024 Fts5PoslistReader sReader;
5025 for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader);
5026 sReader.bEof==0;
5027 sqlite3Fts5PoslistReaderNext(&sReader)
5028 ){
5029 int iCol = FTS5_POS2COLUMN(sReader.iPos);
5030 int iOff = FTS5_POS2OFFSET(sReader.iPos);
5031 cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
5032 }
5033 rc = sqlite3Fts5IterNext(pIdxIter);
5034 }
5035 }
5036 sqlite3Fts5IterClose(pIdxIter);
5037
5038 *pCksum = cksum;
5039 return rc;
5040 }
5041
5042
5043 /*
5044 ** This function is also purely an internal test. It does not contribute to
5045 ** FTS functionality, or even the integrity-check, in any way.
5046 */
5047 static void fts5TestTerm(
5048 Fts5Index *p,
5049 Fts5Buffer *pPrev, /* Previous term */
5050 const char *z, int n, /* Possibly new term to test */
5051 u64 expected,
5052 u64 *pCksum
5053 ){
5054 int rc = p->rc;
5055 if( pPrev->n==0 ){
5056 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5057 }else
5058 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
5059 u64 cksum3 = *pCksum;
5060 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
5061 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
5062 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
5063 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
5064 u64 ck1 = 0;
5065 u64 ck2 = 0;
5066
5067 /* Check that the results returned for ASC and DESC queries are
5068 ** the same. If not, call this corruption. */
5069 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
5070 if( rc==SQLITE_OK ){
5071 int f = flags|FTS5INDEX_QUERY_DESC;
5072 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5073 }
5074 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5075
5076 /* If this is a prefix query, check that the results returned if the
5077 ** the index is disabled are the same. In both ASC and DESC order.
5078 **
5079 ** This check may only be performed if the hash table is empty. This
5080 ** is because the hash table only supports a single scan query at
5081 ** a time, and the multi-iter loop from which this function is called
5082 ** is already performing such a scan. */
5083 if( p->nPendingData==0 ){
5084 if( iIdx>0 && rc==SQLITE_OK ){
5085 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
5086 ck2 = 0;
5087 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5088 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5089 }
5090 if( iIdx>0 && rc==SQLITE_OK ){
5091 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
5092 ck2 = 0;
5093 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5094 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5095 }
5096 }
5097
5098 cksum3 ^= ck1;
5099 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5100
5101 if( rc==SQLITE_OK && cksum3!=expected ){
5102 rc = FTS5_CORRUPT;
5103 }
5104 *pCksum = cksum3;
5105 }
5106 p->rc = rc;
5107 }
5108
5109 #else
5110 # define fts5TestDlidxReverse(x,y,z)
5111 # define fts5TestTerm(u,v,w,x,y,z)
5112 #endif
5113
5114 /*
5115 ** Check that:
5116 **
5117 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
5118 ** contain zero terms.
5119 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
5120 ** contain zero rowids.
5121 */
5122 static void fts5IndexIntegrityCheckEmpty(
5123 Fts5Index *p,
5124 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
5125 int iFirst,
5126 int iNoRowid,
5127 int iLast
5128 ){
5129 int i;
5130
5131 /* Now check that the iter.nEmpty leaves following the current leaf
5132 ** (a) exist and (b) contain no terms. */
5133 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
5134 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
5135 if( pLeaf ){
5136 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
5137 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
5138 }
5139 fts5DataRelease(pLeaf);
5140 }
5141 }
5142
5143 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
5144 int iTermOff = 0;
5145 int ii;
5146
5147 Fts5Buffer buf1 = {0,0,0};
5148 Fts5Buffer buf2 = {0,0,0};
5149
5150 ii = pLeaf->szLeaf;
5151 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
5152 int res;
5153 int iOff;
5154 int nIncr;
5155
5156 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
5157 iTermOff += nIncr;
5158 iOff = iTermOff;
5159
5160 if( iOff>=pLeaf->szLeaf ){
5161 p->rc = FTS5_CORRUPT;
5162 }else if( iTermOff==nIncr ){
5163 int nByte;
5164 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
5165 if( (iOff+nByte)>pLeaf->szLeaf ){
5166 p->rc = FTS5_CORRUPT;
5167 }else{
5168 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
5169 }
5170 }else{
5171 int nKeep, nByte;
5172 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
5173 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
5174 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
5175 p->rc = FTS5_CORRUPT;
5176 }else{
5177 buf1.n = nKeep;
5178 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
5179 }
5180
5181 if( p->rc==SQLITE_OK ){
5182 res = fts5BufferCompare(&buf1, &buf2);
5183 if( res<=0 ) p->rc = FTS5_CORRUPT;
5184 }
5185 }
5186 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
5187 }
5188
5189 fts5BufferFree(&buf1);
5190 fts5BufferFree(&buf2);
5191 }
5192
5193 static void fts5IndexIntegrityCheckSegment(
5194 Fts5Index *p, /* FTS5 backend object */
5195 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
5196 ){
5197 Fts5Config *pConfig = p->pConfig;
5198 sqlite3_stmt *pStmt = 0;
5199 int rc2;
5200 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
5201 int iDlidxPrevLeaf = pSeg->pgnoLast;
5202
5203 if( pSeg->pgnoFirst==0 ) return;
5204
5205 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
5206 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
5207 pConfig->zDb, pConfig->zName, pSeg->iSegid
5208 ));
5209
5210 /* Iterate through the b-tree hierarchy. */
5211 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
5212 i64 iRow; /* Rowid for this leaf */
5213 Fts5Data *pLeaf; /* Data for this leaf */
5214
5215 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
5216 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
5217 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
5218 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
5219
5220 /* If the leaf in question has already been trimmed from the segment,
5221 ** ignore this b-tree entry. Otherwise, load it into memory. */
5222 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
5223 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
5224 pLeaf = fts5DataRead(p, iRow);
5225 if( pLeaf==0 ) break;
5226
5227 /* Check that the leaf contains at least one term, and that it is equal
5228 ** to or larger than the split-key in zIdxTerm. Also check that if there
5229 ** is also a rowid pointer within the leaf page header, it points to a
5230 ** location before the term. */
5231 if( pLeaf->nn<=pLeaf->szLeaf ){
5232 p->rc = FTS5_CORRUPT;
5233 }else{
5234 int iOff; /* Offset of first term on leaf */
5235 int iRowidOff; /* Offset of first rowid on leaf */
5236 int nTerm; /* Size of term on leaf in bytes */
5237 int res; /* Comparison of term and split-key */
5238
5239 iOff = fts5LeafFirstTermOff(pLeaf);
5240 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
5241 if( iRowidOff>=iOff ){
5242 p->rc = FTS5_CORRUPT;
5243 }else{
5244 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
5245 res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
5246 if( res==0 ) res = nTerm - nIdxTerm;
5247 if( res<0 ) p->rc = FTS5_CORRUPT;
5248 }
5249
5250 fts5IntegrityCheckPgidx(p, pLeaf);
5251 }
5252 fts5DataRelease(pLeaf);
5253 if( p->rc ) break;
5254
5255 /* Now check that the iter.nEmpty leaves following the current leaf
5256 ** (a) exist and (b) contain no terms. */
5257 fts5IndexIntegrityCheckEmpty(
5258 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
5259 );
5260 if( p->rc ) break;
5261
5262 /* If there is a doclist-index, check that it looks right. */
5263 if( bIdxDlidx ){
5264 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
5265 int iPrevLeaf = iIdxLeaf;
5266 int iSegid = pSeg->iSegid;
5267 int iPg = 0;
5268 i64 iKey;
5269
5270 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
5271 fts5DlidxIterEof(p, pDlidx)==0;
5272 fts5DlidxIterNext(p, pDlidx)
5273 ){
5274
5275 /* Check any rowid-less pages that occur before the current leaf. */
5276 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
5277 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
5278 pLeaf = fts5DataRead(p, iKey);
5279 if( pLeaf ){
5280 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
5281 fts5DataRelease(pLeaf);
5282 }
5283 }
5284 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
5285
5286 /* Check that the leaf page indicated by the iterator really does
5287 ** contain the rowid suggested by the same. */
5288 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
5289 pLeaf = fts5DataRead(p, iKey);
5290 if( pLeaf ){
5291 i64 iRowid;
5292 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
5293 ASSERT_SZLEAF_OK(pLeaf);
5294 if( iRowidOff>=pLeaf->szLeaf ){
5295 p->rc = FTS5_CORRUPT;
5296 }else{
5297 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
5298 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
5299 }
5300 fts5DataRelease(pLeaf);
5301 }
5302 }
5303
5304 iDlidxPrevLeaf = iPg;
5305 fts5DlidxIterFree(pDlidx);
5306 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
5307 }else{
5308 iDlidxPrevLeaf = pSeg->pgnoLast;
5309 /* TODO: Check there is no doclist index */
5310 }
5311
5312 iIdxPrevLeaf = iIdxLeaf;
5313 }
5314
5315 rc2 = sqlite3_finalize(pStmt);
5316 if( p->rc==SQLITE_OK ) p->rc = rc2;
5317
5318 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
5319 #if 0
5320 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
5321 p->rc = FTS5_CORRUPT;
5322 }
5323 #endif
5324 }
5325
5326
5327 /*
5328 ** Run internal checks to ensure that the FTS index (a) is internally
5329 ** consistent and (b) contains entries for which the XOR of the checksums
5330 ** as calculated by fts5IndexEntryCksum() is cksum.
5331 **
5332 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
5333 ** checksum does not match. Return SQLITE_OK if all checks pass without
5334 ** error, or some other SQLite error code if another error (e.g. OOM)
5335 ** occurs.
5336 */
5337 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
5338 u64 cksum2 = 0; /* Checksum based on contents of indexes */
5339 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
5340 Fts5IndexIter *pIter; /* Used to iterate through entire index */
5341 Fts5Structure *pStruct; /* Index structure */
5342
5343 #ifdef SQLITE_DEBUG
5344 /* Used by extra internal tests only run if NDEBUG is not defined */
5345 u64 cksum3 = 0; /* Checksum based on contents of indexes */
5346 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
5347 #endif
5348
5349 /* Load the FTS index structure */
5350 pStruct = fts5StructureRead(p);
5351
5352 /* Check that the internal nodes of each segment match the leaves */
5353 if( pStruct ){
5354 int iLvl, iSeg;
5355 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
5356 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5357 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
5358 fts5IndexIntegrityCheckSegment(p, pSeg);
5359 }
5360 }
5361 }
5362
5363 /* The cksum argument passed to this function is a checksum calculated
5364 ** based on all expected entries in the FTS index (including prefix index
5365 ** entries). This block checks that a checksum calculated based on the
5366 ** actual contents of FTS index is identical.
5367 **
5368 ** Two versions of the same checksum are calculated. The first (stack
5369 ** variable cksum2) based on entries extracted from the full-text index
5370 ** while doing a linear scan of each individual index in turn.
5371 **
5372 ** As each term visited by the linear scans, a separate query for the
5373 ** same term is performed. cksum3 is calculated based on the entries
5374 ** extracted by these queries.
5375 */
5376 for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter);
5377 fts5MultiIterEof(p, pIter)==0;
5378 fts5MultiIterNext(p, pIter, 0, 0)
5379 ){
5380 int n; /* Size of term in bytes */
5381 i64 iPos = 0; /* Position read from poslist */
5382 int iOff = 0; /* Offset within poslist */
5383 i64 iRowid = fts5MultiIterRowid(pIter);
5384 char *z = (char*)fts5MultiIterTerm(pIter, &n);
5385
5386 /* If this is a new term, query for it. Update cksum3 with the results. */
5387 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
5388
5389 poslist.n = 0;
5390 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist);
5391 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
5392 int iCol = FTS5_POS2COLUMN(iPos);
5393 int iTokOff = FTS5_POS2OFFSET(iPos);
5394 cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
5395 }
5396 }
5397 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
5398
5399 fts5MultiIterFree(p, pIter);
5400 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
5401
5402 fts5StructureRelease(pStruct);
5403 #ifdef SQLITE_DEBUG
5404 fts5BufferFree(&term);
5405 #endif
5406 fts5BufferFree(&poslist);
5407 return fts5IndexReturn(p);
5408 }
5409
5410
5411 /*
5412 ** Calculate and return a checksum that is the XOR of the index entry
5413 ** checksum of all entries that would be generated by the token specified
5414 ** by the final 5 arguments.
5415 */
5416 u64 sqlite3Fts5IndexCksum(
5417 Fts5Config *pConfig, /* Configuration object */
5418 i64 iRowid, /* Document term appears in */
5419 int iCol, /* Column term appears in */
5420 int iPos, /* Position term appears in */
5421 const char *pTerm, int nTerm /* Term at iPos */
5422 ){
5423 u64 ret = 0; /* Return value */
5424 int iIdx; /* For iterating through indexes */
5425
5426 ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);
5427
5428 for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
5429 int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
5430 if( nByte ){
5431 ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte);
5432 }
5433 }
5434
5435 return ret;
5436 }
5437
5438 /*************************************************************************
5439 **************************************************************************
5440 ** Below this point is the implementation of the fts5_decode() scalar
5441 ** function only.
5442 */
5443
5444 /*
5445 ** Decode a segment-data rowid from the %_data table. This function is
5446 ** the opposite of macro FTS5_SEGMENT_ROWID().
5447 */
5448 static void fts5DecodeRowid(
5449 i64 iRowid, /* Rowid from %_data table */
5450 int *piSegid, /* OUT: Segment id */
5451 int *pbDlidx, /* OUT: Dlidx flag */
5452 int *piHeight, /* OUT: Height */
5453 int *piPgno /* OUT: Page number */
5454 ){
5455 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
5456 iRowid >>= FTS5_DATA_PAGE_B;
5457
5458 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
5459 iRowid >>= FTS5_DATA_HEIGHT_B;
5460
5461 *pbDlidx = (int)(iRowid & 0x0001);
5462 iRowid >>= FTS5_DATA_DLI_B;
5463
5464 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
5465 }
5466
5467 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
5468 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
5469 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
5470
5471 if( iSegid==0 ){
5472 if( iKey==FTS5_AVERAGES_ROWID ){
5473 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
5474 }else{
5475 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
5476 }
5477 }
5478 else{
5479 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
5480 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
5481 );
5482 }
5483 }
5484
5485 static void fts5DebugStructure(
5486 int *pRc, /* IN/OUT: error code */
5487 Fts5Buffer *pBuf,
5488 Fts5Structure *p
5489 ){
5490 int iLvl, iSeg; /* Iterate through levels, segments */
5491
5492 for(iLvl=0; iLvl<p->nLevel; iLvl++){
5493 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
5494 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
5495 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
5496 );
5497 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
5498 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
5499 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
5500 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
5501 );
5502 }
5503 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
5504 }
5505 }
5506
5507 /*
5508 ** This is part of the fts5_decode() debugging aid.
5509 **
5510 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
5511 ** function appends a human-readable representation of the same object
5512 ** to the buffer passed as the second argument.
5513 */
5514 static void fts5DecodeStructure(
5515 int *pRc, /* IN/OUT: error code */
5516 Fts5Buffer *pBuf,
5517 const u8 *pBlob, int nBlob
5518 ){
5519 int rc; /* Return code */
5520 Fts5Structure *p = 0; /* Decoded structure object */
5521
5522 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
5523 if( rc!=SQLITE_OK ){
5524 *pRc = rc;
5525 return;
5526 }
5527
5528 fts5DebugStructure(pRc, pBuf, p);
5529 fts5StructureRelease(p);
5530 }
5531
5532 /*
5533 ** This is part of the fts5_decode() debugging aid.
5534 **
5535 ** Arguments pBlob/nBlob contain an "averages" record. This function
5536 ** appends a human-readable representation of record to the buffer passed
5537 ** as the second argument.
5538 */
5539 static void fts5DecodeAverages(
5540 int *pRc, /* IN/OUT: error code */
5541 Fts5Buffer *pBuf,
5542 const u8 *pBlob, int nBlob
5543 ){
5544 int i = 0;
5545 const char *zSpace = "";
5546
5547 while( i<nBlob ){
5548 u64 iVal;
5549 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
5550 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
5551 zSpace = " ";
5552 }
5553 }
5554
5555 /*
5556 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
5557 ** each varint and append its string representation to buffer pBuf. Return
5558 ** after either the input buffer is exhausted or a 0 value is read.
5559 **
5560 ** The return value is the number of bytes read from the input buffer.
5561 */
5562 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
5563 int iOff = 0;
5564 while( iOff<n ){
5565 int iVal;
5566 iOff += fts5GetVarint32(&a[iOff], iVal);
5567 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
5568 }
5569 return iOff;
5570 }
5571
5572 /*
5573 ** The start of buffer (a/n) contains the start of a doclist. The doclist
5574 ** may or may not finish within the buffer. This function appends a text
5575 ** representation of the part of the doclist that is present to buffer
5576 ** pBuf.
5577 **
5578 ** The return value is the number of bytes read from the input buffer.
5579 */
5580 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
5581 i64 iDocid = 0;
5582 int iOff = 0;
5583
5584 if( n>0 ){
5585 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
5586 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
5587 }
5588 while( iOff<n ){
5589 int nPos;
5590 int bDel;
5591 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
5592 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
5593 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
5594 if( iOff<n ){
5595 i64 iDelta;
5596 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
5597 iDocid += iDelta;
5598 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
5599 }
5600 }
5601
5602 return iOff;
5603 }
5604
5605 /*
5606 ** The implementation of user-defined scalar function fts5_decode().
5607 */
5608 static void fts5DecodeFunction(
5609 sqlite3_context *pCtx, /* Function call context */
5610 int nArg, /* Number of args (always 2) */
5611 sqlite3_value **apVal /* Function arguments */
5612 ){
5613 i64 iRowid; /* Rowid for record being decoded */
5614 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
5615 const u8 *aBlob; int n; /* Record to decode */
5616 u8 *a = 0;
5617 Fts5Buffer s; /* Build up text to return here */
5618 int rc = SQLITE_OK; /* Return code */
5619 int nSpace = 0;
5620
5621 assert( nArg==2 );
5622 memset(&s, 0, sizeof(Fts5Buffer));
5623 iRowid = sqlite3_value_int64(apVal[0]);
5624
5625 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
5626 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
5627 ** buffer overreads even if the record is corrupt. */
5628 n = sqlite3_value_bytes(apVal[1]);
5629 aBlob = sqlite3_value_blob(apVal[1]);
5630 nSpace = n + FTS5_DATA_ZERO_PADDING;
5631 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
5632 if( a==0 ) goto decode_out;
5633 memcpy(a, aBlob, n);
5634
5635
5636 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
5637
5638 fts5DebugRowid(&rc, &s, iRowid);
5639 if( bDlidx ){
5640 Fts5Data dlidx;
5641 Fts5DlidxLvl lvl;
5642
5643 dlidx.p = a;
5644 dlidx.nn = n;
5645
5646 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
5647 lvl.pData = &dlidx;
5648 lvl.iLeafPgno = iPgno;
5649
5650 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
5651 sqlite3Fts5BufferAppendPrintf(&rc, &s,
5652 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
5653 );
5654 }
5655 }else if( iSegid==0 ){
5656 if( iRowid==FTS5_AVERAGES_ROWID ){
5657 fts5DecodeAverages(&rc, &s, a, n);
5658 }else{
5659 fts5DecodeStructure(&rc, &s, a, n);
5660 }
5661 }else{
5662 Fts5Buffer term; /* Current term read from page */
5663 int szLeaf; /* Offset of pgidx in a[] */
5664 int iPgidxOff;
5665 int iPgidxPrev = 0; /* Previous value read from pgidx */
5666 int iTermOff = 0;
5667 int iRowidOff = 0;
5668 int iOff;
5669 int nDoclist;
5670
5671 memset(&term, 0, sizeof(Fts5Buffer));
5672
5673 if( n<4 ){
5674 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
5675 goto decode_out;
5676 }else{
5677 iRowidOff = fts5GetU16(&a[0]);
5678 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
5679 if( iPgidxOff<n ){
5680 fts5GetVarint32(&a[iPgidxOff], iTermOff);
5681 }
5682 }
5683
5684 /* Decode the position list tail at the start of the page */
5685 if( iRowidOff!=0 ){
5686 iOff = iRowidOff;
5687 }else if( iTermOff!=0 ){
5688 iOff = iTermOff;
5689 }else{
5690 iOff = szLeaf;
5691 }
5692 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
5693
5694 /* Decode any more doclist data that appears on the page before the
5695 ** first term. */
5696 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
5697 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
5698
5699 while( iPgidxOff<n ){
5700 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
5701 int nByte; /* Bytes of data */
5702 int iEnd;
5703
5704 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
5705 iPgidxPrev += nByte;
5706 iOff = iPgidxPrev;
5707
5708 if( iPgidxOff<n ){
5709 fts5GetVarint32(&a[iPgidxOff], nByte);
5710 iEnd = iPgidxPrev + nByte;
5711 }else{
5712 iEnd = szLeaf;
5713 }
5714
5715 if( bFirst==0 ){
5716 iOff += fts5GetVarint32(&a[iOff], nByte);
5717 term.n = nByte;
5718 }
5719 iOff += fts5GetVarint32(&a[iOff], nByte);
5720 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
5721 iOff += nByte;
5722
5723 sqlite3Fts5BufferAppendPrintf(
5724 &rc, &s, " term=%.*s", term.n, (const char*)term.p
5725 );
5726 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
5727 }
5728
5729 fts5BufferFree(&term);
5730 }
5731
5732 decode_out:
5733 sqlite3_free(a);
5734 if( rc==SQLITE_OK ){
5735 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
5736 }else{
5737 sqlite3_result_error_code(pCtx, rc);
5738 }
5739 fts5BufferFree(&s);
5740 }
5741
5742 /*
5743 ** The implementation of user-defined scalar function fts5_rowid().
5744 */
5745 static void fts5RowidFunction(
5746 sqlite3_context *pCtx, /* Function call context */
5747 int nArg, /* Number of args (always 2) */
5748 sqlite3_value **apVal /* Function arguments */
5749 ){
5750 const char *zArg;
5751 if( nArg==0 ){
5752 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
5753 }else{
5754 zArg = (const char*)sqlite3_value_text(apVal[0]);
5755 if( 0==sqlite3_stricmp(zArg, "segment") ){
5756 i64 iRowid;
5757 int segid, pgno;
5758 if( nArg!=3 ){
5759 sqlite3_result_error(pCtx,
5760 "should be: fts5_rowid('segment', segid, pgno))", -1
5761 );
5762 }else{
5763 segid = sqlite3_value_int(apVal[1]);
5764 pgno = sqlite3_value_int(apVal[2]);
5765 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
5766 sqlite3_result_int64(pCtx, iRowid);
5767 }
5768 }else{
5769 sqlite3_result_error(pCtx,
5770 "first arg to fts5_rowid() must be 'segment'" , -1
5771 );
5772 }
5773 }
5774 }
5775
5776 /*
5777 ** This is called as part of registering the FTS5 module with database
5778 ** connection db. It registers several user-defined scalar functions useful
5779 ** with FTS5.
5780 **
5781 ** If successful, SQLITE_OK is returned. If an error occurs, some other
5782 ** SQLite error code is returned instead.
5783 */
5784 int sqlite3Fts5IndexInit(sqlite3 *db){
5785 int rc = sqlite3_create_function(
5786 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
5787 );
5788 if( rc==SQLITE_OK ){
5789 rc = sqlite3_create_function(
5790 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
5791 );
5792 }
5793 return rc;
5794 }
5795
OLDNEW
« no previous file with comments | « third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c ('k') | third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_main.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698