OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2014 May 31 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 ** Low level access to the FTS index stored in the database file. The |
| 14 ** routines in this file file implement all read and write access to the |
| 15 ** %_data table. Other parts of the system access this functionality via |
| 16 ** the interface defined in fts5Int.h. |
| 17 */ |
| 18 |
| 19 |
| 20 #include "fts5Int.h" |
| 21 |
| 22 /* |
| 23 ** Overview: |
| 24 ** |
| 25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table. |
| 26 ** As well as the main term index, there may be up to 31 prefix indexes. |
| 27 ** The format is similar to FTS3/4, except that: |
| 28 ** |
| 29 ** * all segment b-tree leaf data is stored in fixed size page records |
| 30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is |
| 31 ** taken to ensure it is possible to iterate in either direction through |
| 32 ** the entries in a doclist, or to seek to a specific entry within a |
| 33 ** doclist, without loading it into memory. |
| 34 ** |
| 35 ** * large doclists that span many pages have associated "doclist index" |
| 36 ** records that contain a copy of the first rowid on each page spanned by |
| 37 ** the doclist. This is used to speed up seek operations, and merges of |
| 38 ** large doclists with very small doclists. |
| 39 ** |
| 40 ** * extra fields in the "structure record" record the state of ongoing |
| 41 ** incremental merge operations. |
| 42 ** |
| 43 */ |
| 44 |
| 45 |
| 46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ |
| 47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ |
| 48 |
| 49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ |
| 50 |
| 51 #define FTS5_MAIN_PREFIX '0' |
| 52 |
| 53 #if FTS5_MAX_PREFIX_INDEXES > 31 |
| 54 # error "FTS5_MAX_PREFIX_INDEXES is too large" |
| 55 #endif |
| 56 |
| 57 /* |
| 58 ** Details: |
| 59 ** |
| 60 ** The %_data table managed by this module, |
| 61 ** |
| 62 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); |
| 63 ** |
| 64 ** , contains the following 5 types of records. See the comments surrounding |
| 65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are |
| 66 ** assigned to each fo them. |
| 67 ** |
| 68 ** 1. Structure Records: |
| 69 ** |
| 70 ** The set of segments that make up an index - the index structure - are |
| 71 ** recorded in a single record within the %_data table. The record consists |
| 72 ** of a single 32-bit configuration cookie value followed by a list of |
| 73 ** SQLite varints. If the FTS table features more than one index (because |
| 74 ** there are one or more prefix indexes), it is guaranteed that all share |
| 75 ** the same cookie value. |
| 76 ** |
| 77 ** Immediately following the configuration cookie, the record begins with |
| 78 ** three varints: |
| 79 ** |
| 80 ** + number of levels, |
| 81 ** + total number of segments on all levels, |
| 82 ** + value of write counter. |
| 83 ** |
| 84 ** Then, for each level from 0 to nMax: |
| 85 ** |
| 86 ** + number of input segments in ongoing merge. |
| 87 ** + total number of segments in level. |
| 88 ** + for each segment from oldest to newest: |
| 89 ** + segment id (always > 0) |
| 90 ** + first leaf page number (often 1, always greater than 0) |
| 91 ** + final leaf page number |
| 92 ** |
| 93 ** 2. The Averages Record: |
| 94 ** |
| 95 ** A single record within the %_data table. The data is a list of varints. |
| 96 ** The first value is the number of rows in the index. Then, for each column |
| 97 ** from left to right, the total number of tokens in the column for all |
| 98 ** rows of the table. |
| 99 ** |
| 100 ** 3. Segment leaves: |
| 101 ** |
| 102 ** TERM/DOCLIST FORMAT: |
| 103 ** |
| 104 ** Most of each segment leaf is taken up by term/doclist data. The |
| 105 ** general format of term/doclist, starting with the first term |
| 106 ** on the leaf page, is: |
| 107 ** |
| 108 ** varint : size of first term |
| 109 ** blob: first term data |
| 110 ** doclist: first doclist |
| 111 ** zero-or-more { |
| 112 ** varint: number of bytes in common with previous term |
| 113 ** varint: number of bytes of new term data (nNew) |
| 114 ** blob: nNew bytes of new term data |
| 115 ** doclist: next doclist |
| 116 ** } |
| 117 ** |
| 118 ** doclist format: |
| 119 ** |
| 120 ** varint: first rowid |
| 121 ** poslist: first poslist |
| 122 ** zero-or-more { |
| 123 ** varint: rowid delta (always > 0) |
| 124 ** poslist: next poslist |
| 125 ** } |
| 126 ** |
| 127 ** poslist format: |
| 128 ** |
| 129 ** varint: size of poslist in bytes multiplied by 2, not including |
| 130 ** this field. Plus 1 if this entry carries the "delete" flag. |
| 131 ** collist: collist for column 0 |
| 132 ** zero-or-more { |
| 133 ** 0x01 byte |
| 134 ** varint: column number (I) |
| 135 ** collist: collist for column I |
| 136 ** } |
| 137 ** |
| 138 ** collist format: |
| 139 ** |
| 140 ** varint: first offset + 2 |
| 141 ** zero-or-more { |
| 142 ** varint: offset delta + 2 |
| 143 ** } |
| 144 ** |
| 145 ** PAGE FORMAT |
| 146 ** |
| 147 ** Each leaf page begins with a 4-byte header containing 2 16-bit |
| 148 ** unsigned integer fields in big-endian format. They are: |
| 149 ** |
| 150 ** * The byte offset of the first rowid on the page, if it exists |
| 151 ** and occurs before the first term (otherwise 0). |
| 152 ** |
| 153 ** * The byte offset of the start of the page footer. If the page |
| 154 ** footer is 0 bytes in size, then this field is the same as the |
| 155 ** size of the leaf page in bytes. |
| 156 ** |
| 157 ** The page footer consists of a single varint for each term located |
| 158 ** on the page. Each varint is the byte offset of the current term |
| 159 ** within the page, delta-compressed against the previous value. In |
| 160 ** other words, the first varint in the footer is the byte offset of |
| 161 ** the first term, the second is the byte offset of the second less that |
| 162 ** of the first, and so on. |
| 163 ** |
| 164 ** The term/doclist format described above is accurate if the entire |
| 165 ** term/doclist data fits on a single leaf page. If this is not the case, |
| 166 ** the format is changed in two ways: |
| 167 ** |
| 168 ** + if the first rowid on a page occurs before the first term, it |
| 169 ** is stored as a literal value: |
| 170 ** |
| 171 ** varint: first rowid |
| 172 ** |
| 173 ** + the first term on each page is stored in the same way as the |
| 174 ** very first term of the segment: |
| 175 ** |
| 176 ** varint : size of first term |
| 177 ** blob: first term data |
| 178 ** |
| 179 ** 5. Segment doclist indexes: |
| 180 ** |
| 181 ** Doclist indexes are themselves b-trees, however they usually consist of |
| 182 ** a single leaf record only. The format of each doclist index leaf page |
| 183 ** is: |
| 184 ** |
| 185 ** * Flags byte. Bits are: |
| 186 ** 0x01: Clear if leaf is also the root page, otherwise set. |
| 187 ** |
| 188 ** * Page number of fts index leaf page. As a varint. |
| 189 ** |
| 190 ** * First rowid on page indicated by previous field. As a varint. |
| 191 ** |
| 192 ** * A list of varints, one for each subsequent termless page. A |
| 193 ** positive delta if the termless page contains at least one rowid, |
| 194 ** or an 0x00 byte otherwise. |
| 195 ** |
| 196 ** Internal doclist index nodes are: |
| 197 ** |
| 198 ** * Flags byte. Bits are: |
| 199 ** 0x01: Clear for root page, otherwise set. |
| 200 ** |
| 201 ** * Page number of first child page. As a varint. |
| 202 ** |
| 203 ** * Copy of first rowid on page indicated by previous field. As a varint. |
| 204 ** |
| 205 ** * A list of delta-encoded varints - the first rowid on each subsequent |
| 206 ** child page. |
| 207 ** |
| 208 */ |
| 209 |
| 210 /* |
| 211 ** Rowids for the averages and structure records in the %_data table. |
| 212 */ |
| 213 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ |
| 214 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ |
| 215 |
| 216 /* |
| 217 ** Macros determining the rowids used by segment leaves and dlidx leaves |
| 218 ** and nodes. All nodes and leaves are stored in the %_data table with large |
| 219 ** positive rowids. |
| 220 ** |
| 221 ** Each segment has a unique non-zero 16-bit id. |
| 222 ** |
| 223 ** The rowid for each segment leaf is found by passing the segment id and |
| 224 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered |
| 225 ** sequentially starting from 1. |
| 226 */ |
| 227 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ |
| 228 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ |
| 229 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */ |
| 230 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ |
| 231 |
| 232 #define fts5_dri(segid, dlidx, height, pgno) ( \ |
| 233 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ |
| 234 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ |
| 235 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ |
| 236 ((i64)(pgno)) \ |
| 237 ) |
| 238 |
| 239 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) |
| 240 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) |
| 241 |
| 242 /* |
| 243 ** Maximum segments permitted in a single index |
| 244 */ |
| 245 #define FTS5_MAX_SEGMENT 2000 |
| 246 |
| 247 #ifdef SQLITE_DEBUG |
| 248 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } |
| 249 #endif |
| 250 |
| 251 |
| 252 /* |
| 253 ** Each time a blob is read from the %_data table, it is padded with this |
| 254 ** many zero bytes. This makes it easier to decode the various record formats |
| 255 ** without overreading if the records are corrupt. |
| 256 */ |
| 257 #define FTS5_DATA_ZERO_PADDING 8 |
| 258 #define FTS5_DATA_PADDING 20 |
| 259 |
| 260 typedef struct Fts5Data Fts5Data; |
| 261 typedef struct Fts5DlidxIter Fts5DlidxIter; |
| 262 typedef struct Fts5DlidxLvl Fts5DlidxLvl; |
| 263 typedef struct Fts5DlidxWriter Fts5DlidxWriter; |
| 264 typedef struct Fts5Iter Fts5Iter; |
| 265 typedef struct Fts5PageWriter Fts5PageWriter; |
| 266 typedef struct Fts5SegIter Fts5SegIter; |
| 267 typedef struct Fts5DoclistIter Fts5DoclistIter; |
| 268 typedef struct Fts5SegWriter Fts5SegWriter; |
| 269 typedef struct Fts5Structure Fts5Structure; |
| 270 typedef struct Fts5StructureLevel Fts5StructureLevel; |
| 271 typedef struct Fts5StructureSegment Fts5StructureSegment; |
| 272 |
| 273 struct Fts5Data { |
| 274 u8 *p; /* Pointer to buffer containing record */ |
| 275 int nn; /* Size of record in bytes */ |
| 276 int szLeaf; /* Size of leaf without page-index */ |
| 277 }; |
| 278 |
| 279 /* |
| 280 ** One object per %_data table. |
| 281 */ |
| 282 struct Fts5Index { |
| 283 Fts5Config *pConfig; /* Virtual table configuration */ |
| 284 char *zDataTbl; /* Name of %_data table */ |
| 285 int nWorkUnit; /* Leaf pages in a "unit" of work */ |
| 286 |
| 287 /* |
| 288 ** Variables related to the accumulation of tokens and doclists within the |
| 289 ** in-memory hash tables before they are flushed to disk. |
| 290 */ |
| 291 Fts5Hash *pHash; /* Hash table for in-memory data */ |
| 292 int nPendingData; /* Current bytes of pending data */ |
| 293 i64 iWriteRowid; /* Rowid for current doc being written */ |
| 294 int bDelete; /* Current write is a delete */ |
| 295 |
| 296 /* Error state. */ |
| 297 int rc; /* Current error code */ |
| 298 |
| 299 /* State used by the fts5DataXXX() functions. */ |
| 300 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ |
| 301 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ |
| 302 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ |
| 303 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ |
| 304 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */ |
| 305 sqlite3_stmt *pIdxSelect; |
| 306 int nRead; /* Total number of blocks read */ |
| 307 |
| 308 sqlite3_stmt *pDataVersion; |
| 309 i64 iStructVersion; /* data_version when pStruct read */ |
| 310 Fts5Structure *pStruct; /* Current db structure (or NULL) */ |
| 311 }; |
| 312 |
| 313 struct Fts5DoclistIter { |
| 314 u8 *aEof; /* Pointer to 1 byte past end of doclist */ |
| 315 |
| 316 /* Output variables. aPoslist==0 at EOF */ |
| 317 i64 iRowid; |
| 318 u8 *aPoslist; |
| 319 int nPoslist; |
| 320 int nSize; |
| 321 }; |
| 322 |
| 323 /* |
| 324 ** The contents of the "structure" record for each index are represented |
| 325 ** using an Fts5Structure record in memory. Which uses instances of the |
| 326 ** other Fts5StructureXXX types as components. |
| 327 */ |
| 328 struct Fts5StructureSegment { |
| 329 int iSegid; /* Segment id */ |
| 330 int pgnoFirst; /* First leaf page number in segment */ |
| 331 int pgnoLast; /* Last leaf page number in segment */ |
| 332 }; |
| 333 struct Fts5StructureLevel { |
| 334 int nMerge; /* Number of segments in incr-merge */ |
| 335 int nSeg; /* Total number of segments on level */ |
| 336 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ |
| 337 }; |
| 338 struct Fts5Structure { |
| 339 int nRef; /* Object reference count */ |
| 340 u64 nWriteCounter; /* Total leaves written to level 0 */ |
| 341 int nSegment; /* Total segments in this structure */ |
| 342 int nLevel; /* Number of levels in this index */ |
| 343 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */ |
| 344 }; |
| 345 |
| 346 /* |
| 347 ** An object of type Fts5SegWriter is used to write to segments. |
| 348 */ |
| 349 struct Fts5PageWriter { |
| 350 int pgno; /* Page number for this page */ |
| 351 int iPrevPgidx; /* Previous value written into pgidx */ |
| 352 Fts5Buffer buf; /* Buffer containing leaf data */ |
| 353 Fts5Buffer pgidx; /* Buffer containing page-index */ |
| 354 Fts5Buffer term; /* Buffer containing previous term on page */ |
| 355 }; |
| 356 struct Fts5DlidxWriter { |
| 357 int pgno; /* Page number for this page */ |
| 358 int bPrevValid; /* True if iPrev is valid */ |
| 359 i64 iPrev; /* Previous rowid value written to page */ |
| 360 Fts5Buffer buf; /* Buffer containing page data */ |
| 361 }; |
| 362 struct Fts5SegWriter { |
| 363 int iSegid; /* Segid to write to */ |
| 364 Fts5PageWriter writer; /* PageWriter object */ |
| 365 i64 iPrevRowid; /* Previous rowid written to current leaf */ |
| 366 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ |
| 367 u8 bFirstRowidInPage; /* True if next rowid is first in page */ |
| 368 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ |
| 369 u8 bFirstTermInPage; /* True if next term will be first in leaf */ |
| 370 int nLeafWritten; /* Number of leaf pages written */ |
| 371 int nEmpty; /* Number of contiguous term-less nodes */ |
| 372 |
| 373 int nDlidx; /* Allocated size of aDlidx[] array */ |
| 374 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ |
| 375 |
| 376 /* Values to insert into the %_idx table */ |
| 377 Fts5Buffer btterm; /* Next term to insert into %_idx table */ |
| 378 int iBtPage; /* Page number corresponding to btterm */ |
| 379 }; |
| 380 |
| 381 typedef struct Fts5CResult Fts5CResult; |
| 382 struct Fts5CResult { |
| 383 u16 iFirst; /* aSeg[] index of firstest iterator */ |
| 384 u8 bTermEq; /* True if the terms are equal */ |
| 385 }; |
| 386 |
| 387 /* |
| 388 ** Object for iterating through a single segment, visiting each term/rowid |
| 389 ** pair in the segment. |
| 390 ** |
| 391 ** pSeg: |
| 392 ** The segment to iterate through. |
| 393 ** |
| 394 ** iLeafPgno: |
| 395 ** Current leaf page number within segment. |
| 396 ** |
| 397 ** iLeafOffset: |
| 398 ** Byte offset within the current leaf that is the first byte of the |
| 399 ** position list data (one byte passed the position-list size field). |
| 400 ** rowid field of the current entry. Usually this is the size field of the |
| 401 ** position list data. The exception is if the rowid for the current entry |
| 402 ** is the last thing on the leaf page. |
| 403 ** |
| 404 ** pLeaf: |
| 405 ** Buffer containing current leaf page data. Set to NULL at EOF. |
| 406 ** |
| 407 ** iTermLeafPgno, iTermLeafOffset: |
| 408 ** Leaf page number containing the last term read from the segment. And |
| 409 ** the offset immediately following the term data. |
| 410 ** |
| 411 ** flags: |
| 412 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: |
| 413 ** |
| 414 ** FTS5_SEGITER_ONETERM: |
| 415 ** If set, set the iterator to point to EOF after the current doclist |
| 416 ** has been exhausted. Do not proceed to the next term in the segment. |
| 417 ** |
| 418 ** FTS5_SEGITER_REVERSE: |
| 419 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If |
| 420 ** it is set, iterate through rowid in descending order instead of the |
| 421 ** default ascending order. |
| 422 ** |
| 423 ** iRowidOffset/nRowidOffset/aRowidOffset: |
| 424 ** These are used if the FTS5_SEGITER_REVERSE flag is set. |
| 425 ** |
| 426 ** For each rowid on the page corresponding to the current term, the |
| 427 ** corresponding aRowidOffset[] entry is set to the byte offset of the |
| 428 ** start of the "position-list-size" field within the page. |
| 429 ** |
| 430 ** iTermIdx: |
| 431 ** Index of current term on iTermLeafPgno. |
| 432 */ |
| 433 struct Fts5SegIter { |
| 434 Fts5StructureSegment *pSeg; /* Segment to iterate through */ |
| 435 int flags; /* Mask of configuration flags */ |
| 436 int iLeafPgno; /* Current leaf page number */ |
| 437 Fts5Data *pLeaf; /* Current leaf data */ |
| 438 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ |
| 439 int iLeafOffset; /* Byte offset within current leaf */ |
| 440 |
| 441 /* Next method */ |
| 442 void (*xNext)(Fts5Index*, Fts5SegIter*, int*); |
| 443 |
| 444 /* The page and offset from which the current term was read. The offset |
| 445 ** is the offset of the first rowid in the current doclist. */ |
| 446 int iTermLeafPgno; |
| 447 int iTermLeafOffset; |
| 448 |
| 449 int iPgidxOff; /* Next offset in pgidx */ |
| 450 int iEndofDoclist; |
| 451 |
| 452 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ |
| 453 int iRowidOffset; /* Current entry in aRowidOffset[] */ |
| 454 int nRowidOffset; /* Allocated size of aRowidOffset[] array */ |
| 455 int *aRowidOffset; /* Array of offset to rowid fields */ |
| 456 |
| 457 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ |
| 458 |
| 459 /* Variables populated based on current entry. */ |
| 460 Fts5Buffer term; /* Current term */ |
| 461 i64 iRowid; /* Current rowid */ |
| 462 int nPos; /* Number of bytes in current position list */ |
| 463 u8 bDel; /* True if the delete flag is set */ |
| 464 }; |
| 465 |
| 466 /* |
| 467 ** Argument is a pointer to an Fts5Data structure that contains a |
| 468 ** leaf page. |
| 469 */ |
| 470 #define ASSERT_SZLEAF_OK(x) assert( \ |
| 471 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ |
| 472 ) |
| 473 |
| 474 #define FTS5_SEGITER_ONETERM 0x01 |
| 475 #define FTS5_SEGITER_REVERSE 0x02 |
| 476 |
| 477 /* |
| 478 ** Argument is a pointer to an Fts5Data structure that contains a leaf |
| 479 ** page. This macro evaluates to true if the leaf contains no terms, or |
| 480 ** false if it contains at least one term. |
| 481 */ |
| 482 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) |
| 483 |
| 484 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) |
| 485 |
| 486 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) |
| 487 |
| 488 /* |
| 489 ** Object for iterating through the merged results of one or more segments, |
| 490 ** visiting each term/rowid pair in the merged data. |
| 491 ** |
| 492 ** nSeg is always a power of two greater than or equal to the number of |
| 493 ** segments that this object is merging data from. Both the aSeg[] and |
| 494 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded |
| 495 ** with zeroed objects - these are handled as if they were iterators opened |
| 496 ** on empty segments. |
| 497 ** |
| 498 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an |
| 499 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the |
| 500 ** comparison in this context is the index of the iterator that currently |
| 501 ** points to the smaller term/rowid combination. Iterators at EOF are |
| 502 ** considered to be greater than all other iterators. |
| 503 ** |
| 504 ** aFirst[1] contains the index in aSeg[] of the iterator that points to |
| 505 ** the smallest key overall. aFirst[0] is unused. |
| 506 ** |
| 507 ** poslist: |
| 508 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. |
| 509 ** There is no way to tell if this is populated or not. |
| 510 */ |
| 511 struct Fts5Iter { |
| 512 Fts5IndexIter base; /* Base class containing output vars */ |
| 513 |
| 514 Fts5Index *pIndex; /* Index that owns this iterator */ |
| 515 Fts5Structure *pStruct; /* Database structure for this iterator */ |
| 516 Fts5Buffer poslist; /* Buffer containing current poslist */ |
| 517 Fts5Colset *pColset; /* Restrict matches to these columns */ |
| 518 |
| 519 /* Invoked to set output variables. */ |
| 520 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); |
| 521 |
| 522 int nSeg; /* Size of aSeg[] array */ |
| 523 int bRev; /* True to iterate in reverse order */ |
| 524 u8 bSkipEmpty; /* True to skip deleted entries */ |
| 525 |
| 526 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ |
| 527 Fts5CResult *aFirst; /* Current merge state (see above) */ |
| 528 Fts5SegIter aSeg[1]; /* Array of segment iterators */ |
| 529 }; |
| 530 |
| 531 |
| 532 /* |
| 533 ** An instance of the following type is used to iterate through the contents |
| 534 ** of a doclist-index record. |
| 535 ** |
| 536 ** pData: |
| 537 ** Record containing the doclist-index data. |
| 538 ** |
| 539 ** bEof: |
| 540 ** Set to true once iterator has reached EOF. |
| 541 ** |
| 542 ** iOff: |
| 543 ** Set to the current offset within record pData. |
| 544 */ |
| 545 struct Fts5DlidxLvl { |
| 546 Fts5Data *pData; /* Data for current page of this level */ |
| 547 int iOff; /* Current offset into pData */ |
| 548 int bEof; /* At EOF already */ |
| 549 int iFirstOff; /* Used by reverse iterators */ |
| 550 |
| 551 /* Output variables */ |
| 552 int iLeafPgno; /* Page number of current leaf page */ |
| 553 i64 iRowid; /* First rowid on leaf iLeafPgno */ |
| 554 }; |
| 555 struct Fts5DlidxIter { |
| 556 int nLvl; |
| 557 int iSegid; |
| 558 Fts5DlidxLvl aLvl[1]; |
| 559 }; |
| 560 |
| 561 static void fts5PutU16(u8 *aOut, u16 iVal){ |
| 562 aOut[0] = (iVal>>8); |
| 563 aOut[1] = (iVal&0xFF); |
| 564 } |
| 565 |
| 566 static u16 fts5GetU16(const u8 *aIn){ |
| 567 return ((u16)aIn[0] << 8) + aIn[1]; |
| 568 } |
| 569 |
| 570 /* |
| 571 ** Allocate and return a buffer at least nByte bytes in size. |
| 572 ** |
| 573 ** If an OOM error is encountered, return NULL and set the error code in |
| 574 ** the Fts5Index handle passed as the first argument. |
| 575 */ |
| 576 static void *fts5IdxMalloc(Fts5Index *p, int nByte){ |
| 577 return sqlite3Fts5MallocZero(&p->rc, nByte); |
| 578 } |
| 579 |
| 580 /* |
| 581 ** Compare the contents of the pLeft buffer with the pRight/nRight blob. |
| 582 ** |
| 583 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or |
| 584 ** +ve if pRight is smaller than pLeft. In other words: |
| 585 ** |
| 586 ** res = *pLeft - *pRight |
| 587 */ |
| 588 #ifdef SQLITE_DEBUG |
| 589 static int fts5BufferCompareBlob( |
| 590 Fts5Buffer *pLeft, /* Left hand side of comparison */ |
| 591 const u8 *pRight, int nRight /* Right hand side of comparison */ |
| 592 ){ |
| 593 int nCmp = MIN(pLeft->n, nRight); |
| 594 int res = memcmp(pLeft->p, pRight, nCmp); |
| 595 return (res==0 ? (pLeft->n - nRight) : res); |
| 596 } |
| 597 #endif |
| 598 |
| 599 /* |
| 600 ** Compare the contents of the two buffers using memcmp(). If one buffer |
| 601 ** is a prefix of the other, it is considered the lesser. |
| 602 ** |
| 603 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or |
| 604 ** +ve if pRight is smaller than pLeft. In other words: |
| 605 ** |
| 606 ** res = *pLeft - *pRight |
| 607 */ |
| 608 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ |
| 609 int nCmp = MIN(pLeft->n, pRight->n); |
| 610 int res = memcmp(pLeft->p, pRight->p, nCmp); |
| 611 return (res==0 ? (pLeft->n - pRight->n) : res); |
| 612 } |
| 613 |
| 614 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ |
| 615 int ret; |
| 616 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); |
| 617 return ret; |
| 618 } |
| 619 |
| 620 /* |
| 621 ** Close the read-only blob handle, if it is open. |
| 622 */ |
| 623 static void fts5CloseReader(Fts5Index *p){ |
| 624 if( p->pReader ){ |
| 625 sqlite3_blob *pReader = p->pReader; |
| 626 p->pReader = 0; |
| 627 sqlite3_blob_close(pReader); |
| 628 } |
| 629 } |
| 630 |
| 631 |
| 632 /* |
| 633 ** Retrieve a record from the %_data table. |
| 634 ** |
| 635 ** If an error occurs, NULL is returned and an error left in the |
| 636 ** Fts5Index object. |
| 637 */ |
| 638 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ |
| 639 Fts5Data *pRet = 0; |
| 640 if( p->rc==SQLITE_OK ){ |
| 641 int rc = SQLITE_OK; |
| 642 |
| 643 if( p->pReader ){ |
| 644 /* This call may return SQLITE_ABORT if there has been a savepoint |
| 645 ** rollback since it was last used. In this case a new blob handle |
| 646 ** is required. */ |
| 647 sqlite3_blob *pBlob = p->pReader; |
| 648 p->pReader = 0; |
| 649 rc = sqlite3_blob_reopen(pBlob, iRowid); |
| 650 assert( p->pReader==0 ); |
| 651 p->pReader = pBlob; |
| 652 if( rc!=SQLITE_OK ){ |
| 653 fts5CloseReader(p); |
| 654 } |
| 655 if( rc==SQLITE_ABORT ) rc = SQLITE_OK; |
| 656 } |
| 657 |
| 658 /* If the blob handle is not open at this point, open it and seek |
| 659 ** to the requested entry. */ |
| 660 if( p->pReader==0 && rc==SQLITE_OK ){ |
| 661 Fts5Config *pConfig = p->pConfig; |
| 662 rc = sqlite3_blob_open(pConfig->db, |
| 663 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader |
| 664 ); |
| 665 } |
| 666 |
| 667 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls |
| 668 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. |
| 669 ** All the reasons those functions might return SQLITE_ERROR - missing |
| 670 ** table, missing row, non-blob/text in block column - indicate |
| 671 ** backing store corruption. */ |
| 672 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; |
| 673 |
| 674 if( rc==SQLITE_OK ){ |
| 675 u8 *aOut = 0; /* Read blob data into this buffer */ |
| 676 int nByte = sqlite3_blob_bytes(p->pReader); |
| 677 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; |
| 678 pRet = (Fts5Data*)sqlite3_malloc(nAlloc); |
| 679 if( pRet ){ |
| 680 pRet->nn = nByte; |
| 681 aOut = pRet->p = (u8*)&pRet[1]; |
| 682 }else{ |
| 683 rc = SQLITE_NOMEM; |
| 684 } |
| 685 |
| 686 if( rc==SQLITE_OK ){ |
| 687 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); |
| 688 } |
| 689 if( rc!=SQLITE_OK ){ |
| 690 sqlite3_free(pRet); |
| 691 pRet = 0; |
| 692 }else{ |
| 693 /* TODO1: Fix this */ |
| 694 pRet->szLeaf = fts5GetU16(&pRet->p[2]); |
| 695 } |
| 696 } |
| 697 p->rc = rc; |
| 698 p->nRead++; |
| 699 } |
| 700 |
| 701 assert( (pRet==0)==(p->rc!=SQLITE_OK) ); |
| 702 return pRet; |
| 703 } |
| 704 |
| 705 /* |
| 706 ** Release a reference to data record returned by an earlier call to |
| 707 ** fts5DataRead(). |
| 708 */ |
| 709 static void fts5DataRelease(Fts5Data *pData){ |
| 710 sqlite3_free(pData); |
| 711 } |
| 712 |
| 713 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ |
| 714 Fts5Data *pRet = fts5DataRead(p, iRowid); |
| 715 if( pRet ){ |
| 716 if( pRet->szLeaf>pRet->nn ){ |
| 717 p->rc = FTS5_CORRUPT; |
| 718 fts5DataRelease(pRet); |
| 719 pRet = 0; |
| 720 } |
| 721 } |
| 722 return pRet; |
| 723 } |
| 724 |
| 725 static int fts5IndexPrepareStmt( |
| 726 Fts5Index *p, |
| 727 sqlite3_stmt **ppStmt, |
| 728 char *zSql |
| 729 ){ |
| 730 if( p->rc==SQLITE_OK ){ |
| 731 if( zSql ){ |
| 732 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0); |
| 733 }else{ |
| 734 p->rc = SQLITE_NOMEM; |
| 735 } |
| 736 } |
| 737 sqlite3_free(zSql); |
| 738 return p->rc; |
| 739 } |
| 740 |
| 741 |
| 742 /* |
| 743 ** INSERT OR REPLACE a record into the %_data table. |
| 744 */ |
| 745 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ |
| 746 if( p->rc!=SQLITE_OK ) return; |
| 747 |
| 748 if( p->pWriter==0 ){ |
| 749 Fts5Config *pConfig = p->pConfig; |
| 750 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( |
| 751 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", |
| 752 pConfig->zDb, pConfig->zName |
| 753 )); |
| 754 if( p->rc ) return; |
| 755 } |
| 756 |
| 757 sqlite3_bind_int64(p->pWriter, 1, iRowid); |
| 758 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); |
| 759 sqlite3_step(p->pWriter); |
| 760 p->rc = sqlite3_reset(p->pWriter); |
| 761 } |
| 762 |
| 763 /* |
| 764 ** Execute the following SQL: |
| 765 ** |
| 766 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast |
| 767 */ |
| 768 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ |
| 769 if( p->rc!=SQLITE_OK ) return; |
| 770 |
| 771 if( p->pDeleter==0 ){ |
| 772 int rc; |
| 773 Fts5Config *pConfig = p->pConfig; |
| 774 char *zSql = sqlite3_mprintf( |
| 775 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", |
| 776 pConfig->zDb, pConfig->zName |
| 777 ); |
| 778 if( zSql==0 ){ |
| 779 rc = SQLITE_NOMEM; |
| 780 }else{ |
| 781 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); |
| 782 sqlite3_free(zSql); |
| 783 } |
| 784 if( rc!=SQLITE_OK ){ |
| 785 p->rc = rc; |
| 786 return; |
| 787 } |
| 788 } |
| 789 |
| 790 sqlite3_bind_int64(p->pDeleter, 1, iFirst); |
| 791 sqlite3_bind_int64(p->pDeleter, 2, iLast); |
| 792 sqlite3_step(p->pDeleter); |
| 793 p->rc = sqlite3_reset(p->pDeleter); |
| 794 } |
| 795 |
| 796 /* |
| 797 ** Remove all records associated with segment iSegid. |
| 798 */ |
| 799 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ |
| 800 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); |
| 801 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; |
| 802 fts5DataDelete(p, iFirst, iLast); |
| 803 if( p->pIdxDeleter==0 ){ |
| 804 Fts5Config *pConfig = p->pConfig; |
| 805 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( |
| 806 "DELETE FROM '%q'.'%q_idx' WHERE segid=?", |
| 807 pConfig->zDb, pConfig->zName |
| 808 )); |
| 809 } |
| 810 if( p->rc==SQLITE_OK ){ |
| 811 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); |
| 812 sqlite3_step(p->pIdxDeleter); |
| 813 p->rc = sqlite3_reset(p->pIdxDeleter); |
| 814 } |
| 815 } |
| 816 |
| 817 /* |
| 818 ** Release a reference to an Fts5Structure object returned by an earlier |
| 819 ** call to fts5StructureRead() or fts5StructureDecode(). |
| 820 */ |
| 821 static void fts5StructureRelease(Fts5Structure *pStruct){ |
| 822 if( pStruct && 0>=(--pStruct->nRef) ){ |
| 823 int i; |
| 824 assert( pStruct->nRef==0 ); |
| 825 for(i=0; i<pStruct->nLevel; i++){ |
| 826 sqlite3_free(pStruct->aLevel[i].aSeg); |
| 827 } |
| 828 sqlite3_free(pStruct); |
| 829 } |
| 830 } |
| 831 |
| 832 static void fts5StructureRef(Fts5Structure *pStruct){ |
| 833 pStruct->nRef++; |
| 834 } |
| 835 |
| 836 /* |
| 837 ** Deserialize and return the structure record currently stored in serialized |
| 838 ** form within buffer pData/nData. |
| 839 ** |
| 840 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array |
| 841 ** are over-allocated by one slot. This allows the structure contents |
| 842 ** to be more easily edited. |
| 843 ** |
| 844 ** If an error occurs, *ppOut is set to NULL and an SQLite error code |
| 845 ** returned. Otherwise, *ppOut is set to point to the new object and |
| 846 ** SQLITE_OK returned. |
| 847 */ |
| 848 static int fts5StructureDecode( |
| 849 const u8 *pData, /* Buffer containing serialized structure */ |
| 850 int nData, /* Size of buffer pData in bytes */ |
| 851 int *piCookie, /* Configuration cookie value */ |
| 852 Fts5Structure **ppOut /* OUT: Deserialized object */ |
| 853 ){ |
| 854 int rc = SQLITE_OK; |
| 855 int i = 0; |
| 856 int iLvl; |
| 857 int nLevel = 0; |
| 858 int nSegment = 0; |
| 859 int nByte; /* Bytes of space to allocate at pRet */ |
| 860 Fts5Structure *pRet = 0; /* Structure object to return */ |
| 861 |
| 862 /* Grab the cookie value */ |
| 863 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); |
| 864 i = 4; |
| 865 |
| 866 /* Read the total number of levels and segments from the start of the |
| 867 ** structure record. */ |
| 868 i += fts5GetVarint32(&pData[i], nLevel); |
| 869 i += fts5GetVarint32(&pData[i], nSegment); |
| 870 nByte = ( |
| 871 sizeof(Fts5Structure) + /* Main structure */ |
| 872 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */ |
| 873 ); |
| 874 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); |
| 875 |
| 876 if( pRet ){ |
| 877 pRet->nRef = 1; |
| 878 pRet->nLevel = nLevel; |
| 879 pRet->nSegment = nSegment; |
| 880 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); |
| 881 |
| 882 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ |
| 883 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; |
| 884 int nTotal = 0; |
| 885 int iSeg; |
| 886 |
| 887 if( i>=nData ){ |
| 888 rc = FTS5_CORRUPT; |
| 889 }else{ |
| 890 i += fts5GetVarint32(&pData[i], pLvl->nMerge); |
| 891 i += fts5GetVarint32(&pData[i], nTotal); |
| 892 assert( nTotal>=pLvl->nMerge ); |
| 893 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, |
| 894 nTotal * sizeof(Fts5StructureSegment) |
| 895 ); |
| 896 } |
| 897 |
| 898 if( rc==SQLITE_OK ){ |
| 899 pLvl->nSeg = nTotal; |
| 900 for(iSeg=0; iSeg<nTotal; iSeg++){ |
| 901 if( i>=nData ){ |
| 902 rc = FTS5_CORRUPT; |
| 903 break; |
| 904 } |
| 905 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); |
| 906 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); |
| 907 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); |
| 908 } |
| 909 } |
| 910 } |
| 911 if( rc!=SQLITE_OK ){ |
| 912 fts5StructureRelease(pRet); |
| 913 pRet = 0; |
| 914 } |
| 915 } |
| 916 |
| 917 *ppOut = pRet; |
| 918 return rc; |
| 919 } |
| 920 |
| 921 /* |
| 922 ** |
| 923 */ |
| 924 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ |
| 925 if( *pRc==SQLITE_OK ){ |
| 926 Fts5Structure *pStruct = *ppStruct; |
| 927 int nLevel = pStruct->nLevel; |
| 928 int nByte = ( |
| 929 sizeof(Fts5Structure) + /* Main structure */ |
| 930 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ |
| 931 ); |
| 932 |
| 933 pStruct = sqlite3_realloc(pStruct, nByte); |
| 934 if( pStruct ){ |
| 935 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); |
| 936 pStruct->nLevel++; |
| 937 *ppStruct = pStruct; |
| 938 }else{ |
| 939 *pRc = SQLITE_NOMEM; |
| 940 } |
| 941 } |
| 942 } |
| 943 |
| 944 /* |
| 945 ** Extend level iLvl so that there is room for at least nExtra more |
| 946 ** segments. |
| 947 */ |
| 948 static void fts5StructureExtendLevel( |
| 949 int *pRc, |
| 950 Fts5Structure *pStruct, |
| 951 int iLvl, |
| 952 int nExtra, |
| 953 int bInsert |
| 954 ){ |
| 955 if( *pRc==SQLITE_OK ){ |
| 956 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 957 Fts5StructureSegment *aNew; |
| 958 int nByte; |
| 959 |
| 960 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); |
| 961 aNew = sqlite3_realloc(pLvl->aSeg, nByte); |
| 962 if( aNew ){ |
| 963 if( bInsert==0 ){ |
| 964 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); |
| 965 }else{ |
| 966 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); |
| 967 memmove(&aNew[nExtra], aNew, nMove); |
| 968 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); |
| 969 } |
| 970 pLvl->aSeg = aNew; |
| 971 }else{ |
| 972 *pRc = SQLITE_NOMEM; |
| 973 } |
| 974 } |
| 975 } |
| 976 |
| 977 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ |
| 978 Fts5Structure *pRet = 0; |
| 979 Fts5Config *pConfig = p->pConfig; |
| 980 int iCookie; /* Configuration cookie */ |
| 981 Fts5Data *pData; |
| 982 |
| 983 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); |
| 984 if( p->rc==SQLITE_OK ){ |
| 985 /* TODO: Do we need this if the leaf-index is appended? Probably... */ |
| 986 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); |
| 987 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); |
| 988 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ |
| 989 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); |
| 990 } |
| 991 fts5DataRelease(pData); |
| 992 if( p->rc!=SQLITE_OK ){ |
| 993 fts5StructureRelease(pRet); |
| 994 pRet = 0; |
| 995 } |
| 996 } |
| 997 |
| 998 return pRet; |
| 999 } |
| 1000 |
| 1001 static i64 fts5IndexDataVersion(Fts5Index *p){ |
| 1002 i64 iVersion = 0; |
| 1003 |
| 1004 if( p->rc==SQLITE_OK ){ |
| 1005 if( p->pDataVersion==0 ){ |
| 1006 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, |
| 1007 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) |
| 1008 ); |
| 1009 if( p->rc ) return 0; |
| 1010 } |
| 1011 |
| 1012 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ |
| 1013 iVersion = sqlite3_column_int64(p->pDataVersion, 0); |
| 1014 } |
| 1015 p->rc = sqlite3_reset(p->pDataVersion); |
| 1016 } |
| 1017 |
| 1018 return iVersion; |
| 1019 } |
| 1020 |
| 1021 /* |
| 1022 ** Read, deserialize and return the structure record. |
| 1023 ** |
| 1024 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array |
| 1025 ** are over-allocated as described for function fts5StructureDecode() |
| 1026 ** above. |
| 1027 ** |
| 1028 ** If an error occurs, NULL is returned and an error code left in the |
| 1029 ** Fts5Index handle. If an error has already occurred when this function |
| 1030 ** is called, it is a no-op. |
| 1031 */ |
| 1032 static Fts5Structure *fts5StructureRead(Fts5Index *p){ |
| 1033 |
| 1034 if( p->pStruct==0 ){ |
| 1035 p->iStructVersion = fts5IndexDataVersion(p); |
| 1036 if( p->rc==SQLITE_OK ){ |
| 1037 p->pStruct = fts5StructureReadUncached(p); |
| 1038 } |
| 1039 } |
| 1040 |
| 1041 #if 0 |
| 1042 else{ |
| 1043 Fts5Structure *pTest = fts5StructureReadUncached(p); |
| 1044 if( pTest ){ |
| 1045 int i, j; |
| 1046 assert_nc( p->pStruct->nSegment==pTest->nSegment ); |
| 1047 assert_nc( p->pStruct->nLevel==pTest->nLevel ); |
| 1048 for(i=0; i<pTest->nLevel; i++){ |
| 1049 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge ); |
| 1050 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg ); |
| 1051 for(j=0; j<pTest->aLevel[i].nSeg; j++){ |
| 1052 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; |
| 1053 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; |
| 1054 assert_nc( p1->iSegid==p2->iSegid ); |
| 1055 assert_nc( p1->pgnoFirst==p2->pgnoFirst ); |
| 1056 assert_nc( p1->pgnoLast==p2->pgnoLast ); |
| 1057 } |
| 1058 } |
| 1059 fts5StructureRelease(pTest); |
| 1060 } |
| 1061 } |
| 1062 #endif |
| 1063 |
| 1064 if( p->rc!=SQLITE_OK ) return 0; |
| 1065 assert( p->iStructVersion!=0 ); |
| 1066 assert( p->pStruct!=0 ); |
| 1067 fts5StructureRef(p->pStruct); |
| 1068 return p->pStruct; |
| 1069 } |
| 1070 |
| 1071 static void fts5StructureInvalidate(Fts5Index *p){ |
| 1072 if( p->pStruct ){ |
| 1073 fts5StructureRelease(p->pStruct); |
| 1074 p->pStruct = 0; |
| 1075 } |
| 1076 } |
| 1077 |
| 1078 /* |
| 1079 ** Return the total number of segments in index structure pStruct. This |
| 1080 ** function is only ever used as part of assert() conditions. |
| 1081 */ |
| 1082 #ifdef SQLITE_DEBUG |
| 1083 static int fts5StructureCountSegments(Fts5Structure *pStruct){ |
| 1084 int nSegment = 0; /* Total number of segments */ |
| 1085 if( pStruct ){ |
| 1086 int iLvl; /* Used to iterate through levels */ |
| 1087 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 1088 nSegment += pStruct->aLevel[iLvl].nSeg; |
| 1089 } |
| 1090 } |
| 1091 |
| 1092 return nSegment; |
| 1093 } |
| 1094 #endif |
| 1095 |
| 1096 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ |
| 1097 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \ |
| 1098 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \ |
| 1099 (pBuf)->n += nBlob; \ |
| 1100 } |
| 1101 |
| 1102 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \ |
| 1103 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \ |
| 1104 assert( (pBuf)->nSpace>=(pBuf)->n ); \ |
| 1105 } |
| 1106 |
| 1107 |
| 1108 /* |
| 1109 ** Serialize and store the "structure" record. |
| 1110 ** |
| 1111 ** If an error occurs, leave an error code in the Fts5Index object. If an |
| 1112 ** error has already occurred, this function is a no-op. |
| 1113 */ |
| 1114 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ |
| 1115 if( p->rc==SQLITE_OK ){ |
| 1116 Fts5Buffer buf; /* Buffer to serialize record into */ |
| 1117 int iLvl; /* Used to iterate through levels */ |
| 1118 int iCookie; /* Cookie value to store */ |
| 1119 |
| 1120 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); |
| 1121 memset(&buf, 0, sizeof(Fts5Buffer)); |
| 1122 |
| 1123 /* Append the current configuration cookie */ |
| 1124 iCookie = p->pConfig->iCookie; |
| 1125 if( iCookie<0 ) iCookie = 0; |
| 1126 |
| 1127 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ |
| 1128 sqlite3Fts5Put32(buf.p, iCookie); |
| 1129 buf.n = 4; |
| 1130 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); |
| 1131 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); |
| 1132 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); |
| 1133 } |
| 1134 |
| 1135 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 1136 int iSeg; /* Used to iterate through segments */ |
| 1137 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 1138 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); |
| 1139 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); |
| 1140 assert( pLvl->nMerge<=pLvl->nSeg ); |
| 1141 |
| 1142 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ |
| 1143 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); |
| 1144 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); |
| 1145 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); |
| 1146 } |
| 1147 } |
| 1148 |
| 1149 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); |
| 1150 fts5BufferFree(&buf); |
| 1151 } |
| 1152 } |
| 1153 |
| 1154 #if 0 |
| 1155 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); |
| 1156 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ |
| 1157 int rc = SQLITE_OK; |
| 1158 Fts5Buffer buf; |
| 1159 memset(&buf, 0, sizeof(buf)); |
| 1160 fts5DebugStructure(&rc, &buf, pStruct); |
| 1161 fprintf(stdout, "%s: %s\n", zCaption, buf.p); |
| 1162 fflush(stdout); |
| 1163 fts5BufferFree(&buf); |
| 1164 } |
| 1165 #else |
| 1166 # define fts5PrintStructure(x,y) |
| 1167 #endif |
| 1168 |
| 1169 static int fts5SegmentSize(Fts5StructureSegment *pSeg){ |
| 1170 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; |
| 1171 } |
| 1172 |
| 1173 /* |
| 1174 ** Return a copy of index structure pStruct. Except, promote as many |
| 1175 ** segments as possible to level iPromote. If an OOM occurs, NULL is |
| 1176 ** returned. |
| 1177 */ |
| 1178 static void fts5StructurePromoteTo( |
| 1179 Fts5Index *p, |
| 1180 int iPromote, |
| 1181 int szPromote, |
| 1182 Fts5Structure *pStruct |
| 1183 ){ |
| 1184 int il, is; |
| 1185 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; |
| 1186 |
| 1187 if( pOut->nMerge==0 ){ |
| 1188 for(il=iPromote+1; il<pStruct->nLevel; il++){ |
| 1189 Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; |
| 1190 if( pLvl->nMerge ) return; |
| 1191 for(is=pLvl->nSeg-1; is>=0; is--){ |
| 1192 int sz = fts5SegmentSize(&pLvl->aSeg[is]); |
| 1193 if( sz>szPromote ) return; |
| 1194 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); |
| 1195 if( p->rc ) return; |
| 1196 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); |
| 1197 pOut->nSeg++; |
| 1198 pLvl->nSeg--; |
| 1199 } |
| 1200 } |
| 1201 } |
| 1202 } |
| 1203 |
| 1204 /* |
| 1205 ** A new segment has just been written to level iLvl of index structure |
| 1206 ** pStruct. This function determines if any segments should be promoted |
| 1207 ** as a result. Segments are promoted in two scenarios: |
| 1208 ** |
| 1209 ** a) If the segment just written is smaller than one or more segments |
| 1210 ** within the previous populated level, it is promoted to the previous |
| 1211 ** populated level. |
| 1212 ** |
| 1213 ** b) If the segment just written is larger than the newest segment on |
| 1214 ** the next populated level, then that segment, and any other adjacent |
| 1215 ** segments that are also smaller than the one just written, are |
| 1216 ** promoted. |
| 1217 ** |
| 1218 ** If one or more segments are promoted, the structure object is updated |
| 1219 ** to reflect this. |
| 1220 */ |
| 1221 static void fts5StructurePromote( |
| 1222 Fts5Index *p, /* FTS5 backend object */ |
| 1223 int iLvl, /* Index level just updated */ |
| 1224 Fts5Structure *pStruct /* Index structure */ |
| 1225 ){ |
| 1226 if( p->rc==SQLITE_OK ){ |
| 1227 int iTst; |
| 1228 int iPromote = -1; |
| 1229 int szPromote = 0; /* Promote anything this size or smaller */ |
| 1230 Fts5StructureSegment *pSeg; /* Segment just written */ |
| 1231 int szSeg; /* Size of segment just written */ |
| 1232 int nSeg = pStruct->aLevel[iLvl].nSeg; |
| 1233 |
| 1234 if( nSeg==0 ) return; |
| 1235 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; |
| 1236 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); |
| 1237 |
| 1238 /* Check for condition (a) */ |
| 1239 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); |
| 1240 if( iTst>=0 ){ |
| 1241 int i; |
| 1242 int szMax = 0; |
| 1243 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; |
| 1244 assert( pTst->nMerge==0 ); |
| 1245 for(i=0; i<pTst->nSeg; i++){ |
| 1246 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; |
| 1247 if( sz>szMax ) szMax = sz; |
| 1248 } |
| 1249 if( szMax>=szSeg ){ |
| 1250 /* Condition (a) is true. Promote the newest segment on level |
| 1251 ** iLvl to level iTst. */ |
| 1252 iPromote = iTst; |
| 1253 szPromote = szMax; |
| 1254 } |
| 1255 } |
| 1256 |
| 1257 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() |
| 1258 ** is a no-op if it is not. */ |
| 1259 if( iPromote<0 ){ |
| 1260 iPromote = iLvl; |
| 1261 szPromote = szSeg; |
| 1262 } |
| 1263 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); |
| 1264 } |
| 1265 } |
| 1266 |
| 1267 |
| 1268 /* |
| 1269 ** Advance the iterator passed as the only argument. If the end of the |
| 1270 ** doclist-index page is reached, return non-zero. |
| 1271 */ |
| 1272 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ |
| 1273 Fts5Data *pData = pLvl->pData; |
| 1274 |
| 1275 if( pLvl->iOff==0 ){ |
| 1276 assert( pLvl->bEof==0 ); |
| 1277 pLvl->iOff = 1; |
| 1278 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); |
| 1279 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); |
| 1280 pLvl->iFirstOff = pLvl->iOff; |
| 1281 }else{ |
| 1282 int iOff; |
| 1283 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ |
| 1284 if( pData->p[iOff] ) break; |
| 1285 } |
| 1286 |
| 1287 if( iOff<pData->nn ){ |
| 1288 i64 iVal; |
| 1289 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; |
| 1290 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); |
| 1291 pLvl->iRowid += iVal; |
| 1292 pLvl->iOff = iOff; |
| 1293 }else{ |
| 1294 pLvl->bEof = 1; |
| 1295 } |
| 1296 } |
| 1297 |
| 1298 return pLvl->bEof; |
| 1299 } |
| 1300 |
| 1301 /* |
| 1302 ** Advance the iterator passed as the only argument. |
| 1303 */ |
| 1304 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ |
| 1305 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
| 1306 |
| 1307 assert( iLvl<pIter->nLvl ); |
| 1308 if( fts5DlidxLvlNext(pLvl) ){ |
| 1309 if( (iLvl+1) < pIter->nLvl ){ |
| 1310 fts5DlidxIterNextR(p, pIter, iLvl+1); |
| 1311 if( pLvl[1].bEof==0 ){ |
| 1312 fts5DataRelease(pLvl->pData); |
| 1313 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 1314 pLvl->pData = fts5DataRead(p, |
| 1315 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
| 1316 ); |
| 1317 if( pLvl->pData ) fts5DlidxLvlNext(pLvl); |
| 1318 } |
| 1319 } |
| 1320 } |
| 1321 |
| 1322 return pIter->aLvl[0].bEof; |
| 1323 } |
| 1324 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 1325 return fts5DlidxIterNextR(p, pIter, 0); |
| 1326 } |
| 1327 |
| 1328 /* |
| 1329 ** The iterator passed as the first argument has the following fields set |
| 1330 ** as follows. This function sets up the rest of the iterator so that it |
| 1331 ** points to the first rowid in the doclist-index. |
| 1332 ** |
| 1333 ** pData: |
| 1334 ** pointer to doclist-index record, |
| 1335 ** |
| 1336 ** When this function is called pIter->iLeafPgno is the page number the |
| 1337 ** doclist is associated with (the one featuring the term). |
| 1338 */ |
| 1339 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ |
| 1340 int i; |
| 1341 for(i=0; i<pIter->nLvl; i++){ |
| 1342 fts5DlidxLvlNext(&pIter->aLvl[i]); |
| 1343 } |
| 1344 return pIter->aLvl[0].bEof; |
| 1345 } |
| 1346 |
| 1347 |
| 1348 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 1349 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; |
| 1350 } |
| 1351 |
| 1352 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 1353 int i; |
| 1354 |
| 1355 /* Advance each level to the last entry on the last page */ |
| 1356 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ |
| 1357 Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; |
| 1358 while( fts5DlidxLvlNext(pLvl)==0 ); |
| 1359 pLvl->bEof = 0; |
| 1360 |
| 1361 if( i>0 ){ |
| 1362 Fts5DlidxLvl *pChild = &pLvl[-1]; |
| 1363 fts5DataRelease(pChild->pData); |
| 1364 memset(pChild, 0, sizeof(Fts5DlidxLvl)); |
| 1365 pChild->pData = fts5DataRead(p, |
| 1366 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) |
| 1367 ); |
| 1368 } |
| 1369 } |
| 1370 } |
| 1371 |
| 1372 /* |
| 1373 ** Move the iterator passed as the only argument to the previous entry. |
| 1374 */ |
| 1375 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ |
| 1376 int iOff = pLvl->iOff; |
| 1377 |
| 1378 assert( pLvl->bEof==0 ); |
| 1379 if( iOff<=pLvl->iFirstOff ){ |
| 1380 pLvl->bEof = 1; |
| 1381 }else{ |
| 1382 u8 *a = pLvl->pData->p; |
| 1383 i64 iVal; |
| 1384 int iLimit; |
| 1385 int ii; |
| 1386 int nZero = 0; |
| 1387 |
| 1388 /* Currently iOff points to the first byte of a varint. This block |
| 1389 ** decrements iOff until it points to the first byte of the previous |
| 1390 ** varint. Taking care not to read any memory locations that occur |
| 1391 ** before the buffer in memory. */ |
| 1392 iLimit = (iOff>9 ? iOff-9 : 0); |
| 1393 for(iOff--; iOff>iLimit; iOff--){ |
| 1394 if( (a[iOff-1] & 0x80)==0 ) break; |
| 1395 } |
| 1396 |
| 1397 fts5GetVarint(&a[iOff], (u64*)&iVal); |
| 1398 pLvl->iRowid -= iVal; |
| 1399 pLvl->iLeafPgno--; |
| 1400 |
| 1401 /* Skip backwards past any 0x00 varints. */ |
| 1402 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ |
| 1403 nZero++; |
| 1404 } |
| 1405 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ |
| 1406 /* The byte immediately before the last 0x00 byte has the 0x80 bit |
| 1407 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 |
| 1408 ** bytes before a[ii]. */ |
| 1409 int bZero = 0; /* True if last 0x00 counts */ |
| 1410 if( (ii-8)>=pLvl->iFirstOff ){ |
| 1411 int j; |
| 1412 for(j=1; j<=8 && (a[ii-j] & 0x80); j++); |
| 1413 bZero = (j>8); |
| 1414 } |
| 1415 if( bZero==0 ) nZero--; |
| 1416 } |
| 1417 pLvl->iLeafPgno -= nZero; |
| 1418 pLvl->iOff = iOff - nZero; |
| 1419 } |
| 1420 |
| 1421 return pLvl->bEof; |
| 1422 } |
| 1423 |
| 1424 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ |
| 1425 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
| 1426 |
| 1427 assert( iLvl<pIter->nLvl ); |
| 1428 if( fts5DlidxLvlPrev(pLvl) ){ |
| 1429 if( (iLvl+1) < pIter->nLvl ){ |
| 1430 fts5DlidxIterPrevR(p, pIter, iLvl+1); |
| 1431 if( pLvl[1].bEof==0 ){ |
| 1432 fts5DataRelease(pLvl->pData); |
| 1433 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 1434 pLvl->pData = fts5DataRead(p, |
| 1435 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
| 1436 ); |
| 1437 if( pLvl->pData ){ |
| 1438 while( fts5DlidxLvlNext(pLvl)==0 ); |
| 1439 pLvl->bEof = 0; |
| 1440 } |
| 1441 } |
| 1442 } |
| 1443 } |
| 1444 |
| 1445 return pIter->aLvl[0].bEof; |
| 1446 } |
| 1447 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ |
| 1448 return fts5DlidxIterPrevR(p, pIter, 0); |
| 1449 } |
| 1450 |
| 1451 /* |
| 1452 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). |
| 1453 */ |
| 1454 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ |
| 1455 if( pIter ){ |
| 1456 int i; |
| 1457 for(i=0; i<pIter->nLvl; i++){ |
| 1458 fts5DataRelease(pIter->aLvl[i].pData); |
| 1459 } |
| 1460 sqlite3_free(pIter); |
| 1461 } |
| 1462 } |
| 1463 |
| 1464 static Fts5DlidxIter *fts5DlidxIterInit( |
| 1465 Fts5Index *p, /* Fts5 Backend to iterate within */ |
| 1466 int bRev, /* True for ORDER BY ASC */ |
| 1467 int iSegid, /* Segment id */ |
| 1468 int iLeafPg /* Leaf page number to load dlidx for */ |
| 1469 ){ |
| 1470 Fts5DlidxIter *pIter = 0; |
| 1471 int i; |
| 1472 int bDone = 0; |
| 1473 |
| 1474 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ |
| 1475 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); |
| 1476 Fts5DlidxIter *pNew; |
| 1477 |
| 1478 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); |
| 1479 if( pNew==0 ){ |
| 1480 p->rc = SQLITE_NOMEM; |
| 1481 }else{ |
| 1482 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); |
| 1483 Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; |
| 1484 pIter = pNew; |
| 1485 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
| 1486 pLvl->pData = fts5DataRead(p, iRowid); |
| 1487 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ |
| 1488 bDone = 1; |
| 1489 } |
| 1490 pIter->nLvl = i+1; |
| 1491 } |
| 1492 } |
| 1493 |
| 1494 if( p->rc==SQLITE_OK ){ |
| 1495 pIter->iSegid = iSegid; |
| 1496 if( bRev==0 ){ |
| 1497 fts5DlidxIterFirst(pIter); |
| 1498 }else{ |
| 1499 fts5DlidxIterLast(p, pIter); |
| 1500 } |
| 1501 } |
| 1502 |
| 1503 if( p->rc!=SQLITE_OK ){ |
| 1504 fts5DlidxIterFree(pIter); |
| 1505 pIter = 0; |
| 1506 } |
| 1507 |
| 1508 return pIter; |
| 1509 } |
| 1510 |
| 1511 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ |
| 1512 return pIter->aLvl[0].iRowid; |
| 1513 } |
| 1514 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ |
| 1515 return pIter->aLvl[0].iLeafPgno; |
| 1516 } |
| 1517 |
| 1518 /* |
| 1519 ** Load the next leaf page into the segment iterator. |
| 1520 */ |
| 1521 static void fts5SegIterNextPage( |
| 1522 Fts5Index *p, /* FTS5 backend object */ |
| 1523 Fts5SegIter *pIter /* Iterator to advance to next page */ |
| 1524 ){ |
| 1525 Fts5Data *pLeaf; |
| 1526 Fts5StructureSegment *pSeg = pIter->pSeg; |
| 1527 fts5DataRelease(pIter->pLeaf); |
| 1528 pIter->iLeafPgno++; |
| 1529 if( pIter->pNextLeaf ){ |
| 1530 pIter->pLeaf = pIter->pNextLeaf; |
| 1531 pIter->pNextLeaf = 0; |
| 1532 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ |
| 1533 pIter->pLeaf = fts5LeafRead(p, |
| 1534 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) |
| 1535 ); |
| 1536 }else{ |
| 1537 pIter->pLeaf = 0; |
| 1538 } |
| 1539 pLeaf = pIter->pLeaf; |
| 1540 |
| 1541 if( pLeaf ){ |
| 1542 pIter->iPgidxOff = pLeaf->szLeaf; |
| 1543 if( fts5LeafIsTermless(pLeaf) ){ |
| 1544 pIter->iEndofDoclist = pLeaf->nn+1; |
| 1545 }else{ |
| 1546 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], |
| 1547 pIter->iEndofDoclist |
| 1548 ); |
| 1549 } |
| 1550 } |
| 1551 } |
| 1552 |
| 1553 /* |
| 1554 ** Argument p points to a buffer containing a varint to be interpreted as a |
| 1555 ** position list size field. Read the varint and return the number of bytes |
| 1556 ** read. Before returning, set *pnSz to the number of bytes in the position |
| 1557 ** list, and *pbDel to true if the delete flag is set, or false otherwise. |
| 1558 */ |
| 1559 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ |
| 1560 int nSz; |
| 1561 int n = 0; |
| 1562 fts5FastGetVarint32(p, n, nSz); |
| 1563 assert_nc( nSz>=0 ); |
| 1564 *pnSz = nSz/2; |
| 1565 *pbDel = nSz & 0x0001; |
| 1566 return n; |
| 1567 } |
| 1568 |
| 1569 /* |
| 1570 ** Fts5SegIter.iLeafOffset currently points to the first byte of a |
| 1571 ** position-list size field. Read the value of the field and store it |
| 1572 ** in the following variables: |
| 1573 ** |
| 1574 ** Fts5SegIter.nPos |
| 1575 ** Fts5SegIter.bDel |
| 1576 ** |
| 1577 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the |
| 1578 ** position list content (if any). |
| 1579 */ |
| 1580 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ |
| 1581 if( p->rc==SQLITE_OK ){ |
| 1582 int iOff = pIter->iLeafOffset; /* Offset to read at */ |
| 1583 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 1584 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 1585 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); |
| 1586 pIter->bDel = 0; |
| 1587 pIter->nPos = 1; |
| 1588 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ |
| 1589 pIter->bDel = 1; |
| 1590 iOff++; |
| 1591 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ |
| 1592 pIter->nPos = 1; |
| 1593 iOff++; |
| 1594 }else{ |
| 1595 pIter->nPos = 0; |
| 1596 } |
| 1597 } |
| 1598 }else{ |
| 1599 int nSz; |
| 1600 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); |
| 1601 pIter->bDel = (nSz & 0x0001); |
| 1602 pIter->nPos = nSz>>1; |
| 1603 assert_nc( pIter->nPos>=0 ); |
| 1604 } |
| 1605 pIter->iLeafOffset = iOff; |
| 1606 } |
| 1607 } |
| 1608 |
| 1609 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ |
| 1610 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
| 1611 int iOff = pIter->iLeafOffset; |
| 1612 |
| 1613 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 1614 if( iOff>=pIter->pLeaf->szLeaf ){ |
| 1615 fts5SegIterNextPage(p, pIter); |
| 1616 if( pIter->pLeaf==0 ){ |
| 1617 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; |
| 1618 return; |
| 1619 } |
| 1620 iOff = 4; |
| 1621 a = pIter->pLeaf->p; |
| 1622 } |
| 1623 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
| 1624 pIter->iLeafOffset = iOff; |
| 1625 } |
| 1626 |
| 1627 /* |
| 1628 ** Fts5SegIter.iLeafOffset currently points to the first byte of the |
| 1629 ** "nSuffix" field of a term. Function parameter nKeep contains the value |
| 1630 ** of the "nPrefix" field (if there was one - it is passed 0 if this is |
| 1631 ** the first term in the segment). |
| 1632 ** |
| 1633 ** This function populates: |
| 1634 ** |
| 1635 ** Fts5SegIter.term |
| 1636 ** Fts5SegIter.rowid |
| 1637 ** |
| 1638 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of |
| 1639 ** the first position list. The position list belonging to document |
| 1640 ** (Fts5SegIter.iRowid). |
| 1641 */ |
| 1642 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ |
| 1643 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
| 1644 int iOff = pIter->iLeafOffset; /* Offset to read at */ |
| 1645 int nNew; /* Bytes of new data */ |
| 1646 |
| 1647 iOff += fts5GetVarint32(&a[iOff], nNew); |
| 1648 if( iOff+nNew>pIter->pLeaf->nn ){ |
| 1649 p->rc = FTS5_CORRUPT; |
| 1650 return; |
| 1651 } |
| 1652 pIter->term.n = nKeep; |
| 1653 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
| 1654 iOff += nNew; |
| 1655 pIter->iTermLeafOffset = iOff; |
| 1656 pIter->iTermLeafPgno = pIter->iLeafPgno; |
| 1657 pIter->iLeafOffset = iOff; |
| 1658 |
| 1659 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ |
| 1660 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 1661 }else{ |
| 1662 int nExtra; |
| 1663 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); |
| 1664 pIter->iEndofDoclist += nExtra; |
| 1665 } |
| 1666 |
| 1667 fts5SegIterLoadRowid(p, pIter); |
| 1668 } |
| 1669 |
| 1670 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); |
| 1671 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); |
| 1672 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); |
| 1673 |
| 1674 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ |
| 1675 if( pIter->flags & FTS5_SEGITER_REVERSE ){ |
| 1676 pIter->xNext = fts5SegIterNext_Reverse; |
| 1677 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 1678 pIter->xNext = fts5SegIterNext_None; |
| 1679 }else{ |
| 1680 pIter->xNext = fts5SegIterNext; |
| 1681 } |
| 1682 } |
| 1683 |
| 1684 /* |
| 1685 ** Initialize the iterator object pIter to iterate through the entries in |
| 1686 ** segment pSeg. The iterator is left pointing to the first entry when |
| 1687 ** this function returns. |
| 1688 ** |
| 1689 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 1690 ** an error has already occurred when this function is called, it is a no-op. |
| 1691 */ |
| 1692 static void fts5SegIterInit( |
| 1693 Fts5Index *p, /* FTS index object */ |
| 1694 Fts5StructureSegment *pSeg, /* Description of segment */ |
| 1695 Fts5SegIter *pIter /* Object to populate */ |
| 1696 ){ |
| 1697 if( pSeg->pgnoFirst==0 ){ |
| 1698 /* This happens if the segment is being used as an input to an incremental |
| 1699 ** merge and all data has already been "trimmed". See function |
| 1700 ** fts5TrimSegments() for details. In this case leave the iterator empty. |
| 1701 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is |
| 1702 ** at EOF already. */ |
| 1703 assert( pIter->pLeaf==0 ); |
| 1704 return; |
| 1705 } |
| 1706 |
| 1707 if( p->rc==SQLITE_OK ){ |
| 1708 memset(pIter, 0, sizeof(*pIter)); |
| 1709 fts5SegIterSetNext(p, pIter); |
| 1710 pIter->pSeg = pSeg; |
| 1711 pIter->iLeafPgno = pSeg->pgnoFirst-1; |
| 1712 fts5SegIterNextPage(p, pIter); |
| 1713 } |
| 1714 |
| 1715 if( p->rc==SQLITE_OK ){ |
| 1716 pIter->iLeafOffset = 4; |
| 1717 assert_nc( pIter->pLeaf->nn>4 ); |
| 1718 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); |
| 1719 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; |
| 1720 fts5SegIterLoadTerm(p, pIter, 0); |
| 1721 fts5SegIterLoadNPos(p, pIter); |
| 1722 } |
| 1723 } |
| 1724 |
| 1725 /* |
| 1726 ** This function is only ever called on iterators created by calls to |
| 1727 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. |
| 1728 ** |
| 1729 ** The iterator is in an unusual state when this function is called: the |
| 1730 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of |
| 1731 ** the position-list size field for the first relevant rowid on the page. |
| 1732 ** Fts5SegIter.rowid is set, but nPos and bDel are not. |
| 1733 ** |
| 1734 ** This function advances the iterator so that it points to the last |
| 1735 ** relevant rowid on the page and, if necessary, initializes the |
| 1736 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator |
| 1737 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first |
| 1738 ** byte of the position list content associated with said rowid. |
| 1739 */ |
| 1740 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ |
| 1741 int eDetail = p->pConfig->eDetail; |
| 1742 int n = pIter->pLeaf->szLeaf; |
| 1743 int i = pIter->iLeafOffset; |
| 1744 u8 *a = pIter->pLeaf->p; |
| 1745 int iRowidOffset = 0; |
| 1746 |
| 1747 if( n>pIter->iEndofDoclist ){ |
| 1748 n = pIter->iEndofDoclist; |
| 1749 } |
| 1750 |
| 1751 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 1752 while( 1 ){ |
| 1753 i64 iDelta = 0; |
| 1754 |
| 1755 if( eDetail==FTS5_DETAIL_NONE ){ |
| 1756 /* todo */ |
| 1757 if( i<n && a[i]==0 ){ |
| 1758 i++; |
| 1759 if( i<n && a[i]==0 ) i++; |
| 1760 } |
| 1761 }else{ |
| 1762 int nPos; |
| 1763 int bDummy; |
| 1764 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); |
| 1765 i += nPos; |
| 1766 } |
| 1767 if( i>=n ) break; |
| 1768 i += fts5GetVarint(&a[i], (u64*)&iDelta); |
| 1769 pIter->iRowid += iDelta; |
| 1770 |
| 1771 /* If necessary, grow the pIter->aRowidOffset[] array. */ |
| 1772 if( iRowidOffset>=pIter->nRowidOffset ){ |
| 1773 int nNew = pIter->nRowidOffset + 8; |
| 1774 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); |
| 1775 if( aNew==0 ){ |
| 1776 p->rc = SQLITE_NOMEM; |
| 1777 break; |
| 1778 } |
| 1779 pIter->aRowidOffset = aNew; |
| 1780 pIter->nRowidOffset = nNew; |
| 1781 } |
| 1782 |
| 1783 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; |
| 1784 pIter->iLeafOffset = i; |
| 1785 } |
| 1786 pIter->iRowidOffset = iRowidOffset; |
| 1787 fts5SegIterLoadNPos(p, pIter); |
| 1788 } |
| 1789 |
| 1790 /* |
| 1791 ** |
| 1792 */ |
| 1793 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ |
| 1794 assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
| 1795 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 1796 |
| 1797 fts5DataRelease(pIter->pLeaf); |
| 1798 pIter->pLeaf = 0; |
| 1799 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ |
| 1800 Fts5Data *pNew; |
| 1801 pIter->iLeafPgno--; |
| 1802 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( |
| 1803 pIter->pSeg->iSegid, pIter->iLeafPgno |
| 1804 )); |
| 1805 if( pNew ){ |
| 1806 /* iTermLeafOffset may be equal to szLeaf if the term is the last |
| 1807 ** thing on the page - i.e. the first rowid is on the following page. |
| 1808 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ |
| 1809 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ |
| 1810 assert( pIter->pLeaf==0 ); |
| 1811 if( pIter->iTermLeafOffset<pNew->szLeaf ){ |
| 1812 pIter->pLeaf = pNew; |
| 1813 pIter->iLeafOffset = pIter->iTermLeafOffset; |
| 1814 } |
| 1815 }else{ |
| 1816 int iRowidOff; |
| 1817 iRowidOff = fts5LeafFirstRowidOff(pNew); |
| 1818 if( iRowidOff ){ |
| 1819 pIter->pLeaf = pNew; |
| 1820 pIter->iLeafOffset = iRowidOff; |
| 1821 } |
| 1822 } |
| 1823 |
| 1824 if( pIter->pLeaf ){ |
| 1825 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; |
| 1826 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); |
| 1827 break; |
| 1828 }else{ |
| 1829 fts5DataRelease(pNew); |
| 1830 } |
| 1831 } |
| 1832 } |
| 1833 |
| 1834 if( pIter->pLeaf ){ |
| 1835 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 1836 fts5SegIterReverseInitPage(p, pIter); |
| 1837 } |
| 1838 } |
| 1839 |
| 1840 /* |
| 1841 ** Return true if the iterator passed as the second argument currently |
| 1842 ** points to a delete marker. A delete marker is an entry with a 0 byte |
| 1843 ** position-list. |
| 1844 */ |
| 1845 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ |
| 1846 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; |
| 1847 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); |
| 1848 } |
| 1849 |
| 1850 /* |
| 1851 ** Advance iterator pIter to the next entry. |
| 1852 ** |
| 1853 ** This version of fts5SegIterNext() is only used by reverse iterators. |
| 1854 */ |
| 1855 static void fts5SegIterNext_Reverse( |
| 1856 Fts5Index *p, /* FTS5 backend object */ |
| 1857 Fts5SegIter *pIter, /* Iterator to advance */ |
| 1858 int *pbUnused /* Unused */ |
| 1859 ){ |
| 1860 assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
| 1861 assert( pIter->pNextLeaf==0 ); |
| 1862 UNUSED_PARAM(pbUnused); |
| 1863 |
| 1864 if( pIter->iRowidOffset>0 ){ |
| 1865 u8 *a = pIter->pLeaf->p; |
| 1866 int iOff; |
| 1867 i64 iDelta; |
| 1868 |
| 1869 pIter->iRowidOffset--; |
| 1870 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; |
| 1871 fts5SegIterLoadNPos(p, pIter); |
| 1872 iOff = pIter->iLeafOffset; |
| 1873 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ |
| 1874 iOff += pIter->nPos; |
| 1875 } |
| 1876 fts5GetVarint(&a[iOff], (u64*)&iDelta); |
| 1877 pIter->iRowid -= iDelta; |
| 1878 }else{ |
| 1879 fts5SegIterReverseNewPage(p, pIter); |
| 1880 } |
| 1881 } |
| 1882 |
| 1883 /* |
| 1884 ** Advance iterator pIter to the next entry. |
| 1885 ** |
| 1886 ** This version of fts5SegIterNext() is only used if detail=none and the |
| 1887 ** iterator is not a reverse direction iterator. |
| 1888 */ |
| 1889 static void fts5SegIterNext_None( |
| 1890 Fts5Index *p, /* FTS5 backend object */ |
| 1891 Fts5SegIter *pIter, /* Iterator to advance */ |
| 1892 int *pbNewTerm /* OUT: Set for new term */ |
| 1893 ){ |
| 1894 int iOff; |
| 1895 |
| 1896 assert( p->rc==SQLITE_OK ); |
| 1897 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); |
| 1898 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 1899 |
| 1900 ASSERT_SZLEAF_OK(pIter->pLeaf); |
| 1901 iOff = pIter->iLeafOffset; |
| 1902 |
| 1903 /* Next entry is on the next page */ |
| 1904 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ |
| 1905 fts5SegIterNextPage(p, pIter); |
| 1906 if( p->rc || pIter->pLeaf==0 ) return; |
| 1907 pIter->iRowid = 0; |
| 1908 iOff = 4; |
| 1909 } |
| 1910 |
| 1911 if( iOff<pIter->iEndofDoclist ){ |
| 1912 /* Next entry is on the current page */ |
| 1913 i64 iDelta; |
| 1914 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); |
| 1915 pIter->iLeafOffset = iOff; |
| 1916 pIter->iRowid += iDelta; |
| 1917 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ |
| 1918 if( pIter->pSeg ){ |
| 1919 int nKeep = 0; |
| 1920 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ |
| 1921 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); |
| 1922 } |
| 1923 pIter->iLeafOffset = iOff; |
| 1924 fts5SegIterLoadTerm(p, pIter, nKeep); |
| 1925 }else{ |
| 1926 const u8 *pList = 0; |
| 1927 const char *zTerm = 0; |
| 1928 int nList; |
| 1929 sqlite3Fts5HashScanNext(p->pHash); |
| 1930 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
| 1931 if( pList==0 ) goto next_none_eof; |
| 1932 pIter->pLeaf->p = (u8*)pList; |
| 1933 pIter->pLeaf->nn = nList; |
| 1934 pIter->pLeaf->szLeaf = nList; |
| 1935 pIter->iEndofDoclist = nList; |
| 1936 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); |
| 1937 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
| 1938 } |
| 1939 |
| 1940 if( pbNewTerm ) *pbNewTerm = 1; |
| 1941 }else{ |
| 1942 goto next_none_eof; |
| 1943 } |
| 1944 |
| 1945 fts5SegIterLoadNPos(p, pIter); |
| 1946 |
| 1947 return; |
| 1948 next_none_eof: |
| 1949 fts5DataRelease(pIter->pLeaf); |
| 1950 pIter->pLeaf = 0; |
| 1951 } |
| 1952 |
| 1953 |
| 1954 /* |
| 1955 ** Advance iterator pIter to the next entry. |
| 1956 ** |
| 1957 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It |
| 1958 ** is not considered an error if the iterator reaches EOF. If an error has |
| 1959 ** already occurred when this function is called, it is a no-op. |
| 1960 */ |
| 1961 static void fts5SegIterNext( |
| 1962 Fts5Index *p, /* FTS5 backend object */ |
| 1963 Fts5SegIter *pIter, /* Iterator to advance */ |
| 1964 int *pbNewTerm /* OUT: Set for new term */ |
| 1965 ){ |
| 1966 Fts5Data *pLeaf = pIter->pLeaf; |
| 1967 int iOff; |
| 1968 int bNewTerm = 0; |
| 1969 int nKeep = 0; |
| 1970 u8 *a; |
| 1971 int n; |
| 1972 |
| 1973 assert( pbNewTerm==0 || *pbNewTerm==0 ); |
| 1974 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 1975 |
| 1976 /* Search for the end of the position list within the current page. */ |
| 1977 a = pLeaf->p; |
| 1978 n = pLeaf->szLeaf; |
| 1979 |
| 1980 ASSERT_SZLEAF_OK(pLeaf); |
| 1981 iOff = pIter->iLeafOffset + pIter->nPos; |
| 1982 |
| 1983 if( iOff<n ){ |
| 1984 /* The next entry is on the current page. */ |
| 1985 assert_nc( iOff<=pIter->iEndofDoclist ); |
| 1986 if( iOff>=pIter->iEndofDoclist ){ |
| 1987 bNewTerm = 1; |
| 1988 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ |
| 1989 iOff += fts5GetVarint32(&a[iOff], nKeep); |
| 1990 } |
| 1991 }else{ |
| 1992 u64 iDelta; |
| 1993 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); |
| 1994 pIter->iRowid += iDelta; |
| 1995 assert_nc( iDelta>0 ); |
| 1996 } |
| 1997 pIter->iLeafOffset = iOff; |
| 1998 |
| 1999 }else if( pIter->pSeg==0 ){ |
| 2000 const u8 *pList = 0; |
| 2001 const char *zTerm = 0; |
| 2002 int nList = 0; |
| 2003 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); |
| 2004 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ |
| 2005 sqlite3Fts5HashScanNext(p->pHash); |
| 2006 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
| 2007 } |
| 2008 if( pList==0 ){ |
| 2009 fts5DataRelease(pIter->pLeaf); |
| 2010 pIter->pLeaf = 0; |
| 2011 }else{ |
| 2012 pIter->pLeaf->p = (u8*)pList; |
| 2013 pIter->pLeaf->nn = nList; |
| 2014 pIter->pLeaf->szLeaf = nList; |
| 2015 pIter->iEndofDoclist = nList+1; |
| 2016 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), |
| 2017 (u8*)zTerm); |
| 2018 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
| 2019 *pbNewTerm = 1; |
| 2020 } |
| 2021 }else{ |
| 2022 iOff = 0; |
| 2023 /* Next entry is not on the current page */ |
| 2024 while( iOff==0 ){ |
| 2025 fts5SegIterNextPage(p, pIter); |
| 2026 pLeaf = pIter->pLeaf; |
| 2027 if( pLeaf==0 ) break; |
| 2028 ASSERT_SZLEAF_OK(pLeaf); |
| 2029 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ |
| 2030 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); |
| 2031 pIter->iLeafOffset = iOff; |
| 2032 |
| 2033 if( pLeaf->nn>pLeaf->szLeaf ){ |
| 2034 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
| 2035 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist |
| 2036 ); |
| 2037 } |
| 2038 } |
| 2039 else if( pLeaf->nn>pLeaf->szLeaf ){ |
| 2040 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
| 2041 &pLeaf->p[pLeaf->szLeaf], iOff |
| 2042 ); |
| 2043 pIter->iLeafOffset = iOff; |
| 2044 pIter->iEndofDoclist = iOff; |
| 2045 bNewTerm = 1; |
| 2046 } |
| 2047 assert_nc( iOff<pLeaf->szLeaf ); |
| 2048 if( iOff>pLeaf->szLeaf ){ |
| 2049 p->rc = FTS5_CORRUPT; |
| 2050 return; |
| 2051 } |
| 2052 } |
| 2053 } |
| 2054 |
| 2055 /* Check if the iterator is now at EOF. If so, return early. */ |
| 2056 if( pIter->pLeaf ){ |
| 2057 if( bNewTerm ){ |
| 2058 if( pIter->flags & FTS5_SEGITER_ONETERM ){ |
| 2059 fts5DataRelease(pIter->pLeaf); |
| 2060 pIter->pLeaf = 0; |
| 2061 }else{ |
| 2062 fts5SegIterLoadTerm(p, pIter, nKeep); |
| 2063 fts5SegIterLoadNPos(p, pIter); |
| 2064 if( pbNewTerm ) *pbNewTerm = 1; |
| 2065 } |
| 2066 }else{ |
| 2067 /* The following could be done by calling fts5SegIterLoadNPos(). But |
| 2068 ** this block is particularly performance critical, so equivalent |
| 2069 ** code is inlined. |
| 2070 ** |
| 2071 ** Later: Switched back to fts5SegIterLoadNPos() because it supports |
| 2072 ** detail=none mode. Not ideal. |
| 2073 */ |
| 2074 int nSz; |
| 2075 assert( p->rc==SQLITE_OK ); |
| 2076 assert( pIter->iLeafOffset<=pIter->pLeaf->nn ); |
| 2077 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); |
| 2078 pIter->bDel = (nSz & 0x0001); |
| 2079 pIter->nPos = nSz>>1; |
| 2080 assert_nc( pIter->nPos>=0 ); |
| 2081 } |
| 2082 } |
| 2083 } |
| 2084 |
| 2085 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } |
| 2086 |
| 2087 #define fts5IndexSkipVarint(a, iOff) { \ |
| 2088 int iEnd = iOff+9; \ |
| 2089 while( (a[iOff++] & 0x80) && iOff<iEnd ); \ |
| 2090 } |
| 2091 |
| 2092 /* |
| 2093 ** Iterator pIter currently points to the first rowid in a doclist. This |
| 2094 ** function sets the iterator up so that iterates in reverse order through |
| 2095 ** the doclist. |
| 2096 */ |
| 2097 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ |
| 2098 Fts5DlidxIter *pDlidx = pIter->pDlidx; |
| 2099 Fts5Data *pLast = 0; |
| 2100 int pgnoLast = 0; |
| 2101 |
| 2102 if( pDlidx ){ |
| 2103 int iSegid = pIter->pSeg->iSegid; |
| 2104 pgnoLast = fts5DlidxIterPgno(pDlidx); |
| 2105 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); |
| 2106 }else{ |
| 2107 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
| 2108 |
| 2109 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of |
| 2110 ** position-list content for the current rowid. Back it up so that it |
| 2111 ** points to the start of the position-list size field. */ |
| 2112 int iPoslist; |
| 2113 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ |
| 2114 iPoslist = pIter->iTermLeafOffset; |
| 2115 }else{ |
| 2116 iPoslist = 4; |
| 2117 } |
| 2118 fts5IndexSkipVarint(pLeaf->p, iPoslist); |
| 2119 pIter->iLeafOffset = iPoslist; |
| 2120 |
| 2121 /* If this condition is true then the largest rowid for the current |
| 2122 ** term may not be stored on the current page. So search forward to |
| 2123 ** see where said rowid really is. */ |
| 2124 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ |
| 2125 int pgno; |
| 2126 Fts5StructureSegment *pSeg = pIter->pSeg; |
| 2127 |
| 2128 /* The last rowid in the doclist may not be on the current page. Search |
| 2129 ** forward to find the page containing the last rowid. */ |
| 2130 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ |
| 2131 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); |
| 2132 Fts5Data *pNew = fts5DataRead(p, iAbs); |
| 2133 if( pNew ){ |
| 2134 int iRowid, bTermless; |
| 2135 iRowid = fts5LeafFirstRowidOff(pNew); |
| 2136 bTermless = fts5LeafIsTermless(pNew); |
| 2137 if( iRowid ){ |
| 2138 SWAPVAL(Fts5Data*, pNew, pLast); |
| 2139 pgnoLast = pgno; |
| 2140 } |
| 2141 fts5DataRelease(pNew); |
| 2142 if( bTermless==0 ) break; |
| 2143 } |
| 2144 } |
| 2145 } |
| 2146 } |
| 2147 |
| 2148 /* If pLast is NULL at this point, then the last rowid for this doclist |
| 2149 ** lies on the page currently indicated by the iterator. In this case |
| 2150 ** pIter->iLeafOffset is already set to point to the position-list size |
| 2151 ** field associated with the first relevant rowid on the page. |
| 2152 ** |
| 2153 ** Or, if pLast is non-NULL, then it is the page that contains the last |
| 2154 ** rowid. In this case configure the iterator so that it points to the |
| 2155 ** first rowid on this page. |
| 2156 */ |
| 2157 if( pLast ){ |
| 2158 int iOff; |
| 2159 fts5DataRelease(pIter->pLeaf); |
| 2160 pIter->pLeaf = pLast; |
| 2161 pIter->iLeafPgno = pgnoLast; |
| 2162 iOff = fts5LeafFirstRowidOff(pLast); |
| 2163 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); |
| 2164 pIter->iLeafOffset = iOff; |
| 2165 |
| 2166 if( fts5LeafIsTermless(pLast) ){ |
| 2167 pIter->iEndofDoclist = pLast->nn+1; |
| 2168 }else{ |
| 2169 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); |
| 2170 } |
| 2171 |
| 2172 } |
| 2173 |
| 2174 fts5SegIterReverseInitPage(p, pIter); |
| 2175 } |
| 2176 |
| 2177 /* |
| 2178 ** Iterator pIter currently points to the first rowid of a doclist. |
| 2179 ** There is a doclist-index associated with the final term on the current |
| 2180 ** page. If the current term is the last term on the page, load the |
| 2181 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). |
| 2182 */ |
| 2183 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ |
| 2184 int iSeg = pIter->pSeg->iSegid; |
| 2185 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); |
| 2186 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
| 2187 |
| 2188 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 2189 assert( pIter->pDlidx==0 ); |
| 2190 |
| 2191 /* Check if the current doclist ends on this page. If it does, return |
| 2192 ** early without loading the doclist-index (as it belongs to a different |
| 2193 ** term. */ |
| 2194 if( pIter->iTermLeafPgno==pIter->iLeafPgno |
| 2195 && pIter->iEndofDoclist<pLeaf->szLeaf |
| 2196 ){ |
| 2197 return; |
| 2198 } |
| 2199 |
| 2200 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); |
| 2201 } |
| 2202 |
| 2203 /* |
| 2204 ** The iterator object passed as the second argument currently contains |
| 2205 ** no valid values except for the Fts5SegIter.pLeaf member variable. This |
| 2206 ** function searches the leaf page for a term matching (pTerm/nTerm). |
| 2207 ** |
| 2208 ** If the specified term is found on the page, then the iterator is left |
| 2209 ** pointing to it. If argument bGe is zero and the term is not found, |
| 2210 ** the iterator is left pointing at EOF. |
| 2211 ** |
| 2212 ** If bGe is non-zero and the specified term is not found, then the |
| 2213 ** iterator is left pointing to the smallest term in the segment that |
| 2214 ** is larger than the specified term, even if this term is not on the |
| 2215 ** current page. |
| 2216 */ |
| 2217 static void fts5LeafSeek( |
| 2218 Fts5Index *p, /* Leave any error code here */ |
| 2219 int bGe, /* True for a >= search */ |
| 2220 Fts5SegIter *pIter, /* Iterator to seek */ |
| 2221 const u8 *pTerm, int nTerm /* Term to search for */ |
| 2222 ){ |
| 2223 int iOff; |
| 2224 const u8 *a = pIter->pLeaf->p; |
| 2225 int szLeaf = pIter->pLeaf->szLeaf; |
| 2226 int n = pIter->pLeaf->nn; |
| 2227 |
| 2228 int nMatch = 0; |
| 2229 int nKeep = 0; |
| 2230 int nNew = 0; |
| 2231 int iTermOff; |
| 2232 int iPgidx; /* Current offset in pgidx */ |
| 2233 int bEndOfPage = 0; |
| 2234 |
| 2235 assert( p->rc==SQLITE_OK ); |
| 2236 |
| 2237 iPgidx = szLeaf; |
| 2238 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); |
| 2239 iOff = iTermOff; |
| 2240 if( iOff>n ){ |
| 2241 p->rc = FTS5_CORRUPT; |
| 2242 return; |
| 2243 } |
| 2244 |
| 2245 while( 1 ){ |
| 2246 |
| 2247 /* Figure out how many new bytes are in this term */ |
| 2248 fts5FastGetVarint32(a, iOff, nNew); |
| 2249 if( nKeep<nMatch ){ |
| 2250 goto search_failed; |
| 2251 } |
| 2252 |
| 2253 assert( nKeep>=nMatch ); |
| 2254 if( nKeep==nMatch ){ |
| 2255 int nCmp; |
| 2256 int i; |
| 2257 nCmp = MIN(nNew, nTerm-nMatch); |
| 2258 for(i=0; i<nCmp; i++){ |
| 2259 if( a[iOff+i]!=pTerm[nMatch+i] ) break; |
| 2260 } |
| 2261 nMatch += i; |
| 2262 |
| 2263 if( nTerm==nMatch ){ |
| 2264 if( i==nNew ){ |
| 2265 goto search_success; |
| 2266 }else{ |
| 2267 goto search_failed; |
| 2268 } |
| 2269 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ |
| 2270 goto search_failed; |
| 2271 } |
| 2272 } |
| 2273 |
| 2274 if( iPgidx>=n ){ |
| 2275 bEndOfPage = 1; |
| 2276 break; |
| 2277 } |
| 2278 |
| 2279 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); |
| 2280 iTermOff += nKeep; |
| 2281 iOff = iTermOff; |
| 2282 |
| 2283 if( iOff>=n ){ |
| 2284 p->rc = FTS5_CORRUPT; |
| 2285 return; |
| 2286 } |
| 2287 |
| 2288 /* Read the nKeep field of the next term. */ |
| 2289 fts5FastGetVarint32(a, iOff, nKeep); |
| 2290 } |
| 2291 |
| 2292 search_failed: |
| 2293 if( bGe==0 ){ |
| 2294 fts5DataRelease(pIter->pLeaf); |
| 2295 pIter->pLeaf = 0; |
| 2296 return; |
| 2297 }else if( bEndOfPage ){ |
| 2298 do { |
| 2299 fts5SegIterNextPage(p, pIter); |
| 2300 if( pIter->pLeaf==0 ) return; |
| 2301 a = pIter->pLeaf->p; |
| 2302 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ |
| 2303 iPgidx = pIter->pLeaf->szLeaf; |
| 2304 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff); |
| 2305 if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ |
| 2306 p->rc = FTS5_CORRUPT; |
| 2307 }else{ |
| 2308 nKeep = 0; |
| 2309 iTermOff = iOff; |
| 2310 n = pIter->pLeaf->nn; |
| 2311 iOff += fts5GetVarint32(&a[iOff], nNew); |
| 2312 break; |
| 2313 } |
| 2314 } |
| 2315 }while( 1 ); |
| 2316 } |
| 2317 |
| 2318 search_success: |
| 2319 |
| 2320 pIter->iLeafOffset = iOff + nNew; |
| 2321 pIter->iTermLeafOffset = pIter->iLeafOffset; |
| 2322 pIter->iTermLeafPgno = pIter->iLeafPgno; |
| 2323 |
| 2324 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); |
| 2325 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
| 2326 |
| 2327 if( iPgidx>=n ){ |
| 2328 pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
| 2329 }else{ |
| 2330 int nExtra; |
| 2331 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); |
| 2332 pIter->iEndofDoclist = iTermOff + nExtra; |
| 2333 } |
| 2334 pIter->iPgidxOff = iPgidx; |
| 2335 |
| 2336 fts5SegIterLoadRowid(p, pIter); |
| 2337 fts5SegIterLoadNPos(p, pIter); |
| 2338 } |
| 2339 |
| 2340 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ |
| 2341 if( p->pIdxSelect==0 ){ |
| 2342 Fts5Config *pConfig = p->pConfig; |
| 2343 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( |
| 2344 "SELECT pgno FROM '%q'.'%q_idx' WHERE " |
| 2345 "segid=? AND term<=? ORDER BY term DESC LIMIT 1", |
| 2346 pConfig->zDb, pConfig->zName |
| 2347 )); |
| 2348 } |
| 2349 return p->pIdxSelect; |
| 2350 } |
| 2351 |
| 2352 /* |
| 2353 ** Initialize the object pIter to point to term pTerm/nTerm within segment |
| 2354 ** pSeg. If there is no such term in the index, the iterator is set to EOF. |
| 2355 ** |
| 2356 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 2357 ** an error has already occurred when this function is called, it is a no-op. |
| 2358 */ |
| 2359 static void fts5SegIterSeekInit( |
| 2360 Fts5Index *p, /* FTS5 backend */ |
| 2361 const u8 *pTerm, int nTerm, /* Term to seek to */ |
| 2362 int flags, /* Mask of FTS5INDEX_XXX flags */ |
| 2363 Fts5StructureSegment *pSeg, /* Description of segment */ |
| 2364 Fts5SegIter *pIter /* Object to populate */ |
| 2365 ){ |
| 2366 int iPg = 1; |
| 2367 int bGe = (flags & FTS5INDEX_QUERY_SCAN); |
| 2368 int bDlidx = 0; /* True if there is a doclist-index */ |
| 2369 sqlite3_stmt *pIdxSelect = 0; |
| 2370 |
| 2371 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); |
| 2372 assert( pTerm && nTerm ); |
| 2373 memset(pIter, 0, sizeof(*pIter)); |
| 2374 pIter->pSeg = pSeg; |
| 2375 |
| 2376 /* This block sets stack variable iPg to the leaf page number that may |
| 2377 ** contain term (pTerm/nTerm), if it is present in the segment. */ |
| 2378 pIdxSelect = fts5IdxSelectStmt(p); |
| 2379 if( p->rc ) return; |
| 2380 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); |
| 2381 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); |
| 2382 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ |
| 2383 i64 val = sqlite3_column_int(pIdxSelect, 0); |
| 2384 iPg = (int)(val>>1); |
| 2385 bDlidx = (val & 0x0001); |
| 2386 } |
| 2387 p->rc = sqlite3_reset(pIdxSelect); |
| 2388 |
| 2389 if( iPg<pSeg->pgnoFirst ){ |
| 2390 iPg = pSeg->pgnoFirst; |
| 2391 bDlidx = 0; |
| 2392 } |
| 2393 |
| 2394 pIter->iLeafPgno = iPg - 1; |
| 2395 fts5SegIterNextPage(p, pIter); |
| 2396 |
| 2397 if( pIter->pLeaf ){ |
| 2398 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); |
| 2399 } |
| 2400 |
| 2401 if( p->rc==SQLITE_OK && bGe==0 ){ |
| 2402 pIter->flags |= FTS5_SEGITER_ONETERM; |
| 2403 if( pIter->pLeaf ){ |
| 2404 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 2405 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 2406 } |
| 2407 if( bDlidx ){ |
| 2408 fts5SegIterLoadDlidx(p, pIter); |
| 2409 } |
| 2410 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 2411 fts5SegIterReverse(p, pIter); |
| 2412 } |
| 2413 } |
| 2414 } |
| 2415 |
| 2416 fts5SegIterSetNext(p, pIter); |
| 2417 |
| 2418 /* Either: |
| 2419 ** |
| 2420 ** 1) an error has occurred, or |
| 2421 ** 2) the iterator points to EOF, or |
| 2422 ** 3) the iterator points to an entry with term (pTerm/nTerm), or |
| 2423 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points |
| 2424 ** to an entry with a term greater than or equal to (pTerm/nTerm). |
| 2425 */ |
| 2426 assert( p->rc!=SQLITE_OK /* 1 */ |
| 2427 || pIter->pLeaf==0 /* 2 */ |
| 2428 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */ |
| 2429 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */ |
| 2430 ); |
| 2431 } |
| 2432 |
| 2433 /* |
| 2434 ** Initialize the object pIter to point to term pTerm/nTerm within the |
| 2435 ** in-memory hash table. If there is no such term in the hash-table, the |
| 2436 ** iterator is set to EOF. |
| 2437 ** |
| 2438 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
| 2439 ** an error has already occurred when this function is called, it is a no-op. |
| 2440 */ |
| 2441 static void fts5SegIterHashInit( |
| 2442 Fts5Index *p, /* FTS5 backend */ |
| 2443 const u8 *pTerm, int nTerm, /* Term to seek to */ |
| 2444 int flags, /* Mask of FTS5INDEX_XXX flags */ |
| 2445 Fts5SegIter *pIter /* Object to populate */ |
| 2446 ){ |
| 2447 const u8 *pList = 0; |
| 2448 int nList = 0; |
| 2449 const u8 *z = 0; |
| 2450 int n = 0; |
| 2451 |
| 2452 assert( p->pHash ); |
| 2453 assert( p->rc==SQLITE_OK ); |
| 2454 |
| 2455 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ |
| 2456 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); |
| 2457 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); |
| 2458 n = (z ? (int)strlen((const char*)z) : 0); |
| 2459 }else{ |
| 2460 pIter->flags |= FTS5_SEGITER_ONETERM; |
| 2461 sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); |
| 2462 z = pTerm; |
| 2463 n = nTerm; |
| 2464 } |
| 2465 |
| 2466 if( pList ){ |
| 2467 Fts5Data *pLeaf; |
| 2468 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); |
| 2469 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); |
| 2470 if( pLeaf==0 ) return; |
| 2471 pLeaf->p = (u8*)pList; |
| 2472 pLeaf->nn = pLeaf->szLeaf = nList; |
| 2473 pIter->pLeaf = pLeaf; |
| 2474 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); |
| 2475 pIter->iEndofDoclist = pLeaf->nn; |
| 2476 |
| 2477 if( flags & FTS5INDEX_QUERY_DESC ){ |
| 2478 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 2479 fts5SegIterReverseInitPage(p, pIter); |
| 2480 }else{ |
| 2481 fts5SegIterLoadNPos(p, pIter); |
| 2482 } |
| 2483 } |
| 2484 |
| 2485 fts5SegIterSetNext(p, pIter); |
| 2486 } |
| 2487 |
| 2488 /* |
| 2489 ** Zero the iterator passed as the only argument. |
| 2490 */ |
| 2491 static void fts5SegIterClear(Fts5SegIter *pIter){ |
| 2492 fts5BufferFree(&pIter->term); |
| 2493 fts5DataRelease(pIter->pLeaf); |
| 2494 fts5DataRelease(pIter->pNextLeaf); |
| 2495 fts5DlidxIterFree(pIter->pDlidx); |
| 2496 sqlite3_free(pIter->aRowidOffset); |
| 2497 memset(pIter, 0, sizeof(Fts5SegIter)); |
| 2498 } |
| 2499 |
| 2500 #ifdef SQLITE_DEBUG |
| 2501 |
| 2502 /* |
| 2503 ** This function is used as part of the big assert() procedure implemented by |
| 2504 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored |
| 2505 ** in *pRes is the correct result of comparing the current positions of the |
| 2506 ** two iterators. |
| 2507 */ |
| 2508 static void fts5AssertComparisonResult( |
| 2509 Fts5Iter *pIter, |
| 2510 Fts5SegIter *p1, |
| 2511 Fts5SegIter *p2, |
| 2512 Fts5CResult *pRes |
| 2513 ){ |
| 2514 int i1 = p1 - pIter->aSeg; |
| 2515 int i2 = p2 - pIter->aSeg; |
| 2516 |
| 2517 if( p1->pLeaf || p2->pLeaf ){ |
| 2518 if( p1->pLeaf==0 ){ |
| 2519 assert( pRes->iFirst==i2 ); |
| 2520 }else if( p2->pLeaf==0 ){ |
| 2521 assert( pRes->iFirst==i1 ); |
| 2522 }else{ |
| 2523 int nMin = MIN(p1->term.n, p2->term.n); |
| 2524 int res = memcmp(p1->term.p, p2->term.p, nMin); |
| 2525 if( res==0 ) res = p1->term.n - p2->term.n; |
| 2526 |
| 2527 if( res==0 ){ |
| 2528 assert( pRes->bTermEq==1 ); |
| 2529 assert( p1->iRowid!=p2->iRowid ); |
| 2530 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; |
| 2531 }else{ |
| 2532 assert( pRes->bTermEq==0 ); |
| 2533 } |
| 2534 |
| 2535 if( res<0 ){ |
| 2536 assert( pRes->iFirst==i1 ); |
| 2537 }else{ |
| 2538 assert( pRes->iFirst==i2 ); |
| 2539 } |
| 2540 } |
| 2541 } |
| 2542 } |
| 2543 |
| 2544 /* |
| 2545 ** This function is a no-op unless SQLITE_DEBUG is defined when this module |
| 2546 ** is compiled. In that case, this function is essentially an assert() |
| 2547 ** statement used to verify that the contents of the pIter->aFirst[] array |
| 2548 ** are correct. |
| 2549 */ |
| 2550 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ |
| 2551 if( p->rc==SQLITE_OK ){ |
| 2552 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 2553 int i; |
| 2554 |
| 2555 assert( (pFirst->pLeaf==0)==pIter->base.bEof ); |
| 2556 |
| 2557 /* Check that pIter->iSwitchRowid is set correctly. */ |
| 2558 for(i=0; i<pIter->nSeg; i++){ |
| 2559 Fts5SegIter *p1 = &pIter->aSeg[i]; |
| 2560 assert( p1==pFirst |
| 2561 || p1->pLeaf==0 |
| 2562 || fts5BufferCompare(&pFirst->term, &p1->term) |
| 2563 || p1->iRowid==pIter->iSwitchRowid |
| 2564 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev |
| 2565 ); |
| 2566 } |
| 2567 |
| 2568 for(i=0; i<pIter->nSeg; i+=2){ |
| 2569 Fts5SegIter *p1 = &pIter->aSeg[i]; |
| 2570 Fts5SegIter *p2 = &pIter->aSeg[i+1]; |
| 2571 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; |
| 2572 fts5AssertComparisonResult(pIter, p1, p2, pRes); |
| 2573 } |
| 2574 |
| 2575 for(i=1; i<(pIter->nSeg / 2); i+=2){ |
| 2576 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; |
| 2577 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; |
| 2578 Fts5CResult *pRes = &pIter->aFirst[i]; |
| 2579 fts5AssertComparisonResult(pIter, p1, p2, pRes); |
| 2580 } |
| 2581 } |
| 2582 } |
| 2583 #else |
| 2584 # define fts5AssertMultiIterSetup(x,y) |
| 2585 #endif |
| 2586 |
| 2587 /* |
| 2588 ** Do the comparison necessary to populate pIter->aFirst[iOut]. |
| 2589 ** |
| 2590 ** If the returned value is non-zero, then it is the index of an entry |
| 2591 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing |
| 2592 ** to a key that is a duplicate of another, higher priority, |
| 2593 ** segment-iterator in the pSeg->aSeg[] array. |
| 2594 */ |
| 2595 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ |
| 2596 int i1; /* Index of left-hand Fts5SegIter */ |
| 2597 int i2; /* Index of right-hand Fts5SegIter */ |
| 2598 int iRes; |
| 2599 Fts5SegIter *p1; /* Left-hand Fts5SegIter */ |
| 2600 Fts5SegIter *p2; /* Right-hand Fts5SegIter */ |
| 2601 Fts5CResult *pRes = &pIter->aFirst[iOut]; |
| 2602 |
| 2603 assert( iOut<pIter->nSeg && iOut>0 ); |
| 2604 assert( pIter->bRev==0 || pIter->bRev==1 ); |
| 2605 |
| 2606 if( iOut>=(pIter->nSeg/2) ){ |
| 2607 i1 = (iOut - pIter->nSeg/2) * 2; |
| 2608 i2 = i1 + 1; |
| 2609 }else{ |
| 2610 i1 = pIter->aFirst[iOut*2].iFirst; |
| 2611 i2 = pIter->aFirst[iOut*2+1].iFirst; |
| 2612 } |
| 2613 p1 = &pIter->aSeg[i1]; |
| 2614 p2 = &pIter->aSeg[i2]; |
| 2615 |
| 2616 pRes->bTermEq = 0; |
| 2617 if( p1->pLeaf==0 ){ /* If p1 is at EOF */ |
| 2618 iRes = i2; |
| 2619 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ |
| 2620 iRes = i1; |
| 2621 }else{ |
| 2622 int res = fts5BufferCompare(&p1->term, &p2->term); |
| 2623 if( res==0 ){ |
| 2624 assert( i2>i1 ); |
| 2625 assert( i2!=0 ); |
| 2626 pRes->bTermEq = 1; |
| 2627 if( p1->iRowid==p2->iRowid ){ |
| 2628 p1->bDel = p2->bDel; |
| 2629 return i2; |
| 2630 } |
| 2631 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; |
| 2632 } |
| 2633 assert( res!=0 ); |
| 2634 if( res<0 ){ |
| 2635 iRes = i1; |
| 2636 }else{ |
| 2637 iRes = i2; |
| 2638 } |
| 2639 } |
| 2640 |
| 2641 pRes->iFirst = (u16)iRes; |
| 2642 return 0; |
| 2643 } |
| 2644 |
| 2645 /* |
| 2646 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. |
| 2647 ** It is an error if leaf iLeafPgno does not exist or contains no rowids. |
| 2648 */ |
| 2649 static void fts5SegIterGotoPage( |
| 2650 Fts5Index *p, /* FTS5 backend object */ |
| 2651 Fts5SegIter *pIter, /* Iterator to advance */ |
| 2652 int iLeafPgno |
| 2653 ){ |
| 2654 assert( iLeafPgno>pIter->iLeafPgno ); |
| 2655 |
| 2656 if( iLeafPgno>pIter->pSeg->pgnoLast ){ |
| 2657 p->rc = FTS5_CORRUPT; |
| 2658 }else{ |
| 2659 fts5DataRelease(pIter->pNextLeaf); |
| 2660 pIter->pNextLeaf = 0; |
| 2661 pIter->iLeafPgno = iLeafPgno-1; |
| 2662 fts5SegIterNextPage(p, pIter); |
| 2663 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); |
| 2664 |
| 2665 if( p->rc==SQLITE_OK ){ |
| 2666 int iOff; |
| 2667 u8 *a = pIter->pLeaf->p; |
| 2668 int n = pIter->pLeaf->szLeaf; |
| 2669 |
| 2670 iOff = fts5LeafFirstRowidOff(pIter->pLeaf); |
| 2671 if( iOff<4 || iOff>=n ){ |
| 2672 p->rc = FTS5_CORRUPT; |
| 2673 }else{ |
| 2674 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
| 2675 pIter->iLeafOffset = iOff; |
| 2676 fts5SegIterLoadNPos(p, pIter); |
| 2677 } |
| 2678 } |
| 2679 } |
| 2680 } |
| 2681 |
| 2682 /* |
| 2683 ** Advance the iterator passed as the second argument until it is at or |
| 2684 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is |
| 2685 ** always advanced at least once. |
| 2686 */ |
| 2687 static void fts5SegIterNextFrom( |
| 2688 Fts5Index *p, /* FTS5 backend object */ |
| 2689 Fts5SegIter *pIter, /* Iterator to advance */ |
| 2690 i64 iMatch /* Advance iterator at least this far */ |
| 2691 ){ |
| 2692 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); |
| 2693 Fts5DlidxIter *pDlidx = pIter->pDlidx; |
| 2694 int iLeafPgno = pIter->iLeafPgno; |
| 2695 int bMove = 1; |
| 2696 |
| 2697 assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
| 2698 assert( pIter->pDlidx ); |
| 2699 assert( pIter->pLeaf ); |
| 2700 |
| 2701 if( bRev==0 ){ |
| 2702 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ |
| 2703 iLeafPgno = fts5DlidxIterPgno(pDlidx); |
| 2704 fts5DlidxIterNext(p, pDlidx); |
| 2705 } |
| 2706 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); |
| 2707 if( iLeafPgno>pIter->iLeafPgno ){ |
| 2708 fts5SegIterGotoPage(p, pIter, iLeafPgno); |
| 2709 bMove = 0; |
| 2710 } |
| 2711 }else{ |
| 2712 assert( pIter->pNextLeaf==0 ); |
| 2713 assert( iMatch<pIter->iRowid ); |
| 2714 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ |
| 2715 fts5DlidxIterPrev(p, pDlidx); |
| 2716 } |
| 2717 iLeafPgno = fts5DlidxIterPgno(pDlidx); |
| 2718 |
| 2719 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); |
| 2720 |
| 2721 if( iLeafPgno<pIter->iLeafPgno ){ |
| 2722 pIter->iLeafPgno = iLeafPgno+1; |
| 2723 fts5SegIterReverseNewPage(p, pIter); |
| 2724 bMove = 0; |
| 2725 } |
| 2726 } |
| 2727 |
| 2728 do{ |
| 2729 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); |
| 2730 if( pIter->pLeaf==0 ) break; |
| 2731 if( bRev==0 && pIter->iRowid>=iMatch ) break; |
| 2732 if( bRev!=0 && pIter->iRowid<=iMatch ) break; |
| 2733 bMove = 1; |
| 2734 }while( p->rc==SQLITE_OK ); |
| 2735 } |
| 2736 |
| 2737 |
| 2738 /* |
| 2739 ** Free the iterator object passed as the second argument. |
| 2740 */ |
| 2741 static void fts5MultiIterFree(Fts5Iter *pIter){ |
| 2742 if( pIter ){ |
| 2743 int i; |
| 2744 for(i=0; i<pIter->nSeg; i++){ |
| 2745 fts5SegIterClear(&pIter->aSeg[i]); |
| 2746 } |
| 2747 fts5StructureRelease(pIter->pStruct); |
| 2748 fts5BufferFree(&pIter->poslist); |
| 2749 sqlite3_free(pIter); |
| 2750 } |
| 2751 } |
| 2752 |
| 2753 static void fts5MultiIterAdvanced( |
| 2754 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 2755 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
| 2756 int iChanged, /* Index of sub-iterator just advanced */ |
| 2757 int iMinset /* Minimum entry in aFirst[] to set */ |
| 2758 ){ |
| 2759 int i; |
| 2760 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ |
| 2761 int iEq; |
| 2762 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ |
| 2763 Fts5SegIter *pSeg = &pIter->aSeg[iEq]; |
| 2764 assert( p->rc==SQLITE_OK ); |
| 2765 pSeg->xNext(p, pSeg, 0); |
| 2766 i = pIter->nSeg + iEq; |
| 2767 } |
| 2768 } |
| 2769 } |
| 2770 |
| 2771 /* |
| 2772 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still |
| 2773 ** points to the same term though - just a different rowid. This function |
| 2774 ** attempts to update the contents of the pIter->aFirst[] accordingly. |
| 2775 ** If it does so successfully, 0 is returned. Otherwise 1. |
| 2776 ** |
| 2777 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() |
| 2778 ** on the iterator instead. That function does the same as this one, except |
| 2779 ** that it deals with more complicated cases as well. |
| 2780 */ |
| 2781 static int fts5MultiIterAdvanceRowid( |
| 2782 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
| 2783 int iChanged, /* Index of sub-iterator just advanced */ |
| 2784 Fts5SegIter **ppFirst |
| 2785 ){ |
| 2786 Fts5SegIter *pNew = &pIter->aSeg[iChanged]; |
| 2787 |
| 2788 if( pNew->iRowid==pIter->iSwitchRowid |
| 2789 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev |
| 2790 ){ |
| 2791 int i; |
| 2792 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; |
| 2793 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64; |
| 2794 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ |
| 2795 Fts5CResult *pRes = &pIter->aFirst[i]; |
| 2796 |
| 2797 assert( pNew->pLeaf ); |
| 2798 assert( pRes->bTermEq==0 || pOther->pLeaf ); |
| 2799 |
| 2800 if( pRes->bTermEq ){ |
| 2801 if( pNew->iRowid==pOther->iRowid ){ |
| 2802 return 1; |
| 2803 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ |
| 2804 pIter->iSwitchRowid = pOther->iRowid; |
| 2805 pNew = pOther; |
| 2806 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ |
| 2807 pIter->iSwitchRowid = pOther->iRowid; |
| 2808 } |
| 2809 } |
| 2810 pRes->iFirst = (u16)(pNew - pIter->aSeg); |
| 2811 if( i==1 ) break; |
| 2812 |
| 2813 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; |
| 2814 } |
| 2815 } |
| 2816 |
| 2817 *ppFirst = pNew; |
| 2818 return 0; |
| 2819 } |
| 2820 |
| 2821 /* |
| 2822 ** Set the pIter->bEof variable based on the state of the sub-iterators. |
| 2823 */ |
| 2824 static void fts5MultiIterSetEof(Fts5Iter *pIter){ |
| 2825 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 2826 pIter->base.bEof = pSeg->pLeaf==0; |
| 2827 pIter->iSwitchRowid = pSeg->iRowid; |
| 2828 } |
| 2829 |
| 2830 /* |
| 2831 ** Move the iterator to the next entry. |
| 2832 ** |
| 2833 ** If an error occurs, an error code is left in Fts5Index.rc. It is not |
| 2834 ** considered an error if the iterator reaches EOF, or if it is already at |
| 2835 ** EOF when this function is called. |
| 2836 */ |
| 2837 static void fts5MultiIterNext( |
| 2838 Fts5Index *p, |
| 2839 Fts5Iter *pIter, |
| 2840 int bFrom, /* True if argument iFrom is valid */ |
| 2841 i64 iFrom /* Advance at least as far as this */ |
| 2842 ){ |
| 2843 int bUseFrom = bFrom; |
| 2844 assert( pIter->base.bEof==0 ); |
| 2845 while( p->rc==SQLITE_OK ){ |
| 2846 int iFirst = pIter->aFirst[1].iFirst; |
| 2847 int bNewTerm = 0; |
| 2848 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
| 2849 assert( p->rc==SQLITE_OK ); |
| 2850 if( bUseFrom && pSeg->pDlidx ){ |
| 2851 fts5SegIterNextFrom(p, pSeg, iFrom); |
| 2852 }else{ |
| 2853 pSeg->xNext(p, pSeg, &bNewTerm); |
| 2854 } |
| 2855 |
| 2856 if( pSeg->pLeaf==0 || bNewTerm |
| 2857 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
| 2858 ){ |
| 2859 fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
| 2860 fts5MultiIterSetEof(pIter); |
| 2861 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; |
| 2862 if( pSeg->pLeaf==0 ) return; |
| 2863 } |
| 2864 |
| 2865 fts5AssertMultiIterSetup(p, pIter); |
| 2866 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); |
| 2867 if( pIter->bSkipEmpty==0 || pSeg->nPos ){ |
| 2868 pIter->xSetOutputs(pIter, pSeg); |
| 2869 return; |
| 2870 } |
| 2871 bUseFrom = 0; |
| 2872 } |
| 2873 } |
| 2874 |
| 2875 static void fts5MultiIterNext2( |
| 2876 Fts5Index *p, |
| 2877 Fts5Iter *pIter, |
| 2878 int *pbNewTerm /* OUT: True if *might* be new term */ |
| 2879 ){ |
| 2880 assert( pIter->bSkipEmpty ); |
| 2881 if( p->rc==SQLITE_OK ){ |
| 2882 do { |
| 2883 int iFirst = pIter->aFirst[1].iFirst; |
| 2884 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
| 2885 int bNewTerm = 0; |
| 2886 |
| 2887 assert( p->rc==SQLITE_OK ); |
| 2888 pSeg->xNext(p, pSeg, &bNewTerm); |
| 2889 if( pSeg->pLeaf==0 || bNewTerm |
| 2890 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
| 2891 ){ |
| 2892 fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
| 2893 fts5MultiIterSetEof(pIter); |
| 2894 *pbNewTerm = 1; |
| 2895 }else{ |
| 2896 *pbNewTerm = 0; |
| 2897 } |
| 2898 fts5AssertMultiIterSetup(p, pIter); |
| 2899 |
| 2900 }while( fts5MultiIterIsEmpty(p, pIter) ); |
| 2901 } |
| 2902 } |
| 2903 |
| 2904 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ |
| 2905 UNUSED_PARAM2(pUnused1, pUnused2); |
| 2906 } |
| 2907 |
| 2908 static Fts5Iter *fts5MultiIterAlloc( |
| 2909 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 2910 int nSeg |
| 2911 ){ |
| 2912 Fts5Iter *pNew; |
| 2913 int nSlot; /* Power of two >= nSeg */ |
| 2914 |
| 2915 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); |
| 2916 pNew = fts5IdxMalloc(p, |
| 2917 sizeof(Fts5Iter) + /* pNew */ |
| 2918 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ |
| 2919 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ |
| 2920 ); |
| 2921 if( pNew ){ |
| 2922 pNew->nSeg = nSlot; |
| 2923 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; |
| 2924 pNew->pIndex = p; |
| 2925 pNew->xSetOutputs = fts5IterSetOutputs_Noop; |
| 2926 } |
| 2927 return pNew; |
| 2928 } |
| 2929 |
| 2930 static void fts5PoslistCallback( |
| 2931 Fts5Index *pUnused, |
| 2932 void *pContext, |
| 2933 const u8 *pChunk, int nChunk |
| 2934 ){ |
| 2935 UNUSED_PARAM(pUnused); |
| 2936 assert_nc( nChunk>=0 ); |
| 2937 if( nChunk>0 ){ |
| 2938 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); |
| 2939 } |
| 2940 } |
| 2941 |
| 2942 typedef struct PoslistCallbackCtx PoslistCallbackCtx; |
| 2943 struct PoslistCallbackCtx { |
| 2944 Fts5Buffer *pBuf; /* Append to this buffer */ |
| 2945 Fts5Colset *pColset; /* Restrict matches to this column */ |
| 2946 int eState; /* See above */ |
| 2947 }; |
| 2948 |
| 2949 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; |
| 2950 struct PoslistOffsetsCtx { |
| 2951 Fts5Buffer *pBuf; /* Append to this buffer */ |
| 2952 Fts5Colset *pColset; /* Restrict matches to this column */ |
| 2953 int iRead; |
| 2954 int iWrite; |
| 2955 }; |
| 2956 |
| 2957 /* |
| 2958 ** TODO: Make this more efficient! |
| 2959 */ |
| 2960 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ |
| 2961 int i; |
| 2962 for(i=0; i<pColset->nCol; i++){ |
| 2963 if( pColset->aiCol[i]==iCol ) return 1; |
| 2964 } |
| 2965 return 0; |
| 2966 } |
| 2967 |
| 2968 static void fts5PoslistOffsetsCallback( |
| 2969 Fts5Index *pUnused, |
| 2970 void *pContext, |
| 2971 const u8 *pChunk, int nChunk |
| 2972 ){ |
| 2973 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; |
| 2974 UNUSED_PARAM(pUnused); |
| 2975 assert_nc( nChunk>=0 ); |
| 2976 if( nChunk>0 ){ |
| 2977 int i = 0; |
| 2978 while( i<nChunk ){ |
| 2979 int iVal; |
| 2980 i += fts5GetVarint32(&pChunk[i], iVal); |
| 2981 iVal += pCtx->iRead - 2; |
| 2982 pCtx->iRead = iVal; |
| 2983 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ |
| 2984 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); |
| 2985 pCtx->iWrite = iVal; |
| 2986 } |
| 2987 } |
| 2988 } |
| 2989 } |
| 2990 |
| 2991 static void fts5PoslistFilterCallback( |
| 2992 Fts5Index *pUnused, |
| 2993 void *pContext, |
| 2994 const u8 *pChunk, int nChunk |
| 2995 ){ |
| 2996 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; |
| 2997 UNUSED_PARAM(pUnused); |
| 2998 assert_nc( nChunk>=0 ); |
| 2999 if( nChunk>0 ){ |
| 3000 /* Search through to find the first varint with value 1. This is the |
| 3001 ** start of the next columns hits. */ |
| 3002 int i = 0; |
| 3003 int iStart = 0; |
| 3004 |
| 3005 if( pCtx->eState==2 ){ |
| 3006 int iCol; |
| 3007 fts5FastGetVarint32(pChunk, i, iCol); |
| 3008 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ |
| 3009 pCtx->eState = 1; |
| 3010 fts5BufferSafeAppendVarint(pCtx->pBuf, 1); |
| 3011 }else{ |
| 3012 pCtx->eState = 0; |
| 3013 } |
| 3014 } |
| 3015 |
| 3016 do { |
| 3017 while( i<nChunk && pChunk[i]!=0x01 ){ |
| 3018 while( pChunk[i] & 0x80 ) i++; |
| 3019 i++; |
| 3020 } |
| 3021 if( pCtx->eState ){ |
| 3022 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
| 3023 } |
| 3024 if( i<nChunk ){ |
| 3025 int iCol; |
| 3026 iStart = i; |
| 3027 i++; |
| 3028 if( i>=nChunk ){ |
| 3029 pCtx->eState = 2; |
| 3030 }else{ |
| 3031 fts5FastGetVarint32(pChunk, i, iCol); |
| 3032 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); |
| 3033 if( pCtx->eState ){ |
| 3034 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
| 3035 iStart = i; |
| 3036 } |
| 3037 } |
| 3038 } |
| 3039 }while( i<nChunk ); |
| 3040 } |
| 3041 } |
| 3042 |
| 3043 static void fts5ChunkIterate( |
| 3044 Fts5Index *p, /* Index object */ |
| 3045 Fts5SegIter *pSeg, /* Poslist of this iterator */ |
| 3046 void *pCtx, /* Context pointer for xChunk callback */ |
| 3047 void (*xChunk)(Fts5Index*, void*, const u8*, int) |
| 3048 ){ |
| 3049 int nRem = pSeg->nPos; /* Number of bytes still to come */ |
| 3050 Fts5Data *pData = 0; |
| 3051 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 3052 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); |
| 3053 int pgno = pSeg->iLeafPgno; |
| 3054 int pgnoSave = 0; |
| 3055 |
| 3056 /* This function does notmwork with detail=none databases. */ |
| 3057 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 3058 |
| 3059 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ |
| 3060 pgnoSave = pgno+1; |
| 3061 } |
| 3062 |
| 3063 while( 1 ){ |
| 3064 xChunk(p, pCtx, pChunk, nChunk); |
| 3065 nRem -= nChunk; |
| 3066 fts5DataRelease(pData); |
| 3067 if( nRem<=0 ){ |
| 3068 break; |
| 3069 }else{ |
| 3070 pgno++; |
| 3071 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); |
| 3072 if( pData==0 ) break; |
| 3073 pChunk = &pData->p[4]; |
| 3074 nChunk = MIN(nRem, pData->szLeaf - 4); |
| 3075 if( pgno==pgnoSave ){ |
| 3076 assert( pSeg->pNextLeaf==0 ); |
| 3077 pSeg->pNextLeaf = pData; |
| 3078 pData = 0; |
| 3079 } |
| 3080 } |
| 3081 } |
| 3082 } |
| 3083 |
| 3084 /* |
| 3085 ** Iterator pIter currently points to a valid entry (not EOF). This |
| 3086 ** function appends the position list data for the current entry to |
| 3087 ** buffer pBuf. It does not make a copy of the position-list size |
| 3088 ** field. |
| 3089 */ |
| 3090 static void fts5SegiterPoslist( |
| 3091 Fts5Index *p, |
| 3092 Fts5SegIter *pSeg, |
| 3093 Fts5Colset *pColset, |
| 3094 Fts5Buffer *pBuf |
| 3095 ){ |
| 3096 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ |
| 3097 if( pColset==0 ){ |
| 3098 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); |
| 3099 }else{ |
| 3100 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 3101 PoslistCallbackCtx sCtx; |
| 3102 sCtx.pBuf = pBuf; |
| 3103 sCtx.pColset = pColset; |
| 3104 sCtx.eState = fts5IndexColsetTest(pColset, 0); |
| 3105 assert( sCtx.eState==0 || sCtx.eState==1 ); |
| 3106 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); |
| 3107 }else{ |
| 3108 PoslistOffsetsCtx sCtx; |
| 3109 memset(&sCtx, 0, sizeof(sCtx)); |
| 3110 sCtx.pBuf = pBuf; |
| 3111 sCtx.pColset = pColset; |
| 3112 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); |
| 3113 } |
| 3114 } |
| 3115 } |
| 3116 } |
| 3117 |
| 3118 /* |
| 3119 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If |
| 3120 ** the position list contains entries for column iCol, then (*pa) is set |
| 3121 ** to point to the sub-position-list for that column and the number of |
| 3122 ** bytes in it returned. Or, if the argument position list does not |
| 3123 ** contain any entries for column iCol, return 0. |
| 3124 */ |
| 3125 static int fts5IndexExtractCol( |
| 3126 const u8 **pa, /* IN/OUT: Pointer to poslist */ |
| 3127 int n, /* IN: Size of poslist in bytes */ |
| 3128 int iCol /* Column to extract from poslist */ |
| 3129 ){ |
| 3130 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ |
| 3131 const u8 *p = *pa; |
| 3132 const u8 *pEnd = &p[n]; /* One byte past end of position list */ |
| 3133 |
| 3134 while( iCol>iCurrent ){ |
| 3135 /* Advance pointer p until it points to pEnd or an 0x01 byte that is |
| 3136 ** not part of a varint. Note that it is not possible for a negative |
| 3137 ** or extremely large varint to occur within an uncorrupted position |
| 3138 ** list. So the last byte of each varint may be assumed to have a clear |
| 3139 ** 0x80 bit. */ |
| 3140 while( *p!=0x01 ){ |
| 3141 while( *p++ & 0x80 ); |
| 3142 if( p>=pEnd ) return 0; |
| 3143 } |
| 3144 *pa = p++; |
| 3145 iCurrent = *p++; |
| 3146 if( iCurrent & 0x80 ){ |
| 3147 p--; |
| 3148 p += fts5GetVarint32(p, iCurrent); |
| 3149 } |
| 3150 } |
| 3151 if( iCol!=iCurrent ) return 0; |
| 3152 |
| 3153 /* Advance pointer p until it points to pEnd or an 0x01 byte that is |
| 3154 ** not part of a varint */ |
| 3155 while( p<pEnd && *p!=0x01 ){ |
| 3156 while( *p++ & 0x80 ); |
| 3157 } |
| 3158 |
| 3159 return p - (*pa); |
| 3160 } |
| 3161 |
| 3162 static int fts5IndexExtractColset ( |
| 3163 Fts5Colset *pColset, /* Colset to filter on */ |
| 3164 const u8 *pPos, int nPos, /* Position list */ |
| 3165 Fts5Buffer *pBuf /* Output buffer */ |
| 3166 ){ |
| 3167 int rc = SQLITE_OK; |
| 3168 int i; |
| 3169 |
| 3170 fts5BufferZero(pBuf); |
| 3171 for(i=0; i<pColset->nCol; i++){ |
| 3172 const u8 *pSub = pPos; |
| 3173 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); |
| 3174 if( nSub ){ |
| 3175 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub); |
| 3176 } |
| 3177 } |
| 3178 return rc; |
| 3179 } |
| 3180 |
| 3181 /* |
| 3182 ** xSetOutputs callback used by detail=none tables. |
| 3183 */ |
| 3184 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3185 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); |
| 3186 pIter->base.iRowid = pSeg->iRowid; |
| 3187 pIter->base.nData = pSeg->nPos; |
| 3188 } |
| 3189 |
| 3190 /* |
| 3191 ** xSetOutputs callback used by detail=full and detail=col tables when no |
| 3192 ** column filters are specified. |
| 3193 */ |
| 3194 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3195 pIter->base.iRowid = pSeg->iRowid; |
| 3196 pIter->base.nData = pSeg->nPos; |
| 3197 |
| 3198 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
| 3199 assert( pIter->pColset==0 ); |
| 3200 |
| 3201 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
| 3202 /* All data is stored on the current page. Populate the output |
| 3203 ** variables to point into the body of the page object. */ |
| 3204 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 3205 }else{ |
| 3206 /* The data is distributed over two or more pages. Copy it into the |
| 3207 ** Fts5Iter.poslist buffer and then set the output pointer to point |
| 3208 ** to this buffer. */ |
| 3209 fts5BufferZero(&pIter->poslist); |
| 3210 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); |
| 3211 pIter->base.pData = pIter->poslist.p; |
| 3212 } |
| 3213 } |
| 3214 |
| 3215 /* |
| 3216 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match |
| 3217 ** against no columns at all). |
| 3218 */ |
| 3219 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3220 UNUSED_PARAM(pSeg); |
| 3221 pIter->base.nData = 0; |
| 3222 } |
| 3223 |
| 3224 /* |
| 3225 ** xSetOutputs callback used by detail=col when there is a column filter |
| 3226 ** and there are 100 or more columns. Also called as a fallback from |
| 3227 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. |
| 3228 */ |
| 3229 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3230 fts5BufferZero(&pIter->poslist); |
| 3231 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); |
| 3232 pIter->base.iRowid = pSeg->iRowid; |
| 3233 pIter->base.pData = pIter->poslist.p; |
| 3234 pIter->base.nData = pIter->poslist.n; |
| 3235 } |
| 3236 |
| 3237 /* |
| 3238 ** xSetOutputs callback used when: |
| 3239 ** |
| 3240 ** * detail=col, |
| 3241 ** * there is a column filter, and |
| 3242 ** * the table contains 100 or fewer columns. |
| 3243 ** |
| 3244 ** The last point is to ensure all column numbers are stored as |
| 3245 ** single-byte varints. |
| 3246 */ |
| 3247 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3248 |
| 3249 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 3250 assert( pIter->pColset ); |
| 3251 |
| 3252 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ |
| 3253 fts5IterSetOutputs_Col(pIter, pSeg); |
| 3254 }else{ |
| 3255 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 3256 u8 *pEnd = (u8*)&a[pSeg->nPos]; |
| 3257 int iPrev = 0; |
| 3258 int *aiCol = pIter->pColset->aiCol; |
| 3259 int *aiColEnd = &aiCol[pIter->pColset->nCol]; |
| 3260 |
| 3261 u8 *aOut = pIter->poslist.p; |
| 3262 int iPrevOut = 0; |
| 3263 |
| 3264 pIter->base.iRowid = pSeg->iRowid; |
| 3265 |
| 3266 while( a<pEnd ){ |
| 3267 iPrev += (int)a++[0] - 2; |
| 3268 while( *aiCol<iPrev ){ |
| 3269 aiCol++; |
| 3270 if( aiCol==aiColEnd ) goto setoutputs_col_out; |
| 3271 } |
| 3272 if( *aiCol==iPrev ){ |
| 3273 *aOut++ = (u8)((iPrev - iPrevOut) + 2); |
| 3274 iPrevOut = iPrev; |
| 3275 } |
| 3276 } |
| 3277 |
| 3278 setoutputs_col_out: |
| 3279 pIter->base.pData = pIter->poslist.p; |
| 3280 pIter->base.nData = aOut - pIter->poslist.p; |
| 3281 } |
| 3282 } |
| 3283 |
| 3284 /* |
| 3285 ** xSetOutputs callback used by detail=full when there is a column filter. |
| 3286 */ |
| 3287 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
| 3288 Fts5Colset *pColset = pIter->pColset; |
| 3289 pIter->base.iRowid = pSeg->iRowid; |
| 3290 |
| 3291 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); |
| 3292 assert( pColset ); |
| 3293 |
| 3294 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
| 3295 /* All data is stored on the current page. Populate the output |
| 3296 ** variables to point into the body of the page object. */ |
| 3297 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
| 3298 if( pColset->nCol==1 ){ |
| 3299 pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); |
| 3300 pIter->base.pData = a; |
| 3301 }else{ |
| 3302 fts5BufferZero(&pIter->poslist); |
| 3303 fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist); |
| 3304 pIter->base.pData = pIter->poslist.p; |
| 3305 pIter->base.nData = pIter->poslist.n; |
| 3306 } |
| 3307 }else{ |
| 3308 /* The data is distributed over two or more pages. Copy it into the |
| 3309 ** Fts5Iter.poslist buffer and then set the output pointer to point |
| 3310 ** to this buffer. */ |
| 3311 fts5BufferZero(&pIter->poslist); |
| 3312 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); |
| 3313 pIter->base.pData = pIter->poslist.p; |
| 3314 pIter->base.nData = pIter->poslist.n; |
| 3315 } |
| 3316 } |
| 3317 |
| 3318 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ |
| 3319 if( *pRc==SQLITE_OK ){ |
| 3320 Fts5Config *pConfig = pIter->pIndex->pConfig; |
| 3321 if( pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 3322 pIter->xSetOutputs = fts5IterSetOutputs_None; |
| 3323 } |
| 3324 |
| 3325 else if( pIter->pColset==0 ){ |
| 3326 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; |
| 3327 } |
| 3328 |
| 3329 else if( pIter->pColset->nCol==0 ){ |
| 3330 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; |
| 3331 } |
| 3332 |
| 3333 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 3334 pIter->xSetOutputs = fts5IterSetOutputs_Full; |
| 3335 } |
| 3336 |
| 3337 else{ |
| 3338 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 3339 if( pConfig->nCol<=100 ){ |
| 3340 pIter->xSetOutputs = fts5IterSetOutputs_Col100; |
| 3341 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); |
| 3342 }else{ |
| 3343 pIter->xSetOutputs = fts5IterSetOutputs_Col; |
| 3344 } |
| 3345 } |
| 3346 } |
| 3347 } |
| 3348 |
| 3349 |
| 3350 /* |
| 3351 ** Allocate a new Fts5Iter object. |
| 3352 ** |
| 3353 ** The new object will be used to iterate through data in structure pStruct. |
| 3354 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel |
| 3355 ** is zero or greater, data from the first nSegment segments on level iLevel |
| 3356 ** is merged. |
| 3357 ** |
| 3358 ** The iterator initially points to the first term/rowid entry in the |
| 3359 ** iterated data. |
| 3360 */ |
| 3361 static void fts5MultiIterNew( |
| 3362 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 3363 Fts5Structure *pStruct, /* Structure of specific index */ |
| 3364 int flags, /* FTS5INDEX_QUERY_XXX flags */ |
| 3365 Fts5Colset *pColset, /* Colset to filter on (or NULL) */ |
| 3366 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ |
| 3367 int iLevel, /* Level to iterate (-1 for all) */ |
| 3368 int nSegment, /* Number of segments to merge (iLevel>=0) */ |
| 3369 Fts5Iter **ppOut /* New object */ |
| 3370 ){ |
| 3371 int nSeg = 0; /* Number of segment-iters in use */ |
| 3372 int iIter = 0; /* */ |
| 3373 int iSeg; /* Used to iterate through segments */ |
| 3374 Fts5StructureLevel *pLvl; |
| 3375 Fts5Iter *pNew; |
| 3376 |
| 3377 assert( (pTerm==0 && nTerm==0) || iLevel<0 ); |
| 3378 |
| 3379 /* Allocate space for the new multi-seg-iterator. */ |
| 3380 if( p->rc==SQLITE_OK ){ |
| 3381 if( iLevel<0 ){ |
| 3382 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); |
| 3383 nSeg = pStruct->nSegment; |
| 3384 nSeg += (p->pHash ? 1 : 0); |
| 3385 }else{ |
| 3386 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); |
| 3387 } |
| 3388 } |
| 3389 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); |
| 3390 if( pNew==0 ) return; |
| 3391 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); |
| 3392 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); |
| 3393 pNew->pStruct = pStruct; |
| 3394 pNew->pColset = pColset; |
| 3395 fts5StructureRef(pStruct); |
| 3396 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){ |
| 3397 fts5IterSetOutputCb(&p->rc, pNew); |
| 3398 } |
| 3399 |
| 3400 /* Initialize each of the component segment iterators. */ |
| 3401 if( p->rc==SQLITE_OK ){ |
| 3402 if( iLevel<0 ){ |
| 3403 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; |
| 3404 if( p->pHash ){ |
| 3405 /* Add a segment iterator for the current contents of the hash table. */ |
| 3406 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; |
| 3407 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); |
| 3408 } |
| 3409 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ |
| 3410 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ |
| 3411 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; |
| 3412 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; |
| 3413 if( pTerm==0 ){ |
| 3414 fts5SegIterInit(p, pSeg, pIter); |
| 3415 }else{ |
| 3416 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); |
| 3417 } |
| 3418 } |
| 3419 } |
| 3420 }else{ |
| 3421 pLvl = &pStruct->aLevel[iLevel]; |
| 3422 for(iSeg=nSeg-1; iSeg>=0; iSeg--){ |
| 3423 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); |
| 3424 } |
| 3425 } |
| 3426 assert( iIter==nSeg ); |
| 3427 } |
| 3428 |
| 3429 /* If the above was successful, each component iterators now points |
| 3430 ** to the first entry in its segment. In this case initialize the |
| 3431 ** aFirst[] array. Or, if an error has occurred, free the iterator |
| 3432 ** object and set the output variable to NULL. */ |
| 3433 if( p->rc==SQLITE_OK ){ |
| 3434 for(iIter=pNew->nSeg-1; iIter>0; iIter--){ |
| 3435 int iEq; |
| 3436 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ |
| 3437 Fts5SegIter *pSeg = &pNew->aSeg[iEq]; |
| 3438 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); |
| 3439 fts5MultiIterAdvanced(p, pNew, iEq, iIter); |
| 3440 } |
| 3441 } |
| 3442 fts5MultiIterSetEof(pNew); |
| 3443 fts5AssertMultiIterSetup(p, pNew); |
| 3444 |
| 3445 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ |
| 3446 fts5MultiIterNext(p, pNew, 0, 0); |
| 3447 }else if( pNew->base.bEof==0 ){ |
| 3448 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; |
| 3449 pNew->xSetOutputs(pNew, pSeg); |
| 3450 } |
| 3451 |
| 3452 }else{ |
| 3453 fts5MultiIterFree(pNew); |
| 3454 *ppOut = 0; |
| 3455 } |
| 3456 } |
| 3457 |
| 3458 /* |
| 3459 ** Create an Fts5Iter that iterates through the doclist provided |
| 3460 ** as the second argument. |
| 3461 */ |
| 3462 static void fts5MultiIterNew2( |
| 3463 Fts5Index *p, /* FTS5 backend to iterate within */ |
| 3464 Fts5Data *pData, /* Doclist to iterate through */ |
| 3465 int bDesc, /* True for descending rowid order */ |
| 3466 Fts5Iter **ppOut /* New object */ |
| 3467 ){ |
| 3468 Fts5Iter *pNew; |
| 3469 pNew = fts5MultiIterAlloc(p, 2); |
| 3470 if( pNew ){ |
| 3471 Fts5SegIter *pIter = &pNew->aSeg[1]; |
| 3472 |
| 3473 pIter->flags = FTS5_SEGITER_ONETERM; |
| 3474 if( pData->szLeaf>0 ){ |
| 3475 pIter->pLeaf = pData; |
| 3476 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); |
| 3477 pIter->iEndofDoclist = pData->nn; |
| 3478 pNew->aFirst[1].iFirst = 1; |
| 3479 if( bDesc ){ |
| 3480 pNew->bRev = 1; |
| 3481 pIter->flags |= FTS5_SEGITER_REVERSE; |
| 3482 fts5SegIterReverseInitPage(p, pIter); |
| 3483 }else{ |
| 3484 fts5SegIterLoadNPos(p, pIter); |
| 3485 } |
| 3486 pData = 0; |
| 3487 }else{ |
| 3488 pNew->base.bEof = 1; |
| 3489 } |
| 3490 fts5SegIterSetNext(p, pIter); |
| 3491 |
| 3492 *ppOut = pNew; |
| 3493 } |
| 3494 |
| 3495 fts5DataRelease(pData); |
| 3496 } |
| 3497 |
| 3498 /* |
| 3499 ** Return true if the iterator is at EOF or if an error has occurred. |
| 3500 ** False otherwise. |
| 3501 */ |
| 3502 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ |
| 3503 assert( p->rc |
| 3504 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof |
| 3505 ); |
| 3506 return (p->rc || pIter->base.bEof); |
| 3507 } |
| 3508 |
| 3509 /* |
| 3510 ** Return the rowid of the entry that the iterator currently points |
| 3511 ** to. If the iterator points to EOF when this function is called the |
| 3512 ** results are undefined. |
| 3513 */ |
| 3514 static i64 fts5MultiIterRowid(Fts5Iter *pIter){ |
| 3515 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); |
| 3516 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; |
| 3517 } |
| 3518 |
| 3519 /* |
| 3520 ** Move the iterator to the next entry at or following iMatch. |
| 3521 */ |
| 3522 static void fts5MultiIterNextFrom( |
| 3523 Fts5Index *p, |
| 3524 Fts5Iter *pIter, |
| 3525 i64 iMatch |
| 3526 ){ |
| 3527 while( 1 ){ |
| 3528 i64 iRowid; |
| 3529 fts5MultiIterNext(p, pIter, 1, iMatch); |
| 3530 if( fts5MultiIterEof(p, pIter) ) break; |
| 3531 iRowid = fts5MultiIterRowid(pIter); |
| 3532 if( pIter->bRev==0 && iRowid>=iMatch ) break; |
| 3533 if( pIter->bRev!=0 && iRowid<=iMatch ) break; |
| 3534 } |
| 3535 } |
| 3536 |
| 3537 /* |
| 3538 ** Return a pointer to a buffer containing the term associated with the |
| 3539 ** entry that the iterator currently points to. |
| 3540 */ |
| 3541 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ |
| 3542 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 3543 *pn = p->term.n; |
| 3544 return p->term.p; |
| 3545 } |
| 3546 |
| 3547 /* |
| 3548 ** Allocate a new segment-id for the structure pStruct. The new segment |
| 3549 ** id must be between 1 and 65335 inclusive, and must not be used by |
| 3550 ** any currently existing segment. If a free segment id cannot be found, |
| 3551 ** SQLITE_FULL is returned. |
| 3552 ** |
| 3553 ** If an error has already occurred, this function is a no-op. 0 is |
| 3554 ** returned in this case. |
| 3555 */ |
| 3556 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ |
| 3557 int iSegid = 0; |
| 3558 |
| 3559 if( p->rc==SQLITE_OK ){ |
| 3560 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ |
| 3561 p->rc = SQLITE_FULL; |
| 3562 }else{ |
| 3563 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following |
| 3564 ** array is 63 elements, or 252 bytes, in size. */ |
| 3565 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32]; |
| 3566 int iLvl, iSeg; |
| 3567 int i; |
| 3568 u32 mask; |
| 3569 memset(aUsed, 0, sizeof(aUsed)); |
| 3570 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 3571 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 3572 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; |
| 3573 if( iId<=FTS5_MAX_SEGMENT ){ |
| 3574 aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32); |
| 3575 } |
| 3576 } |
| 3577 } |
| 3578 |
| 3579 for(i=0; aUsed[i]==0xFFFFFFFF; i++); |
| 3580 mask = aUsed[i]; |
| 3581 for(iSegid=0; mask & (1 << iSegid); iSegid++); |
| 3582 iSegid += 1 + i*32; |
| 3583 |
| 3584 #ifdef SQLITE_DEBUG |
| 3585 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 3586 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 3587 assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); |
| 3588 } |
| 3589 } |
| 3590 assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); |
| 3591 |
| 3592 { |
| 3593 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); |
| 3594 if( p->rc==SQLITE_OK ){ |
| 3595 u8 aBlob[2] = {0xff, 0xff}; |
| 3596 sqlite3_bind_int(pIdxSelect, 1, iSegid); |
| 3597 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); |
| 3598 assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); |
| 3599 p->rc = sqlite3_reset(pIdxSelect); |
| 3600 } |
| 3601 } |
| 3602 #endif |
| 3603 } |
| 3604 } |
| 3605 |
| 3606 return iSegid; |
| 3607 } |
| 3608 |
| 3609 /* |
| 3610 ** Discard all data currently cached in the hash-tables. |
| 3611 */ |
| 3612 static void fts5IndexDiscardData(Fts5Index *p){ |
| 3613 assert( p->pHash || p->nPendingData==0 ); |
| 3614 if( p->pHash ){ |
| 3615 sqlite3Fts5HashClear(p->pHash); |
| 3616 p->nPendingData = 0; |
| 3617 } |
| 3618 } |
| 3619 |
| 3620 /* |
| 3621 ** Return the size of the prefix, in bytes, that buffer |
| 3622 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). |
| 3623 ** |
| 3624 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater |
| 3625 ** than buffer (pOld/nOld). |
| 3626 */ |
| 3627 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ |
| 3628 int i; |
| 3629 for(i=0; i<nOld; i++){ |
| 3630 if( pOld[i]!=pNew[i] ) break; |
| 3631 } |
| 3632 return i; |
| 3633 } |
| 3634 |
| 3635 static void fts5WriteDlidxClear( |
| 3636 Fts5Index *p, |
| 3637 Fts5SegWriter *pWriter, |
| 3638 int bFlush /* If true, write dlidx to disk */ |
| 3639 ){ |
| 3640 int i; |
| 3641 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); |
| 3642 for(i=0; i<pWriter->nDlidx; i++){ |
| 3643 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; |
| 3644 if( pDlidx->buf.n==0 ) break; |
| 3645 if( bFlush ){ |
| 3646 assert( pDlidx->pgno!=0 ); |
| 3647 fts5DataWrite(p, |
| 3648 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), |
| 3649 pDlidx->buf.p, pDlidx->buf.n |
| 3650 ); |
| 3651 } |
| 3652 sqlite3Fts5BufferZero(&pDlidx->buf); |
| 3653 pDlidx->bPrevValid = 0; |
| 3654 } |
| 3655 } |
| 3656 |
| 3657 /* |
| 3658 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. |
| 3659 ** Any new array elements are zeroed before returning. |
| 3660 */ |
| 3661 static int fts5WriteDlidxGrow( |
| 3662 Fts5Index *p, |
| 3663 Fts5SegWriter *pWriter, |
| 3664 int nLvl |
| 3665 ){ |
| 3666 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ |
| 3667 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( |
| 3668 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl |
| 3669 ); |
| 3670 if( aDlidx==0 ){ |
| 3671 p->rc = SQLITE_NOMEM; |
| 3672 }else{ |
| 3673 int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); |
| 3674 memset(&aDlidx[pWriter->nDlidx], 0, nByte); |
| 3675 pWriter->aDlidx = aDlidx; |
| 3676 pWriter->nDlidx = nLvl; |
| 3677 } |
| 3678 } |
| 3679 return p->rc; |
| 3680 } |
| 3681 |
| 3682 /* |
| 3683 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large |
| 3684 ** enough, flush it to disk and return 1. Otherwise discard it and return |
| 3685 ** zero. |
| 3686 */ |
| 3687 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 3688 int bFlag = 0; |
| 3689 |
| 3690 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written |
| 3691 ** to the database, also write the doclist-index to disk. */ |
| 3692 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ |
| 3693 bFlag = 1; |
| 3694 } |
| 3695 fts5WriteDlidxClear(p, pWriter, bFlag); |
| 3696 pWriter->nEmpty = 0; |
| 3697 return bFlag; |
| 3698 } |
| 3699 |
| 3700 /* |
| 3701 ** This function is called whenever processing of the doclist for the |
| 3702 ** last term on leaf page (pWriter->iBtPage) is completed. |
| 3703 ** |
| 3704 ** The doclist-index for that term is currently stored in-memory within the |
| 3705 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function |
| 3706 ** writes it out to disk. Or, if it is too small to bother with, discards |
| 3707 ** it. |
| 3708 ** |
| 3709 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. |
| 3710 */ |
| 3711 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 3712 int bFlag; |
| 3713 |
| 3714 assert( pWriter->iBtPage || pWriter->nEmpty==0 ); |
| 3715 if( pWriter->iBtPage==0 ) return; |
| 3716 bFlag = fts5WriteFlushDlidx(p, pWriter); |
| 3717 |
| 3718 if( p->rc==SQLITE_OK ){ |
| 3719 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); |
| 3720 /* The following was already done in fts5WriteInit(): */ |
| 3721 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ |
| 3722 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); |
| 3723 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); |
| 3724 sqlite3_step(p->pIdxWriter); |
| 3725 p->rc = sqlite3_reset(p->pIdxWriter); |
| 3726 } |
| 3727 pWriter->iBtPage = 0; |
| 3728 } |
| 3729 |
| 3730 /* |
| 3731 ** This is called once for each leaf page except the first that contains |
| 3732 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that |
| 3733 ** is larger than all terms written to earlier leaves, and equal to or |
| 3734 ** smaller than the first term on the new leaf. |
| 3735 ** |
| 3736 ** If an error occurs, an error code is left in Fts5Index.rc. If an error |
| 3737 ** has already occurred when this function is called, it is a no-op. |
| 3738 */ |
| 3739 static void fts5WriteBtreeTerm( |
| 3740 Fts5Index *p, /* FTS5 backend object */ |
| 3741 Fts5SegWriter *pWriter, /* Writer object */ |
| 3742 int nTerm, const u8 *pTerm /* First term on new page */ |
| 3743 ){ |
| 3744 fts5WriteFlushBtree(p, pWriter); |
| 3745 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); |
| 3746 pWriter->iBtPage = pWriter->writer.pgno; |
| 3747 } |
| 3748 |
| 3749 /* |
| 3750 ** This function is called when flushing a leaf page that contains no |
| 3751 ** terms at all to disk. |
| 3752 */ |
| 3753 static void fts5WriteBtreeNoTerm( |
| 3754 Fts5Index *p, /* FTS5 backend object */ |
| 3755 Fts5SegWriter *pWriter /* Writer object */ |
| 3756 ){ |
| 3757 /* If there were no rowids on the leaf page either and the doclist-index |
| 3758 ** has already been started, append an 0x00 byte to it. */ |
| 3759 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ |
| 3760 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; |
| 3761 assert( pDlidx->bPrevValid ); |
| 3762 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); |
| 3763 } |
| 3764 |
| 3765 /* Increment the "number of sequential leaves without a term" counter. */ |
| 3766 pWriter->nEmpty++; |
| 3767 } |
| 3768 |
| 3769 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ |
| 3770 i64 iRowid; |
| 3771 int iOff; |
| 3772 |
| 3773 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); |
| 3774 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); |
| 3775 return iRowid; |
| 3776 } |
| 3777 |
| 3778 /* |
| 3779 ** Rowid iRowid has just been appended to the current leaf page. It is the |
| 3780 ** first on the page. This function appends an appropriate entry to the current |
| 3781 ** doclist-index. |
| 3782 */ |
| 3783 static void fts5WriteDlidxAppend( |
| 3784 Fts5Index *p, |
| 3785 Fts5SegWriter *pWriter, |
| 3786 i64 iRowid |
| 3787 ){ |
| 3788 int i; |
| 3789 int bDone = 0; |
| 3790 |
| 3791 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ |
| 3792 i64 iVal; |
| 3793 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; |
| 3794 |
| 3795 if( pDlidx->buf.n>=p->pConfig->pgsz ){ |
| 3796 /* The current doclist-index page is full. Write it to disk and push |
| 3797 ** a copy of iRowid (which will become the first rowid on the next |
| 3798 ** doclist-index leaf page) up into the next level of the b-tree |
| 3799 ** hierarchy. If the node being flushed is currently the root node, |
| 3800 ** also push its first rowid upwards. */ |
| 3801 pDlidx->buf.p[0] = 0x01; /* Not the root node */ |
| 3802 fts5DataWrite(p, |
| 3803 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), |
| 3804 pDlidx->buf.p, pDlidx->buf.n |
| 3805 ); |
| 3806 fts5WriteDlidxGrow(p, pWriter, i+2); |
| 3807 pDlidx = &pWriter->aDlidx[i]; |
| 3808 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ |
| 3809 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); |
| 3810 |
| 3811 /* This was the root node. Push its first rowid up to the new root. */ |
| 3812 pDlidx[1].pgno = pDlidx->pgno; |
| 3813 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); |
| 3814 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); |
| 3815 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); |
| 3816 pDlidx[1].bPrevValid = 1; |
| 3817 pDlidx[1].iPrev = iFirst; |
| 3818 } |
| 3819 |
| 3820 sqlite3Fts5BufferZero(&pDlidx->buf); |
| 3821 pDlidx->bPrevValid = 0; |
| 3822 pDlidx->pgno++; |
| 3823 }else{ |
| 3824 bDone = 1; |
| 3825 } |
| 3826 |
| 3827 if( pDlidx->bPrevValid ){ |
| 3828 iVal = iRowid - pDlidx->iPrev; |
| 3829 }else{ |
| 3830 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); |
| 3831 assert( pDlidx->buf.n==0 ); |
| 3832 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); |
| 3833 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); |
| 3834 iVal = iRowid; |
| 3835 } |
| 3836 |
| 3837 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); |
| 3838 pDlidx->bPrevValid = 1; |
| 3839 pDlidx->iPrev = iRowid; |
| 3840 } |
| 3841 } |
| 3842 |
| 3843 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ |
| 3844 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; |
| 3845 Fts5PageWriter *pPage = &pWriter->writer; |
| 3846 i64 iRowid; |
| 3847 |
| 3848 static int nCall = 0; |
| 3849 nCall++; |
| 3850 |
| 3851 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); |
| 3852 |
| 3853 /* Set the szLeaf header field. */ |
| 3854 assert( 0==fts5GetU16(&pPage->buf.p[2]) ); |
| 3855 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); |
| 3856 |
| 3857 if( pWriter->bFirstTermInPage ){ |
| 3858 /* No term was written to this page. */ |
| 3859 assert( pPage->pgidx.n==0 ); |
| 3860 fts5WriteBtreeNoTerm(p, pWriter); |
| 3861 }else{ |
| 3862 /* Append the pgidx to the page buffer. Set the szLeaf header field. */ |
| 3863 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); |
| 3864 } |
| 3865 |
| 3866 /* Write the page out to disk */ |
| 3867 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); |
| 3868 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); |
| 3869 |
| 3870 /* Initialize the next page. */ |
| 3871 fts5BufferZero(&pPage->buf); |
| 3872 fts5BufferZero(&pPage->pgidx); |
| 3873 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); |
| 3874 pPage->iPrevPgidx = 0; |
| 3875 pPage->pgno++; |
| 3876 |
| 3877 /* Increase the leaves written counter */ |
| 3878 pWriter->nLeafWritten++; |
| 3879 |
| 3880 /* The new leaf holds no terms or rowids */ |
| 3881 pWriter->bFirstTermInPage = 1; |
| 3882 pWriter->bFirstRowidInPage = 1; |
| 3883 } |
| 3884 |
| 3885 /* |
| 3886 ** Append term pTerm/nTerm to the segment being written by the writer passed |
| 3887 ** as the second argument. |
| 3888 ** |
| 3889 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 3890 ** already occurred, this function is a no-op. |
| 3891 */ |
| 3892 static void fts5WriteAppendTerm( |
| 3893 Fts5Index *p, |
| 3894 Fts5SegWriter *pWriter, |
| 3895 int nTerm, const u8 *pTerm |
| 3896 ){ |
| 3897 int nPrefix; /* Bytes of prefix compression for term */ |
| 3898 Fts5PageWriter *pPage = &pWriter->writer; |
| 3899 Fts5Buffer *pPgidx = &pWriter->writer.pgidx; |
| 3900 |
| 3901 assert( p->rc==SQLITE_OK ); |
| 3902 assert( pPage->buf.n>=4 ); |
| 3903 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); |
| 3904 |
| 3905 /* If the current leaf page is full, flush it to disk. */ |
| 3906 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ |
| 3907 if( pPage->buf.n>4 ){ |
| 3908 fts5WriteFlushLeaf(p, pWriter); |
| 3909 } |
| 3910 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); |
| 3911 } |
| 3912 |
| 3913 /* TODO1: Updating pgidx here. */ |
| 3914 pPgidx->n += sqlite3Fts5PutVarint( |
| 3915 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx |
| 3916 ); |
| 3917 pPage->iPrevPgidx = pPage->buf.n; |
| 3918 #if 0 |
| 3919 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); |
| 3920 pPgidx->n += 2; |
| 3921 #endif |
| 3922 |
| 3923 if( pWriter->bFirstTermInPage ){ |
| 3924 nPrefix = 0; |
| 3925 if( pPage->pgno!=1 ){ |
| 3926 /* This is the first term on a leaf that is not the leftmost leaf in |
| 3927 ** the segment b-tree. In this case it is necessary to add a term to |
| 3928 ** the b-tree hierarchy that is (a) larger than the largest term |
| 3929 ** already written to the segment and (b) smaller than or equal to |
| 3930 ** this term. In other words, a prefix of (pTerm/nTerm) that is one |
| 3931 ** byte longer than the longest prefix (pTerm/nTerm) shares with the |
| 3932 ** previous term. |
| 3933 ** |
| 3934 ** Usually, the previous term is available in pPage->term. The exception |
| 3935 ** is if this is the first term written in an incremental-merge step. |
| 3936 ** In this case the previous term is not available, so just write a |
| 3937 ** copy of (pTerm/nTerm) into the parent node. This is slightly |
| 3938 ** inefficient, but still correct. */ |
| 3939 int n = nTerm; |
| 3940 if( pPage->term.n ){ |
| 3941 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); |
| 3942 } |
| 3943 fts5WriteBtreeTerm(p, pWriter, n, pTerm); |
| 3944 pPage = &pWriter->writer; |
| 3945 } |
| 3946 }else{ |
| 3947 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); |
| 3948 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); |
| 3949 } |
| 3950 |
| 3951 /* Append the number of bytes of new data, then the term data itself |
| 3952 ** to the page. */ |
| 3953 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); |
| 3954 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); |
| 3955 |
| 3956 /* Update the Fts5PageWriter.term field. */ |
| 3957 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); |
| 3958 pWriter->bFirstTermInPage = 0; |
| 3959 |
| 3960 pWriter->bFirstRowidInPage = 0; |
| 3961 pWriter->bFirstRowidInDoclist = 1; |
| 3962 |
| 3963 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); |
| 3964 pWriter->aDlidx[0].pgno = pPage->pgno; |
| 3965 } |
| 3966 |
| 3967 /* |
| 3968 ** Append a rowid and position-list size field to the writers output. |
| 3969 */ |
| 3970 static void fts5WriteAppendRowid( |
| 3971 Fts5Index *p, |
| 3972 Fts5SegWriter *pWriter, |
| 3973 i64 iRowid |
| 3974 ){ |
| 3975 if( p->rc==SQLITE_OK ){ |
| 3976 Fts5PageWriter *pPage = &pWriter->writer; |
| 3977 |
| 3978 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ |
| 3979 fts5WriteFlushLeaf(p, pWriter); |
| 3980 } |
| 3981 |
| 3982 /* If this is to be the first rowid written to the page, set the |
| 3983 ** rowid-pointer in the page-header. Also append a value to the dlidx |
| 3984 ** buffer, in case a doclist-index is required. */ |
| 3985 if( pWriter->bFirstRowidInPage ){ |
| 3986 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); |
| 3987 fts5WriteDlidxAppend(p, pWriter, iRowid); |
| 3988 } |
| 3989 |
| 3990 /* Write the rowid. */ |
| 3991 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ |
| 3992 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); |
| 3993 }else{ |
| 3994 assert( p->rc || iRowid>pWriter->iPrevRowid ); |
| 3995 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); |
| 3996 } |
| 3997 pWriter->iPrevRowid = iRowid; |
| 3998 pWriter->bFirstRowidInDoclist = 0; |
| 3999 pWriter->bFirstRowidInPage = 0; |
| 4000 } |
| 4001 } |
| 4002 |
| 4003 static void fts5WriteAppendPoslistData( |
| 4004 Fts5Index *p, |
| 4005 Fts5SegWriter *pWriter, |
| 4006 const u8 *aData, |
| 4007 int nData |
| 4008 ){ |
| 4009 Fts5PageWriter *pPage = &pWriter->writer; |
| 4010 const u8 *a = aData; |
| 4011 int n = nData; |
| 4012 |
| 4013 assert( p->pConfig->pgsz>0 ); |
| 4014 while( p->rc==SQLITE_OK |
| 4015 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz |
| 4016 ){ |
| 4017 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; |
| 4018 int nCopy = 0; |
| 4019 while( nCopy<nReq ){ |
| 4020 i64 dummy; |
| 4021 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy); |
| 4022 } |
| 4023 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); |
| 4024 a += nCopy; |
| 4025 n -= nCopy; |
| 4026 fts5WriteFlushLeaf(p, pWriter); |
| 4027 } |
| 4028 if( n>0 ){ |
| 4029 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); |
| 4030 } |
| 4031 } |
| 4032 |
| 4033 /* |
| 4034 ** Flush any data cached by the writer object to the database. Free any |
| 4035 ** allocations associated with the writer. |
| 4036 */ |
| 4037 static void fts5WriteFinish( |
| 4038 Fts5Index *p, |
| 4039 Fts5SegWriter *pWriter, /* Writer object */ |
| 4040 int *pnLeaf /* OUT: Number of leaf pages in b-tree */ |
| 4041 ){ |
| 4042 int i; |
| 4043 Fts5PageWriter *pLeaf = &pWriter->writer; |
| 4044 if( p->rc==SQLITE_OK ){ |
| 4045 assert( pLeaf->pgno>=1 ); |
| 4046 if( pLeaf->buf.n>4 ){ |
| 4047 fts5WriteFlushLeaf(p, pWriter); |
| 4048 } |
| 4049 *pnLeaf = pLeaf->pgno-1; |
| 4050 if( pLeaf->pgno>1 ){ |
| 4051 fts5WriteFlushBtree(p, pWriter); |
| 4052 } |
| 4053 } |
| 4054 fts5BufferFree(&pLeaf->term); |
| 4055 fts5BufferFree(&pLeaf->buf); |
| 4056 fts5BufferFree(&pLeaf->pgidx); |
| 4057 fts5BufferFree(&pWriter->btterm); |
| 4058 |
| 4059 for(i=0; i<pWriter->nDlidx; i++){ |
| 4060 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); |
| 4061 } |
| 4062 sqlite3_free(pWriter->aDlidx); |
| 4063 } |
| 4064 |
| 4065 static void fts5WriteInit( |
| 4066 Fts5Index *p, |
| 4067 Fts5SegWriter *pWriter, |
| 4068 int iSegid |
| 4069 ){ |
| 4070 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; |
| 4071 |
| 4072 memset(pWriter, 0, sizeof(Fts5SegWriter)); |
| 4073 pWriter->iSegid = iSegid; |
| 4074 |
| 4075 fts5WriteDlidxGrow(p, pWriter, 1); |
| 4076 pWriter->writer.pgno = 1; |
| 4077 pWriter->bFirstTermInPage = 1; |
| 4078 pWriter->iBtPage = 1; |
| 4079 |
| 4080 assert( pWriter->writer.buf.n==0 ); |
| 4081 assert( pWriter->writer.pgidx.n==0 ); |
| 4082 |
| 4083 /* Grow the two buffers to pgsz + padding bytes in size. */ |
| 4084 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); |
| 4085 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); |
| 4086 |
| 4087 if( p->pIdxWriter==0 ){ |
| 4088 Fts5Config *pConfig = p->pConfig; |
| 4089 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( |
| 4090 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", |
| 4091 pConfig->zDb, pConfig->zName |
| 4092 )); |
| 4093 } |
| 4094 |
| 4095 if( p->rc==SQLITE_OK ){ |
| 4096 /* Initialize the 4-byte leaf-page header to 0x00. */ |
| 4097 memset(pWriter->writer.buf.p, 0, 4); |
| 4098 pWriter->writer.buf.n = 4; |
| 4099 |
| 4100 /* Bind the current output segment id to the index-writer. This is an |
| 4101 ** optimization over binding the same value over and over as rows are |
| 4102 ** inserted into %_idx by the current writer. */ |
| 4103 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); |
| 4104 } |
| 4105 } |
| 4106 |
| 4107 /* |
| 4108 ** Iterator pIter was used to iterate through the input segments of on an |
| 4109 ** incremental merge operation. This function is called if the incremental |
| 4110 ** merge step has finished but the input has not been completely exhausted. |
| 4111 */ |
| 4112 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ |
| 4113 int i; |
| 4114 Fts5Buffer buf; |
| 4115 memset(&buf, 0, sizeof(Fts5Buffer)); |
| 4116 for(i=0; i<pIter->nSeg; i++){ |
| 4117 Fts5SegIter *pSeg = &pIter->aSeg[i]; |
| 4118 if( pSeg->pSeg==0 ){ |
| 4119 /* no-op */ |
| 4120 }else if( pSeg->pLeaf==0 ){ |
| 4121 /* All keys from this input segment have been transfered to the output. |
| 4122 ** Set both the first and last page-numbers to 0 to indicate that the |
| 4123 ** segment is now empty. */ |
| 4124 pSeg->pSeg->pgnoLast = 0; |
| 4125 pSeg->pSeg->pgnoFirst = 0; |
| 4126 }else{ |
| 4127 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ |
| 4128 i64 iLeafRowid; |
| 4129 Fts5Data *pData; |
| 4130 int iId = pSeg->pSeg->iSegid; |
| 4131 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; |
| 4132 |
| 4133 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); |
| 4134 pData = fts5DataRead(p, iLeafRowid); |
| 4135 if( pData ){ |
| 4136 fts5BufferZero(&buf); |
| 4137 fts5BufferGrow(&p->rc, &buf, pData->nn); |
| 4138 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); |
| 4139 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); |
| 4140 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); |
| 4141 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); |
| 4142 if( p->rc==SQLITE_OK ){ |
| 4143 /* Set the szLeaf field */ |
| 4144 fts5PutU16(&buf.p[2], (u16)buf.n); |
| 4145 } |
| 4146 |
| 4147 /* Set up the new page-index array */ |
| 4148 fts5BufferAppendVarint(&p->rc, &buf, 4); |
| 4149 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno |
| 4150 && pSeg->iEndofDoclist<pData->szLeaf |
| 4151 ){ |
| 4152 int nDiff = pData->szLeaf - pSeg->iEndofDoclist; |
| 4153 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); |
| 4154 fts5BufferAppendBlob(&p->rc, &buf, |
| 4155 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] |
| 4156 ); |
| 4157 } |
| 4158 |
| 4159 fts5DataRelease(pData); |
| 4160 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; |
| 4161 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); |
| 4162 fts5DataWrite(p, iLeafRowid, buf.p, buf.n); |
| 4163 } |
| 4164 } |
| 4165 } |
| 4166 fts5BufferFree(&buf); |
| 4167 } |
| 4168 |
| 4169 static void fts5MergeChunkCallback( |
| 4170 Fts5Index *p, |
| 4171 void *pCtx, |
| 4172 const u8 *pChunk, int nChunk |
| 4173 ){ |
| 4174 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; |
| 4175 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); |
| 4176 } |
| 4177 |
| 4178 /* |
| 4179 ** |
| 4180 */ |
| 4181 static void fts5IndexMergeLevel( |
| 4182 Fts5Index *p, /* FTS5 backend object */ |
| 4183 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ |
| 4184 int iLvl, /* Level to read input from */ |
| 4185 int *pnRem /* Write up to this many output leaves */ |
| 4186 ){ |
| 4187 Fts5Structure *pStruct = *ppStruct; |
| 4188 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 4189 Fts5StructureLevel *pLvlOut; |
| 4190 Fts5Iter *pIter = 0; /* Iterator to read input data */ |
| 4191 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ |
| 4192 int nInput; /* Number of input segments */ |
| 4193 Fts5SegWriter writer; /* Writer object */ |
| 4194 Fts5StructureSegment *pSeg; /* Output segment */ |
| 4195 Fts5Buffer term; |
| 4196 int bOldest; /* True if the output segment is the oldest */ |
| 4197 int eDetail = p->pConfig->eDetail; |
| 4198 const int flags = FTS5INDEX_QUERY_NOOUTPUT; |
| 4199 |
| 4200 assert( iLvl<pStruct->nLevel ); |
| 4201 assert( pLvl->nMerge<=pLvl->nSeg ); |
| 4202 |
| 4203 memset(&writer, 0, sizeof(Fts5SegWriter)); |
| 4204 memset(&term, 0, sizeof(Fts5Buffer)); |
| 4205 if( pLvl->nMerge ){ |
| 4206 pLvlOut = &pStruct->aLevel[iLvl+1]; |
| 4207 assert( pLvlOut->nSeg>0 ); |
| 4208 nInput = pLvl->nMerge; |
| 4209 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; |
| 4210 |
| 4211 fts5WriteInit(p, &writer, pSeg->iSegid); |
| 4212 writer.writer.pgno = pSeg->pgnoLast+1; |
| 4213 writer.iBtPage = 0; |
| 4214 }else{ |
| 4215 int iSegid = fts5AllocateSegid(p, pStruct); |
| 4216 |
| 4217 /* Extend the Fts5Structure object as required to ensure the output |
| 4218 ** segment exists. */ |
| 4219 if( iLvl==pStruct->nLevel-1 ){ |
| 4220 fts5StructureAddLevel(&p->rc, ppStruct); |
| 4221 pStruct = *ppStruct; |
| 4222 } |
| 4223 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); |
| 4224 if( p->rc ) return; |
| 4225 pLvl = &pStruct->aLevel[iLvl]; |
| 4226 pLvlOut = &pStruct->aLevel[iLvl+1]; |
| 4227 |
| 4228 fts5WriteInit(p, &writer, iSegid); |
| 4229 |
| 4230 /* Add the new segment to the output level */ |
| 4231 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; |
| 4232 pLvlOut->nSeg++; |
| 4233 pSeg->pgnoFirst = 1; |
| 4234 pSeg->iSegid = iSegid; |
| 4235 pStruct->nSegment++; |
| 4236 |
| 4237 /* Read input from all segments in the input level */ |
| 4238 nInput = pLvl->nSeg; |
| 4239 } |
| 4240 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); |
| 4241 |
| 4242 assert( iLvl>=0 ); |
| 4243 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); |
| 4244 fts5MultiIterEof(p, pIter)==0; |
| 4245 fts5MultiIterNext(p, pIter, 0, 0) |
| 4246 ){ |
| 4247 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 4248 int nPos; /* position-list size field value */ |
| 4249 int nTerm; |
| 4250 const u8 *pTerm; |
| 4251 |
| 4252 /* Check for key annihilation. */ |
| 4253 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; |
| 4254 |
| 4255 pTerm = fts5MultiIterTerm(pIter, &nTerm); |
| 4256 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ |
| 4257 if( pnRem && writer.nLeafWritten>nRem ){ |
| 4258 break; |
| 4259 } |
| 4260 |
| 4261 /* This is a new term. Append a term to the output segment. */ |
| 4262 fts5WriteAppendTerm(p, &writer, nTerm, pTerm); |
| 4263 fts5BufferSet(&p->rc, &term, nTerm, pTerm); |
| 4264 } |
| 4265 |
| 4266 /* Append the rowid to the output */ |
| 4267 /* WRITEPOSLISTSIZE */ |
| 4268 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); |
| 4269 |
| 4270 if( eDetail==FTS5_DETAIL_NONE ){ |
| 4271 if( pSegIter->bDel ){ |
| 4272 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); |
| 4273 if( pSegIter->nPos>0 ){ |
| 4274 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); |
| 4275 } |
| 4276 } |
| 4277 }else{ |
| 4278 /* Append the position-list data to the output */ |
| 4279 nPos = pSegIter->nPos*2 + pSegIter->bDel; |
| 4280 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); |
| 4281 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); |
| 4282 } |
| 4283 } |
| 4284 |
| 4285 /* Flush the last leaf page to disk. Set the output segment b-tree height |
| 4286 ** and last leaf page number at the same time. */ |
| 4287 fts5WriteFinish(p, &writer, &pSeg->pgnoLast); |
| 4288 |
| 4289 if( fts5MultiIterEof(p, pIter) ){ |
| 4290 int i; |
| 4291 |
| 4292 /* Remove the redundant segments from the %_data table */ |
| 4293 for(i=0; i<nInput; i++){ |
| 4294 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); |
| 4295 } |
| 4296 |
| 4297 /* Remove the redundant segments from the input level */ |
| 4298 if( pLvl->nSeg!=nInput ){ |
| 4299 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); |
| 4300 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); |
| 4301 } |
| 4302 pStruct->nSegment -= nInput; |
| 4303 pLvl->nSeg -= nInput; |
| 4304 pLvl->nMerge = 0; |
| 4305 if( pSeg->pgnoLast==0 ){ |
| 4306 pLvlOut->nSeg--; |
| 4307 pStruct->nSegment--; |
| 4308 } |
| 4309 }else{ |
| 4310 assert( pSeg->pgnoLast>0 ); |
| 4311 fts5TrimSegments(p, pIter); |
| 4312 pLvl->nMerge = nInput; |
| 4313 } |
| 4314 |
| 4315 fts5MultiIterFree(pIter); |
| 4316 fts5BufferFree(&term); |
| 4317 if( pnRem ) *pnRem -= writer.nLeafWritten; |
| 4318 } |
| 4319 |
| 4320 /* |
| 4321 ** Do up to nPg pages of automerge work on the index. |
| 4322 ** |
| 4323 ** Return true if any changes were actually made, or false otherwise. |
| 4324 */ |
| 4325 static int fts5IndexMerge( |
| 4326 Fts5Index *p, /* FTS5 backend object */ |
| 4327 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
| 4328 int nPg, /* Pages of work to do */ |
| 4329 int nMin /* Minimum number of segments to merge */ |
| 4330 ){ |
| 4331 int nRem = nPg; |
| 4332 int bRet = 0; |
| 4333 Fts5Structure *pStruct = *ppStruct; |
| 4334 while( nRem>0 && p->rc==SQLITE_OK ){ |
| 4335 int iLvl; /* To iterate through levels */ |
| 4336 int iBestLvl = 0; /* Level offering the most input segments */ |
| 4337 int nBest = 0; /* Number of input segments on best level */ |
| 4338 |
| 4339 /* Set iBestLvl to the level to read input segments from. */ |
| 4340 assert( pStruct->nLevel>0 ); |
| 4341 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 4342 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
| 4343 if( pLvl->nMerge ){ |
| 4344 if( pLvl->nMerge>nBest ){ |
| 4345 iBestLvl = iLvl; |
| 4346 nBest = pLvl->nMerge; |
| 4347 } |
| 4348 break; |
| 4349 } |
| 4350 if( pLvl->nSeg>nBest ){ |
| 4351 nBest = pLvl->nSeg; |
| 4352 iBestLvl = iLvl; |
| 4353 } |
| 4354 } |
| 4355 |
| 4356 /* If nBest is still 0, then the index must be empty. */ |
| 4357 #ifdef SQLITE_DEBUG |
| 4358 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ |
| 4359 assert( pStruct->aLevel[iLvl].nSeg==0 ); |
| 4360 } |
| 4361 #endif |
| 4362 |
| 4363 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
| 4364 break; |
| 4365 } |
| 4366 bRet = 1; |
| 4367 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); |
| 4368 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
| 4369 fts5StructurePromote(p, iBestLvl+1, pStruct); |
| 4370 } |
| 4371 } |
| 4372 *ppStruct = pStruct; |
| 4373 return bRet; |
| 4374 } |
| 4375 |
| 4376 /* |
| 4377 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 |
| 4378 ** segment. This function updates the write-counter accordingly and, if |
| 4379 ** necessary, performs incremental merge work. |
| 4380 ** |
| 4381 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 4382 ** already occurred, this function is a no-op. |
| 4383 */ |
| 4384 static void fts5IndexAutomerge( |
| 4385 Fts5Index *p, /* FTS5 backend object */ |
| 4386 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
| 4387 int nLeaf /* Number of output leaves just written */ |
| 4388 ){ |
| 4389 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ |
| 4390 Fts5Structure *pStruct = *ppStruct; |
| 4391 u64 nWrite; /* Initial value of write-counter */ |
| 4392 int nWork; /* Number of work-quanta to perform */ |
| 4393 int nRem; /* Number of leaf pages left to write */ |
| 4394 |
| 4395 /* Update the write-counter. While doing so, set nWork. */ |
| 4396 nWrite = pStruct->nWriteCounter; |
| 4397 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); |
| 4398 pStruct->nWriteCounter += nLeaf; |
| 4399 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); |
| 4400 |
| 4401 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); |
| 4402 } |
| 4403 } |
| 4404 |
| 4405 static void fts5IndexCrisismerge( |
| 4406 Fts5Index *p, /* FTS5 backend object */ |
| 4407 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ |
| 4408 ){ |
| 4409 const int nCrisis = p->pConfig->nCrisisMerge; |
| 4410 Fts5Structure *pStruct = *ppStruct; |
| 4411 int iLvl = 0; |
| 4412 |
| 4413 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); |
| 4414 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ |
| 4415 fts5IndexMergeLevel(p, &pStruct, iLvl, 0); |
| 4416 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); |
| 4417 fts5StructurePromote(p, iLvl+1, pStruct); |
| 4418 iLvl++; |
| 4419 } |
| 4420 *ppStruct = pStruct; |
| 4421 } |
| 4422 |
| 4423 static int fts5IndexReturn(Fts5Index *p){ |
| 4424 int rc = p->rc; |
| 4425 p->rc = SQLITE_OK; |
| 4426 return rc; |
| 4427 } |
| 4428 |
| 4429 typedef struct Fts5FlushCtx Fts5FlushCtx; |
| 4430 struct Fts5FlushCtx { |
| 4431 Fts5Index *pIdx; |
| 4432 Fts5SegWriter writer; |
| 4433 }; |
| 4434 |
| 4435 /* |
| 4436 ** Buffer aBuf[] contains a list of varints, all small enough to fit |
| 4437 ** in a 32-bit integer. Return the size of the largest prefix of this |
| 4438 ** list nMax bytes or less in size. |
| 4439 */ |
| 4440 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ |
| 4441 int ret; |
| 4442 u32 dummy; |
| 4443 ret = fts5GetVarint32(aBuf, dummy); |
| 4444 if( ret<nMax ){ |
| 4445 while( 1 ){ |
| 4446 int i = fts5GetVarint32(&aBuf[ret], dummy); |
| 4447 if( (ret + i) > nMax ) break; |
| 4448 ret += i; |
| 4449 } |
| 4450 } |
| 4451 return ret; |
| 4452 } |
| 4453 |
| 4454 /* |
| 4455 ** Flush the contents of in-memory hash table iHash to a new level-0 |
| 4456 ** segment on disk. Also update the corresponding structure record. |
| 4457 ** |
| 4458 ** If an error occurs, set the Fts5Index.rc error code. If an error has |
| 4459 ** already occurred, this function is a no-op. |
| 4460 */ |
| 4461 static void fts5FlushOneHash(Fts5Index *p){ |
| 4462 Fts5Hash *pHash = p->pHash; |
| 4463 Fts5Structure *pStruct; |
| 4464 int iSegid; |
| 4465 int pgnoLast = 0; /* Last leaf page number in segment */ |
| 4466 |
| 4467 /* Obtain a reference to the index structure and allocate a new segment-id |
| 4468 ** for the new level-0 segment. */ |
| 4469 pStruct = fts5StructureRead(p); |
| 4470 iSegid = fts5AllocateSegid(p, pStruct); |
| 4471 fts5StructureInvalidate(p); |
| 4472 |
| 4473 if( iSegid ){ |
| 4474 const int pgsz = p->pConfig->pgsz; |
| 4475 int eDetail = p->pConfig->eDetail; |
| 4476 Fts5StructureSegment *pSeg; /* New segment within pStruct */ |
| 4477 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ |
| 4478 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ |
| 4479 |
| 4480 Fts5SegWriter writer; |
| 4481 fts5WriteInit(p, &writer, iSegid); |
| 4482 |
| 4483 pBuf = &writer.writer.buf; |
| 4484 pPgidx = &writer.writer.pgidx; |
| 4485 |
| 4486 /* fts5WriteInit() should have initialized the buffers to (most likely) |
| 4487 ** the maximum space required. */ |
| 4488 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); |
| 4489 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); |
| 4490 |
| 4491 /* Begin scanning through hash table entries. This loop runs once for each |
| 4492 ** term/doclist currently stored within the hash table. */ |
| 4493 if( p->rc==SQLITE_OK ){ |
| 4494 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); |
| 4495 } |
| 4496 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ |
| 4497 const char *zTerm; /* Buffer containing term */ |
| 4498 const u8 *pDoclist; /* Pointer to doclist for this term */ |
| 4499 int nDoclist; /* Size of doclist in bytes */ |
| 4500 |
| 4501 /* Write the term for this entry to disk. */ |
| 4502 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); |
| 4503 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); |
| 4504 |
| 4505 assert( writer.bFirstRowidInPage==0 ); |
| 4506 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ |
| 4507 /* The entire doclist will fit on the current leaf. */ |
| 4508 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); |
| 4509 }else{ |
| 4510 i64 iRowid = 0; |
| 4511 i64 iDelta = 0; |
| 4512 int iOff = 0; |
| 4513 |
| 4514 /* The entire doclist will not fit on this leaf. The following |
| 4515 ** loop iterates through the poslists that make up the current |
| 4516 ** doclist. */ |
| 4517 while( p->rc==SQLITE_OK && iOff<nDoclist ){ |
| 4518 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta); |
| 4519 iRowid += iDelta; |
| 4520 |
| 4521 if( writer.bFirstRowidInPage ){ |
| 4522 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ |
| 4523 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); |
| 4524 writer.bFirstRowidInPage = 0; |
| 4525 fts5WriteDlidxAppend(p, &writer, iRowid); |
| 4526 }else{ |
| 4527 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); |
| 4528 } |
| 4529 assert( pBuf->n<=pBuf->nSpace ); |
| 4530 |
| 4531 if( eDetail==FTS5_DETAIL_NONE ){ |
| 4532 if( iOff<nDoclist && pDoclist[iOff]==0 ){ |
| 4533 pBuf->p[pBuf->n++] = 0; |
| 4534 iOff++; |
| 4535 if( iOff<nDoclist && pDoclist[iOff]==0 ){ |
| 4536 pBuf->p[pBuf->n++] = 0; |
| 4537 iOff++; |
| 4538 } |
| 4539 } |
| 4540 if( (pBuf->n + pPgidx->n)>=pgsz ){ |
| 4541 fts5WriteFlushLeaf(p, &writer); |
| 4542 } |
| 4543 }else{ |
| 4544 int bDummy; |
| 4545 int nPos; |
| 4546 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); |
| 4547 nCopy += nPos; |
| 4548 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ |
| 4549 /* The entire poslist will fit on the current leaf. So copy |
| 4550 ** it in one go. */ |
| 4551 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); |
| 4552 }else{ |
| 4553 /* The entire poslist will not fit on this leaf. So it needs |
| 4554 ** to be broken into sections. The only qualification being |
| 4555 ** that each varint must be stored contiguously. */ |
| 4556 const u8 *pPoslist = &pDoclist[iOff]; |
| 4557 int iPos = 0; |
| 4558 while( p->rc==SQLITE_OK ){ |
| 4559 int nSpace = pgsz - pBuf->n - pPgidx->n; |
| 4560 int n = 0; |
| 4561 if( (nCopy - iPos)<=nSpace ){ |
| 4562 n = nCopy - iPos; |
| 4563 }else{ |
| 4564 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); |
| 4565 } |
| 4566 assert( n>0 ); |
| 4567 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); |
| 4568 iPos += n; |
| 4569 if( (pBuf->n + pPgidx->n)>=pgsz ){ |
| 4570 fts5WriteFlushLeaf(p, &writer); |
| 4571 } |
| 4572 if( iPos>=nCopy ) break; |
| 4573 } |
| 4574 } |
| 4575 iOff += nCopy; |
| 4576 } |
| 4577 } |
| 4578 } |
| 4579 |
| 4580 /* TODO2: Doclist terminator written here. */ |
| 4581 /* pBuf->p[pBuf->n++] = '\0'; */ |
| 4582 assert( pBuf->n<=pBuf->nSpace ); |
| 4583 sqlite3Fts5HashScanNext(pHash); |
| 4584 } |
| 4585 sqlite3Fts5HashClear(pHash); |
| 4586 fts5WriteFinish(p, &writer, &pgnoLast); |
| 4587 |
| 4588 /* Update the Fts5Structure. It is written back to the database by the |
| 4589 ** fts5StructureRelease() call below. */ |
| 4590 if( pStruct->nLevel==0 ){ |
| 4591 fts5StructureAddLevel(&p->rc, &pStruct); |
| 4592 } |
| 4593 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); |
| 4594 if( p->rc==SQLITE_OK ){ |
| 4595 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; |
| 4596 pSeg->iSegid = iSegid; |
| 4597 pSeg->pgnoFirst = 1; |
| 4598 pSeg->pgnoLast = pgnoLast; |
| 4599 pStruct->nSegment++; |
| 4600 } |
| 4601 fts5StructurePromote(p, 0, pStruct); |
| 4602 } |
| 4603 |
| 4604 fts5IndexAutomerge(p, &pStruct, pgnoLast); |
| 4605 fts5IndexCrisismerge(p, &pStruct); |
| 4606 fts5StructureWrite(p, pStruct); |
| 4607 fts5StructureRelease(pStruct); |
| 4608 } |
| 4609 |
| 4610 /* |
| 4611 ** Flush any data stored in the in-memory hash tables to the database. |
| 4612 */ |
| 4613 static void fts5IndexFlush(Fts5Index *p){ |
| 4614 /* Unless it is empty, flush the hash table to disk */ |
| 4615 if( p->nPendingData ){ |
| 4616 assert( p->pHash ); |
| 4617 p->nPendingData = 0; |
| 4618 fts5FlushOneHash(p); |
| 4619 } |
| 4620 } |
| 4621 |
| 4622 static Fts5Structure *fts5IndexOptimizeStruct( |
| 4623 Fts5Index *p, |
| 4624 Fts5Structure *pStruct |
| 4625 ){ |
| 4626 Fts5Structure *pNew = 0; |
| 4627 int nByte = sizeof(Fts5Structure); |
| 4628 int nSeg = pStruct->nSegment; |
| 4629 int i; |
| 4630 |
| 4631 /* Figure out if this structure requires optimization. A structure does |
| 4632 ** not require optimization if either: |
| 4633 ** |
| 4634 ** + it consists of fewer than two segments, or |
| 4635 ** + all segments are on the same level, or |
| 4636 ** + all segments except one are currently inputs to a merge operation. |
| 4637 ** |
| 4638 ** In the first case, return NULL. In the second, increment the ref-count |
| 4639 ** on *pStruct and return a copy of the pointer to it. |
| 4640 */ |
| 4641 if( nSeg<2 ) return 0; |
| 4642 for(i=0; i<pStruct->nLevel; i++){ |
| 4643 int nThis = pStruct->aLevel[i].nSeg; |
| 4644 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ |
| 4645 fts5StructureRef(pStruct); |
| 4646 return pStruct; |
| 4647 } |
| 4648 assert( pStruct->aLevel[i].nMerge<=nThis ); |
| 4649 } |
| 4650 |
| 4651 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); |
| 4652 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); |
| 4653 |
| 4654 if( pNew ){ |
| 4655 Fts5StructureLevel *pLvl; |
| 4656 nByte = nSeg * sizeof(Fts5StructureSegment); |
| 4657 pNew->nLevel = pStruct->nLevel+1; |
| 4658 pNew->nRef = 1; |
| 4659 pNew->nWriteCounter = pStruct->nWriteCounter; |
| 4660 pLvl = &pNew->aLevel[pStruct->nLevel]; |
| 4661 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); |
| 4662 if( pLvl->aSeg ){ |
| 4663 int iLvl, iSeg; |
| 4664 int iSegOut = 0; |
| 4665 /* Iterate through all segments, from oldest to newest. Add them to |
| 4666 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest |
| 4667 ** segment in the data structure. */ |
| 4668 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ |
| 4669 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 4670 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; |
| 4671 iSegOut++; |
| 4672 } |
| 4673 } |
| 4674 pNew->nSegment = pLvl->nSeg = nSeg; |
| 4675 }else{ |
| 4676 sqlite3_free(pNew); |
| 4677 pNew = 0; |
| 4678 } |
| 4679 } |
| 4680 |
| 4681 return pNew; |
| 4682 } |
| 4683 |
| 4684 int sqlite3Fts5IndexOptimize(Fts5Index *p){ |
| 4685 Fts5Structure *pStruct; |
| 4686 Fts5Structure *pNew = 0; |
| 4687 |
| 4688 assert( p->rc==SQLITE_OK ); |
| 4689 fts5IndexFlush(p); |
| 4690 pStruct = fts5StructureRead(p); |
| 4691 fts5StructureInvalidate(p); |
| 4692 |
| 4693 if( pStruct ){ |
| 4694 pNew = fts5IndexOptimizeStruct(p, pStruct); |
| 4695 } |
| 4696 fts5StructureRelease(pStruct); |
| 4697 |
| 4698 assert( pNew==0 || pNew->nSegment>0 ); |
| 4699 if( pNew ){ |
| 4700 int iLvl; |
| 4701 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} |
| 4702 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ |
| 4703 int nRem = FTS5_OPT_WORK_UNIT; |
| 4704 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); |
| 4705 } |
| 4706 |
| 4707 fts5StructureWrite(p, pNew); |
| 4708 fts5StructureRelease(pNew); |
| 4709 } |
| 4710 |
| 4711 return fts5IndexReturn(p); |
| 4712 } |
| 4713 |
| 4714 /* |
| 4715 ** This is called to implement the special "VALUES('merge', $nMerge)" |
| 4716 ** INSERT command. |
| 4717 */ |
| 4718 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ |
| 4719 Fts5Structure *pStruct = fts5StructureRead(p); |
| 4720 if( pStruct ){ |
| 4721 int nMin = p->pConfig->nUsermerge; |
| 4722 fts5StructureInvalidate(p); |
| 4723 if( nMerge<0 ){ |
| 4724 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); |
| 4725 fts5StructureRelease(pStruct); |
| 4726 pStruct = pNew; |
| 4727 nMin = 2; |
| 4728 nMerge = nMerge*-1; |
| 4729 } |
| 4730 if( pStruct && pStruct->nLevel ){ |
| 4731 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ |
| 4732 fts5StructureWrite(p, pStruct); |
| 4733 } |
| 4734 } |
| 4735 fts5StructureRelease(pStruct); |
| 4736 } |
| 4737 return fts5IndexReturn(p); |
| 4738 } |
| 4739 |
| 4740 static void fts5AppendRowid( |
| 4741 Fts5Index *p, |
| 4742 i64 iDelta, |
| 4743 Fts5Iter *pUnused, |
| 4744 Fts5Buffer *pBuf |
| 4745 ){ |
| 4746 UNUSED_PARAM(pUnused); |
| 4747 fts5BufferAppendVarint(&p->rc, pBuf, iDelta); |
| 4748 } |
| 4749 |
| 4750 static void fts5AppendPoslist( |
| 4751 Fts5Index *p, |
| 4752 i64 iDelta, |
| 4753 Fts5Iter *pMulti, |
| 4754 Fts5Buffer *pBuf |
| 4755 ){ |
| 4756 int nData = pMulti->base.nData; |
| 4757 assert( nData>0 ); |
| 4758 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){ |
| 4759 fts5BufferSafeAppendVarint(pBuf, iDelta); |
| 4760 fts5BufferSafeAppendVarint(pBuf, nData*2); |
| 4761 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); |
| 4762 } |
| 4763 } |
| 4764 |
| 4765 |
| 4766 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ |
| 4767 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; |
| 4768 |
| 4769 assert( pIter->aPoslist ); |
| 4770 if( p>=pIter->aEof ){ |
| 4771 pIter->aPoslist = 0; |
| 4772 }else{ |
| 4773 i64 iDelta; |
| 4774 |
| 4775 p += fts5GetVarint(p, (u64*)&iDelta); |
| 4776 pIter->iRowid += iDelta; |
| 4777 |
| 4778 /* Read position list size */ |
| 4779 if( p[0] & 0x80 ){ |
| 4780 int nPos; |
| 4781 pIter->nSize = fts5GetVarint32(p, nPos); |
| 4782 pIter->nPoslist = (nPos>>1); |
| 4783 }else{ |
| 4784 pIter->nPoslist = ((int)(p[0])) >> 1; |
| 4785 pIter->nSize = 1; |
| 4786 } |
| 4787 |
| 4788 pIter->aPoslist = p; |
| 4789 } |
| 4790 } |
| 4791 |
| 4792 static void fts5DoclistIterInit( |
| 4793 Fts5Buffer *pBuf, |
| 4794 Fts5DoclistIter *pIter |
| 4795 ){ |
| 4796 memset(pIter, 0, sizeof(*pIter)); |
| 4797 pIter->aPoslist = pBuf->p; |
| 4798 pIter->aEof = &pBuf->p[pBuf->n]; |
| 4799 fts5DoclistIterNext(pIter); |
| 4800 } |
| 4801 |
| 4802 #if 0 |
| 4803 /* |
| 4804 ** Append a doclist to buffer pBuf. |
| 4805 ** |
| 4806 ** This function assumes that space within the buffer has already been |
| 4807 ** allocated. |
| 4808 */ |
| 4809 static void fts5MergeAppendDocid( |
| 4810 Fts5Buffer *pBuf, /* Buffer to write to */ |
| 4811 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ |
| 4812 i64 iRowid /* Rowid to append */ |
| 4813 ){ |
| 4814 assert( pBuf->n!=0 || (*piLastRowid)==0 ); |
| 4815 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid); |
| 4816 *piLastRowid = iRowid; |
| 4817 } |
| 4818 #endif |
| 4819 |
| 4820 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ |
| 4821 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ |
| 4822 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ |
| 4823 (iLastRowid) = (iRowid); \ |
| 4824 } |
| 4825 |
| 4826 /* |
| 4827 ** Swap the contents of buffer *p1 with that of *p2. |
| 4828 */ |
| 4829 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ |
| 4830 Fts5Buffer tmp = *p1; |
| 4831 *p1 = *p2; |
| 4832 *p2 = tmp; |
| 4833 } |
| 4834 |
| 4835 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ |
| 4836 int i = *piOff; |
| 4837 if( i>=pBuf->n ){ |
| 4838 *piOff = -1; |
| 4839 }else{ |
| 4840 u64 iVal; |
| 4841 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); |
| 4842 *piRowid += iVal; |
| 4843 } |
| 4844 } |
| 4845 |
| 4846 /* |
| 4847 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. |
| 4848 ** In this case the buffers consist of a delta-encoded list of rowids only. |
| 4849 */ |
| 4850 static void fts5MergeRowidLists( |
| 4851 Fts5Index *p, /* FTS5 backend object */ |
| 4852 Fts5Buffer *p1, /* First list to merge */ |
| 4853 Fts5Buffer *p2 /* Second list to merge */ |
| 4854 ){ |
| 4855 int i1 = 0; |
| 4856 int i2 = 0; |
| 4857 i64 iRowid1 = 0; |
| 4858 i64 iRowid2 = 0; |
| 4859 i64 iOut = 0; |
| 4860 |
| 4861 Fts5Buffer out; |
| 4862 memset(&out, 0, sizeof(out)); |
| 4863 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); |
| 4864 if( p->rc ) return; |
| 4865 |
| 4866 fts5NextRowid(p1, &i1, &iRowid1); |
| 4867 fts5NextRowid(p2, &i2, &iRowid2); |
| 4868 while( i1>=0 || i2>=0 ){ |
| 4869 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ |
| 4870 assert( iOut==0 || iRowid1>iOut ); |
| 4871 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); |
| 4872 iOut = iRowid1; |
| 4873 fts5NextRowid(p1, &i1, &iRowid1); |
| 4874 }else{ |
| 4875 assert( iOut==0 || iRowid2>iOut ); |
| 4876 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); |
| 4877 iOut = iRowid2; |
| 4878 if( i1>=0 && iRowid1==iRowid2 ){ |
| 4879 fts5NextRowid(p1, &i1, &iRowid1); |
| 4880 } |
| 4881 fts5NextRowid(p2, &i2, &iRowid2); |
| 4882 } |
| 4883 } |
| 4884 |
| 4885 fts5BufferSwap(&out, p1); |
| 4886 fts5BufferFree(&out); |
| 4887 } |
| 4888 |
| 4889 /* |
| 4890 ** Buffers p1 and p2 contain doclists. This function merges the content |
| 4891 ** of the two doclists together and sets buffer p1 to the result before |
| 4892 ** returning. |
| 4893 ** |
| 4894 ** If an error occurs, an error code is left in p->rc. If an error has |
| 4895 ** already occurred, this function is a no-op. |
| 4896 */ |
| 4897 static void fts5MergePrefixLists( |
| 4898 Fts5Index *p, /* FTS5 backend object */ |
| 4899 Fts5Buffer *p1, /* First list to merge */ |
| 4900 Fts5Buffer *p2 /* Second list to merge */ |
| 4901 ){ |
| 4902 if( p2->n ){ |
| 4903 i64 iLastRowid = 0; |
| 4904 Fts5DoclistIter i1; |
| 4905 Fts5DoclistIter i2; |
| 4906 Fts5Buffer out = {0, 0, 0}; |
| 4907 Fts5Buffer tmp = {0, 0, 0}; |
| 4908 |
| 4909 if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return; |
| 4910 fts5DoclistIterInit(p1, &i1); |
| 4911 fts5DoclistIterInit(p2, &i2); |
| 4912 |
| 4913 while( 1 ){ |
| 4914 if( i1.iRowid<i2.iRowid ){ |
| 4915 /* Copy entry from i1 */ |
| 4916 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid); |
| 4917 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize); |
| 4918 fts5DoclistIterNext(&i1); |
| 4919 if( i1.aPoslist==0 ) break; |
| 4920 } |
| 4921 else if( i2.iRowid!=i1.iRowid ){ |
| 4922 /* Copy entry from i2 */ |
| 4923 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); |
| 4924 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize); |
| 4925 fts5DoclistIterNext(&i2); |
| 4926 if( i2.aPoslist==0 ) break; |
| 4927 } |
| 4928 else{ |
| 4929 /* Merge the two position lists. */ |
| 4930 i64 iPos1 = 0; |
| 4931 i64 iPos2 = 0; |
| 4932 int iOff1 = 0; |
| 4933 int iOff2 = 0; |
| 4934 u8 *a1 = &i1.aPoslist[i1.nSize]; |
| 4935 u8 *a2 = &i2.aPoslist[i2.nSize]; |
| 4936 |
| 4937 i64 iPrev = 0; |
| 4938 Fts5PoslistWriter writer; |
| 4939 memset(&writer, 0, sizeof(writer)); |
| 4940 |
| 4941 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); |
| 4942 fts5BufferZero(&tmp); |
| 4943 sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist); |
| 4944 if( p->rc ) break; |
| 4945 |
| 4946 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); |
| 4947 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); |
| 4948 assert( iPos1>=0 && iPos2>=0 ); |
| 4949 |
| 4950 if( iPos1<iPos2 ){ |
| 4951 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); |
| 4952 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); |
| 4953 }else{ |
| 4954 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); |
| 4955 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); |
| 4956 } |
| 4957 |
| 4958 if( iPos1>=0 && iPos2>=0 ){ |
| 4959 while( 1 ){ |
| 4960 if( iPos1<iPos2 ){ |
| 4961 if( iPos1!=iPrev ){ |
| 4962 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); |
| 4963 } |
| 4964 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); |
| 4965 if( iPos1<0 ) break; |
| 4966 }else{ |
| 4967 assert( iPos2!=iPrev ); |
| 4968 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); |
| 4969 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); |
| 4970 if( iPos2<0 ) break; |
| 4971 } |
| 4972 } |
| 4973 } |
| 4974 |
| 4975 if( iPos1>=0 ){ |
| 4976 if( iPos1!=iPrev ){ |
| 4977 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); |
| 4978 } |
| 4979 fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1); |
| 4980 }else{ |
| 4981 assert( iPos2>=0 && iPos2!=iPrev ); |
| 4982 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); |
| 4983 fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2); |
| 4984 } |
| 4985 |
| 4986 /* WRITEPOSLISTSIZE */ |
| 4987 fts5BufferSafeAppendVarint(&out, tmp.n * 2); |
| 4988 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); |
| 4989 fts5DoclistIterNext(&i1); |
| 4990 fts5DoclistIterNext(&i2); |
| 4991 if( i1.aPoslist==0 || i2.aPoslist==0 ) break; |
| 4992 } |
| 4993 } |
| 4994 |
| 4995 if( i1.aPoslist ){ |
| 4996 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid); |
| 4997 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist); |
| 4998 } |
| 4999 else if( i2.aPoslist ){ |
| 5000 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); |
| 5001 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist); |
| 5002 } |
| 5003 |
| 5004 fts5BufferSet(&p->rc, p1, out.n, out.p); |
| 5005 fts5BufferFree(&tmp); |
| 5006 fts5BufferFree(&out); |
| 5007 } |
| 5008 } |
| 5009 |
| 5010 static void fts5SetupPrefixIter( |
| 5011 Fts5Index *p, /* Index to read from */ |
| 5012 int bDesc, /* True for "ORDER BY rowid DESC" */ |
| 5013 const u8 *pToken, /* Buffer containing prefix to match */ |
| 5014 int nToken, /* Size of buffer pToken in bytes */ |
| 5015 Fts5Colset *pColset, /* Restrict matches to these columns */ |
| 5016 Fts5Iter **ppIter /* OUT: New iterator */ |
| 5017 ){ |
| 5018 Fts5Structure *pStruct; |
| 5019 Fts5Buffer *aBuf; |
| 5020 const int nBuf = 32; |
| 5021 |
| 5022 void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); |
| 5023 void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*); |
| 5024 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 5025 xMerge = fts5MergeRowidLists; |
| 5026 xAppend = fts5AppendRowid; |
| 5027 }else{ |
| 5028 xMerge = fts5MergePrefixLists; |
| 5029 xAppend = fts5AppendPoslist; |
| 5030 } |
| 5031 |
| 5032 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); |
| 5033 pStruct = fts5StructureRead(p); |
| 5034 |
| 5035 if( aBuf && pStruct ){ |
| 5036 const int flags = FTS5INDEX_QUERY_SCAN |
| 5037 | FTS5INDEX_QUERY_SKIPEMPTY |
| 5038 | FTS5INDEX_QUERY_NOOUTPUT; |
| 5039 int i; |
| 5040 i64 iLastRowid = 0; |
| 5041 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ |
| 5042 Fts5Data *pData; |
| 5043 Fts5Buffer doclist; |
| 5044 int bNewTerm = 1; |
| 5045 |
| 5046 memset(&doclist, 0, sizeof(doclist)); |
| 5047 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); |
| 5048 fts5IterSetOutputCb(&p->rc, p1); |
| 5049 for( /* no-op */ ; |
| 5050 fts5MultiIterEof(p, p1)==0; |
| 5051 fts5MultiIterNext2(p, p1, &bNewTerm) |
| 5052 ){ |
| 5053 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; |
| 5054 int nTerm = pSeg->term.n; |
| 5055 const u8 *pTerm = pSeg->term.p; |
| 5056 p1->xSetOutputs(p1, pSeg); |
| 5057 |
| 5058 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); |
| 5059 if( bNewTerm ){ |
| 5060 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; |
| 5061 } |
| 5062 |
| 5063 if( p1->base.nData==0 ) continue; |
| 5064 |
| 5065 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ |
| 5066 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ |
| 5067 assert( i<nBuf ); |
| 5068 if( aBuf[i].n==0 ){ |
| 5069 fts5BufferSwap(&doclist, &aBuf[i]); |
| 5070 fts5BufferZero(&doclist); |
| 5071 }else{ |
| 5072 xMerge(p, &doclist, &aBuf[i]); |
| 5073 fts5BufferZero(&aBuf[i]); |
| 5074 } |
| 5075 } |
| 5076 iLastRowid = 0; |
| 5077 } |
| 5078 |
| 5079 xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); |
| 5080 iLastRowid = p1->base.iRowid; |
| 5081 } |
| 5082 |
| 5083 for(i=0; i<nBuf; i++){ |
| 5084 if( p->rc==SQLITE_OK ){ |
| 5085 xMerge(p, &doclist, &aBuf[i]); |
| 5086 } |
| 5087 fts5BufferFree(&aBuf[i]); |
| 5088 } |
| 5089 fts5MultiIterFree(p1); |
| 5090 |
| 5091 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); |
| 5092 if( pData ){ |
| 5093 pData->p = (u8*)&pData[1]; |
| 5094 pData->nn = pData->szLeaf = doclist.n; |
| 5095 memcpy(pData->p, doclist.p, doclist.n); |
| 5096 fts5MultiIterNew2(p, pData, bDesc, ppIter); |
| 5097 } |
| 5098 fts5BufferFree(&doclist); |
| 5099 } |
| 5100 |
| 5101 fts5StructureRelease(pStruct); |
| 5102 sqlite3_free(aBuf); |
| 5103 } |
| 5104 |
| 5105 |
| 5106 /* |
| 5107 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain |
| 5108 ** to the document with rowid iRowid. |
| 5109 */ |
| 5110 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ |
| 5111 assert( p->rc==SQLITE_OK ); |
| 5112 |
| 5113 /* Allocate the hash table if it has not already been allocated */ |
| 5114 if( p->pHash==0 ){ |
| 5115 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); |
| 5116 } |
| 5117 |
| 5118 /* Flush the hash table to disk if required */ |
| 5119 if( iRowid<p->iWriteRowid |
| 5120 || (iRowid==p->iWriteRowid && p->bDelete==0) |
| 5121 || (p->nPendingData > p->pConfig->nHashSize) |
| 5122 ){ |
| 5123 fts5IndexFlush(p); |
| 5124 } |
| 5125 |
| 5126 p->iWriteRowid = iRowid; |
| 5127 p->bDelete = bDelete; |
| 5128 return fts5IndexReturn(p); |
| 5129 } |
| 5130 |
| 5131 /* |
| 5132 ** Commit data to disk. |
| 5133 */ |
| 5134 int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ |
| 5135 assert( p->rc==SQLITE_OK ); |
| 5136 fts5IndexFlush(p); |
| 5137 if( bCommit ) fts5CloseReader(p); |
| 5138 return fts5IndexReturn(p); |
| 5139 } |
| 5140 |
| 5141 /* |
| 5142 ** Discard any data stored in the in-memory hash tables. Do not write it |
| 5143 ** to the database. Additionally, assume that the contents of the %_data |
| 5144 ** table may have changed on disk. So any in-memory caches of %_data |
| 5145 ** records must be invalidated. |
| 5146 */ |
| 5147 int sqlite3Fts5IndexRollback(Fts5Index *p){ |
| 5148 fts5CloseReader(p); |
| 5149 fts5IndexDiscardData(p); |
| 5150 fts5StructureInvalidate(p); |
| 5151 /* assert( p->rc==SQLITE_OK ); */ |
| 5152 return SQLITE_OK; |
| 5153 } |
| 5154 |
| 5155 /* |
| 5156 ** The %_data table is completely empty when this function is called. This |
| 5157 ** function populates it with the initial structure objects for each index, |
| 5158 ** and the initial version of the "averages" record (a zero-byte blob). |
| 5159 */ |
| 5160 int sqlite3Fts5IndexReinit(Fts5Index *p){ |
| 5161 Fts5Structure s; |
| 5162 fts5StructureInvalidate(p); |
| 5163 memset(&s, 0, sizeof(Fts5Structure)); |
| 5164 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); |
| 5165 fts5StructureWrite(p, &s); |
| 5166 return fts5IndexReturn(p); |
| 5167 } |
| 5168 |
| 5169 /* |
| 5170 ** Open a new Fts5Index handle. If the bCreate argument is true, create |
| 5171 ** and initialize the underlying %_data table. |
| 5172 ** |
| 5173 ** If successful, set *pp to point to the new object and return SQLITE_OK. |
| 5174 ** Otherwise, set *pp to NULL and return an SQLite error code. |
| 5175 */ |
| 5176 int sqlite3Fts5IndexOpen( |
| 5177 Fts5Config *pConfig, |
| 5178 int bCreate, |
| 5179 Fts5Index **pp, |
| 5180 char **pzErr |
| 5181 ){ |
| 5182 int rc = SQLITE_OK; |
| 5183 Fts5Index *p; /* New object */ |
| 5184 |
| 5185 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); |
| 5186 if( rc==SQLITE_OK ){ |
| 5187 p->pConfig = pConfig; |
| 5188 p->nWorkUnit = FTS5_WORK_UNIT; |
| 5189 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); |
| 5190 if( p->zDataTbl && bCreate ){ |
| 5191 rc = sqlite3Fts5CreateTable( |
| 5192 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr |
| 5193 ); |
| 5194 if( rc==SQLITE_OK ){ |
| 5195 rc = sqlite3Fts5CreateTable(pConfig, "idx", |
| 5196 "segid, term, pgno, PRIMARY KEY(segid, term)", |
| 5197 1, pzErr |
| 5198 ); |
| 5199 } |
| 5200 if( rc==SQLITE_OK ){ |
| 5201 rc = sqlite3Fts5IndexReinit(p); |
| 5202 } |
| 5203 } |
| 5204 } |
| 5205 |
| 5206 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); |
| 5207 if( rc ){ |
| 5208 sqlite3Fts5IndexClose(p); |
| 5209 *pp = 0; |
| 5210 } |
| 5211 return rc; |
| 5212 } |
| 5213 |
| 5214 /* |
| 5215 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). |
| 5216 */ |
| 5217 int sqlite3Fts5IndexClose(Fts5Index *p){ |
| 5218 int rc = SQLITE_OK; |
| 5219 if( p ){ |
| 5220 assert( p->pReader==0 ); |
| 5221 fts5StructureInvalidate(p); |
| 5222 sqlite3_finalize(p->pWriter); |
| 5223 sqlite3_finalize(p->pDeleter); |
| 5224 sqlite3_finalize(p->pIdxWriter); |
| 5225 sqlite3_finalize(p->pIdxDeleter); |
| 5226 sqlite3_finalize(p->pIdxSelect); |
| 5227 sqlite3_finalize(p->pDataVersion); |
| 5228 sqlite3Fts5HashFree(p->pHash); |
| 5229 sqlite3_free(p->zDataTbl); |
| 5230 sqlite3_free(p); |
| 5231 } |
| 5232 return rc; |
| 5233 } |
| 5234 |
| 5235 /* |
| 5236 ** Argument p points to a buffer containing utf-8 text that is n bytes in |
| 5237 ** size. Return the number of bytes in the nChar character prefix of the |
| 5238 ** buffer, or 0 if there are less than nChar characters in total. |
| 5239 */ |
| 5240 int sqlite3Fts5IndexCharlenToBytelen( |
| 5241 const char *p, |
| 5242 int nByte, |
| 5243 int nChar |
| 5244 ){ |
| 5245 int n = 0; |
| 5246 int i; |
| 5247 for(i=0; i<nChar; i++){ |
| 5248 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ |
| 5249 if( (unsigned char)p[n++]>=0xc0 ){ |
| 5250 while( (p[n] & 0xc0)==0x80 ) n++; |
| 5251 } |
| 5252 } |
| 5253 return n; |
| 5254 } |
| 5255 |
| 5256 /* |
| 5257 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of |
| 5258 ** unicode characters in the string. |
| 5259 */ |
| 5260 static int fts5IndexCharlen(const char *pIn, int nIn){ |
| 5261 int nChar = 0; |
| 5262 int i = 0; |
| 5263 while( i<nIn ){ |
| 5264 if( (unsigned char)pIn[i++]>=0xc0 ){ |
| 5265 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++; |
| 5266 } |
| 5267 nChar++; |
| 5268 } |
| 5269 return nChar; |
| 5270 } |
| 5271 |
| 5272 /* |
| 5273 ** Insert or remove data to or from the index. Each time a document is |
| 5274 ** added to or removed from the index, this function is called one or more |
| 5275 ** times. |
| 5276 ** |
| 5277 ** For an insert, it must be called once for each token in the new document. |
| 5278 ** If the operation is a delete, it must be called (at least) once for each |
| 5279 ** unique token in the document with an iCol value less than zero. The iPos |
| 5280 ** argument is ignored for a delete. |
| 5281 */ |
| 5282 int sqlite3Fts5IndexWrite( |
| 5283 Fts5Index *p, /* Index to write to */ |
| 5284 int iCol, /* Column token appears in (-ve -> delete) */ |
| 5285 int iPos, /* Position of token within column */ |
| 5286 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 5287 ){ |
| 5288 int i; /* Used to iterate through indexes */ |
| 5289 int rc = SQLITE_OK; /* Return code */ |
| 5290 Fts5Config *pConfig = p->pConfig; |
| 5291 |
| 5292 assert( p->rc==SQLITE_OK ); |
| 5293 assert( (iCol<0)==p->bDelete ); |
| 5294 |
| 5295 /* Add the entry to the main terms index. */ |
| 5296 rc = sqlite3Fts5HashWrite( |
| 5297 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken |
| 5298 ); |
| 5299 |
| 5300 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ |
| 5301 const int nChar = pConfig->aPrefix[i]; |
| 5302 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); |
| 5303 if( nByte ){ |
| 5304 rc = sqlite3Fts5HashWrite(p->pHash, |
| 5305 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, |
| 5306 nByte |
| 5307 ); |
| 5308 } |
| 5309 } |
| 5310 |
| 5311 return rc; |
| 5312 } |
| 5313 |
| 5314 /* |
| 5315 ** Open a new iterator to iterate though all rowid that match the |
| 5316 ** specified token or token prefix. |
| 5317 */ |
| 5318 int sqlite3Fts5IndexQuery( |
| 5319 Fts5Index *p, /* FTS index to query */ |
| 5320 const char *pToken, int nToken, /* Token (or prefix) to query for */ |
| 5321 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ |
| 5322 Fts5Colset *pColset, /* Match these columns only */ |
| 5323 Fts5IndexIter **ppIter /* OUT: New iterator object */ |
| 5324 ){ |
| 5325 Fts5Config *pConfig = p->pConfig; |
| 5326 Fts5Iter *pRet = 0; |
| 5327 Fts5Buffer buf = {0, 0, 0}; |
| 5328 |
| 5329 /* If the QUERY_SCAN flag is set, all other flags must be clear. */ |
| 5330 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); |
| 5331 |
| 5332 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ |
| 5333 int iIdx = 0; /* Index to search */ |
| 5334 memcpy(&buf.p[1], pToken, nToken); |
| 5335 |
| 5336 /* Figure out which index to search and set iIdx accordingly. If this |
| 5337 ** is a prefix query for which there is no prefix index, set iIdx to |
| 5338 ** greater than pConfig->nPrefix to indicate that the query will be |
| 5339 ** satisfied by scanning multiple terms in the main index. |
| 5340 ** |
| 5341 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a |
| 5342 ** prefix-query. Instead of using a prefix-index (if one exists), |
| 5343 ** evaluate the prefix query using the main FTS index. This is used |
| 5344 ** for internal sanity checking by the integrity-check in debug |
| 5345 ** mode only. */ |
| 5346 #ifdef SQLITE_DEBUG |
| 5347 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ |
| 5348 assert( flags & FTS5INDEX_QUERY_PREFIX ); |
| 5349 iIdx = 1+pConfig->nPrefix; |
| 5350 }else |
| 5351 #endif |
| 5352 if( flags & FTS5INDEX_QUERY_PREFIX ){ |
| 5353 int nChar = fts5IndexCharlen(pToken, nToken); |
| 5354 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ |
| 5355 if( pConfig->aPrefix[iIdx-1]==nChar ) break; |
| 5356 } |
| 5357 } |
| 5358 |
| 5359 if( iIdx<=pConfig->nPrefix ){ |
| 5360 /* Straight index lookup */ |
| 5361 Fts5Structure *pStruct = fts5StructureRead(p); |
| 5362 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); |
| 5363 if( pStruct ){ |
| 5364 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, |
| 5365 pColset, buf.p, nToken+1, -1, 0, &pRet |
| 5366 ); |
| 5367 fts5StructureRelease(pStruct); |
| 5368 } |
| 5369 }else{ |
| 5370 /* Scan multiple terms in the main index */ |
| 5371 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; |
| 5372 buf.p[0] = FTS5_MAIN_PREFIX; |
| 5373 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); |
| 5374 assert( p->rc!=SQLITE_OK || pRet->pColset==0 ); |
| 5375 fts5IterSetOutputCb(&p->rc, pRet); |
| 5376 if( p->rc==SQLITE_OK ){ |
| 5377 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; |
| 5378 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); |
| 5379 } |
| 5380 } |
| 5381 |
| 5382 if( p->rc ){ |
| 5383 sqlite3Fts5IterClose(&pRet->base); |
| 5384 pRet = 0; |
| 5385 fts5CloseReader(p); |
| 5386 } |
| 5387 |
| 5388 *ppIter = &pRet->base; |
| 5389 sqlite3Fts5BufferFree(&buf); |
| 5390 } |
| 5391 return fts5IndexReturn(p); |
| 5392 } |
| 5393 |
| 5394 /* |
| 5395 ** Return true if the iterator passed as the only argument is at EOF. |
| 5396 */ |
| 5397 /* |
| 5398 ** Move to the next matching rowid. |
| 5399 */ |
| 5400 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ |
| 5401 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 5402 assert( pIter->pIndex->rc==SQLITE_OK ); |
| 5403 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); |
| 5404 return fts5IndexReturn(pIter->pIndex); |
| 5405 } |
| 5406 |
| 5407 /* |
| 5408 ** Move to the next matching term/rowid. Used by the fts5vocab module. |
| 5409 */ |
| 5410 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ |
| 5411 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 5412 Fts5Index *p = pIter->pIndex; |
| 5413 |
| 5414 assert( pIter->pIndex->rc==SQLITE_OK ); |
| 5415 |
| 5416 fts5MultiIterNext(p, pIter, 0, 0); |
| 5417 if( p->rc==SQLITE_OK ){ |
| 5418 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
| 5419 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ |
| 5420 fts5DataRelease(pSeg->pLeaf); |
| 5421 pSeg->pLeaf = 0; |
| 5422 pIter->base.bEof = 1; |
| 5423 } |
| 5424 } |
| 5425 |
| 5426 return fts5IndexReturn(pIter->pIndex); |
| 5427 } |
| 5428 |
| 5429 /* |
| 5430 ** Move to the next matching rowid that occurs at or after iMatch. The |
| 5431 ** definition of "at or after" depends on whether this iterator iterates |
| 5432 ** in ascending or descending rowid order. |
| 5433 */ |
| 5434 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ |
| 5435 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 5436 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); |
| 5437 return fts5IndexReturn(pIter->pIndex); |
| 5438 } |
| 5439 |
| 5440 /* |
| 5441 ** Return the current term. |
| 5442 */ |
| 5443 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ |
| 5444 int n; |
| 5445 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); |
| 5446 *pn = n-1; |
| 5447 return &z[1]; |
| 5448 } |
| 5449 |
| 5450 /* |
| 5451 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). |
| 5452 */ |
| 5453 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ |
| 5454 if( pIndexIter ){ |
| 5455 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
| 5456 Fts5Index *pIndex = pIter->pIndex; |
| 5457 fts5MultiIterFree(pIter); |
| 5458 fts5CloseReader(pIndex); |
| 5459 } |
| 5460 } |
| 5461 |
| 5462 /* |
| 5463 ** Read and decode the "averages" record from the database. |
| 5464 ** |
| 5465 ** Parameter anSize must point to an array of size nCol, where nCol is |
| 5466 ** the number of user defined columns in the FTS table. |
| 5467 */ |
| 5468 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ |
| 5469 int nCol = p->pConfig->nCol; |
| 5470 Fts5Data *pData; |
| 5471 |
| 5472 *pnRow = 0; |
| 5473 memset(anSize, 0, sizeof(i64) * nCol); |
| 5474 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); |
| 5475 if( p->rc==SQLITE_OK && pData->nn ){ |
| 5476 int i = 0; |
| 5477 int iCol; |
| 5478 i += fts5GetVarint(&pData->p[i], (u64*)pnRow); |
| 5479 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ |
| 5480 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); |
| 5481 } |
| 5482 } |
| 5483 |
| 5484 fts5DataRelease(pData); |
| 5485 return fts5IndexReturn(p); |
| 5486 } |
| 5487 |
| 5488 /* |
| 5489 ** Replace the current "averages" record with the contents of the buffer |
| 5490 ** supplied as the second argument. |
| 5491 */ |
| 5492 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ |
| 5493 assert( p->rc==SQLITE_OK ); |
| 5494 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); |
| 5495 return fts5IndexReturn(p); |
| 5496 } |
| 5497 |
| 5498 /* |
| 5499 ** Return the total number of blocks this module has read from the %_data |
| 5500 ** table since it was created. |
| 5501 */ |
| 5502 int sqlite3Fts5IndexReads(Fts5Index *p){ |
| 5503 return p->nRead; |
| 5504 } |
| 5505 |
| 5506 /* |
| 5507 ** Set the 32-bit cookie value stored at the start of all structure |
| 5508 ** records to the value passed as the second argument. |
| 5509 ** |
| 5510 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 5511 ** occurs. |
| 5512 */ |
| 5513 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ |
| 5514 int rc; /* Return code */ |
| 5515 Fts5Config *pConfig = p->pConfig; /* Configuration object */ |
| 5516 u8 aCookie[4]; /* Binary representation of iNew */ |
| 5517 sqlite3_blob *pBlob = 0; |
| 5518 |
| 5519 assert( p->rc==SQLITE_OK ); |
| 5520 sqlite3Fts5Put32(aCookie, iNew); |
| 5521 |
| 5522 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, |
| 5523 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob |
| 5524 ); |
| 5525 if( rc==SQLITE_OK ){ |
| 5526 sqlite3_blob_write(pBlob, aCookie, 4, 0); |
| 5527 rc = sqlite3_blob_close(pBlob); |
| 5528 } |
| 5529 |
| 5530 return rc; |
| 5531 } |
| 5532 |
| 5533 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ |
| 5534 Fts5Structure *pStruct; |
| 5535 pStruct = fts5StructureRead(p); |
| 5536 fts5StructureRelease(pStruct); |
| 5537 return fts5IndexReturn(p); |
| 5538 } |
| 5539 |
| 5540 |
| 5541 /************************************************************************* |
| 5542 ************************************************************************** |
| 5543 ** Below this point is the implementation of the integrity-check |
| 5544 ** functionality. |
| 5545 */ |
| 5546 |
| 5547 /* |
| 5548 ** Return a simple checksum value based on the arguments. |
| 5549 */ |
| 5550 u64 sqlite3Fts5IndexEntryCksum( |
| 5551 i64 iRowid, |
| 5552 int iCol, |
| 5553 int iPos, |
| 5554 int iIdx, |
| 5555 const char *pTerm, |
| 5556 int nTerm |
| 5557 ){ |
| 5558 int i; |
| 5559 u64 ret = iRowid; |
| 5560 ret += (ret<<3) + iCol; |
| 5561 ret += (ret<<3) + iPos; |
| 5562 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); |
| 5563 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; |
| 5564 return ret; |
| 5565 } |
| 5566 |
| 5567 #ifdef SQLITE_DEBUG |
| 5568 /* |
| 5569 ** This function is purely an internal test. It does not contribute to |
| 5570 ** FTS functionality, or even the integrity-check, in any way. |
| 5571 ** |
| 5572 ** Instead, it tests that the same set of pgno/rowid combinations are |
| 5573 ** visited regardless of whether the doclist-index identified by parameters |
| 5574 ** iSegid/iLeaf is iterated in forwards or reverse order. |
| 5575 */ |
| 5576 static void fts5TestDlidxReverse( |
| 5577 Fts5Index *p, |
| 5578 int iSegid, /* Segment id to load from */ |
| 5579 int iLeaf /* Load doclist-index for this leaf */ |
| 5580 ){ |
| 5581 Fts5DlidxIter *pDlidx = 0; |
| 5582 u64 cksum1 = 13; |
| 5583 u64 cksum2 = 13; |
| 5584 |
| 5585 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); |
| 5586 fts5DlidxIterEof(p, pDlidx)==0; |
| 5587 fts5DlidxIterNext(p, pDlidx) |
| 5588 ){ |
| 5589 i64 iRowid = fts5DlidxIterRowid(pDlidx); |
| 5590 int pgno = fts5DlidxIterPgno(pDlidx); |
| 5591 assert( pgno>iLeaf ); |
| 5592 cksum1 += iRowid + ((i64)pgno<<32); |
| 5593 } |
| 5594 fts5DlidxIterFree(pDlidx); |
| 5595 pDlidx = 0; |
| 5596 |
| 5597 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); |
| 5598 fts5DlidxIterEof(p, pDlidx)==0; |
| 5599 fts5DlidxIterPrev(p, pDlidx) |
| 5600 ){ |
| 5601 i64 iRowid = fts5DlidxIterRowid(pDlidx); |
| 5602 int pgno = fts5DlidxIterPgno(pDlidx); |
| 5603 assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); |
| 5604 cksum2 += iRowid + ((i64)pgno<<32); |
| 5605 } |
| 5606 fts5DlidxIterFree(pDlidx); |
| 5607 pDlidx = 0; |
| 5608 |
| 5609 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; |
| 5610 } |
| 5611 |
| 5612 static int fts5QueryCksum( |
| 5613 Fts5Index *p, /* Fts5 index object */ |
| 5614 int iIdx, |
| 5615 const char *z, /* Index key to query for */ |
| 5616 int n, /* Size of index key in bytes */ |
| 5617 int flags, /* Flags for Fts5IndexQuery */ |
| 5618 u64 *pCksum /* IN/OUT: Checksum value */ |
| 5619 ){ |
| 5620 int eDetail = p->pConfig->eDetail; |
| 5621 u64 cksum = *pCksum; |
| 5622 Fts5IndexIter *pIter = 0; |
| 5623 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); |
| 5624 |
| 5625 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){ |
| 5626 i64 rowid = pIter->iRowid; |
| 5627 |
| 5628 if( eDetail==FTS5_DETAIL_NONE ){ |
| 5629 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); |
| 5630 }else{ |
| 5631 Fts5PoslistReader sReader; |
| 5632 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); |
| 5633 sReader.bEof==0; |
| 5634 sqlite3Fts5PoslistReaderNext(&sReader) |
| 5635 ){ |
| 5636 int iCol = FTS5_POS2COLUMN(sReader.iPos); |
| 5637 int iOff = FTS5_POS2OFFSET(sReader.iPos); |
| 5638 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); |
| 5639 } |
| 5640 } |
| 5641 if( rc==SQLITE_OK ){ |
| 5642 rc = sqlite3Fts5IterNext(pIter); |
| 5643 } |
| 5644 } |
| 5645 sqlite3Fts5IterClose(pIter); |
| 5646 |
| 5647 *pCksum = cksum; |
| 5648 return rc; |
| 5649 } |
| 5650 |
| 5651 |
| 5652 /* |
| 5653 ** This function is also purely an internal test. It does not contribute to |
| 5654 ** FTS functionality, or even the integrity-check, in any way. |
| 5655 */ |
| 5656 static void fts5TestTerm( |
| 5657 Fts5Index *p, |
| 5658 Fts5Buffer *pPrev, /* Previous term */ |
| 5659 const char *z, int n, /* Possibly new term to test */ |
| 5660 u64 expected, |
| 5661 u64 *pCksum |
| 5662 ){ |
| 5663 int rc = p->rc; |
| 5664 if( pPrev->n==0 ){ |
| 5665 fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
| 5666 }else |
| 5667 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ |
| 5668 u64 cksum3 = *pCksum; |
| 5669 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ |
| 5670 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ |
| 5671 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); |
| 5672 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); |
| 5673 u64 ck1 = 0; |
| 5674 u64 ck2 = 0; |
| 5675 |
| 5676 /* Check that the results returned for ASC and DESC queries are |
| 5677 ** the same. If not, call this corruption. */ |
| 5678 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); |
| 5679 if( rc==SQLITE_OK ){ |
| 5680 int f = flags|FTS5INDEX_QUERY_DESC; |
| 5681 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 5682 } |
| 5683 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 5684 |
| 5685 /* If this is a prefix query, check that the results returned if the |
| 5686 ** the index is disabled are the same. In both ASC and DESC order. |
| 5687 ** |
| 5688 ** This check may only be performed if the hash table is empty. This |
| 5689 ** is because the hash table only supports a single scan query at |
| 5690 ** a time, and the multi-iter loop from which this function is called |
| 5691 ** is already performing such a scan. */ |
| 5692 if( p->nPendingData==0 ){ |
| 5693 if( iIdx>0 && rc==SQLITE_OK ){ |
| 5694 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; |
| 5695 ck2 = 0; |
| 5696 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 5697 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 5698 } |
| 5699 if( iIdx>0 && rc==SQLITE_OK ){ |
| 5700 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; |
| 5701 ck2 = 0; |
| 5702 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
| 5703 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
| 5704 } |
| 5705 } |
| 5706 |
| 5707 cksum3 ^= ck1; |
| 5708 fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
| 5709 |
| 5710 if( rc==SQLITE_OK && cksum3!=expected ){ |
| 5711 rc = FTS5_CORRUPT; |
| 5712 } |
| 5713 *pCksum = cksum3; |
| 5714 } |
| 5715 p->rc = rc; |
| 5716 } |
| 5717 |
| 5718 #else |
| 5719 # define fts5TestDlidxReverse(x,y,z) |
| 5720 # define fts5TestTerm(u,v,w,x,y,z) |
| 5721 #endif |
| 5722 |
| 5723 /* |
| 5724 ** Check that: |
| 5725 ** |
| 5726 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and |
| 5727 ** contain zero terms. |
| 5728 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and |
| 5729 ** contain zero rowids. |
| 5730 */ |
| 5731 static void fts5IndexIntegrityCheckEmpty( |
| 5732 Fts5Index *p, |
| 5733 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ |
| 5734 int iFirst, |
| 5735 int iNoRowid, |
| 5736 int iLast |
| 5737 ){ |
| 5738 int i; |
| 5739 |
| 5740 /* Now check that the iter.nEmpty leaves following the current leaf |
| 5741 ** (a) exist and (b) contain no terms. */ |
| 5742 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ |
| 5743 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); |
| 5744 if( pLeaf ){ |
| 5745 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; |
| 5746 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; |
| 5747 } |
| 5748 fts5DataRelease(pLeaf); |
| 5749 } |
| 5750 } |
| 5751 |
| 5752 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ |
| 5753 int iTermOff = 0; |
| 5754 int ii; |
| 5755 |
| 5756 Fts5Buffer buf1 = {0,0,0}; |
| 5757 Fts5Buffer buf2 = {0,0,0}; |
| 5758 |
| 5759 ii = pLeaf->szLeaf; |
| 5760 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){ |
| 5761 int res; |
| 5762 int iOff; |
| 5763 int nIncr; |
| 5764 |
| 5765 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); |
| 5766 iTermOff += nIncr; |
| 5767 iOff = iTermOff; |
| 5768 |
| 5769 if( iOff>=pLeaf->szLeaf ){ |
| 5770 p->rc = FTS5_CORRUPT; |
| 5771 }else if( iTermOff==nIncr ){ |
| 5772 int nByte; |
| 5773 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); |
| 5774 if( (iOff+nByte)>pLeaf->szLeaf ){ |
| 5775 p->rc = FTS5_CORRUPT; |
| 5776 }else{ |
| 5777 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); |
| 5778 } |
| 5779 }else{ |
| 5780 int nKeep, nByte; |
| 5781 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); |
| 5782 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); |
| 5783 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ |
| 5784 p->rc = FTS5_CORRUPT; |
| 5785 }else{ |
| 5786 buf1.n = nKeep; |
| 5787 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); |
| 5788 } |
| 5789 |
| 5790 if( p->rc==SQLITE_OK ){ |
| 5791 res = fts5BufferCompare(&buf1, &buf2); |
| 5792 if( res<=0 ) p->rc = FTS5_CORRUPT; |
| 5793 } |
| 5794 } |
| 5795 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); |
| 5796 } |
| 5797 |
| 5798 fts5BufferFree(&buf1); |
| 5799 fts5BufferFree(&buf2); |
| 5800 } |
| 5801 |
| 5802 static void fts5IndexIntegrityCheckSegment( |
| 5803 Fts5Index *p, /* FTS5 backend object */ |
| 5804 Fts5StructureSegment *pSeg /* Segment to check internal consistency */ |
| 5805 ){ |
| 5806 Fts5Config *pConfig = p->pConfig; |
| 5807 sqlite3_stmt *pStmt = 0; |
| 5808 int rc2; |
| 5809 int iIdxPrevLeaf = pSeg->pgnoFirst-1; |
| 5810 int iDlidxPrevLeaf = pSeg->pgnoLast; |
| 5811 |
| 5812 if( pSeg->pgnoFirst==0 ) return; |
| 5813 |
| 5814 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( |
| 5815 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d", |
| 5816 pConfig->zDb, pConfig->zName, pSeg->iSegid |
| 5817 )); |
| 5818 |
| 5819 /* Iterate through the b-tree hierarchy. */ |
| 5820 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 5821 i64 iRow; /* Rowid for this leaf */ |
| 5822 Fts5Data *pLeaf; /* Data for this leaf */ |
| 5823 |
| 5824 int nIdxTerm = sqlite3_column_bytes(pStmt, 1); |
| 5825 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); |
| 5826 int iIdxLeaf = sqlite3_column_int(pStmt, 2); |
| 5827 int bIdxDlidx = sqlite3_column_int(pStmt, 3); |
| 5828 |
| 5829 /* If the leaf in question has already been trimmed from the segment, |
| 5830 ** ignore this b-tree entry. Otherwise, load it into memory. */ |
| 5831 if( iIdxLeaf<pSeg->pgnoFirst ) continue; |
| 5832 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); |
| 5833 pLeaf = fts5LeafRead(p, iRow); |
| 5834 if( pLeaf==0 ) break; |
| 5835 |
| 5836 /* Check that the leaf contains at least one term, and that it is equal |
| 5837 ** to or larger than the split-key in zIdxTerm. Also check that if there |
| 5838 ** is also a rowid pointer within the leaf page header, it points to a |
| 5839 ** location before the term. */ |
| 5840 if( pLeaf->nn<=pLeaf->szLeaf ){ |
| 5841 p->rc = FTS5_CORRUPT; |
| 5842 }else{ |
| 5843 int iOff; /* Offset of first term on leaf */ |
| 5844 int iRowidOff; /* Offset of first rowid on leaf */ |
| 5845 int nTerm; /* Size of term on leaf in bytes */ |
| 5846 int res; /* Comparison of term and split-key */ |
| 5847 |
| 5848 iOff = fts5LeafFirstTermOff(pLeaf); |
| 5849 iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
| 5850 if( iRowidOff>=iOff ){ |
| 5851 p->rc = FTS5_CORRUPT; |
| 5852 }else{ |
| 5853 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); |
| 5854 res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); |
| 5855 if( res==0 ) res = nTerm - nIdxTerm; |
| 5856 if( res<0 ) p->rc = FTS5_CORRUPT; |
| 5857 } |
| 5858 |
| 5859 fts5IntegrityCheckPgidx(p, pLeaf); |
| 5860 } |
| 5861 fts5DataRelease(pLeaf); |
| 5862 if( p->rc ) break; |
| 5863 |
| 5864 /* Now check that the iter.nEmpty leaves following the current leaf |
| 5865 ** (a) exist and (b) contain no terms. */ |
| 5866 fts5IndexIntegrityCheckEmpty( |
| 5867 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 |
| 5868 ); |
| 5869 if( p->rc ) break; |
| 5870 |
| 5871 /* If there is a doclist-index, check that it looks right. */ |
| 5872 if( bIdxDlidx ){ |
| 5873 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ |
| 5874 int iPrevLeaf = iIdxLeaf; |
| 5875 int iSegid = pSeg->iSegid; |
| 5876 int iPg = 0; |
| 5877 i64 iKey; |
| 5878 |
| 5879 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); |
| 5880 fts5DlidxIterEof(p, pDlidx)==0; |
| 5881 fts5DlidxIterNext(p, pDlidx) |
| 5882 ){ |
| 5883 |
| 5884 /* Check any rowid-less pages that occur before the current leaf. */ |
| 5885 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ |
| 5886 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg); |
| 5887 pLeaf = fts5DataRead(p, iKey); |
| 5888 if( pLeaf ){ |
| 5889 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; |
| 5890 fts5DataRelease(pLeaf); |
| 5891 } |
| 5892 } |
| 5893 iPrevLeaf = fts5DlidxIterPgno(pDlidx); |
| 5894 |
| 5895 /* Check that the leaf page indicated by the iterator really does |
| 5896 ** contain the rowid suggested by the same. */ |
| 5897 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); |
| 5898 pLeaf = fts5DataRead(p, iKey); |
| 5899 if( pLeaf ){ |
| 5900 i64 iRowid; |
| 5901 int iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
| 5902 ASSERT_SZLEAF_OK(pLeaf); |
| 5903 if( iRowidOff>=pLeaf->szLeaf ){ |
| 5904 p->rc = FTS5_CORRUPT; |
| 5905 }else{ |
| 5906 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); |
| 5907 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; |
| 5908 } |
| 5909 fts5DataRelease(pLeaf); |
| 5910 } |
| 5911 } |
| 5912 |
| 5913 iDlidxPrevLeaf = iPg; |
| 5914 fts5DlidxIterFree(pDlidx); |
| 5915 fts5TestDlidxReverse(p, iSegid, iIdxLeaf); |
| 5916 }else{ |
| 5917 iDlidxPrevLeaf = pSeg->pgnoLast; |
| 5918 /* TODO: Check there is no doclist index */ |
| 5919 } |
| 5920 |
| 5921 iIdxPrevLeaf = iIdxLeaf; |
| 5922 } |
| 5923 |
| 5924 rc2 = sqlite3_finalize(pStmt); |
| 5925 if( p->rc==SQLITE_OK ) p->rc = rc2; |
| 5926 |
| 5927 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ |
| 5928 #if 0 |
| 5929 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ |
| 5930 p->rc = FTS5_CORRUPT; |
| 5931 } |
| 5932 #endif |
| 5933 } |
| 5934 |
| 5935 |
| 5936 /* |
| 5937 ** Run internal checks to ensure that the FTS index (a) is internally |
| 5938 ** consistent and (b) contains entries for which the XOR of the checksums |
| 5939 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. |
| 5940 ** |
| 5941 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the |
| 5942 ** checksum does not match. Return SQLITE_OK if all checks pass without |
| 5943 ** error, or some other SQLite error code if another error (e.g. OOM) |
| 5944 ** occurs. |
| 5945 */ |
| 5946 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ |
| 5947 int eDetail = p->pConfig->eDetail; |
| 5948 u64 cksum2 = 0; /* Checksum based on contents of indexes */ |
| 5949 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ |
| 5950 Fts5Iter *pIter; /* Used to iterate through entire index */ |
| 5951 Fts5Structure *pStruct; /* Index structure */ |
| 5952 |
| 5953 #ifdef SQLITE_DEBUG |
| 5954 /* Used by extra internal tests only run if NDEBUG is not defined */ |
| 5955 u64 cksum3 = 0; /* Checksum based on contents of indexes */ |
| 5956 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ |
| 5957 #endif |
| 5958 const int flags = FTS5INDEX_QUERY_NOOUTPUT; |
| 5959 |
| 5960 /* Load the FTS index structure */ |
| 5961 pStruct = fts5StructureRead(p); |
| 5962 |
| 5963 /* Check that the internal nodes of each segment match the leaves */ |
| 5964 if( pStruct ){ |
| 5965 int iLvl, iSeg; |
| 5966 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
| 5967 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
| 5968 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; |
| 5969 fts5IndexIntegrityCheckSegment(p, pSeg); |
| 5970 } |
| 5971 } |
| 5972 } |
| 5973 |
| 5974 /* The cksum argument passed to this function is a checksum calculated |
| 5975 ** based on all expected entries in the FTS index (including prefix index |
| 5976 ** entries). This block checks that a checksum calculated based on the |
| 5977 ** actual contents of FTS index is identical. |
| 5978 ** |
| 5979 ** Two versions of the same checksum are calculated. The first (stack |
| 5980 ** variable cksum2) based on entries extracted from the full-text index |
| 5981 ** while doing a linear scan of each individual index in turn. |
| 5982 ** |
| 5983 ** As each term visited by the linear scans, a separate query for the |
| 5984 ** same term is performed. cksum3 is calculated based on the entries |
| 5985 ** extracted by these queries. |
| 5986 */ |
| 5987 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); |
| 5988 fts5MultiIterEof(p, pIter)==0; |
| 5989 fts5MultiIterNext(p, pIter, 0, 0) |
| 5990 ){ |
| 5991 int n; /* Size of term in bytes */ |
| 5992 i64 iPos = 0; /* Position read from poslist */ |
| 5993 int iOff = 0; /* Offset within poslist */ |
| 5994 i64 iRowid = fts5MultiIterRowid(pIter); |
| 5995 char *z = (char*)fts5MultiIterTerm(pIter, &n); |
| 5996 |
| 5997 /* If this is a new term, query for it. Update cksum3 with the results. */ |
| 5998 fts5TestTerm(p, &term, z, n, cksum2, &cksum3); |
| 5999 |
| 6000 if( eDetail==FTS5_DETAIL_NONE ){ |
| 6001 if( 0==fts5MultiIterIsEmpty(p, pIter) ){ |
| 6002 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); |
| 6003 } |
| 6004 }else{ |
| 6005 poslist.n = 0; |
| 6006 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); |
| 6007 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ |
| 6008 int iCol = FTS5_POS2COLUMN(iPos); |
| 6009 int iTokOff = FTS5_POS2OFFSET(iPos); |
| 6010 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); |
| 6011 } |
| 6012 } |
| 6013 } |
| 6014 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); |
| 6015 |
| 6016 fts5MultiIterFree(pIter); |
| 6017 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; |
| 6018 |
| 6019 fts5StructureRelease(pStruct); |
| 6020 #ifdef SQLITE_DEBUG |
| 6021 fts5BufferFree(&term); |
| 6022 #endif |
| 6023 fts5BufferFree(&poslist); |
| 6024 return fts5IndexReturn(p); |
| 6025 } |
| 6026 |
| 6027 /************************************************************************* |
| 6028 ************************************************************************** |
| 6029 ** Below this point is the implementation of the fts5_decode() scalar |
| 6030 ** function only. |
| 6031 */ |
| 6032 |
| 6033 /* |
| 6034 ** Decode a segment-data rowid from the %_data table. This function is |
| 6035 ** the opposite of macro FTS5_SEGMENT_ROWID(). |
| 6036 */ |
| 6037 static void fts5DecodeRowid( |
| 6038 i64 iRowid, /* Rowid from %_data table */ |
| 6039 int *piSegid, /* OUT: Segment id */ |
| 6040 int *pbDlidx, /* OUT: Dlidx flag */ |
| 6041 int *piHeight, /* OUT: Height */ |
| 6042 int *piPgno /* OUT: Page number */ |
| 6043 ){ |
| 6044 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); |
| 6045 iRowid >>= FTS5_DATA_PAGE_B; |
| 6046 |
| 6047 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); |
| 6048 iRowid >>= FTS5_DATA_HEIGHT_B; |
| 6049 |
| 6050 *pbDlidx = (int)(iRowid & 0x0001); |
| 6051 iRowid >>= FTS5_DATA_DLI_B; |
| 6052 |
| 6053 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); |
| 6054 } |
| 6055 |
| 6056 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ |
| 6057 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ |
| 6058 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); |
| 6059 |
| 6060 if( iSegid==0 ){ |
| 6061 if( iKey==FTS5_AVERAGES_ROWID ){ |
| 6062 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); |
| 6063 }else{ |
| 6064 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); |
| 6065 } |
| 6066 } |
| 6067 else{ |
| 6068 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", |
| 6069 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno |
| 6070 ); |
| 6071 } |
| 6072 } |
| 6073 |
| 6074 static void fts5DebugStructure( |
| 6075 int *pRc, /* IN/OUT: error code */ |
| 6076 Fts5Buffer *pBuf, |
| 6077 Fts5Structure *p |
| 6078 ){ |
| 6079 int iLvl, iSeg; /* Iterate through levels, segments */ |
| 6080 |
| 6081 for(iLvl=0; iLvl<p->nLevel; iLvl++){ |
| 6082 Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; |
| 6083 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, |
| 6084 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg |
| 6085 ); |
| 6086 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ |
| 6087 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; |
| 6088 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", |
| 6089 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast |
| 6090 ); |
| 6091 } |
| 6092 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); |
| 6093 } |
| 6094 } |
| 6095 |
| 6096 /* |
| 6097 ** This is part of the fts5_decode() debugging aid. |
| 6098 ** |
| 6099 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This |
| 6100 ** function appends a human-readable representation of the same object |
| 6101 ** to the buffer passed as the second argument. |
| 6102 */ |
| 6103 static void fts5DecodeStructure( |
| 6104 int *pRc, /* IN/OUT: error code */ |
| 6105 Fts5Buffer *pBuf, |
| 6106 const u8 *pBlob, int nBlob |
| 6107 ){ |
| 6108 int rc; /* Return code */ |
| 6109 Fts5Structure *p = 0; /* Decoded structure object */ |
| 6110 |
| 6111 rc = fts5StructureDecode(pBlob, nBlob, 0, &p); |
| 6112 if( rc!=SQLITE_OK ){ |
| 6113 *pRc = rc; |
| 6114 return; |
| 6115 } |
| 6116 |
| 6117 fts5DebugStructure(pRc, pBuf, p); |
| 6118 fts5StructureRelease(p); |
| 6119 } |
| 6120 |
| 6121 /* |
| 6122 ** This is part of the fts5_decode() debugging aid. |
| 6123 ** |
| 6124 ** Arguments pBlob/nBlob contain an "averages" record. This function |
| 6125 ** appends a human-readable representation of record to the buffer passed |
| 6126 ** as the second argument. |
| 6127 */ |
| 6128 static void fts5DecodeAverages( |
| 6129 int *pRc, /* IN/OUT: error code */ |
| 6130 Fts5Buffer *pBuf, |
| 6131 const u8 *pBlob, int nBlob |
| 6132 ){ |
| 6133 int i = 0; |
| 6134 const char *zSpace = ""; |
| 6135 |
| 6136 while( i<nBlob ){ |
| 6137 u64 iVal; |
| 6138 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); |
| 6139 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); |
| 6140 zSpace = " "; |
| 6141 } |
| 6142 } |
| 6143 |
| 6144 /* |
| 6145 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read |
| 6146 ** each varint and append its string representation to buffer pBuf. Return |
| 6147 ** after either the input buffer is exhausted or a 0 value is read. |
| 6148 ** |
| 6149 ** The return value is the number of bytes read from the input buffer. |
| 6150 */ |
| 6151 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ |
| 6152 int iOff = 0; |
| 6153 while( iOff<n ){ |
| 6154 int iVal; |
| 6155 iOff += fts5GetVarint32(&a[iOff], iVal); |
| 6156 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); |
| 6157 } |
| 6158 return iOff; |
| 6159 } |
| 6160 |
| 6161 /* |
| 6162 ** The start of buffer (a/n) contains the start of a doclist. The doclist |
| 6163 ** may or may not finish within the buffer. This function appends a text |
| 6164 ** representation of the part of the doclist that is present to buffer |
| 6165 ** pBuf. |
| 6166 ** |
| 6167 ** The return value is the number of bytes read from the input buffer. |
| 6168 */ |
| 6169 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ |
| 6170 i64 iDocid = 0; |
| 6171 int iOff = 0; |
| 6172 |
| 6173 if( n>0 ){ |
| 6174 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); |
| 6175 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); |
| 6176 } |
| 6177 while( iOff<n ){ |
| 6178 int nPos; |
| 6179 int bDel; |
| 6180 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); |
| 6181 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); |
| 6182 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); |
| 6183 if( iOff<n ){ |
| 6184 i64 iDelta; |
| 6185 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); |
| 6186 iDocid += iDelta; |
| 6187 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); |
| 6188 } |
| 6189 } |
| 6190 |
| 6191 return iOff; |
| 6192 } |
| 6193 |
| 6194 /* |
| 6195 ** This function is part of the fts5_decode() debugging function. It is |
| 6196 ** only ever used with detail=none tables. |
| 6197 ** |
| 6198 ** Buffer (pData/nData) contains a doclist in the format used by detail=none |
| 6199 ** tables. This function appends a human-readable version of that list to |
| 6200 ** buffer pBuf. |
| 6201 ** |
| 6202 ** If *pRc is other than SQLITE_OK when this function is called, it is a |
| 6203 ** no-op. If an OOM or other error occurs within this function, *pRc is |
| 6204 ** set to an SQLite error code before returning. The final state of buffer |
| 6205 ** pBuf is undefined in this case. |
| 6206 */ |
| 6207 static void fts5DecodeRowidList( |
| 6208 int *pRc, /* IN/OUT: Error code */ |
| 6209 Fts5Buffer *pBuf, /* Buffer to append text to */ |
| 6210 const u8 *pData, int nData /* Data to decode list-of-rowids from */ |
| 6211 ){ |
| 6212 int i = 0; |
| 6213 i64 iRowid = 0; |
| 6214 |
| 6215 while( i<nData ){ |
| 6216 const char *zApp = ""; |
| 6217 u64 iVal; |
| 6218 i += sqlite3Fts5GetVarint(&pData[i], &iVal); |
| 6219 iRowid += iVal; |
| 6220 |
| 6221 if( i<nData && pData[i]==0x00 ){ |
| 6222 i++; |
| 6223 if( i<nData && pData[i]==0x00 ){ |
| 6224 i++; |
| 6225 zApp = "+"; |
| 6226 }else{ |
| 6227 zApp = "*"; |
| 6228 } |
| 6229 } |
| 6230 |
| 6231 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); |
| 6232 } |
| 6233 } |
| 6234 |
| 6235 /* |
| 6236 ** The implementation of user-defined scalar function fts5_decode(). |
| 6237 */ |
| 6238 static void fts5DecodeFunction( |
| 6239 sqlite3_context *pCtx, /* Function call context */ |
| 6240 int nArg, /* Number of args (always 2) */ |
| 6241 sqlite3_value **apVal /* Function arguments */ |
| 6242 ){ |
| 6243 i64 iRowid; /* Rowid for record being decoded */ |
| 6244 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ |
| 6245 const u8 *aBlob; int n; /* Record to decode */ |
| 6246 u8 *a = 0; |
| 6247 Fts5Buffer s; /* Build up text to return here */ |
| 6248 int rc = SQLITE_OK; /* Return code */ |
| 6249 int nSpace = 0; |
| 6250 int eDetailNone = (sqlite3_user_data(pCtx)!=0); |
| 6251 |
| 6252 assert( nArg==2 ); |
| 6253 UNUSED_PARAM(nArg); |
| 6254 memset(&s, 0, sizeof(Fts5Buffer)); |
| 6255 iRowid = sqlite3_value_int64(apVal[0]); |
| 6256 |
| 6257 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] |
| 6258 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents |
| 6259 ** buffer overreads even if the record is corrupt. */ |
| 6260 n = sqlite3_value_bytes(apVal[1]); |
| 6261 aBlob = sqlite3_value_blob(apVal[1]); |
| 6262 nSpace = n + FTS5_DATA_ZERO_PADDING; |
| 6263 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); |
| 6264 if( a==0 ) goto decode_out; |
| 6265 memcpy(a, aBlob, n); |
| 6266 |
| 6267 |
| 6268 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); |
| 6269 |
| 6270 fts5DebugRowid(&rc, &s, iRowid); |
| 6271 if( bDlidx ){ |
| 6272 Fts5Data dlidx; |
| 6273 Fts5DlidxLvl lvl; |
| 6274 |
| 6275 dlidx.p = a; |
| 6276 dlidx.nn = n; |
| 6277 |
| 6278 memset(&lvl, 0, sizeof(Fts5DlidxLvl)); |
| 6279 lvl.pData = &dlidx; |
| 6280 lvl.iLeafPgno = iPgno; |
| 6281 |
| 6282 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ |
| 6283 sqlite3Fts5BufferAppendPrintf(&rc, &s, |
| 6284 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid |
| 6285 ); |
| 6286 } |
| 6287 }else if( iSegid==0 ){ |
| 6288 if( iRowid==FTS5_AVERAGES_ROWID ){ |
| 6289 fts5DecodeAverages(&rc, &s, a, n); |
| 6290 }else{ |
| 6291 fts5DecodeStructure(&rc, &s, a, n); |
| 6292 } |
| 6293 }else if( eDetailNone ){ |
| 6294 Fts5Buffer term; /* Current term read from page */ |
| 6295 int szLeaf; |
| 6296 int iPgidxOff = szLeaf = fts5GetU16(&a[2]); |
| 6297 int iTermOff; |
| 6298 int nKeep = 0; |
| 6299 int iOff; |
| 6300 |
| 6301 memset(&term, 0, sizeof(Fts5Buffer)); |
| 6302 |
| 6303 /* Decode any entries that occur before the first term. */ |
| 6304 if( szLeaf<n ){ |
| 6305 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff); |
| 6306 }else{ |
| 6307 iTermOff = szLeaf; |
| 6308 } |
| 6309 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); |
| 6310 |
| 6311 iOff = iTermOff; |
| 6312 while( iOff<szLeaf ){ |
| 6313 int nAppend; |
| 6314 |
| 6315 /* Read the term data for the next term*/ |
| 6316 iOff += fts5GetVarint32(&a[iOff], nAppend); |
| 6317 term.n = nKeep; |
| 6318 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); |
| 6319 sqlite3Fts5BufferAppendPrintf( |
| 6320 &rc, &s, " term=%.*s", term.n, (const char*)term.p |
| 6321 ); |
| 6322 iOff += nAppend; |
| 6323 |
| 6324 /* Figure out where the doclist for this term ends */ |
| 6325 if( iPgidxOff<n ){ |
| 6326 int nIncr; |
| 6327 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr); |
| 6328 iTermOff += nIncr; |
| 6329 }else{ |
| 6330 iTermOff = szLeaf; |
| 6331 } |
| 6332 |
| 6333 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); |
| 6334 iOff = iTermOff; |
| 6335 if( iOff<szLeaf ){ |
| 6336 iOff += fts5GetVarint32(&a[iOff], nKeep); |
| 6337 } |
| 6338 } |
| 6339 |
| 6340 fts5BufferFree(&term); |
| 6341 }else{ |
| 6342 Fts5Buffer term; /* Current term read from page */ |
| 6343 int szLeaf; /* Offset of pgidx in a[] */ |
| 6344 int iPgidxOff; |
| 6345 int iPgidxPrev = 0; /* Previous value read from pgidx */ |
| 6346 int iTermOff = 0; |
| 6347 int iRowidOff = 0; |
| 6348 int iOff; |
| 6349 int nDoclist; |
| 6350 |
| 6351 memset(&term, 0, sizeof(Fts5Buffer)); |
| 6352 |
| 6353 if( n<4 ){ |
| 6354 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); |
| 6355 goto decode_out; |
| 6356 }else{ |
| 6357 iRowidOff = fts5GetU16(&a[0]); |
| 6358 iPgidxOff = szLeaf = fts5GetU16(&a[2]); |
| 6359 if( iPgidxOff<n ){ |
| 6360 fts5GetVarint32(&a[iPgidxOff], iTermOff); |
| 6361 } |
| 6362 } |
| 6363 |
| 6364 /* Decode the position list tail at the start of the page */ |
| 6365 if( iRowidOff!=0 ){ |
| 6366 iOff = iRowidOff; |
| 6367 }else if( iTermOff!=0 ){ |
| 6368 iOff = iTermOff; |
| 6369 }else{ |
| 6370 iOff = szLeaf; |
| 6371 } |
| 6372 fts5DecodePoslist(&rc, &s, &a[4], iOff-4); |
| 6373 |
| 6374 /* Decode any more doclist data that appears on the page before the |
| 6375 ** first term. */ |
| 6376 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; |
| 6377 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); |
| 6378 |
| 6379 while( iPgidxOff<n ){ |
| 6380 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ |
| 6381 int nByte; /* Bytes of data */ |
| 6382 int iEnd; |
| 6383 |
| 6384 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); |
| 6385 iPgidxPrev += nByte; |
| 6386 iOff = iPgidxPrev; |
| 6387 |
| 6388 if( iPgidxOff<n ){ |
| 6389 fts5GetVarint32(&a[iPgidxOff], nByte); |
| 6390 iEnd = iPgidxPrev + nByte; |
| 6391 }else{ |
| 6392 iEnd = szLeaf; |
| 6393 } |
| 6394 |
| 6395 if( bFirst==0 ){ |
| 6396 iOff += fts5GetVarint32(&a[iOff], nByte); |
| 6397 term.n = nByte; |
| 6398 } |
| 6399 iOff += fts5GetVarint32(&a[iOff], nByte); |
| 6400 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); |
| 6401 iOff += nByte; |
| 6402 |
| 6403 sqlite3Fts5BufferAppendPrintf( |
| 6404 &rc, &s, " term=%.*s", term.n, (const char*)term.p |
| 6405 ); |
| 6406 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); |
| 6407 } |
| 6408 |
| 6409 fts5BufferFree(&term); |
| 6410 } |
| 6411 |
| 6412 decode_out: |
| 6413 sqlite3_free(a); |
| 6414 if( rc==SQLITE_OK ){ |
| 6415 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); |
| 6416 }else{ |
| 6417 sqlite3_result_error_code(pCtx, rc); |
| 6418 } |
| 6419 fts5BufferFree(&s); |
| 6420 } |
| 6421 |
| 6422 /* |
| 6423 ** The implementation of user-defined scalar function fts5_rowid(). |
| 6424 */ |
| 6425 static void fts5RowidFunction( |
| 6426 sqlite3_context *pCtx, /* Function call context */ |
| 6427 int nArg, /* Number of args (always 2) */ |
| 6428 sqlite3_value **apVal /* Function arguments */ |
| 6429 ){ |
| 6430 const char *zArg; |
| 6431 if( nArg==0 ){ |
| 6432 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); |
| 6433 }else{ |
| 6434 zArg = (const char*)sqlite3_value_text(apVal[0]); |
| 6435 if( 0==sqlite3_stricmp(zArg, "segment") ){ |
| 6436 i64 iRowid; |
| 6437 int segid, pgno; |
| 6438 if( nArg!=3 ){ |
| 6439 sqlite3_result_error(pCtx, |
| 6440 "should be: fts5_rowid('segment', segid, pgno))", -1 |
| 6441 ); |
| 6442 }else{ |
| 6443 segid = sqlite3_value_int(apVal[1]); |
| 6444 pgno = sqlite3_value_int(apVal[2]); |
| 6445 iRowid = FTS5_SEGMENT_ROWID(segid, pgno); |
| 6446 sqlite3_result_int64(pCtx, iRowid); |
| 6447 } |
| 6448 }else{ |
| 6449 sqlite3_result_error(pCtx, |
| 6450 "first arg to fts5_rowid() must be 'segment'" , -1 |
| 6451 ); |
| 6452 } |
| 6453 } |
| 6454 } |
| 6455 |
| 6456 /* |
| 6457 ** This is called as part of registering the FTS5 module with database |
| 6458 ** connection db. It registers several user-defined scalar functions useful |
| 6459 ** with FTS5. |
| 6460 ** |
| 6461 ** If successful, SQLITE_OK is returned. If an error occurs, some other |
| 6462 ** SQLite error code is returned instead. |
| 6463 */ |
| 6464 int sqlite3Fts5IndexInit(sqlite3 *db){ |
| 6465 int rc = sqlite3_create_function( |
| 6466 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 |
| 6467 ); |
| 6468 |
| 6469 if( rc==SQLITE_OK ){ |
| 6470 rc = sqlite3_create_function( |
| 6471 db, "fts5_decode_none", 2, |
| 6472 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 |
| 6473 ); |
| 6474 } |
| 6475 |
| 6476 if( rc==SQLITE_OK ){ |
| 6477 rc = sqlite3_create_function( |
| 6478 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 |
| 6479 ); |
| 6480 } |
| 6481 return rc; |
| 6482 } |
| 6483 |
| 6484 |
| 6485 int sqlite3Fts5IndexReset(Fts5Index *p){ |
| 6486 assert( p->pStruct==0 || p->iStructVersion!=0 ); |
| 6487 if( fts5IndexDataVersion(p)!=p->iStructVersion ){ |
| 6488 fts5StructureInvalidate(p); |
| 6489 } |
| 6490 return fts5IndexReturn(p); |
| 6491 } |
OLD | NEW |