OLD | NEW |
(Empty) | |
| 1 /************** Begin file fts3.c ********************************************/ |
| 2 /* |
| 3 ** 2006 Oct 10 |
| 4 ** |
| 5 ** The author disclaims copyright to this source code. In place of |
| 6 ** a legal notice, here is a blessing: |
| 7 ** |
| 8 ** May you do good and not evil. |
| 9 ** May you find forgiveness for yourself and forgive others. |
| 10 ** May you share freely, never taking more than you give. |
| 11 ** |
| 12 ****************************************************************************** |
| 13 ** |
| 14 ** This is an SQLite module implementing full-text search. |
| 15 */ |
| 16 |
| 17 /* |
| 18 ** The code in this file is only compiled if: |
| 19 ** |
| 20 ** * The FTS3 module is being built as an extension |
| 21 ** (in which case SQLITE_CORE is not defined), or |
| 22 ** |
| 23 ** * The FTS3 module is being built into the core of |
| 24 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 25 */ |
| 26 |
| 27 /* The full-text index is stored in a series of b+tree (-like) |
| 28 ** structures called segments which map terms to doclists. The |
| 29 ** structures are like b+trees in layout, but are constructed from the |
| 30 ** bottom up in optimal fashion and are not updatable. Since trees |
| 31 ** are built from the bottom up, things will be described from the |
| 32 ** bottom up. |
| 33 ** |
| 34 ** |
| 35 **** Varints **** |
| 36 ** The basic unit of encoding is a variable-length integer called a |
| 37 ** varint. We encode variable-length integers in little-endian order |
| 38 ** using seven bits per byte as follows: |
| 39 ** |
| 40 ** KEY: |
| 41 ** A = 0xxxxxxx 7 bits of data and one flag bit |
| 42 ** B = 1xxxxxxx 7 bits of data and one flag bit |
| 43 ** |
| 44 ** 7 bits - A |
| 45 ** 14 bits - BA |
| 46 ** 21 bits - BBA |
| 47 ** and so on. |
| 48 ** |
| 49 ** This is similar in concept to how sqlite encodes "varints" but |
| 50 ** the encoding is not the same. SQLite varints are big-endian |
| 51 ** and are limited to 9 bytes in length whereas FTS3 varints are |
| 52 ** little-endian and can be up to 10 bytes in length (in theory). |
| 53 ** |
| 54 ** Example encodings: |
| 55 ** |
| 56 ** 1: 0x01 |
| 57 ** 127: 0x7f |
| 58 ** 128: 0x80 0x01 |
| 59 ** |
| 60 ** |
| 61 **** Document lists **** |
| 62 ** A doclist (document list) holds a docid-sorted list of hits for a |
| 63 ** given term. Doclists hold docids and associated token positions. |
| 64 ** A docid is the unique integer identifier for a single document. |
| 65 ** A position is the index of a word within the document. The first |
| 66 ** word of the document has a position of 0. |
| 67 ** |
| 68 ** FTS3 used to optionally store character offsets using a compile-time |
| 69 ** option. But that functionality is no longer supported. |
| 70 ** |
| 71 ** A doclist is stored like this: |
| 72 ** |
| 73 ** array { |
| 74 ** varint docid; (delta from previous docid) |
| 75 ** array { (position list for column 0) |
| 76 ** varint position; (2 more than the delta from previous position) |
| 77 ** } |
| 78 ** array { |
| 79 ** varint POS_COLUMN; (marks start of position list for new column) |
| 80 ** varint column; (index of new column) |
| 81 ** array { |
| 82 ** varint position; (2 more than the delta from previous position) |
| 83 ** } |
| 84 ** } |
| 85 ** varint POS_END; (marks end of positions for this document) |
| 86 ** } |
| 87 ** |
| 88 ** Here, array { X } means zero or more occurrences of X, adjacent in |
| 89 ** memory. A "position" is an index of a token in the token stream |
| 90 ** generated by the tokenizer. Note that POS_END and POS_COLUMN occur |
| 91 ** in the same logical place as the position element, and act as sentinels |
| 92 ** ending a position list array. POS_END is 0. POS_COLUMN is 1. |
| 93 ** The position numbers are not stored literally but rather as two more |
| 94 ** than the difference from the prior position, or just the position plus |
| 95 ** 2 for the first position. Example: |
| 96 ** |
| 97 ** label: A B C D E F G H I J K |
| 98 ** value: 123 5 9 1 1 14 35 0 234 72 0 |
| 99 ** |
| 100 ** The 123 value is the first docid. For column zero in this document |
| 101 ** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1 |
| 102 ** at D signals the start of a new column; the 1 at E indicates that the |
| 103 ** new column is column number 1. There are two positions at 12 and 45 |
| 104 ** (14-2 and 35-2+12). The 0 at H indicates the end-of-document. The |
| 105 ** 234 at I is the delta to next docid (357). It has one position 70 |
| 106 ** (72-2) and then terminates with the 0 at K. |
| 107 ** |
| 108 ** A "position-list" is the list of positions for multiple columns for |
| 109 ** a single docid. A "column-list" is the set of positions for a single |
| 110 ** column. Hence, a position-list consists of one or more column-lists, |
| 111 ** a document record consists of a docid followed by a position-list and |
| 112 ** a doclist consists of one or more document records. |
| 113 ** |
| 114 ** A bare doclist omits the position information, becoming an |
| 115 ** array of varint-encoded docids. |
| 116 ** |
| 117 **** Segment leaf nodes **** |
| 118 ** Segment leaf nodes store terms and doclists, ordered by term. Leaf |
| 119 ** nodes are written using LeafWriter, and read using LeafReader (to |
| 120 ** iterate through a single leaf node's data) and LeavesReader (to |
| 121 ** iterate through a segment's entire leaf layer). Leaf nodes have |
| 122 ** the format: |
| 123 ** |
| 124 ** varint iHeight; (height from leaf level, always 0) |
| 125 ** varint nTerm; (length of first term) |
| 126 ** char pTerm[nTerm]; (content of first term) |
| 127 ** varint nDoclist; (length of term's associated doclist) |
| 128 ** char pDoclist[nDoclist]; (content of doclist) |
| 129 ** array { |
| 130 ** (further terms are delta-encoded) |
| 131 ** varint nPrefix; (length of prefix shared with previous term) |
| 132 ** varint nSuffix; (length of unshared suffix) |
| 133 ** char pTermSuffix[nSuffix];(unshared suffix of next term) |
| 134 ** varint nDoclist; (length of term's associated doclist) |
| 135 ** char pDoclist[nDoclist]; (content of doclist) |
| 136 ** } |
| 137 ** |
| 138 ** Here, array { X } means zero or more occurrences of X, adjacent in |
| 139 ** memory. |
| 140 ** |
| 141 ** Leaf nodes are broken into blocks which are stored contiguously in |
| 142 ** the %_segments table in sorted order. This means that when the end |
| 143 ** of a node is reached, the next term is in the node with the next |
| 144 ** greater node id. |
| 145 ** |
| 146 ** New data is spilled to a new leaf node when the current node |
| 147 ** exceeds LEAF_MAX bytes (default 2048). New data which itself is |
| 148 ** larger than STANDALONE_MIN (default 1024) is placed in a standalone |
| 149 ** node (a leaf node with a single term and doclist). The goal of |
| 150 ** these settings is to pack together groups of small doclists while |
| 151 ** making it efficient to directly access large doclists. The |
| 152 ** assumption is that large doclists represent terms which are more |
| 153 ** likely to be query targets. |
| 154 ** |
| 155 ** TODO(shess) It may be useful for blocking decisions to be more |
| 156 ** dynamic. For instance, it may make more sense to have a 2.5k leaf |
| 157 ** node rather than splitting into 2k and .5k nodes. My intuition is |
| 158 ** that this might extend through 2x or 4x the pagesize. |
| 159 ** |
| 160 ** |
| 161 **** Segment interior nodes **** |
| 162 ** Segment interior nodes store blockids for subtree nodes and terms |
| 163 ** to describe what data is stored by each subtree. Interior |
| 164 ** nodes are written using InteriorWriter, and read using |
| 165 ** InteriorReader. InteriorWriters are created as needed when |
| 166 ** SegmentWriter creates new leaf nodes, or when an interior node |
| 167 ** itself grows too big and must be split. The format of interior |
| 168 ** nodes: |
| 169 ** |
| 170 ** varint iHeight; (height from leaf level, always >0) |
| 171 ** varint iBlockid; (block id of node's leftmost subtree) |
| 172 ** optional { |
| 173 ** varint nTerm; (length of first term) |
| 174 ** char pTerm[nTerm]; (content of first term) |
| 175 ** array { |
| 176 ** (further terms are delta-encoded) |
| 177 ** varint nPrefix; (length of shared prefix with previous term) |
| 178 ** varint nSuffix; (length of unshared suffix) |
| 179 ** char pTermSuffix[nSuffix]; (unshared suffix of next term) |
| 180 ** } |
| 181 ** } |
| 182 ** |
| 183 ** Here, optional { X } means an optional element, while array { X } |
| 184 ** means zero or more occurrences of X, adjacent in memory. |
| 185 ** |
| 186 ** An interior node encodes n terms separating n+1 subtrees. The |
| 187 ** subtree blocks are contiguous, so only the first subtree's blockid |
| 188 ** is encoded. The subtree at iBlockid will contain all terms less |
| 189 ** than the first term encoded (or all terms if no term is encoded). |
| 190 ** Otherwise, for terms greater than or equal to pTerm[i] but less |
| 191 ** than pTerm[i+1], the subtree for that term will be rooted at |
| 192 ** iBlockid+i. Interior nodes only store enough term data to |
| 193 ** distinguish adjacent children (if the rightmost term of the left |
| 194 ** child is "something", and the leftmost term of the right child is |
| 195 ** "wicked", only "w" is stored). |
| 196 ** |
| 197 ** New data is spilled to a new interior node at the same height when |
| 198 ** the current node exceeds INTERIOR_MAX bytes (default 2048). |
| 199 ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing |
| 200 ** interior nodes and making the tree too skinny. The interior nodes |
| 201 ** at a given height are naturally tracked by interior nodes at |
| 202 ** height+1, and so on. |
| 203 ** |
| 204 ** |
| 205 **** Segment directory **** |
| 206 ** The segment directory in table %_segdir stores meta-information for |
| 207 ** merging and deleting segments, and also the root node of the |
| 208 ** segment's tree. |
| 209 ** |
| 210 ** The root node is the top node of the segment's tree after encoding |
| 211 ** the entire segment, restricted to ROOT_MAX bytes (default 1024). |
| 212 ** This could be either a leaf node or an interior node. If the top |
| 213 ** node requires more than ROOT_MAX bytes, it is flushed to %_segments |
| 214 ** and a new root interior node is generated (which should always fit |
| 215 ** within ROOT_MAX because it only needs space for 2 varints, the |
| 216 ** height and the blockid of the previous root). |
| 217 ** |
| 218 ** The meta-information in the segment directory is: |
| 219 ** level - segment level (see below) |
| 220 ** idx - index within level |
| 221 ** - (level,idx uniquely identify a segment) |
| 222 ** start_block - first leaf node |
| 223 ** leaves_end_block - last leaf node |
| 224 ** end_block - last block (including interior nodes) |
| 225 ** root - contents of root node |
| 226 ** |
| 227 ** If the root node is a leaf node, then start_block, |
| 228 ** leaves_end_block, and end_block are all 0. |
| 229 ** |
| 230 ** |
| 231 **** Segment merging **** |
| 232 ** To amortize update costs, segments are grouped into levels and |
| 233 ** merged in batches. Each increase in level represents exponentially |
| 234 ** more documents. |
| 235 ** |
| 236 ** New documents (actually, document updates) are tokenized and |
| 237 ** written individually (using LeafWriter) to a level 0 segment, with |
| 238 ** incrementing idx. When idx reaches MERGE_COUNT (default 16), all |
| 239 ** level 0 segments are merged into a single level 1 segment. Level 1 |
| 240 ** is populated like level 0, and eventually MERGE_COUNT level 1 |
| 241 ** segments are merged to a single level 2 segment (representing |
| 242 ** MERGE_COUNT^2 updates), and so on. |
| 243 ** |
| 244 ** A segment merge traverses all segments at a given level in |
| 245 ** parallel, performing a straightforward sorted merge. Since segment |
| 246 ** leaf nodes are written into the %_segments table in order, this |
| 247 ** merge traverses the underlying sqlite disk structures efficiently. |
| 248 ** After the merge, all segment blocks from the merged level are |
| 249 ** deleted. |
| 250 ** |
| 251 ** MERGE_COUNT controls how often we merge segments. 16 seems to be |
| 252 ** somewhat of a sweet spot for insertion performance. 32 and 64 show |
| 253 ** very similar performance numbers to 16 on insertion, though they're |
| 254 ** a tiny bit slower (perhaps due to more overhead in merge-time |
| 255 ** sorting). 8 is about 20% slower than 16, 4 about 50% slower than |
| 256 ** 16, 2 about 66% slower than 16. |
| 257 ** |
| 258 ** At query time, high MERGE_COUNT increases the number of segments |
| 259 ** which need to be scanned and merged. For instance, with 100k docs |
| 260 ** inserted: |
| 261 ** |
| 262 ** MERGE_COUNT segments |
| 263 ** 16 25 |
| 264 ** 8 12 |
| 265 ** 4 10 |
| 266 ** 2 6 |
| 267 ** |
| 268 ** This appears to have only a moderate impact on queries for very |
| 269 ** frequent terms (which are somewhat dominated by segment merge |
| 270 ** costs), and infrequent and non-existent terms still seem to be fast |
| 271 ** even with many segments. |
| 272 ** |
| 273 ** TODO(shess) That said, it would be nice to have a better query-side |
| 274 ** argument for MERGE_COUNT of 16. Also, it is possible/likely that |
| 275 ** optimizations to things like doclist merging will swing the sweet |
| 276 ** spot around. |
| 277 ** |
| 278 ** |
| 279 ** |
| 280 **** Handling of deletions and updates **** |
| 281 ** Since we're using a segmented structure, with no docid-oriented |
| 282 ** index into the term index, we clearly cannot simply update the term |
| 283 ** index when a document is deleted or updated. For deletions, we |
| 284 ** write an empty doclist (varint(docid) varint(POS_END)), for updates |
| 285 ** we simply write the new doclist. Segment merges overwrite older |
| 286 ** data for a particular docid with newer data, so deletes or updates |
| 287 ** will eventually overtake the earlier data and knock it out. The |
| 288 ** query logic likewise merges doclists so that newer data knocks out |
| 289 ** older data. |
| 290 */ |
| 291 #define CHROMIUM_FTS3_CHANGES 1 |
| 292 |
| 293 /************** Include fts3Int.h in the middle of fts3.c ********************/ |
| 294 /************** Begin file fts3Int.h *****************************************/ |
| 295 /* |
| 296 ** 2009 Nov 12 |
| 297 ** |
| 298 ** The author disclaims copyright to this source code. In place of |
| 299 ** a legal notice, here is a blessing: |
| 300 ** |
| 301 ** May you do good and not evil. |
| 302 ** May you find forgiveness for yourself and forgive others. |
| 303 ** May you share freely, never taking more than you give. |
| 304 ** |
| 305 ****************************************************************************** |
| 306 ** |
| 307 */ |
| 308 #ifndef _FTSINT_H |
| 309 #define _FTSINT_H |
| 310 |
| 311 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) |
| 312 # define NDEBUG 1 |
| 313 #endif |
| 314 |
| 315 /* |
| 316 ** FTS4 is really an extension for FTS3. It is enabled using the |
| 317 ** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow |
| 318 ** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3. |
| 319 */ |
| 320 #if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) |
| 321 # define SQLITE_ENABLE_FTS3 |
| 322 #endif |
| 323 |
| 324 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 325 |
| 326 /* If not building as part of the core, include sqlite3ext.h. */ |
| 327 #ifndef SQLITE_CORE |
| 328 /* # include "sqlite3ext.h" */ |
| 329 SQLITE_EXTENSION_INIT3 |
| 330 #endif |
| 331 |
| 332 /* #include "sqlite3.h" */ |
| 333 /************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ |
| 334 /************** Begin file fts3_tokenizer.h **********************************/ |
| 335 /* |
| 336 ** 2006 July 10 |
| 337 ** |
| 338 ** The author disclaims copyright to this source code. |
| 339 ** |
| 340 ************************************************************************* |
| 341 ** Defines the interface to tokenizers used by fulltext-search. There |
| 342 ** are three basic components: |
| 343 ** |
| 344 ** sqlite3_tokenizer_module is a singleton defining the tokenizer |
| 345 ** interface functions. This is essentially the class structure for |
| 346 ** tokenizers. |
| 347 ** |
| 348 ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps |
| 349 ** including customization information defined at creation time. |
| 350 ** |
| 351 ** sqlite3_tokenizer_cursor is generated by a tokenizer to generate |
| 352 ** tokens from a particular input. |
| 353 */ |
| 354 #ifndef _FTS3_TOKENIZER_H_ |
| 355 #define _FTS3_TOKENIZER_H_ |
| 356 |
| 357 /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. |
| 358 ** If tokenizers are to be allowed to call sqlite3_*() functions, then |
| 359 ** we will need a way to register the API consistently. |
| 360 */ |
| 361 /* #include "sqlite3.h" */ |
| 362 |
| 363 /* |
| 364 ** Structures used by the tokenizer interface. When a new tokenizer |
| 365 ** implementation is registered, the caller provides a pointer to |
| 366 ** an sqlite3_tokenizer_module containing pointers to the callback |
| 367 ** functions that make up an implementation. |
| 368 ** |
| 369 ** When an fts3 table is created, it passes any arguments passed to |
| 370 ** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the |
| 371 ** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer |
| 372 ** implementation. The xCreate() function in turn returns an |
| 373 ** sqlite3_tokenizer structure representing the specific tokenizer to |
| 374 ** be used for the fts3 table (customized by the tokenizer clause arguments). |
| 375 ** |
| 376 ** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() |
| 377 ** method is called. It returns an sqlite3_tokenizer_cursor object |
| 378 ** that may be used to tokenize a specific input buffer based on |
| 379 ** the tokenization rules supplied by a specific sqlite3_tokenizer |
| 380 ** object. |
| 381 */ |
| 382 typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; |
| 383 typedef struct sqlite3_tokenizer sqlite3_tokenizer; |
| 384 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; |
| 385 |
| 386 struct sqlite3_tokenizer_module { |
| 387 |
| 388 /* |
| 389 ** Structure version. Set to 0, or to 1 if xLanguageid is provided. |
| 390 */ |
| 391 int iVersion; |
| 392 |
| 393 /* |
| 394 ** Create a new tokenizer. The values in the argv[] array are the |
| 395 ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL |
| 396 ** TABLE statement that created the fts3 table. For example, if |
| 397 ** the following SQL is executed: |
| 398 ** |
| 399 ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2) |
| 400 ** |
| 401 ** then argc is set to 2, and the argv[] array contains pointers |
| 402 ** to the strings "arg1" and "arg2". |
| 403 ** |
| 404 ** This method should return either SQLITE_OK (0), or an SQLite error |
| 405 ** code. If SQLITE_OK is returned, then *ppTokenizer should be set |
| 406 ** to point at the newly created tokenizer structure. The generic |
| 407 ** sqlite3_tokenizer.pModule variable should not be initialized by |
| 408 ** this callback. The caller will do so. |
| 409 */ |
| 410 int (*xCreate)( |
| 411 int argc, /* Size of argv array */ |
| 412 const char *const*argv, /* Tokenizer argument strings */ |
| 413 sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ |
| 414 ); |
| 415 |
| 416 /* |
| 417 ** Destroy an existing tokenizer. The fts3 module calls this method |
| 418 ** exactly once for each successful call to xCreate(). |
| 419 */ |
| 420 int (*xDestroy)(sqlite3_tokenizer *pTokenizer); |
| 421 |
| 422 /* |
| 423 ** Create a tokenizer cursor to tokenize an input buffer. The caller |
| 424 ** is responsible for ensuring that the input buffer remains valid |
| 425 ** until the cursor is closed (using the xClose() method). |
| 426 */ |
| 427 int (*xOpen)( |
| 428 sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ |
| 429 const char *pInput, int nBytes, /* Input buffer */ |
| 430 sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ |
| 431 ); |
| 432 |
| 433 /* |
| 434 ** Destroy an existing tokenizer cursor. The fts3 module calls this |
| 435 ** method exactly once for each successful call to xOpen(). |
| 436 */ |
| 437 int (*xClose)(sqlite3_tokenizer_cursor *pCursor); |
| 438 |
| 439 /* |
| 440 ** Retrieve the next token from the tokenizer cursor pCursor. This |
| 441 ** method should either return SQLITE_OK and set the values of the |
| 442 ** "OUT" variables identified below, or SQLITE_DONE to indicate that |
| 443 ** the end of the buffer has been reached, or an SQLite error code. |
| 444 ** |
| 445 ** *ppToken should be set to point at a buffer containing the |
| 446 ** normalized version of the token (i.e. after any case-folding and/or |
| 447 ** stemming has been performed). *pnBytes should be set to the length |
| 448 ** of this buffer in bytes. The input text that generated the token is |
| 449 ** identified by the byte offsets returned in *piStartOffset and |
| 450 ** *piEndOffset. *piStartOffset should be set to the index of the first |
| 451 ** byte of the token in the input buffer. *piEndOffset should be set |
| 452 ** to the index of the first byte just past the end of the token in |
| 453 ** the input buffer. |
| 454 ** |
| 455 ** The buffer *ppToken is set to point at is managed by the tokenizer |
| 456 ** implementation. It is only required to be valid until the next call |
| 457 ** to xNext() or xClose(). |
| 458 */ |
| 459 /* TODO(shess) current implementation requires pInput to be |
| 460 ** nul-terminated. This should either be fixed, or pInput/nBytes |
| 461 ** should be converted to zInput. |
| 462 */ |
| 463 int (*xNext)( |
| 464 sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ |
| 465 const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ |
| 466 int *piStartOffset, /* OUT: Byte offset of first byte of token in input buffer */ |
| 467 int *piEndOffset, /* OUT: Byte offset just past the end of token in input buffer */ |
| 468 int *piPosition /* OUT: Number of tokens returned before this one */ |
| 469 ); |
| 470 |
| 471 /*********************************************************************** |
| 472 ** Methods below this point are only available if iVersion>=1. |
| 473 */ |
| 474 |
| 475 /* |
| 476 ** Configure the language id of a tokenizer cursor. |
| 477 */ |
| 478 int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); |
| 479 }; |
| 480 |
| 481 struct sqlite3_tokenizer { |
| 482 const sqlite3_tokenizer_module *pModule; /* Module for this tokenizer; set by the caller of xCreate() */ |
| 483 /* Tokenizer implementations will typically add additional fields */ |
| 484 }; |
| 485 |
| 486 struct sqlite3_tokenizer_cursor { |
| 487 sqlite3_tokenizer *pTokenizer; /* Tokenizer associated with this cursor */ |
| 488 /* Tokenizer implementations will typically add additional fields */ |
| 489 }; |
| 490 |
| 491 int fts3_global_term_cnt(int iTerm, int iCol); |
| 492 int fts3_term_cnt(int iTerm, int iCol); |
| 493 |
| 494 |
| 495 #endif /* _FTS3_TOKENIZER_H_ */ |
| 496 |
| 497 /************** End of fts3_tokenizer.h **************************************/ |
| 498 /************** Continuing where we left off in fts3Int.h ********************/ |
| 499 /************** Include fts3_hash.h in the middle of fts3Int.h ***************/ |
| 500 /************** Begin file fts3_hash.h ***************************************/ |
| 501 /* |
| 502 ** 2001 September 22 |
| 503 ** |
| 504 ** The author disclaims copyright to this source code. In place of |
| 505 ** a legal notice, here is a blessing: |
| 506 ** |
| 507 ** May you do good and not evil. |
| 508 ** May you find forgiveness for yourself and forgive others. |
| 509 ** May you share freely, never taking more than you give. |
| 510 ** |
| 511 ************************************************************************* |
| 512 ** This is the header file for the generic hash-table implementation |
| 513 ** used in SQLite. We've modified it slightly to serve as a standalone |
| 514 ** hash table implementation for the full-text indexing module. |
| 515 ** |
| 516 */ |
| 517 #ifndef _FTS3_HASH_H_ |
| 518 #define _FTS3_HASH_H_ |
| 519 |
| 520 /* Forward declarations of structures. */ |
| 521 typedef struct Fts3Hash Fts3Hash; |
| 522 typedef struct Fts3HashElem Fts3HashElem; |
| 523 |
| 524 /* A complete hash table is an instance of the following structure. |
| 525 ** The internals of this structure are intended to be opaque -- client |
| 526 ** code should not attempt to access or modify the fields of this structure |
| 527 ** directly. Change this structure only by using the routines below. |
| 528 ** However, many of the "procedures" and "functions" for modifying and |
| 529 ** accessing this structure are really macros, so we can't really make |
| 530 ** this structure opaque. |
| 531 */ |
| 532 struct Fts3Hash { |
| 533 char keyClass; /* FTS3_HASH_STRING or FTS3_HASH_BINARY */ |
| 534 char copyKey; /* True if copy of key made on insert */ |
| 535 int count; /* Number of entries in this table */ |
| 536 Fts3HashElem *first; /* First element of the doubly-linked list of all elements */ |
| 537 int htsize; /* Number of buckets in the hash table */ |
| 538 struct _fts3ht { /* the hash table buckets */ |
| 539 int count; /* Number of entries with this hash */ |
| 540 Fts3HashElem *chain; /* Pointer to first entry with this hash */ |
| 541 } *ht; |
| 542 }; |
| 543 |
| 544 /* Each element in the hash table is an instance of the following |
| 545 ** structure. All elements are stored on a single doubly-linked list. |
| 546 ** |
| 547 ** Again, this structure is intended to be opaque, but it can't really |
| 548 ** be opaque because it is used by macros. |
| 549 */ |
| 550 struct Fts3HashElem { |
| 551 Fts3HashElem *next, *prev; /* Next and previous elements on the list of all elements */ |
| 552 void *data; /* Data associated with this element */ |
| 553 void *pKey; int nKey; /* Key for this element and its length in bytes */ |
| 554 }; |
| 555 |
| 556 /* |
| 557 ** There are 2 different modes of operation for a hash table: |
| 558 ** |
| 559 ** FTS3_HASH_STRING pKey points to a string that is nKey bytes long |
| 560 ** (including the null-terminator, if any). Case |
| 561 ** is respected in comparisons. |
| 562 ** |
| 563 ** FTS3_HASH_BINARY pKey points to binary data nKey bytes long. |
| 564 ** memcmp() is used to compare keys. |
| 565 ** |
| 566 ** A copy of the key is made if the copyKey parameter to fts3HashInit is 1. |
| 567 */ |
| 568 #define FTS3_HASH_STRING 1 |
| 569 #define FTS3_HASH_BINARY 2 |
| 570 |
| 571 /* |
| 572 ** Access routines. To delete, insert a NULL pointer. |
| 573 */ |
| 574 SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copy
Key); |
| 575 SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey
, void *pData); |
| 576 SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int
nKey); |
| 577 SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*); |
| 578 SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const voi
d *, int); |
| 579 |
| 580 /* |
| 581 ** Shorthand for the functions above |
| 582 */ |
| 583 #define fts3HashInit sqlite3Fts3HashInit |
| 584 #define fts3HashInsert sqlite3Fts3HashInsert |
| 585 #define fts3HashFind sqlite3Fts3HashFind |
| 586 #define fts3HashClear sqlite3Fts3HashClear |
| 587 #define fts3HashFindElem sqlite3Fts3HashFindElem |
| 588 |
| 589 /* |
| 590 ** Macros for looping over all elements of a hash table. The idiom is |
| 591 ** like this: |
| 592 ** |
| 593 ** Fts3Hash h; |
| 594 ** Fts3HashElem *p; |
| 595 ** ... |
| 596 ** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){ |
| 597 ** SomeStructure *pData = fts3HashData(p); |
| 598 ** // do something with pData |
| 599 ** } |
| 600 */ |
| 601 #define fts3HashFirst(H) ((H)->first) |
| 602 #define fts3HashNext(E) ((E)->next) |
| 603 #define fts3HashData(E) ((E)->data) |
| 604 #define fts3HashKey(E) ((E)->pKey) |
| 605 #define fts3HashKeysize(E) ((E)->nKey) |
| 606 |
| 607 /* |
| 608 ** Number of entries in a hash table |
| 609 */ |
| 610 #define fts3HashCount(H) ((H)->count) |
| 611 |
| 612 #endif /* _FTS3_HASH_H_ */ |
| 613 |
| 614 /************** End of fts3_hash.h *******************************************/ |
| 615 /************** Continuing where we left off in fts3Int.h ********************/ |
| 616 |
| 617 /* |
| 618 ** This constant determines the maximum depth of an FTS expression tree |
| 619 ** that the library will create and use. FTS uses recursion to perform |
| 620 ** various operations on the query tree, so the disadvantage of a large |
| 621 ** limit is that it may allow very large queries to use large amounts |
| 622 ** of stack space (perhaps causing a stack overflow). |
| 623 */ |
| 624 #ifndef SQLITE_FTS3_MAX_EXPR_DEPTH |
| 625 # define SQLITE_FTS3_MAX_EXPR_DEPTH 12 |
| 626 #endif |
| 627 |
| 628 |
| 629 /* |
| 630 ** This constant controls how often segments are merged. Once there are |
| 631 ** FTS3_MERGE_COUNT segments of level N, they are merged into a single |
| 632 ** segment of level N+1. |
| 633 */ |
| 634 #define FTS3_MERGE_COUNT 16 |
| 635 |
| 636 /* |
| 637 ** This is the maximum amount of data (in bytes) to store in the |
| 638 ** Fts3Table.pendingTerms hash table. Normally, the hash table is |
| 639 ** populated as documents are inserted/updated/deleted in a transaction |
| 640 ** and used to create a new segment when the transaction is committed. |
| 641 ** However if this limit is reached midway through a transaction, a new |
| 642 ** segment is created and the hash table cleared immediately. |
| 643 */ |
| 644 #define FTS3_MAX_PENDING_DATA (1*1024*1024) |
| 645 |
| 646 /* |
| 647 ** Macro to return the number of elements in an array. SQLite has a |
| 648 ** similar macro called ArraySize(). Use a different name to avoid |
| 649 ** a collision when building an amalgamation with built-in FTS3. |
| 650 */ |
| 651 #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) |
| 652 |
| 653 |
| 654 #ifndef MIN |
| 655 # define MIN(x,y) ((x)<(y)?(x):(y)) |
| 656 #endif |
| 657 #ifndef MAX |
| 658 # define MAX(x,y) ((x)>(y)?(x):(y)) |
| 659 #endif |
| 660 |
| 661 /* |
| 662 ** Maximum length of a varint encoded integer. The varint format is different |
| 663 ** from that used by SQLite, so the maximum length is 10, not 9. |
| 664 */ |
| 665 #define FTS3_VARINT_MAX 10 |
| 666 |
| 667 /* |
| 668 ** FTS4 virtual tables may maintain multiple indexes - one index of all terms |
| 669 ** in the document set and zero or more prefix indexes. All indexes are stored |
| 670 ** as one or more b+-trees in the %_segments and %_segdir tables. |
| 671 ** |
| 672 ** It is possible to determine which index a b+-tree belongs to based on the |
| 673 ** value stored in the "%_segdir.level" column. Given this value L, the index |
| 674 ** that the b+-tree belongs to is (L>>10). In other words, all b+-trees with |
| 675 ** level values between 0 and 1023 (inclusive) belong to index 0, all levels |
| 676 ** between 1024 and 2047 to index 1, and so on. |
| 677 ** |
| 678 ** It is considered impossible for an index to use more than 1024 levels. In |
| 679 ** theory though this may happen, but only after at least |
| 680 ** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. |
| 681 */ |
| 682 #define FTS3_SEGDIR_MAXLEVEL 1024 |
| 683 #define FTS3_SEGDIR_MAXLEVEL_STR "1024" |
| 684 |
| 685 /* |
| 686 ** The testcase() macro is only used by the amalgamation. If it has not |
| 687 ** already been defined, define it here as a no-op that expands to nothing. |
| 688 */ |
| 689 #ifndef testcase |
| 690 # define testcase(X) |
| 691 #endif |
| 692 |
| 693 /* |
| 694 ** Terminator values for position-lists (POS_END) and column-lists (POS_COLUMN). |
| 695 */ |
| 696 #define POS_COLUMN (1) /* Column-list terminator */ |
| 697 #define POS_END (0) /* Position-list terminator */ |
| 698 |
| 699 /* |
| 700 ** This section provides the definitions needed to compile the |
| 701 ** FTS3 extension outside of the amalgamation, that is, when |
| 702 ** SQLITE_AMALGAMATION is not defined. |
| 703 */ |
| 704 #ifndef SQLITE_AMALGAMATION |
| 705 /* |
| 706 ** Macros indicating that conditional expressions are always true or |
| 707 ** false. With SQLITE_COVERAGE_TEST they become constants, with SQLITE_DEBUG the value is passed through sqlite3Fts3Always()/sqlite3Fts3Never(), and otherwise the expression is evaluated unchanged. |
| 708 */ |
| 709 #ifdef SQLITE_COVERAGE_TEST |
| 710 # define ALWAYS(x) (1) |
| 711 # define NEVER(X) (0) |
| 712 #elif defined(SQLITE_DEBUG) |
| 713 # define ALWAYS(x) sqlite3Fts3Always((x)!=0) |
| 714 # define NEVER(x) sqlite3Fts3Never((x)!=0) |
| 715 SQLITE_PRIVATE int sqlite3Fts3Always(int b); |
| 716 SQLITE_PRIVATE int sqlite3Fts3Never(int b); |
| 717 #else |
| 718 # define ALWAYS(x) (x) |
| 719 # define NEVER(x) (x) |
| 720 #endif |
| 721 |
| 722 /* |
| 723 ** Short names for the fixed-size integer types used internally by SQLite. |
| 724 */ |
| 725 typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ |
| 726 typedef short int i16; /* 2-byte (or larger) signed integer */ |
| 727 typedef unsigned int u32; /* 4-byte unsigned integer */ |
| 728 typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ |
| 729 typedef sqlite3_int64 i64; /* 8-byte signed integer */ |
| 730 |
| 731 /* |
| 732 ** Macro used to suppress compiler warnings for unused parameters. It evaluates x and discards the result. |
| 733 */ |
| 734 #define UNUSED_PARAMETER(x) (void)(x) |
| 735 |
| 736 /* |
| 737 ** Activate assert() only if SQLITE_DEBUG is defined: unless it is, NDEBUG is defined here so that assert() compiles to nothing. |
| 738 */ |
| 739 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) |
| 740 # define NDEBUG 1 |
| 741 #endif |
| 742 |
| 743 /* |
| 744 ** The TESTONLY macro is used to enclose variable declarations or |
| 745 ** other bits of code that are needed to support the arguments |
| 746 ** within testcase() and assert() macros. Its argument is dropped unless SQLITE_DEBUG or SQLITE_COVERAGE_TEST is defined. |
| 747 */ |
| 748 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) |
| 749 # define TESTONLY(X) X |
| 750 #else |
| 751 # define TESTONLY(X) |
| 752 #endif |
| 753 |
| 754 #endif /* SQLITE_AMALGAMATION */ |
| 755 /* FTS_CORRUPT_VTAB expands to a call to sqlite3Fts3Corrupt() when SQLITE_DEBUG is defined, and to the constant SQLITE_CORRUPT_VTAB otherwise. */ |
| 756 #ifdef SQLITE_DEBUG |
| 757 SQLITE_PRIVATE int sqlite3Fts3Corrupt(void); |
| 758 # define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() |
| 759 #else |
| 760 # define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB |
| 761 #endif |
| 762 |
| 763 typedef struct Fts3Table Fts3Table; |
| 764 typedef struct Fts3Cursor Fts3Cursor; |
| 765 typedef struct Fts3Expr Fts3Expr; |
| 766 typedef struct Fts3Phrase Fts3Phrase; |
| 767 typedef struct Fts3PhraseToken Fts3PhraseToken; |
| 768 |
| 769 typedef struct Fts3Doclist Fts3Doclist; |
| 770 typedef struct Fts3SegFilter Fts3SegFilter; |
| 771 typedef struct Fts3DeferredToken Fts3DeferredToken; |
| 772 typedef struct Fts3SegReader Fts3SegReader; |
| 773 typedef struct Fts3MultiSegReader Fts3MultiSegReader; |
| 774 |
| 775 typedef struct MatchinfoBuffer MatchinfoBuffer; |
| 776 |
| 777 /* |
| 778 ** A connection to a fulltext index is an instance of the following |
| 779 ** structure. The xCreate and xConnect methods create an instance |
| 780 ** of this structure and xDestroy and xDisconnect free that instance. |
| 781 ** All other methods receive a pointer to the structure as one of their |
| 782 ** arguments. |
| 783 */ |
| 784 struct Fts3Table { |
| 785 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 786 sqlite3 *db; /* The database connection */ |
| 787 const char *zDb; /* logical database name */ |
| 788 const char *zName; /* virtual table name */ |
| 789 int nColumn; /* number of named columns in virtual table */ |
| 790 char **azColumn; /* column names. malloced */ |
| 791 u8 *abNotindexed; /* True for 'notindexed' columns */ |
| 792 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ |
| 793 char *zContentTbl; /* content=xxx option, or NULL */ |
| 794 char *zLanguageid; /* languageid=xxx option, or NULL */ |
| 795 int nAutoincrmerge; /* Value configured by 'automerge' */ |
| 796 u32 nLeafAdd; /* Number of leaf blocks added this trans */ |
| 797 |
| 798 /* Precompiled statements used by the implementation. Each of these |
| 799 ** statements is run and reset within a single virtual table API call. |
| 800 */ |
| 801 sqlite3_stmt *aStmt[40]; |
| 802 |
| 803 char *zReadExprlist; /* Released with sqlite3_free() in fts3DisconnectMethod() */ |
| 804 char *zWriteExprlist; /* Released with sqlite3_free() in fts3DisconnectMethod() */ |
| 805 |
| 806 int nNodeSize; /* Soft limit for node size */ |
| 807 u8 bFts4; /* True for FTS4, false for FTS3 */ |
| 808 u8 bHasStat; /* True if %_stat table exists (2==unknown) */ |
| 809 u8 bHasDocsize; /* True if %_docsize table exists */ |
| 810 u8 bDescIdx; /* True if doclists are in reverse order */ |
| 811 u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ |
| 812 int nPgsz; /* Page size for host database */ |
| 813 char *zSegmentsTbl; /* Name of %_segments table */ |
| 814 sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ |
| 815 |
| 816 /* |
| 817 ** The following array of hash tables is used to buffer pending index |
| 818 ** updates during transactions. All pending updates buffered at any one |
| 819 ** time must share a common language-id (see the FTS4 langid= feature). |
| 820 ** The current language id is stored in variable iPrevLangid. |
| 821 ** |
| 822 ** A single FTS4 table may have multiple full-text indexes. For each index |
| 823 ** there is an entry in the aIndex[] array. Index 0 is an index of all the |
| 824 ** terms that appear in the document set. Each subsequent index in aIndex[] |
| 825 ** is an index of prefixes of a specific length. |
| 826 ** |
| 827 ** Variable nPendingData contains an estimate of the memory consumed by the |
| 828 ** pending data structures, including hash table overhead, but not including |
| 829 ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash |
| 830 ** tables are flushed to disk. Variable iPrevDocid is the docid of the most |
| 831 ** recently inserted record. |
| 832 */ |
| 833 int nIndex; /* Size of aIndex[] */ |
| 834 struct Fts3Index { |
| 835 int nPrefix; /* Prefix length (0 for main terms index) */ |
| 836 Fts3Hash hPending; /* Pending terms table for this index */ |
| 837 } *aIndex; |
| 838 int nMaxPendingData; /* Max pending data before flush to disk */ |
| 839 int nPendingData; /* Current bytes of pending data */ |
| 840 sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ |
| 841 int iPrevLangid; /* Langid of recently inserted document */ |
| 842 int bPrevDelete; /* True if last operation was a delete */ |
| 843 |
| 844 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) |
| 845 /* State variables used for validating that the transaction control |
| 846 ** methods of the virtual table are called at appropriate times. These |
| 847 ** values do not contribute to FTS functionality; they are used for |
| 848 ** verifying the operation of the SQLite core. |
| 849 */ |
| 850 int inTransaction; /* True after xBegin but before xCommit/xRollback */ |
| 851 int mxSavepoint; /* Largest valid xSavepoint integer */ |
| 852 #endif |
| 853 |
| 854 #ifdef SQLITE_TEST |
| 855 /* True to disable the incremental doclist optimization. This is controlled |
| 856 ** by special insert command 'test-no-incr-doclist'. */ |
| 857 int bNoIncrDoclist; |
| 858 #endif |
| 859 }; |
| 860 |
| 861 /* |
| 862 ** When the core wants to read from the virtual table, it creates a |
| 863 ** virtual table cursor (an instance of the following structure) using |
| 864 ** the xOpen method. Cursors are destroyed using the xClose method. |
| 865 */ |
| 866 struct Fts3Cursor { |
| 867 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 868 i16 eSearch; /* Search strategy (see the FTS3_xxx_SEARCH values) */ |
| 869 u8 isEof; /* True if at End Of Results */ |
| 870 u8 isRequireSeek; /* True if must seek pStmt to %_content row */ |
| 871 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ |
| 872 Fts3Expr *pExpr; /* Parsed MATCH query string */ |
| 873 int iLangid; /* Language being queried for */ |
| 874 int nPhrase; /* Number of matchable phrases in query */ |
| 875 Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ |
| 876 sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ |
| 877 char *pNextId; /* Pointer into the body of aDoclist */ |
| 878 char *aDoclist; /* List of docids for full-text queries */ |
| 879 int nDoclist; /* Size of buffer at aDoclist */ |
| 880 u8 bDesc; /* True to sort in descending order */ |
| 881 int eEvalmode; /* An FTS3_EVAL_XX constant */ |
| 882 int nRowAvg; /* Average size of database rows, in pages */ |
| 883 sqlite3_int64 nDoc; /* Documents in table */ |
| 884 i64 iMinDocid; /* Minimum docid to return */ |
| 885 i64 iMaxDocid; /* Maximum docid to return */ |
| 886 int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ |
| 887 MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ |
| 888 }; |
| 889 /* Values for Fts3Cursor.eEvalmode */ |
| 890 #define FTS3_EVAL_FILTER 0 |
| 891 #define FTS3_EVAL_NEXT 1 |
| 892 #define FTS3_EVAL_MATCHINFO 2 |
| 893 |
| 894 /* |
| 895 ** The Fts3Cursor.eSearch member is always set to one of the following. |
| 896 ** Actually, Fts3Cursor.eSearch can be greater than or equal to |
| 897 ** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index |
| 898 ** of the column to be searched. For example, in |
| 899 ** |
| 900 ** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); |
| 901 ** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; |
| 902 ** |
| 903 ** Because the LHS of the MATCH operator is 2nd column "b", |
| 904 ** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, |
| 905 ** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" |
| 906 ** indicating that all columns should be searched, |
| 907 ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. |
| 908 */ |
| 909 #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ |
| 910 #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ |
| 911 #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ |
| 912 |
| 913 /* |
| 914 ** The lower 16-bits of the sqlite3_index_info.idxNum value set by |
| 915 ** the xBestIndex() method contain the Fts3Cursor.eSearch value. |
| 916 ** The upper 16-bits contain a combination of the following |
| 917 ** bits, used to describe extra constraints on full-text searches. |
| 918 */ |
| 919 #define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ |
| 920 #define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ |
| 921 #define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ |
| 922 |
| 923 struct Fts3Doclist { /* Doclist buffer plus iteration state; embedded in struct Fts3Phrase */ |
| 924 char *aAll; /* Array containing doclist (or NULL) */ |
| 925 int nAll; /* Size of aAll[] in bytes */ |
| 926 char *pNextDocid; /* Pointer to next docid */ |
| 927 |
| 928 sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ |
| 929 int bFreeList; /* True if pList should be sqlite3_free()d */ |
| 930 char *pList; /* Pointer to position list following iDocid */ |
| 931 int nList; /* Length of position list */ |
| 932 }; |
| 933 |
| 934 /* |
| 935 ** A "phrase" is a sequence of one or more tokens that must match in |
| 936 ** sequence. A single token is the base case and the most common case. |
| 937 ** For a sequence of tokens contained in double-quotes (i.e. "one two three") |
| 938 ** Fts3Phrase.nToken will be the number of tokens in the string. Each token is described by one Fts3PhraseToken. |
| 939 */ |
| 940 struct Fts3PhraseToken { |
| 941 char *z; /* Text of the token */ |
| 942 int n; /* Number of bytes in buffer z */ |
| 943 int isPrefix; /* True if token ends with a "*" character */ |
| 944 int bFirst; /* True if token must appear at position 0 */ |
| 945 |
| 946 /* Variables above this point are populated when the expression is |
| 947 ** parsed (by code in fts3_expr.c). Below this point the variables are |
| 948 ** used when evaluating the expression. */ |
| 949 Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ |
| 950 Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ |
| 951 }; |
| 952 |
| 953 struct Fts3Phrase { |
| 954 /* Cache of doclist for this phrase. */ |
| 955 Fts3Doclist doclist; |
| 956 int bIncr; /* True if doclist is loaded incrementally */ |
| 957 int iDoclistToken; /* NOTE(review): presumably an index into aToken[] - confirm against the evaluation code */ |
| 958 |
| 959 /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendant of an |
| 960 ** OR condition. */ |
| 961 char *pOrPoslist; |
| 962 i64 iOrDocid; |
| 963 |
| 964 /* Variables below this point are populated by fts3_expr.c when parsing |
| 965 ** a MATCH expression. Everything above is part of the evaluation phase. |
| 966 */ |
| 967 int nToken; /* Number of tokens in the phrase */ |
| 968 int iColumn; /* Index of column this phrase must match */ |
| 969 Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase (declared with a single element) */ |
| 970 }; |
| 971 |
| 972 /* |
| 973 ** A tree of these objects forms the RHS of a MATCH operator. |
| 974 ** |
| 975 ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist |
| 976 ** points to a malloced buffer, size nDoclist bytes, containing the results |
| 977 ** of this phrase query in FTS3 doclist format. As usual, the initial |
| 978 ** "Length" field found in doclists stored on disk is omitted from this |
| 979 ** buffer. NOTE(review): struct Fts3Expr has no isLoaded, aDoclist or nDoclist members; this paragraph looks stale - confirm. |
| 980 ** |
| 981 ** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global |
| 982 ** matchinfo data. If it is not NULL, it points to an array of size nCol*3, |
| 983 ** where nCol is the number of columns in the queried FTS table. The array |
| 984 ** is populated as follows: |
| 985 ** |
| 986 ** aMI[iCol*3 + 0] = Undefined |
| 987 ** aMI[iCol*3 + 1] = Number of occurrences |
| 988 ** aMI[iCol*3 + 2] = Number of rows containing at least one instance |
| 989 ** |
| 990 ** The aMI array is allocated using sqlite3_malloc(). It should be freed |
| 991 ** when the expression node is. |
| 992 */ |
| 993 struct Fts3Expr { |
| 994 int eType; /* One of the FTSQUERY_XXX values defined below */ |
| 995 int nNear; /* Valid if eType==FTSQUERY_NEAR */ |
| 996 Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ |
| 997 Fts3Expr *pLeft; /* Left operand */ |
| 998 Fts3Expr *pRight; /* Right operand */ |
| 999 Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ |
| 1000 |
| 1001 /* The following are used by the fts3_eval.c module. */ |
| 1002 sqlite3_int64 iDocid; /* Current docid */ |
| 1003 u8 bEof; /* True if this expression is at EOF already */ |
| 1004 u8 bStart; /* True if iDocid is valid */ |
| 1005 u8 bDeferred; /* True if this expression is entirely deferred */ |
| 1006 |
| 1007 /* The following are used by the fts3_snippet.c module. */ |
| 1008 int iPhrase; /* Index of this phrase in matchinfo() results */ |
| 1009 u32 *aMI; /* See above */ |
| 1010 }; |
| 1011 |
| 1012 /* |
| 1013 ** Candidate values for Fts3Expr.eType. Note that the order of the first |
| 1014 ** four values is in order of precedence when parsing expressions. For |
| 1015 ** example, the following: |
| 1016 ** |
| 1017 ** "a OR b AND c NOT d NEAR e" |
| 1018 ** |
| 1019 ** is equivalent to: |
| 1020 ** |
| 1021 ** "a OR (b AND (c NOT (d NEAR e)))" |
| 1022 */ |
| 1023 #define FTSQUERY_NEAR 1 |
| 1024 #define FTSQUERY_NOT 2 |
| 1025 #define FTSQUERY_AND 3 |
| 1026 #define FTSQUERY_OR 4 |
| 1027 #define FTSQUERY_PHRASE 5 |
| 1028 |
| 1029 |
| 1030 /* fts3_write.c */ |
| 1031 SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sql
ite3_int64*); |
| 1032 SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *); |
| 1033 SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *); |
| 1034 SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *); |
| 1035 SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, |
| 1036 sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); |
| 1037 SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( |
| 1038 Fts3Table*,int,const char*,int,int,Fts3SegReader**); |
| 1039 SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *); |
| 1040 SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt
**); |
| 1041 SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*
, int*); |
| 1042 |
| 1043 SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); |
| 1044 SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_
stmt **); |
| 1045 |
| 1046 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 1047 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); |
| 1048 SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); |
| 1049 SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); |
| 1050 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); |
| 1051 SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, in
t *); |
| 1052 #else |
| 1053 # define sqlite3Fts3FreeDeferredTokens(x) |
| 1054 # define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK |
| 1055 # define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK |
| 1056 # define sqlite3Fts3FreeDeferredDoclists(x) |
| 1057 # define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK |
| 1058 #endif |
| 1059 |
| 1060 SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *); |
| 1061 SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *); |
| 1062 |
| 1063 /* Special values interpreted by sqlite3Fts3SegReaderCursor() */ |
| 1064 #define FTS3_SEGCURSOR_PENDING -1 |
| 1065 #define FTS3_SEGCURSOR_ALL -2 |
| 1066 |
| 1067 SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Ft
s3SegFilter*); |
| 1068 SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); |
| 1069 SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); |
| 1070 |
| 1071 SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, |
| 1072 int, int, int, const char *, int, int, int, Fts3MultiSegReader *); |
| 1073 |
| 1074 /* Flag bits for segment-reader filtering. NOTE(review): the SegmentReaderIterate() named in the original comment is not declared in this header; presumably these are OR-ed into Fts3SegFilter.flags - confirm. */ |
| 1075 #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 |
| 1076 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 |
| 1077 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 |
| 1078 #define FTS3_SEGMENT_PREFIX 0x00000008 |
| 1079 #define FTS3_SEGMENT_SCAN 0x00000010 |
| 1080 #define FTS3_SEGMENT_FIRST 0x00000020 |
| 1081 |
| 1082 /* Filter description. A pointer to this type is the last argument of sqlite3Fts3SegReaderStart(). */ |
| 1083 struct Fts3SegFilter { |
| 1084 const char *zTerm; /* Term buffer; nTerm gives its size (presumably in bytes - confirm) */ |
| 1085 int nTerm; |
| 1086 int iCol; /* NOTE(review): presumably the column used with FTS3_SEGMENT_COLUMN_FILTER - confirm */ |
| 1087 int flags; /* NOTE(review): presumably a mask of FTS3_SEGMENT_XXX bits - confirm */ |
| 1088 }; |
| 1089 |
| 1090 struct Fts3MultiSegReader { |
| 1091 /* Used internally by sqlite3Fts3SegReaderXXX() calls */ |
| 1092 Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ |
| 1093 int nSegment; /* Size of apSegment array */ |
| 1094 int nAdvance; /* How many seg-readers to advance */ |
| 1095 Fts3SegFilter *pFilter; /* Pointer to filter object */ |
| 1096 char *aBuffer; /* Buffer to merge doclists in */ |
| 1097 int nBuffer; /* Allocated size of aBuffer[] in bytes */ |
| 1098 |
| 1099 int iColFilter; /* If >=0, filter for this column */ |
| 1100 int bRestart; /* NOTE(review): purpose not visible in this header; see sqlite3Fts3MsrIncrRestart() - confirm */ |
| 1101 |
| 1102 /* Used by fts3.c only. */ |
| 1103 int nCost; /* Cost of running iterator */ |
| 1104 int bLookup; /* True if a lookup of a single entry. */ |
| 1105 |
| 1106 /* Output values. Valid only after sqlite3Fts3SegReaderStep() returns SQLITE_ROW. */ |
| 1107 char *zTerm; /* Pointer to term buffer */ |
| 1108 int nTerm; /* Size of zTerm in bytes */ |
| 1109 char *aDoclist; /* Pointer to doclist buffer */ |
| 1110 int nDoclist; /* Size of aDoclist[] in bytes */ |
| 1111 }; |
| 1112 |
| 1113 SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int); |
| 1114 /* Inline fast path for reading a 32-bit varint: if the first byte at p has its 0x80 bit clear, that byte is stored in *piVal and the result is 1; otherwise sqlite3Fts3GetVarint32() is called. The argument p is expanded more than once. */ |
| 1115 #define fts3GetVarint32(p, piVal) ( \ |
| 1116 (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ |
| 1117 ) |
| 1118 |
| 1119 /* fts3.c */ |
| 1120 SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char**,const char*,...); |
| 1121 SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64); |
| 1122 SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); |
| 1123 SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *); |
| 1124 SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64); |
| 1125 SQLITE_PRIVATE void sqlite3Fts3Dequote(char *); |
| 1126 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,i
nt*,u8*); |
| 1127 SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); |
| 1128 SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); |
| 1129 SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); |
| 1130 SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); |
| 1131 |
| 1132 /* fts3_tokenizer.c */ |
| 1133 SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); |
| 1134 SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *)
; |
| 1135 SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, |
| 1136 sqlite3_tokenizer **, char ** |
| 1137 ); |
| 1138 SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char); |
| 1139 |
| 1140 /* fts3_snippet.c */ |
| 1141 SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); |
| 1142 SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const ch
ar *, |
| 1143 const char *, const char *, int, int |
| 1144 ); |
| 1145 SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const
char *); |
| 1146 SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); |
| 1147 |
| 1148 /* fts3_expr.c */ |
| 1149 SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, |
| 1150 char **, int, int, int, const char *, int, Fts3Expr **, char ** |
| 1151 ); |
| 1152 SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); |
| 1153 #ifdef SQLITE_TEST |
| 1154 SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); |
| 1155 SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db); |
| 1156 #endif |
| 1157 |
| 1158 SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char
*, int, |
| 1159 sqlite3_tokenizer_cursor ** |
| 1160 ); |
| 1161 |
| 1162 /* fts3_aux.c */ |
| 1163 SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db); |
| 1164 |
| 1165 SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); |
| 1166 |
| 1167 SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( |
| 1168 Fts3Table*, Fts3MultiSegReader*, int, const char*, int); |
| 1169 SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( |
| 1170 Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); |
| 1171 SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iC
ol, char **); |
| 1172 SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *)
; |
| 1173 SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); |
| 1174 |
| 1175 /* fts3_tokenize_vtab.c */ |
| 1176 SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); |
| 1177 |
| 1178 /* fts3_unicode2.c (functions generated by parsing unicode text files) */ |
| 1179 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 1180 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); |
| 1181 SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); |
| 1182 SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); |
| 1183 #endif |
| 1184 |
| 1185 #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ |
| 1186 #endif /* _FTSINT_H */ |
| 1187 |
| 1188 /************** End of fts3Int.h *********************************************/ |
| 1189 /************** Continuing where we left off in fts3.c ***********************/ |
| 1190 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 1191 |
| 1192 #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) |
| 1193 # define SQLITE_CORE 1 |
| 1194 #endif |
| 1195 |
| 1196 /* #include <assert.h> */ |
| 1197 /* #include <stdlib.h> */ |
| 1198 /* #include <stddef.h> */ |
| 1199 /* #include <stdio.h> */ |
| 1200 /* #include <string.h> */ |
| 1201 /* #include <stdarg.h> */ |
| 1202 |
| 1203 /* #include "fts3.h" */ |
| 1204 #ifndef SQLITE_CORE |
| 1205 /* # include "sqlite3ext.h" */ |
| 1206 SQLITE_EXTENSION_INIT1 |
| 1207 #endif |
| 1208 |
| 1209 static int fts3EvalNext(Fts3Cursor *pCsr); |
| 1210 static int fts3EvalStart(Fts3Cursor *pCsr); |
| 1211 static int fts3TermSegReaderCursor( |
| 1212 Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); |
| 1213 /* Helper functions used by the SQLITE_DEBUG definitions of ALWAYS() and NEVER(): each asserts the expected truth value of b and then returns b unchanged. */ |
| 1214 #ifndef SQLITE_AMALGAMATION |
| 1215 # if defined(SQLITE_DEBUG) |
| 1216 SQLITE_PRIVATE int sqlite3Fts3Always(int b) { assert( b ); return b; } |
| 1217 SQLITE_PRIVATE int sqlite3Fts3Never(int b) { assert( !b ); return b; } |
| 1218 # endif |
| 1219 #endif |
| 1220 |
| 1221 /* |
| 1222 ** Write a 64-bit variable-length integer to memory starting at p[0]. |
| 1223 ** The value is emitted 7 bits at a time, least-significant group first, with the 0x80 bit set on every byte except the last. The length of data written will be between 1 and FTS3_VARINT_MAX bytes, |
| 1224 ** so the buffer at p must have room for that many. The number of bytes written is returned. |
| 1225 */ |
| 1226 SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ |
| 1227 unsigned char *q = (unsigned char *) p; |
| 1228 sqlite_uint64 vu = v; /* unsigned copy, so the right shifts below eventually reach zero */ |
| 1229 do{ |
| 1230 *q++ = (unsigned char) ((vu & 0x7f) | 0x80); /* low 7 bits plus continuation flag */ |
| 1231 vu >>= 7; |
| 1232 }while( vu!=0 ); |
| 1233 q[-1] &= 0x7f; /* turn off high bit in final byte */ |
| 1234 assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); |
| 1235 return (int) (q - (unsigned char *)p); |
| 1236 } |
| 1237 /* Building blocks for the varint decoders. Each macro reads one byte through *ptr++ into accumulator v. If the bit selected by mask2 is then clear, the macro stores v in var and executes "return ret" in the enclosing function. GETVARINT_INIT does not use its shift and mask1 arguments. */ |
| 1238 #define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ |
| 1239 v = (v & mask1) | ( (*ptr++) << shift ); \ |
| 1240 if( (v & mask2)==0 ){ var = v; return ret; } |
| 1241 #define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ |
| 1242 v = (*ptr++); \ |
| 1243 if( (v & mask2)==0 ){ var = v; return ret; } |
| 1244 |
| 1245 /* |
| 1246 ** Read a 64-bit variable-length integer from memory starting at p[0]. |
| 1247 ** Return the number of bytes read, which is between 1 and FTS3_VARINT_MAX; this routine has no error return. |
| 1248 ** The value is stored in *v. Bytes are read through an unsigned char pointer so that a byte with its high bit set is never left-shifted as a negative int, whatever the signedness of plain char. |
| 1249 */ |
| 1250 SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ |
| 1251 const unsigned char *q = (const unsigned char *)p; /* Read cursor over the input bytes */ |
| 1252 u32 a; |
| 1253 u64 b; |
| 1254 int shift; |
| 1255 |
| 1256 GETVARINT_INIT(a, q, 0, 0x00, 0x80, *v, 1); |
| 1257 GETVARINT_STEP(a, q, 7, 0x7F, 0x4000, *v, 2); |
| 1258 GETVARINT_STEP(a, q, 14, 0x3FFF, 0x200000, *v, 3); |
| 1259 GETVARINT_STEP(a, q, 21, 0x1FFFFF, 0x10000000, *v, 4); |
| 1260 b = (a & 0x0FFFFFFF ); /* keep the 28 data bits gathered so far */ |
| 1261 |
| 1262 for(shift=28; shift<=63; shift+=7){ |
| 1263 u64 c = *q++; |
| 1264 b += (c&0x7F) << shift; |
| 1265 if( (c & 0x80)==0 ) break; |
| 1266 } |
| 1267 *v = b; |
| 1268 return (int)(q - (const unsigned char *)p); |
| 1269 } |
| 1270 |
| 1271 /* |
| 1272 ** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a |
| 1273 ** 32-bit integer before it is returned. At most 5 bytes are consumed, and the return value is the number of bytes consumed. Bytes are read through an unsigned char pointer so that no negative value is ever left-shifted. |
| 1274 */ |
| 1275 SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *p, int *pi){ |
| 1276 const unsigned char *q = (const unsigned char *)p; /* Read cursor over the input bytes */ |
| 1277 u32 a; |
| 1278 #ifndef fts3GetVarint32 |
| 1279 GETVARINT_INIT(a, q, 0, 0x00, 0x80, *pi, 1); |
| 1280 #else |
| 1281 a = (*q++); |
| 1282 assert( a & 0x80 ); /* single-byte values are handled by the fts3GetVarint32() macro */ |
| 1283 #endif |
| 1284 |
| 1285 GETVARINT_STEP(a, q, 7, 0x7F, 0x4000, *pi, 2); |
| 1286 GETVARINT_STEP(a, q, 14, 0x3FFF, 0x200000, *pi, 3); |
| 1287 GETVARINT_STEP(a, q, 21, 0x1FFFFF, 0x10000000, *pi, 4); |
| 1288 a = (a & 0x0FFFFFFF ); |
| 1289 *pi = (int)(a | ((u32)(*q & 0x0F) << 28)); /* only the low 4 bits of the fifth byte fit in 32 bits */ |
| 1290 return 5; |
| 1291 } |
| 1292 |
| 1293 /* |
| 1294 ** Return the number of bytes required to encode v as a varint: one byte per 7 bits of v, with a minimum of 1 (for v==0). |
| 1295 */ |
| 1296 SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){ |
| 1297 int i = 0; |
| 1298 do{ |
| 1299 i++; |
| 1300 v >>= 7; |
| 1301 }while( v!=0 ); |
| 1302 return i; |
| 1303 } |
| 1304 |
| 1305 /* |
| 1306 ** Convert an SQL-style quoted string into a normal string by removing |
| 1307 ** the quote characters. The conversion is done in-place. If the |
| 1308 ** input does not begin with a quote character, then this routine |
| 1309 ** is a no-op. A doubled close-quote character inside the string is reduced to a single one, and anything after the first undoubled close-quote is discarded. |
| 1310 ** |
| 1311 ** Examples: |
| 1312 ** |
| 1313 ** "abc" becomes abc |
| 1314 ** 'xyz' becomes xyz |
| 1315 ** [pqr] becomes pqr |
| 1316 ** `mno` becomes mno |
| 1317 ** |
| 1318 */ |
| 1319 SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){ |
| 1320 char quote; /* Quote character (if any ) */ |
| 1321 |
| 1322 quote = z[0]; |
| 1323 if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ |
| 1324 int iIn = 1; /* Index of next byte to read from input */ |
| 1325 int iOut = 0; /* Index of next byte to write to output */ |
| 1326 |
| 1327 /* If the first byte was a '[', then the close-quote character is a ']' */ |
| 1328 if( quote=='[' ) quote = ']'; |
| 1329 |
| 1330 while( z[iIn] ){ |
| 1331 if( z[iIn]==quote ){ |
| 1332 if( z[iIn+1]!=quote ) break; /* undoubled close-quote: end of the quoted text */ |
| 1333 z[iOut++] = quote; /* doubled quote: keep one copy */ |
| 1334 iIn += 2; |
| 1335 }else{ |
| 1336 z[iOut++] = z[iIn++]; |
| 1337 } |
| 1338 } |
| 1339 z[iOut] = '\0'; |
| 1340 } |
| 1341 } |
| 1342 |
| 1343 /* |
| 1344 ** Read a single varint from the doclist at *pp and advance *pp to point |
| 1345 ** to the first byte past the end of the varint. Add the value of the varint |
| 1346 ** to *pVal, so that *pVal accumulates delta-encoded values. |
| 1347 */ |
| 1348 static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ |
| 1349 sqlite3_int64 iVal; |
| 1350 *pp += sqlite3Fts3GetVarint(*pp, &iVal); |
| 1351 *pVal += iVal; |
| 1352 } |
| 1353 |
| 1354 /* |
| 1355 ** When this function is called, *pp points to the first byte following a |
| 1356 ** varint that is part of a doclist (or position-list, or any other list |
| 1357 ** of varints). This function moves *pp to point to the start of that varint, |
| 1358 ** and sets *pVal to the varint value. |
| 1359 ** |
| 1360 ** Argument pStart points to the first byte of the doclist that the |
| 1361 ** varint is part of. The backwards scan never moves before pStart. |
| 1362 */ |
| 1363 static void fts3GetReverseVarint( |
| 1364 char **pp, |
| 1365 char *pStart, |
| 1366 sqlite3_int64 *pVal |
| 1367 ){ |
| 1368 sqlite3_int64 iVal; |
| 1369 char *p; |
| 1370 |
| 1371 /* Pointer *pp currently points at the first byte past the varint we are |
| 1372 ** interested in. So, unless the doclist is corrupt, the 0x80 bit is |
| 1373 ** clear on character (*pp)[-1]. Scan backwards over the preceding bytes that have the 0x80 bit set. */ |
| 1374 for(p = (*pp)-2; p>=pStart && *p&0x80; p--); |
| 1375 p++; |
| 1376 *pp = p; |
| 1377 |
| 1378 sqlite3Fts3GetVarint(p, &iVal); |
| 1379 *pVal = iVal; |
| 1380 } |
| 1381 |
| 1382 /* |
| 1383 ** The xDisconnect() virtual table method. Finalizes every cached prepared statement, frees the strings owned by the Fts3Table, destroys the tokenizer and frees the Fts3Table itself. Always returns SQLITE_OK. |
| 1384 */ |
| 1385 static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ |
| 1386 Fts3Table *p = (Fts3Table *)pVtab; |
| 1387 int i; |
| 1388 |
| 1389 assert( p->nPendingData==0 ); |
| 1390 assert( p->pSegments==0 ); |
| 1391 |
| 1392 /* Free any prepared statements held */ |
| 1393 for(i=0; i<SizeofArray(p->aStmt); i++){ |
| 1394 sqlite3_finalize(p->aStmt[i]); |
| 1395 } |
| 1396 sqlite3_free(p->zSegmentsTbl); |
| 1397 sqlite3_free(p->zReadExprlist); |
| 1398 sqlite3_free(p->zWriteExprlist); |
| 1399 sqlite3_free(p->zContentTbl); |
| 1400 sqlite3_free(p->zLanguageid); |
| 1401 |
| 1402 /* Invoke the tokenizer destructor to free the tokenizer. */ |
| 1403 p->pTokenizer->pModule->xDestroy(p->pTokenizer); |
| 1404 |
| 1405 sqlite3_free(p); |
| 1406 return SQLITE_OK; |
| 1407 } |
| 1408 |
| 1409 /* |
| 1410 ** Write an error message into *pzErr. Any message already stored in *pzErr is freed first; the new message is formatted from zFormat and the following arguments by sqlite3_vmprintf(). |
| 1411 */ |
| 1412 SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){ |
| 1413 va_list ap; |
| 1414 sqlite3_free(*pzErr); |
| 1415 va_start(ap, zFormat); |
| 1416 *pzErr = sqlite3_vmprintf(zFormat, ap); |
| 1417 va_end(ap); |
| 1418 } |
| 1419 |
| 1420 /* |
| 1421 ** Construct one or more SQL statements from the format string given |
| 1422 ** and then evaluate those statements. The success code is written |
| 1423 ** into *pRc: SQLITE_NOMEM if the SQL text cannot be allocated, otherwise the value returned by sqlite3_exec(). |
| 1424 ** |
| 1425 ** If *pRc is initially non-zero then this routine is a no-op. |
| 1426 */ |
| 1427 static void fts3DbExec( |
| 1428 int *pRc, /* Success code */ |
| 1429 sqlite3 *db, /* Database in which to run SQL */ |
| 1430 const char *zFormat, /* Format string for SQL */ |
| 1431 ... /* Arguments to the format string */ |
| 1432 ){ |
| 1433 va_list ap; |
| 1434 char *zSql; |
| 1435 if( *pRc ) return; |
| 1436 va_start(ap, zFormat); |
| 1437 zSql = sqlite3_vmprintf(zFormat, ap); |
| 1438 va_end(ap); |
| 1439 if( zSql==0 ){ |
| 1440 *pRc = SQLITE_NOMEM; |
| 1441 }else{ |
| 1442 *pRc = sqlite3_exec(db, zSql, 0, 0, 0); /* no row callback, no error-message output */ |
| 1443 sqlite3_free(zSql); |
| 1444 } |
| 1445 } |
| 1446 |
| 1447 /* |
| 1448 ** The xDestroy() virtual table method. |
| 1449 */ |
| 1450 static int fts3DestroyMethod(sqlite3_vtab *pVtab){ |
| 1451 Fts3Table *p = (Fts3Table *)pVtab; |
| 1452 int rc = SQLITE_OK; /* Return code */ |
| 1453 const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ |
| 1454 sqlite3 *db = p->db; /* Database handle */ |
| 1455 |
| 1456 /* Drop the shadow tables */ |
| 1457 if( p->zContentTbl==0 ){ |
| 1458 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); |
| 1459 } |
| 1460 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); |
| 1461 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); |
| 1462 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); |
| 1463 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); |
| 1464 |
| 1465 /* If everything has worked, invoke fts3DisconnectMethod() to free the |
| 1466 ** memory associated with the Fts3Table structure and return SQLITE_OK. |
| 1467 ** Otherwise, return an SQLite error code. |
| 1468 */ |
| 1469 return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); |
| 1470 } |
| 1471 |
| 1472 |
| 1473 /* |
| 1474 ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table |
| 1475 ** passed as the first argument. This is done as part of the xConnect() |
| 1476 ** and xCreate() methods. |
| 1477 ** |
| 1478 ** If *pRc is non-zero when this function is called, it is a no-op. |
| 1479 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc |
| 1480 ** before returning. |
| 1481 */ |
| 1482 static void fts3DeclareVtab(int *pRc, Fts3Table *p){ |
| 1483 if( *pRc==SQLITE_OK ){ |
| 1484 int i; /* Iterator variable */ |
| 1485 int rc; /* Return code */ |
| 1486 char *zSql; /* SQL statement passed to declare_vtab() */ |
| 1487 char *zCols; /* List of user defined columns */ |
| 1488 const char *zLanguageid; |
| 1489 |
| 1490 zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); |
| 1491 sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); |
| 1492 |
| 1493 /* Create a list of user columns for the virtual table */ |
| 1494 zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); |
| 1495 for(i=1; zCols && i<p->nColumn; i++){ |
| 1496 zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); |
| 1497 } |
| 1498 |
| 1499 /* Create the whole "CREATE TABLE" statement to pass to SQLite */ |
| 1500 zSql = sqlite3_mprintf( |
| 1501 "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", |
| 1502 zCols, p->zName, zLanguageid |
| 1503 ); |
| 1504 if( !zCols || !zSql ){ |
| 1505 rc = SQLITE_NOMEM; |
| 1506 }else{ |
| 1507 rc = sqlite3_declare_vtab(p->db, zSql); |
| 1508 } |
| 1509 |
| 1510 sqlite3_free(zSql); |
| 1511 sqlite3_free(zCols); |
| 1512 *pRc = rc; |
| 1513 } |
| 1514 } |
| 1515 |
| 1516 /* |
| 1517 ** Create the %_stat table if it does not already exist. |
| 1518 */ |
| 1519 SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ |
| 1520 fts3DbExec(pRc, p->db, |
| 1521 "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" |
| 1522 "(id INTEGER PRIMARY KEY, value BLOB);", |
| 1523 p->zDb, p->zName |
| 1524 ); |
| 1525 if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; |
| 1526 } |
| 1527 |
| 1528 /* |
| 1529 ** Create the backing store tables (%_content, %_segments and %_segdir) |
| 1530 ** required by the FTS3 table passed as the only argument. This is done |
| 1531 ** as part of the vtab xCreate() method. |
| 1532 ** |
| 1533 ** If the p->bHasDocsize boolean is true (indicating that this is an |
| 1534 ** FTS4 table, not an FTS3 table) then also create the %_docsize and |
| 1535 ** %_stat tables required by FTS4. |
| 1536 */ |
| 1537 static int fts3CreateTables(Fts3Table *p){ |
| 1538 int rc = SQLITE_OK; /* Return code */ |
| 1539 int i; /* Iterator variable */ |
| 1540 sqlite3 *db = p->db; /* The database connection */ |
| 1541 |
| 1542 if( p->zContentTbl==0 ){ |
| 1543 const char *zLanguageid = p->zLanguageid; |
| 1544 char *zContentCols; /* Columns of %_content table */ |
| 1545 |
| 1546 /* Create a list of user columns for the content table */ |
| 1547 zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); |
| 1548 for(i=0; zContentCols && i<p->nColumn; i++){ |
| 1549 char *z = p->azColumn[i]; |
| 1550 zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); |
| 1551 } |
| 1552 if( zLanguageid && zContentCols ){ |
| 1553 zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); |
| 1554 } |
| 1555 if( zContentCols==0 ) rc = SQLITE_NOMEM; |
| 1556 |
| 1557 /* Create the content table */ |
| 1558 fts3DbExec(&rc, db, |
| 1559 "CREATE TABLE %Q.'%q_content'(%s)", |
| 1560 p->zDb, p->zName, zContentCols |
| 1561 ); |
| 1562 sqlite3_free(zContentCols); |
| 1563 } |
| 1564 |
| 1565 /* Create other tables */ |
| 1566 fts3DbExec(&rc, db, |
| 1567 "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", |
| 1568 p->zDb, p->zName |
| 1569 ); |
| 1570 fts3DbExec(&rc, db, |
| 1571 "CREATE TABLE %Q.'%q_segdir'(" |
| 1572 "level INTEGER," |
| 1573 "idx INTEGER," |
| 1574 "start_block INTEGER," |
| 1575 "leaves_end_block INTEGER," |
| 1576 "end_block INTEGER," |
| 1577 "root BLOB," |
| 1578 "PRIMARY KEY(level, idx)" |
| 1579 ");", |
| 1580 p->zDb, p->zName |
| 1581 ); |
| 1582 if( p->bHasDocsize ){ |
| 1583 fts3DbExec(&rc, db, |
| 1584 "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", |
| 1585 p->zDb, p->zName |
| 1586 ); |
| 1587 } |
| 1588 assert( p->bHasStat==p->bFts4 ); |
| 1589 if( p->bHasStat ){ |
| 1590 sqlite3Fts3CreateStatTable(&rc, p); |
| 1591 } |
| 1592 return rc; |
| 1593 } |
| 1594 |
| 1595 /* |
| 1596 ** Store the current database page-size in bytes in p->nPgsz. |
| 1597 ** |
| 1598 ** If *pRc is non-zero when this function is called, it is a no-op. |
| 1599 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc |
| 1600 ** before returning. |
| 1601 */ |
| 1602 static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ |
| 1603 if( *pRc==SQLITE_OK ){ |
| 1604 int rc; /* Return code */ |
| 1605 char *zSql; /* SQL text "PRAGMA %Q.page_size" */ |
| 1606 sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ |
| 1607 |
| 1608 zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); |
| 1609 if( !zSql ){ |
| 1610 rc = SQLITE_NOMEM; |
| 1611 }else{ |
| 1612 rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); |
| 1613 if( rc==SQLITE_OK ){ |
| 1614 sqlite3_step(pStmt); |
| 1615 p->nPgsz = sqlite3_column_int(pStmt, 0); |
| 1616 rc = sqlite3_finalize(pStmt); |
| 1617 }else if( rc==SQLITE_AUTH ){ |
| 1618 p->nPgsz = 1024; |
| 1619 rc = SQLITE_OK; |
| 1620 } |
| 1621 } |
| 1622 assert( p->nPgsz>0 || rc!=SQLITE_OK ); |
| 1623 sqlite3_free(zSql); |
| 1624 *pRc = rc; |
| 1625 } |
| 1626 } |
| 1627 |
/*
** "Special" FTS4 arguments are column specifications of the following form:
**
**   <key> = <value>
**
** There may not be whitespace surrounding the "=" character. The <value>
** term may be quoted, but the <key> may not.
**
** If z contains no "=" character, 0 is returned and the output
** parameters are not written. Otherwise 1 is returned, *pnKey is set to
** the number of bytes preceding the first "=", and *pzValue is set to a
** dequoted copy of the text following it. The copy is made with
** sqlite3_mprintf(); *pzValue is NULL if that allocation fails, even
** though the function still returns 1.
*/
static int fts3IsSpecialColumn(
  const char *z,
  int *pnKey,
  char **pzValue
){
  const char *zEq;                /* Scans forward to the first '=' */
  char *zCopy;                    /* Copy of the text after the '=' */

  for(zEq=z; *zEq!='='; zEq++){
    if( *zEq=='\0' ) return 0;
  }

  *pnKey = (int)(zEq-z);
  zCopy = sqlite3_mprintf("%s", zEq+1);
  if( zCopy!=0 ) sqlite3Fts3Dequote(zCopy);
  *pzValue = zCopy;
  return 1;
}
| 1657 |
| 1658 /* |
| 1659 ** Append the output of a printf() style formatting to an existing string. |
| 1660 */ |
| 1661 static void fts3Appendf( |
| 1662 int *pRc, /* IN/OUT: Error code */ |
| 1663 char **pz, /* IN/OUT: Pointer to string buffer */ |
| 1664 const char *zFormat, /* Printf format string to append */ |
| 1665 ... /* Arguments for printf format string */ |
| 1666 ){ |
| 1667 if( *pRc==SQLITE_OK ){ |
| 1668 va_list ap; |
| 1669 char *z; |
| 1670 va_start(ap, zFormat); |
| 1671 z = sqlite3_vmprintf(zFormat, ap); |
| 1672 va_end(ap); |
| 1673 if( z && *pz ){ |
| 1674 char *z2 = sqlite3_mprintf("%s%s", *pz, z); |
| 1675 sqlite3_free(z); |
| 1676 z = z2; |
| 1677 } |
| 1678 if( z==0 ) *pRc = SQLITE_NOMEM; |
| 1679 sqlite3_free(*pz); |
| 1680 *pz = z; |
| 1681 } |
| 1682 } |
| 1683 |
/*
** Return a copy of input string zInput enclosed in double-quotes (") and
** with all double quote characters escaped by doubling them. For example,
** the input text
**
**     un "zip"
**
** produces the output text
**
**     "un ""zip"""
**
** The pointer returned points to memory obtained from sqlite3_malloc(). It
** is the caller's responsibility to call sqlite3_free() to release this
** memory.
**
** NULL is returned if the allocation fails, or if zInput is so long that
** the worst-case output size cannot be represented as an int (the type
** accepted by sqlite3_malloc()).
*/
static char *fts3QuoteId(char const *zInput){
  size_t nInput = strlen(zInput); /* Length of input in bytes */
  char *zRet;                     /* Buffer to return */

  /* In the worst case every input byte is a double-quote and is doubled.
  ** Two enclosing quotes and a nul-terminator are added to that. Refuse
  ** inputs for which this total does not fit in an int, rather than
  ** letting the size computation wrap and under-allocate the buffer. */
  if( nInput>(size_t)(0x7fffffff-3)/2 ) return 0;

  zRet = sqlite3_malloc((int)(2 + nInput*2 + 1));
  if( zRet ){
    size_t i;
    char *z = zRet;
    *(z++) = '"';
    for(i=0; i<nInput; i++){
      if( zInput[i]=='"' ) *(z++) = '"';
      *(z++) = zInput[i];
    }
    *(z++) = '"';
    *(z++) = '\0';
  }
  return zRet;
}
| 1712 |
| 1713 /* |
| 1714 ** Return a list of comma separated SQL expressions and a FROM clause that |
| 1715 ** could be used in a SELECT statement such as the following: |
| 1716 ** |
| 1717 ** SELECT <list of expressions> FROM %_content AS x ... |
| 1718 ** |
| 1719 ** to return the docid, followed by each column of text data in order |
| 1720 ** from left to write. If parameter zFunc is not NULL, then instead of |
| 1721 ** being returned directly each column of text data is passed to an SQL |
| 1722 ** function named zFunc first. For example, if zFunc is "unzip" and the |
| 1723 ** table has the three user-defined columns "a", "b", and "c", the following |
| 1724 ** string is returned: |
| 1725 ** |
| 1726 ** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" |
| 1727 ** |
| 1728 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It |
| 1729 ** is the responsibility of the caller to eventually free it. |
| 1730 ** |
| 1731 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and |
| 1732 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered |
| 1733 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If |
| 1734 ** no error occurs, *pRc is left unmodified. |
| 1735 */ |
| 1736 static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ |
| 1737 char *zRet = 0; |
| 1738 char *zFree = 0; |
| 1739 char *zFunction; |
| 1740 int i; |
| 1741 |
| 1742 if( p->zContentTbl==0 ){ |
| 1743 if( !zFunc ){ |
| 1744 zFunction = ""; |
| 1745 }else{ |
| 1746 zFree = zFunction = fts3QuoteId(zFunc); |
| 1747 } |
| 1748 fts3Appendf(pRc, &zRet, "docid"); |
| 1749 for(i=0; i<p->nColumn; i++){ |
| 1750 fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); |
| 1751 } |
| 1752 if( p->zLanguageid ){ |
| 1753 fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); |
| 1754 } |
| 1755 sqlite3_free(zFree); |
| 1756 }else{ |
| 1757 fts3Appendf(pRc, &zRet, "rowid"); |
| 1758 for(i=0; i<p->nColumn; i++){ |
| 1759 fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); |
| 1760 } |
| 1761 if( p->zLanguageid ){ |
| 1762 fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); |
| 1763 } |
| 1764 } |
| 1765 fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", |
| 1766 p->zDb, |
| 1767 (p->zContentTbl ? p->zContentTbl : p->zName), |
| 1768 (p->zContentTbl ? "" : "_content") |
| 1769 ); |
| 1770 return zRet; |
| 1771 } |
| 1772 |
| 1773 /* |
| 1774 ** Return a list of N comma separated question marks, where N is the number |
| 1775 ** of columns in the %_content table (one for the docid plus one for each |
| 1776 ** user-defined text column). |
| 1777 ** |
| 1778 ** If argument zFunc is not NULL, then all but the first question mark |
| 1779 ** is preceded by zFunc and an open bracket, and followed by a closed |
| 1780 ** bracket. For example, if zFunc is "zip" and the FTS3 table has three |
| 1781 ** user-defined text columns, the following string is returned: |
| 1782 ** |
| 1783 ** "?, zip(?), zip(?), zip(?)" |
| 1784 ** |
| 1785 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It |
| 1786 ** is the responsibility of the caller to eventually free it. |
| 1787 ** |
| 1788 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and |
| 1789 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered |
| 1790 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If |
| 1791 ** no error occurs, *pRc is left unmodified. |
| 1792 */ |
| 1793 static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ |
| 1794 char *zRet = 0; |
| 1795 char *zFree = 0; |
| 1796 char *zFunction; |
| 1797 int i; |
| 1798 |
| 1799 if( !zFunc ){ |
| 1800 zFunction = ""; |
| 1801 }else{ |
| 1802 zFree = zFunction = fts3QuoteId(zFunc); |
| 1803 } |
| 1804 fts3Appendf(pRc, &zRet, "?"); |
| 1805 for(i=0; i<p->nColumn; i++){ |
| 1806 fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); |
| 1807 } |
| 1808 if( p->zLanguageid ){ |
| 1809 fts3Appendf(pRc, &zRet, ", ?"); |
| 1810 } |
| 1811 sqlite3_free(zFree); |
| 1812 return zRet; |
| 1813 } |
| 1814 |
| 1815 /* |
| 1816 ** This function interprets the string at (*pp) as a non-negative integer |
| 1817 ** value. It reads the integer and sets *pnOut to the value read, then |
| 1818 ** sets *pp to point to the byte immediately following the last byte of |
| 1819 ** the integer value. |
| 1820 ** |
| 1821 ** Only decimal digits ('0'..'9') may be part of an integer value. |
| 1822 ** |
| 1823 ** If *pp does not being with a decimal digit SQLITE_ERROR is returned and |
| 1824 ** the output value undefined. Otherwise SQLITE_OK is returned. |
| 1825 ** |
| 1826 ** This function is used when parsing the "prefix=" FTS4 parameter. |
| 1827 */ |
| 1828 static int fts3GobbleInt(const char **pp, int *pnOut){ |
| 1829 const int MAX_NPREFIX = 10000000; |
| 1830 const char *p; /* Iterator pointer */ |
| 1831 int nInt = 0; /* Output value */ |
| 1832 |
| 1833 for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ |
| 1834 nInt = nInt * 10 + (p[0] - '0'); |
| 1835 if( nInt>MAX_NPREFIX ){ |
| 1836 nInt = 0; |
| 1837 break; |
| 1838 } |
| 1839 } |
| 1840 if( p==*pp ) return SQLITE_ERROR; |
| 1841 *pnOut = nInt; |
| 1842 *pp = p; |
| 1843 return SQLITE_OK; |
| 1844 } |
| 1845 |
| 1846 /* |
| 1847 ** This function is called to allocate an array of Fts3Index structures |
| 1848 ** representing the indexes maintained by the current FTS table. FTS tables |
| 1849 ** always maintain the main "terms" index, but may also maintain one or |
| 1850 ** more "prefix" indexes, depending on the value of the "prefix=" parameter |
| 1851 ** (if any) specified as part of the CREATE VIRTUAL TABLE statement. |
| 1852 ** |
| 1853 ** Argument zParam is passed the value of the "prefix=" option if one was |
| 1854 ** specified, or NULL otherwise. |
| 1855 ** |
| 1856 ** If no error occurs, SQLITE_OK is returned and *apIndex set to point to |
| 1857 ** the allocated array. *pnIndex is set to the number of elements in the |
| 1858 ** array. If an error does occur, an SQLite error code is returned. |
| 1859 ** |
| 1860 ** Regardless of whether or not an error is returned, it is the responsibility |
| 1861 ** of the caller to call sqlite3_free() on the output array to free it. |
| 1862 */ |
| 1863 static int fts3PrefixParameter( |
| 1864 const char *zParam, /* ABC in prefix=ABC parameter to parse */ |
| 1865 int *pnIndex, /* OUT: size of *apIndex[] array */ |
| 1866 struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ |
| 1867 ){ |
| 1868 struct Fts3Index *aIndex; /* Allocated array */ |
| 1869 int nIndex = 1; /* Number of entries in array */ |
| 1870 |
| 1871 if( zParam && zParam[0] ){ |
| 1872 const char *p; |
| 1873 nIndex++; |
| 1874 for(p=zParam; *p; p++){ |
| 1875 if( *p==',' ) nIndex++; |
| 1876 } |
| 1877 } |
| 1878 |
| 1879 aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); |
| 1880 *apIndex = aIndex; |
| 1881 if( !aIndex ){ |
| 1882 return SQLITE_NOMEM; |
| 1883 } |
| 1884 |
| 1885 memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); |
| 1886 if( zParam ){ |
| 1887 const char *p = zParam; |
| 1888 int i; |
| 1889 for(i=1; i<nIndex; i++){ |
| 1890 int nPrefix = 0; |
| 1891 if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR; |
| 1892 assert( nPrefix>=0 ); |
| 1893 if( nPrefix==0 ){ |
| 1894 nIndex--; |
| 1895 i--; |
| 1896 }else{ |
| 1897 aIndex[i].nPrefix = nPrefix; |
| 1898 } |
| 1899 p++; |
| 1900 } |
| 1901 } |
| 1902 |
| 1903 *pnIndex = nIndex; |
| 1904 return SQLITE_OK; |
| 1905 } |
| 1906 |
| 1907 /* |
| 1908 ** This function is called when initializing an FTS4 table that uses the |
| 1909 ** content=xxx option. It determines the number of and names of the columns |
| 1910 ** of the new FTS4 table. |
| 1911 ** |
| 1912 ** The third argument passed to this function is the value passed to the |
| 1913 ** config=xxx option (i.e. "xxx"). This function queries the database for |
| 1914 ** a table of that name. If found, the output variables are populated |
| 1915 ** as follows: |
| 1916 ** |
| 1917 ** *pnCol: Set to the number of columns table xxx has, |
| 1918 ** |
| 1919 ** *pnStr: Set to the total amount of space required to store a copy |
| 1920 ** of each columns name, including the nul-terminator. |
| 1921 ** |
| 1922 ** *pazCol: Set to point to an array of *pnCol strings. Each string is |
| 1923 ** the name of the corresponding column in table xxx. The array |
| 1924 ** and its contents are allocated using a single allocation. It |
| 1925 ** is the responsibility of the caller to free this allocation |
| 1926 ** by eventually passing the *pazCol value to sqlite3_free(). |
| 1927 ** |
| 1928 ** If the table cannot be found, an error code is returned and the output |
| 1929 ** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is |
| 1930 ** returned (and the output variables are undefined). |
| 1931 */ |
| 1932 static int fts3ContentColumns( |
| 1933 sqlite3 *db, /* Database handle */ |
| 1934 const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ |
| 1935 const char *zTbl, /* Name of content table */ |
| 1936 const char ***pazCol, /* OUT: Malloc'd array of column names */ |
| 1937 int *pnCol, /* OUT: Size of array *pazCol */ |
| 1938 int *pnStr, /* OUT: Bytes of string content */ |
| 1939 char **pzErr /* OUT: error message */ |
| 1940 ){ |
| 1941 int rc = SQLITE_OK; /* Return code */ |
| 1942 char *zSql; /* "SELECT *" statement on zTbl */ |
| 1943 sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ |
| 1944 |
| 1945 zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); |
| 1946 if( !zSql ){ |
| 1947 rc = SQLITE_NOMEM; |
| 1948 }else{ |
| 1949 rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); |
| 1950 if( rc!=SQLITE_OK ){ |
| 1951 sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db)); |
| 1952 } |
| 1953 } |
| 1954 sqlite3_free(zSql); |
| 1955 |
| 1956 if( rc==SQLITE_OK ){ |
| 1957 const char **azCol; /* Output array */ |
| 1958 int nStr = 0; /* Size of all column names (incl. 0x00) */ |
| 1959 int nCol; /* Number of table columns */ |
| 1960 int i; /* Used to iterate through columns */ |
| 1961 |
| 1962 /* Loop through the returned columns. Set nStr to the number of bytes of |
| 1963 ** space required to store a copy of each column name, including the |
| 1964 ** nul-terminator byte. */ |
| 1965 nCol = sqlite3_column_count(pStmt); |
| 1966 for(i=0; i<nCol; i++){ |
| 1967 const char *zCol = sqlite3_column_name(pStmt, i); |
| 1968 nStr += (int)strlen(zCol) + 1; |
| 1969 } |
| 1970 |
| 1971 /* Allocate and populate the array to return. */ |
| 1972 azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr); |
| 1973 if( azCol==0 ){ |
| 1974 rc = SQLITE_NOMEM; |
| 1975 }else{ |
| 1976 char *p = (char *)&azCol[nCol]; |
| 1977 for(i=0; i<nCol; i++){ |
| 1978 const char *zCol = sqlite3_column_name(pStmt, i); |
| 1979 int n = (int)strlen(zCol)+1; |
| 1980 memcpy(p, zCol, n); |
| 1981 azCol[i] = p; |
| 1982 p += n; |
| 1983 } |
| 1984 } |
| 1985 sqlite3_finalize(pStmt); |
| 1986 |
| 1987 /* Set the output variables. */ |
| 1988 *pnCol = nCol; |
| 1989 *pnStr = nStr; |
| 1990 *pazCol = azCol; |
| 1991 } |
| 1992 |
| 1993 return rc; |
| 1994 } |
| 1995 |
| 1996 /* |
| 1997 ** This function is the implementation of both the xConnect and xCreate |
| 1998 ** methods of the FTS3 virtual table. |
| 1999 ** |
| 2000 ** The argv[] array contains the following: |
| 2001 ** |
| 2002 ** argv[0] -> module name ("fts3" or "fts4") |
| 2003 ** argv[1] -> database name |
| 2004 ** argv[2] -> table name |
| 2005 ** argv[...] -> "column name" and other module argument fields. |
| 2006 */ |
| 2007 static int fts3InitVtab( |
| 2008 int isCreate, /* True for xCreate, false for xConnect */ |
| 2009 sqlite3 *db, /* The SQLite database connection */ |
| 2010 void *pAux, /* Hash table containing tokenizers */ |
| 2011 int argc, /* Number of elements in argv array */ |
| 2012 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2013 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
| 2014 char **pzErr /* Write any error message here */ |
| 2015 ){ |
| 2016 Fts3Hash *pHash = (Fts3Hash *)pAux; |
| 2017 Fts3Table *p = 0; /* Pointer to allocated vtab */ |
| 2018 int rc = SQLITE_OK; /* Return code */ |
| 2019 int i; /* Iterator variable */ |
| 2020 int nByte; /* Size of allocation used for *p */ |
| 2021 int iCol; /* Column index */ |
| 2022 int nString = 0; /* Bytes required to hold all column names */ |
| 2023 int nCol = 0; /* Number of columns in the FTS table */ |
| 2024 char *zCsr; /* Space for holding column names */ |
| 2025 int nDb; /* Bytes required to hold database name */ |
| 2026 int nName; /* Bytes required to hold table name */ |
| 2027 int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ |
| 2028 const char **aCol; /* Array of column names */ |
| 2029 sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ |
| 2030 |
| 2031 int nIndex = 0; /* Size of aIndex[] array */ |
| 2032 struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ |
| 2033 |
| 2034 /* The results of parsing supported FTS4 key=value options: */ |
| 2035 int bNoDocsize = 0; /* True to omit %_docsize table */ |
| 2036 int bDescIdx = 0; /* True to store descending indexes */ |
| 2037 char *zPrefix = 0; /* Prefix parameter value (or NULL) */ |
| 2038 char *zCompress = 0; /* compress=? parameter (or NULL) */ |
| 2039 char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ |
| 2040 char *zContent = 0; /* content=? parameter (or NULL) */ |
| 2041 char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ |
| 2042 char **azNotindexed = 0; /* The set of notindexed= columns */ |
| 2043 int nNotindexed = 0; /* Size of azNotindexed[] array */ |
| 2044 |
| 2045 assert( strlen(argv[0])==4 ); |
| 2046 assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) |
| 2047 || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) |
| 2048 ); |
| 2049 |
| 2050 nDb = (int)strlen(argv[1]) + 1; |
| 2051 nName = (int)strlen(argv[2]) + 1; |
| 2052 |
| 2053 nByte = sizeof(const char *) * (argc-2); |
| 2054 aCol = (const char **)sqlite3_malloc(nByte); |
| 2055 if( aCol ){ |
| 2056 memset((void*)aCol, 0, nByte); |
| 2057 azNotindexed = (char **)sqlite3_malloc(nByte); |
| 2058 } |
| 2059 if( azNotindexed ){ |
| 2060 memset(azNotindexed, 0, nByte); |
| 2061 } |
| 2062 if( !aCol || !azNotindexed ){ |
| 2063 rc = SQLITE_NOMEM; |
| 2064 goto fts3_init_out; |
| 2065 } |
| 2066 |
| 2067 /* Loop through all of the arguments passed by the user to the FTS3/4 |
| 2068 ** module (i.e. all the column names and special arguments). This loop |
| 2069 ** does the following: |
| 2070 ** |
| 2071 ** + Figures out the number of columns the FTSX table will have, and |
| 2072 ** the number of bytes of space that must be allocated to store copies |
| 2073 ** of the column names. |
| 2074 ** |
| 2075 ** + If there is a tokenizer specification included in the arguments, |
| 2076 ** initializes the tokenizer pTokenizer. |
| 2077 */ |
| 2078 for(i=3; rc==SQLITE_OK && i<argc; i++){ |
| 2079 char const *z = argv[i]; |
| 2080 int nKey; |
| 2081 char *zVal; |
| 2082 |
| 2083 /* Check if this is a tokenizer specification */ |
| 2084 if( !pTokenizer |
| 2085 && strlen(z)>8 |
| 2086 && 0==sqlite3_strnicmp(z, "tokenize", 8) |
| 2087 && 0==sqlite3Fts3IsIdChar(z[8]) |
| 2088 ){ |
| 2089 rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); |
| 2090 } |
| 2091 |
| 2092 /* Check if it is an FTS4 special argument. */ |
| 2093 else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ |
| 2094 struct Fts4Option { |
| 2095 const char *zOpt; |
| 2096 int nOpt; |
| 2097 } aFts4Opt[] = { |
| 2098 { "matchinfo", 9 }, /* 0 -> MATCHINFO */ |
| 2099 { "prefix", 6 }, /* 1 -> PREFIX */ |
| 2100 { "compress", 8 }, /* 2 -> COMPRESS */ |
| 2101 { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ |
| 2102 { "order", 5 }, /* 4 -> ORDER */ |
| 2103 { "content", 7 }, /* 5 -> CONTENT */ |
| 2104 { "languageid", 10 }, /* 6 -> LANGUAGEID */ |
| 2105 { "notindexed", 10 } /* 7 -> NOTINDEXED */ |
| 2106 }; |
| 2107 |
| 2108 int iOpt; |
| 2109 if( !zVal ){ |
| 2110 rc = SQLITE_NOMEM; |
| 2111 }else{ |
| 2112 for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){ |
| 2113 struct Fts4Option *pOp = &aFts4Opt[iOpt]; |
| 2114 if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ |
| 2115 break; |
| 2116 } |
| 2117 } |
| 2118 if( iOpt==SizeofArray(aFts4Opt) ){ |
| 2119 sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z); |
| 2120 rc = SQLITE_ERROR; |
| 2121 }else{ |
| 2122 switch( iOpt ){ |
| 2123 case 0: /* MATCHINFO */ |
| 2124 if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ |
| 2125 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal); |
| 2126 rc = SQLITE_ERROR; |
| 2127 } |
| 2128 bNoDocsize = 1; |
| 2129 break; |
| 2130 |
| 2131 case 1: /* PREFIX */ |
| 2132 sqlite3_free(zPrefix); |
| 2133 zPrefix = zVal; |
| 2134 zVal = 0; |
| 2135 break; |
| 2136 |
| 2137 case 2: /* COMPRESS */ |
| 2138 sqlite3_free(zCompress); |
| 2139 zCompress = zVal; |
| 2140 zVal = 0; |
| 2141 break; |
| 2142 |
| 2143 case 3: /* UNCOMPRESS */ |
| 2144 sqlite3_free(zUncompress); |
| 2145 zUncompress = zVal; |
| 2146 zVal = 0; |
| 2147 break; |
| 2148 |
| 2149 case 4: /* ORDER */ |
| 2150 if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) |
| 2151 && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) |
| 2152 ){ |
| 2153 sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal); |
| 2154 rc = SQLITE_ERROR; |
| 2155 } |
| 2156 bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); |
| 2157 break; |
| 2158 |
| 2159 case 5: /* CONTENT */ |
| 2160 sqlite3_free(zContent); |
| 2161 zContent = zVal; |
| 2162 zVal = 0; |
| 2163 break; |
| 2164 |
| 2165 case 6: /* LANGUAGEID */ |
| 2166 assert( iOpt==6 ); |
| 2167 sqlite3_free(zLanguageid); |
| 2168 zLanguageid = zVal; |
| 2169 zVal = 0; |
| 2170 break; |
| 2171 |
| 2172 case 7: /* NOTINDEXED */ |
| 2173 azNotindexed[nNotindexed++] = zVal; |
| 2174 zVal = 0; |
| 2175 break; |
| 2176 } |
| 2177 } |
| 2178 sqlite3_free(zVal); |
| 2179 } |
| 2180 } |
| 2181 |
| 2182 /* Otherwise, the argument is a column name. */ |
| 2183 else { |
| 2184 nString += (int)(strlen(z) + 1); |
| 2185 aCol[nCol++] = z; |
| 2186 } |
| 2187 } |
| 2188 |
| 2189 /* If a content=xxx option was specified, the following: |
| 2190 ** |
| 2191 ** 1. Ignore any compress= and uncompress= options. |
| 2192 ** |
| 2193 ** 2. If no column names were specified as part of the CREATE VIRTUAL |
| 2194 ** TABLE statement, use all columns from the content table. |
| 2195 */ |
| 2196 if( rc==SQLITE_OK && zContent ){ |
| 2197 sqlite3_free(zCompress); |
| 2198 sqlite3_free(zUncompress); |
| 2199 zCompress = 0; |
| 2200 zUncompress = 0; |
| 2201 if( nCol==0 ){ |
| 2202 sqlite3_free((void*)aCol); |
| 2203 aCol = 0; |
| 2204 rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr); |
| 2205 |
| 2206 /* If a languageid= option was specified, remove the language id |
| 2207 ** column from the aCol[] array. */ |
| 2208 if( rc==SQLITE_OK && zLanguageid ){ |
| 2209 int j; |
| 2210 for(j=0; j<nCol; j++){ |
| 2211 if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){ |
| 2212 int k; |
| 2213 for(k=j; k<nCol; k++) aCol[k] = aCol[k+1]; |
| 2214 nCol--; |
| 2215 break; |
| 2216 } |
| 2217 } |
| 2218 } |
| 2219 } |
| 2220 } |
| 2221 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2222 |
| 2223 if( nCol==0 ){ |
| 2224 assert( nString==0 ); |
| 2225 aCol[0] = "content"; |
| 2226 nString = 8; |
| 2227 nCol = 1; |
| 2228 } |
| 2229 |
| 2230 if( pTokenizer==0 ){ |
| 2231 rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr); |
| 2232 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2233 } |
| 2234 assert( pTokenizer ); |
| 2235 |
| 2236 rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); |
| 2237 if( rc==SQLITE_ERROR ){ |
| 2238 assert( zPrefix ); |
| 2239 sqlite3Fts3ErrMsg(pzErr, "error parsing prefix parameter: %s", zPrefix); |
| 2240 } |
| 2241 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2242 |
| 2243 /* Allocate and populate the Fts3Table structure. */ |
| 2244 nByte = sizeof(Fts3Table) + /* Fts3Table */ |
| 2245 nCol * sizeof(char *) + /* azColumn */ |
| 2246 nIndex * sizeof(struct Fts3Index) + /* aIndex */ |
| 2247 nCol * sizeof(u8) + /* abNotindexed */ |
| 2248 nName + /* zName */ |
| 2249 nDb + /* zDb */ |
| 2250 nString; /* Space for azColumn strings */ |
| 2251 p = (Fts3Table*)sqlite3_malloc(nByte); |
| 2252 if( p==0 ){ |
| 2253 rc = SQLITE_NOMEM; |
| 2254 goto fts3_init_out; |
| 2255 } |
| 2256 memset(p, 0, nByte); |
| 2257 p->db = db; |
| 2258 p->nColumn = nCol; |
| 2259 p->nPendingData = 0; |
| 2260 p->azColumn = (char **)&p[1]; |
| 2261 p->pTokenizer = pTokenizer; |
| 2262 p->nMaxPendingData = FTS3_MAX_PENDING_DATA; |
| 2263 p->bHasDocsize = (isFts4 && bNoDocsize==0); |
| 2264 p->bHasStat = isFts4; |
| 2265 p->bFts4 = isFts4; |
| 2266 p->bDescIdx = bDescIdx; |
| 2267 p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ |
| 2268 p->zContentTbl = zContent; |
| 2269 p->zLanguageid = zLanguageid; |
| 2270 zContent = 0; |
| 2271 zLanguageid = 0; |
| 2272 TESTONLY( p->inTransaction = -1 ); |
| 2273 TESTONLY( p->mxSavepoint = -1 ); |
| 2274 |
| 2275 p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; |
| 2276 memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); |
| 2277 p->nIndex = nIndex; |
| 2278 for(i=0; i<nIndex; i++){ |
| 2279 fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1); |
| 2280 } |
| 2281 p->abNotindexed = (u8 *)&p->aIndex[nIndex]; |
| 2282 |
| 2283 /* Fill in the zName and zDb fields of the vtab structure. */ |
| 2284 zCsr = (char *)&p->abNotindexed[nCol]; |
| 2285 p->zName = zCsr; |
| 2286 memcpy(zCsr, argv[2], nName); |
| 2287 zCsr += nName; |
| 2288 p->zDb = zCsr; |
| 2289 memcpy(zCsr, argv[1], nDb); |
| 2290 zCsr += nDb; |
| 2291 |
| 2292 /* Fill in the azColumn array */ |
| 2293 for(iCol=0; iCol<nCol; iCol++){ |
| 2294 char *z; |
| 2295 int n = 0; |
| 2296 z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n); |
| 2297 memcpy(zCsr, z, n); |
| 2298 zCsr[n] = '\0'; |
| 2299 sqlite3Fts3Dequote(zCsr); |
| 2300 p->azColumn[iCol] = zCsr; |
| 2301 zCsr += n+1; |
| 2302 assert( zCsr <= &((char *)p)[nByte] ); |
| 2303 } |
| 2304 |
| 2305 /* Fill in the abNotindexed array */ |
| 2306 for(iCol=0; iCol<nCol; iCol++){ |
| 2307 int n = (int)strlen(p->azColumn[iCol]); |
| 2308 for(i=0; i<nNotindexed; i++){ |
| 2309 char *zNot = azNotindexed[i]; |
| 2310 if( zNot && n==(int)strlen(zNot) |
| 2311 && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n) |
| 2312 ){ |
| 2313 p->abNotindexed[iCol] = 1; |
| 2314 sqlite3_free(zNot); |
| 2315 azNotindexed[i] = 0; |
| 2316 } |
| 2317 } |
| 2318 } |
| 2319 for(i=0; i<nNotindexed; i++){ |
| 2320 if( azNotindexed[i] ){ |
| 2321 sqlite3Fts3ErrMsg(pzErr, "no such column: %s", azNotindexed[i]); |
| 2322 rc = SQLITE_ERROR; |
| 2323 } |
| 2324 } |
| 2325 |
| 2326 if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){ |
| 2327 char const *zMiss = (zCompress==0 ? "compress" : "uncompress"); |
| 2328 rc = SQLITE_ERROR; |
| 2329 sqlite3Fts3ErrMsg(pzErr, "missing %s parameter in fts4 constructor", zMiss); |
| 2330 } |
| 2331 p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc); |
| 2332 p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); |
| 2333 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2334 |
| 2335 /* If this is an xCreate call, create the underlying tables in the |
| 2336 ** database. TODO: For xConnect(), it could verify that said tables exist. |
| 2337 */ |
| 2338 if( isCreate ){ |
| 2339 rc = fts3CreateTables(p); |
| 2340 } |
| 2341 |
| 2342 /* Check to see if a legacy fts3 table has been "upgraded" by the |
| 2343 ** addition of a %_stat table so that it can use incremental merge. |
| 2344 */ |
| 2345 if( !isFts4 && !isCreate ){ |
| 2346 p->bHasStat = 2; |
| 2347 } |
| 2348 |
| 2349 /* Figure out the page-size for the database. This is required in order to |
| 2350 ** estimate the cost of loading large doclists from the database. */ |
| 2351 fts3DatabasePageSize(&rc, p); |
| 2352 p->nNodeSize = p->nPgsz-35; |
| 2353 |
| 2354 /* Declare the table schema to SQLite. */ |
| 2355 fts3DeclareVtab(&rc, p); |
| 2356 |
| 2357 fts3_init_out: |
| 2358 sqlite3_free(zPrefix); |
| 2359 sqlite3_free(aIndex); |
| 2360 sqlite3_free(zCompress); |
| 2361 sqlite3_free(zUncompress); |
| 2362 sqlite3_free(zContent); |
| 2363 sqlite3_free(zLanguageid); |
| 2364 for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]); |
| 2365 sqlite3_free((void *)aCol); |
| 2366 sqlite3_free((void *)azNotindexed); |
| 2367 if( rc!=SQLITE_OK ){ |
| 2368 if( p ){ |
| 2369 fts3DisconnectMethod((sqlite3_vtab *)p); |
| 2370 }else if( pTokenizer ){ |
| 2371 pTokenizer->pModule->xDestroy(pTokenizer); |
| 2372 } |
| 2373 }else{ |
| 2374 assert( p->pSegments==0 ); |
| 2375 *ppVTab = &p->base; |
| 2376 } |
| 2377 return rc; |
| 2378 } |
| 2379 |
| 2380 /* |
| 2381 ** The xConnect() and xCreate() methods for the virtual table. All the |
| 2382 ** work is done in function fts3InitVtab(). |
| 2383 */ |
| 2384 static int fts3ConnectMethod( |
| 2385 sqlite3 *db, /* Database connection */ |
| 2386 void *pAux, /* Pointer to tokenizer hash table */ |
| 2387 int argc, /* Number of elements in argv array */ |
| 2388 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2389 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 2390 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 2391 ){ |
| 2392 return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); |
| 2393 } |
| 2394 static int fts3CreateMethod( |
| 2395 sqlite3 *db, /* Database connection */ |
| 2396 void *pAux, /* Pointer to tokenizer hash table */ |
| 2397 int argc, /* Number of elements in argv array */ |
| 2398 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2399 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 2400 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 2401 ){ |
| 2402 return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); |
| 2403 } |
| 2404 |
| 2405 /* |
| 2406 ** Set the pIdxInfo->estimatedRows variable to nRow. Unless this |
| 2407 ** extension is currently being used by a version of SQLite too old to |
| 2408 ** support estimatedRows. In that case this function is a no-op. |
| 2409 */ |
| 2410 static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ |
| 2411 #if SQLITE_VERSION_NUMBER>=3008002 |
| 2412 if( sqlite3_libversion_number()>=3008002 ){ |
| 2413 pIdxInfo->estimatedRows = nRow; |
| 2414 } |
| 2415 #endif |
| 2416 } |
| 2417 |
| 2418 /* |
| 2419 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this |
| 2420 ** extension is currently being used by a version of SQLite too old to |
| 2421 ** support index-info flags. In that case this function is a no-op. |
| 2422 */ |
| 2423 static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){ |
| 2424 #if SQLITE_VERSION_NUMBER>=3008012 |
| 2425 if( sqlite3_libversion_number()>=3008012 ){ |
| 2426 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; |
| 2427 } |
| 2428 #endif |
| 2429 } |
| 2430 |
| 2431 /* |
| 2432 ** Implementation of the xBestIndex method for FTS3 tables. There |
| 2433 ** are three possible strategies, in order of preference: |
| 2434 ** |
| 2435 ** 1. Direct lookup by rowid or docid. |
| 2436 ** 2. Full-text search using a MATCH operator on a non-docid column. |
| 2437 ** 3. Linear scan of %_content table. |
| 2438 */ |
| 2439 static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |
| 2440 Fts3Table *p = (Fts3Table *)pVTab; |
| 2441 int i; /* Iterator variable */ |
| 2442 int iCons = -1; /* Index of constraint to use */ |
| 2443 |
| 2444 int iLangidCons = -1; /* Index of langid=x constraint, if present */ |
| 2445 int iDocidGe = -1; /* Index of docid>=x constraint, if present */ |
| 2446 int iDocidLe = -1; /* Index of docid<=x constraint, if present */ |
| 2447 int iIdx; |
| 2448 |
| 2449 /* By default use a full table scan. This is an expensive option, |
| 2450 ** so search through the constraints to see if a more efficient |
| 2451 ** strategy is possible. |
| 2452 */ |
| 2453 pInfo->idxNum = FTS3_FULLSCAN_SEARCH; |
| 2454 pInfo->estimatedCost = 5000000; |
| 2455 for(i=0; i<pInfo->nConstraint; i++){ |
| 2456 int bDocid; /* True if this constraint is on docid */ |
| 2457 struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; |
| 2458 if( pCons->usable==0 ){ |
| 2459 if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |
| 2460 /* There exists an unusable MATCH constraint. This means that if |
| 2461 ** the planner does elect to use the results of this call as part |
| 2462 ** of the overall query plan the user will see an "unable to use |
| 2463 ** function MATCH in the requested context" error. To discourage |
| 2464 ** this, return a very high cost here. */ |
| 2465 pInfo->idxNum = FTS3_FULLSCAN_SEARCH; |
| 2466 pInfo->estimatedCost = 1e50; |
| 2467 fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); |
| 2468 return SQLITE_OK; |
| 2469 } |
| 2470 continue; |
| 2471 } |
| 2472 |
| 2473 bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); |
| 2474 |
| 2475 /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ |
| 2476 if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ |
| 2477 pInfo->idxNum = FTS3_DOCID_SEARCH; |
| 2478 pInfo->estimatedCost = 1.0; |
| 2479 iCons = i; |
| 2480 } |
| 2481 |
| 2482 /* A MATCH constraint. Use a full-text search. |
| 2483 ** |
| 2484 ** If there is more than one MATCH constraint available, use the first |
| 2485 ** one encountered. If there is both a MATCH constraint and a direct |
| 2486 ** rowid/docid lookup, prefer the MATCH strategy. This is done even |
| 2487 ** though the rowid/docid lookup is faster than a MATCH query, selecting |
| 2488 ** it would lead to an "unable to use function MATCH in the requested |
| 2489 ** context" error. |
| 2490 */ |
| 2491 if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH |
| 2492 && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn |
| 2493 ){ |
| 2494 pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; |
| 2495 pInfo->estimatedCost = 2.0; |
| 2496 iCons = i; |
| 2497 } |
| 2498 |
| 2499 /* Equality constraint on the langid column */ |
| 2500 if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ |
| 2501 && pCons->iColumn==p->nColumn + 2 |
| 2502 ){ |
| 2503 iLangidCons = i; |
| 2504 } |
| 2505 |
| 2506 if( bDocid ){ |
| 2507 switch( pCons->op ){ |
| 2508 case SQLITE_INDEX_CONSTRAINT_GE: |
| 2509 case SQLITE_INDEX_CONSTRAINT_GT: |
| 2510 iDocidGe = i; |
| 2511 break; |
| 2512 |
| 2513 case SQLITE_INDEX_CONSTRAINT_LE: |
| 2514 case SQLITE_INDEX_CONSTRAINT_LT: |
| 2515 iDocidLe = i; |
| 2516 break; |
| 2517 } |
| 2518 } |
| 2519 } |
| 2520 |
| 2521 /* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */ |
| 2522 if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo); |
| 2523 |
| 2524 iIdx = 1; |
| 2525 if( iCons>=0 ){ |
| 2526 pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; |
| 2527 pInfo->aConstraintUsage[iCons].omit = 1; |
| 2528 } |
| 2529 if( iLangidCons>=0 ){ |
| 2530 pInfo->idxNum |= FTS3_HAVE_LANGID; |
| 2531 pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; |
| 2532 } |
| 2533 if( iDocidGe>=0 ){ |
| 2534 pInfo->idxNum |= FTS3_HAVE_DOCID_GE; |
| 2535 pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; |
| 2536 } |
| 2537 if( iDocidLe>=0 ){ |
| 2538 pInfo->idxNum |= FTS3_HAVE_DOCID_LE; |
| 2539 pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; |
| 2540 } |
| 2541 |
| 2542 /* Regardless of the strategy selected, FTS can deliver rows in rowid (or |
| 2543 ** docid) order. Both ascending and descending are possible. |
| 2544 */ |
| 2545 if( pInfo->nOrderBy==1 ){ |
| 2546 struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; |
| 2547 if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ |
| 2548 if( pOrder->desc ){ |
| 2549 pInfo->idxStr = "DESC"; |
| 2550 }else{ |
| 2551 pInfo->idxStr = "ASC"; |
| 2552 } |
| 2553 pInfo->orderByConsumed = 1; |
| 2554 } |
| 2555 } |
| 2556 |
| 2557 assert( p->pSegments==0 ); |
| 2558 return SQLITE_OK; |
| 2559 } |
| 2560 |
| 2561 /* |
| 2562 ** Implementation of xOpen method. |
| 2563 */ |
| 2564 static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 2565 sqlite3_vtab_cursor *pCsr; /* Allocated cursor */ |
| 2566 |
| 2567 UNUSED_PARAMETER(pVTab); |
| 2568 |
| 2569 /* Allocate a buffer large enough for an Fts3Cursor structure. If the |
| 2570 ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, |
| 2571 ** if the allocation fails, return SQLITE_NOMEM. |
| 2572 */ |
| 2573 *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); |
| 2574 if( !pCsr ){ |
| 2575 return SQLITE_NOMEM; |
| 2576 } |
| 2577 memset(pCsr, 0, sizeof(Fts3Cursor)); |
| 2578 return SQLITE_OK; |
| 2579 } |
| 2580 |
| 2581 /* |
| 2582 ** Close the cursor. For additional information see the documentation |
| 2583 ** on the xClose method of the virtual table interface. |
| 2584 */ |
| 2585 static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 2586 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 2587 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 2588 sqlite3_finalize(pCsr->pStmt); |
| 2589 sqlite3Fts3ExprFree(pCsr->pExpr); |
| 2590 sqlite3Fts3FreeDeferredTokens(pCsr); |
| 2591 sqlite3_free(pCsr->aDoclist); |
| 2592 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 2593 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 2594 sqlite3_free(pCsr); |
| 2595 return SQLITE_OK; |
| 2596 } |
| 2597 |
| 2598 /* |
| 2599 ** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then |
| 2600 ** compose and prepare an SQL statement of the form: |
| 2601 ** |
| 2602 ** "SELECT <columns> FROM %_content WHERE rowid = ?" |
| 2603 ** |
| 2604 ** (or the equivalent for a content=xxx table) and set pCsr->pStmt to |
| 2605 ** it. If an error occurs, return an SQLite error code. |
| 2606 ** |
| 2607 ** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK. |
| 2608 */ |
| 2609 static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){ |
| 2610 int rc = SQLITE_OK; |
| 2611 if( pCsr->pStmt==0 ){ |
| 2612 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 2613 char *zSql; |
| 2614 zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); |
| 2615 if( !zSql ) return SQLITE_NOMEM; |
| 2616 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); |
| 2617 sqlite3_free(zSql); |
| 2618 } |
| 2619 *ppStmt = pCsr->pStmt; |
| 2620 return rc; |
| 2621 } |
| 2622 |
| 2623 /* |
| 2624 ** Position the pCsr->pStmt statement so that it is on the row |
| 2625 ** of the %_content table that contains the last match. Return |
| 2626 ** SQLITE_OK on success. |
| 2627 */ |
| 2628 static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ |
| 2629 int rc = SQLITE_OK; |
| 2630 if( pCsr->isRequireSeek ){ |
| 2631 sqlite3_stmt *pStmt = 0; |
| 2632 |
| 2633 rc = fts3CursorSeekStmt(pCsr, &pStmt); |
| 2634 if( rc==SQLITE_OK ){ |
| 2635 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); |
| 2636 pCsr->isRequireSeek = 0; |
| 2637 if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ |
| 2638 return SQLITE_OK; |
| 2639 }else{ |
| 2640 rc = sqlite3_reset(pCsr->pStmt); |
| 2641 if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ |
| 2642 /* If no row was found and no error has occurred, then the %_content |
| 2643 ** table is missing a row that is present in the full-text index. |
| 2644 ** The data structures are corrupt. */ |
| 2645 rc = FTS_CORRUPT_VTAB; |
| 2646 pCsr->isEof = 1; |
| 2647 } |
| 2648 } |
| 2649 } |
| 2650 } |
| 2651 |
| 2652 if( rc!=SQLITE_OK && pContext ){ |
| 2653 sqlite3_result_error_code(pContext, rc); |
| 2654 } |
| 2655 return rc; |
| 2656 } |
| 2657 |
| 2658 /* |
| 2659 ** This function is used to process a single interior node when searching |
| 2660 ** a b-tree for a term or term prefix. The node data is passed to this |
| 2661 ** function via the zNode/nNode parameters. The term to search for is |
| 2662 ** passed in zTerm/nTerm. |
| 2663 ** |
| 2664 ** If piFirst is not NULL, then this function sets *piFirst to the blockid |
| 2665 ** of the child node that heads the sub-tree that may contain the term. |
| 2666 ** |
| 2667 ** If piLast is not NULL, then *piLast is set to the right-most child node |
| 2668 ** that heads a sub-tree that may contain a term for which zTerm/nTerm is |
| 2669 ** a prefix. |
| 2670 ** |
| 2671 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. |
| 2672 */ |
| 2673 static int fts3ScanInteriorNode( |
| 2674 const char *zTerm, /* Term to select leaves for */ |
| 2675 int nTerm, /* Size of term zTerm in bytes */ |
| 2676 const char *zNode, /* Buffer containing segment interior node */ |
| 2677 int nNode, /* Size of buffer at zNode */ |
| 2678 sqlite3_int64 *piFirst, /* OUT: Selected child node */ |
| 2679 sqlite3_int64 *piLast /* OUT: Selected child node */ |
| 2680 ){ |
| 2681 int rc = SQLITE_OK; /* Return code */ |
| 2682 const char *zCsr = zNode; /* Cursor to iterate through node */ |
| 2683 const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ |
| 2684 char *zBuffer = 0; /* Buffer to load terms into */ |
| 2685 int nAlloc = 0; /* Size of allocated buffer */ |
| 2686 int isFirstTerm = 1; /* True when processing first term on page */ |
| 2687 sqlite3_int64 iChild; /* Block id of child node to descend to */ |
| 2688 |
| 2689 /* Skip over the 'height' varint that occurs at the start of every |
| 2690 ** interior node. Then load the blockid of the left-child of the b-tree |
| 2691 ** node into variable iChild. |
| 2692 ** |
| 2693 ** Even if the data structure on disk is corrupted, this (reading two |
| 2694 ** varints from the buffer) does not risk an overread. If zNode is a |
| 2695 ** root node, then the buffer comes from a SELECT statement. SQLite does |
| 2696 ** not make this guarantee explicitly, but in practice there are always |
| 2697 ** either more than 20 bytes of allocated space following the nNode bytes of |
| 2698 ** contents, or two zero bytes. Or, if the node is read from the %_segments |
| 2699 ** table, then there are always 20 bytes of zeroed padding following the |
| 2700 ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). |
| 2701 */ |
| 2702 zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); |
| 2703 zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); |
| 2704 if( zCsr>zEnd ){ |
| 2705 return FTS_CORRUPT_VTAB; |
| 2706 } |
| 2707 |
| 2708 while( zCsr<zEnd && (piFirst || piLast) ){ |
| 2709 int cmp; /* memcmp() result */ |
| 2710 int nSuffix; /* Size of term suffix */ |
| 2711 int nPrefix = 0; /* Size of term prefix */ |
| 2712 int nBuffer; /* Total term size */ |
| 2713 |
| 2714 /* Load the next term on the node into zBuffer. Use realloc() to expand |
| 2715 ** the size of zBuffer if required. */ |
| 2716 if( !isFirstTerm ){ |
| 2717 zCsr += fts3GetVarint32(zCsr, &nPrefix); |
| 2718 } |
| 2719 isFirstTerm = 0; |
| 2720 zCsr += fts3GetVarint32(zCsr, &nSuffix); |
| 2721 |
| 2722 /* NOTE(shess): Previous code checked for negative nPrefix and |
| 2723 ** nSuffix and suffix overrunning zEnd. Additionally corrupt if |
| 2724 ** the prefix is longer than the previous term, or if the suffix |
| 2725 ** causes overflow. |
| 2726 */ |
| 2727 if( nPrefix<0 || nSuffix<0 /* || nPrefix>nBuffer */ |
| 2728 || &zCsr[nSuffix]<zCsr || &zCsr[nSuffix]>zEnd ){ |
| 2729 rc = SQLITE_CORRUPT; |
| 2730 goto finish_scan; |
| 2731 } |
| 2732 if( nPrefix+nSuffix>nAlloc ){ |
| 2733 char *zNew; |
| 2734 nAlloc = (nPrefix+nSuffix) * 2; |
| 2735 zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); |
| 2736 if( !zNew ){ |
| 2737 rc = SQLITE_NOMEM; |
| 2738 goto finish_scan; |
| 2739 } |
| 2740 zBuffer = zNew; |
| 2741 } |
| 2742 assert( zBuffer ); |
| 2743 memcpy(&zBuffer[nPrefix], zCsr, nSuffix); |
| 2744 nBuffer = nPrefix + nSuffix; |
| 2745 zCsr += nSuffix; |
| 2746 |
| 2747 /* Compare the term we are searching for with the term just loaded from |
| 2748 ** the interior node. If the specified term is greater than or equal |
| 2749 ** to the term from the interior node, then all terms on the sub-tree |
| 2750 ** headed by node iChild are smaller than zTerm. No need to search |
| 2751 ** iChild. |
| 2752 ** |
| 2753 ** If the interior node term is larger than the specified term, then |
| 2754 ** the tree headed by iChild may contain the specified term. |
| 2755 */ |
| 2756 cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); |
| 2757 if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ |
| 2758 *piFirst = iChild; |
| 2759 piFirst = 0; |
| 2760 } |
| 2761 |
| 2762 if( piLast && cmp<0 ){ |
| 2763 *piLast = iChild; |
| 2764 piLast = 0; |
| 2765 } |
| 2766 |
| 2767 iChild++; |
| 2768 }; |
| 2769 |
| 2770 if( piFirst ) *piFirst = iChild; |
| 2771 if( piLast ) *piLast = iChild; |
| 2772 |
| 2773 finish_scan: |
| 2774 sqlite3_free(zBuffer); |
| 2775 return rc; |
| 2776 } |
| 2777 |
| 2778 |
| 2779 /* |
| 2780 ** The buffer pointed to by argument zNode (size nNode bytes) contains an |
| 2781 ** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) |
| 2782 ** contains a term. This function searches the sub-tree headed by the zNode |
| 2783 ** node for the range of leaf nodes that may contain the specified term |
| 2784 ** or terms for which the specified term is a prefix. |
| 2785 ** |
| 2786 ** If piLeaf is not NULL, then *piLeaf is set to the blockid of the |
| 2787 ** left-most leaf node in the tree that may contain the specified term. |
| 2788 ** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the |
| 2789 ** right-most leaf node that may contain a term for which the specified |
| 2790 ** term is a prefix. |
| 2791 ** |
| 2792 ** It is possible that the range of returned leaf nodes does not contain |
| 2793 ** the specified term or any terms for which it is a prefix. However, if the |
| 2794 ** segment does contain any such terms, they are stored within the identified |
| 2795 ** range. Because this function only inspects interior segment nodes (and |
| 2796 ** never loads leaf nodes into memory), it is not possible to be sure. |
| 2797 ** |
| 2798 ** If an error occurs, an error code other than SQLITE_OK is returned. |
| 2799 */ |
| 2800 static int fts3SelectLeaf( |
| 2801 Fts3Table *p, /* Virtual table handle */ |
| 2802 const char *zTerm, /* Term to select leaves for */ |
| 2803 int nTerm, /* Size of term zTerm in bytes */ |
| 2804 const char *zNode, /* Buffer containing segment interior node */ |
| 2805 int nNode, /* Size of buffer at zNode */ |
| 2806 sqlite3_int64 *piLeaf, /* Selected leaf node */ |
| 2807 sqlite3_int64 *piLeaf2 /* Selected leaf node */ |
| 2808 ){ |
| 2809 int rc = SQLITE_OK; /* Return code */ |
| 2810 int iHeight; /* Height of this node in tree */ |
| 2811 |
| 2812 assert( piLeaf || piLeaf2 ); |
| 2813 |
| 2814 fts3GetVarint32(zNode, &iHeight); |
| 2815 rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); |
| 2816 assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); |
| 2817 |
| 2818 if( rc==SQLITE_OK && iHeight>1 ){ |
| 2819 char *zBlob = 0; /* Blob read from %_segments table */ |
| 2820 int nBlob = 0; /* Size of zBlob in bytes */ |
| 2821 |
| 2822 if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ |
| 2823 rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); |
| 2824 if( rc==SQLITE_OK ){ |
| 2825 rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); |
| 2826 } |
| 2827 sqlite3_free(zBlob); |
| 2828 piLeaf = 0; |
| 2829 zBlob = 0; |
| 2830 } |
| 2831 |
| 2832 if( rc==SQLITE_OK ){ |
| 2833 rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); |
| 2834 } |
| 2835 if( rc==SQLITE_OK ){ |
| 2836 rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); |
| 2837 } |
| 2838 sqlite3_free(zBlob); |
| 2839 } |
| 2840 |
| 2841 return rc; |
| 2842 } |
| 2843 |
| 2844 /* |
| 2845 ** This function is used to create delta-encoded serialized lists of FTS3 |
| 2846 ** varints. Each call to this function appends a single varint to a list. |
| 2847 */ |
| 2848 static void fts3PutDeltaVarint( |
| 2849 char **pp, /* IN/OUT: Output pointer */ |
| 2850 sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ |
| 2851 sqlite3_int64 iVal /* Write this value to the list */ |
| 2852 ){ |
| 2853 assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); |
| 2854 *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); |
| 2855 *piPrev = iVal; |
| 2856 } |
| 2857 |
/*
** On entry *ppPoslist must point to the start of a position-list. On
** return it points to the first byte after the position-list.
**
** A position-list is the list of (delta-encoded) positions and columns
** belonging to a single document record of a doclist. In other words,
** this routine advances *ppPoslist to the next docid in the doclist, or
** to the first byte past the end of the doclist.
**
** If pp is not NULL, the whole position-list, including its terminator
** byte, is copied to *pp and *pp is advanced past the copied bytes.
*/
static void fts3PoslistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the position-list */
  char *pEnd = pStart;            /* Scan pointer */
  char cPrev = 0;                 /* 0x80 bit of the previously scanned byte */

  /* A position-list ends with a zero encoded as an FTS3 varint: a single
  ** POS_END (0) byte. A 0x00 byte that follows a byte with the 0x80 bit
  ** set is not a terminator, however, but the tail of a multi-byte varint.
  **
  ** So scan forward until reaching a zero byte whose predecessor does not
  ** have the 0x80 bit set.
  */
  for(;;){
    if( (*pEnd | cPrev)==0 ) break;
    cPrev = *pEnd & 0x80;
    pEnd++;
    testcase( cPrev!=0 && (*pEnd)==0 );
  }
  pEnd++;                         /* Step over the POS_END terminator byte */

  if( pp ){
    int nCopy = (int)(pEnd - pStart);
    memcpy(*pp, pStart, nCopy);
    *pp += nCopy;
  }
  *ppPoslist = pEnd;
}
| 2901 |
/*
** On entry *ppPoslist must point to the start of a column-list. On return
** it points at the terminator byte of that column-list.
**
** A column-list is the list of delta-encoded positions for a single
** column within a single document within a doclist. It is terminated by
** either a POS_COLUMN (1) or a POS_END (0) varint. The terminator is not
** consumed: *ppPoslist is left pointing at it.
**
** If pp is not NULL, the column-list, excluding the terminator, is copied
** to *pp and *pp is advanced past the copied bytes.
*/
static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the column-list */
  char *pEnd = pStart;            /* Scan pointer */
  char cPrev = 0;                 /* 0x80 bit of the previously scanned byte */

  /* The list ends at a 0x00 or 0x01 byte that is not part of a multi-byte
  ** varint, i.e. one whose predecessor does not have the 0x80 bit set. */
  for(;;){
    if( (0xFE & (*pEnd | cPrev))==0 ) break;
    cPrev = *pEnd & 0x80;
    pEnd++;
    testcase( cPrev!=0 && ((*pEnd)&0xfe)==0 );
  }

  if( pp ){
    int nCopy = (int)(pEnd - pStart);
    memcpy(*pp, pStart, nCopy);
    *pp += nCopy;
  }
  *ppPoslist = pEnd;
}
| 2939 |
/*
** Sentinel value meaning "no more entries in this position-list". Using
** the largest 32-bit signed value is safe because a document cannot
** contain 2^31 terms, so no real position can ever be equal to it.
*/
#define POSITION_LIST_END (0x7fffffff)
| 2945 |
| 2946 /* |
| 2947 ** This function is used to help parse position-lists. When this function is |
| 2948 ** called, *pp may point to the start of the next varint in the position-list |
| 2949 ** being parsed, or it may point to 1 byte past the end of the position-list |
| 2950 ** (in which case **pp will be a terminator bytes POS_END (0) or |
| 2951 ** (1)). |
| 2952 ** |
| 2953 ** If *pp points past the end of the current position-list, set *pi to |
| 2954 ** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, |
| 2955 ** increment the current value of *pi by the value read, and set *pp to |
| 2956 ** point to the next value before returning. |
| 2957 ** |
| 2958 ** Before calling this routine *pi must be initialized to the value of |
| 2959 ** the previous position, or zero if we are reading the first position |
| 2960 ** in the position-list. Because positions are delta-encoded, the value |
| 2961 ** of the previous position is needed in order to compute the value of |
| 2962 ** the next position. |
| 2963 */ |
| 2964 static void fts3ReadNextPos( |
| 2965 char **pp, /* IN/OUT: Pointer into position-list buffer */ |
| 2966 sqlite3_int64 *pi /* IN/OUT: Value read from position-list */ |
| 2967 ){ |
| 2968 if( (**pp)&0xFE ){ |
| 2969 fts3GetDeltaVarint(pp, pi); |
| 2970 *pi -= 2; |
| 2971 }else{ |
| 2972 *pi = POSITION_LIST_END; |
| 2973 } |
| 2974 } |
| 2975 |
/*
** Begin a new column-list in the output buffer at *pp.
**
** If iCol is zero, nothing is written, *pp is not modified and 0 is
** returned. Otherwise a 0x01 (POS_COLUMN) byte followed by iCol encoded as
** a varint is written to *pp, *pp is advanced to the byte just after the
** last byte written, and the total number of bytes written is returned.
*/
static int fts3PutColNumber(char **pp, int iCol){
  char *pOut;                     /* Output pointer */
  int nWritten;                   /* Number of bytes written */

  if( iCol==0 ){
    return 0;
  }

  pOut = *pp;
  pOut[0] = 0x01;
  nWritten = 1 + sqlite3Fts3PutVarint(&pOut[1], iCol);
  *pp = pOut + nWritten;
  return nWritten;
}
| 2995 |
| 2996 /* |
| 2997 ** Compute the union of two position lists. The output written |
| 2998 ** into *pp contains all positions of both *pp1 and *pp2 in sorted |
| 2999 ** order and with any duplicates removed. All pointers are |
| 3000 ** updated appropriately. The caller is responsible for insuring |
| 3001 ** that there is enough space in *pp to hold the complete output. |
| 3002 */ |
| 3003 static void fts3PoslistMerge( |
| 3004 char **pp, /* Output buffer */ |
| 3005 char **pp1, /* Left input list */ |
| 3006 char **pp2 /* Right input list */ |
| 3007 ){ |
| 3008 char *p = *pp; |
| 3009 char *p1 = *pp1; |
| 3010 char *p2 = *pp2; |
| 3011 |
| 3012 while( *p1 || *p2 ){ |
| 3013 int iCol1; /* The current column index in pp1 */ |
| 3014 int iCol2; /* The current column index in pp2 */ |
| 3015 |
| 3016 if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); |
| 3017 else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; |
| 3018 else iCol1 = 0; |
| 3019 |
| 3020 if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); |
| 3021 else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; |
| 3022 else iCol2 = 0; |
| 3023 |
| 3024 if( iCol1==iCol2 ){ |
| 3025 sqlite3_int64 i1 = 0; /* Last position from pp1 */ |
| 3026 sqlite3_int64 i2 = 0; /* Last position from pp2 */ |
| 3027 sqlite3_int64 iPrev = 0; |
| 3028 int n = fts3PutColNumber(&p, iCol1); |
| 3029 p1 += n; |
| 3030 p2 += n; |
| 3031 |
| 3032 /* At this point, both p1 and p2 point to the start of column-lists |
| 3033 ** for the same column (the column with index iCol1 and iCol2). |
| 3034 ** A column-list is a list of non-negative delta-encoded varints, each |
| 3035 ** incremented by 2 before being stored. Each list is terminated by a |
| 3036 ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists |
| 3037 ** and writes the results to buffer p. p is left pointing to the byte |
| 3038 ** after the list written. No terminator (POS_END or POS_COLUMN) is |
| 3039 ** written to the output. |
| 3040 */ |
| 3041 fts3GetDeltaVarint(&p1, &i1); |
| 3042 fts3GetDeltaVarint(&p2, &i2); |
| 3043 do { |
| 3044 fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2); |
| 3045 iPrev -= 2; |
| 3046 if( i1==i2 ){ |
| 3047 fts3ReadNextPos(&p1, &i1); |
| 3048 fts3ReadNextPos(&p2, &i2); |
| 3049 }else if( i1<i2 ){ |
| 3050 fts3ReadNextPos(&p1, &i1); |
| 3051 }else{ |
| 3052 fts3ReadNextPos(&p2, &i2); |
| 3053 } |
| 3054 }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END ); |
| 3055 }else if( iCol1<iCol2 ){ |
| 3056 p1 += fts3PutColNumber(&p, iCol1); |
| 3057 fts3ColumnlistCopy(&p, &p1); |
| 3058 }else{ |
| 3059 p2 += fts3PutColNumber(&p, iCol2); |
| 3060 fts3ColumnlistCopy(&p, &p2); |
| 3061 } |
| 3062 } |
| 3063 |
| 3064 *p++ = POS_END; |
| 3065 *pp = p; |
| 3066 *pp1 = p1 + 1; |
| 3067 *pp2 = p2 + 1; |
| 3068 } |
| 3069 |
| 3070 /* |
| 3071 ** This function is used to merge two position lists into one. When it is |
| 3072 ** called, *pp1 and *pp2 must both point to position lists. A position-list is |
| 3073 ** the part of a doclist that follows each document id. For example, if a row |
| 3074 ** contains: |
| 3075 ** |
| 3076 ** 'a b c'|'x y z'|'a b b a' |
| 3077 ** |
| 3078 ** Then the position list for this row for token 'b' would consist of: |
| 3079 ** |
| 3080 ** 0x02 0x01 0x02 0x03 0x03 0x00 |
| 3081 ** |
| 3082 ** When this function returns, both *pp1 and *pp2 are left pointing to the |
| 3083 ** byte following the 0x00 terminator of their respective position lists. |
| 3084 ** |
| 3085 ** If isSaveLeft is 0, an entry is added to the output position list for |
| 3086 ** each position in *pp2 for which there exists one or more positions in |
| 3087 ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. |
| 3088 ** when the *pp1 token appears before the *pp2 token, but not more than nToken |
| 3089 ** slots before it. |
| 3090 ** |
| 3091 ** e.g. nToken==1 searches for adjacent positions. |
| 3092 */ |
| 3093 static int fts3PoslistPhraseMerge( |
| 3094 char **pp, /* IN/OUT: Preallocated output buffer */ |
| 3095 int nToken, /* Maximum difference in token positions */ |
| 3096 int isSaveLeft, /* Save the left position */ |
| 3097 int isExact, /* If *pp1 is exactly nTokens before *pp2 */ |
| 3098 char **pp1, /* IN/OUT: Left input list */ |
| 3099 char **pp2 /* IN/OUT: Right input list */ |
| 3100 ){ |
| 3101 char *p = *pp; |
| 3102 char *p1 = *pp1; |
| 3103 char *p2 = *pp2; |
| 3104 int iCol1 = 0; |
| 3105 int iCol2 = 0; |
| 3106 |
| 3107 /* Never set both isSaveLeft and isExact for the same invocation. */ |
| 3108 assert( isSaveLeft==0 || isExact==0 ); |
| 3109 |
| 3110 assert( p!=0 && *p1!=0 && *p2!=0 ); |
| 3111 if( *p1==POS_COLUMN ){ |
| 3112 p1++; |
| 3113 p1 += fts3GetVarint32(p1, &iCol1); |
| 3114 } |
| 3115 if( *p2==POS_COLUMN ){ |
| 3116 p2++; |
| 3117 p2 += fts3GetVarint32(p2, &iCol2); |
| 3118 } |
| 3119 |
| 3120 while( 1 ){ |
| 3121 if( iCol1==iCol2 ){ |
| 3122 char *pSave = p; |
| 3123 sqlite3_int64 iPrev = 0; |
| 3124 sqlite3_int64 iPos1 = 0; |
| 3125 sqlite3_int64 iPos2 = 0; |
| 3126 |
| 3127 if( iCol1 ){ |
| 3128 *p++ = POS_COLUMN; |
| 3129 p += sqlite3Fts3PutVarint(p, iCol1); |
| 3130 } |
| 3131 |
| 3132 assert( *p1!=POS_END && *p1!=POS_COLUMN ); |
| 3133 assert( *p2!=POS_END && *p2!=POS_COLUMN ); |
| 3134 fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; |
| 3135 fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; |
| 3136 |
| 3137 while( 1 ){ |
| 3138 if( iPos2==iPos1+nToken |
| 3139 || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) |
| 3140 ){ |
| 3141 sqlite3_int64 iSave; |
| 3142 iSave = isSaveLeft ? iPos1 : iPos2; |
| 3143 fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; |
| 3144 pSave = 0; |
| 3145 assert( p ); |
| 3146 } |
| 3147 if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ |
| 3148 if( (*p2&0xFE)==0 ) break; |
| 3149 fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; |
| 3150 }else{ |
| 3151 if( (*p1&0xFE)==0 ) break; |
| 3152 fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; |
| 3153 } |
| 3154 } |
| 3155 |
| 3156 if( pSave ){ |
| 3157 assert( pp && p ); |
| 3158 p = pSave; |
| 3159 } |
| 3160 |
| 3161 fts3ColumnlistCopy(0, &p1); |
| 3162 fts3ColumnlistCopy(0, &p2); |
| 3163 assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); |
| 3164 if( 0==*p1 || 0==*p2 ) break; |
| 3165 |
| 3166 p1++; |
| 3167 p1 += fts3GetVarint32(p1, &iCol1); |
| 3168 p2++; |
| 3169 p2 += fts3GetVarint32(p2, &iCol2); |
| 3170 } |
| 3171 |
| 3172 /* Advance pointer p1 or p2 (whichever corresponds to the smaller of |
| 3173 ** iCol1 and iCol2) so that it points to either the 0x00 that marks the |
| 3174 ** end of the position list, or the 0x01 that precedes the next |
| 3175 ** column-number in the position list. |
| 3176 */ |
| 3177 else if( iCol1<iCol2 ){ |
| 3178 fts3ColumnlistCopy(0, &p1); |
| 3179 if( 0==*p1 ) break; |
| 3180 p1++; |
| 3181 p1 += fts3GetVarint32(p1, &iCol1); |
| 3182 }else{ |
| 3183 fts3ColumnlistCopy(0, &p2); |
| 3184 if( 0==*p2 ) break; |
| 3185 p2++; |
| 3186 p2 += fts3GetVarint32(p2, &iCol2); |
| 3187 } |
| 3188 } |
| 3189 |
| 3190 fts3PoslistCopy(0, &p2); |
| 3191 fts3PoslistCopy(0, &p1); |
| 3192 *pp1 = p1; |
| 3193 *pp2 = p2; |
| 3194 if( *pp==p ){ |
| 3195 return 0; |
| 3196 } |
| 3197 *p++ = 0x00; |
| 3198 *pp = p; |
| 3199 return 1; |
| 3200 } |
| 3201 |
/*
** Merge two position lists for the NEAR operator.  The two input lists
** belong to the left and right phrases of an expression such as:
**
**     "phrase 1" NEAR "phrase number 2"
**
** *pp1 is the list for the left-hand side of the NEAR and *pp2 the list for
** the right-hand side.  As usual, the positions stored are those of the
** last token of each phrase (tokens "1" and "2" in the example).
**
** The list written to *pp is a copy of *pp2 from which every entry that is
** not sufficiently NEAR an entry of *pp1 has been removed.  It is built in
** two passes using scratch space aTmp: one pass with the lists in their
** natural order (distance nRight) and one with them swapped (distance
** nLeft, saving the left position).  Whichever passes produced output are
** then merged or copied to *pp.
**
** Returns 1 if anything was written to *pp, or 0 if neither pass matched.
*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *pLeftStart = *pp1;        /* Start of left list, for rewinding */
  char *pRightStart = *pp2;       /* Start of right list, for rewinding */
  char *pEndFwd = aTmp;           /* End of first-pass output in aTmp */
  char *pEndRev;                  /* End of second-pass output */
  char *aTmp2;                    /* Start of second-pass output */
  int bFwd;                       /* True if the first pass wrote output */
  int bRev;                       /* True if the second pass wrote output */

  /* First pass: left list before right list. */
  fts3PoslistPhraseMerge(&pEndFwd, nRight, 0, 0, pp1, pp2);

  /* Second pass: rewind both inputs, swap their roles, and write the
  ** result immediately after the output of the first pass. */
  aTmp2 = pEndFwd;
  pEndRev = pEndFwd;
  *pp1 = pLeftStart;
  *pp2 = pRightStart;
  fts3PoslistPhraseMerge(&pEndRev, nLeft, 1, 0, pp2, pp1);

  bFwd = (pEndFwd!=aTmp);
  bRev = (pEndRev!=aTmp2);
  if( bFwd && bRev ){
    fts3PoslistMerge(pp, &aTmp, &aTmp2);
    return 1;
  }
  if( bFwd ){
    fts3PoslistCopy(pp, &aTmp);
    return 1;
  }
  if( bRev ){
    fts3PoslistCopy(pp, &aTmp2);
    return 1;
  }
  return 0;
}
| 3250 |
/*
** Working state used to merge together the (potentially large number of)
** doclists that match a prefix query.  Each non-NULL aaOutput[] slot holds
** one partially merged doclist and the anOutput[] entry with the same index
** holds its size.  See fts3TermSelectMerge() for how the slots are used.
*/
typedef struct TermSelect {
  char *aaOutput[16];             /* Malloc'd output buffers */
  int anOutput[16];               /* Size of each output buffer in bytes */
} TermSelect;
| 3261 |
| 3262 /* |
| 3263 ** This function is used to read a single varint from a buffer. Parameter |
| 3264 ** pEnd points 1 byte past the end of the buffer. When this function is |
| 3265 ** called, if *pp points to pEnd or greater, then the end of the buffer |
| 3266 ** has been reached. In this case *pp is set to 0 and the function returns. |
| 3267 ** |
| 3268 ** If *pp does not point to or past pEnd, then a single varint is read |
| 3269 ** from *pp. *pp is then set to point 1 byte past the end of the read varint. |
| 3270 ** |
| 3271 ** If bDescIdx is false, the value read is added to *pVal before returning. |
| 3272 ** If it is true, the value read is subtracted from *pVal before this |
| 3273 ** function returns. |
| 3274 */ |
| 3275 static void fts3GetDeltaVarint3( |
| 3276 char **pp, /* IN/OUT: Point to read varint from */ |
| 3277 char *pEnd, /* End of buffer */ |
| 3278 int bDescIdx, /* True if docids are descending */ |
| 3279 sqlite3_int64 *pVal /* IN/OUT: Integer value */ |
| 3280 ){ |
| 3281 if( *pp>=pEnd ){ |
| 3282 *pp = 0; |
| 3283 }else{ |
| 3284 sqlite3_int64 iVal; |
| 3285 *pp += sqlite3Fts3GetVarint(*pp, &iVal); |
| 3286 if( bDescIdx ){ |
| 3287 *pVal -= iVal; |
| 3288 }else{ |
| 3289 *pVal += iVal; |
| 3290 } |
| 3291 } |
| 3292 } |
| 3293 |
| 3294 /* |
| 3295 ** This function is used to write a single varint to a buffer. The varint |
| 3296 ** is written to *pp. Before returning, *pp is set to point 1 byte past the |
| 3297 ** end of the value written. |
| 3298 ** |
| 3299 ** If *pbFirst is zero when this function is called, the value written to |
| 3300 ** the buffer is that of parameter iVal. |
| 3301 ** |
| 3302 ** If *pbFirst is non-zero when this function is called, then the value |
| 3303 ** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) |
| 3304 ** (if bDescIdx is non-zero). |
| 3305 ** |
| 3306 ** Before returning, this function always sets *pbFirst to 1 and *piPrev |
| 3307 ** to the value of parameter iVal. |
| 3308 */ |
| 3309 static void fts3PutDeltaVarint3( |
| 3310 char **pp, /* IN/OUT: Output pointer */ |
| 3311 int bDescIdx, /* True for descending docids */ |
| 3312 sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ |
| 3313 int *pbFirst, /* IN/OUT: True after first int written */ |
| 3314 sqlite3_int64 iVal /* Write this value to the list */ |
| 3315 ){ |
| 3316 sqlite3_int64 iWrite; |
| 3317 if( bDescIdx==0 || *pbFirst==0 ){ |
| 3318 iWrite = iVal - *piPrev; |
| 3319 }else{ |
| 3320 iWrite = *piPrev - iVal; |
| 3321 } |
| 3322 assert( *pbFirst || *piPrev==0 ); |
| 3323 assert( *pbFirst==0 || iWrite>0 ); |
| 3324 *pp += sqlite3Fts3PutVarint(*pp, iWrite); |
| 3325 *piPrev = iVal; |
| 3326 *pbFirst = 1; |
| 3327 } |
| 3328 |
| 3329 |
/*
** This macro is used by various functions that merge doclists. The two
** arguments are 64-bit docid values. The result is negative, zero or
** positive depending on whether i1 sorts before, equal to or after i2 in
** the order selected by the stack variable bDescDoclist: ascending if it
** is 0 when the macro is invoked, descending otherwise.
**
** Only the sign of the result is meaningful. The comparison is done with
** relational operators rather than by computing (i1-i2), because that
** subtraction overflows - and so may yield the wrong sign - when the two
** docids are far apart (for example a large positive and a negative docid).
**
** Each argument is evaluated more than once, so arguments must be free of
** side effects.
**
** Using this makes it easier to write code that can merge doclists that are
** sorted in either ascending or descending order.
*/
#define DOCID_CMP(i1, i2) \
  ((bDescDoclist?-1:1) * ((i1)>(i2) ? 1 : (((i1)==(i2)) ? 0 : -1)))
| 3340 |
| 3341 /* |
| 3342 ** This function does an "OR" merge of two doclists (output contains all |
| 3343 ** positions contained in either argument doclist). If the docids in the |
| 3344 ** input doclists are sorted in ascending order, parameter bDescDoclist |
| 3345 ** should be false. If they are sorted in ascending order, it should be |
| 3346 ** passed a non-zero value. |
| 3347 ** |
| 3348 ** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer |
| 3349 ** containing the output doclist and SQLITE_OK is returned. In this case |
| 3350 ** *pnOut is set to the number of bytes in the output doclist. |
| 3351 ** |
| 3352 ** If an error occurs, an SQLite error code is returned. The output values |
| 3353 ** are undefined in this case. |
| 3354 */ |
| 3355 static int fts3DoclistOrMerge( |
| 3356 int bDescDoclist, /* True if arguments are desc */ |
| 3357 char *a1, int n1, /* First doclist */ |
| 3358 char *a2, int n2, /* Second doclist */ |
| 3359 char **paOut, int *pnOut /* OUT: Malloc'd doclist */ |
| 3360 ){ |
| 3361 sqlite3_int64 i1 = 0; |
| 3362 sqlite3_int64 i2 = 0; |
| 3363 sqlite3_int64 iPrev = 0; |
| 3364 char *pEnd1 = &a1[n1]; |
| 3365 char *pEnd2 = &a2[n2]; |
| 3366 char *p1 = a1; |
| 3367 char *p2 = a2; |
| 3368 char *p; |
| 3369 char *aOut; |
| 3370 int bFirstOut = 0; |
| 3371 |
| 3372 *paOut = 0; |
| 3373 *pnOut = 0; |
| 3374 |
| 3375 /* Allocate space for the output. Both the input and output doclists |
| 3376 ** are delta encoded. If they are in ascending order (bDescDoclist==0), |
| 3377 ** then the first docid in each list is simply encoded as a varint. For |
| 3378 ** each subsequent docid, the varint stored is the difference between the |
| 3379 ** current and previous docid (a positive number - since the list is in |
| 3380 ** ascending order). |
| 3381 ** |
| 3382 ** The first docid written to the output is therefore encoded using the |
| 3383 ** same number of bytes as it is in whichever of the input lists it is |
| 3384 ** read from. And each subsequent docid read from the same input list |
| 3385 ** consumes either the same or less bytes as it did in the input (since |
| 3386 ** the difference between it and the previous value in the output must |
| 3387 ** be a positive value less than or equal to the delta value read from |
| 3388 ** the input list). The same argument applies to all but the first docid |
| 3389 ** read from the 'other' list. And to the contents of all position lists |
| 3390 ** that will be copied and merged from the input to the output. |
| 3391 ** |
| 3392 ** However, if the first docid copied to the output is a negative number, |
| 3393 ** then the encoding of the first docid from the 'other' input list may |
| 3394 ** be larger in the output than it was in the input (since the delta value |
| 3395 ** may be a larger positive integer than the actual docid). |
| 3396 ** |
| 3397 ** The space required to store the output is therefore the sum of the |
| 3398 ** sizes of the two inputs, plus enough space for exactly one of the input |
| 3399 ** docids to grow. |
| 3400 ** |
| 3401 ** A symetric argument may be made if the doclists are in descending |
| 3402 ** order. |
| 3403 */ |
| 3404 aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); |
| 3405 if( !aOut ) return SQLITE_NOMEM; |
| 3406 |
| 3407 p = aOut; |
| 3408 fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); |
| 3409 fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); |
| 3410 while( p1 || p2 ){ |
| 3411 sqlite3_int64 iDiff = DOCID_CMP(i1, i2); |
| 3412 |
| 3413 if( p2 && p1 && iDiff==0 ){ |
| 3414 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3415 fts3PoslistMerge(&p, &p1, &p2); |
| 3416 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3417 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3418 }else if( !p2 || (p1 && iDiff<0) ){ |
| 3419 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3420 fts3PoslistCopy(&p, &p1); |
| 3421 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3422 }else{ |
| 3423 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); |
| 3424 fts3PoslistCopy(&p, &p2); |
| 3425 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3426 } |
| 3427 } |
| 3428 |
| 3429 *paOut = aOut; |
| 3430 *pnOut = (int)(p-aOut); |
| 3431 assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); |
| 3432 return SQLITE_OK; |
| 3433 } |
| 3434 |
| 3435 /* |
| 3436 ** This function does a "phrase" merge of two doclists. In a phrase merge, |
| 3437 ** the output contains a copy of each position from the right-hand input |
| 3438 ** doclist for which there is a position in the left-hand input doclist |
| 3439 ** exactly nDist tokens before it. |
| 3440 ** |
| 3441 ** If the docids in the input doclists are sorted in ascending order, |
| 3442 ** parameter bDescDoclist should be false. If they are sorted in ascending |
| 3443 ** order, it should be passed a non-zero value. |
| 3444 ** |
| 3445 ** The right-hand input doclist is overwritten by this function. |
| 3446 */ |
| 3447 static int fts3DoclistPhraseMerge( |
| 3448 int bDescDoclist, /* True if arguments are desc */ |
| 3449 int nDist, /* Distance from left to right (1=adjacent) */ |
| 3450 char *aLeft, int nLeft, /* Left doclist */ |
| 3451 char **paRight, int *pnRight /* IN/OUT: Right/output doclist */ |
| 3452 ){ |
| 3453 sqlite3_int64 i1 = 0; |
| 3454 sqlite3_int64 i2 = 0; |
| 3455 sqlite3_int64 iPrev = 0; |
| 3456 char *aRight = *paRight; |
| 3457 char *pEnd1 = &aLeft[nLeft]; |
| 3458 char *pEnd2 = &aRight[*pnRight]; |
| 3459 char *p1 = aLeft; |
| 3460 char *p2 = aRight; |
| 3461 char *p; |
| 3462 int bFirstOut = 0; |
| 3463 char *aOut; |
| 3464 |
| 3465 assert( nDist>0 ); |
| 3466 if( bDescDoclist ){ |
| 3467 aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX); |
| 3468 if( aOut==0 ) return SQLITE_NOMEM; |
| 3469 }else{ |
| 3470 aOut = aRight; |
| 3471 } |
| 3472 p = aOut; |
| 3473 |
| 3474 fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); |
| 3475 fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); |
| 3476 |
| 3477 while( p1 && p2 ){ |
| 3478 sqlite3_int64 iDiff = DOCID_CMP(i1, i2); |
| 3479 if( iDiff==0 ){ |
| 3480 char *pSave = p; |
| 3481 sqlite3_int64 iPrevSave = iPrev; |
| 3482 int bFirstOutSave = bFirstOut; |
| 3483 |
| 3484 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3485 if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ |
| 3486 p = pSave; |
| 3487 iPrev = iPrevSave; |
| 3488 bFirstOut = bFirstOutSave; |
| 3489 } |
| 3490 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3491 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3492 }else if( iDiff<0 ){ |
| 3493 fts3PoslistCopy(0, &p1); |
| 3494 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3495 }else{ |
| 3496 fts3PoslistCopy(0, &p2); |
| 3497 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3498 } |
| 3499 } |
| 3500 |
| 3501 *pnRight = (int)(p - aOut); |
| 3502 if( bDescDoclist ){ |
| 3503 sqlite3_free(aRight); |
| 3504 *paRight = aOut; |
| 3505 } |
| 3506 |
| 3507 return SQLITE_OK; |
| 3508 } |
| 3509 |
| 3510 /* |
| 3511 ** Argument pList points to a position list nList bytes in size. This |
| 3512 ** function checks to see if the position list contains any entries for |
| 3513 ** a token in position 0 (of any column). If so, it writes argument iDelta |
| 3514 ** to the output buffer pOut, followed by a position list consisting only |
| 3515 ** of the entries from pList at position 0, and terminated by an 0x00 byte. |
| 3516 ** The value returned is the number of bytes written to pOut (if any). |
| 3517 */ |
| 3518 SQLITE_PRIVATE int sqlite3Fts3FirstFilter( |
| 3519 sqlite3_int64 iDelta, /* Varint that may be written to pOut */ |
| 3520 char *pList, /* Position list (no 0x00 term) */ |
| 3521 int nList, /* Size of pList in bytes */ |
| 3522 char *pOut /* Write output here */ |
| 3523 ){ |
| 3524 int nOut = 0; |
| 3525 int bWritten = 0; /* True once iDelta has been written */ |
| 3526 char *p = pList; |
| 3527 char *pEnd = &pList[nList]; |
| 3528 |
| 3529 if( *p!=0x01 ){ |
| 3530 if( *p==0x02 ){ |
| 3531 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); |
| 3532 pOut[nOut++] = 0x02; |
| 3533 bWritten = 1; |
| 3534 } |
| 3535 fts3ColumnlistCopy(0, &p); |
| 3536 } |
| 3537 |
| 3538 while( p<pEnd && *p==0x01 ){ |
| 3539 sqlite3_int64 iCol; |
| 3540 p++; |
| 3541 p += sqlite3Fts3GetVarint(p, &iCol); |
| 3542 if( *p==0x02 ){ |
| 3543 if( bWritten==0 ){ |
| 3544 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); |
| 3545 bWritten = 1; |
| 3546 } |
| 3547 pOut[nOut++] = 0x01; |
| 3548 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol); |
| 3549 pOut[nOut++] = 0x02; |
| 3550 } |
| 3551 fts3ColumnlistCopy(0, &p); |
| 3552 } |
| 3553 if( bWritten ){ |
| 3554 pOut[nOut++] = 0x00; |
| 3555 } |
| 3556 |
| 3557 return nOut; |
| 3558 } |
| 3559 |
| 3560 |
| 3561 /* |
| 3562 ** Merge all doclists in the TermSelect.aaOutput[] array into a single |
| 3563 ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all |
| 3564 ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. |
| 3565 ** |
| 3566 ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is |
| 3567 ** the responsibility of the caller to free any doclists left in the |
| 3568 ** TermSelect.aaOutput[] array. |
| 3569 */ |
| 3570 static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ |
| 3571 char *aOut = 0; |
| 3572 int nOut = 0; |
| 3573 int i; |
| 3574 |
| 3575 /* Loop through the doclists in the aaOutput[] array. Merge them all |
| 3576 ** into a single doclist. |
| 3577 */ |
| 3578 for(i=0; i<SizeofArray(pTS->aaOutput); i++){ |
| 3579 if( pTS->aaOutput[i] ){ |
| 3580 if( !aOut ){ |
| 3581 aOut = pTS->aaOutput[i]; |
| 3582 nOut = pTS->anOutput[i]; |
| 3583 pTS->aaOutput[i] = 0; |
| 3584 }else{ |
| 3585 int nNew; |
| 3586 char *aNew; |
| 3587 |
| 3588 int rc = fts3DoclistOrMerge(p->bDescIdx, |
| 3589 pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew |
| 3590 ); |
| 3591 if( rc!=SQLITE_OK ){ |
| 3592 sqlite3_free(aOut); |
| 3593 return rc; |
| 3594 } |
| 3595 |
| 3596 sqlite3_free(pTS->aaOutput[i]); |
| 3597 sqlite3_free(aOut); |
| 3598 pTS->aaOutput[i] = 0; |
| 3599 aOut = aNew; |
| 3600 nOut = nNew; |
| 3601 } |
| 3602 } |
| 3603 } |
| 3604 |
| 3605 pTS->aaOutput[0] = aOut; |
| 3606 pTS->anOutput[0] = nOut; |
| 3607 return SQLITE_OK; |
| 3608 } |
| 3609 |
| 3610 /* |
| 3611 ** Merge the doclist aDoclist/nDoclist into the TermSelect object passed |
| 3612 ** as the first argument. The merge is an "OR" merge (see function |
| 3613 ** fts3DoclistOrMerge() for details). |
| 3614 ** |
| 3615 ** This function is called with the doclist for each term that matches |
| 3616 ** a queried prefix. It merges all these doclists into one, the doclist |
| 3617 ** for the specified prefix. Since there can be a very large number of |
| 3618 ** doclists to merge, the merging is done pair-wise using the TermSelect |
| 3619 ** object. |
| 3620 ** |
| 3621 ** This function returns SQLITE_OK if the merge is successful, or an |
| 3622 ** SQLite error code (SQLITE_NOMEM) if an error occurs. |
| 3623 */ |
| 3624 static int fts3TermSelectMerge( |
| 3625 Fts3Table *p, /* FTS table handle */ |
| 3626 TermSelect *pTS, /* TermSelect object to merge into */ |
| 3627 char *aDoclist, /* Pointer to doclist */ |
| 3628 int nDoclist /* Size of aDoclist in bytes */ |
| 3629 ){ |
| 3630 if( pTS->aaOutput[0]==0 ){ |
| 3631 /* If this is the first term selected, copy the doclist to the output |
| 3632 ** buffer using memcpy(). |
| 3633 ** |
| 3634 ** Add FTS3_VARINT_MAX bytes of unused space to the end of the |
| 3635 ** allocation. This is so as to ensure that the buffer is big enough |
| 3636 ** to hold the current doclist AND'd with any other doclist. If the |
| 3637 ** doclists are stored in order=ASC order, this padding would not be |
| 3638 ** required (since the size of [doclistA AND doclistB] is always less |
| 3639 ** than or equal to the size of [doclistA] in that case). But this is |
| 3640 ** not true for order=DESC. For example, a doclist containing (1, -1) |
| 3641 ** may be smaller than (-1), as in the first example the -1 may be stored |
| 3642 ** as a single-byte delta, whereas in the second it must be stored as a |
| 3643 ** FTS3_VARINT_MAX byte varint. |
| 3644 ** |
| 3645 ** Similar padding is added in the fts3DoclistOrMerge() function. |
| 3646 */ |
| 3647 pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); |
| 3648 pTS->anOutput[0] = nDoclist; |
| 3649 if( pTS->aaOutput[0] ){ |
| 3650 memcpy(pTS->aaOutput[0], aDoclist, nDoclist); |
| 3651 }else{ |
| 3652 return SQLITE_NOMEM; |
| 3653 } |
| 3654 }else{ |
| 3655 char *aMerge = aDoclist; |
| 3656 int nMerge = nDoclist; |
| 3657 int iOut; |
| 3658 |
| 3659 for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ |
| 3660 if( pTS->aaOutput[iOut]==0 ){ |
| 3661 assert( iOut>0 ); |
| 3662 pTS->aaOutput[iOut] = aMerge; |
| 3663 pTS->anOutput[iOut] = nMerge; |
| 3664 break; |
| 3665 }else{ |
| 3666 char *aNew; |
| 3667 int nNew; |
| 3668 |
| 3669 int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, |
| 3670 pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew |
| 3671 ); |
| 3672 if( rc!=SQLITE_OK ){ |
| 3673 if( aMerge!=aDoclist ) sqlite3_free(aMerge); |
| 3674 return rc; |
| 3675 } |
| 3676 |
| 3677 if( aMerge!=aDoclist ) sqlite3_free(aMerge); |
| 3678 sqlite3_free(pTS->aaOutput[iOut]); |
| 3679 pTS->aaOutput[iOut] = 0; |
| 3680 |
| 3681 aMerge = aNew; |
| 3682 nMerge = nNew; |
| 3683 if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ |
| 3684 pTS->aaOutput[iOut] = aMerge; |
| 3685 pTS->anOutput[iOut] = nMerge; |
| 3686 } |
| 3687 } |
| 3688 } |
| 3689 } |
| 3690 return SQLITE_OK; |
| 3691 } |
| 3692 |
| 3693 /* |
| 3694 ** Append SegReader object pNew to the end of the pCsr->apSegment[] array. |
| 3695 */ |
| 3696 static int fts3SegReaderCursorAppend( |
| 3697 Fts3MultiSegReader *pCsr, |
| 3698 Fts3SegReader *pNew |
| 3699 ){ |
| 3700 if( (pCsr->nSegment%16)==0 ){ |
| 3701 Fts3SegReader **apNew; |
| 3702 int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); |
| 3703 apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); |
| 3704 if( !apNew ){ |
| 3705 sqlite3Fts3SegReaderFree(pNew); |
| 3706 return SQLITE_NOMEM; |
| 3707 } |
| 3708 pCsr->apSegment = apNew; |
| 3709 } |
| 3710 pCsr->apSegment[pCsr->nSegment++] = pNew; |
| 3711 return SQLITE_OK; |
| 3712 } |
| 3713 |
| 3714 /* |
| 3715 ** Add seg-reader objects to the Fts3MultiSegReader object passed as the |
| 3716 ** 8th argument. |
| 3717 ** |
| 3718 ** This function returns SQLITE_OK if successful, or an SQLite error code |
| 3719 ** otherwise. |
| 3720 */ |
| 3721 static int fts3SegReaderCursor( |
| 3722 Fts3Table *p, /* FTS3 table handle */ |
| 3723 int iLangid, /* Language id */ |
| 3724 int iIndex, /* Index to search (from 0 to p->nIndex-1) */ |
| 3725 int iLevel, /* Level of segments to scan */ |
| 3726 const char *zTerm, /* Term to query for */ |
| 3727 int nTerm, /* Size of zTerm in bytes */ |
| 3728 int isPrefix, /* True for a prefix search */ |
| 3729 int isScan, /* True to scan from zTerm to EOF */ |
| 3730 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 3731 ){ |
| 3732 int rc = SQLITE_OK; /* Error code */ |
| 3733 sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ |
| 3734 int rc2; /* Result of sqlite3_reset() */ |
| 3735 |
| 3736 /* If iLevel is less than 0 and this is not a scan, include a seg-reader |
| 3737 ** for the pending-terms. If this is a scan, then this call must be being |
| 3738 ** made by an fts4aux module, not an FTS table. In this case calling |
| 3739 ** Fts3SegReaderPending might segfault, as the data structures used by |
| 3740 ** fts4aux are not completely populated. So it's easiest to filter these |
| 3741 ** calls out here. */ |
| 3742 if( iLevel<0 && p->aIndex ){ |
| 3743 Fts3SegReader *pSeg = 0; |
| 3744 rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan,
&pSeg); |
| 3745 if( rc==SQLITE_OK && pSeg ){ |
| 3746 rc = fts3SegReaderCursorAppend(pCsr, pSeg); |
| 3747 } |
| 3748 } |
| 3749 |
| 3750 if( iLevel!=FTS3_SEGCURSOR_PENDING ){ |
| 3751 if( rc==SQLITE_OK ){ |
| 3752 rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); |
| 3753 } |
| 3754 |
| 3755 while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ |
| 3756 Fts3SegReader *pSeg = 0; |
| 3757 |
| 3758 /* Read the values returned by the SELECT into local variables. */ |
| 3759 sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); |
| 3760 sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); |
| 3761 sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); |
| 3762 int nRoot = sqlite3_column_bytes(pStmt, 4); |
| 3763 char const *zRoot = sqlite3_column_blob(pStmt, 4); |
| 3764 |
| 3765 /* If zTerm is not NULL, and this segment is not stored entirely on its |
| 3766 ** root node, the range of leaves scanned can be reduced. Do this. */ |
| 3767 if( iStartBlock && zTerm ){ |
| 3768 sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); |
| 3769 rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); |
| 3770 if( rc!=SQLITE_OK ) goto finished; |
| 3771 if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; |
| 3772 } |
| 3773 |
| 3774 rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, |
| 3775 (isPrefix==0 && isScan==0), |
| 3776 iStartBlock, iLeavesEndBlock, |
| 3777 iEndBlock, zRoot, nRoot, &pSeg |
| 3778 ); |
| 3779 if( rc!=SQLITE_OK ) goto finished; |
| 3780 rc = fts3SegReaderCursorAppend(pCsr, pSeg); |
| 3781 } |
| 3782 } |
| 3783 |
| 3784 finished: |
| 3785 rc2 = sqlite3_reset(pStmt); |
| 3786 if( rc==SQLITE_DONE ) rc = rc2; |
| 3787 |
| 3788 return rc; |
| 3789 } |
| 3790 |
| 3791 /* |
| 3792 ** Set up a cursor object for iterating through a full-text index or a |
| 3793 ** single level therein. |
| 3794 */ |
| 3795 SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor( |
| 3796 Fts3Table *p, /* FTS3 table handle */ |
| 3797 int iLangid, /* Language-id to search */ |
| 3798 int iIndex, /* Index to search (from 0 to p->nIndex-1) */ |
| 3799 int iLevel, /* Level of segments to scan */ |
| 3800 const char *zTerm, /* Term to query for */ |
| 3801 int nTerm, /* Size of zTerm in bytes */ |
| 3802 int isPrefix, /* True for a prefix search */ |
| 3803 int isScan, /* True to scan from zTerm to EOF */ |
| 3804 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 3805 ){ |
| 3806 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 3807 assert( iLevel==FTS3_SEGCURSOR_ALL |
| 3808 || iLevel==FTS3_SEGCURSOR_PENDING |
| 3809 || iLevel>=0 |
| 3810 ); |
| 3811 assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
| 3812 assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 ); |
| 3813 assert( isPrefix==0 || isScan==0 ); |
| 3814 |
| 3815 memset(pCsr, 0, sizeof(Fts3MultiSegReader)); |
| 3816 return fts3SegReaderCursor( |
| 3817 p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr |
| 3818 ); |
| 3819 } |
| 3820 |
| 3821 /* |
| 3822 ** In addition to its current configuration, have the Fts3MultiSegReader |
| 3823 ** passed as the 4th argument also scan the doclist for term zTerm/nTerm. |
| 3824 ** |
| 3825 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 3826 */ |
| 3827 static int fts3SegReaderCursorAddZero( |
| 3828 Fts3Table *p, /* FTS virtual table handle */ |
| 3829 int iLangid, |
| 3830 const char *zTerm, /* Term to scan doclist of */ |
| 3831 int nTerm, /* Number of bytes in zTerm */ |
| 3832 Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ |
| 3833 ){ |
| 3834 return fts3SegReaderCursor(p, |
| 3835 iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr |
| 3836 ); |
| 3837 } |
| 3838 |
| 3839 /* |
| 3840 ** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, |
| 3841 ** if isPrefix is true, to scan the doclist for all terms for which |
| 3842 ** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write |
| 3843 ** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return |
| 3844 ** an SQLite error code. |
| 3845 ** |
| 3846 ** It is the responsibility of the caller to free this object by eventually |
| 3847 ** passing it to fts3SegReaderCursorFree() |
| 3848 ** |
| 3849 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 3850 ** Output parameter *ppSegcsr is set to 0 if an error occurs. |
| 3851 */ |
| 3852 static int fts3TermSegReaderCursor( |
| 3853 Fts3Cursor *pCsr, /* Virtual table cursor handle */ |
| 3854 const char *zTerm, /* Term to query for */ |
| 3855 int nTerm, /* Size of zTerm in bytes */ |
| 3856 int isPrefix, /* True for a prefix search */ |
| 3857 Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ |
| 3858 ){ |
| 3859 Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ |
| 3860 int rc = SQLITE_NOMEM; /* Return code */ |
| 3861 |
| 3862 pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); |
| 3863 if( pSegcsr ){ |
| 3864 int i; |
| 3865 int bFound = 0; /* True once an index has been found */ |
| 3866 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 3867 |
| 3868 if( isPrefix ){ |
| 3869 for(i=1; bFound==0 && i<p->nIndex; i++){ |
| 3870 if( p->aIndex[i].nPrefix==nTerm ){ |
| 3871 bFound = 1; |
| 3872 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3873 i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr |
| 3874 ); |
| 3875 pSegcsr->bLookup = 1; |
| 3876 } |
| 3877 } |
| 3878 |
| 3879 for(i=1; bFound==0 && i<p->nIndex; i++){ |
| 3880 if( p->aIndex[i].nPrefix==nTerm+1 ){ |
| 3881 bFound = 1; |
| 3882 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3883 i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr |
| 3884 ); |
| 3885 if( rc==SQLITE_OK ){ |
| 3886 rc = fts3SegReaderCursorAddZero( |
| 3887 p, pCsr->iLangid, zTerm, nTerm, pSegcsr |
| 3888 ); |
| 3889 } |
| 3890 } |
| 3891 } |
| 3892 } |
| 3893 |
| 3894 if( bFound==0 ){ |
| 3895 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3896 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr |
| 3897 ); |
| 3898 pSegcsr->bLookup = !isPrefix; |
| 3899 } |
| 3900 } |
| 3901 |
| 3902 *ppSegcsr = pSegcsr; |
| 3903 return rc; |
| 3904 } |
| 3905 |
| 3906 /* |
| 3907 ** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). |
| 3908 */ |
| 3909 static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ |
| 3910 sqlite3Fts3SegReaderFinish(pSegcsr); |
| 3911 sqlite3_free(pSegcsr); |
| 3912 } |
| 3913 |
| 3914 /* |
| 3915 ** This function retrieves the doclist for the specified term (or term |
| 3916 ** prefix) from the database. |
| 3917 */ |
| 3918 static int fts3TermSelect( |
| 3919 Fts3Table *p, /* Virtual table handle */ |
| 3920 Fts3PhraseToken *pTok, /* Token to query for */ |
| 3921 int iColumn, /* Column to query (or -ve for all columns) */ |
| 3922 int *pnOut, /* OUT: Size of buffer at *ppOut */ |
| 3923 char **ppOut /* OUT: Malloced result buffer */ |
| 3924 ){ |
| 3925 int rc; /* Return code */ |
| 3926 Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ |
| 3927 TermSelect tsc; /* Object for pair-wise doclist merging */ |
| 3928 Fts3SegFilter filter; /* Segment term filter configuration */ |
| 3929 |
| 3930 pSegcsr = pTok->pSegcsr; |
| 3931 memset(&tsc, 0, sizeof(TermSelect)); |
| 3932 |
| 3933 filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS |
| 3934 | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) |
| 3935 | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) |
| 3936 | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); |
| 3937 filter.iCol = iColumn; |
| 3938 filter.zTerm = pTok->z; |
| 3939 filter.nTerm = pTok->n; |
| 3940 |
| 3941 rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); |
| 3942 while( SQLITE_OK==rc |
| 3943 && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) |
| 3944 ){ |
| 3945 rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); |
| 3946 } |
| 3947 |
| 3948 if( rc==SQLITE_OK ){ |
| 3949 rc = fts3TermSelectFinishMerge(p, &tsc); |
| 3950 } |
| 3951 if( rc==SQLITE_OK ){ |
| 3952 *ppOut = tsc.aaOutput[0]; |
| 3953 *pnOut = tsc.anOutput[0]; |
| 3954 }else{ |
| 3955 int i; |
| 3956 for(i=0; i<SizeofArray(tsc.aaOutput); i++){ |
| 3957 sqlite3_free(tsc.aaOutput[i]); |
| 3958 } |
| 3959 } |
| 3960 |
| 3961 fts3SegReaderCursorFree(pSegcsr); |
| 3962 pTok->pSegcsr = 0; |
| 3963 return rc; |
| 3964 } |
| 3965 |
/*
** Return the number of docids in the doclist stored in buffer aList[],
** which is nList bytes in size. If aList is NULL, zero is returned.
**
** Each entry is read as a docid varint followed by a position-list; the
** position-list is skipped using fts3PoslistCopy().
*/
static int fts3DoclistCountDocids(char *aList, int nList){
  int nDocid = 0;                 /* Number of docids seen so far */
  char *pCur;                     /* Read cursor */
  char *pEnd;                     /* One byte past the end of aList[] */

  if( aList==0 ) return 0;

  pEnd = &aList[nList];
  for(pCur=aList; pCur<pEnd; nDocid++){
    while( (*pCur++) & 0x80 ){}   /* Skip the docid varint */
    fts3PoslistCopy(0, &pCur);    /* Skip over the position list */
  }

  return nDocid;
}
| 3989 |
| 3990 /* |
| 3991 ** Advance the cursor to the next row in the %_content table that |
| 3992 ** matches the search criteria. For a MATCH search, this will be |
| 3993 ** the next row that matches. For a full-table scan, this will be |
| 3994 ** simply the next row in the %_content table. For a docid lookup, |
| 3995 ** this routine simply sets the EOF flag. |
| 3996 ** |
| 3997 ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned |
| 3998 ** even if we reach end-of-file. The fts3EofMethod() will be called |
| 3999 ** subsequently to determine whether or not an EOF was hit. |
| 4000 */ |
| 4001 static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ |
| 4002 int rc; |
| 4003 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 4004 if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ |
| 4005 if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ |
| 4006 pCsr->isEof = 1; |
| 4007 rc = sqlite3_reset(pCsr->pStmt); |
| 4008 }else{ |
| 4009 pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); |
| 4010 rc = SQLITE_OK; |
| 4011 } |
| 4012 }else{ |
| 4013 rc = fts3EvalNext((Fts3Cursor *)pCursor); |
| 4014 } |
| 4015 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 4016 return rc; |
| 4017 } |
| 4018 |
/*
** Largest and smallest values that fit in a 64-bit signed integer. These
** definitions are copies of the ones in sqliteInt.h and are written so
** that they work with both 32-bit and 64-bit compilers. They are only
** needed here when this file is not part of the amalgamation.
*/
#if !defined(SQLITE_AMALGAMATION)
# define LARGEST_INT64  (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
#endif
| 4030 |
| 4031 /* |
| 4032 ** If the numeric type of argument pVal is "integer", then return it |
| 4033 ** converted to a 64-bit signed integer. Otherwise, return a copy of |
| 4034 ** the second parameter, iDefault. |
| 4035 */ |
| 4036 static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ |
| 4037 if( pVal ){ |
| 4038 int eType = sqlite3_value_numeric_type(pVal); |
| 4039 if( eType==SQLITE_INTEGER ){ |
| 4040 return sqlite3_value_int64(pVal); |
| 4041 } |
| 4042 } |
| 4043 return iDefault; |
| 4044 } |
| 4045 |
| 4046 /* |
| 4047 ** This is the xFilter interface for the virtual table. See |
| 4048 ** the virtual table xFilter method documentation for additional |
| 4049 ** information. |
| 4050 ** |
| 4051 ** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against |
| 4052 ** the %_content table. |
| 4053 ** |
| 4054 ** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry |
| 4055 ** in the %_content table. |
| 4056 ** |
| 4057 ** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The |
| 4058 ** column on the left-hand side of the MATCH operator is column |
| 4059 ** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand |
| 4060 ** side of the MATCH operator. |
| 4061 */ |
| 4062 static int fts3FilterMethod( |
| 4063 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 4064 int idxNum, /* Strategy index */ |
| 4065 const char *idxStr, /* Unused */ |
| 4066 int nVal, /* Number of elements in apVal */ |
| 4067 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 4068 ){ |
| 4069 int rc = SQLITE_OK; |
| 4070 char *zSql; /* SQL statement used to access %_content */ |
| 4071 int eSearch; |
| 4072 Fts3Table *p = (Fts3Table *)pCursor->pVtab; |
| 4073 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 4074 |
| 4075 sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ |
| 4076 sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ |
| 4077 sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ |
| 4078 sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ |
| 4079 int iIdx; |
| 4080 |
| 4081 UNUSED_PARAMETER(idxStr); |
| 4082 UNUSED_PARAMETER(nVal); |
| 4083 |
| 4084 eSearch = (idxNum & 0x0000FFFF); |
| 4085 assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); |
| 4086 assert( p->pSegments==0 ); |
| 4087 |
| 4088 /* Collect arguments into local variables */ |
| 4089 iIdx = 0; |
| 4090 if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; |
| 4091 if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; |
| 4092 if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; |
| 4093 if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; |
| 4094 assert( iIdx==nVal ); |
| 4095 |
| 4096 /* In case the cursor has been used before, clear it now. */ |
| 4097 sqlite3_finalize(pCsr->pStmt); |
| 4098 sqlite3_free(pCsr->aDoclist); |
| 4099 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 4100 sqlite3Fts3ExprFree(pCsr->pExpr); |
| 4101 memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); |
| 4102 |
| 4103 /* Set the lower and upper bounds on docids to return */ |
| 4104 pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); |
| 4105 pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); |
| 4106 |
| 4107 if( idxStr ){ |
| 4108 pCsr->bDesc = (idxStr[0]=='D'); |
| 4109 }else{ |
| 4110 pCsr->bDesc = p->bDescIdx; |
| 4111 } |
| 4112 pCsr->eSearch = (i16)eSearch; |
| 4113 |
| 4114 if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ |
| 4115 int iCol = eSearch-FTS3_FULLTEXT_SEARCH; |
| 4116 const char *zQuery = (const char *)sqlite3_value_text(pCons); |
| 4117 |
| 4118 if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ |
| 4119 return SQLITE_NOMEM; |
| 4120 } |
| 4121 |
| 4122 pCsr->iLangid = 0; |
| 4123 if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); |
| 4124 |
| 4125 assert( p->base.zErrMsg==0 ); |
| 4126 rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, |
| 4127 p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, |
| 4128 &p->base.zErrMsg |
| 4129 ); |
| 4130 if( rc!=SQLITE_OK ){ |
| 4131 return rc; |
| 4132 } |
| 4133 |
| 4134 rc = fts3EvalStart(pCsr); |
| 4135 sqlite3Fts3SegmentsClose(p); |
| 4136 if( rc!=SQLITE_OK ) return rc; |
| 4137 pCsr->pNextId = pCsr->aDoclist; |
| 4138 pCsr->iPrevId = 0; |
| 4139 } |
| 4140 |
| 4141 /* Compile a SELECT statement for this cursor. For a full-table-scan, the |
| 4142 ** statement loops through all rows of the %_content table. For a |
| 4143 ** full-text query or docid lookup, the statement retrieves a single |
| 4144 ** row by docid. |
| 4145 */ |
| 4146 if( eSearch==FTS3_FULLSCAN_SEARCH ){ |
| 4147 if( pDocidGe || pDocidLe ){ |
| 4148 zSql = sqlite3_mprintf( |
| 4149 "SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s", |
| 4150 p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid, |
| 4151 (pCsr->bDesc ? "DESC" : "ASC") |
| 4152 ); |
| 4153 }else{ |
| 4154 zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", |
| 4155 p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") |
| 4156 ); |
| 4157 } |
| 4158 if( zSql ){ |
| 4159 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); |
| 4160 sqlite3_free(zSql); |
| 4161 }else{ |
| 4162 rc = SQLITE_NOMEM; |
| 4163 } |
| 4164 }else if( eSearch==FTS3_DOCID_SEARCH ){ |
| 4165 rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); |
| 4166 if( rc==SQLITE_OK ){ |
| 4167 rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); |
| 4168 } |
| 4169 } |
| 4170 if( rc!=SQLITE_OK ) return rc; |
| 4171 |
| 4172 return fts3NextMethod(pCursor); |
| 4173 } |
| 4174 |
| 4175 /* |
| 4176 ** This is the xEof method of the virtual table. SQLite calls this |
| 4177 ** routine to find out if it has reached the end of a result set. |
| 4178 */ |
| 4179 static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ |
| 4180 return ((Fts3Cursor *)pCursor)->isEof; |
| 4181 } |
| 4182 |
| 4183 /* |
| 4184 ** This is the xRowid method. The SQLite core calls this routine to |
| 4185 ** retrieve the rowid for the current row of the result set. fts3 |
| 4186 ** exposes %_content.docid as the rowid for the virtual table. The |
| 4187 ** rowid should be written to *pRowid. |
| 4188 */ |
| 4189 static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |
| 4190 Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; |
| 4191 *pRowid = pCsr->iPrevId; |
| 4192 return SQLITE_OK; |
| 4193 } |
| 4194 |
| 4195 /* |
| 4196 ** This is the xColumn method, called by SQLite to request a value from |
| 4197 ** the row that the supplied cursor currently points to. |
| 4198 ** |
| 4199 ** If: |
| 4200 ** |
| 4201 ** (iCol < p->nColumn) -> The value of the iCol'th user column. |
| 4202 ** (iCol == p->nColumn) -> Magic column with the same name as the table. |
| 4203 ** (iCol == p->nColumn+1) -> Docid column |
| 4204 ** (iCol == p->nColumn+2) -> Langid column |
| 4205 */ |
| 4206 static int fts3ColumnMethod( |
| 4207 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 4208 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 4209 int iCol /* Index of column to read value from */ |
| 4210 ){ |
| 4211 int rc = SQLITE_OK; /* Return Code */ |
| 4212 Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; |
| 4213 Fts3Table *p = (Fts3Table *)pCursor->pVtab; |
| 4214 |
| 4215 /* The column value supplied by SQLite must be in range. */ |
| 4216 assert( iCol>=0 && iCol<=p->nColumn+2 ); |
| 4217 |
| 4218 if( iCol==p->nColumn+1 ){ |
| 4219 /* This call is a request for the "docid" column. Since "docid" is an |
| 4220 ** alias for "rowid", use the xRowid() method to obtain the value. |
| 4221 */ |
| 4222 sqlite3_result_int64(pCtx, pCsr->iPrevId); |
| 4223 }else if( iCol==p->nColumn ){ |
| 4224 /* The extra column whose name is the same as the table. |
| 4225 ** Return a blob which is a pointer to the cursor. */ |
| 4226 sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); |
| 4227 }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ |
| 4228 sqlite3_result_int64(pCtx, pCsr->iLangid); |
| 4229 }else{ |
| 4230 /* The requested column is either a user column (one that contains |
| 4231 ** indexed data), or the language-id column. */ |
| 4232 rc = fts3CursorSeek(0, pCsr); |
| 4233 |
| 4234 if( rc==SQLITE_OK ){ |
| 4235 if( iCol==p->nColumn+2 ){ |
| 4236 int iLangid = 0; |
| 4237 if( p->zLanguageid ){ |
| 4238 iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); |
| 4239 } |
| 4240 sqlite3_result_int(pCtx, iLangid); |
| 4241 }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ |
| 4242 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); |
| 4243 } |
| 4244 } |
| 4245 } |
| 4246 |
| 4247 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 4248 return rc; |
| 4249 } |
| 4250 |
| 4251 /* |
| 4252 ** This function is the implementation of the xUpdate callback used by |
| 4253 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be |
| 4254 ** inserted, updated or deleted. |
| 4255 */ |
| 4256 static int fts3UpdateMethod( |
| 4257 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4258 int nArg, /* Size of argument array */ |
| 4259 sqlite3_value **apVal, /* Array of arguments */ |
| 4260 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ |
| 4261 ){ |
| 4262 return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); |
| 4263 } |
| 4264 |
| 4265 /* |
| 4266 ** Implementation of xSync() method. Flush the contents of the pending-terms |
| 4267 ** hash-table to the database. |
| 4268 */ |
| 4269 static int fts3SyncMethod(sqlite3_vtab *pVtab){ |
| 4270 |
| 4271 /* Following an incremental-merge operation, assuming that the input |
| 4272 ** segments are not completely consumed (the usual case), they are updated |
| 4273 ** in place to remove the entries that have already been merged. This |
| 4274 ** involves updating the leaf block that contains the smallest unmerged |
| 4275 ** entry and each block (if any) between the leaf and the root node. So |
| 4276 ** if the height of the input segment b-trees is N, and input segments |
| 4277 ** are merged eight at a time, updating the input segments at the end |
| 4278 ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually |
| 4279 ** small - often between 0 and 2. So the overhead of the incremental |
| 4280 ** merge is somewhere between 8 and 24 blocks. To avoid this overhead |
| 4281 ** dwarfing the actual productive work accomplished, the incremental merge |
| 4282 ** is only attempted if it will write at least 64 leaf blocks. Hence |
| 4283 ** nMinMerge. |
| 4284 ** |
| 4285 ** Of course, updating the input segments also involves deleting a bunch |
| 4286 ** of blocks from the segments table. But this is not considered overhead |
| 4287 ** as it would also be required by a crisis-merge that used the same input |
| 4288 ** segments. |
| 4289 */ |
| 4290 const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ |
| 4291 |
| 4292 Fts3Table *p = (Fts3Table*)pVtab; |
| 4293 int rc = sqlite3Fts3PendingTermsFlush(p); |
| 4294 |
| 4295 if( rc==SQLITE_OK |
| 4296 && p->nLeafAdd>(nMinMerge/16) |
| 4297 && p->nAutoincrmerge && p->nAutoincrmerge!=0xff |
| 4298 ){ |
| 4299 int mxLevel = 0; /* Maximum relative level value in db */ |
| 4300 int A; /* Incr-merge parameter A */ |
| 4301 |
| 4302 rc = sqlite3Fts3MaxLevel(p, &mxLevel); |
| 4303 assert( rc==SQLITE_OK || mxLevel==0 ); |
| 4304 A = p->nLeafAdd * mxLevel; |
| 4305 A += (A/2); |
| 4306 if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); |
| 4307 } |
| 4308 sqlite3Fts3SegmentsClose(p); |
| 4309 return rc; |
| 4310 } |
| 4311 |
| 4312 /* |
| 4313 ** If it is currently unknown whether or not the FTS table has an %_stat |
| 4314 ** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat |
| 4315 ** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code |
| 4316 ** if an error occurs. |
| 4317 */ |
| 4318 static int fts3SetHasStat(Fts3Table *p){ |
| 4319 int rc = SQLITE_OK; |
| 4320 if( p->bHasStat==2 ){ |
| 4321 const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; |
| 4322 char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); |
| 4323 if( zSql ){ |
| 4324 sqlite3_stmt *pStmt = 0; |
| 4325 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 4326 if( rc==SQLITE_OK ){ |
| 4327 int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); |
| 4328 rc = sqlite3_finalize(pStmt); |
| 4329 if( rc==SQLITE_OK ) p->bHasStat = bHasStat; |
| 4330 } |
| 4331 sqlite3_free(zSql); |
| 4332 }else{ |
| 4333 rc = SQLITE_NOMEM; |
| 4334 } |
| 4335 } |
| 4336 return rc; |
| 4337 } |
| 4338 |
| 4339 /* |
| 4340 ** Implementation of xBegin() method. |
| 4341 */ |
| 4342 static int fts3BeginMethod(sqlite3_vtab *pVtab){ |
| 4343 Fts3Table *p = (Fts3Table*)pVtab; |
| 4344 UNUSED_PARAMETER(pVtab); |
| 4345 assert( p->pSegments==0 ); |
| 4346 assert( p->nPendingData==0 ); |
| 4347 assert( p->inTransaction!=1 ); |
| 4348 TESTONLY( p->inTransaction = 1 ); |
| 4349 TESTONLY( p->mxSavepoint = -1; ); |
| 4350 p->nLeafAdd = 0; |
| 4351 return fts3SetHasStat(p); |
| 4352 } |
| 4353 |
| 4354 /* |
| 4355 ** Implementation of xCommit() method. This is a no-op. The contents of |
| 4356 ** the pending-terms hash-table have already been flushed into the database |
| 4357 ** by fts3SyncMethod(). |
| 4358 */ |
| 4359 static int fts3CommitMethod(sqlite3_vtab *pVtab){ |
| 4360 TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); |
| 4361 UNUSED_PARAMETER(pVtab); |
| 4362 assert( p->nPendingData==0 ); |
| 4363 assert( p->inTransaction!=0 ); |
| 4364 assert( p->pSegments==0 ); |
| 4365 TESTONLY( p->inTransaction = 0 ); |
| 4366 TESTONLY( p->mxSavepoint = -1; ); |
| 4367 return SQLITE_OK; |
| 4368 } |
| 4369 |
| 4370 /* |
| 4371 ** Implementation of xRollback(). Discard the contents of the pending-terms |
| 4372 ** hash-table. Any changes made to the database are reverted by SQLite. |
| 4373 */ |
| 4374 static int fts3RollbackMethod(sqlite3_vtab *pVtab){ |
| 4375 Fts3Table *p = (Fts3Table*)pVtab; |
| 4376 sqlite3Fts3PendingTermsClear(p); |
| 4377 assert( p->inTransaction!=0 ); |
| 4378 TESTONLY( p->inTransaction = 0 ); |
| 4379 TESTONLY( p->mxSavepoint = -1; ); |
| 4380 return SQLITE_OK; |
| 4381 } |
| 4382 |
/*
** When called, *ppPoslist must point to the byte immediately following the
** end of a position-list, i.e. ( (*ppPoslist)[-1]==POS_END ). pStart
** points to the start of the doclist that contains the position-list.
** This function moves *ppPoslist backwards so that it points to the first
** byte of that same position-list.
*/
static void fts3ReversePoslist(char *pStart, char **ppPoslist){
  char *pEnd = *ppPoslist;        /* Original value of *ppPoslist */
  char *pCur = pEnd - 2;          /* Backwards scan cursor */
  char cPrev = 0;                 /* Byte most recently stepped over */

  /* Skip backwards past any trailing 0x00 bytes added by NearTrim() */
  while( pCur>pStart ){
    cPrev = *pCur--;
    if( cPrev!=0 ) break;
  }

  /* Search backwards for a varint with value zero (the end of the previous
  ** poslist). This is a 0x00 byte preceded by some byte that does not
  ** have the 0x80 bit set. */
  while( pCur>pStart && ((*pCur & 0x80) | cPrev) ){
    cPrev = *pCur--;
  }
  assert( pCur==pStart || cPrev==0 );

  /* Normally pCur now points to that preceding byte without the 0x80 bit
  ** set, so the start of the poslist is found by skipping forward 2 bytes
  ** and then over one varint (the docid).
  **
  ** The other case is that pCur==pStart and the poslist to return is the
  ** first one in the doclist. Then the 2 bytes must not be skipped. The
  ** second half of the condition below covers a doclist whose first entry
  ** has an empty position-list. For example, if the first docid is 10, a
  ** doclist that begins with:
  **
  **   0x0A 0x00 <next docid delta varint>
  */
  if( pCur>pStart || (cPrev==0 && pEnd>&pCur[2]) ){
    pCur += 2;
  }
  while( *pCur++ & 0x80 ){}
  *ppPoslist = pCur;
}
| 4421 |
| 4422 /* |
| 4423 ** Helper function used by the implementation of the overloaded snippet(), |
| 4424 ** offsets() and optimize() SQL functions. |
| 4425 ** |
| 4426 ** If the value passed as the third argument is a blob of size |
| 4427 ** sizeof(Fts3Cursor*), then the blob contents are copied to the |
| 4428 ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error |
| 4429 ** message is written to context pContext and SQLITE_ERROR returned. The |
| 4430 ** string passed via zFunc is used as part of the error message. |
| 4431 */ |
| 4432 static int fts3FunctionArg( |
| 4433 sqlite3_context *pContext, /* SQL function call context */ |
| 4434 const char *zFunc, /* Function name */ |
| 4435 sqlite3_value *pVal, /* argv[0] passed to function */ |
| 4436 Fts3Cursor **ppCsr /* OUT: Store cursor handle here */ |
| 4437 ){ |
| 4438 Fts3Cursor *pRet; |
| 4439 if( sqlite3_value_type(pVal)!=SQLITE_BLOB |
| 4440 || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *) |
| 4441 ){ |
| 4442 char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); |
| 4443 sqlite3_result_error(pContext, zErr, -1); |
| 4444 sqlite3_free(zErr); |
| 4445 return SQLITE_ERROR; |
| 4446 } |
| 4447 memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *)); |
| 4448 *ppCsr = pRet; |
| 4449 return SQLITE_OK; |
| 4450 } |
| 4451 |
| 4452 /* |
| 4453 ** Implementation of the snippet() function for FTS3 |
| 4454 */ |
| 4455 static void fts3SnippetFunc( |
| 4456 sqlite3_context *pContext, /* SQLite function call context */ |
| 4457 int nVal, /* Size of apVal[] array */ |
| 4458 sqlite3_value **apVal /* Array of arguments */ |
| 4459 ){ |
| 4460 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4461 const char *zStart = "<b>"; |
| 4462 const char *zEnd = "</b>"; |
| 4463 const char *zEllipsis = "<b>...</b>"; |
| 4464 int iCol = -1; |
| 4465 int nToken = 15; /* Default number of tokens in snippet */ |
| 4466 |
| 4467 /* There must be at least one argument passed to this function (otherwise |
| 4468 ** the non-overloaded version would have been called instead of this one). |
| 4469 */ |
| 4470 assert( nVal>=1 ); |
| 4471 |
| 4472 if( nVal>6 ){ |
| 4473 sqlite3_result_error(pContext, |
| 4474 "wrong number of arguments to function snippet()", -1); |
| 4475 return; |
| 4476 } |
| 4477 if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; |
| 4478 |
| 4479 switch( nVal ){ |
| 4480 case 6: nToken = sqlite3_value_int(apVal[5]); |
| 4481 case 5: iCol = sqlite3_value_int(apVal[4]); |
| 4482 case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); |
| 4483 case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); |
| 4484 case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); |
| 4485 } |
| 4486 if( !zEllipsis || !zEnd || !zStart ){ |
| 4487 sqlite3_result_error_nomem(pContext); |
| 4488 }else if( nToken==0 ){ |
| 4489 sqlite3_result_text(pContext, "", -1, SQLITE_STATIC); |
| 4490 }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ |
| 4491 sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); |
| 4492 } |
| 4493 } |
| 4494 |
| 4495 /* |
| 4496 ** Implementation of the offsets() function for FTS3 |
| 4497 */ |
| 4498 static void fts3OffsetsFunc( |
| 4499 sqlite3_context *pContext, /* SQLite function call context */ |
| 4500 int nVal, /* Size of argument array */ |
| 4501 sqlite3_value **apVal /* Array of arguments */ |
| 4502 ){ |
| 4503 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4504 |
| 4505 UNUSED_PARAMETER(nVal); |
| 4506 |
| 4507 assert( nVal==1 ); |
| 4508 if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; |
| 4509 assert( pCsr ); |
| 4510 if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ |
| 4511 sqlite3Fts3Offsets(pContext, pCsr); |
| 4512 } |
| 4513 } |
| 4514 |
| 4515 /* |
| 4516 ** Implementation of the special optimize() function for FTS3. This |
| 4517 ** function merges all segments in the database to a single segment. |
| 4518 ** Example usage is: |
| 4519 ** |
| 4520 ** SELECT optimize(t) FROM t LIMIT 1; |
| 4521 ** |
| 4522 ** where 't' is the name of an FTS3 table. |
| 4523 */ |
| 4524 static void fts3OptimizeFunc( |
| 4525 sqlite3_context *pContext, /* SQLite function call context */ |
| 4526 int nVal, /* Size of argument array */ |
| 4527 sqlite3_value **apVal /* Array of arguments */ |
| 4528 ){ |
| 4529 int rc; /* Return code */ |
| 4530 Fts3Table *p; /* Virtual table handle */ |
| 4531 Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */ |
| 4532 |
| 4533 UNUSED_PARAMETER(nVal); |
| 4534 |
| 4535 assert( nVal==1 ); |
| 4536 if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; |
| 4537 p = (Fts3Table *)pCursor->base.pVtab; |
| 4538 assert( p ); |
| 4539 |
| 4540 rc = sqlite3Fts3Optimize(p); |
| 4541 |
| 4542 switch( rc ){ |
| 4543 case SQLITE_OK: |
| 4544 sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); |
| 4545 break; |
| 4546 case SQLITE_DONE: |
| 4547 sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); |
| 4548 break; |
| 4549 default: |
| 4550 sqlite3_result_error_code(pContext, rc); |
| 4551 break; |
| 4552 } |
| 4553 } |
| 4554 |
| 4555 /* |
| 4556 ** Implementation of the matchinfo() function for FTS3 |
| 4557 */ |
| 4558 static void fts3MatchinfoFunc( |
| 4559 sqlite3_context *pContext, /* SQLite function call context */ |
| 4560 int nVal, /* Size of argument array */ |
| 4561 sqlite3_value **apVal /* Array of arguments */ |
| 4562 ){ |
| 4563 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4564 assert( nVal==1 || nVal==2 ); |
| 4565 if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ |
| 4566 const char *zArg = 0; |
| 4567 if( nVal>1 ){ |
| 4568 zArg = (const char *)sqlite3_value_text(apVal[1]); |
| 4569 } |
| 4570 sqlite3Fts3Matchinfo(pContext, pCsr, zArg); |
| 4571 } |
| 4572 } |
| 4573 |
| 4574 /* |
| 4575 ** This routine implements the xFindFunction method for the FTS3 |
| 4576 ** virtual table. |
| 4577 */ |
| 4578 static int fts3FindFunctionMethod( |
| 4579 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4580 int nArg, /* Number of SQL function arguments */ |
| 4581 const char *zName, /* Name of SQL function */ |
| 4582 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ |
| 4583 void **ppArg /* Unused */ |
| 4584 ){ |
| 4585 struct Overloaded { |
| 4586 const char *zName; |
| 4587 void (*xFunc)(sqlite3_context*,int,sqlite3_value**); |
| 4588 } aOverload[] = { |
| 4589 { "snippet", fts3SnippetFunc }, |
| 4590 { "offsets", fts3OffsetsFunc }, |
| 4591 { "optimize", fts3OptimizeFunc }, |
| 4592 { "matchinfo", fts3MatchinfoFunc }, |
| 4593 }; |
| 4594 int i; /* Iterator variable */ |
| 4595 |
| 4596 UNUSED_PARAMETER(pVtab); |
| 4597 UNUSED_PARAMETER(nArg); |
| 4598 UNUSED_PARAMETER(ppArg); |
| 4599 |
| 4600 for(i=0; i<SizeofArray(aOverload); i++){ |
| 4601 if( strcmp(zName, aOverload[i].zName)==0 ){ |
| 4602 *pxFunc = aOverload[i].xFunc; |
| 4603 return 1; |
| 4604 } |
| 4605 } |
| 4606 |
| 4607 /* No function of the specified name was found. Return 0. */ |
| 4608 return 0; |
| 4609 } |
| 4610 |
| 4611 /* |
| 4612 ** Implementation of FTS3 xRename method. Rename an fts3 table. |
| 4613 */ |
| 4614 static int fts3RenameMethod( |
| 4615 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4616 const char *zName /* New name of table */ |
| 4617 ){ |
| 4618 Fts3Table *p = (Fts3Table *)pVtab; |
| 4619 sqlite3 *db = p->db; /* Database connection */ |
| 4620 int rc; /* Return Code */ |
| 4621 |
| 4622 /* At this point it must be known if the %_stat table exists or not. |
| 4623 ** So bHasStat may not be 2. */ |
| 4624 rc = fts3SetHasStat(p); |
| 4625 |
| 4626 /* As it happens, the pending terms table is always empty here. This is |
| 4627 ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction |
| 4628 ** always opens a savepoint transaction. And the xSavepoint() method |
| 4629 ** flushes the pending terms table. But leave the (no-op) call to |
| 4630 ** PendingTermsFlush() in in case that changes. |
| 4631 */ |
| 4632 assert( p->nPendingData==0 ); |
| 4633 if( rc==SQLITE_OK ){ |
| 4634 rc = sqlite3Fts3PendingTermsFlush(p); |
| 4635 } |
| 4636 |
| 4637 if( p->zContentTbl==0 ){ |
| 4638 fts3DbExec(&rc, db, |
| 4639 "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", |
| 4640 p->zDb, p->zName, zName |
| 4641 ); |
| 4642 } |
| 4643 |
| 4644 if( p->bHasDocsize ){ |
| 4645 fts3DbExec(&rc, db, |
| 4646 "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", |
| 4647 p->zDb, p->zName, zName |
| 4648 ); |
| 4649 } |
| 4650 if( p->bHasStat ){ |
| 4651 fts3DbExec(&rc, db, |
| 4652 "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';", |
| 4653 p->zDb, p->zName, zName |
| 4654 ); |
| 4655 } |
| 4656 fts3DbExec(&rc, db, |
| 4657 "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';", |
| 4658 p->zDb, p->zName, zName |
| 4659 ); |
| 4660 fts3DbExec(&rc, db, |
| 4661 "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", |
| 4662 p->zDb, p->zName, zName |
| 4663 ); |
| 4664 return rc; |
| 4665 } |
| 4666 |
| 4667 /* |
| 4668 ** The xSavepoint() method. |
| 4669 ** |
| 4670 ** Flush the contents of the pending-terms table to disk. |
| 4671 */ |
| 4672 static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4673 int rc = SQLITE_OK; |
| 4674 UNUSED_PARAMETER(iSavepoint); |
| 4675 assert( ((Fts3Table *)pVtab)->inTransaction ); |
| 4676 assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); |
| 4677 TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); |
| 4678 if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ |
| 4679 rc = fts3SyncMethod(pVtab); |
| 4680 } |
| 4681 return rc; |
| 4682 } |
| 4683 |
| 4684 /* |
| 4685 ** The xRelease() method. |
| 4686 ** |
| 4687 ** This is a no-op. |
| 4688 */ |
| 4689 static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4690 TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); |
| 4691 UNUSED_PARAMETER(iSavepoint); |
| 4692 UNUSED_PARAMETER(pVtab); |
| 4693 assert( p->inTransaction ); |
| 4694 assert( p->mxSavepoint >= iSavepoint ); |
| 4695 TESTONLY( p->mxSavepoint = iSavepoint-1 ); |
| 4696 return SQLITE_OK; |
| 4697 } |
| 4698 |
| 4699 /* |
| 4700 ** The xRollbackTo() method. |
| 4701 ** |
| 4702 ** Discard the contents of the pending terms table. |
| 4703 */ |
| 4704 static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4705 Fts3Table *p = (Fts3Table*)pVtab; |
| 4706 UNUSED_PARAMETER(iSavepoint); |
| 4707 assert( p->inTransaction ); |
| 4708 assert( p->mxSavepoint >= iSavepoint ); |
| 4709 TESTONLY( p->mxSavepoint = iSavepoint ); |
| 4710 sqlite3Fts3PendingTermsClear(p); |
| 4711 return SQLITE_OK; |
| 4712 } |
| 4713 |
| 4714 static const sqlite3_module fts3Module = { |
| 4715 /* iVersion */ 2, |
| 4716 /* xCreate */ fts3CreateMethod, |
| 4717 /* xConnect */ fts3ConnectMethod, |
| 4718 /* xBestIndex */ fts3BestIndexMethod, |
| 4719 /* xDisconnect */ fts3DisconnectMethod, |
| 4720 /* xDestroy */ fts3DestroyMethod, |
| 4721 /* xOpen */ fts3OpenMethod, |
| 4722 /* xClose */ fts3CloseMethod, |
| 4723 /* xFilter */ fts3FilterMethod, |
| 4724 /* xNext */ fts3NextMethod, |
| 4725 /* xEof */ fts3EofMethod, |
| 4726 /* xColumn */ fts3ColumnMethod, |
| 4727 /* xRowid */ fts3RowidMethod, |
| 4728 /* xUpdate */ fts3UpdateMethod, |
| 4729 /* xBegin */ fts3BeginMethod, |
| 4730 /* xSync */ fts3SyncMethod, |
| 4731 /* xCommit */ fts3CommitMethod, |
| 4732 /* xRollback */ fts3RollbackMethod, |
| 4733 /* xFindFunction */ fts3FindFunctionMethod, |
| 4734 /* xRename */ fts3RenameMethod, |
| 4735 /* xSavepoint */ fts3SavepointMethod, |
| 4736 /* xRelease */ fts3ReleaseMethod, |
| 4737 /* xRollbackTo */ fts3RollbackToMethod, |
| 4738 }; |
| 4739 |
| 4740 /* |
| 4741 ** This function is registered as the module destructor (called when an |
| 4742 ** FTS3 enabled database connection is closed). It frees the memory |
| 4743 ** allocated for the tokenizer hash table. |
| 4744 */ |
| 4745 static void hashDestroy(void *p){ |
| 4746 Fts3Hash *pHash = (Fts3Hash *)p; |
| 4747 sqlite3Fts3HashClear(pHash); |
| 4748 sqlite3_free(pHash); |
| 4749 } |
| 4750 |
| 4751 /* |
| 4752 ** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are |
| 4753 ** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c |
| 4754 ** respectively. The following three forward declarations are for functions |
| 4755 ** declared in these files used to retrieve the respective implementations. |
| 4756 ** |
| 4757 ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed |
| 4758 ** to by the argument to point to the "simple" tokenizer implementation. |
| 4759 ** And so on. |
| 4760 */ |
| 4761 SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module co
nst**ppModule); |
| 4762 SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module co
nst**ppModule); |
| 4763 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 4764 SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**
ppModule); |
| 4765 #endif |
| 4766 #ifdef SQLITE_ENABLE_ICU |
| 4767 SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const
**ppModule); |
| 4768 #endif |
| 4769 |
| 4770 /* |
| 4771 ** Initialize the fts3 extension. If this extension is built as part |
| 4772 ** of the sqlite library, then this function is called directly by |
| 4773 ** SQLite. If fts3 is built as a dynamically loadable extension, this |
| 4774 ** function is called by the sqlite3_extension_init() entry point. |
| 4775 */ |
| 4776 SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ |
| 4777 int rc = SQLITE_OK; |
| 4778 Fts3Hash *pHash = 0; |
| 4779 const sqlite3_tokenizer_module *pSimple = 0; |
| 4780 const sqlite3_tokenizer_module *pPorter = 0; |
| 4781 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 4782 const sqlite3_tokenizer_module *pUnicode = 0; |
| 4783 #endif |
| 4784 |
| 4785 #ifdef SQLITE_ENABLE_ICU |
| 4786 const sqlite3_tokenizer_module *pIcu = 0; |
| 4787 sqlite3Fts3IcuTokenizerModule(&pIcu); |
| 4788 #endif |
| 4789 |
| 4790 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 4791 sqlite3Fts3UnicodeTokenizer(&pUnicode); |
| 4792 #endif |
| 4793 |
| 4794 #ifdef SQLITE_TEST |
| 4795 rc = sqlite3Fts3InitTerm(db); |
| 4796 if( rc!=SQLITE_OK ) return rc; |
| 4797 #endif |
| 4798 |
| 4799 rc = sqlite3Fts3InitAux(db); |
| 4800 if( rc!=SQLITE_OK ) return rc; |
| 4801 |
| 4802 sqlite3Fts3SimpleTokenizerModule(&pSimple); |
| 4803 sqlite3Fts3PorterTokenizerModule(&pPorter); |
| 4804 |
| 4805 /* Allocate and initialize the hash-table used to store tokenizers. */ |
| 4806 pHash = sqlite3_malloc(sizeof(Fts3Hash)); |
| 4807 if( !pHash ){ |
| 4808 rc = SQLITE_NOMEM; |
| 4809 }else{ |
| 4810 sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); |
| 4811 } |
| 4812 |
| 4813 /* Load the built-in tokenizers into the hash table */ |
| 4814 if( rc==SQLITE_OK ){ |
| 4815 if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) |
| 4816 || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) |
| 4817 |
| 4818 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 4819 || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) |
| 4820 #endif |
| 4821 #ifdef SQLITE_ENABLE_ICU |
| 4822 || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) |
| 4823 #endif |
| 4824 ){ |
| 4825 rc = SQLITE_NOMEM; |
| 4826 } |
| 4827 } |
| 4828 |
| 4829 #ifdef SQLITE_TEST |
| 4830 if( rc==SQLITE_OK ){ |
| 4831 rc = sqlite3Fts3ExprInitTestInterface(db); |
| 4832 } |
| 4833 #endif |
| 4834 |
| 4835 /* Create the virtual table wrapper around the hash-table and overload |
| 4836 ** the two scalar functions. If this is successful, register the |
| 4837 ** module with sqlite. |
| 4838 */ |
| 4839 if( SQLITE_OK==rc |
| 4840 #if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST |
| 4841 /* fts3_tokenizer() disabled for security reasons. */ |
| 4842 #else |
| 4843 && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) |
| 4844 #endif |
| 4845 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) |
| 4846 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) |
| 4847 && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) |
| 4848 && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) |
| 4849 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) |
| 4850 ){ |
| 4851 rc = sqlite3_create_module_v2( |
| 4852 db, "fts3", &fts3Module, (void *)pHash, hashDestroy |
| 4853 ); |
| 4854 #if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST |
| 4855 /* Disable fts4 and tokenizer vtab pending review. */ |
| 4856 #else |
| 4857 if( rc==SQLITE_OK ){ |
| 4858 rc = sqlite3_create_module_v2( |
| 4859 db, "fts4", &fts3Module, (void *)pHash, 0 |
| 4860 ); |
| 4861 } |
| 4862 if( rc==SQLITE_OK ){ |
| 4863 rc = sqlite3Fts3InitTok(db, (void *)pHash); |
| 4864 } |
| 4865 #endif |
| 4866 return rc; |
| 4867 } |
| 4868 |
| 4869 |
| 4870 /* An error has occurred. Delete the hash table and return the error code. */ |
| 4871 assert( rc!=SQLITE_OK ); |
| 4872 if( pHash ){ |
| 4873 sqlite3Fts3HashClear(pHash); |
| 4874 sqlite3_free(pHash); |
| 4875 } |
| 4876 return rc; |
| 4877 } |
| 4878 |
| 4879 /* |
| 4880 ** Allocate an Fts3MultiSegReader for each token in the expression headed |
| 4881 ** by pExpr. |
| 4882 ** |
| 4883 ** An Fts3SegReader object is a cursor that can seek or scan a range of |
| 4884 ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple |
| 4885 ** Fts3SegReader objects internally to provide an interface to seek or scan |
| 4886 ** within the union of all segments of a b-tree. Hence the name. |
| 4887 ** |
| 4888 ** If the allocated Fts3MultiSegReader just seeks to a single entry in a |
| 4889 ** segment b-tree (if the term is not a prefix or it is a prefix for which |
| 4890 ** there exists prefix b-tree of the right length) then it may be traversed |
| 4891 ** and merged incrementally. Otherwise, it has to be merged into an in-memory |
| 4892 ** doclist and then traversed. |
| 4893 */ |
| 4894 static void fts3EvalAllocateReaders( |
| 4895 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 4896 Fts3Expr *pExpr, /* Allocate readers for this expression */ |
| 4897 int *pnToken, /* OUT: Total number of tokens in phrase. */ |
| 4898 int *pnOr, /* OUT: Total number of OR nodes in expr. */ |
| 4899 int *pRc /* IN/OUT: Error code */ |
| 4900 ){ |
| 4901 if( pExpr && SQLITE_OK==*pRc ){ |
| 4902 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 4903 int i; |
| 4904 int nToken = pExpr->pPhrase->nToken; |
| 4905 *pnToken += nToken; |
| 4906 for(i=0; i<nToken; i++){ |
| 4907 Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; |
| 4908 int rc = fts3TermSegReaderCursor(pCsr, |
| 4909 pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr |
| 4910 ); |
| 4911 if( rc!=SQLITE_OK ){ |
| 4912 *pRc = rc; |
| 4913 return; |
| 4914 } |
| 4915 } |
| 4916 assert( pExpr->pPhrase->iDoclistToken==0 ); |
| 4917 pExpr->pPhrase->iDoclistToken = -1; |
| 4918 }else{ |
| 4919 *pnOr += (pExpr->eType==FTSQUERY_OR); |
| 4920 fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); |
| 4921 fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); |
| 4922 } |
| 4923 } |
| 4924 } |
| 4925 |
| 4926 /* |
| 4927 ** Arguments pList/nList contain the doclist for token iToken of phrase p. |
| 4928 ** It is merged into the main doclist stored in p->doclist.aAll/nAll. |
| 4929 ** |
| 4930 ** This function assumes that pList points to a buffer allocated using |
| 4931 ** sqlite3_malloc(). This function takes responsibility for eventually |
| 4932 ** freeing the buffer. |
| 4933 ** |
| 4934 ** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs. |
| 4935 */ |
| 4936 static int fts3EvalPhraseMergeToken( |
| 4937 Fts3Table *pTab, /* FTS Table pointer */ |
| 4938 Fts3Phrase *p, /* Phrase to merge pList/nList into */ |
| 4939 int iToken, /* Token pList/nList corresponds to */ |
| 4940 char *pList, /* Pointer to doclist */ |
| 4941 int nList /* Number of bytes in pList */ |
| 4942 ){ |
| 4943 int rc = SQLITE_OK; |
| 4944 assert( iToken!=p->iDoclistToken ); |
| 4945 |
| 4946 if( pList==0 ){ |
| 4947 sqlite3_free(p->doclist.aAll); |
| 4948 p->doclist.aAll = 0; |
| 4949 p->doclist.nAll = 0; |
| 4950 } |
| 4951 |
| 4952 else if( p->iDoclistToken<0 ){ |
| 4953 p->doclist.aAll = pList; |
| 4954 p->doclist.nAll = nList; |
| 4955 } |
| 4956 |
| 4957 else if( p->doclist.aAll==0 ){ |
| 4958 sqlite3_free(pList); |
| 4959 } |
| 4960 |
| 4961 else { |
| 4962 char *pLeft; |
| 4963 char *pRight; |
| 4964 int nLeft; |
| 4965 int nRight; |
| 4966 int nDiff; |
| 4967 |
| 4968 if( p->iDoclistToken<iToken ){ |
| 4969 pLeft = p->doclist.aAll; |
| 4970 nLeft = p->doclist.nAll; |
| 4971 pRight = pList; |
| 4972 nRight = nList; |
| 4973 nDiff = iToken - p->iDoclistToken; |
| 4974 }else{ |
| 4975 pRight = p->doclist.aAll; |
| 4976 nRight = p->doclist.nAll; |
| 4977 pLeft = pList; |
| 4978 nLeft = nList; |
| 4979 nDiff = p->iDoclistToken - iToken; |
| 4980 } |
| 4981 |
| 4982 rc = fts3DoclistPhraseMerge( |
| 4983 pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight |
| 4984 ); |
| 4985 sqlite3_free(pLeft); |
| 4986 p->doclist.aAll = pRight; |
| 4987 p->doclist.nAll = nRight; |
| 4988 } |
| 4989 |
| 4990 if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; |
| 4991 return rc; |
| 4992 } |
| 4993 |
| 4994 /* |
| 4995 ** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist |
| 4996 ** does not take deferred tokens into account. |
| 4997 ** |
| 4998 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 4999 */ |
| 5000 static int fts3EvalPhraseLoad( |
| 5001 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5002 Fts3Phrase *p /* Phrase object */ |
| 5003 ){ |
| 5004 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5005 int iToken; |
| 5006 int rc = SQLITE_OK; |
| 5007 |
| 5008 for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){ |
| 5009 Fts3PhraseToken *pToken = &p->aToken[iToken]; |
| 5010 assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); |
| 5011 |
| 5012 if( pToken->pSegcsr ){ |
| 5013 int nThis = 0; |
| 5014 char *pThis = 0; |
| 5015 rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); |
| 5016 if( rc==SQLITE_OK ){ |
| 5017 rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); |
| 5018 } |
| 5019 } |
| 5020 assert( pToken->pSegcsr==0 ); |
| 5021 } |
| 5022 |
| 5023 return rc; |
| 5024 } |
| 5025 |
| 5026 /* |
| 5027 ** This function is called on each phrase after the position lists for |
| 5028 ** any deferred tokens have been loaded into memory. It updates the phrases |
| 5029 ** current position list to include only those positions that are really |
| 5030 ** instances of the phrase (after considering deferred tokens). If this |
| 5031 ** means that the phrase does not appear in the current row, doclist.pList |
| 5032 ** and doclist.nList are both zeroed. |
| 5033 ** |
| 5034 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 5035 */ |
| 5036 static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ |
| 5037 int iToken; /* Used to iterate through phrase tokens */ |
| 5038 char *aPoslist = 0; /* Position list for deferred tokens */ |
| 5039 int nPoslist = 0; /* Number of bytes in aPoslist */ |
| 5040 int iPrev = -1; /* Token number of previous deferred token */ |
| 5041 |
| 5042 assert( pPhrase->doclist.bFreeList==0 ); |
| 5043 |
| 5044 for(iToken=0; iToken<pPhrase->nToken; iToken++){ |
| 5045 Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; |
| 5046 Fts3DeferredToken *pDeferred = pToken->pDeferred; |
| 5047 |
| 5048 if( pDeferred ){ |
| 5049 char *pList = 0; |
| 5050 int nList = 0; |
| 5051 int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); |
| 5052 if( rc!=SQLITE_OK ) return rc; |
| 5053 |
| 5054 if( pList==0 ){ |
| 5055 sqlite3_free(aPoslist); |
| 5056 pPhrase->doclist.pList = 0; |
| 5057 pPhrase->doclist.nList = 0; |
| 5058 return SQLITE_OK; |
| 5059 |
| 5060 }else if( aPoslist==0 ){ |
| 5061 aPoslist = pList; |
| 5062 nPoslist = nList; |
| 5063 |
| 5064 }else{ |
| 5065 char *aOut = pList; |
| 5066 char *p1 = aPoslist; |
| 5067 char *p2 = aOut; |
| 5068 |
| 5069 assert( iPrev>=0 ); |
| 5070 fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); |
| 5071 sqlite3_free(aPoslist); |
| 5072 aPoslist = pList; |
| 5073 nPoslist = (int)(aOut - aPoslist); |
| 5074 if( nPoslist==0 ){ |
| 5075 sqlite3_free(aPoslist); |
| 5076 pPhrase->doclist.pList = 0; |
| 5077 pPhrase->doclist.nList = 0; |
| 5078 return SQLITE_OK; |
| 5079 } |
| 5080 } |
| 5081 iPrev = iToken; |
| 5082 } |
| 5083 } |
| 5084 |
| 5085 if( iPrev>=0 ){ |
| 5086 int nMaxUndeferred = pPhrase->iDoclistToken; |
| 5087 if( nMaxUndeferred<0 ){ |
| 5088 pPhrase->doclist.pList = aPoslist; |
| 5089 pPhrase->doclist.nList = nPoslist; |
| 5090 pPhrase->doclist.iDocid = pCsr->iPrevId; |
| 5091 pPhrase->doclist.bFreeList = 1; |
| 5092 }else{ |
| 5093 int nDistance; |
| 5094 char *p1; |
| 5095 char *p2; |
| 5096 char *aOut; |
| 5097 |
| 5098 if( nMaxUndeferred>iPrev ){ |
| 5099 p1 = aPoslist; |
| 5100 p2 = pPhrase->doclist.pList; |
| 5101 nDistance = nMaxUndeferred - iPrev; |
| 5102 }else{ |
| 5103 p1 = pPhrase->doclist.pList; |
| 5104 p2 = aPoslist; |
| 5105 nDistance = iPrev - nMaxUndeferred; |
| 5106 } |
| 5107 |
| 5108 aOut = (char *)sqlite3_malloc(nPoslist+8); |
| 5109 if( !aOut ){ |
| 5110 sqlite3_free(aPoslist); |
| 5111 return SQLITE_NOMEM; |
| 5112 } |
| 5113 |
| 5114 pPhrase->doclist.pList = aOut; |
| 5115 if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ |
| 5116 pPhrase->doclist.bFreeList = 1; |
| 5117 pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); |
| 5118 }else{ |
| 5119 sqlite3_free(aOut); |
| 5120 pPhrase->doclist.pList = 0; |
| 5121 pPhrase->doclist.nList = 0; |
| 5122 } |
| 5123 sqlite3_free(aPoslist); |
| 5124 } |
| 5125 } |
| 5126 |
| 5127 return SQLITE_OK; |
| 5128 } |
| 5129 |
| 5130 /* |
| 5131 ** Maximum number of tokens a phrase may have to be considered for the |
| 5132 ** incremental doclists strategy. |
| 5133 */ |
| 5134 #define MAX_INCR_PHRASE_TOKENS 4 |
| 5135 |
| 5136 /* |
| 5137 ** This function is called for each Fts3Phrase in a full-text query |
| 5138 ** expression to initialize the mechanism for returning rows. Once this |
| 5139 ** function has been called successfully on an Fts3Phrase, it may be |
| 5140 ** used with fts3EvalPhraseNext() to iterate through the matching docids. |
| 5141 ** |
| 5142 ** If parameter bOptOk is true, then the phrase may (or may not) use the |
| 5143 ** incremental loading strategy. Otherwise, the entire doclist is loaded into |
| 5144 ** memory within this call. |
| 5145 ** |
| 5146 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 5147 */ |
| 5148 static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ |
| 5149 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5150 int rc = SQLITE_OK; /* Error code */ |
| 5151 int i; |
| 5152 |
| 5153 /* Determine if doclists may be loaded from disk incrementally. This is |
| 5154 ** possible if the bOptOk argument is true, the FTS doclists will be |
| 5155 ** scanned in forward order, and the phrase consists of |
| 5156 ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" |
| 5157 ** tokens or prefix tokens that cannot use a prefix-index. */ |
| 5158 int bHaveIncr = 0; |
| 5159 int bIncrOk = (bOptOk |
| 5160 && pCsr->bDesc==pTab->bDescIdx |
| 5161 && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 |
| 5162 #ifdef SQLITE_TEST |
| 5163 && pTab->bNoIncrDoclist==0 |
| 5164 #endif |
| 5165 ); |
| 5166 for(i=0; bIncrOk==1 && i<p->nToken; i++){ |
| 5167 Fts3PhraseToken *pToken = &p->aToken[i]; |
| 5168 if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ |
| 5169 bIncrOk = 0; |
| 5170 } |
| 5171 if( pToken->pSegcsr ) bHaveIncr = 1; |
| 5172 } |
| 5173 |
| 5174 if( bIncrOk && bHaveIncr ){ |
| 5175 /* Use the incremental approach. */ |
| 5176 int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); |
| 5177 for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ |
| 5178 Fts3PhraseToken *pToken = &p->aToken[i]; |
| 5179 Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; |
| 5180 if( pSegcsr ){ |
| 5181 rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); |
| 5182 } |
| 5183 } |
| 5184 p->bIncr = 1; |
| 5185 }else{ |
| 5186 /* Load the full doclist for the phrase into memory. */ |
| 5187 rc = fts3EvalPhraseLoad(pCsr, p); |
| 5188 p->bIncr = 0; |
| 5189 } |
| 5190 |
| 5191 assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); |
| 5192 return rc; |
| 5193 } |
| 5194 |
| 5195 /* |
| 5196 ** This function is used to iterate backwards (from the end to start) |
| 5197 ** through doclists. It is used by this module to iterate through phrase |
| 5198 ** doclists in reverse and by the fts3_write.c module to iterate through |
| 5199 ** pending-terms lists when writing to databases with "order=desc". |
| 5200 ** |
| 5201 ** The doclist may be sorted in ascending (parameter bDescIdx==0) or |
| 5202 ** descending (parameter bDescIdx==1) order of docid. Regardless, this |
| 5203 ** function iterates from the end of the doclist to the beginning. |
| 5204 */ |
| 5205 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev( |
| 5206 int bDescIdx, /* True if the doclist is desc */ |
| 5207 char *aDoclist, /* Pointer to entire doclist */ |
| 5208 int nDoclist, /* Length of aDoclist in bytes */ |
| 5209 char **ppIter, /* IN/OUT: Iterator pointer */ |
| 5210 sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ |
| 5211 int *pnList, /* OUT: List length pointer */ |
| 5212 u8 *pbEof /* OUT: End-of-file flag */ |
| 5213 ){ |
| 5214 char *p = *ppIter; |
| 5215 |
| 5216 assert( nDoclist>0 ); |
| 5217 assert( *pbEof==0 ); |
| 5218 assert( p || *piDocid==0 ); |
| 5219 assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); |
| 5220 |
| 5221 if( p==0 ){ |
| 5222 sqlite3_int64 iDocid = 0; |
| 5223 char *pNext = 0; |
| 5224 char *pDocid = aDoclist; |
| 5225 char *pEnd = &aDoclist[nDoclist]; |
| 5226 int iMul = 1; |
| 5227 |
| 5228 while( pDocid<pEnd ){ |
| 5229 sqlite3_int64 iDelta; |
| 5230 pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta); |
| 5231 iDocid += (iMul * iDelta); |
| 5232 pNext = pDocid; |
| 5233 fts3PoslistCopy(0, &pDocid); |
| 5234 while( pDocid<pEnd && *pDocid==0 ) pDocid++; |
| 5235 iMul = (bDescIdx ? -1 : 1); |
| 5236 } |
| 5237 |
| 5238 *pnList = (int)(pEnd - pNext); |
| 5239 *ppIter = pNext; |
| 5240 *piDocid = iDocid; |
| 5241 }else{ |
| 5242 int iMul = (bDescIdx ? -1 : 1); |
| 5243 sqlite3_int64 iDelta; |
| 5244 fts3GetReverseVarint(&p, aDoclist, &iDelta); |
| 5245 *piDocid -= (iMul * iDelta); |
| 5246 |
| 5247 if( p==aDoclist ){ |
| 5248 *pbEof = 1; |
| 5249 }else{ |
| 5250 char *pSave = p; |
| 5251 fts3ReversePoslist(aDoclist, &p); |
| 5252 *pnList = (int)(pSave - p); |
| 5253 } |
| 5254 *ppIter = p; |
| 5255 } |
| 5256 } |
| 5257 |
| 5258 /* |
| 5259 ** Iterate forwards through a doclist. |
| 5260 */ |
| 5261 SQLITE_PRIVATE void sqlite3Fts3DoclistNext( |
| 5262 int bDescIdx, /* True if the doclist is desc */ |
| 5263 char *aDoclist, /* Pointer to entire doclist */ |
| 5264 int nDoclist, /* Length of aDoclist in bytes */ |
| 5265 char **ppIter, /* IN/OUT: Iterator pointer */ |
| 5266 sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ |
| 5267 u8 *pbEof /* OUT: End-of-file flag */ |
| 5268 ){ |
| 5269 char *p = *ppIter; |
| 5270 |
| 5271 assert( nDoclist>0 ); |
| 5272 assert( *pbEof==0 ); |
| 5273 assert( p || *piDocid==0 ); |
| 5274 assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); |
| 5275 |
| 5276 if( p==0 ){ |
| 5277 p = aDoclist; |
| 5278 p += sqlite3Fts3GetVarint(p, piDocid); |
| 5279 }else{ |
| 5280 fts3PoslistCopy(0, &p); |
| 5281 while( p<&aDoclist[nDoclist] && *p==0 ) p++; |
| 5282 if( p>=&aDoclist[nDoclist] ){ |
| 5283 *pbEof = 1; |
| 5284 }else{ |
| 5285 sqlite3_int64 iVar; |
| 5286 p += sqlite3Fts3GetVarint(p, &iVar); |
| 5287 *piDocid += ((bDescIdx ? -1 : 1) * iVar); |
| 5288 } |
| 5289 } |
| 5290 |
| 5291 *ppIter = p; |
| 5292 } |
| 5293 |
| 5294 /* |
| 5295 ** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof |
| 5296 ** to true if EOF is reached. |
| 5297 */ |
| 5298 static void fts3EvalDlPhraseNext( |
| 5299 Fts3Table *pTab, |
| 5300 Fts3Doclist *pDL, |
| 5301 u8 *pbEof |
| 5302 ){ |
| 5303 char *pIter; /* Used to iterate through aAll */ |
| 5304 char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ |
| 5305 |
| 5306 if( pDL->pNextDocid ){ |
| 5307 pIter = pDL->pNextDocid; |
| 5308 }else{ |
| 5309 pIter = pDL->aAll; |
| 5310 } |
| 5311 |
| 5312 if( pIter>=pEnd ){ |
| 5313 /* We have already reached the end of this doclist. EOF. */ |
| 5314 *pbEof = 1; |
| 5315 }else{ |
| 5316 sqlite3_int64 iDelta; |
| 5317 pIter += sqlite3Fts3GetVarint(pIter, &iDelta); |
| 5318 if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ |
| 5319 pDL->iDocid += iDelta; |
| 5320 }else{ |
| 5321 pDL->iDocid -= iDelta; |
| 5322 } |
| 5323 pDL->pList = pIter; |
| 5324 fts3PoslistCopy(0, &pIter); |
| 5325 pDL->nList = (int)(pIter - pDL->pList); |
| 5326 |
| 5327 /* pIter now points just past the 0x00 that terminates the position- |
| 5328 ** list for document pDL->iDocid. However, if this position-list was |
| 5329 ** edited in place by fts3EvalNearTrim(), then pIter may not actually |
| 5330 ** point to the start of the next docid value. The following line deals |
| 5331 ** with this case by advancing pIter past the zero-padding added by |
| 5332 ** fts3EvalNearTrim(). */ |
| 5333 while( pIter<pEnd && *pIter==0 ) pIter++; |
| 5334 |
| 5335 pDL->pNextDocid = pIter; |
| 5336 assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); |
| 5337 *pbEof = 0; |
| 5338 } |
| 5339 } |
| 5340 |
| 5341 /* |
| 5342 ** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). |
| 5343 */ |
| 5344 typedef struct TokenDoclist TokenDoclist; |
| 5345 struct TokenDoclist { |
| 5346 int bIgnore; |
| 5347 sqlite3_int64 iDocid; |
| 5348 char *pList; |
| 5349 int nList; |
| 5350 }; |
| 5351 |
| 5352 /* |
| 5353 ** Token pToken is an incrementally loaded token that is part of a |
| 5354 ** multi-token phrase. Advance it to the next matching document in the |
| 5355 ** database and populate output variable *p with the details of the new |
| 5356 ** entry. Or, if the iterator has reached EOF, set *pbEof to true. |
| 5357 ** |
| 5358 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5359 ** SQLITE_OK. |
| 5360 */ |
| 5361 static int incrPhraseTokenNext( |
| 5362 Fts3Table *pTab, /* Virtual table handle */ |
| 5363 Fts3Phrase *pPhrase, /* Phrase to advance token of */ |
| 5364 int iToken, /* Specific token to advance */ |
| 5365 TokenDoclist *p, /* OUT: Docid and doclist for new entry */ |
| 5366 u8 *pbEof /* OUT: True if iterator is at EOF */ |
| 5367 ){ |
| 5368 int rc = SQLITE_OK; |
| 5369 |
| 5370 if( pPhrase->iDoclistToken==iToken ){ |
| 5371 assert( p->bIgnore==0 ); |
| 5372 assert( pPhrase->aToken[iToken].pSegcsr==0 ); |
| 5373 fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); |
| 5374 p->pList = pPhrase->doclist.pList; |
| 5375 p->nList = pPhrase->doclist.nList; |
| 5376 p->iDocid = pPhrase->doclist.iDocid; |
| 5377 }else{ |
| 5378 Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; |
| 5379 assert( pToken->pDeferred==0 ); |
| 5380 assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); |
| 5381 if( pToken->pSegcsr ){ |
| 5382 assert( p->bIgnore==0 ); |
| 5383 rc = sqlite3Fts3MsrIncrNext( |
| 5384 pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList |
| 5385 ); |
| 5386 if( p->pList==0 ) *pbEof = 1; |
| 5387 }else{ |
| 5388 p->bIgnore = 1; |
| 5389 } |
| 5390 } |
| 5391 |
| 5392 return rc; |
| 5393 } |
| 5394 |
| 5395 |
| 5396 /* |
| 5397 ** The phrase iterator passed as the second argument: |
| 5398 ** |
| 5399 ** * features at least one token that uses an incremental doclist, and |
| 5400 ** |
| 5401 ** * does not contain any deferred tokens. |
| 5402 ** |
| 5403 ** Advance it to the next matching documnent in the database and populate |
| 5404 ** the Fts3Doclist.pList and nList fields. |
| 5405 ** |
| 5406 ** If there is no "next" entry and no error occurs, then *pbEof is set to |
| 5407 ** 1 before returning. Otherwise, if no error occurs and the iterator is |
| 5408 ** successfully advanced, *pbEof is set to 0. |
| 5409 ** |
| 5410 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5411 ** SQLITE_OK. |
| 5412 */ |
| 5413 static int fts3EvalIncrPhraseNext( |
| 5414 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5415 Fts3Phrase *p, /* Phrase object to advance to next docid */ |
| 5416 u8 *pbEof /* OUT: Set to 1 if EOF */ |
| 5417 ){ |
| 5418 int rc = SQLITE_OK; |
| 5419 Fts3Doclist *pDL = &p->doclist; |
| 5420 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5421 u8 bEof = 0; |
| 5422 |
| 5423 /* This is only called if it is guaranteed that the phrase has at least |
| 5424 ** one incremental token. In which case the bIncr flag is set. */ |
| 5425 assert( p->bIncr==1 ); |
| 5426 |
| 5427 if( p->nToken==1 && p->bIncr ){ |
| 5428 rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, |
| 5429 &pDL->iDocid, &pDL->pList, &pDL->nList |
| 5430 ); |
| 5431 if( pDL->pList==0 ) bEof = 1; |
| 5432 }else{ |
| 5433 int bDescDoclist = pCsr->bDesc; |
| 5434 struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; |
| 5435 |
| 5436 memset(a, 0, sizeof(a)); |
| 5437 assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); |
| 5438 assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS ); |
| 5439 |
| 5440 while( bEof==0 ){ |
| 5441 int bMaxSet = 0; |
| 5442 sqlite3_int64 iMax = 0; /* Largest docid for all iterators */ |
| 5443 int i; /* Used to iterate through tokens */ |
| 5444 |
| 5445 /* Advance the iterator for each token in the phrase once. */ |
| 5446 for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){ |
| 5447 rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); |
| 5448 if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ |
| 5449 iMax = a[i].iDocid; |
| 5450 bMaxSet = 1; |
| 5451 } |
| 5452 } |
| 5453 assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) ); |
| 5454 assert( rc!=SQLITE_OK || bMaxSet ); |
| 5455 |
| 5456 /* Keep advancing iterators until they all point to the same document */ |
| 5457 for(i=0; i<p->nToken; i++){ |
| 5458 while( rc==SQLITE_OK && bEof==0 |
| 5459 && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 |
| 5460 ){ |
| 5461 rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); |
| 5462 if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ |
| 5463 iMax = a[i].iDocid; |
| 5464 i = 0; |
| 5465 } |
| 5466 } |
| 5467 } |
| 5468 |
| 5469 /* Check if the current entries really are a phrase match */ |
| 5470 if( bEof==0 ){ |
| 5471 int nList = 0; |
| 5472 int nByte = a[p->nToken-1].nList; |
| 5473 char *aDoclist = sqlite3_malloc(nByte+1); |
| 5474 if( !aDoclist ) return SQLITE_NOMEM; |
| 5475 memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); |
| 5476 |
| 5477 for(i=0; i<(p->nToken-1); i++){ |
| 5478 if( a[i].bIgnore==0 ){ |
| 5479 char *pL = a[i].pList; |
| 5480 char *pR = aDoclist; |
| 5481 char *pOut = aDoclist; |
| 5482 int nDist = p->nToken-1-i; |
| 5483 int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); |
| 5484 if( res==0 ) break; |
| 5485 nList = (int)(pOut - aDoclist); |
| 5486 } |
| 5487 } |
| 5488 if( i==(p->nToken-1) ){ |
| 5489 pDL->iDocid = iMax; |
| 5490 pDL->pList = aDoclist; |
| 5491 pDL->nList = nList; |
| 5492 pDL->bFreeList = 1; |
| 5493 break; |
| 5494 } |
| 5495 sqlite3_free(aDoclist); |
| 5496 } |
| 5497 } |
| 5498 } |
| 5499 |
| 5500 *pbEof = bEof; |
| 5501 return rc; |
| 5502 } |
| 5503 |
| 5504 /* |
| 5505 ** Attempt to move the phrase iterator to point to the next matching docid. |
| 5506 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5507 ** SQLITE_OK. |
| 5508 ** |
| 5509 ** If there is no "next" entry and no error occurs, then *pbEof is set to |
| 5510 ** 1 before returning. Otherwise, if no error occurs and the iterator is |
| 5511 ** successfully advanced, *pbEof is set to 0. |
| 5512 */ |
| 5513 static int fts3EvalPhraseNext( |
| 5514 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5515 Fts3Phrase *p, /* Phrase object to advance to next docid */ |
| 5516 u8 *pbEof /* OUT: Set to 1 if EOF */ |
| 5517 ){ |
| 5518 int rc = SQLITE_OK; |
| 5519 Fts3Doclist *pDL = &p->doclist; |
| 5520 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5521 |
| 5522 if( p->bIncr ){ |
| 5523 rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); |
| 5524 }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ |
| 5525 sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, |
| 5526 &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof |
| 5527 ); |
| 5528 pDL->pList = pDL->pNextDocid; |
| 5529 }else{ |
| 5530 fts3EvalDlPhraseNext(pTab, pDL, pbEof); |
| 5531 } |
| 5532 |
| 5533 return rc; |
| 5534 } |
| 5535 |
| 5536 /* |
| 5537 ** |
| 5538 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 5539 ** Otherwise, fts3EvalPhraseStart() is called on all phrases within the |
| 5540 ** expression. Also the Fts3Expr.bDeferred variable is set to true for any |
| 5541 ** expressions for which all descendent tokens are deferred. |
| 5542 ** |
| 5543 ** If parameter bOptOk is zero, then it is guaranteed that the |
| 5544 ** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for |
| 5545 ** each phrase in the expression (subject to deferred token processing). |
| 5546 ** Or, if bOptOk is non-zero, then one or more tokens within the expression |
| 5547 ** may be loaded incrementally, meaning doclist.aAll/nAll is not available. |
| 5548 ** |
| 5549 ** If an error occurs within this function, *pRc is set to an SQLite error |
| 5550 ** code before returning. |
| 5551 */ |
| 5552 static void fts3EvalStartReaders( |
| 5553 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5554 Fts3Expr *pExpr, /* Expression to initialize phrases in */ |
| 5555 int *pRc /* IN/OUT: Error code */ |
| 5556 ){ |
| 5557 if( pExpr && SQLITE_OK==*pRc ){ |
| 5558 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 5559 int nToken = pExpr->pPhrase->nToken; |
| 5560 if( nToken ){ |
| 5561 int i; |
| 5562 for(i=0; i<nToken; i++){ |
| 5563 if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; |
| 5564 } |
| 5565 pExpr->bDeferred = (i==nToken); |
| 5566 } |
| 5567 *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); |
| 5568 }else{ |
| 5569 fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); |
| 5570 fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); |
| 5571 pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); |
| 5572 } |
| 5573 } |
| 5574 } |
| 5575 |
| 5576 /* |
| 5577 ** An array of the following structures is assembled as part of the process |
| 5578 ** of selecting tokens to defer before the query starts executing (as part |
| 5579 ** of the xFilter() method). There is one element in the array for each |
| 5580 ** token in the FTS expression. |
| 5581 ** |
| 5582 ** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong |
| 5583 ** to phrases that are connected only by AND and NEAR operators (not OR or |
| 5584 ** NOT). When determining tokens to defer, each AND/NEAR cluster is considered |
| 5585 ** separately. The root of a tokens AND/NEAR cluster is stored in |
| 5586 ** Fts3TokenAndCost.pRoot. |
| 5587 */ |
| 5588 typedef struct Fts3TokenAndCost Fts3TokenAndCost; |
| 5589 struct Fts3TokenAndCost { |
| 5590 Fts3Phrase *pPhrase; /* The phrase the token belongs to */ |
| 5591 int iToken; /* Position of token in phrase */ |
| 5592 Fts3PhraseToken *pToken; /* The token itself */ |
| 5593 Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ |
| 5594 int nOvfl; /* Number of overflow pages to load doclist */ |
| 5595 int iCol; /* The column the token must match */ |
| 5596 }; |
| 5597 |
| 5598 /* |
| 5599 ** This function is used to populate an allocated Fts3TokenAndCost array. |
| 5600 ** |
| 5601 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 5602 ** Otherwise, if an error occurs during execution, *pRc is set to an |
| 5603 ** SQLite error code. |
| 5604 */ |
| 5605 static void fts3EvalTokenCosts( |
| 5606 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5607 Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ |
| 5608 Fts3Expr *pExpr, /* Expression to consider */ |
| 5609 Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ |
| 5610 Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ |
| 5611 int *pRc /* IN/OUT: Error code */ |
| 5612 ){ |
| 5613 if( *pRc==SQLITE_OK ){ |
| 5614 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 5615 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 5616 int i; |
| 5617 for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ |
| 5618 Fts3TokenAndCost *pTC = (*ppTC)++; |
| 5619 pTC->pPhrase = pPhrase; |
| 5620 pTC->iToken = i; |
| 5621 pTC->pRoot = pRoot; |
| 5622 pTC->pToken = &pPhrase->aToken[i]; |
| 5623 pTC->iCol = pPhrase->iColumn; |
| 5624 *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); |
| 5625 } |
| 5626 }else if( pExpr->eType!=FTSQUERY_NOT ){ |
| 5627 assert( pExpr->eType==FTSQUERY_OR |
| 5628 || pExpr->eType==FTSQUERY_AND |
| 5629 || pExpr->eType==FTSQUERY_NEAR |
| 5630 ); |
| 5631 assert( pExpr->pLeft && pExpr->pRight ); |
| 5632 if( pExpr->eType==FTSQUERY_OR ){ |
| 5633 pRoot = pExpr->pLeft; |
| 5634 **ppOr = pRoot; |
| 5635 (*ppOr)++; |
| 5636 } |
| 5637 fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); |
| 5638 if( pExpr->eType==FTSQUERY_OR ){ |
| 5639 pRoot = pExpr->pRight; |
| 5640 **ppOr = pRoot; |
| 5641 (*ppOr)++; |
| 5642 } |
| 5643 fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); |
| 5644 } |
| 5645 } |
| 5646 } |
| 5647 |
| 5648 /* |
| 5649 ** Determine the average document (row) size in pages. If successful, |
| 5650 ** write this value to *pnPage and return SQLITE_OK. Otherwise, return |
| 5651 ** an SQLite error code. |
| 5652 ** |
| 5653 ** The average document size in pages is calculated by first calculating |
| 5654 ** determining the average size in bytes, B. If B is less than the amount |
| 5655 ** of data that will fit on a single leaf page of an intkey table in |
| 5656 ** this database, then the average docsize is 1. Otherwise, it is 1 plus |
| 5657 ** the number of overflow pages consumed by a record B bytes in size. |
| 5658 */ |
| 5659 static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ |
| 5660 if( pCsr->nRowAvg==0 ){ |
| 5661 /* The average document size, which is required to calculate the cost |
| 5662 ** of each doclist, has not yet been determined. Read the required |
| 5663 ** data from the %_stat table to calculate it. |
| 5664 ** |
| 5665 ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 |
| 5666 ** varints, where nCol is the number of columns in the FTS3 table. |
| 5667 ** The first varint is the number of documents currently stored in |
| 5668 ** the table. The following nCol varints contain the total amount of |
| 5669 ** data stored in all rows of each column of the table, from left |
| 5670 ** to right. |
| 5671 */ |
| 5672 int rc; |
| 5673 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; |
| 5674 sqlite3_stmt *pStmt; |
| 5675 sqlite3_int64 nDoc = 0; |
| 5676 sqlite3_int64 nByte = 0; |
| 5677 const char *pEnd; |
| 5678 const char *a; |
| 5679 |
| 5680 rc = sqlite3Fts3SelectDoctotal(p, &pStmt); |
| 5681 if( rc!=SQLITE_OK ) return rc; |
| 5682 a = sqlite3_column_blob(pStmt, 0); |
| 5683 assert( a ); |
| 5684 |
| 5685 pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; |
| 5686 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 5687 while( a<pEnd ){ |
| 5688 a += sqlite3Fts3GetVarint(a, &nByte); |
| 5689 } |
| 5690 if( nDoc==0 || nByte==0 ){ |
| 5691 sqlite3_reset(pStmt); |
| 5692 return FTS_CORRUPT_VTAB; |
| 5693 } |
| 5694 |
| 5695 pCsr->nDoc = nDoc; |
| 5696 pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); |
| 5697 assert( pCsr->nRowAvg>0 ); |
| 5698 rc = sqlite3_reset(pStmt); |
| 5699 if( rc!=SQLITE_OK ) return rc; |
| 5700 } |
| 5701 |
| 5702 *pnPage = pCsr->nRowAvg; |
| 5703 return SQLITE_OK; |
| 5704 } |
| 5705 |
| 5706 /* |
| 5707 ** This function is called to select the tokens (if any) that will be |
| 5708 ** deferred. The array aTC[] has already been populated when this is |
| 5709 ** called. |
| 5710 ** |
| 5711 ** This function is called once for each AND/NEAR cluster in the |
| 5712 ** expression. Each invocation determines which tokens to defer within |
| 5713 ** the cluster with root node pRoot. See comments above the definition |
| 5714 ** of struct Fts3TokenAndCost for more details. |
| 5715 ** |
| 5716 ** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() |
| 5717 ** called on each token to defer. Otherwise, an SQLite error code is |
| 5718 ** returned. |
| 5719 */ |
| 5720 static int fts3EvalSelectDeferred( |
| 5721 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5722 Fts3Expr *pRoot, /* Consider tokens with this root node */ |
| 5723 Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ |
| 5724 int nTC /* Number of entries in aTC[] */ |
| 5725 ){ |
| 5726 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5727 int nDocSize = 0; /* Number of pages per doc loaded */ |
| 5728 int rc = SQLITE_OK; /* Return code */ |
| 5729 int ii; /* Iterator variable for various purposes */ |
| 5730 int nOvfl = 0; /* Total overflow pages used by doclists */ |
| 5731 int nToken = 0; /* Total number of tokens in cluster */ |
| 5732 |
| 5733 int nMinEst = 0; /* The minimum count for any phrase so far. */ |
| 5734 int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ |
| 5735 |
| 5736 /* Tokens are never deferred for FTS tables created using the content=xxx |
| 5737 ** option. The reason being that it is not guaranteed that the content |
| 5738 ** table actually contains the same data as the index. To prevent this from |
| 5739 ** causing any problems, the deferred token optimization is completely |
| 5740 ** disabled for content=xxx tables. */ |
| 5741 if( pTab->zContentTbl ){ |
| 5742 return SQLITE_OK; |
| 5743 } |
| 5744 |
| 5745 /* Count the tokens in this AND/NEAR cluster. If none of the doclists |
| 5746 ** associated with the tokens spill onto overflow pages, or if there is |
| 5747 ** only 1 token, exit early. No tokens to defer in this case. */ |
| 5748 for(ii=0; ii<nTC; ii++){ |
| 5749 if( aTC[ii].pRoot==pRoot ){ |
| 5750 nOvfl += aTC[ii].nOvfl; |
| 5751 nToken++; |
| 5752 } |
| 5753 } |
| 5754 if( nOvfl==0 || nToken<2 ) return SQLITE_OK; |
| 5755 |
| 5756 /* Obtain the average docsize (in pages). */ |
| 5757 rc = fts3EvalAverageDocsize(pCsr, &nDocSize); |
| 5758 assert( rc!=SQLITE_OK || nDocSize>0 ); |
| 5759 |
| 5760 |
| 5761 /* Iterate through all tokens in this AND/NEAR cluster, in ascending order |
| 5762 ** of the number of overflow pages that will be loaded by the pager layer |
| 5763 ** to retrieve the entire doclist for the token from the full-text index. |
| 5764 ** Load the doclists for tokens that are either: |
| 5765 ** |
| 5766 ** a. The cheapest token in the entire query (i.e. the one visited by the |
| 5767 ** first iteration of this loop), or |
| 5768 ** |
| 5769 ** b. Part of a multi-token phrase. |
| 5770 ** |
| 5771 ** After each token doclist is loaded, merge it with the others from the |
| 5772 ** same phrase and count the number of documents that the merged doclist |
| 5773 ** contains. Set variable "nMinEst" to the smallest number of documents in |
| 5774 ** any phrase doclist for which 1 or more token doclists have been loaded. |
| 5775 ** Let nOther be the number of other phrases for which it is certain that |
| 5776 ** one or more tokens will not be deferred. |
| 5777 ** |
| 5778 ** Then, for each token, defer it if loading the doclist would result in |
| 5779 ** loading N or more overflow pages into memory, where N is computed as: |
| 5780 ** |
| 5781 ** (nMinEst + 4^nOther - 1) / (4^nOther) |
| 5782 */ |
| 5783 for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){ |
| 5784 int iTC; /* Used to iterate through aTC[] array. */ |
| 5785 Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */ |
| 5786 |
| 5787 /* Set pTC to point to the cheapest remaining token. */ |
| 5788 for(iTC=0; iTC<nTC; iTC++){ |
| 5789 if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot |
| 5790 && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) |
| 5791 ){ |
| 5792 pTC = &aTC[iTC]; |
| 5793 } |
| 5794 } |
| 5795 assert( pTC ); |
| 5796 |
| 5797 if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ |
| 5798 /* The number of overflow pages to load for this (and therefore all |
| 5799 ** subsequent) tokens is greater than the estimated number of pages |
| 5800 ** that will be loaded if all subsequent tokens are deferred. |
| 5801 */ |
| 5802 Fts3PhraseToken *pToken = pTC->pToken; |
| 5803 rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); |
| 5804 fts3SegReaderCursorFree(pToken->pSegcsr); |
| 5805 pToken->pSegcsr = 0; |
| 5806 }else{ |
| 5807 /* Set nLoad4 to the value of (4^nOther) for the next iteration of the |
| 5808 ** for-loop. Except, limit the value to 2^24 to prevent it from |
| 5809 ** overflowing the 32-bit integer it is stored in. */ |
| 5810 if( ii<12 ) nLoad4 = nLoad4*4; |
| 5811 |
| 5812 if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ |
| 5813 /* Either this is the cheapest token in the entire query, or it is |
| 5814 ** part of a multi-token phrase. Either way, the entire doclist will |
| 5815 ** (eventually) be loaded into memory. It may as well be now. */ |
| 5816 Fts3PhraseToken *pToken = pTC->pToken; |
| 5817 int nList = 0; |
| 5818 char *pList = 0; |
| 5819 rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); |
| 5820 assert( rc==SQLITE_OK || pList==0 ); |
| 5821 if( rc==SQLITE_OK ){ |
| 5822 rc = fts3EvalPhraseMergeToken( |
| 5823 pTab, pTC->pPhrase, pTC->iToken,pList,nList |
| 5824 ); |
| 5825 } |
| 5826 if( rc==SQLITE_OK ){ |
| 5827 int nCount; |
| 5828 nCount = fts3DoclistCountDocids( |
| 5829 pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll |
| 5830 ); |
| 5831 if( ii==0 || nCount<nMinEst ) nMinEst = nCount; |
| 5832 } |
| 5833 } |
| 5834 } |
| 5835 pTC->pToken = 0; |
| 5836 } |
| 5837 |
| 5838 return rc; |
| 5839 } |
| 5840 |
| 5841 /* |
| 5842 ** This function is called from within the xFilter method. It initializes |
| 5843 ** the full-text query currently stored in pCsr->pExpr. To iterate through |
| 5844 ** the results of a query, the caller does: |
| 5845 ** |
| 5846 ** fts3EvalStart(pCsr); |
| 5847 ** while( 1 ){ |
| 5848 ** fts3EvalNext(pCsr); |
| 5849 ** if( pCsr->bEof ) break; |
| 5850 ** ... return row pCsr->iPrevId to the caller ... |
| 5851 ** } |
| 5852 */ |
| 5853 static int fts3EvalStart(Fts3Cursor *pCsr){ |
| 5854 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5855 int rc = SQLITE_OK; |
| 5856 int nToken = 0; |
| 5857 int nOr = 0; |
| 5858 |
| 5859 /* Allocate a MultiSegReader for each token in the expression. */ |
| 5860 fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); |
| 5861 |
| 5862 /* Determine which, if any, tokens in the expression should be deferred. */ |
| 5863 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 5864 if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ |
| 5865 Fts3TokenAndCost *aTC; |
| 5866 Fts3Expr **apOr; |
| 5867 aTC = (Fts3TokenAndCost *)sqlite3_malloc( |
| 5868 sizeof(Fts3TokenAndCost) * nToken |
| 5869 + sizeof(Fts3Expr *) * nOr * 2 |
| 5870 ); |
| 5871 apOr = (Fts3Expr **)&aTC[nToken]; |
| 5872 |
| 5873 if( !aTC ){ |
| 5874 rc = SQLITE_NOMEM; |
| 5875 }else{ |
| 5876 int ii; |
| 5877 Fts3TokenAndCost *pTC = aTC; |
| 5878 Fts3Expr **ppOr = apOr; |
| 5879 |
| 5880 fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); |
| 5881 nToken = (int)(pTC-aTC); |
| 5882 nOr = (int)(ppOr-apOr); |
| 5883 |
| 5884 if( rc==SQLITE_OK ){ |
| 5885 rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); |
| 5886 for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){ |
| 5887 rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken); |
| 5888 } |
| 5889 } |
| 5890 |
| 5891 sqlite3_free(aTC); |
| 5892 } |
| 5893 } |
| 5894 #endif |
| 5895 |
| 5896 fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); |
| 5897 return rc; |
| 5898 } |
| 5899 |
| 5900 /* |
| 5901 ** Invalidate the current position list for phrase pPhrase. |
| 5902 */ |
| 5903 static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ |
| 5904 if( pPhrase->doclist.bFreeList ){ |
| 5905 sqlite3_free(pPhrase->doclist.pList); |
| 5906 } |
| 5907 pPhrase->doclist.pList = 0; |
| 5908 pPhrase->doclist.nList = 0; |
| 5909 pPhrase->doclist.bFreeList = 0; |
| 5910 } |
| 5911 |
| 5912 /* |
| 5913 ** This function is called to edit the position list associated with |
| 5914 ** the phrase object passed as the fifth argument according to a NEAR |
| 5915 ** condition. For example: |
| 5916 ** |
| 5917 ** abc NEAR/5 "def ghi" |
| 5918 ** |
| 5919 ** Parameter nNear is passed the NEAR distance of the expression (5 in |
| 5920 ** the example above). When this function is called, *paPoslist points to |
| 5921 ** the position list, and *pnToken is the number of phrase tokens in, the |
| 5922 ** phrase on the other side of the NEAR operator to pPhrase. For example, |
| 5923 ** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to |
| 5924 ** the position list associated with phrase "abc". |
| 5925 ** |
| 5926 ** All positions in the pPhrase position list that are not sufficiently |
| 5927 ** close to a position in the *paPoslist position list are removed. If this |
| 5928 ** leaves 0 positions, zero is returned. Otherwise, non-zero. |
| 5929 ** |
| 5930 ** Before returning, *paPoslist is set to point to the position lsit |
| 5931 ** associated with pPhrase. And *pnToken is set to the number of tokens in |
| 5932 ** pPhrase. |
| 5933 */ |
| 5934 static int fts3EvalNearTrim( |
| 5935 int nNear, /* NEAR distance. As in "NEAR/nNear". */ |
| 5936 char *aTmp, /* Temporary space to use */ |
| 5937 char **paPoslist, /* IN/OUT: Position list */ |
| 5938 int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ |
| 5939 Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ |
| 5940 ){ |
| 5941 int nParam1 = nNear + pPhrase->nToken; |
| 5942 int nParam2 = nNear + *pnToken; |
| 5943 int nNew; |
| 5944 char *p2; |
| 5945 char *pOut; |
| 5946 int res; |
| 5947 |
| 5948 assert( pPhrase->doclist.pList ); |
| 5949 |
| 5950 p2 = pOut = pPhrase->doclist.pList; |
| 5951 res = fts3PoslistNearMerge( |
| 5952 &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 |
| 5953 ); |
| 5954 if( res ){ |
| 5955 nNew = (int)(pOut - pPhrase->doclist.pList) - 1; |
| 5956 assert( pPhrase->doclist.pList[nNew]=='\0' ); |
| 5957 assert( nNew<=pPhrase->doclist.nList && nNew>0 ); |
| 5958 memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); |
| 5959 pPhrase->doclist.nList = nNew; |
| 5960 *paPoslist = pPhrase->doclist.pList; |
| 5961 *pnToken = pPhrase->nToken; |
| 5962 } |
| 5963 |
| 5964 return res; |
| 5965 } |
| 5966 |
| 5967 /* |
| 5968 ** This function is a no-op if *pRc is other than SQLITE_OK when it is called. |
| 5969 ** Otherwise, it advances the expression passed as the second argument to |
| 5970 ** point to the next matching row in the database. Expressions iterate through |
| 5971 ** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, |
| 5972 ** or descending if it is non-zero. |
| 5973 ** |
| 5974 ** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if |
| 5975 ** successful, the following variables in pExpr are set: |
| 5976 ** |
| 5977 ** Fts3Expr.bEof (non-zero if EOF - there is no next row) |
| 5978 ** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) |
| 5979 ** |
| 5980 ** If the expression is of type FTSQUERY_PHRASE, and the expression is not |
| 5981 ** at EOF, then the following variables are populated with the position list |
| 5982 ** for the phrase for the visited row: |
| 5983 ** |
| 5984 ** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) |
| 5985 ** FTs3Expr.pPhrase->doclist.pList (pointer to position list) |
| 5986 ** |
| 5987 ** It says above that this function advances the expression to the next |
| 5988 ** matching row. This is usually true, but there are the following exceptions: |
| 5989 ** |
| 5990 ** 1. Deferred tokens are not taken into account. If a phrase consists |
| 5991 ** entirely of deferred tokens, it is assumed to match every row in |
| 5992 ** the db. In this case the position-list is not populated at all. |
| 5993 ** |
| 5994 ** Or, if a phrase contains one or more deferred tokens and one or |
| 5995 ** more non-deferred tokens, then the expression is advanced to the |
| 5996 ** next possible match, considering only non-deferred tokens. In other |
| 5997 ** words, if the phrase is "A B C", and "B" is deferred, the expression |
| 5998 ** is advanced to the next row that contains an instance of "A * C", |
| 5999 ** where "*" may match any single token. The position list in this case |
| 6000 ** is populated as for "A * C" before returning. |
| 6001 ** |
| 6002 ** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is |
| 6003 ** advanced to point to the next row that matches "x AND y". |
| 6004 ** |
| 6005 ** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is |
| 6006 ** really a match, taking into account deferred tokens and NEAR operators. |
| 6007 */ |
| 6008 static void fts3EvalNextRow( |
| 6009 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 6010 Fts3Expr *pExpr, /* Expr. to advance to next matching row */ |
| 6011 int *pRc /* IN/OUT: Error code */ |
| 6012 ){ |
| 6013 if( *pRc==SQLITE_OK ){ |
| 6014 int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ |
| 6015 assert( pExpr->bEof==0 ); |
| 6016 pExpr->bStart = 1; |
| 6017 |
| 6018 switch( pExpr->eType ){ |
| 6019 case FTSQUERY_NEAR: |
| 6020 case FTSQUERY_AND: { |
| 6021 Fts3Expr *pLeft = pExpr->pLeft; |
| 6022 Fts3Expr *pRight = pExpr->pRight; |
| 6023 assert( !pLeft->bDeferred || !pRight->bDeferred ); |
| 6024 |
| 6025 if( pLeft->bDeferred ){ |
| 6026 /* LHS is entirely deferred. So we assume it matches every row. |
| 6027 ** Advance the RHS iterator to find the next row visited. */ |
| 6028 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6029 pExpr->iDocid = pRight->iDocid; |
| 6030 pExpr->bEof = pRight->bEof; |
| 6031 }else if( pRight->bDeferred ){ |
| 6032 /* RHS is entirely deferred. So we assume it matches every row. |
| 6033 ** Advance the LHS iterator to find the next row visited. */ |
| 6034 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6035 pExpr->iDocid = pLeft->iDocid; |
| 6036 pExpr->bEof = pLeft->bEof; |
| 6037 }else{ |
| 6038 /* Neither the RHS or LHS are deferred. */ |
| 6039 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6040 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6041 while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ |
| 6042 sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6043 if( iDiff==0 ) break; |
| 6044 if( iDiff<0 ){ |
| 6045 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6046 }else{ |
| 6047 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6048 } |
| 6049 } |
| 6050 pExpr->iDocid = pLeft->iDocid; |
| 6051 pExpr->bEof = (pLeft->bEof || pRight->bEof); |
| 6052 if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){ |
| 6053 if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){ |
| 6054 Fts3Doclist *pDl = &pRight->pPhrase->doclist; |
| 6055 while( *pRc==SQLITE_OK && pRight->bEof==0 ){ |
| 6056 memset(pDl->pList, 0, pDl->nList); |
| 6057 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6058 } |
| 6059 } |
| 6060 if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){ |
| 6061 Fts3Doclist *pDl = &pLeft->pPhrase->doclist; |
| 6062 while( *pRc==SQLITE_OK && pLeft->bEof==0 ){ |
| 6063 memset(pDl->pList, 0, pDl->nList); |
| 6064 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6065 } |
| 6066 } |
| 6067 } |
| 6068 } |
| 6069 break; |
| 6070 } |
| 6071 |
| 6072 case FTSQUERY_OR: { |
| 6073 Fts3Expr *pLeft = pExpr->pLeft; |
| 6074 Fts3Expr *pRight = pExpr->pRight; |
| 6075 sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6076 |
| 6077 assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); |
| 6078 assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); |
| 6079 |
| 6080 if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ |
| 6081 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6082 }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ |
| 6083 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6084 }else{ |
| 6085 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6086 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6087 } |
| 6088 |
| 6089 pExpr->bEof = (pLeft->bEof && pRight->bEof); |
| 6090 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6091 if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ |
| 6092 pExpr->iDocid = pLeft->iDocid; |
| 6093 }else{ |
| 6094 pExpr->iDocid = pRight->iDocid; |
| 6095 } |
| 6096 |
| 6097 break; |
| 6098 } |
| 6099 |
| 6100 case FTSQUERY_NOT: { |
| 6101 Fts3Expr *pLeft = pExpr->pLeft; |
| 6102 Fts3Expr *pRight = pExpr->pRight; |
| 6103 |
| 6104 if( pRight->bStart==0 ){ |
| 6105 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6106 assert( *pRc!=SQLITE_OK || pRight->bStart ); |
| 6107 } |
| 6108 |
| 6109 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6110 if( pLeft->bEof==0 ){ |
| 6111 while( !*pRc |
| 6112 && !pRight->bEof |
| 6113 && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 |
| 6114 ){ |
| 6115 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6116 } |
| 6117 } |
| 6118 pExpr->iDocid = pLeft->iDocid; |
| 6119 pExpr->bEof = pLeft->bEof; |
| 6120 break; |
| 6121 } |
| 6122 |
| 6123 default: { |
| 6124 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6125 fts3EvalInvalidatePoslist(pPhrase); |
| 6126 *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); |
| 6127 pExpr->iDocid = pPhrase->doclist.iDocid; |
| 6128 break; |
| 6129 } |
| 6130 } |
| 6131 } |
| 6132 } |
| 6133 |
| 6134 /* |
| 6135 ** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR |
| 6136 ** cluster, then this function returns 1 immediately. |
| 6137 ** |
| 6138 ** Otherwise, it checks if the current row really does match the NEAR |
| 6139 ** expression, using the data currently stored in the position lists |
| 6140 ** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. |
| 6141 ** |
| 6142 ** If the current row is a match, the position list associated with each |
| 6143 ** phrase in the NEAR expression is edited in place to contain only those |
| 6144 ** phrase instances sufficiently close to their peers to satisfy all NEAR |
| 6145 ** constraints. In this case it returns 1. If the NEAR expression does not |
| 6146 ** match the current row, 0 is returned. The position lists may or may not |
| 6147 ** be edited if 0 is returned. |
| 6148 */ |
| 6149 static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ |
| 6150 int res = 1; |
| 6151 |
| 6152 /* The following block runs if pExpr is the root of a NEAR query. |
| 6153 ** For example, the query: |
| 6154 ** |
| 6155 ** "w" NEAR "x" NEAR "y" NEAR "z" |
| 6156 ** |
| 6157 ** which is represented in tree form as: |
| 6158 ** |
| 6159 ** | |
| 6160 ** +--NEAR--+ <-- root of NEAR query |
| 6161 ** | | |
| 6162 ** +--NEAR--+ "z" |
| 6163 ** | | |
| 6164 ** +--NEAR--+ "y" |
| 6165 ** | | |
| 6166 ** "w" "x" |
| 6167 ** |
| 6168 ** The right-hand child of a NEAR node is always a phrase. The |
| 6169 ** left-hand child may be either a phrase or a NEAR node. There are |
| 6170 ** no exceptions to this - it's the way the parser in fts3_expr.c works. |
| 6171 */ |
| 6172 if( *pRc==SQLITE_OK |
| 6173 && pExpr->eType==FTSQUERY_NEAR |
| 6174 && pExpr->bEof==0 |
| 6175 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) |
| 6176 ){ |
| 6177 Fts3Expr *p; |
| 6178 int nTmp = 0; /* Bytes of temp space */ |
| 6179 char *aTmp; /* Temp space for PoslistNearMerge() */ |
| 6180 |
| 6181 /* Allocate temporary working space. */ |
| 6182 for(p=pExpr; p->pLeft; p=p->pLeft){ |
| 6183 nTmp += p->pRight->pPhrase->doclist.nList; |
| 6184 } |
| 6185 nTmp += p->pPhrase->doclist.nList; |
| 6186 if( nTmp==0 ){ |
| 6187 res = 0; |
| 6188 }else{ |
| 6189 aTmp = sqlite3_malloc(nTmp*2); |
| 6190 if( !aTmp ){ |
| 6191 *pRc = SQLITE_NOMEM; |
| 6192 res = 0; |
| 6193 }else{ |
| 6194 char *aPoslist = p->pPhrase->doclist.pList; |
| 6195 int nToken = p->pPhrase->nToken; |
| 6196 |
| 6197 for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ |
| 6198 Fts3Phrase *pPhrase = p->pRight->pPhrase; |
| 6199 int nNear = p->nNear; |
| 6200 res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); |
| 6201 } |
| 6202 |
| 6203 aPoslist = pExpr->pRight->pPhrase->doclist.pList; |
| 6204 nToken = pExpr->pRight->pPhrase->nToken; |
| 6205 for(p=pExpr->pLeft; p && res; p=p->pLeft){ |
| 6206 int nNear; |
| 6207 Fts3Phrase *pPhrase; |
| 6208 assert( p->pParent && p->pParent->pLeft==p ); |
| 6209 nNear = p->pParent->nNear; |
| 6210 pPhrase = ( |
| 6211 p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase |
| 6212 ); |
| 6213 res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); |
| 6214 } |
| 6215 } |
| 6216 |
| 6217 sqlite3_free(aTmp); |
| 6218 } |
| 6219 } |
| 6220 |
| 6221 return res; |
| 6222 } |
| 6223 |
| 6224 /* |
| 6225 ** This function is a helper function for sqlite3Fts3EvalTestDeferred(). |
| 6226 ** Assuming no error occurs or has occurred, It returns non-zero if the |
| 6227 ** expression passed as the second argument matches the row that pCsr |
| 6228 ** currently points to, or zero if it does not. |
| 6229 ** |
| 6230 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 6231 ** If an error occurs during execution of this function, *pRc is set to |
| 6232 ** the appropriate SQLite error code. In this case the returned value is |
| 6233 ** undefined. |
| 6234 */ |
| 6235 static int fts3EvalTestExpr( |
| 6236 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 6237 Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ |
| 6238 int *pRc /* IN/OUT: Error code */ |
| 6239 ){ |
| 6240 int bHit = 1; /* Return value */ |
| 6241 if( *pRc==SQLITE_OK ){ |
| 6242 switch( pExpr->eType ){ |
| 6243 case FTSQUERY_NEAR: |
| 6244 case FTSQUERY_AND: |
| 6245 bHit = ( |
| 6246 fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) |
| 6247 && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) |
| 6248 && fts3EvalNearTest(pExpr, pRc) |
| 6249 ); |
| 6250 |
| 6251 /* If the NEAR expression does not match any rows, zero the doclist for |
| 6252 ** all phrases involved in the NEAR. This is because the snippet(), |
| 6253 ** offsets() and matchinfo() functions are not supposed to recognize |
| 6254 ** any instances of phrases that are part of unmatched NEAR queries. |
| 6255 ** For example if this expression: |
| 6256 ** |
| 6257 ** ... MATCH 'a OR (b NEAR c)' |
| 6258 ** |
| 6259 ** is matched against a row containing: |
| 6260 ** |
| 6261 ** 'a b d e' |
| 6262 ** |
| 6263 ** then any snippet() should ony highlight the "a" term, not the "b" |
| 6264 ** (as "b" is part of a non-matching NEAR clause). |
| 6265 */ |
| 6266 if( bHit==0 |
| 6267 && pExpr->eType==FTSQUERY_NEAR |
| 6268 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) |
| 6269 ){ |
| 6270 Fts3Expr *p; |
| 6271 for(p=pExpr; p->pPhrase==0; p=p->pLeft){ |
| 6272 if( p->pRight->iDocid==pCsr->iPrevId ){ |
| 6273 fts3EvalInvalidatePoslist(p->pRight->pPhrase); |
| 6274 } |
| 6275 } |
| 6276 if( p->iDocid==pCsr->iPrevId ){ |
| 6277 fts3EvalInvalidatePoslist(p->pPhrase); |
| 6278 } |
| 6279 } |
| 6280 |
| 6281 break; |
| 6282 |
| 6283 case FTSQUERY_OR: { |
| 6284 int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); |
| 6285 int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); |
| 6286 bHit = bHit1 || bHit2; |
| 6287 break; |
| 6288 } |
| 6289 |
| 6290 case FTSQUERY_NOT: |
| 6291 bHit = ( |
| 6292 fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) |
| 6293 && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) |
| 6294 ); |
| 6295 break; |
| 6296 |
| 6297 default: { |
| 6298 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 6299 if( pCsr->pDeferred |
| 6300 && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) |
| 6301 ){ |
| 6302 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6303 assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); |
| 6304 if( pExpr->bDeferred ){ |
| 6305 fts3EvalInvalidatePoslist(pPhrase); |
| 6306 } |
| 6307 *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); |
| 6308 bHit = (pPhrase->doclist.pList!=0); |
| 6309 pExpr->iDocid = pCsr->iPrevId; |
| 6310 }else |
| 6311 #endif |
| 6312 { |
| 6313 bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); |
| 6314 } |
| 6315 break; |
| 6316 } |
| 6317 } |
| 6318 } |
| 6319 return bHit; |
| 6320 } |
| 6321 |
| 6322 /* |
| 6323 ** This function is called as the second part of each xNext operation when |
| 6324 ** iterating through the results of a full-text query. At this point the |
| 6325 ** cursor points to a row that matches the query expression, with the |
| 6326 ** following caveats: |
| 6327 ** |
| 6328 ** * Up until this point, "NEAR" operators in the expression have been |
| 6329 ** treated as "AND". |
| 6330 ** |
| 6331 ** * Deferred tokens have not yet been considered. |
| 6332 ** |
| 6333 ** If *pRc is not SQLITE_OK when this function is called, it immediately |
| 6334 ** returns 0. Otherwise, it tests whether or not after considering NEAR |
| 6335 ** operators and deferred tokens the current row is still a match for the |
| 6336 ** expression. It returns 1 if both of the following are true: |
| 6337 ** |
| 6338 ** 1. *pRc is SQLITE_OK when this function returns, and |
| 6339 ** |
| 6340 ** 2. After scanning the current FTS table row for the deferred tokens, |
| 6341 ** it is determined that the row does *not* match the query. |
| 6342 ** |
| 6343 ** Or, if no error occurs and it seems the current row does match the FTS |
| 6344 ** query, return 0. |
| 6345 */ |
| 6346 SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){ |
| 6347 int rc = *pRc; |
| 6348 int bMiss = 0; |
| 6349 if( rc==SQLITE_OK ){ |
| 6350 |
| 6351 /* If there are one or more deferred tokens, load the current row into |
| 6352 ** memory and scan it to determine the position list for each deferred |
| 6353 ** token. Then, see if this row is really a match, considering deferred |
| 6354 ** tokens and NEAR operators (neither of which were taken into account |
| 6355 ** earlier, by fts3EvalNextRow()). |
| 6356 */ |
| 6357 if( pCsr->pDeferred ){ |
| 6358 rc = fts3CursorSeek(0, pCsr); |
| 6359 if( rc==SQLITE_OK ){ |
| 6360 rc = sqlite3Fts3CacheDeferredDoclists(pCsr); |
| 6361 } |
| 6362 } |
| 6363 bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); |
| 6364 |
| 6365 /* Free the position-lists accumulated for each deferred token above. */ |
| 6366 sqlite3Fts3FreeDeferredDoclists(pCsr); |
| 6367 *pRc = rc; |
| 6368 } |
| 6369 return (rc==SQLITE_OK && bMiss); |
| 6370 } |
| 6371 |
| 6372 /* |
| 6373 ** Advance to the next document that matches the FTS expression in |
| 6374 ** Fts3Cursor.pExpr. |
| 6375 */ |
| 6376 static int fts3EvalNext(Fts3Cursor *pCsr){ |
| 6377 int rc = SQLITE_OK; /* Return Code */ |
| 6378 Fts3Expr *pExpr = pCsr->pExpr; |
| 6379 assert( pCsr->isEof==0 ); |
| 6380 if( pExpr==0 ){ |
| 6381 pCsr->isEof = 1; |
| 6382 }else{ |
| 6383 do { |
| 6384 if( pCsr->isRequireSeek==0 ){ |
| 6385 sqlite3_reset(pCsr->pStmt); |
| 6386 } |
| 6387 assert( sqlite3_data_count(pCsr->pStmt)==0 ); |
| 6388 fts3EvalNextRow(pCsr, pExpr, &rc); |
| 6389 pCsr->isEof = pExpr->bEof; |
| 6390 pCsr->isRequireSeek = 1; |
| 6391 pCsr->isMatchinfoNeeded = 1; |
| 6392 pCsr->iPrevId = pExpr->iDocid; |
| 6393 }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); |
| 6394 } |
| 6395 |
| 6396 /* Check if the cursor is past the end of the docid range specified |
| 6397 ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ |
| 6398 if( rc==SQLITE_OK && ( |
| 6399 (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) |
| 6400 || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid) |
| 6401 )){ |
| 6402 pCsr->isEof = 1; |
| 6403 } |
| 6404 |
| 6405 return rc; |
| 6406 } |
| 6407 |
| 6408 /* |
| 6409 ** Restart interation for expression pExpr so that the next call to |
| 6410 ** fts3EvalNext() visits the first row. Do not allow incremental |
| 6411 ** loading or merging of phrase doclists for this iteration. |
| 6412 ** |
| 6413 ** If *pRc is other than SQLITE_OK when this function is called, it is |
| 6414 ** a no-op. If an error occurs within this function, *pRc is set to an |
| 6415 ** SQLite error code before returning. |
| 6416 */ |
| 6417 static void fts3EvalRestart( |
| 6418 Fts3Cursor *pCsr, |
| 6419 Fts3Expr *pExpr, |
| 6420 int *pRc |
| 6421 ){ |
| 6422 if( pExpr && *pRc==SQLITE_OK ){ |
| 6423 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6424 |
| 6425 if( pPhrase ){ |
| 6426 fts3EvalInvalidatePoslist(pPhrase); |
| 6427 if( pPhrase->bIncr ){ |
| 6428 int i; |
| 6429 for(i=0; i<pPhrase->nToken; i++){ |
| 6430 Fts3PhraseToken *pToken = &pPhrase->aToken[i]; |
| 6431 assert( pToken->pDeferred==0 ); |
| 6432 if( pToken->pSegcsr ){ |
| 6433 sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); |
| 6434 } |
| 6435 } |
| 6436 *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); |
| 6437 } |
| 6438 pPhrase->doclist.pNextDocid = 0; |
| 6439 pPhrase->doclist.iDocid = 0; |
| 6440 pPhrase->pOrPoslist = 0; |
| 6441 } |
| 6442 |
| 6443 pExpr->iDocid = 0; |
| 6444 pExpr->bEof = 0; |
| 6445 pExpr->bStart = 0; |
| 6446 |
| 6447 fts3EvalRestart(pCsr, pExpr->pLeft, pRc); |
| 6448 fts3EvalRestart(pCsr, pExpr->pRight, pRc); |
| 6449 } |
| 6450 } |
| 6451 |
| 6452 /* |
| 6453 ** After allocating the Fts3Expr.aMI[] array for each phrase in the |
| 6454 ** expression rooted at pExpr, the cursor iterates through all rows matched |
| 6455 ** by pExpr, calling this function for each row. This function increments |
| 6456 ** the values in Fts3Expr.aMI[] according to the position-list currently |
| 6457 ** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase |
| 6458 ** expression nodes. |
| 6459 */ |
| 6460 static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ |
| 6461 if( pExpr ){ |
| 6462 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6463 if( pPhrase && pPhrase->doclist.pList ){ |
| 6464 int iCol = 0; |
| 6465 char *p = pPhrase->doclist.pList; |
| 6466 |
| 6467 assert( *p ); |
| 6468 while( 1 ){ |
| 6469 u8 c = 0; |
| 6470 int iCnt = 0; |
| 6471 while( 0xFE & (*p | c) ){ |
| 6472 if( (c&0x80)==0 ) iCnt++; |
| 6473 c = *p++ & 0x80; |
| 6474 } |
| 6475 |
| 6476 /* aMI[iCol*3 + 1] = Number of occurrences |
| 6477 ** aMI[iCol*3 + 2] = Number of rows containing at least one instance |
| 6478 */ |
| 6479 pExpr->aMI[iCol*3 + 1] += iCnt; |
| 6480 pExpr->aMI[iCol*3 + 2] += (iCnt>0); |
| 6481 if( *p==0x00 ) break; |
| 6482 p++; |
| 6483 p += fts3GetVarint32(p, &iCol); |
| 6484 } |
| 6485 } |
| 6486 |
| 6487 fts3EvalUpdateCounts(pExpr->pLeft); |
| 6488 fts3EvalUpdateCounts(pExpr->pRight); |
| 6489 } |
| 6490 } |
| 6491 |
| 6492 /* |
| 6493 ** Expression pExpr must be of type FTSQUERY_PHRASE. |
| 6494 ** |
| 6495 ** If it is not already allocated and populated, this function allocates and |
| 6496 ** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part |
| 6497 ** of a NEAR expression, then it also allocates and populates the same array |
| 6498 ** for all other phrases that are part of the NEAR expression. |
| 6499 ** |
| 6500 ** SQLITE_OK is returned if the aMI[] array is successfully allocated and |
| 6501 ** populated. Otherwise, if an error occurs, an SQLite error code is returned. |
| 6502 */ |
| 6503 static int fts3EvalGatherStats( |
| 6504 Fts3Cursor *pCsr, /* Cursor object */ |
| 6505 Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ |
| 6506 ){ |
| 6507 int rc = SQLITE_OK; /* Return code */ |
| 6508 |
| 6509 assert( pExpr->eType==FTSQUERY_PHRASE ); |
| 6510 if( pExpr->aMI==0 ){ |
| 6511 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6512 Fts3Expr *pRoot; /* Root of NEAR expression */ |
| 6513 Fts3Expr *p; /* Iterator used for several purposes */ |
| 6514 |
| 6515 sqlite3_int64 iPrevId = pCsr->iPrevId; |
| 6516 sqlite3_int64 iDocid; |
| 6517 u8 bEof; |
| 6518 |
| 6519 /* Find the root of the NEAR expression */ |
| 6520 pRoot = pExpr; |
| 6521 while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ |
| 6522 pRoot = pRoot->pParent; |
| 6523 } |
| 6524 iDocid = pRoot->iDocid; |
| 6525 bEof = pRoot->bEof; |
| 6526 assert( pRoot->bStart ); |
| 6527 |
| 6528 /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ |
| 6529 for(p=pRoot; p; p=p->pLeft){ |
| 6530 Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); |
| 6531 assert( pE->aMI==0 ); |
| 6532 pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); |
| 6533 if( !pE->aMI ) return SQLITE_NOMEM; |
| 6534 memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); |
| 6535 } |
| 6536 |
| 6537 fts3EvalRestart(pCsr, pRoot, &rc); |
| 6538 |
| 6539 while( pCsr->isEof==0 && rc==SQLITE_OK ){ |
| 6540 |
| 6541 do { |
| 6542 /* Ensure the %_content statement is reset. */ |
| 6543 if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); |
| 6544 assert( sqlite3_data_count(pCsr->pStmt)==0 ); |
| 6545 |
| 6546 /* Advance to the next document */ |
| 6547 fts3EvalNextRow(pCsr, pRoot, &rc); |
| 6548 pCsr->isEof = pRoot->bEof; |
| 6549 pCsr->isRequireSeek = 1; |
| 6550 pCsr->isMatchinfoNeeded = 1; |
| 6551 pCsr->iPrevId = pRoot->iDocid; |
| 6552 }while( pCsr->isEof==0 |
| 6553 && pRoot->eType==FTSQUERY_NEAR |
| 6554 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) |
| 6555 ); |
| 6556 |
| 6557 if( rc==SQLITE_OK && pCsr->isEof==0 ){ |
| 6558 fts3EvalUpdateCounts(pRoot); |
| 6559 } |
| 6560 } |
| 6561 |
| 6562 pCsr->isEof = 0; |
| 6563 pCsr->iPrevId = iPrevId; |
| 6564 |
| 6565 if( bEof ){ |
| 6566 pRoot->bEof = bEof; |
| 6567 }else{ |
| 6568 /* Caution: pRoot may iterate through docids in ascending or descending |
| 6569 ** order. For this reason, even though it seems more defensive, the |
| 6570 ** do loop can not be written: |
| 6571 ** |
| 6572 ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK ); |
| 6573 */ |
| 6574 fts3EvalRestart(pCsr, pRoot, &rc); |
| 6575 do { |
| 6576 fts3EvalNextRow(pCsr, pRoot, &rc); |
| 6577 assert( pRoot->bEof==0 ); |
| 6578 }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); |
| 6579 } |
| 6580 } |
| 6581 return rc; |
| 6582 } |
| 6583 |
| 6584 /* |
| 6585 ** This function is used by the matchinfo() module to query a phrase |
| 6586 ** expression node for the following information: |
| 6587 ** |
| 6588 ** 1. The total number of occurrences of the phrase in each column of |
| 6589 ** the FTS table (considering all rows), and |
| 6590 ** |
| 6591 ** 2. For each column, the number of rows in the table for which the |
| 6592 ** column contains at least one instance of the phrase. |
| 6593 ** |
| 6594 ** If no error occurs, SQLITE_OK is returned and the values for each column |
| 6595 ** written into the array aiOut as follows: |
| 6596 ** |
| 6597 ** aiOut[iCol*3 + 1] = Number of occurrences |
| 6598 ** aiOut[iCol*3 + 2] = Number of rows containing at least one instance |
| 6599 ** |
| 6600 ** Caveats: |
| 6601 ** |
| 6602 ** * If a phrase consists entirely of deferred tokens, then all output |
| 6603 ** values are set to the number of documents in the table. In other |
| 6604 ** words we assume that very common tokens occur exactly once in each |
| 6605 ** column of each row of the table. |
| 6606 ** |
| 6607 ** * If a phrase contains some deferred tokens (and some non-deferred |
| 6608 ** tokens), count the potential occurrence identified by considering |
| 6609 ** the non-deferred tokens instead of actual phrase occurrences. |
| 6610 ** |
| 6611 ** * If the phrase is part of a NEAR expression, then only phrase instances |
| 6612 ** that meet the NEAR constraint are included in the counts. |
| 6613 */ |
| 6614 SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats( |
| 6615 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 6616 Fts3Expr *pExpr, /* Phrase expression */ |
| 6617 u32 *aiOut /* Array to write results into (see above) */ |
| 6618 ){ |
| 6619 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6620 int rc = SQLITE_OK; |
| 6621 int iCol; |
| 6622 |
| 6623 if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ |
| 6624 assert( pCsr->nDoc>0 ); |
| 6625 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 6626 aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; |
| 6627 aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; |
| 6628 } |
| 6629 }else{ |
| 6630 rc = fts3EvalGatherStats(pCsr, pExpr); |
| 6631 if( rc==SQLITE_OK ){ |
| 6632 assert( pExpr->aMI ); |
| 6633 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 6634 aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; |
| 6635 aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; |
| 6636 } |
| 6637 } |
| 6638 } |
| 6639 |
| 6640 return rc; |
| 6641 } |
| 6642 |
| 6643 /* |
| 6644 ** The expression pExpr passed as the second argument to this function |
| 6645 ** must be of type FTSQUERY_PHRASE. |
| 6646 ** |
| 6647 ** The returned value is either NULL or a pointer to a buffer containing |
| 6648 ** a position-list indicating the occurrences of the phrase in column iCol |
| 6649 ** of the current row. |
| 6650 ** |
| 6651 ** More specifically, the returned buffer contains 1 varint for each |
| 6652 ** occurrence of the phrase in the column, stored using the normal (delta+2) |
| 6653 ** compression and is terminated by either an 0x01 or 0x00 byte. For example, |
| 6654 ** if the requested column contains "a b X c d X X" and the position-list |
| 6655 ** for 'X' is requested, the buffer returned may contain: |
| 6656 ** |
| 6657 ** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 |
| 6658 ** |
| 6659 ** This function works regardless of whether or not the phrase is deferred, |
| 6660 ** incremental, or neither. |
| 6661 */ |
| 6662 SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( |
| 6663 Fts3Cursor *pCsr, /* FTS3 cursor object */ |
| 6664 Fts3Expr *pExpr, /* Phrase to return doclist for */ |
| 6665 int iCol, /* Column to return position list for */ |
| 6666 char **ppOut /* OUT: Pointer to position list */ |
| 6667 ){ |
| 6668 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6669 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6670 char *pIter; |
| 6671 int iThis; |
| 6672 sqlite3_int64 iDocid; |
| 6673 |
| 6674 /* If this phrase is applies specifically to some column other than |
| 6675 ** column iCol, return a NULL pointer. */ |
| 6676 *ppOut = 0; |
| 6677 assert( iCol>=0 && iCol<pTab->nColumn ); |
| 6678 if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){ |
| 6679 return SQLITE_OK; |
| 6680 } |
| 6681 |
| 6682 iDocid = pExpr->iDocid; |
| 6683 pIter = pPhrase->doclist.pList; |
| 6684 if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ |
| 6685 int rc = SQLITE_OK; |
| 6686 int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ |
| 6687 int bOr = 0; |
| 6688 u8 bTreeEof = 0; |
| 6689 Fts3Expr *p; /* Used to iterate from pExpr to root */ |
| 6690 Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ |
| 6691 int bMatch; |
| 6692 |
| 6693 /* Check if this phrase descends from an OR expression node. If not, |
| 6694 ** return NULL. Otherwise, the entry that corresponds to docid |
| 6695 ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the |
| 6696 ** tree that the node is part of has been marked as EOF, but the node |
| 6697 ** itself is not EOF, then it may point to an earlier entry. */ |
| 6698 pNear = pExpr; |
| 6699 for(p=pExpr->pParent; p; p=p->pParent){ |
| 6700 if( p->eType==FTSQUERY_OR ) bOr = 1; |
| 6701 if( p->eType==FTSQUERY_NEAR ) pNear = p; |
| 6702 if( p->bEof ) bTreeEof = 1; |
| 6703 } |
| 6704 if( bOr==0 ) return SQLITE_OK; |
| 6705 |
| 6706 /* This is the descendent of an OR node. In this case we cannot use |
| 6707 ** an incremental phrase. Load the entire doclist for the phrase |
| 6708 ** into memory in this case. */ |
| 6709 if( pPhrase->bIncr ){ |
| 6710 int bEofSave = pNear->bEof; |
| 6711 fts3EvalRestart(pCsr, pNear, &rc); |
| 6712 while( rc==SQLITE_OK && !pNear->bEof ){ |
| 6713 fts3EvalNextRow(pCsr, pNear, &rc); |
| 6714 if( bEofSave==0 && pNear->iDocid==iDocid ) break; |
| 6715 } |
| 6716 assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); |
| 6717 } |
| 6718 if( bTreeEof ){ |
| 6719 while( rc==SQLITE_OK && !pNear->bEof ){ |
| 6720 fts3EvalNextRow(pCsr, pNear, &rc); |
| 6721 } |
| 6722 } |
| 6723 if( rc!=SQLITE_OK ) return rc; |
| 6724 |
| 6725 bMatch = 1; |
| 6726 for(p=pNear; p; p=p->pLeft){ |
| 6727 u8 bEof = 0; |
| 6728 Fts3Expr *pTest = p; |
| 6729 Fts3Phrase *pPh; |
| 6730 assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE ); |
| 6731 if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight; |
| 6732 assert( pTest->eType==FTSQUERY_PHRASE ); |
| 6733 pPh = pTest->pPhrase; |
| 6734 |
| 6735 pIter = pPh->pOrPoslist; |
| 6736 iDocid = pPh->iOrDocid; |
| 6737 if( pCsr->bDesc==bDescDoclist ){ |
| 6738 bEof = !pPh->doclist.nAll || |
| 6739 (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll)); |
| 6740 while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ |
| 6741 sqlite3Fts3DoclistNext( |
| 6742 bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, |
| 6743 &pIter, &iDocid, &bEof |
| 6744 ); |
| 6745 } |
| 6746 }else{ |
| 6747 bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll); |
| 6748 while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ |
| 6749 int dummy; |
| 6750 sqlite3Fts3DoclistPrev( |
| 6751 bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, |
| 6752 &pIter, &iDocid, &dummy, &bEof |
| 6753 ); |
| 6754 } |
| 6755 } |
| 6756 pPh->pOrPoslist = pIter; |
| 6757 pPh->iOrDocid = iDocid; |
| 6758 if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0; |
| 6759 } |
| 6760 |
| 6761 if( bMatch ){ |
| 6762 pIter = pPhrase->pOrPoslist; |
| 6763 }else{ |
| 6764 pIter = 0; |
| 6765 } |
| 6766 } |
| 6767 if( pIter==0 ) return SQLITE_OK; |
| 6768 |
| 6769 if( *pIter==0x01 ){ |
| 6770 pIter++; |
| 6771 pIter += fts3GetVarint32(pIter, &iThis); |
| 6772 }else{ |
| 6773 iThis = 0; |
| 6774 } |
| 6775 while( iThis<iCol ){ |
| 6776 fts3ColumnlistCopy(0, &pIter); |
| 6777 if( *pIter==0x00 ) return SQLITE_OK; |
| 6778 pIter++; |
| 6779 pIter += fts3GetVarint32(pIter, &iThis); |
| 6780 } |
| 6781 if( *pIter==0x00 ){ |
| 6782 pIter = 0; |
| 6783 } |
| 6784 |
| 6785 *ppOut = ((iCol==iThis)?pIter:0); |
| 6786 return SQLITE_OK; |
| 6787 } |
| 6788 |
| 6789 /* |
| 6790 ** Free all components of the Fts3Phrase structure that were allocated by |
| 6791 ** the eval module. Specifically, this means to free: |
| 6792 ** |
| 6793 ** * the contents of pPhrase->doclist, and |
| 6794 ** * any Fts3MultiSegReader objects held by phrase tokens. |
| 6795 */ |
| 6796 SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ |
| 6797 if( pPhrase ){ |
| 6798 int i; |
| 6799 sqlite3_free(pPhrase->doclist.aAll); |
| 6800 fts3EvalInvalidatePoslist(pPhrase); |
| 6801 memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); |
| 6802 for(i=0; i<pPhrase->nToken; i++){ |
| 6803 fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); |
| 6804 pPhrase->aToken[i].pSegcsr = 0; |
| 6805 } |
| 6806 } |
| 6807 } |
| 6808 |
| 6809 |
/*
** Return SQLITE_CORRUPT_VTAB.
**
** Only compiled when SQLITE_DEBUG is defined. The parameter list is
** spelled (void) so that the definition is a real prototype rather than
** an old-style declaration that accepts any arguments.
*/
#ifdef SQLITE_DEBUG
SQLITE_PRIVATE int sqlite3Fts3Corrupt(void){
  return SQLITE_CORRUPT_VTAB;
}
#endif
| 6818 |
| 6819 #if !SQLITE_CORE |
| 6820 /* |
| 6821 ** Initialize API pointer table, if required. |
| 6822 */ |
| 6823 #ifdef _WIN32 |
| 6824 __declspec(dllexport) |
| 6825 #endif |
| 6826 SQLITE_API int SQLITE_STDCALL sqlite3_fts3_init( |
| 6827 sqlite3 *db, |
| 6828 char **pzErrMsg, |
| 6829 const sqlite3_api_routines *pApi |
| 6830 ){ |
| 6831 SQLITE_EXTENSION_INIT2(pApi) |
| 6832 return sqlite3Fts3Init(db); |
| 6833 } |
| 6834 #endif |
| 6835 |
| 6836 #endif |
| 6837 |
| 6838 /************** End of fts3.c ************************************************/ |
| 6839 /************** Begin file fts3_aux.c ****************************************/ |
| 6840 /* |
| 6841 ** 2011 Jan 27 |
| 6842 ** |
| 6843 ** The author disclaims copyright to this source code. In place of |
| 6844 ** a legal notice, here is a blessing: |
| 6845 ** |
| 6846 ** May you do good and not evil. |
| 6847 ** May you find forgiveness for yourself and forgive others. |
| 6848 ** May you share freely, never taking more than you give. |
| 6849 ** |
| 6850 ****************************************************************************** |
| 6851 ** |
| 6852 */ |
| 6853 /* #include "fts3Int.h" */ |
| 6854 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 6855 |
| 6856 /* #include <string.h> */ |
| 6857 /* #include <assert.h> */ |
| 6858 |
| 6859 typedef struct Fts3auxTable Fts3auxTable; |
| 6860 typedef struct Fts3auxCursor Fts3auxCursor; |
| 6861 |
| 6862 struct Fts3auxTable { |
| 6863 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 6864 Fts3Table *pFts3Tab; |
| 6865 }; |
| 6866 |
| 6867 struct Fts3auxCursor { |
| 6868 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 6869 Fts3MultiSegReader csr; /* Must be right after "base" */ |
| 6870 Fts3SegFilter filter; |
| 6871 char *zStop; |
| 6872 int nStop; /* Byte-length of string zStop */ |
| 6873 int iLangid; /* Language id to query */ |
| 6874 int isEof; /* True if cursor is at EOF */ |
| 6875 sqlite3_int64 iRowid; /* Current rowid */ |
| 6876 |
| 6877 int iCol; /* Current value of 'col' column */ |
| 6878 int nStat; /* Size of aStat[] array */ |
| 6879 struct Fts3auxColstats { |
| 6880 sqlite3_int64 nDoc; /* 'documents' values for current csr row */ |
| 6881 sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */ |
| 6882 } *aStat; |
| 6883 }; |
| 6884 |
| 6885 /* |
| 6886 ** Schema of the terms table. |
| 6887 */ |
| 6888 #define FTS3_AUX_SCHEMA \ |
| 6889 "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)" |
| 6890 |
| 6891 /* |
| 6892 ** This function does all the work for both the xConnect and xCreate methods. |
| 6893 ** These tables have no persistent representation of their own, so xConnect |
| 6894 ** and xCreate are identical operations. |
| 6895 */ |
| 6896 static int fts3auxConnectMethod( |
| 6897 sqlite3 *db, /* Database connection */ |
| 6898 void *pUnused, /* Unused */ |
| 6899 int argc, /* Number of elements in argv array */ |
| 6900 const char * const *argv, /* xCreate/xConnect argument array */ |
| 6901 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 6902 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 6903 ){ |
| 6904 char const *zDb; /* Name of database (e.g. "main") */ |
| 6905 char const *zFts3; /* Name of fts3 table */ |
| 6906 int nDb; /* Result of strlen(zDb) */ |
| 6907 int nFts3; /* Result of strlen(zFts3) */ |
| 6908 int nByte; /* Bytes of space to allocate here */ |
| 6909 int rc; /* value returned by declare_vtab() */ |
| 6910 Fts3auxTable *p; /* Virtual table object to return */ |
| 6911 |
| 6912 UNUSED_PARAMETER(pUnused); |
| 6913 |
| 6914 /* The user should invoke this in one of two forms: |
| 6915 ** |
| 6916 ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); |
| 6917 ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); |
| 6918 */ |
| 6919 if( argc!=4 && argc!=5 ) goto bad_args; |
| 6920 |
| 6921 zDb = argv[1]; |
| 6922 nDb = (int)strlen(zDb); |
| 6923 if( argc==5 ){ |
| 6924 if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ |
| 6925 zDb = argv[3]; |
| 6926 nDb = (int)strlen(zDb); |
| 6927 zFts3 = argv[4]; |
| 6928 }else{ |
| 6929 goto bad_args; |
| 6930 } |
| 6931 }else{ |
| 6932 zFts3 = argv[3]; |
| 6933 } |
| 6934 nFts3 = (int)strlen(zFts3); |
| 6935 |
| 6936 rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); |
| 6937 if( rc!=SQLITE_OK ) return rc; |
| 6938 |
| 6939 nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; |
| 6940 p = (Fts3auxTable *)sqlite3_malloc(nByte); |
| 6941 if( !p ) return SQLITE_NOMEM; |
| 6942 memset(p, 0, nByte); |
| 6943 |
| 6944 p->pFts3Tab = (Fts3Table *)&p[1]; |
| 6945 p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; |
| 6946 p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; |
| 6947 p->pFts3Tab->db = db; |
| 6948 p->pFts3Tab->nIndex = 1; |
| 6949 |
| 6950 memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); |
| 6951 memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); |
| 6952 sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); |
| 6953 |
| 6954 *ppVtab = (sqlite3_vtab *)p; |
| 6955 return SQLITE_OK; |
| 6956 |
| 6957 bad_args: |
| 6958 sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor"); |
| 6959 return SQLITE_ERROR; |
| 6960 } |
| 6961 |
| 6962 /* |
| 6963 ** This function does the work for both the xDisconnect and xDestroy methods. |
| 6964 ** These tables have no persistent representation of their own, so xDisconnect |
| 6965 ** and xDestroy are identical operations. |
| 6966 */ |
| 6967 static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){ |
| 6968 Fts3auxTable *p = (Fts3auxTable *)pVtab; |
| 6969 Fts3Table *pFts3 = p->pFts3Tab; |
| 6970 int i; |
| 6971 |
| 6972 /* Free any prepared statements held */ |
| 6973 for(i=0; i<SizeofArray(pFts3->aStmt); i++){ |
| 6974 sqlite3_finalize(pFts3->aStmt[i]); |
| 6975 } |
| 6976 sqlite3_free(pFts3->zSegmentsTbl); |
| 6977 sqlite3_free(p); |
| 6978 return SQLITE_OK; |
| 6979 } |
| 6980 |
| 6981 #define FTS4AUX_EQ_CONSTRAINT 1 |
| 6982 #define FTS4AUX_GE_CONSTRAINT 2 |
| 6983 #define FTS4AUX_LE_CONSTRAINT 4 |
| 6984 |
| 6985 /* |
| 6986 ** xBestIndex - Analyze a WHERE and ORDER BY clause. |
| 6987 */ |
| 6988 static int fts3auxBestIndexMethod( |
| 6989 sqlite3_vtab *pVTab, |
| 6990 sqlite3_index_info *pInfo |
| 6991 ){ |
| 6992 int i; |
| 6993 int iEq = -1; |
| 6994 int iGe = -1; |
| 6995 int iLe = -1; |
| 6996 int iLangid = -1; |
| 6997 int iNext = 1; /* Next free argvIndex value */ |
| 6998 |
| 6999 UNUSED_PARAMETER(pVTab); |
| 7000 |
| 7001 /* This vtab delivers always results in "ORDER BY term ASC" order. */ |
| 7002 if( pInfo->nOrderBy==1 |
| 7003 && pInfo->aOrderBy[0].iColumn==0 |
| 7004 && pInfo->aOrderBy[0].desc==0 |
| 7005 ){ |
| 7006 pInfo->orderByConsumed = 1; |
| 7007 } |
| 7008 |
| 7009 /* Search for equality and range constraints on the "term" column. |
| 7010 ** And equality constraints on the hidden "languageid" column. */ |
| 7011 for(i=0; i<pInfo->nConstraint; i++){ |
| 7012 if( pInfo->aConstraint[i].usable ){ |
| 7013 int op = pInfo->aConstraint[i].op; |
| 7014 int iCol = pInfo->aConstraint[i].iColumn; |
| 7015 |
| 7016 if( iCol==0 ){ |
| 7017 if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; |
| 7018 if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; |
| 7019 if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; |
| 7020 if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; |
| 7021 if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; |
| 7022 } |
| 7023 if( iCol==4 ){ |
| 7024 if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; |
| 7025 } |
| 7026 } |
| 7027 } |
| 7028 |
| 7029 if( iEq>=0 ){ |
| 7030 pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; |
| 7031 pInfo->aConstraintUsage[iEq].argvIndex = iNext++; |
| 7032 pInfo->estimatedCost = 5; |
| 7033 }else{ |
| 7034 pInfo->idxNum = 0; |
| 7035 pInfo->estimatedCost = 20000; |
| 7036 if( iGe>=0 ){ |
| 7037 pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; |
| 7038 pInfo->aConstraintUsage[iGe].argvIndex = iNext++; |
| 7039 pInfo->estimatedCost /= 2; |
| 7040 } |
| 7041 if( iLe>=0 ){ |
| 7042 pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; |
| 7043 pInfo->aConstraintUsage[iLe].argvIndex = iNext++; |
| 7044 pInfo->estimatedCost /= 2; |
| 7045 } |
| 7046 } |
| 7047 if( iLangid>=0 ){ |
| 7048 pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; |
| 7049 pInfo->estimatedCost--; |
| 7050 } |
| 7051 |
| 7052 return SQLITE_OK; |
| 7053 } |
| 7054 |
| 7055 /* |
| 7056 ** xOpen - Open a cursor. |
| 7057 */ |
| 7058 static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 7059 Fts3auxCursor *pCsr; /* Pointer to cursor object to return */ |
| 7060 |
| 7061 UNUSED_PARAMETER(pVTab); |
| 7062 |
| 7063 pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor)); |
| 7064 if( !pCsr ) return SQLITE_NOMEM; |
| 7065 memset(pCsr, 0, sizeof(Fts3auxCursor)); |
| 7066 |
| 7067 *ppCsr = (sqlite3_vtab_cursor *)pCsr; |
| 7068 return SQLITE_OK; |
| 7069 } |
| 7070 |
| 7071 /* |
| 7072 ** xClose - Close a cursor. |
| 7073 */ |
| 7074 static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 7075 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7076 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7077 |
| 7078 sqlite3Fts3SegmentsClose(pFts3); |
| 7079 sqlite3Fts3SegReaderFinish(&pCsr->csr); |
| 7080 sqlite3_free((void *)pCsr->filter.zTerm); |
| 7081 sqlite3_free(pCsr->zStop); |
| 7082 sqlite3_free(pCsr->aStat); |
| 7083 sqlite3_free(pCsr); |
| 7084 return SQLITE_OK; |
| 7085 } |
| 7086 |
| 7087 static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ |
| 7088 if( nSize>pCsr->nStat ){ |
| 7089 struct Fts3auxColstats *aNew; |
| 7090 aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, |
| 7091 sizeof(struct Fts3auxColstats) * nSize |
| 7092 ); |
| 7093 if( aNew==0 ) return SQLITE_NOMEM; |
| 7094 memset(&aNew[pCsr->nStat], 0, |
| 7095 sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) |
| 7096 ); |
| 7097 pCsr->aStat = aNew; |
| 7098 pCsr->nStat = nSize; |
| 7099 } |
| 7100 return SQLITE_OK; |
| 7101 } |
| 7102 |
| 7103 /* |
| 7104 ** xNext - Advance the cursor to the next row, if any. |
| 7105 */ |
| 7106 static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){ |
| 7107 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7108 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7109 int rc; |
| 7110 |
| 7111 /* Increment our pretend rowid value. */ |
| 7112 pCsr->iRowid++; |
| 7113 |
| 7114 for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){ |
| 7115 if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK; |
| 7116 } |
| 7117 |
| 7118 rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr); |
| 7119 if( rc==SQLITE_ROW ){ |
| 7120 int i = 0; |
| 7121 int nDoclist = pCsr->csr.nDoclist; |
| 7122 char *aDoclist = pCsr->csr.aDoclist; |
| 7123 int iCol; |
| 7124 |
| 7125 int eState = 0; |
| 7126 |
| 7127 if( pCsr->zStop ){ |
| 7128 int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm; |
| 7129 int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n); |
| 7130 if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){ |
| 7131 pCsr->isEof = 1; |
| 7132 return SQLITE_OK; |
| 7133 } |
| 7134 } |
| 7135 |
| 7136 if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM; |
| 7137 memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat); |
| 7138 iCol = 0; |
| 7139 |
| 7140 while( i<nDoclist ){ |
| 7141 sqlite3_int64 v = 0; |
| 7142 |
| 7143 i += sqlite3Fts3GetVarint(&aDoclist[i], &v); |
| 7144 switch( eState ){ |
| 7145 /* State 0. In this state the integer just read was a docid. */ |
| 7146 case 0: |
| 7147 pCsr->aStat[0].nDoc++; |
| 7148 eState = 1; |
| 7149 iCol = 0; |
| 7150 break; |
| 7151 |
| 7152 /* State 1. In this state we are expecting either a 1, indicating |
| 7153 ** that the following integer will be a column number, or the |
| 7154 ** start of a position list for column 0. |
| 7155 ** |
| 7156 ** The only difference between state 1 and state 2 is that if the |
| 7157 ** integer encountered in state 1 is not 0 or 1, then we need to |
| 7158 ** increment the column 0 "nDoc" count for this term. |
| 7159 */ |
| 7160 case 1: |
| 7161 assert( iCol==0 ); |
| 7162 if( v>1 ){ |
| 7163 pCsr->aStat[1].nDoc++; |
| 7164 } |
| 7165 eState = 2; |
| 7166 /* fall through */ |
| 7167 |
| 7168 case 2: |
| 7169 if( v==0 ){ /* 0x00. Next integer will be a docid. */ |
| 7170 eState = 0; |
| 7171 }else if( v==1 ){ /* 0x01. Next integer will be a column number. */ |
| 7172 eState = 3; |
| 7173 }else{ /* 2 or greater. A position. */ |
| 7174 pCsr->aStat[iCol+1].nOcc++; |
| 7175 pCsr->aStat[0].nOcc++; |
| 7176 } |
| 7177 break; |
| 7178 |
| 7179 /* State 3. The integer just read is a column number. */ |
| 7180 default: assert( eState==3 ); |
| 7181 iCol = (int)v; |
| 7182 if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM; |
| 7183 pCsr->aStat[iCol+1].nDoc++; |
| 7184 eState = 2; |
| 7185 break; |
| 7186 } |
| 7187 } |
| 7188 |
| 7189 pCsr->iCol = 0; |
| 7190 rc = SQLITE_OK; |
| 7191 }else{ |
| 7192 pCsr->isEof = 1; |
| 7193 } |
| 7194 return rc; |
| 7195 } |
| 7196 |
| 7197 /* |
| 7198 ** xFilter - Initialize a cursor to point at the start of its data. |
| 7199 */ |
| 7200 static int fts3auxFilterMethod( |
| 7201 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 7202 int idxNum, /* Strategy index */ |
| 7203 const char *idxStr, /* Unused */ |
| 7204 int nVal, /* Number of elements in apVal */ |
| 7205 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 7206 ){ |
| 7207 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7208 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7209 int rc; |
| 7210 int isScan = 0; |
| 7211 int iLangVal = 0; /* Language id to query */ |
| 7212 |
| 7213 int iEq = -1; /* Index of term=? value in apVal */ |
| 7214 int iGe = -1; /* Index of term>=? value in apVal */ |
| 7215 int iLe = -1; /* Index of term<=? value in apVal */ |
| 7216 int iLangid = -1; /* Index of languageid=? value in apVal */ |
| 7217 int iNext = 0; |
| 7218 |
| 7219 UNUSED_PARAMETER(nVal); |
| 7220 UNUSED_PARAMETER(idxStr); |
| 7221 |
| 7222 assert( idxStr==0 ); |
| 7223 assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0 |
| 7224 || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT |
| 7225 || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) |
| 7226 ); |
| 7227 |
| 7228 if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ |
| 7229 iEq = iNext++; |
| 7230 }else{ |
| 7231 isScan = 1; |
| 7232 if( idxNum & FTS4AUX_GE_CONSTRAINT ){ |
| 7233 iGe = iNext++; |
| 7234 } |
| 7235 if( idxNum & FTS4AUX_LE_CONSTRAINT ){ |
| 7236 iLe = iNext++; |
| 7237 } |
| 7238 } |
| 7239 if( iNext<nVal ){ |
| 7240 iLangid = iNext++; |
| 7241 } |
| 7242 |
| 7243 /* In case this cursor is being reused, close and zero it. */ |
| 7244 testcase(pCsr->filter.zTerm); |
| 7245 sqlite3Fts3SegReaderFinish(&pCsr->csr); |
| 7246 sqlite3_free((void *)pCsr->filter.zTerm); |
| 7247 sqlite3_free(pCsr->aStat); |
| 7248 memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); |
| 7249 |
| 7250 pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; |
| 7251 if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; |
| 7252 |
| 7253 if( iEq>=0 || iGe>=0 ){ |
| 7254 const unsigned char *zStr = sqlite3_value_text(apVal[0]); |
| 7255 assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); |
| 7256 if( zStr ){ |
| 7257 pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); |
| 7258 pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); |
| 7259 if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; |
| 7260 } |
| 7261 } |
| 7262 |
| 7263 if( iLe>=0 ){ |
| 7264 pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); |
| 7265 pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); |
| 7266 if( pCsr->zStop==0 ) return SQLITE_NOMEM; |
| 7267 } |
| 7268 |
| 7269 if( iLangid>=0 ){ |
| 7270 iLangVal = sqlite3_value_int(apVal[iLangid]); |
| 7271 |
| 7272 /* If the user specified a negative value for the languageid, use zero |
| 7273 ** instead. This works, as the "languageid=?" constraint will also |
| 7274 ** be tested by the VDBE layer. The test will always be false (since |
| 7275 ** this module will not return a row with a negative languageid), and |
| 7276 ** so the overall query will return zero rows. */ |
| 7277 if( iLangVal<0 ) iLangVal = 0; |
| 7278 } |
| 7279 pCsr->iLangid = iLangVal; |
| 7280 |
| 7281 rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, |
| 7282 pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr |
| 7283 ); |
| 7284 if( rc==SQLITE_OK ){ |
| 7285 rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); |
| 7286 } |
| 7287 |
| 7288 if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); |
| 7289 return rc; |
| 7290 } |
| 7291 |
| 7292 /* |
| 7293 ** xEof - Return true if the cursor is at EOF, or false otherwise. |
| 7294 */ |
| 7295 static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ |
| 7296 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7297 return pCsr->isEof; |
| 7298 } |
| 7299 |
| 7300 /* |
| 7301 ** xColumn - Return a column value. |
| 7302 */ |
| 7303 static int fts3auxColumnMethod( |
| 7304 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 7305 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 7306 int iCol /* Index of column to read value from */ |
| 7307 ){ |
| 7308 Fts3auxCursor *p = (Fts3auxCursor *)pCursor; |
| 7309 |
| 7310 assert( p->isEof==0 ); |
| 7311 switch( iCol ){ |
| 7312 case 0: /* term */ |
| 7313 sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); |
| 7314 break; |
| 7315 |
| 7316 case 1: /* col */ |
| 7317 if( p->iCol ){ |
| 7318 sqlite3_result_int(pCtx, p->iCol-1); |
| 7319 }else{ |
| 7320 sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); |
| 7321 } |
| 7322 break; |
| 7323 |
| 7324 case 2: /* documents */ |
| 7325 sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); |
| 7326 break; |
| 7327 |
| 7328 case 3: /* occurrences */ |
| 7329 sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); |
| 7330 break; |
| 7331 |
| 7332 default: /* languageid */ |
| 7333 assert( iCol==4 ); |
| 7334 sqlite3_result_int(pCtx, p->iLangid); |
| 7335 break; |
| 7336 } |
| 7337 |
| 7338 return SQLITE_OK; |
| 7339 } |
| 7340 |
| 7341 /* |
| 7342 ** xRowid - Return the current rowid for the cursor. |
| 7343 */ |
| 7344 static int fts3auxRowidMethod( |
| 7345 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 7346 sqlite_int64 *pRowid /* OUT: Rowid value */ |
| 7347 ){ |
| 7348 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7349 *pRowid = pCsr->iRowid; |
| 7350 return SQLITE_OK; |
| 7351 } |
| 7352 |
| 7353 /* |
| 7354 ** Register the fts3aux module with database connection db. Return SQLITE_OK |
| 7355 ** if successful or an error code if sqlite3_create_module() fails. |
| 7356 */ |
| 7357 SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ |
| 7358 static const sqlite3_module fts3aux_module = { |
| 7359 0, /* iVersion */ |
| 7360 fts3auxConnectMethod, /* xCreate */ |
| 7361 fts3auxConnectMethod, /* xConnect */ |
| 7362 fts3auxBestIndexMethod, /* xBestIndex */ |
| 7363 fts3auxDisconnectMethod, /* xDisconnect */ |
| 7364 fts3auxDisconnectMethod, /* xDestroy */ |
| 7365 fts3auxOpenMethod, /* xOpen */ |
| 7366 fts3auxCloseMethod, /* xClose */ |
| 7367 fts3auxFilterMethod, /* xFilter */ |
| 7368 fts3auxNextMethod, /* xNext */ |
| 7369 fts3auxEofMethod, /* xEof */ |
| 7370 fts3auxColumnMethod, /* xColumn */ |
| 7371 fts3auxRowidMethod, /* xRowid */ |
| 7372 0, /* xUpdate */ |
| 7373 0, /* xBegin */ |
| 7374 0, /* xSync */ |
| 7375 0, /* xCommit */ |
| 7376 0, /* xRollback */ |
| 7377 0, /* xFindFunction */ |
| 7378 0, /* xRename */ |
| 7379 0, /* xSavepoint */ |
| 7380 0, /* xRelease */ |
| 7381 0 /* xRollbackTo */ |
| 7382 }; |
| 7383 int rc; /* Return code */ |
| 7384 |
| 7385 rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); |
| 7386 return rc; |
| 7387 } |
| 7388 |
| 7389 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 7390 |
| 7391 /************** End of fts3_aux.c ********************************************/ |
| 7392 /************** Begin file fts3_expr.c ***************************************/ |
| 7393 /* |
| 7394 ** 2008 Nov 28 |
| 7395 ** |
| 7396 ** The author disclaims copyright to this source code. In place of |
| 7397 ** a legal notice, here is a blessing: |
| 7398 ** |
| 7399 ** May you do good and not evil. |
| 7400 ** May you find forgiveness for yourself and forgive others. |
| 7401 ** May you share freely, never taking more than you give. |
| 7402 ** |
| 7403 ****************************************************************************** |
| 7404 ** |
| 7405 ** This module contains code that implements a parser for fts3 query strings |
| 7406 ** (the right-hand argument to the MATCH operator). Because the supported |
| 7407 ** syntax is relatively simple, the whole tokenizer/parser system is |
| 7408 ** hand-coded. |
| 7409 */ |
| 7410 /* #include "fts3Int.h" */ |
| 7411 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 7412 |
| 7413 /* |
| 7414 ** By default, this module parses the legacy syntax that has been |
| 7415 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
| 7416 ** is defined, then it uses the new syntax. The differences between |
| 7417 ** the new and the old syntaxes are: |
| 7418 ** |
| 7419 ** a) The new syntax supports parenthesis. The old does not. |
| 7420 ** |
| 7421 ** b) The new syntax supports the AND and NOT operators. The old does not. |
| 7422 ** |
| 7423 ** c) The old syntax supports the "-" token qualifier. This is not |
| 7424 ** supported by the new syntax (it is replaced by the NOT operator). |
| 7425 ** |
| 7426 ** d) When using the old syntax, the OR operator has a greater precedence |
| 7427 ** than an implicit AND. When using the new, both implicit and explicit |
| 7428 ** AND operators have a higher precedence than OR. |
| 7429 ** |
| 7430 ** If compiled with SQLITE_TEST defined, then this module exports the |
| 7431 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable |
| 7432 ** to zero causes the module to use the old syntax. If it is set to |
| 7433 ** non-zero the new syntax is activated. This is so both syntaxes can |
| 7434 ** be tested using a single build of testfixture. |
| 7435 ** |
| 7436 ** The following describes the syntax supported by the fts3 MATCH |
| 7437 ** operator in a similar format to that used by the lemon parser |
| 7438 ** generator. This module does not actually use lemon, it uses a |
| 7439 ** custom parser. |
| 7440 ** |
| 7441 ** query ::= andexpr (OR andexpr)*. |
| 7442 ** |
| 7443 ** andexpr ::= notexpr (AND? notexpr)*. |
| 7444 ** |
| 7445 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. |
| 7446 ** notexpr ::= LP query RP. |
| 7447 ** |
| 7448 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. |
| 7449 ** |
| 7450 ** distance_opt ::= . |
| 7451 ** distance_opt ::= / INTEGER. |
| 7452 ** |
| 7453 ** phrase ::= TOKEN. |
| 7454 ** phrase ::= COLUMN:TOKEN. |
| 7455 ** phrase ::= "TOKEN TOKEN TOKEN...". |
| 7456 */ |
| 7457 |
| 7458 #ifdef SQLITE_TEST /* Test builds: the syntax is selected at run time */ |
| 7459 SQLITE_API int sqlite3_fts3_enable_parentheses = 0; /* 0: old syntax, non-zero: new syntax */ |
| 7460 #else /* Other builds: the syntax is a compile-time constant */ |
| 7461 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS |
| 7462 # define sqlite3_fts3_enable_parentheses 1 /* New syntax */ |
| 7463 # else |
| 7464 # define sqlite3_fts3_enable_parentheses 0 /* Old (legacy) syntax */ |
| 7465 # endif |
| 7466 #endif |
| 7467 |
| 7468 /* |
| 7469 ** Default span for NEAR operators, used when no explicit "/N" follows the keyword. |
| 7470 */ |
| 7471 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 |
| 7472 |
| 7473 /* #include <string.h> */ |
| 7474 /* #include <assert.h> */ |
| 7475 |
| 7476 /* State shared by the functions that parse an fts3 query string. |
| 7477 ** isNot: |
| 7478 ** This variable is used by function getNextNode(). When getNextNode() is |
| 7479 ** called, it sets ParseContext.isNot to true if the 'next node' is a |
| 7480 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the |
| 7481 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to |
| 7482 ** zero. |
| 7483 */ |
| 7484 typedef struct ParseContext ParseContext; |
| 7485 struct ParseContext { |
| 7486 sqlite3_tokenizer *pTokenizer; /* Tokenizer used to split the query into tokens */ |
| 7487 int iLangid; /* Language id passed to the tokenizer cursor */ |
| 7488 const char **azCol; /* Array of column names for fts3 table */ |
| 7489 int bFts4; /* True to allow FTS4-only syntax (the "^" token prefix) */ |
| 7490 int nCol; /* Number of entries in azCol[] */ |
| 7491 int iDefaultCol; /* Column queried when a token has no "column:" prefix */ |
| 7492 int isNot; /* True if getNextNode() sees a unary - */ |
| 7493 sqlite3_context *pCtx; /* Write error message here */ |
| 7494 int nNest; /* Bracket nesting: +1 for each "(", -1 for each ")" */ |
| 7495 }; |
| 7496 |
/*
** A char-typed stand-in for the standard isspace().
**
** The standard function takes an int and has undefined behavior for
** values outside the range of unsigned char, which makes it awkward to
** call on plain (possibly signed) char data. This version accepts a char
** directly. It returns 1 for space, tab, newline, carriage return,
** vertical tab and form feed, and 0 for everything else, including any
** negative value.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
| 7511 |
/*
** Obtain nByte bytes from sqlite3_malloc() and clear them to zero.
** Returns the new allocation, or NULL if sqlite3_malloc() returned NULL.
*/
static void *fts3MallocZero(int nByte){
  void *pNew = sqlite3_malloc(nByte);
  if( pNew==0 ){
    return 0;
  }
  memset(pNew, 0, nByte);
  return pNew;
}
| 7522 |
| 7523 SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer( |
| 7524 sqlite3_tokenizer *pTokenizer, |
| 7525 int iLangid, |
| 7526 const char *z, |
| 7527 int n, |
| 7528 sqlite3_tokenizer_cursor **ppCsr |
| 7529 ){ |
| 7530 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7531 sqlite3_tokenizer_cursor *pCsr = 0; |
| 7532 int rc; |
| 7533 |
| 7534 rc = pModule->xOpen(pTokenizer, z, n, &pCsr); |
| 7535 assert( rc==SQLITE_OK || pCsr==0 ); |
| 7536 if( rc==SQLITE_OK ){ |
| 7537 pCsr->pTokenizer = pTokenizer; |
| 7538 if( pModule->iVersion>=1 ){ |
| 7539 rc = pModule->xLanguageid(pCsr, iLangid); |
| 7540 if( rc!=SQLITE_OK ){ |
| 7541 pModule->xClose(pCsr); |
| 7542 pCsr = 0; |
| 7543 } |
| 7544 } |
| 7545 } |
| 7546 *ppCsr = pCsr; |
| 7547 return rc; |
| 7548 } |
| 7549 |
| 7550 /* |
| 7551 ** Function getNextNode(), which is called by fts3ExprParse(), may itself |
| 7552 ** call fts3ExprParse(). So this forward declaration is required. |
| 7553 */ |
| 7554 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
| 7555 |
| 7556 /* |
| 7557 ** Extract the next token from buffer z (length n) using the tokenizer |
| 7558 ** and other information (column names etc.) in pParse. Create an Fts3Expr |
| 7559 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
| 7560 ** single token and set *ppExpr to point to it. If the end of the buffer is |
| 7561 ** reached before a token is found, set *ppExpr to zero. It is the |
| 7562 ** responsibility of the caller to eventually deallocate the allocated |
| 7563 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
| 7564 ** |
| 7565 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
| 7566 ** fails. |
| 7567 */ |
| 7568 static int getNextToken( |
| 7569 ParseContext *pParse, /* fts3 query parse context */ |
| 7570 int iCol, /* Value for Fts3Phrase.iColumn */ |
| 7571 const char *z, int n, /* Input string */ |
| 7572 Fts3Expr **ppExpr, /* OUT: expression */ |
| 7573 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 7574 ){ |
| 7575 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 7576 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7577 int rc; |
| 7578 sqlite3_tokenizer_cursor *pCursor; |
| 7579 Fts3Expr *pRet = 0; |
| 7580 int i = 0; |
| 7581 |
| 7582 /* Set variable i to the maximum number of bytes of input to tokenize. */ |
| 7583 for(i=0; i<n; i++){ |
| 7584 if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; |
| 7585 if( z[i]=='"' ) break; |
| 7586 } |
| 7587 |
| 7588 *pnConsumed = i; |
| 7589 rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); |
| 7590 if( rc==SQLITE_OK ){ |
| 7591 const char *zToken; |
| 7592 int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; |
| 7593 int nByte; /* total space to allocate */ |
| 7594 |
| 7595 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
| 7596 if( rc==SQLITE_OK ){ |
| 7597 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
| 7598 pRet = (Fts3Expr *)fts3MallocZero(nByte); |
| 7599 if( !pRet ){ |
| 7600 rc = SQLITE_NOMEM; |
| 7601 }else{ |
| 7602 pRet->eType = FTSQUERY_PHRASE; |
| 7603 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
| 7604 pRet->pPhrase->nToken = 1; |
| 7605 pRet->pPhrase->iColumn = iCol; |
| 7606 pRet->pPhrase->aToken[0].n = nToken; |
| 7607 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
| 7608 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
| 7609 |
| 7610 if( iEnd<n && z[iEnd]=='*' ){ |
| 7611 pRet->pPhrase->aToken[0].isPrefix = 1; |
| 7612 iEnd++; |
| 7613 } |
| 7614 |
| 7615 while( 1 ){ |
| 7616 if( !sqlite3_fts3_enable_parentheses |
| 7617 && iStart>0 && z[iStart-1]=='-' |
| 7618 ){ |
| 7619 pParse->isNot = 1; |
| 7620 iStart--; |
| 7621 }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ |
| 7622 pRet->pPhrase->aToken[0].bFirst = 1; |
| 7623 iStart--; |
| 7624 }else{ |
| 7625 break; |
| 7626 } |
| 7627 } |
| 7628 |
| 7629 } |
| 7630 *pnConsumed = iEnd; |
| 7631 }else if( i && rc==SQLITE_DONE ){ |
| 7632 rc = SQLITE_OK; |
| 7633 } |
| 7634 |
| 7635 pModule->xClose(pCursor); |
| 7636 } |
| 7637 |
| 7638 *ppExpr = pRet; |
| 7639 return rc; |
| 7640 } |
| 7641 |
| 7642 |
/*
** Resize allocation pOrig to nNew bytes using sqlite3_realloc(). On
** success the resized allocation is returned. If the resize fails, pOrig
** is freed and NULL is returned, so the caller never has to release the
** old buffer itself.
**
** sqlite3_realloc(X,N) with N<=0 behaves like sqlite3_free(X) and returns
** NULL. In that case pOrig has already been released and must not be
** freed a second time.
*/
static void *fts3ReallocOrFree(void *pOrig, int nNew){
  void *pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet && nNew>0 ){
    sqlite3_free(pOrig);
  }
  return pRet;
}
| 7654 |
| 7655 /* |
| 7656 ** Buffer zInput, length nInput, contains the contents of a quoted string |
| 7657 ** that appeared as part of an fts3 query expression. Neither quote character |
| 7658 ** is included in the buffer. This function attempts to tokenize the entire |
| 7659 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE |
| 7660 ** containing the results. |
| 7661 ** |
| 7662 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the |
| 7663 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory |
| 7664 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set |
| 7665 ** to 0. |
| 7666 */ |
| 7667 static int getNextString( |
| 7668 ParseContext *pParse, /* fts3 query parse context */ |
| 7669 const char *zInput, int nInput, /* Input string */ |
| 7670 Fts3Expr **ppExpr /* OUT: expression */ |
| 7671 ){ |
| 7672 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 7673 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7674 int rc; |
| 7675 Fts3Expr *p = 0; |
| 7676 sqlite3_tokenizer_cursor *pCursor = 0; |
| 7677 char *zTemp = 0; |
| 7678 int nTemp = 0; |
| 7679 |
| 7680 const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| 7681 int nToken = 0; |
| 7682 |
| 7683 /* The final Fts3Expr data structure, including the Fts3Phrase, |
| 7684 ** Fts3PhraseToken structures token buffers are all stored as a single |
| 7685 ** allocation so that the expression can be freed with a single call to |
| 7686 ** sqlite3_free(). Setting this up requires a two pass approach. |
| 7687 ** |
| 7688 ** The first pass, in the block below, uses a tokenizer cursor to iterate |
| 7689 ** through the tokens in the expression. This pass uses fts3ReallocOrFree() |
| 7690 ** to assemble data in two dynamic buffers: |
| 7691 ** |
| 7692 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase |
| 7693 ** structure, followed by the array of Fts3PhraseToken |
| 7694 ** structures. This pass only populates the Fts3PhraseToken array. |
| 7695 ** |
| 7696 ** Buffer zTemp: Contains copies of all tokens. |
| 7697 ** |
| 7698 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, |
| 7699 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase |
| 7700 ** structures. |
| 7701 */ |
| 7702 rc = sqlite3Fts3OpenTokenizer( |
| 7703 pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); |
| 7704 if( rc==SQLITE_OK ){ |
| 7705 int ii; |
| 7706 for(ii=0; rc==SQLITE_OK; ii++){ |
| 7707 const char *zByte; |
| 7708 int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; |
| 7709 rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); |
| 7710 if( rc==SQLITE_OK ){ |
| 7711 Fts3PhraseToken *pToken; |
| 7712 |
| 7713 p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); |
| 7714 if( !p ) goto no_mem; |
| 7715 |
| 7716 zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); |
| 7717 if( !zTemp ) goto no_mem; |
| 7718 |
| 7719 assert( nToken==ii ); |
| 7720 pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; |
| 7721 memset(pToken, 0, sizeof(Fts3PhraseToken)); |
| 7722 |
| 7723 memcpy(&zTemp[nTemp], zByte, nByte); |
| 7724 nTemp += nByte; |
| 7725 |
| 7726 pToken->n = nByte; |
| 7727 pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); |
| 7728 pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); |
| 7729 nToken = ii+1; |
| 7730 } |
| 7731 } |
| 7732 |
| 7733 pModule->xClose(pCursor); |
| 7734 pCursor = 0; |
| 7735 } |
| 7736 |
| 7737 if( rc==SQLITE_DONE ){ |
| 7738 int jj; |
| 7739 char *zBuf = 0; |
| 7740 |
| 7741 p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); |
| 7742 if( !p ) goto no_mem; |
| 7743 memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); |
| 7744 p->eType = FTSQUERY_PHRASE; |
| 7745 p->pPhrase = (Fts3Phrase *)&p[1]; |
| 7746 p->pPhrase->iColumn = pParse->iDefaultCol; |
| 7747 p->pPhrase->nToken = nToken; |
| 7748 |
| 7749 zBuf = (char *)&p->pPhrase->aToken[nToken]; |
| 7750 if( zTemp ){ |
| 7751 memcpy(zBuf, zTemp, nTemp); |
| 7752 sqlite3_free(zTemp); |
| 7753 }else{ |
| 7754 assert( nTemp==0 ); |
| 7755 } |
| 7756 |
| 7757 for(jj=0; jj<p->pPhrase->nToken; jj++){ |
| 7758 p->pPhrase->aToken[jj].z = zBuf; |
| 7759 zBuf += p->pPhrase->aToken[jj].n; |
| 7760 } |
| 7761 rc = SQLITE_OK; |
| 7762 } |
| 7763 |
| 7764 *ppExpr = p; |
| 7765 return rc; |
| 7766 no_mem: |
| 7767 |
| 7768 if( pCursor ){ |
| 7769 pModule->xClose(pCursor); |
| 7770 } |
| 7771 sqlite3_free(zTemp); |
| 7772 sqlite3_free(p); |
| 7773 *ppExpr = 0; |
| 7774 return SQLITE_NOMEM; |
| 7775 } |
| 7776 |
| 7777 /* |
| 7778 ** The output variable *ppExpr is populated with an allocated Fts3Expr |
| 7779 ** structure, or set to 0 if the end of the input buffer is reached. |
| 7780 ** |
| 7781 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM |
| 7782 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
| 7783 ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
| 7784 */ |
| 7785 static int getNextNode( |
| 7786 ParseContext *pParse, /* fts3 query parse context */ |
| 7787 const char *z, int n, /* Input string */ |
| 7788 Fts3Expr **ppExpr, /* OUT: expression */ |
| 7789 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 7790 ){ |
| 7791 static const struct Fts3Keyword { |
| 7792 char *z; /* Keyword text */ |
| 7793 unsigned char n; /* Length of the keyword */ |
| 7794 unsigned char parenOnly; /* Only valid in paren mode */ |
| 7795 unsigned char eType; /* Keyword code */ |
| 7796 } aKeyword[] = { |
| 7797 { "OR" , 2, 0, FTSQUERY_OR }, |
| 7798 { "AND", 3, 1, FTSQUERY_AND }, |
| 7799 { "NOT", 3, 1, FTSQUERY_NOT }, |
| 7800 { "NEAR", 4, 0, FTSQUERY_NEAR } |
| 7801 }; |
| 7802 int ii; |
| 7803 int iCol; |
| 7804 int iColLen; |
| 7805 int rc; |
| 7806 Fts3Expr *pRet = 0; |
| 7807 |
| 7808 const char *zInput = z; |
| 7809 int nInput = n; |
| 7810 |
| 7811 pParse->isNot = 0; |
| 7812 |
| 7813 /* Skip over any whitespace before checking for a keyword, an open or |
| 7814 ** close bracket, or a quoted string. |
| 7815 */ |
| 7816 while( nInput>0 && fts3isspace(*zInput) ){ |
| 7817 nInput--; |
| 7818 zInput++; |
| 7819 } |
| 7820 if( nInput==0 ){ |
| 7821 return SQLITE_DONE; |
| 7822 } |
| 7823 |
| 7824 /* See if we are dealing with a keyword. */ |
| 7825 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ |
| 7826 const struct Fts3Keyword *pKey = &aKeyword[ii]; |
| 7827 |
| 7828 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ |
| 7829 continue; |
| 7830 } |
| 7831 |
| 7832 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ |
| 7833 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; |
| 7834 int nKey = pKey->n; |
| 7835 char cNext; |
| 7836 |
| 7837 /* If this is a "NEAR" keyword, check for an explicit nearness. */ |
| 7838 if( pKey->eType==FTSQUERY_NEAR ){ |
| 7839 assert( nKey==4 ); |
| 7840 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ |
| 7841 nNear = 0; |
| 7842 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ |
| 7843 nNear = nNear * 10 + (zInput[nKey] - '0'); |
| 7844 } |
| 7845 } |
| 7846 } |
| 7847 |
| 7848 /* At this point this is probably a keyword. But for that to be true, |
| 7849 ** the next byte must contain either whitespace, an open or close |
| 7850 ** parenthesis, a quote character, or EOF. |
| 7851 */ |
| 7852 cNext = zInput[nKey]; |
| 7853 if( fts3isspace(cNext) |
| 7854 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 |
| 7855 ){ |
| 7856 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); |
| 7857 if( !pRet ){ |
| 7858 return SQLITE_NOMEM; |
| 7859 } |
| 7860 pRet->eType = pKey->eType; |
| 7861 pRet->nNear = nNear; |
| 7862 *ppExpr = pRet; |
| 7863 *pnConsumed = (int)((zInput - z) + nKey); |
| 7864 return SQLITE_OK; |
| 7865 } |
| 7866 |
| 7867 /* Turns out that wasn't a keyword after all. This happens if the |
| 7868 ** user has supplied a token such as "ORacle". Continue. |
| 7869 */ |
| 7870 } |
| 7871 } |
| 7872 |
| 7873 /* See if we are dealing with a quoted phrase. If this is the case, then |
| 7874 ** search for the closing quote and pass the whole string to getNextString() |
| 7875 ** for processing. This is easy to do, as fts3 has no syntax for escaping |
| 7876 ** a quote character embedded in a string. |
| 7877 */ |
| 7878 if( *zInput=='"' ){ |
| 7879 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
| 7880 *pnConsumed = (int)((zInput - z) + ii + 1); |
| 7881 if( ii==nInput ){ |
| 7882 return SQLITE_ERROR; |
| 7883 } |
| 7884 return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
| 7885 } |
| 7886 |
| 7887 if( sqlite3_fts3_enable_parentheses ){ |
| 7888 if( *zInput=='(' ){ |
| 7889 int nConsumed = 0; |
| 7890 pParse->nNest++; |
| 7891 rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); |
| 7892 if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } |
| 7893 *pnConsumed = (int)(zInput - z) + 1 + nConsumed; |
| 7894 return rc; |
| 7895 }else if( *zInput==')' ){ |
| 7896 pParse->nNest--; |
| 7897 *pnConsumed = (int)((zInput - z) + 1); |
| 7898 *ppExpr = 0; |
| 7899 return SQLITE_DONE; |
| 7900 } |
| 7901 } |
| 7902 |
| 7903 /* If control flows to this point, this must be a regular token, or |
| 7904 ** the end of the input. Read a regular token using the sqlite3_tokenizer |
| 7905 ** interface. Before doing so, figure out if there is an explicit |
| 7906 ** column specifier for the token. |
| 7907 ** |
| 7908 ** TODO: Strangely, it is not possible to associate a column specifier |
| 7909 ** with a quoted phrase, only with a single token. Not sure if this was |
| 7910 ** an implementation artifact or an intentional decision when fts3 was |
| 7911 ** first implemented. Whichever it was, this module duplicates the |
| 7912 ** limitation. |
| 7913 */ |
| 7914 iCol = pParse->iDefaultCol; |
| 7915 iColLen = 0; |
| 7916 for(ii=0; ii<pParse->nCol; ii++){ |
| 7917 const char *zStr = pParse->azCol[ii]; |
| 7918 int nStr = (int)strlen(zStr); |
| 7919 if( nInput>nStr && zInput[nStr]==':' |
| 7920 && sqlite3_strnicmp(zStr, zInput, nStr)==0 |
| 7921 ){ |
| 7922 iCol = ii; |
| 7923 iColLen = (int)((zInput - z) + nStr + 1); |
| 7924 break; |
| 7925 } |
| 7926 } |
| 7927 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); |
| 7928 *pnConsumed += iColLen; |
| 7929 return rc; |
| 7930 } |
| 7931 |
| 7932 /* |
| 7933 ** The argument is an Fts3Expr structure for a binary operator (any type |
| 7934 ** except an FTSQUERY_PHRASE). Return an integer value representing the |
| 7935 ** precedence of the operator. Lower values have a higher precedence (i.e. |
| 7936 ** group more tightly). For example, in the C language, the == operator |
| 7937 ** groups more tightly than ||, and would therefore have a higher precedence. |
| 7938 ** |
| 7939 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS |
| 7940 ** is defined), the order of the operators in precedence from highest to |
| 7941 ** lowest is: |
| 7942 ** |
| 7943 ** NEAR |
| 7944 ** NOT |
| 7945 ** AND (including implicit ANDs) |
| 7946 ** OR |
| 7947 ** |
| 7948 ** Note that when using the old query syntax, the OR operator has a higher |
| 7949 ** precedence than the AND operator. |
| 7950 */ |
| 7951 static int opPrecedence(Fts3Expr *p){ |
| 7952 assert( p->eType!=FTSQUERY_PHRASE ); |
| 7953 if( sqlite3_fts3_enable_parentheses ){ |
| 7954 return p->eType; |
| 7955 }else if( p->eType==FTSQUERY_NEAR ){ |
| 7956 return 1; |
| 7957 }else if( p->eType==FTSQUERY_OR ){ |
| 7958 return 2; |
| 7959 } |
| 7960 assert( p->eType==FTSQUERY_AND ); |
| 7961 return 3; |
| 7962 } |
| 7963 |
| 7964 /* |
| 7965 ** Argument ppHead contains a pointer to the current head of a query |
| 7966 ** expression tree being parsed. pPrev is the expression node most recently |
| 7967 ** inserted into the tree. This function adds pNew, which is always a binary |
| 7968 ** operator node, into the expression tree based on the relative precedence |
| 7969 ** of pNew and the existing nodes of the tree. This may result in the head |
| 7970 ** of the tree changing, in which case *ppHead is set to the new root node. |
| 7971 */ |
| 7972 static void insertBinaryOperator( |
| 7973 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ |
| 7974 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ |
| 7975 Fts3Expr *pNew /* New binary node to insert into expression tree */ |
| 7976 ){ |
| 7977 Fts3Expr *pSplit = pPrev; |
| 7978 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ |
| 7979 pSplit = pSplit->pParent; |
| 7980 } |
| 7981 |
| 7982 if( pSplit->pParent ){ |
| 7983 assert( pSplit->pParent->pRight==pSplit ); |
| 7984 pSplit->pParent->pRight = pNew; |
| 7985 pNew->pParent = pSplit->pParent; |
| 7986 }else{ |
| 7987 *ppHead = pNew; |
| 7988 } |
| 7989 pNew->pLeft = pSplit; |
| 7990 pSplit->pParent = pNew; |
| 7991 } |
| 7992 |
| 7993 /* |
| 7994 ** Parse the fts3 query expression found in buffer z, length n. This function |
| 7995 ** returns either when the end of the buffer is reached or an unmatched |
| 7996 ** closing bracket - ')' - is encountered. |
| 7997 ** |
| 7998 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the |
| 7999 ** parsed form of the expression and *pnConsumed is set to the number of |
| 8000 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM |
| 8001 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. On the "goto exprparse_out" error paths *pnConsumed is not written. |
| 8002 */ |
| 8003 static int fts3ExprParse( |
| 8004 ParseContext *pParse, /* fts3 query parse context */ |
| 8005 const char *z, int n, /* Text of MATCH query */ |
| 8006 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 8007 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 8008 ){ |
| 8009 Fts3Expr *pRet = 0; /* Root of the expression tree built so far */ |
| 8010 Fts3Expr *pPrev = 0; /* Node most recently added to the tree */ |
| 8011 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ |
| 8012 int nIn = n; /* Bytes of input not yet consumed */ |
| 8013 const char *zIn = z; /* Start of the unconsumed input */ |
| 8014 int rc = SQLITE_OK; |
| 8015 int isRequirePhrase = 1; /* True if the next node must be a phrase or sub-tree */ |
| 8016 |
| 8017 while( rc==SQLITE_OK ){ |
| 8018 Fts3Expr *p = 0; |
| 8019 int nByte = 0; |
| 8020 |
| 8021 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
| 8022 assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); |
| 8023 if( rc==SQLITE_OK ){ |
| 8024 if( p ){ |
| 8025 int isPhrase; |
| 8026 |
| 8027 if( !sqlite3_fts3_enable_parentheses |
| 8028 && p->eType==FTSQUERY_PHRASE && pParse->isNot |
| 8029 ){ |
| 8030 /* Create an implicit NOT operator. */ |
| 8031 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
| 8032 if( !pNot ){ |
| 8033 sqlite3Fts3ExprFree(p); |
| 8034 rc = SQLITE_NOMEM; |
| 8035 goto exprparse_out; |
| 8036 } |
| 8037 pNot->eType = FTSQUERY_NOT; |
| 8038 pNot->pRight = p; |
| 8039 p->pParent = pNot; |
| 8040 if( pNotBranch ){ /* Chain the new NOT above any earlier ones */ |
| 8041 pNot->pLeft = pNotBranch; |
| 8042 pNotBranch->pParent = pNot; |
| 8043 } |
| 8044 pNotBranch = pNot; |
| 8045 p = pPrev; /* NOT nodes are kept off the main tree, so pPrev stays unchanged below */ |
| 8046 }else{ |
| 8047 int eType = p->eType; |
| 8048 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); /* A phrase, or a node that already has a left sub-tree */ |
| 8049 |
| 8050 /* The isRequirePhrase variable is set to true if a phrase or |
| 8051 ** an expression contained in parenthesis is required. If a |
| 8052 ** binary operator (AND, OR, NOT or NEAR) is encountered when |
| 8053 ** isRequirePhrase is set, this is a syntax error. |
| 8054 */ |
| 8055 if( !isPhrase && isRequirePhrase ){ |
| 8056 sqlite3Fts3ExprFree(p); |
| 8057 rc = SQLITE_ERROR; |
| 8058 goto exprparse_out; |
| 8059 } |
| 8060 |
| 8061 if( isPhrase && !isRequirePhrase ){ |
| 8062 /* Insert an implicit AND operator. */ |
| 8063 Fts3Expr *pAnd; |
| 8064 assert( pRet && pPrev ); |
| 8065 pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
| 8066 if( !pAnd ){ |
| 8067 sqlite3Fts3ExprFree(p); |
| 8068 rc = SQLITE_NOMEM; |
| 8069 goto exprparse_out; |
| 8070 } |
| 8071 pAnd->eType = FTSQUERY_AND; |
| 8072 insertBinaryOperator(&pRet, pPrev, pAnd); |
| 8073 pPrev = pAnd; |
| 8074 } |
| 8075 |
| 8076 /* This test catches attempts to make either operand of a NEAR |
| 8077 ** operator something other than a phrase. For example, either of |
| 8078 ** the following: |
| 8079 ** |
| 8080 ** (bracketed expression) NEAR phrase |
| 8081 ** phrase NEAR (bracketed expression) |
| 8082 ** |
| 8083 ** Return an error in either case. |
| 8084 */ |
| 8085 if( pPrev && ( |
| 8086 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
| 8087 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
| 8088 )){ |
| 8089 sqlite3Fts3ExprFree(p); |
| 8090 rc = SQLITE_ERROR; |
| 8091 goto exprparse_out; |
| 8092 } |
| 8093 |
| 8094 if( isPhrase ){ |
| 8095 if( pRet ){ /* Becomes the right operand of the pending operator */ |
| 8096 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
| 8097 pPrev->pRight = p; |
| 8098 p->pParent = pPrev; |
| 8099 }else{ /* First node seen: it is the whole tree so far */ |
| 8100 pRet = p; |
| 8101 } |
| 8102 }else{ |
| 8103 insertBinaryOperator(&pRet, pPrev, p); |
| 8104 } |
| 8105 isRequirePhrase = !isPhrase; /* After an operator a phrase is required, and vice versa */ |
| 8106 } |
| 8107 pPrev = p; |
| 8108 } |
| 8109 assert( nByte>0 ); |
| 8110 } |
| 8111 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
| 8112 nIn -= nByte; /* Step past the bytes used by getNextNode() */ |
| 8113 zIn += nByte; |
| 8114 } |
| 8115 |
| 8116 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ /* Input ended straight after an operator */ |
| 8117 rc = SQLITE_ERROR; |
| 8118 } |
| 8119 |
| 8120 if( rc==SQLITE_DONE ){ |
| 8121 rc = SQLITE_OK; |
| 8122 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ |
| 8123 if( !pRet ){ /* Only "-token" terms were found: nothing to subtract them from */ |
| 8124 rc = SQLITE_ERROR; |
| 8125 }else{ /* Hang the main tree off the left-most free slot of the NOT chain */ |
| 8126 Fts3Expr *pIter = pNotBranch; |
| 8127 while( pIter->pLeft ){ |
| 8128 pIter = pIter->pLeft; |
| 8129 } |
| 8130 pIter->pLeft = pRet; |
| 8131 pRet->pParent = pIter; |
| 8132 pRet = pNotBranch; |
| 8133 } |
| 8134 } |
| 8135 } |
| 8136 *pnConsumed = n - nIn; |
| 8137 |
| 8138 exprparse_out: |
| 8139 if( rc!=SQLITE_OK ){ /* On any error release both partial trees */ |
| 8140 sqlite3Fts3ExprFree(pRet); |
| 8141 sqlite3Fts3ExprFree(pNotBranch); |
| 8142 pRet = 0; |
| 8143 } |
| 8144 *ppExpr = pRet; |
| 8145 return rc; |
| 8146 } |
| 8147 |
| 8148 /* |
| 8149 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed |
| 8150 ** as the only argument is more than nMaxDepth. |
| 8151 */ |
| 8152 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ |
| 8153 int rc = SQLITE_OK; |
| 8154 if( p ){ |
| 8155 if( nMaxDepth<0 ){ |
| 8156 rc = SQLITE_TOOBIG; |
| 8157 }else{ |
| 8158 rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); |
| 8159 if( rc==SQLITE_OK ){ |
| 8160 rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); |
| 8161 } |
| 8162 } |
| 8163 } |
| 8164 return rc; |
| 8165 } |
| 8166 |
| 8167 /* |
| 8168 ** This function attempts to transform the expression tree at (*pp) to |
| 8169 ** an equivalent but more balanced form. The tree is modified in place. |
| 8170 ** If successful, SQLITE_OK is returned and (*pp) set to point to the |
| 8171 ** new root expression node. |
| 8172 ** |
| 8173 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. |
| 8174 ** |
| 8175 ** Otherwise, if an error occurs, an SQLite error code is returned and |
| 8176 ** expression (*pp) freed. |
| 8177 */ |
| 8178 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ |
| 8179 int rc = SQLITE_OK; /* Return code */ |
| 8180 Fts3Expr *pRoot = *pp; /* Initial root node */ |
| 8181 Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ |
| 8182 int eType = pRoot->eType; /* Type of node in this tree */ |
| 8183 |
| 8184 if( nMaxDepth==0 ){ |
| 8185 rc = SQLITE_ERROR; |
| 8186 } |
| 8187 |
| 8188 if( rc==SQLITE_OK ){ |
| 8189 if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ |
| 8190 Fts3Expr **apLeaf; |
| 8191 apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); |
| 8192 if( 0==apLeaf ){ |
| 8193 rc = SQLITE_NOMEM; |
| 8194 }else{ |
| 8195 memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); |
| 8196 } |
| 8197 |
| 8198 if( rc==SQLITE_OK ){ |
| 8199 int i; |
| 8200 Fts3Expr *p; |
| 8201 |
| 8202 /* Set $p to point to the left-most leaf in the tree of eType nodes. */ |
| 8203 for(p=pRoot; p->eType==eType; p=p->pLeft){ |
| 8204 assert( p->pParent==0 || p->pParent->pLeft==p ); |
| 8205 assert( p->pLeft && p->pRight ); |
| 8206 } |
| 8207 |
| 8208 /* This loop runs once for each leaf in the tree of eType nodes. */ |
| 8209 while( 1 ){ |
| 8210 int iLvl; |
| 8211 Fts3Expr *pParent = p->pParent; /* Current parent of p */ |
| 8212 |
| 8213 assert( pParent==0 || pParent->pLeft==p ); |
| 8214 p->pParent = 0; |
| 8215 if( pParent ){ |
| 8216 pParent->pLeft = 0; |
| 8217 }else{ |
| 8218 pRoot = 0; |
| 8219 } |
| 8220 rc = fts3ExprBalance(&p, nMaxDepth-1); |
| 8221 if( rc!=SQLITE_OK ) break; |
| 8222 |
| 8223 for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ |
| 8224 if( apLeaf[iLvl]==0 ){ |
| 8225 apLeaf[iLvl] = p; |
| 8226 p = 0; |
| 8227 }else{ |
| 8228 assert( pFree ); |
| 8229 pFree->pLeft = apLeaf[iLvl]; |
| 8230 pFree->pRight = p; |
| 8231 pFree->pLeft->pParent = pFree; |
| 8232 pFree->pRight->pParent = pFree; |
| 8233 |
| 8234 p = pFree; |
| 8235 pFree = pFree->pParent; |
| 8236 p->pParent = 0; |
| 8237 apLeaf[iLvl] = 0; |
| 8238 } |
| 8239 } |
| 8240 if( p ){ |
| 8241 sqlite3Fts3ExprFree(p); |
| 8242 rc = SQLITE_TOOBIG; |
| 8243 break; |
| 8244 } |
| 8245 |
| 8246 /* If that was the last leaf node, break out of the loop */ |
| 8247 if( pParent==0 ) break; |
| 8248 |
| 8249 /* Set $p to point to the next leaf in the tree of eType nodes */ |
| 8250 for(p=pParent->pRight; p->eType==eType; p=p->pLeft); |
| 8251 |
| 8252 /* Remove pParent from the original tree. */ |
| 8253 assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); |
| 8254 pParent->pRight->pParent = pParent->pParent; |
| 8255 if( pParent->pParent ){ |
| 8256 pParent->pParent->pLeft = pParent->pRight; |
| 8257 }else{ |
| 8258 assert( pParent==pRoot ); |
| 8259 pRoot = pParent->pRight; |
| 8260 } |
| 8261 |
| 8262 /* Link pParent into the free node list. It will be used as an |
| 8263 ** internal node of the new tree. */ |
| 8264 pParent->pParent = pFree; |
| 8265 pFree = pParent; |
| 8266 } |
| 8267 |
| 8268 if( rc==SQLITE_OK ){ |
| 8269 p = 0; |
| 8270 for(i=0; i<nMaxDepth; i++){ |
| 8271 if( apLeaf[i] ){ |
| 8272 if( p==0 ){ |
| 8273 p = apLeaf[i]; |
| 8274 p->pParent = 0; |
| 8275 }else{ |
| 8276 assert( pFree!=0 ); |
| 8277 pFree->pRight = p; |
| 8278 pFree->pLeft = apLeaf[i]; |
| 8279 pFree->pLeft->pParent = pFree; |
| 8280 pFree->pRight->pParent = pFree; |
| 8281 |
| 8282 p = pFree; |
| 8283 pFree = pFree->pParent; |
| 8284 p->pParent = 0; |
| 8285 } |
| 8286 } |
| 8287 } |
| 8288 pRoot = p; |
| 8289 }else{ |
| 8290 /* An error occurred. Delete the contents of the apLeaf[] array |
| 8291 ** and pFree list. Everything else is cleaned up by the call to |
| 8292 ** sqlite3Fts3ExprFree(pRoot) below. */ |
| 8293 Fts3Expr *pDel; |
| 8294 for(i=0; i<nMaxDepth; i++){ |
| 8295 sqlite3Fts3ExprFree(apLeaf[i]); |
| 8296 } |
| 8297 while( (pDel=pFree)!=0 ){ |
| 8298 pFree = pDel->pParent; |
| 8299 sqlite3_free(pDel); |
| 8300 } |
| 8301 } |
| 8302 |
| 8303 assert( pFree==0 ); |
| 8304 sqlite3_free( apLeaf ); |
| 8305 } |
| 8306 }else if( eType==FTSQUERY_NOT ){ |
| 8307 Fts3Expr *pLeft = pRoot->pLeft; |
| 8308 Fts3Expr *pRight = pRoot->pRight; |
| 8309 |
| 8310 pRoot->pLeft = 0; |
| 8311 pRoot->pRight = 0; |
| 8312 pLeft->pParent = 0; |
| 8313 pRight->pParent = 0; |
| 8314 |
| 8315 rc = fts3ExprBalance(&pLeft, nMaxDepth-1); |
| 8316 if( rc==SQLITE_OK ){ |
| 8317 rc = fts3ExprBalance(&pRight, nMaxDepth-1); |
| 8318 } |
| 8319 |
| 8320 if( rc!=SQLITE_OK ){ |
| 8321 sqlite3Fts3ExprFree(pRight); |
| 8322 sqlite3Fts3ExprFree(pLeft); |
| 8323 }else{ |
| 8324 assert( pLeft && pRight ); |
| 8325 pRoot->pLeft = pLeft; |
| 8326 pLeft->pParent = pRoot; |
| 8327 pRoot->pRight = pRight; |
| 8328 pRight->pParent = pRoot; |
| 8329 } |
| 8330 } |
| 8331 } |
| 8332 |
| 8333 if( rc!=SQLITE_OK ){ |
| 8334 sqlite3Fts3ExprFree(pRoot); |
| 8335 pRoot = 0; |
| 8336 } |
| 8337 *pp = pRoot; |
| 8338 return rc; |
| 8339 } |
| 8340 |
| 8341 /* |
| 8342 ** This function is similar to sqlite3Fts3ExprParse(), with the following |
| 8343 ** differences: |
| 8344 ** |
| 8345 ** 1. It does not do expression rebalancing. |
| 8346 ** 2. It does not check that the expression does not exceed the |
| 8347 ** maximum allowable depth. |
| 8348 ** 3. Even if it fails, *ppExpr may still be set to point to an |
| 8349 ** expression tree. It should be deleted using sqlite3Fts3ExprFree() |
| 8350 ** in this case. |
| 8351 */ |
| 8352 static int fts3ExprParseUnbalanced( |
| 8353 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 8354 int iLangid, /* Language id for tokenizer */ |
| 8355 char **azCol, /* Array of column names for fts3 table */ |
| 8356 int bFts4, /* True to allow FTS4-only syntax */ |
| 8357 int nCol, /* Number of entries in azCol[] */ |
| 8358 int iDefaultCol, /* Default column to query */ |
| 8359 const char *z, int n, /* Text of MATCH query */ |
| 8360 Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
| 8361 ){ |
| 8362 int nParsed; |
| 8363 int rc; |
| 8364 ParseContext sParse; |
| 8365 |
| 8366 memset(&sParse, 0, sizeof(ParseContext)); |
| 8367 sParse.pTokenizer = pTokenizer; |
| 8368 sParse.iLangid = iLangid; |
| 8369 sParse.azCol = (const char **)azCol; |
| 8370 sParse.nCol = nCol; |
| 8371 sParse.iDefaultCol = iDefaultCol; |
| 8372 sParse.bFts4 = bFts4; |
| 8373 if( z==0 ){ |
| 8374 *ppExpr = 0; |
| 8375 return SQLITE_OK; |
| 8376 } |
| 8377 if( n<0 ){ |
| 8378 n = (int)strlen(z); |
| 8379 } |
| 8380 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
| 8381 assert( rc==SQLITE_OK || *ppExpr==0 ); |
| 8382 |
| 8383 /* Check for mismatched parenthesis */ |
| 8384 if( rc==SQLITE_OK && sParse.nNest ){ |
| 8385 rc = SQLITE_ERROR; |
| 8386 } |
| 8387 |
| 8388 return rc; |
| 8389 } |
| 8390 |
| 8391 /* |
| 8392 ** Parameters z and n contain a pointer to and length of a buffer containing |
| 8393 ** an fts3 query expression, respectively. This function attempts to parse the |
| 8394 ** query expression and create a tree of Fts3Expr structures representing the |
| 8395 ** parsed expression. If successful, *ppExpr is set to point to the head |
| 8396 ** of the parsed expression tree and SQLITE_OK is returned. If an error |
| 8397 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse |
| 8398 ** error) is returned and *ppExpr is set to 0. |
| 8399 ** |
| 8400 ** If parameter n is a negative number, then z is assumed to point to a |
| 8401 ** nul-terminated string and the length is determined using strlen(). |
| 8402 ** |
| 8403 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to |
| 8404 ** use to normalize query tokens while parsing the expression. The azCol[] |
| 8405 ** array, which is assumed to contain nCol entries, should contain the names |
| 8406 ** of each column in the target fts3 table, in order from left to right. |
| 8407 ** Column names must be nul-terminated strings. |
| 8408 ** |
| 8409 ** The iDefaultCol parameter should be passed the index of the table column |
| 8410 ** that appears on the left-hand-side of the MATCH operator (the default |
| 8411 ** column to match against for tokens for which a column name is not explicitly |
| 8412 ** specified as part of the query string), or -1 if tokens may by default |
| 8413 ** match any table column. |
| 8414 */ |
| 8415 SQLITE_PRIVATE int sqlite3Fts3ExprParse( |
| 8416 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 8417 int iLangid, /* Language id for tokenizer */ |
| 8418 char **azCol, /* Array of column names for fts3 table */ |
| 8419 int bFts4, /* True to allow FTS4-only syntax */ |
| 8420 int nCol, /* Number of entries in azCol[] */ |
| 8421 int iDefaultCol, /* Default column to query */ |
| 8422 const char *z, int n, /* Text of MATCH query */ |
| 8423 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 8424 char **pzErr /* OUT: Error message (sqlite3_malloc) */ |
| 8425 ){ |
| 8426 int rc = fts3ExprParseUnbalanced( |
| 8427 pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr |
| 8428 ); |
| 8429 |
| 8430 /* Rebalance the expression. And check that its depth does not exceed |
| 8431 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ |
| 8432 if( rc==SQLITE_OK && *ppExpr ){ |
| 8433 rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 8434 if( rc==SQLITE_OK ){ |
| 8435 rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 8436 } |
| 8437 } |
| 8438 |
| 8439 if( rc!=SQLITE_OK ){ |
| 8440 sqlite3Fts3ExprFree(*ppExpr); |
| 8441 *ppExpr = 0; |
| 8442 if( rc==SQLITE_TOOBIG ){ |
| 8443 sqlite3Fts3ErrMsg(pzErr, |
| 8444 "FTS expression tree is too large (maximum depth %d)", |
| 8445 SQLITE_FTS3_MAX_EXPR_DEPTH |
| 8446 ); |
| 8447 rc = SQLITE_ERROR; |
| 8448 }else if( rc==SQLITE_ERROR ){ |
| 8449 sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); |
| 8450 } |
| 8451 } |
| 8452 |
| 8453 return rc; |
| 8454 } |
| 8455 |
| 8456 /* |
| 8457 ** Free a single node of an expression tree. |
| 8458 */ |
| 8459 static void fts3FreeExprNode(Fts3Expr *p){ |
| 8460 assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); |
| 8461 sqlite3Fts3EvalPhraseCleanup(p->pPhrase); |
| 8462 sqlite3_free(p->aMI); |
| 8463 sqlite3_free(p); |
| 8464 } |
| 8465 |
| 8466 /* |
| 8467 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
| 8468 ** |
| 8469 ** This function would be simpler if it recursively called itself. But |
| 8470 ** that would mean passing a sufficiently large expression to ExprParse() |
| 8471 ** could cause a stack overflow. |
| 8472 */ |
| 8473 SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ |
| 8474 Fts3Expr *p; |
| 8475 assert( pDel==0 || pDel->pParent==0 ); |
| 8476 for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ |
| 8477 assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 8478 } |
| 8479 while( p ){ |
| 8480 Fts3Expr *pParent = p->pParent; |
| 8481 fts3FreeExprNode(p); |
| 8482 if( pParent && p==pParent->pLeft && pParent->pRight ){ |
| 8483 p = pParent->pRight; |
| 8484 while( p && (p->pLeft || p->pRight) ){ |
| 8485 assert( p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 8486 p = (p->pLeft ? p->pLeft : p->pRight); |
| 8487 } |
| 8488 }else{ |
| 8489 p = pParent; |
| 8490 } |
| 8491 } |
| 8492 } |
| 8493 |
| 8494 /**************************************************************************** |
| 8495 ***************************************************************************** |
| 8496 ** Everything after this point is just test code. |
| 8497 */ |
| 8498 |
| 8499 #ifdef SQLITE_TEST |
| 8500 |
| 8501 /* #include <stdio.h> */ |
| 8502 |
| 8503 /* |
| 8504 ** Function to query the hash-table of tokenizers (see README.tokenizers). |
| 8505 */ |
| 8506 static int queryTestTokenizer( |
| 8507 sqlite3 *db, |
| 8508 const char *zName, |
| 8509 const sqlite3_tokenizer_module **pp |
| 8510 ){ |
| 8511 int rc; |
| 8512 sqlite3_stmt *pStmt; |
| 8513 const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| 8514 |
| 8515 *pp = 0; |
| 8516 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 8517 if( rc!=SQLITE_OK ){ |
| 8518 return rc; |
| 8519 } |
| 8520 |
| 8521 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 8522 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 8523 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| 8524 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| 8525 } |
| 8526 } |
| 8527 |
| 8528 return sqlite3_finalize(pStmt); |
| 8529 } |
| 8530 |
| 8531 /* |
| 8532 ** Return a pointer to a buffer containing a text representation of the |
| 8533 ** expression passed as the first argument. The buffer is obtained from |
| 8534 ** sqlite3_malloc(). It is the responsibility of the caller to use |
| 8535 ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
| 8536 ** NULL is returned. |
| 8537 ** |
| 8538 ** If the second argument is not NULL, then its contents are prepended to |
| 8539 ** the returned expression text and then freed using sqlite3_free(). |
| 8540 */ |
| 8541 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
| 8542 if( pExpr==0 ){ |
| 8543 return sqlite3_mprintf(""); |
| 8544 } |
| 8545 switch( pExpr->eType ){ |
| 8546 case FTSQUERY_PHRASE: { |
| 8547 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 8548 int i; |
| 8549 zBuf = sqlite3_mprintf( |
| 8550 "%zPHRASE %d 0", zBuf, pPhrase->iColumn); |
| 8551 for(i=0; zBuf && i<pPhrase->nToken; i++){ |
| 8552 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
| 8553 pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
| 8554 (pPhrase->aToken[i].isPrefix?"+":"") |
| 8555 ); |
| 8556 } |
| 8557 return zBuf; |
| 8558 } |
| 8559 |
| 8560 case FTSQUERY_NEAR: |
| 8561 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); |
| 8562 break; |
| 8563 case FTSQUERY_NOT: |
| 8564 zBuf = sqlite3_mprintf("%zNOT ", zBuf); |
| 8565 break; |
| 8566 case FTSQUERY_AND: |
| 8567 zBuf = sqlite3_mprintf("%zAND ", zBuf); |
| 8568 break; |
| 8569 case FTSQUERY_OR: |
| 8570 zBuf = sqlite3_mprintf("%zOR ", zBuf); |
| 8571 break; |
| 8572 } |
| 8573 |
| 8574 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); |
| 8575 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); |
| 8576 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); |
| 8577 |
| 8578 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); |
| 8579 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); |
| 8580 |
| 8581 return zBuf; |
| 8582 } |
| 8583 |
| 8584 /* |
| 8585 ** This is the implementation of a scalar SQL function used to test the |
| 8586 ** expression parser. It should be called as follows: |
| 8587 ** |
| 8588 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); |
| 8589 ** |
| 8590 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used |
| 8591 ** to parse the query expression (see README.tokenizers). The second argument |
| 8592 ** is the query expression to parse. Each subsequent argument is the name |
| 8593 ** of a column of the fts3 table that the query expression may refer to. |
| 8594 ** For example: |
| 8595 ** |
| 8596 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); |
| 8597 */ |
| 8598 static void fts3ExprTest( |
| 8599 sqlite3_context *context, |
| 8600 int argc, |
| 8601 sqlite3_value **argv |
| 8602 ){ |
| 8603 sqlite3_tokenizer_module const *pModule = 0; |
| 8604 sqlite3_tokenizer *pTokenizer = 0; |
| 8605 int rc; |
| 8606 char **azCol = 0; |
| 8607 const char *zExpr; |
| 8608 int nExpr; |
| 8609 int nCol; |
| 8610 int ii; |
| 8611 Fts3Expr *pExpr; |
| 8612 char *zBuf = 0; |
| 8613 sqlite3 *db = sqlite3_context_db_handle(context); |
| 8614 |
| 8615 if( argc<3 ){ |
| 8616 sqlite3_result_error(context, |
| 8617 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 |
| 8618 ); |
| 8619 return; |
| 8620 } |
| 8621 |
| 8622 rc = queryTestTokenizer(db, |
| 8623 (const char *)sqlite3_value_text(argv[0]), &pModule); |
| 8624 if( rc==SQLITE_NOMEM ){ |
| 8625 sqlite3_result_error_nomem(context); |
| 8626 goto exprtest_out; |
| 8627 }else if( !pModule ){ |
| 8628 sqlite3_result_error(context, "No such tokenizer module", -1); |
| 8629 goto exprtest_out; |
| 8630 } |
| 8631 |
| 8632 rc = pModule->xCreate(0, 0, &pTokenizer); |
| 8633 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); |
| 8634 if( rc==SQLITE_NOMEM ){ |
| 8635 sqlite3_result_error_nomem(context); |
| 8636 goto exprtest_out; |
| 8637 } |
| 8638 pTokenizer->pModule = pModule; |
| 8639 |
| 8640 zExpr = (const char *)sqlite3_value_text(argv[1]); |
| 8641 nExpr = sqlite3_value_bytes(argv[1]); |
| 8642 nCol = argc-2; |
| 8643 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); |
| 8644 if( !azCol ){ |
| 8645 sqlite3_result_error_nomem(context); |
| 8646 goto exprtest_out; |
| 8647 } |
| 8648 for(ii=0; ii<nCol; ii++){ |
| 8649 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
| 8650 } |
| 8651 |
| 8652 if( sqlite3_user_data(context) ){ |
| 8653 char *zDummy = 0; |
| 8654 rc = sqlite3Fts3ExprParse( |
| 8655 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy |
| 8656 ); |
| 8657 assert( rc==SQLITE_OK || pExpr==0 ); |
| 8658 sqlite3_free(zDummy); |
| 8659 }else{ |
| 8660 rc = fts3ExprParseUnbalanced( |
| 8661 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr |
| 8662 ); |
| 8663 } |
| 8664 |
| 8665 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| 8666 sqlite3Fts3ExprFree(pExpr); |
| 8667 sqlite3_result_error(context, "Error parsing expression", -1); |
| 8668 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
| 8669 sqlite3_result_error_nomem(context); |
| 8670 }else{ |
| 8671 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
| 8672 sqlite3_free(zBuf); |
| 8673 } |
| 8674 |
| 8675 sqlite3Fts3ExprFree(pExpr); |
| 8676 |
| 8677 exprtest_out: |
| 8678 if( pModule && pTokenizer ){ |
| 8679 rc = pModule->xDestroy(pTokenizer); |
| 8680 } |
| 8681 sqlite3_free(azCol); |
| 8682 } |
| 8683 |
| 8684 /* |
| 8685 ** Register the query expression parser test function fts3_exprtest() |
| 8686 ** with database connection db. |
| 8687 */ |
| 8688 SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
| 8689 int rc = sqlite3_create_function( |
| 8690 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
| 8691 ); |
| 8692 if( rc==SQLITE_OK ){ |
| 8693 rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", |
| 8694 -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 |
| 8695 ); |
| 8696 } |
| 8697 return rc; |
| 8698 } |
| 8699 |
| 8700 #endif |
| 8701 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 8702 |
| 8703 /************** End of fts3_expr.c *******************************************/ |
| 8704 /************** Begin file fts3_hash.c ***************************************/ |
| 8705 /* |
| 8706 ** 2001 September 22 |
| 8707 ** |
| 8708 ** The author disclaims copyright to this source code. In place of |
| 8709 ** a legal notice, here is a blessing: |
| 8710 ** |
| 8711 ** May you do good and not evil. |
| 8712 ** May you find forgiveness for yourself and forgive others. |
| 8713 ** May you share freely, never taking more than you give. |
| 8714 ** |
| 8715 ************************************************************************* |
| 8716 ** This is the implementation of generic hash-tables used in SQLite. |
| 8717 ** We've modified it slightly to serve as a standalone hash table |
| 8718 ** implementation for the full-text indexing module. |
| 8719 */ |
| 8720 |
| 8721 /* |
| 8722 ** The code in this file is only compiled if: |
| 8723 ** |
| 8724 ** * The FTS3 module is being built as an extension |
| 8725 ** (in which case SQLITE_CORE is not defined), or |
| 8726 ** |
| 8727 ** * The FTS3 module is being built into the core of |
| 8728 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 8729 */ |
| 8730 /* #include "fts3Int.h" */ |
| 8731 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 8732 |
| 8733 /* #include <assert.h> */ |
| 8734 /* #include <stdlib.h> */ |
| 8735 /* #include <string.h> */ |
| 8736 |
| 8737 /* #include "fts3_hash.h" */ |
| 8738 |
/*
** Memory allocation wrappers used by the hash table code.
**
** fts3HashMalloc() returns n bytes of zeroed memory obtained from
** sqlite3_malloc(), or NULL if the allocation fails.
*/
static void *fts3HashMalloc(int n){
  void *pNew = sqlite3_malloc(n);
  if( pNew==0 ) return 0;
  memset(pNew, 0, n);
  return pNew;
}

/*
** Release memory using sqlite3_free().
*/
static void fts3HashFree(void *p){
  sqlite3_free(p);
}
| 8752 |
| 8753 /* Turn bulk memory into a hash table object by initializing the |
| 8754 ** fields of the Hash structure. |
| 8755 ** |
| 8756 ** "pNew" is a pointer to the hash table that is to be initialized. |
| 8757 ** keyClass is one of the constants |
| 8758 ** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass |
| 8759 ** determines what kind of key the hash table will use. "copyKey" is |
| 8760 ** true if the hash table should make its own private copy of keys and |
| 8761 ** false if it should just use the supplied pointer. |
| 8762 */ |
| 8763 SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copy
Key){ |
| 8764 assert( pNew!=0 ); |
| 8765 assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); |
| 8766 pNew->keyClass = keyClass; |
| 8767 pNew->copyKey = copyKey; |
| 8768 pNew->first = 0; |
| 8769 pNew->count = 0; |
| 8770 pNew->htsize = 0; |
| 8771 pNew->ht = 0; |
| 8772 } |
| 8773 |
| 8774 /* Remove all entries from a hash table. Reclaim all memory. |
| 8775 ** Call this routine to delete a hash table or to reset a hash table |
| 8776 ** to the empty state. |
| 8777 */ |
| 8778 SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ |
| 8779 Fts3HashElem *elem; /* For looping over all elements of the table */ |
| 8780 |
| 8781 assert( pH!=0 ); |
| 8782 elem = pH->first; |
| 8783 pH->first = 0; |
| 8784 fts3HashFree(pH->ht); |
| 8785 pH->ht = 0; |
| 8786 pH->htsize = 0; |
| 8787 while( elem ){ |
| 8788 Fts3HashElem *next_elem = elem->next; |
| 8789 if( pH->copyKey && elem->pKey ){ |
| 8790 fts3HashFree(elem->pKey); |
| 8791 } |
| 8792 fts3HashFree(elem); |
| 8793 elem = next_elem; |
| 8794 } |
| 8795 pH->count = 0; |
| 8796 } |
| 8797 |
/*
** Hash and comparison functions when the mode is FTS3_HASH_STRING
**
** fts3StrHash() hashes the first nKey bytes of pKey. If nKey is zero or
** negative, pKey is treated as a nul-terminated string and its strlen()
** is used instead. The result is always in the range 0..0x7fffffff.
*/
static int fts3StrHash(const void *pKey, int nKey){
  const char *zKey = (const char *)pKey;
  unsigned hash = 0;
  int nLeft = nKey>0 ? nKey : (int)strlen(zKey);

  for(; nLeft>0; nLeft--, zKey++){
    hash = (hash<<3) ^ hash ^ *zKey;
  }
  return (int)(hash & 0x7fffffff);
}
/*
** Compare two string keys. Keys of different length never match (1 is
** returned). Otherwise the result is that of strncmp() over n1 bytes,
** so 0 means the keys are equal.
*/
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  const char *zA = (const char *)pKey1;
  const char *zB = (const char *)pKey2;
  return (n1==n2) ? strncmp(zA, zB, n1) : 1;
}
| 8815 |
/*
** Hash and comparison functions when the mode is FTS3_HASH_BINARY
**
** fts3BinHash() hashes the nKey bytes at pKey. A zero or negative nKey
** hashes no bytes and yields 0. The result is always in the range
** 0..0x7fffffff.
**
** The running hash is kept in an unsigned variable, as in fts3StrHash(),
** because left-shifting a signed int past its range is undefined
** behavior. The bit pattern, and therefore the returned value, is the
** same as a wrapping two's-complement signed computation would give.
*/
static int fts3BinHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  unsigned int h = 0;
  while( nKey-- > 0 ){
    h = (h<<3) ^ h ^ (unsigned int)*(z++);
  }
  return (int)(h & 0x7fffffff);
}
/*
** Compare two binary keys. Keys of different length never match (1 is
** returned). Otherwise the result is that of memcmp() over n1 bytes,
** so 0 means the keys are equal.
*/
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  if( n1==n2 ){
    return memcmp(pKey1, pKey2, n1);
  }
  return 1;
}
| 8831 |
| 8832 /* |
| 8833 ** Return a pointer to the appropriate hash function given the key class. |
| 8834 ** |
| 8835 ** The C syntax in this function definition may be unfamilar to some |
| 8836 ** programmers, so we provide the following additional explanation: |
| 8837 ** |
| 8838 ** The name of the function is "ftsHashFunction". The function takes a |
| 8839 ** single parameter "keyClass". The return value of ftsHashFunction() |
| 8840 ** is a pointer to another function. Specifically, the return value |
| 8841 ** of ftsHashFunction() is a pointer to a function that takes two parameters |
| 8842 ** with types "const void*" and "int" and returns an "int". |
| 8843 */ |
| 8844 static int (*ftsHashFunction(int keyClass))(const void*,int){ |
| 8845 if( keyClass==FTS3_HASH_STRING ){ |
| 8846 return &fts3StrHash; |
| 8847 }else{ |
| 8848 assert( keyClass==FTS3_HASH_BINARY ); |
| 8849 return &fts3BinHash; |
| 8850 } |
| 8851 } |
| 8852 |
| 8853 /* |
| 8854 ** Return a pointer to the appropriate hash function given the key class. |
| 8855 ** |
| 8856 ** For help in interpreted the obscure C code in the function definition, |
| 8857 ** see the header comment on the previous function. |
| 8858 */ |
| 8859 static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ |
| 8860 if( keyClass==FTS3_HASH_STRING ){ |
| 8861 return &fts3StrCompare; |
| 8862 }else{ |
| 8863 assert( keyClass==FTS3_HASH_BINARY ); |
| 8864 return &fts3BinCompare; |
| 8865 } |
| 8866 } |
| 8867 |
| 8868 /* Link an element into the hash table |
| 8869 */ |
| 8870 static void fts3HashInsertElement( |
| 8871 Fts3Hash *pH, /* The complete hash table */ |
| 8872 struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ |
| 8873 Fts3HashElem *pNew /* The element to be inserted */ |
| 8874 ){ |
| 8875 Fts3HashElem *pHead; /* First element already in pEntry */ |
| 8876 pHead = pEntry->chain; |
| 8877 if( pHead ){ |
| 8878 pNew->next = pHead; |
| 8879 pNew->prev = pHead->prev; |
| 8880 if( pHead->prev ){ pHead->prev->next = pNew; } |
| 8881 else { pH->first = pNew; } |
| 8882 pHead->prev = pNew; |
| 8883 }else{ |
| 8884 pNew->next = pH->first; |
| 8885 if( pH->first ){ pH->first->prev = pNew; } |
| 8886 pNew->prev = 0; |
| 8887 pH->first = pNew; |
| 8888 } |
| 8889 pEntry->count++; |
| 8890 pEntry->chain = pNew; |
| 8891 } |
| 8892 |
| 8893 |
| 8894 /* Resize the hash table so that it cantains "new_size" buckets. |
| 8895 ** "new_size" must be a power of 2. The hash table might fail |
| 8896 ** to resize if sqliteMalloc() fails. |
| 8897 ** |
| 8898 ** Return non-zero if a memory allocation error occurs. |
| 8899 */ |
| 8900 static int fts3Rehash(Fts3Hash *pH, int new_size){ |
| 8901 struct _fts3ht *new_ht; /* The new hash table */ |
| 8902 Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ |
| 8903 int (*xHash)(const void*,int); /* The hash function */ |
| 8904 |
| 8905 assert( (new_size & (new_size-1))==0 ); |
| 8906 new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); |
| 8907 if( new_ht==0 ) return 1; |
| 8908 fts3HashFree(pH->ht); |
| 8909 pH->ht = new_ht; |
| 8910 pH->htsize = new_size; |
| 8911 xHash = ftsHashFunction(pH->keyClass); |
| 8912 for(elem=pH->first, pH->first=0; elem; elem = next_elem){ |
| 8913 int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); |
| 8914 next_elem = elem->next; |
| 8915 fts3HashInsertElement(pH, &new_ht[h], elem); |
| 8916 } |
| 8917 return 0; |
| 8918 } |
| 8919 |
| 8920 /* This function (for internal use only) locates an element in an |
| 8921 ** hash table that matches the given key. The hash for this key has |
| 8922 ** already been computed and is passed as the 4th parameter. |
| 8923 */ |
| 8924 static Fts3HashElem *fts3FindElementByHash( |
| 8925 const Fts3Hash *pH, /* The pH to be searched */ |
| 8926 const void *pKey, /* The key we are searching for */ |
| 8927 int nKey, |
| 8928 int h /* The hash for this key. */ |
| 8929 ){ |
| 8930 Fts3HashElem *elem; /* Used to loop thru the element list */ |
| 8931 int count; /* Number of elements left to test */ |
| 8932 int (*xCompare)(const void*,int,const void*,int); /* comparison function */ |
| 8933 |
| 8934 if( pH->ht ){ |
| 8935 struct _fts3ht *pEntry = &pH->ht[h]; |
| 8936 elem = pEntry->chain; |
| 8937 count = pEntry->count; |
| 8938 xCompare = ftsCompareFunction(pH->keyClass); |
| 8939 while( count-- && elem ){ |
| 8940 if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ |
| 8941 return elem; |
| 8942 } |
| 8943 elem = elem->next; |
| 8944 } |
| 8945 } |
| 8946 return 0; |
| 8947 } |
| 8948 |
| 8949 /* Remove a single entry from the hash table given a pointer to that |
| 8950 ** element and a hash on the element's key. |
| 8951 */ |
| 8952 static void fts3RemoveElementByHash( |
| 8953 Fts3Hash *pH, /* The pH containing "elem" */ |
| 8954 Fts3HashElem* elem, /* The element to be removed from the pH */ |
| 8955 int h /* Hash value for the element */ |
| 8956 ){ |
| 8957 struct _fts3ht *pEntry; |
| 8958 if( elem->prev ){ |
| 8959 elem->prev->next = elem->next; |
| 8960 }else{ |
| 8961 pH->first = elem->next; |
| 8962 } |
| 8963 if( elem->next ){ |
| 8964 elem->next->prev = elem->prev; |
| 8965 } |
| 8966 pEntry = &pH->ht[h]; |
| 8967 if( pEntry->chain==elem ){ |
| 8968 pEntry->chain = elem->next; |
| 8969 } |
| 8970 pEntry->count--; |
| 8971 if( pEntry->count<=0 ){ |
| 8972 pEntry->chain = 0; |
| 8973 } |
| 8974 if( pH->copyKey && elem->pKey ){ |
| 8975 fts3HashFree(elem->pKey); |
| 8976 } |
| 8977 fts3HashFree( elem ); |
| 8978 pH->count--; |
| 8979 if( pH->count<=0 ){ |
| 8980 assert( pH->first==0 ); |
| 8981 assert( pH->count==0 ); |
| 8982 fts3HashClear(pH); |
| 8983 } |
| 8984 } |
| 8985 |
| 8986 SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( |
| 8987 const Fts3Hash *pH, |
| 8988 const void *pKey, |
| 8989 int nKey |
| 8990 ){ |
| 8991 int h; /* A hash on key */ |
| 8992 int (*xHash)(const void*,int); /* The hash function */ |
| 8993 |
| 8994 if( pH==0 || pH->ht==0 ) return 0; |
| 8995 xHash = ftsHashFunction(pH->keyClass); |
| 8996 assert( xHash!=0 ); |
| 8997 h = (*xHash)(pKey,nKey); |
| 8998 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 8999 return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); |
| 9000 } |
| 9001 |
| 9002 /* |
| 9003 ** Attempt to locate an element of the hash table pH with a key |
| 9004 ** that matches pKey,nKey. Return the data for this element if it is |
| 9005 ** found, or NULL if there is no match. |
| 9006 */ |
| 9007 SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, i
nt nKey){ |
| 9008 Fts3HashElem *pElem; /* The element that matches key (if any) */ |
| 9009 |
| 9010 pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); |
| 9011 return pElem ? pElem->data : 0; |
| 9012 } |
| 9013 |
| 9014 /* Insert an element into the hash table pH. The key is pKey,nKey |
| 9015 ** and the data is "data". |
| 9016 ** |
| 9017 ** If no element exists with a matching key, then a new |
| 9018 ** element is created. A copy of the key is made if the copyKey |
| 9019 ** flag is set. NULL is returned. |
| 9020 ** |
| 9021 ** If another element already exists with the same key, then the |
| 9022 ** new data replaces the old data and the old data is returned. |
| 9023 ** The key is not copied in this instance. If a malloc fails, then |
| 9024 ** the new data is returned and the hash table is unchanged. |
| 9025 ** |
| 9026 ** If the "data" parameter to this function is NULL, then the |
| 9027 ** element corresponding to "key" is removed from the hash table. |
| 9028 */ |
| 9029 SQLITE_PRIVATE void *sqlite3Fts3HashInsert( |
| 9030 Fts3Hash *pH, /* The hash table to insert into */ |
| 9031 const void *pKey, /* The key */ |
| 9032 int nKey, /* Number of bytes in the key */ |
| 9033 void *data /* The data */ |
| 9034 ){ |
| 9035 int hraw; /* Raw hash value of the key */ |
| 9036 int h; /* the hash of the key modulo hash table size */ |
| 9037 Fts3HashElem *elem; /* Used to loop thru the element list */ |
| 9038 Fts3HashElem *new_elem; /* New element added to the pH */ |
| 9039 int (*xHash)(const void*,int); /* The hash function */ |
| 9040 |
| 9041 assert( pH!=0 ); |
| 9042 xHash = ftsHashFunction(pH->keyClass); |
| 9043 assert( xHash!=0 ); |
| 9044 hraw = (*xHash)(pKey, nKey); |
| 9045 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 9046 h = hraw & (pH->htsize-1); |
| 9047 elem = fts3FindElementByHash(pH,pKey,nKey,h); |
| 9048 if( elem ){ |
| 9049 void *old_data = elem->data; |
| 9050 if( data==0 ){ |
| 9051 fts3RemoveElementByHash(pH,elem,h); |
| 9052 }else{ |
| 9053 elem->data = data; |
| 9054 } |
| 9055 return old_data; |
| 9056 } |
| 9057 if( data==0 ) return 0; |
| 9058 if( (pH->htsize==0 && fts3Rehash(pH,8)) |
| 9059 || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) |
| 9060 ){ |
| 9061 pH->count = 0; |
| 9062 return data; |
| 9063 } |
| 9064 assert( pH->htsize>0 ); |
| 9065 new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); |
| 9066 if( new_elem==0 ) return data; |
| 9067 if( pH->copyKey && pKey!=0 ){ |
| 9068 new_elem->pKey = fts3HashMalloc( nKey ); |
| 9069 if( new_elem->pKey==0 ){ |
| 9070 fts3HashFree(new_elem); |
| 9071 return data; |
| 9072 } |
| 9073 memcpy((void*)new_elem->pKey, pKey, nKey); |
| 9074 }else{ |
| 9075 new_elem->pKey = (void*)pKey; |
| 9076 } |
| 9077 new_elem->nKey = nKey; |
| 9078 pH->count++; |
| 9079 assert( pH->htsize>0 ); |
| 9080 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 9081 h = hraw & (pH->htsize-1); |
| 9082 fts3HashInsertElement(pH, &pH->ht[h], new_elem); |
| 9083 new_elem->data = data; |
| 9084 return 0; |
| 9085 } |
| 9086 |
| 9087 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 9088 |
| 9089 /************** End of fts3_hash.c *******************************************/ |
| 9090 /************** Begin file fts3_porter.c *************************************/ |
| 9091 /* |
| 9092 ** 2006 September 30 |
| 9093 ** |
| 9094 ** The author disclaims copyright to this source code. In place of |
| 9095 ** a legal notice, here is a blessing: |
| 9096 ** |
| 9097 ** May you do good and not evil. |
| 9098 ** May you find forgiveness for yourself and forgive others. |
| 9099 ** May you share freely, never taking more than you give. |
| 9100 ** |
| 9101 ************************************************************************* |
| 9102 ** Implementation of the full-text-search tokenizer that implements |
| 9103 ** a Porter stemmer. |
| 9104 */ |
| 9105 |
| 9106 /* |
| 9107 ** The code in this file is only compiled if: |
| 9108 ** |
| 9109 ** * The FTS3 module is being built as an extension |
| 9110 ** (in which case SQLITE_CORE is not defined), or |
| 9111 ** |
| 9112 ** * The FTS3 module is being built into the core of |
| 9113 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 9114 */ |
| 9115 /* #include "fts3Int.h" */ |
| 9116 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 9117 |
| 9118 /* #include <assert.h> */ |
| 9119 /* #include <stdlib.h> */ |
| 9120 /* #include <stdio.h> */ |
| 9121 /* #include <string.h> */ |
| 9122 |
| 9123 /* #include "fts3_tokenizer.h" */ |
| 9124 |
| 9125 /* |
| 9126 ** Class derived from sqlite3_tokenizer |
| 9127 */ |
| 9128 typedef struct porter_tokenizer { |
| 9129 sqlite3_tokenizer base; /* Base class */ |
| 9130 } porter_tokenizer; |
| 9131 |
| 9132 /* |
| 9133 ** Class derived from sqlite3_tokenizer_cursor |
| 9134 */ |
| 9135 typedef struct porter_tokenizer_cursor { |
| 9136 sqlite3_tokenizer_cursor base; |
| 9137 const char *zInput; /* input we are tokenizing */ |
| 9138 int nInput; /* size of the input */ |
| 9139 int iOffset; /* current position in zInput */ |
| 9140 int iToken; /* index of next token to be returned */ |
| 9141 char *zToken; /* storage for current token */ |
| 9142 int nAllocated; /* space allocated to zToken buffer */ |
| 9143 } porter_tokenizer_cursor; |
| 9144 |
| 9145 |
| 9146 /* |
| 9147 ** Create a new tokenizer instance. |
| 9148 */ |
| 9149 static int porterCreate( |
| 9150 int argc, const char * const *argv, |
| 9151 sqlite3_tokenizer **ppTokenizer |
| 9152 ){ |
| 9153 porter_tokenizer *t; |
| 9154 |
| 9155 UNUSED_PARAMETER(argc); |
| 9156 UNUSED_PARAMETER(argv); |
| 9157 |
| 9158 t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); |
| 9159 if( t==NULL ) return SQLITE_NOMEM; |
| 9160 memset(t, 0, sizeof(*t)); |
| 9161 *ppTokenizer = &t->base; |
| 9162 return SQLITE_OK; |
| 9163 } |
| 9164 |
| 9165 /* |
| 9166 ** Destroy a tokenizer |
| 9167 */ |
| 9168 static int porterDestroy(sqlite3_tokenizer *pTokenizer){ |
| 9169 sqlite3_free(pTokenizer); |
| 9170 return SQLITE_OK; |
| 9171 } |
| 9172 |
| 9173 /* |
| 9174 ** Prepare to begin tokenizing a particular string. The input |
| 9175 ** string to be tokenized is zInput[0..nInput-1]. A cursor |
| 9176 ** used to incrementally tokenize this string is returned in |
| 9177 ** *ppCursor. |
| 9178 */ |
| 9179 static int porterOpen( |
| 9180 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 9181 const char *zInput, int nInput, /* String to be tokenized */ |
| 9182 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 9183 ){ |
| 9184 porter_tokenizer_cursor *c; |
| 9185 |
| 9186 UNUSED_PARAMETER(pTokenizer); |
| 9187 |
| 9188 c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); |
| 9189 if( c==NULL ) return SQLITE_NOMEM; |
| 9190 |
| 9191 c->zInput = zInput; |
| 9192 if( zInput==0 ){ |
| 9193 c->nInput = 0; |
| 9194 }else if( nInput<0 ){ |
| 9195 c->nInput = (int)strlen(zInput); |
| 9196 }else{ |
| 9197 c->nInput = nInput; |
| 9198 } |
| 9199 c->iOffset = 0; /* start tokenizing at the beginning */ |
| 9200 c->iToken = 0; |
| 9201 c->zToken = NULL; /* no space allocated, yet. */ |
| 9202 c->nAllocated = 0; |
| 9203 |
| 9204 *ppCursor = &c->base; |
| 9205 return SQLITE_OK; |
| 9206 } |
| 9207 |
| 9208 /* |
| 9209 ** Close a tokenization cursor previously opened by a call to |
| 9210 ** porterOpen() above. |
| 9211 */ |
| 9212 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ |
| 9213 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
| 9214 sqlite3_free(c->zToken); |
| 9215 sqlite3_free(c); |
| 9216 return SQLITE_OK; |
| 9217 } |
/*
** Letter classes, indexed by (letter - 'a'):
**
**    0   always a vowel       (a, e, i, o, u)
**    1   always a consonant
**    2   'y', which depends on the neighbouring letter
*/
static const char vOrCType[] = {
  /* a  b  c  d  e  f  g  h  i  j  k  l  m */
     0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
  /* n  o  p  q  r  s  t  u  v  w  x  y  z */
     1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1
};

/*
** isConsonant() and isVowel() report whether the first character of the
** string z is a consonant or a vowel according to the Porter rules.
** Both return 0 for an empty string.  Input letters must be lower-case
** 'a'..'z'.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o' or 'u'.
** 'y' is a consonant unless it follows another consonant, in which
** case it is a vowel.
**
** In these routines the word is stored in reverse order, so the letter
** that "precedes" z[0] in the word is z[1].  The 'y' rule therefore
** reads: 'y' is a consonant unless the next character in z[] is a
** consonant.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
  char ch = z[0];

  if (ch == 0) {
    return 0;
  }
  assert( ch>='a' && ch<='z' );
  switch (vOrCType[ch - 'a']) {
    case 0:
      return 0;
    case 1:
      return 1;
    default:
      /* 'y': consonant at the start of the word or after a vowel */
      return z[1] == 0 || isVowel(z + 1);
  }
}
static int isVowel(const char *z){
  char ch = z[0];

  if (ch == 0) {
    return 0;
  }
  assert( ch>='a' && ch<='z' );
  switch (vOrCType[ch - 'a']) {
    case 0:
      return 1;
    case 1:
      return 0;
    default:
      /* 'y': vowel only when it follows a consonant */
      return isConsonant(z + 1);
  }
}
| 9258 |
/*
** Let V stand for a run of one or more vowels and C for a run of one or
** more consonants.  Every word can then be written as:
**
**           [C] (VC){m} [V]
**
** That is: an optional leading consonant run, m vowel-consonant pairs,
** and an optional trailing vowel run.  "m" is the measure of the word.
**
** Return true if the measure of z is 1 or more, i.e. if the word
** contains at least one vowel that is followed by a consonant.
**
** z[] is stored in reverse order, so the scan actually looks for a
** consonant that is followed by a vowel.
*/
static int m_gt_0(const char *z){
  const char *p = z;

  for (; isVowel(p); p++) {}         /* optional trailing [V] */
  if (*p == 0) {
    return 0;
  }
  for (; isConsonant(p); p++) {}     /* the C of the last VC pair */
  return *p != 0;                    /* anything left starts with a vowel */
}
| 9284 |
/*
** Like m_gt_0() above, except that this routine returns true only when
** the measure m of the (reversed) word z is exactly 1.
*/
static int m_eq_1(const char *z){
  const char *p = z;

  for (; isVowel(p); p++) {}         /* optional trailing [V] */
  if (*p == 0) {
    return 0;
  }
  for (; isConsonant(p); p++) {}     /* C of the only VC pair */
  if (*p == 0) {
    return 0;
  }
  for (; isVowel(p); p++) {}         /* V of the only VC pair */
  if (*p == 0) {
    return 1;
  }
  for (; isConsonant(p); p++) {}     /* optional leading [C] */
  return *p == 0;                    /* more text would mean m>1 */
}
| 9298 |
/*
** Like m_gt_0() above, except that this routine returns true when the
** measure m of the (reversed) word z is greater than 1 rather than
** greater than 0.
*/
static int m_gt_1(const char *z){
  const char *p = z;

  for (; isVowel(p); p++) {}         /* optional trailing [V] */
  if (*p == 0) {
    return 0;
  }
  for (; isConsonant(p); p++) {}     /* C of the last VC pair */
  if (*p == 0) {
    return 0;
  }
  for (; isVowel(p); p++) {}         /* V of the last VC pair */
  if (*p == 0) {
    return 0;
  }
  for (; isConsonant(p); p++) {}     /* C of the pair before it */
  return *p != 0;                    /* a vowel remains, so m>=2 */
}
| 9312 |
/*
** Return TRUE if there is a vowel anywhere in the nul-terminated
** string z.
*/
static int hasVowel(const char *z){
  const char *p = z;

  for (; isConsonant(p); p++) {}
  return *p != 0;
}
| 9320 |
/*
** Return TRUE if the word ends in a double consonant (the same
** consonant twice).
**
** The text is reversed here, so the test is made on the first two
** characters of z[].
*/
static int doubleConsonant(const char *z){
  if (!isConsonant(z)) {
    return 0;
  }
  return z[0] == z[1];
}
| 9330 |
/*
** Return TRUE if the word ends with three letters that are
** consonant-vowel-consonant and the final consonant is not 'w', 'x'
** or 'y'.
**
** The word is reversed here, so the test is made on the first three
** letters of z[], and it is z[0] that may not be one of [wxy].
*/
static int star_oh(const char *z){
  if (!isConsonant(z)) {
    return 0;
  }
  if (z[0] == 'w' || z[0] == 'x' || z[0] == 'y') {
    return 0;
  }
  if (!isVowel(z + 1)) {
    return 0;
  }
  return isConsonant(z + 2) != 0;
}
| 9346 |
/*
** If the word ends with zFrom and xCond() is true for the stem of the
** word that precedes the zFrom ending, then change the ending to zTo.
** A NULL xCond means "no condition".
**
** The input word *pz and zFrom are both in reverse order.  zTo is in
** normal order; it is written backwards in front of the stem, so the
** caller must guarantee there is room before the stem for strlen(zTo)
** bytes.  On substitution *pz is moved to the new start of the word.
**
** Return TRUE if zFrom matches.  Return FALSE if zFrom does not match.
** Note that TRUE is returned even if xCond() fails and no substitution
** occurs.
*/
static int stem(
  char **pz,                 /* The word being stemmed (Reversed) */
  const char *zFrom,         /* If the ending matches this... (Reversed) */
  const char *zTo,           /* ... change the ending to this (not reversed) */
  int (*xCond)(const char*)  /* Condition that must be true */
){
  char *zWord = *pz;
  int nMatch = 0;            /* Leading bytes of zWord equal to zFrom */

  while (zFrom[nMatch] != 0 && zFrom[nMatch] == zWord[nMatch]) {
    nMatch++;
  }
  if (zFrom[nMatch] != 0) {
    return 0;                /* the word does not end with zFrom */
  }

  zWord += nMatch;           /* zWord now points at the (reversed) stem */
  if (xCond != 0 && !xCond(zWord)) {
    return 1;                /* matched, but the condition vetoes the change */
  }

  /* Prepend zTo in reverse, which appends it to the forward word. */
  for (; *zTo; zTo++) {
    zWord--;
    *zWord = *zTo;
  }
  *pz = zWord;
  return 1;
}
| 9375 |
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate.  The input word zIn[0..nIn-1] is copied into zOut with
** US-ASCII case folding and a nul terminator is appended; the output
** length (without the terminator) is written to *pnOut.
**
** If the input word is too long (more than 20 bytes if it contains no
** digits, or more than 6 bytes if it contains digits) the word is
** truncated to 20 or 6 bytes by keeping 10 or 3 bytes from the
** beginning and the same number from the end.
**
** zOut must have room for nIn+1 bytes, since the whole word is copied
** before any truncation is applied.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int k;                /* Index into zIn[] / zOut[] */
  int nOut;             /* Length of the result */
  int nKeep;            /* Bytes kept from each end of an over-long word */
  int sawDigit = 0;     /* True once a digit has been seen */

  for (k = 0; k < nIn; k++) {
    char ch = zIn[k];
    if (ch >= 'A' && ch <= 'Z') {
      ch = ch - 'A' + 'a';
    } else if (ch >= '0' && ch <= '9') {
      sawDigit = 1;
    }
    zOut[k] = ch;
  }
  nOut = k;

  nKeep = sawDigit ? 3 : 10;
  if (nIn > nKeep*2) {
    /* Slide the last nKeep bytes down so they follow the first nKeep. */
    int iSrc = nIn - nKeep;
    for (nOut = nKeep; iSrc < nIn; iSrc++, nOut++) {
      zOut[nOut] = zOut[iSrc];
    }
  }
  zOut[nOut] = 0;
  *pnOut = nOut;
}
| 9406 |
| 9407 |
| 9408 /* |
| 9409 ** Stem the input word zIn[0..nIn-1]. Store the output in zOut. |
| 9410 ** zOut is at least big enough to hold nIn bytes. Write the actual |
| 9411 ** size of the output word (exclusive of the '\0' terminator) into *pnOut. |
| 9412 ** |
| 9413 ** Any upper-case characters in the US-ASCII character set ([A-Z]) |
| 9414 ** are converted to lower case. Upper-case UTF characters are |
| 9415 ** unchanged. |
| 9416 ** |
| 9417 ** Words that are longer than about 20 bytes are stemmed by retaining |
| 9418 ** a few bytes from the beginning and the end of the word. If the |
| 9419 ** word contains digits, 3 bytes are taken from the beginning and |
| 9420 ** 3 bytes from the end. For long words without digits, 10 bytes |
| 9421 ** are taken from each end. US-ASCII case folding still applies. |
| 9422 ** |
| 9423 ** If the input word contains not digits but does characters not |
| 9424 ** in [a-zA-Z] then no stemming is attempted and this routine just |
| 9425 ** copies the input into the input into the output with US-ASCII |
| 9426 ** case folding. |
| 9427 ** |
| 9428 ** Stemming never increases the length of the word. So there is |
| 9429 ** no chance of overflowing the zOut buffer. |
| 9430 */ |
| 9431 static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ |
| 9432 int i, j; |
| 9433 char zReverse[28]; |
| 9434 char *z, *z2; |
| 9435 if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){ |
| 9436 /* The word is too big or too small for the porter stemmer. |
| 9437 ** Fallback to the copy stemmer */ |
| 9438 copy_stemmer(zIn, nIn, zOut, pnOut); |
| 9439 return; |
| 9440 } |
| 9441 for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){ |
| 9442 char c = zIn[i]; |
| 9443 if( c>='A' && c<='Z' ){ |
| 9444 zReverse[j] = c + 'a' - 'A'; |
| 9445 }else if( c>='a' && c<='z' ){ |
| 9446 zReverse[j] = c; |
| 9447 }else{ |
| 9448 /* The use of a character not in [a-zA-Z] means that we fallback |
| 9449 ** to the copy stemmer */ |
| 9450 copy_stemmer(zIn, nIn, zOut, pnOut); |
| 9451 return; |
| 9452 } |
| 9453 } |
| 9454 memset(&zReverse[sizeof(zReverse)-5], 0, 5); |
| 9455 z = &zReverse[j+1]; |
| 9456 |
| 9457 |
| 9458 /* Step 1a */ |
| 9459 if( z[0]=='s' ){ |
| 9460 if( |
| 9461 !stem(&z, "sess", "ss", 0) && |
| 9462 !stem(&z, "sei", "i", 0) && |
| 9463 !stem(&z, "ss", "ss", 0) |
| 9464 ){ |
| 9465 z++; |
| 9466 } |
| 9467 } |
| 9468 |
| 9469 /* Step 1b */ |
| 9470 z2 = z; |
| 9471 if( stem(&z, "dee", "ee", m_gt_0) ){ |
| 9472 /* Do nothing. The work was all in the test */ |
| 9473 }else if( |
| 9474 (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) |
| 9475 && z!=z2 |
| 9476 ){ |
| 9477 if( stem(&z, "ta", "ate", 0) || |
| 9478 stem(&z, "lb", "ble", 0) || |
| 9479 stem(&z, "zi", "ize", 0) ){ |
| 9480 /* Do nothing. The work was all in the test */ |
| 9481 }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ |
| 9482 z++; |
| 9483 }else if( m_eq_1(z) && star_oh(z) ){ |
| 9484 *(--z) = 'e'; |
| 9485 } |
| 9486 } |
| 9487 |
| 9488 /* Step 1c */ |
| 9489 if( z[0]=='y' && hasVowel(z+1) ){ |
| 9490 z[0] = 'i'; |
| 9491 } |
| 9492 |
| 9493 /* Step 2 */ |
| 9494 switch( z[1] ){ |
| 9495 case 'a': |
| 9496 if( !stem(&z, "lanoita", "ate", m_gt_0) ){ |
| 9497 stem(&z, "lanoit", "tion", m_gt_0); |
| 9498 } |
| 9499 break; |
| 9500 case 'c': |
| 9501 if( !stem(&z, "icne", "ence", m_gt_0) ){ |
| 9502 stem(&z, "icna", "ance", m_gt_0); |
| 9503 } |
| 9504 break; |
| 9505 case 'e': |
| 9506 stem(&z, "rezi", "ize", m_gt_0); |
| 9507 break; |
| 9508 case 'g': |
| 9509 stem(&z, "igol", "log", m_gt_0); |
| 9510 break; |
| 9511 case 'l': |
| 9512 if( !stem(&z, "ilb", "ble", m_gt_0) |
| 9513 && !stem(&z, "illa", "al", m_gt_0) |
| 9514 && !stem(&z, "iltne", "ent", m_gt_0) |
| 9515 && !stem(&z, "ile", "e", m_gt_0) |
| 9516 ){ |
| 9517 stem(&z, "ilsuo", "ous", m_gt_0); |
| 9518 } |
| 9519 break; |
| 9520 case 'o': |
| 9521 if( !stem(&z, "noitazi", "ize", m_gt_0) |
| 9522 && !stem(&z, "noita", "ate", m_gt_0) |
| 9523 ){ |
| 9524 stem(&z, "rota", "ate", m_gt_0); |
| 9525 } |
| 9526 break; |
| 9527 case 's': |
| 9528 if( !stem(&z, "msila", "al", m_gt_0) |
| 9529 && !stem(&z, "ssenevi", "ive", m_gt_0) |
| 9530 && !stem(&z, "ssenluf", "ful", m_gt_0) |
| 9531 ){ |
| 9532 stem(&z, "ssensuo", "ous", m_gt_0); |
| 9533 } |
| 9534 break; |
| 9535 case 't': |
| 9536 if( !stem(&z, "itila", "al", m_gt_0) |
| 9537 && !stem(&z, "itivi", "ive", m_gt_0) |
| 9538 ){ |
| 9539 stem(&z, "itilib", "ble", m_gt_0); |
| 9540 } |
| 9541 break; |
| 9542 } |
| 9543 |
| 9544 /* Step 3 */ |
| 9545 switch( z[0] ){ |
| 9546 case 'e': |
| 9547 if( !stem(&z, "etaci", "ic", m_gt_0) |
| 9548 && !stem(&z, "evita", "", m_gt_0) |
| 9549 ){ |
| 9550 stem(&z, "ezila", "al", m_gt_0); |
| 9551 } |
| 9552 break; |
| 9553 case 'i': |
| 9554 stem(&z, "itici", "ic", m_gt_0); |
| 9555 break; |
| 9556 case 'l': |
| 9557 if( !stem(&z, "laci", "ic", m_gt_0) ){ |
| 9558 stem(&z, "luf", "", m_gt_0); |
| 9559 } |
| 9560 break; |
| 9561 case 's': |
| 9562 stem(&z, "ssen", "", m_gt_0); |
| 9563 break; |
| 9564 } |
| 9565 |
| 9566 /* Step 4 */ |
| 9567 switch( z[1] ){ |
| 9568 case 'a': |
| 9569 if( z[0]=='l' && m_gt_1(z+2) ){ |
| 9570 z += 2; |
| 9571 } |
| 9572 break; |
| 9573 case 'c': |
| 9574 if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ |
| 9575 z += 4; |
| 9576 } |
| 9577 break; |
| 9578 case 'e': |
| 9579 if( z[0]=='r' && m_gt_1(z+2) ){ |
| 9580 z += 2; |
| 9581 } |
| 9582 break; |
| 9583 case 'i': |
| 9584 if( z[0]=='c' && m_gt_1(z+2) ){ |
| 9585 z += 2; |
| 9586 } |
| 9587 break; |
| 9588 case 'l': |
| 9589 if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ |
| 9590 z += 4; |
| 9591 } |
| 9592 break; |
| 9593 case 'n': |
| 9594 if( z[0]=='t' ){ |
| 9595 if( z[2]=='a' ){ |
| 9596 if( m_gt_1(z+3) ){ |
| 9597 z += 3; |
| 9598 } |
| 9599 }else if( z[2]=='e' ){ |
| 9600 if( !stem(&z, "tneme", "", m_gt_1) |
| 9601 && !stem(&z, "tnem", "", m_gt_1) |
| 9602 ){ |
| 9603 stem(&z, "tne", "", m_gt_1); |
| 9604 } |
| 9605 } |
| 9606 } |
| 9607 break; |
| 9608 case 'o': |
| 9609 if( z[0]=='u' ){ |
| 9610 if( m_gt_1(z+2) ){ |
| 9611 z += 2; |
| 9612 } |
| 9613 }else if( z[3]=='s' || z[3]=='t' ){ |
| 9614 stem(&z, "noi", "", m_gt_1); |
| 9615 } |
| 9616 break; |
| 9617 case 's': |
| 9618 if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ |
| 9619 z += 3; |
| 9620 } |
| 9621 break; |
| 9622 case 't': |
| 9623 if( !stem(&z, "eta", "", m_gt_1) ){ |
| 9624 stem(&z, "iti", "", m_gt_1); |
| 9625 } |
| 9626 break; |
| 9627 case 'u': |
| 9628 if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ |
| 9629 z += 3; |
| 9630 } |
| 9631 break; |
| 9632 case 'v': |
| 9633 case 'z': |
| 9634 if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ |
| 9635 z += 3; |
| 9636 } |
| 9637 break; |
| 9638 } |
| 9639 |
| 9640 /* Step 5a */ |
| 9641 if( z[0]=='e' ){ |
| 9642 if( m_gt_1(z+1) ){ |
| 9643 z++; |
| 9644 }else if( m_eq_1(z+1) && !star_oh(z+1) ){ |
| 9645 z++; |
| 9646 } |
| 9647 } |
| 9648 |
| 9649 /* Step 5b */ |
| 9650 if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ |
| 9651 z++; |
| 9652 } |
| 9653 |
| 9654 /* z[] is now the stemmed word in reverse order. Flip it back |
| 9655 ** around into forward order and return. |
| 9656 */ |
| 9657 *pnOut = i = (int)strlen(z); |
| 9658 zOut[i] = 0; |
| 9659 while( *z ){ |
| 9660 zOut[--i] = *(z++); |
| 9661 } |
| 9662 } |
| 9663 |
/*
** Characters that can be part of a token.  The table covers the ASCII
** range 0x30..0x7f and is indexed by (character - 0x30); a 1 marks a
** token character (digits, letters and '_').  ASCII characters below
** 0x30 are always delimiters.  Any byte with the high bit set (0x80 or
** above, i.e. part of a multi-byte UTF character) is treated as a token
** character, so all delimiters have values of 0x7f or lower.
**
** isDelim(C) is true if C is a delimiter.  The macro assigns C to a
** variable named "ch", which must be declared as an int in the scope
** where the macro is used.
*/
static const char porterIdChar[] = {
/*  x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,   /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,   /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,   /* 7x */
};
#define isDelim(C) \
    (((ch=(C))&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
| 9679 |
| 9680 /* |
| 9681 ** Extract the next token from a tokenization cursor. The cursor must |
| 9682 ** have been opened by a prior call to porterOpen(). |
| 9683 */ |
| 9684 static int porterNext( |
| 9685 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ |
| 9686 const char **pzToken, /* OUT: *pzToken is the token text */ |
| 9687 int *pnBytes, /* OUT: Number of bytes in token */ |
| 9688 int *piStartOffset, /* OUT: Starting offset of token */ |
| 9689 int *piEndOffset, /* OUT: Ending offset of token */ |
| 9690 int *piPosition /* OUT: Position integer of token */ |
| 9691 ){ |
| 9692 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
| 9693 const char *z = c->zInput; |
| 9694 |
| 9695 while( c->iOffset<c->nInput ){ |
| 9696 int iStartOffset, ch; |
| 9697 |
| 9698 /* Scan past delimiter characters */ |
| 9699 while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){ |
| 9700 c->iOffset++; |
| 9701 } |
| 9702 |
| 9703 /* Count non-delimiter characters. */ |
| 9704 iStartOffset = c->iOffset; |
| 9705 while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){ |
| 9706 c->iOffset++; |
| 9707 } |
| 9708 |
| 9709 if( c->iOffset>iStartOffset ){ |
| 9710 int n = c->iOffset-iStartOffset; |
| 9711 if( n>c->nAllocated ){ |
| 9712 char *pNew; |
| 9713 c->nAllocated = n+20; |
| 9714 pNew = sqlite3_realloc(c->zToken, c->nAllocated); |
| 9715 if( !pNew ) return SQLITE_NOMEM; |
| 9716 c->zToken = pNew; |
| 9717 } |
| 9718 porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); |
| 9719 *pzToken = c->zToken; |
| 9720 *piStartOffset = iStartOffset; |
| 9721 *piEndOffset = c->iOffset; |
| 9722 *piPosition = c->iToken++; |
| 9723 return SQLITE_OK; |
| 9724 } |
| 9725 } |
| 9726 return SQLITE_DONE; |
| 9727 } |
| 9728 |
| 9729 /* |
| 9730 ** The set of routines that implement the porter-stemmer tokenizer |
| 9731 */ |
| 9732 static const sqlite3_tokenizer_module porterTokenizerModule = { |
| 9733 0, |
| 9734 porterCreate, |
| 9735 porterDestroy, |
| 9736 porterOpen, |
| 9737 porterClose, |
| 9738 porterNext, |
| 9739 0 |
| 9740 }; |
| 9741 |
| 9742 /* |
| 9743 ** Allocate a new porter tokenizer. Return a pointer to the new |
| 9744 ** tokenizer in *ppModule |
| 9745 */ |
| 9746 SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( |
| 9747 sqlite3_tokenizer_module const**ppModule |
| 9748 ){ |
| 9749 *ppModule = &porterTokenizerModule; |
| 9750 } |
| 9751 |
| 9752 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 9753 |
| 9754 /************** End of fts3_porter.c *****************************************/ |
| 9755 /************** Begin file fts3_tokenizer.c **********************************/ |
| 9756 /* |
| 9757 ** 2007 June 22 |
| 9758 ** |
| 9759 ** The author disclaims copyright to this source code. In place of |
| 9760 ** a legal notice, here is a blessing: |
| 9761 ** |
| 9762 ** May you do good and not evil. |
| 9763 ** May you find forgiveness for yourself and forgive others. |
| 9764 ** May you share freely, never taking more than you give. |
| 9765 ** |
| 9766 ****************************************************************************** |
| 9767 ** |
| 9768 ** This is part of an SQLite module implementing full-text search. |
| 9769 ** This particular file implements the generic tokenizer interface. |
| 9770 */ |
| 9771 |
| 9772 /* |
| 9773 ** The code in this file is only compiled if: |
| 9774 ** |
| 9775 ** * The FTS3 module is being built as an extension |
| 9776 ** (in which case SQLITE_CORE is not defined), or |
| 9777 ** |
| 9778 ** * The FTS3 module is being built into the core of |
| 9779 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 9780 */ |
| 9781 /* #include "fts3Int.h" */ |
| 9782 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 9783 |
| 9784 /* #include <assert.h> */ |
| 9785 /* #include <string.h> */ |
| 9786 |
| 9787 /* |
| 9788 ** Implementation of the SQL scalar function for accessing the underlying |
| 9789 ** hash table. This function may be called as follows: |
| 9790 ** |
| 9791 ** SELECT <function-name>(<key-name>); |
| 9792 ** SELECT <function-name>(<key-name>, <pointer>); |
| 9793 ** |
| 9794 ** where <function-name> is the name passed as the second argument |
| 9795 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). |
| 9796 ** |
| 9797 ** If the <pointer> argument is specified, it must be a blob value |
| 9798 ** containing a pointer to be stored as the hash data corresponding |
| 9799 ** to the string <key-name>. If <pointer> is not specified, then |
| 9800 ** the string <key-name> must already exist in the has table. Otherwise, |
| 9801 ** an error is returned. |
| 9802 ** |
| 9803 ** Whether or not the <pointer> argument is specified, the value returned |
| 9804 ** is a blob containing the pointer stored as the hash data corresponding |
| 9805 ** to string <key-name> (after the hash-table is updated, if applicable). |
| 9806 */ |
| 9807 static void scalarFunc( |
| 9808 sqlite3_context *context, |
| 9809 int argc, |
| 9810 sqlite3_value **argv |
| 9811 ){ |
| 9812 Fts3Hash *pHash; |
| 9813 void *pPtr = 0; |
| 9814 const unsigned char *zName; |
| 9815 int nName; |
| 9816 |
| 9817 assert( argc==1 || argc==2 ); |
| 9818 |
| 9819 pHash = (Fts3Hash *)sqlite3_user_data(context); |
| 9820 |
| 9821 zName = sqlite3_value_text(argv[0]); |
| 9822 nName = sqlite3_value_bytes(argv[0])+1; |
| 9823 |
| 9824 if( argc==2 ){ |
| 9825 void *pOld; |
| 9826 int n = sqlite3_value_bytes(argv[1]); |
| 9827 if( zName==0 || n!=sizeof(pPtr) ){ |
| 9828 sqlite3_result_error(context, "argument type mismatch", -1); |
| 9829 return; |
| 9830 } |
| 9831 pPtr = *(void **)sqlite3_value_blob(argv[1]); |
| 9832 pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); |
| 9833 if( pOld==pPtr ){ |
| 9834 sqlite3_result_error(context, "out of memory", -1); |
| 9835 return; |
| 9836 } |
| 9837 }else{ |
| 9838 if( zName ){ |
| 9839 pPtr = sqlite3Fts3HashFind(pHash, zName, nName); |
| 9840 } |
| 9841 if( !pPtr ){ |
| 9842 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); |
| 9843 sqlite3_result_error(context, zErr, -1); |
| 9844 sqlite3_free(zErr); |
| 9845 return; |
| 9846 } |
| 9847 } |
| 9848 |
| 9849 sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); |
| 9850 } |
| 9851 |
| 9852 SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ |
| 9853 static const char isFtsIdChar[] = { |
| 9854 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ |
| 9855 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ |
| 9856 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ |
| 9857 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ |
| 9858 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ |
| 9859 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ |
| 9860 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ |
| 9861 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ |
| 9862 }; |
| 9863 return (c&0x80 || isFtsIdChar[(int)(c)]); |
| 9864 } |
| 9865 |
| 9866 SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ |
| 9867 const char *z1; |
| 9868 const char *z2 = 0; |
| 9869 |
| 9870 /* Find the start of the next token. */ |
| 9871 z1 = zStr; |
| 9872 while( z2==0 ){ |
| 9873 char c = *z1; |
| 9874 switch( c ){ |
| 9875 case '\0': return 0; /* No more tokens here */ |
| 9876 case '\'': |
| 9877 case '"': |
| 9878 case '`': { |
| 9879 z2 = z1; |
| 9880 while( *++z2 && (*z2!=c || *++z2==c) ); |
| 9881 break; |
| 9882 } |
| 9883 case '[': |
| 9884 z2 = &z1[1]; |
| 9885 while( *z2 && z2[0]!=']' ) z2++; |
| 9886 if( *z2 ) z2++; |
| 9887 break; |
| 9888 |
| 9889 default: |
| 9890 if( sqlite3Fts3IsIdChar(*z1) ){ |
| 9891 z2 = &z1[1]; |
| 9892 while( sqlite3Fts3IsIdChar(*z2) ) z2++; |
| 9893 }else{ |
| 9894 z1++; |
| 9895 } |
| 9896 } |
| 9897 } |
| 9898 |
| 9899 *pn = (int)(z2-z1); |
| 9900 return z1; |
| 9901 } |
| 9902 |
| 9903 SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( |
| 9904 Fts3Hash *pHash, /* Tokenizer hash table */ |
| 9905 const char *zArg, /* Tokenizer name */ |
| 9906 sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ |
| 9907 char **pzErr /* OUT: Set to malloced error message */ |
| 9908 ){ |
| 9909 int rc; |
| 9910 char *z = (char *)zArg; |
| 9911 int n = 0; |
| 9912 char *zCopy; |
| 9913 char *zEnd; /* Pointer to nul-term of zCopy */ |
| 9914 sqlite3_tokenizer_module *m; |
| 9915 |
| 9916 zCopy = sqlite3_mprintf("%s", zArg); |
| 9917 if( !zCopy ) return SQLITE_NOMEM; |
| 9918 zEnd = &zCopy[strlen(zCopy)]; |
| 9919 |
| 9920 z = (char *)sqlite3Fts3NextToken(zCopy, &n); |
| 9921 if( z==0 ){ |
| 9922 assert( n==0 ); |
| 9923 z = zCopy; |
| 9924 } |
| 9925 z[n] = '\0'; |
| 9926 sqlite3Fts3Dequote(z); |
| 9927 |
| 9928 m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); |
| 9929 if( !m ){ |
| 9930 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); |
| 9931 rc = SQLITE_ERROR; |
| 9932 }else{ |
| 9933 char const **aArg = 0; |
| 9934 int iArg = 0; |
| 9935 z = &z[n+1]; |
| 9936 while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){ |
| 9937 int nNew = sizeof(char *)*(iArg+1); |
| 9938 char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew); |
| 9939 if( !aNew ){ |
| 9940 sqlite3_free(zCopy); |
| 9941 sqlite3_free((void *)aArg); |
| 9942 return SQLITE_NOMEM; |
| 9943 } |
| 9944 aArg = aNew; |
| 9945 aArg[iArg++] = z; |
| 9946 z[n] = '\0'; |
| 9947 sqlite3Fts3Dequote(z); |
| 9948 z = &z[n+1]; |
| 9949 } |
| 9950 rc = m->xCreate(iArg, aArg, ppTok); |
| 9951 assert( rc!=SQLITE_OK || *ppTok ); |
| 9952 if( rc!=SQLITE_OK ){ |
| 9953 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); |
| 9954 }else{ |
| 9955 (*ppTok)->pModule = m; |
| 9956 } |
| 9957 sqlite3_free((void *)aArg); |
| 9958 } |
| 9959 |
| 9960 sqlite3_free(zCopy); |
| 9961 return rc; |
| 9962 } |
| 9963 |
| 9964 |
| 9965 #ifdef SQLITE_TEST |
| 9966 |
| 9967 #include <tcl.h> |
| 9968 /* #include <string.h> */ |
| 9969 |
| 9970 /* |
| 9971 ** Implementation of a special SQL scalar function for testing tokenizers |
| 9972 ** designed to be used in concert with the Tcl testing framework. This |
| 9973 ** function must be called with two or more arguments: |
| 9974 ** |
| 9975 ** SELECT <function-name>(<key-name>, ..., <input-string>); |
| 9976 ** |
| 9977 ** where <function-name> is the name passed as the second argument |
| 9978 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') |
| 9979 ** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). |
| 9980 ** |
| 9981 ** The return value is a string that may be interpreted as a Tcl |
| 9982 ** list. For each token in the <input-string>, three elements are |
| 9983 ** added to the returned list. The first is the token position, the |
| 9984 ** second is the token text (folded, stemmed, etc.) and the third is the |
| 9985 ** substring of <input-string> associated with the token. For example, |
| 9986 ** using the built-in "simple" tokenizer: |
| 9987 ** |
| 9988 ** SELECT fts_tokenizer_test('simple', 'I don't see how'); |
| 9989 ** |
| 9990 ** will return the string: |
| 9991 ** |
| 9992 ** "{0 i I 1 dont don't 2 see see 3 how how}" |
| 9993 ** |
| 9994 */ |
| 9995 static void testFunc( |
| 9996 sqlite3_context *context, |
| 9997 int argc, |
| 9998 sqlite3_value **argv |
| 9999 ){ |
| 10000 Fts3Hash *pHash; |
| 10001 sqlite3_tokenizer_module *p; |
| 10002 sqlite3_tokenizer *pTokenizer = 0; |
| 10003 sqlite3_tokenizer_cursor *pCsr = 0; |
| 10004 |
| 10005 const char *zErr = 0; |
| 10006 |
| 10007 const char *zName; |
| 10008 int nName; |
| 10009 const char *zInput; |
| 10010 int nInput; |
| 10011 |
| 10012 const char *azArg[64]; |
| 10013 |
| 10014 const char *zToken; |
| 10015 int nToken = 0; |
| 10016 int iStart = 0; |
| 10017 int iEnd = 0; |
| 10018 int iPos = 0; |
| 10019 int i; |
| 10020 |
| 10021 Tcl_Obj *pRet; |
| 10022 |
| 10023 if( argc<2 ){ |
| 10024 sqlite3_result_error(context, "insufficient arguments", -1); |
| 10025 return; |
| 10026 } |
| 10027 |
| 10028 nName = sqlite3_value_bytes(argv[0]); |
| 10029 zName = (const char *)sqlite3_value_text(argv[0]); |
| 10030 nInput = sqlite3_value_bytes(argv[argc-1]); |
| 10031 zInput = (const char *)sqlite3_value_text(argv[argc-1]); |
| 10032 |
| 10033 pHash = (Fts3Hash *)sqlite3_user_data(context); |
| 10034 p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); |
| 10035 |
| 10036 if( !p ){ |
| 10037 char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); |
| 10038 sqlite3_result_error(context, zErr2, -1); |
| 10039 sqlite3_free(zErr2); |
| 10040 return; |
| 10041 } |
| 10042 |
| 10043 pRet = Tcl_NewObj(); |
| 10044 Tcl_IncrRefCount(pRet); |
| 10045 |
| 10046 for(i=1; i<argc-1; i++){ |
| 10047 azArg[i-1] = (const char *)sqlite3_value_text(argv[i]); |
| 10048 } |
| 10049 |
| 10050 if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){ |
| 10051 zErr = "error in xCreate()"; |
| 10052 goto finish; |
| 10053 } |
| 10054 pTokenizer->pModule = p; |
| 10055 if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ |
| 10056 zErr = "error in xOpen()"; |
| 10057 goto finish; |
| 10058 } |
| 10059 |
| 10060 while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ |
| 10061 Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); |
| 10062 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |
| 10063 zToken = &zInput[iStart]; |
| 10064 nToken = iEnd-iStart; |
| 10065 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |
| 10066 } |
| 10067 |
| 10068 if( SQLITE_OK!=p->xClose(pCsr) ){ |
| 10069 zErr = "error in xClose()"; |
| 10070 goto finish; |
| 10071 } |
| 10072 if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ |
| 10073 zErr = "error in xDestroy()"; |
| 10074 goto finish; |
| 10075 } |
| 10076 |
| 10077 finish: |
| 10078 if( zErr ){ |
| 10079 sqlite3_result_error(context, zErr, -1); |
| 10080 }else{ |
| 10081 sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); |
| 10082 } |
| 10083 Tcl_DecrRefCount(pRet); |
| 10084 } |
| 10085 |
| 10086 static |
| 10087 int registerTokenizer( |
| 10088 sqlite3 *db, |
| 10089 char *zName, |
| 10090 const sqlite3_tokenizer_module *p |
| 10091 ){ |
| 10092 int rc; |
| 10093 sqlite3_stmt *pStmt; |
| 10094 const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; |
| 10095 |
| 10096 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 10097 if( rc!=SQLITE_OK ){ |
| 10098 return rc; |
| 10099 } |
| 10100 |
| 10101 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 10102 sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); |
| 10103 sqlite3_step(pStmt); |
| 10104 |
| 10105 return sqlite3_finalize(pStmt); |
| 10106 } |
| 10107 |
| 10108 static |
| 10109 int queryTokenizer( |
| 10110 sqlite3 *db, |
| 10111 char *zName, |
| 10112 const sqlite3_tokenizer_module **pp |
| 10113 ){ |
| 10114 int rc; |
| 10115 sqlite3_stmt *pStmt; |
| 10116 const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| 10117 |
| 10118 *pp = 0; |
| 10119 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 10120 if( rc!=SQLITE_OK ){ |
| 10121 return rc; |
| 10122 } |
| 10123 |
| 10124 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 10125 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 10126 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| 10127 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| 10128 } |
| 10129 } |
| 10130 |
| 10131 return sqlite3_finalize(pStmt); |
| 10132 } |
| 10133 |
| 10134 SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module co
nst**ppModule); |
| 10135 |
| 10136 /* |
| 10137 ** Implementation of the scalar function fts3_tokenizer_internal_test(). |
| 10138 ** This function is used for testing only, it is not included in the |
| 10139 ** build unless SQLITE_TEST is defined. |
| 10140 ** |
| 10141 ** The purpose of this is to test that the fts3_tokenizer() function |
| 10142 ** can be used as designed by the C-code in the queryTokenizer and |
| 10143 ** registerTokenizer() functions above. These two functions are repeated |
| 10144 ** in the README.tokenizer file as an example, so it is important to |
| 10145 ** test them. |
| 10146 ** |
| 10147 ** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar |
| 10148 ** function with no arguments. An assert() will fail if a problem is |
| 10149 ** detected. i.e.: |
| 10150 ** |
| 10151 ** SELECT fts3_tokenizer_internal_test(); |
| 10152 ** |
| 10153 */ |
| 10154 static void intTestFunc( |
| 10155 sqlite3_context *context, |
| 10156 int argc, |
| 10157 sqlite3_value **argv |
| 10158 ){ |
| 10159 int rc; |
| 10160 const sqlite3_tokenizer_module *p1; |
| 10161 const sqlite3_tokenizer_module *p2; |
| 10162 sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); |
| 10163 |
| 10164 UNUSED_PARAMETER(argc); |
| 10165 UNUSED_PARAMETER(argv); |
| 10166 |
| 10167 /* Test the query function */ |
| 10168 sqlite3Fts3SimpleTokenizerModule(&p1); |
| 10169 rc = queryTokenizer(db, "simple", &p2); |
| 10170 assert( rc==SQLITE_OK ); |
| 10171 assert( p1==p2 ); |
| 10172 rc = queryTokenizer(db, "nosuchtokenizer", &p2); |
| 10173 assert( rc==SQLITE_ERROR ); |
| 10174 assert( p2==0 ); |
| 10175 assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); |
| 10176 |
| 10177 /* Test the storage function */ |
| 10178 rc = registerTokenizer(db, "nosuchtokenizer", p1); |
| 10179 assert( rc==SQLITE_OK ); |
| 10180 rc = queryTokenizer(db, "nosuchtokenizer", &p2); |
| 10181 assert( rc==SQLITE_OK ); |
| 10182 assert( p2==p1 ); |
| 10183 |
| 10184 sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); |
| 10185 } |
| 10186 |
| 10187 #endif |
| 10188 |
| 10189 /* |
| 10190 ** Set up SQL objects in database db used to access the contents of |
| 10191 ** the hash table pointed to by argument pHash. The hash table must |
| 10192 ** been initialized to use string keys, and to take a private copy |
| 10193 ** of the key when a value is inserted. i.e. by a call similar to: |
| 10194 ** |
| 10195 ** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); |
| 10196 ** |
| 10197 ** This function adds a scalar function (see header comment above |
| 10198 ** scalarFunc() in this file for details) and, if ENABLE_TABLE is |
| 10199 ** defined at compilation time, a temporary virtual table (see header |
| 10200 ** comment above struct HashTableVtab) to the database schema. Both |
| 10201 ** provide read/write access to the contents of *pHash. |
| 10202 ** |
| 10203 ** The third argument to this function, zName, is used as the name |
| 10204 ** of both the scalar and, if created, the virtual table. |
| 10205 */ |
| 10206 SQLITE_PRIVATE int sqlite3Fts3InitHashTable( |
| 10207 sqlite3 *db, |
| 10208 Fts3Hash *pHash, |
| 10209 const char *zName |
| 10210 ){ |
| 10211 int rc = SQLITE_OK; |
| 10212 void *p = (void *)pHash; |
| 10213 const int any = SQLITE_ANY; |
| 10214 |
| 10215 #ifdef SQLITE_TEST |
| 10216 char *zTest = 0; |
| 10217 char *zTest2 = 0; |
| 10218 void *pdb = (void *)db; |
| 10219 zTest = sqlite3_mprintf("%s_test", zName); |
| 10220 zTest2 = sqlite3_mprintf("%s_internal_test", zName); |
| 10221 if( !zTest || !zTest2 ){ |
| 10222 rc = SQLITE_NOMEM; |
| 10223 } |
| 10224 #endif |
| 10225 |
| 10226 if( SQLITE_OK==rc ){ |
| 10227 rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0); |
| 10228 } |
| 10229 if( SQLITE_OK==rc ){ |
| 10230 rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); |
| 10231 } |
| 10232 #ifdef SQLITE_TEST |
| 10233 if( SQLITE_OK==rc ){ |
| 10234 rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); |
| 10235 } |
| 10236 if( SQLITE_OK==rc ){ |
| 10237 rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); |
| 10238 } |
| 10239 #endif |
| 10240 |
| 10241 #ifdef SQLITE_TEST |
| 10242 sqlite3_free(zTest); |
| 10243 sqlite3_free(zTest2); |
| 10244 #endif |
| 10245 |
| 10246 return rc; |
| 10247 } |
| 10248 |
| 10249 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10250 |
| 10251 /************** End of fts3_tokenizer.c **************************************/ |
| 10252 /************** Begin file fts3_tokenizer1.c *********************************/ |
| 10253 /* |
| 10254 ** 2006 Oct 10 |
| 10255 ** |
| 10256 ** The author disclaims copyright to this source code. In place of |
| 10257 ** a legal notice, here is a blessing: |
| 10258 ** |
| 10259 ** May you do good and not evil. |
| 10260 ** May you find forgiveness for yourself and forgive others. |
| 10261 ** May you share freely, never taking more than you give. |
| 10262 ** |
| 10263 ****************************************************************************** |
| 10264 ** |
| 10265 ** Implementation of the "simple" full-text-search tokenizer. |
| 10266 */ |
| 10267 |
| 10268 /* |
| 10269 ** The code in this file is only compiled if: |
| 10270 ** |
| 10271 ** * The FTS3 module is being built as an extension |
| 10272 ** (in which case SQLITE_CORE is not defined), or |
| 10273 ** |
| 10274 ** * The FTS3 module is being built into the core of |
| 10275 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 10276 */ |
| 10277 /* #include "fts3Int.h" */ |
| 10278 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 10279 |
| 10280 /* #include <assert.h> */ |
| 10281 /* #include <stdlib.h> */ |
| 10282 /* #include <stdio.h> */ |
| 10283 /* #include <string.h> */ |
| 10284 |
| 10285 /* #include "fts3_tokenizer.h" */ |
| 10286 |
| 10287 typedef struct simple_tokenizer { |
| 10288 sqlite3_tokenizer base; |
| 10289 char delim[128]; /* flag ASCII delimiters */ |
| 10290 } simple_tokenizer; |
| 10291 |
| 10292 typedef struct simple_tokenizer_cursor { |
| 10293 sqlite3_tokenizer_cursor base; |
| 10294 const char *pInput; /* input we are tokenizing */ |
| 10295 int nBytes; /* size of the input */ |
| 10296 int iOffset; /* current position in pInput */ |
| 10297 int iToken; /* index of next token to be returned */ |
| 10298 char *pToken; /* storage for current token */ |
| 10299 int nTokenAllocated; /* space allocated to zToken buffer */ |
| 10300 } simple_tokenizer_cursor; |
| 10301 |
| 10302 |
| 10303 static int simpleDelim(simple_tokenizer *t, unsigned char c){ |
| 10304 return c<0x80 && t->delim[c]; |
| 10305 } |
/*
** Return 1 if x is an ASCII digit or an ASCII letter of either case, or
** 0 for any other value. The test is independent of the C locale.
*/
static int fts3_isalnum(int x){
  if( x>='0' && x<='9' ) return 1;
  if( x>='A' && x<='Z' ) return 1;
  return x>='a' && x<='z';
}
| 10309 |
| 10310 /* |
| 10311 ** Create a new tokenizer instance. |
| 10312 */ |
| 10313 static int simpleCreate( |
| 10314 int argc, const char * const *argv, |
| 10315 sqlite3_tokenizer **ppTokenizer |
| 10316 ){ |
| 10317 simple_tokenizer *t; |
| 10318 |
| 10319 t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); |
| 10320 if( t==NULL ) return SQLITE_NOMEM; |
| 10321 memset(t, 0, sizeof(*t)); |
| 10322 |
| 10323 /* TODO(shess) Delimiters need to remain the same from run to run, |
| 10324 ** else we need to reindex. One solution would be a meta-table to |
| 10325 ** track such information in the database, then we'd only want this |
| 10326 ** information on the initial create. |
| 10327 */ |
| 10328 if( argc>1 ){ |
| 10329 int i, n = (int)strlen(argv[1]); |
| 10330 for(i=0; i<n; i++){ |
| 10331 unsigned char ch = argv[1][i]; |
| 10332 /* We explicitly don't support UTF-8 delimiters for now. */ |
| 10333 if( ch>=0x80 ){ |
| 10334 sqlite3_free(t); |
| 10335 return SQLITE_ERROR; |
| 10336 } |
| 10337 t->delim[ch] = 1; |
| 10338 } |
| 10339 } else { |
| 10340 /* Mark non-alphanumeric ASCII characters as delimiters */ |
| 10341 int i; |
| 10342 for(i=1; i<0x80; i++){ |
| 10343 t->delim[i] = !fts3_isalnum(i) ? -1 : 0; |
| 10344 } |
| 10345 } |
| 10346 |
| 10347 *ppTokenizer = &t->base; |
| 10348 return SQLITE_OK; |
| 10349 } |
| 10350 |
| 10351 /* |
| 10352 ** Destroy a tokenizer |
| 10353 */ |
| 10354 static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ |
| 10355 sqlite3_free(pTokenizer); |
| 10356 return SQLITE_OK; |
| 10357 } |
| 10358 |
| 10359 /* |
| 10360 ** Prepare to begin tokenizing a particular string. The input |
| 10361 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 10362 ** used to incrementally tokenize this string is returned in |
| 10363 ** *ppCursor. |
| 10364 */ |
| 10365 static int simpleOpen( |
| 10366 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 10367 const char *pInput, int nBytes, /* String to be tokenized */ |
| 10368 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 10369 ){ |
| 10370 simple_tokenizer_cursor *c; |
| 10371 |
| 10372 UNUSED_PARAMETER(pTokenizer); |
| 10373 |
| 10374 c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); |
| 10375 if( c==NULL ) return SQLITE_NOMEM; |
| 10376 |
| 10377 c->pInput = pInput; |
| 10378 if( pInput==0 ){ |
| 10379 c->nBytes = 0; |
| 10380 }else if( nBytes<0 ){ |
| 10381 c->nBytes = (int)strlen(pInput); |
| 10382 }else{ |
| 10383 c->nBytes = nBytes; |
| 10384 } |
| 10385 c->iOffset = 0; /* start tokenizing at the beginning */ |
| 10386 c->iToken = 0; |
| 10387 c->pToken = NULL; /* no space allocated, yet. */ |
| 10388 c->nTokenAllocated = 0; |
| 10389 |
| 10390 *ppCursor = &c->base; |
| 10391 return SQLITE_OK; |
| 10392 } |
| 10393 |
| 10394 /* |
| 10395 ** Close a tokenization cursor previously opened by a call to |
| 10396 ** simpleOpen() above. |
| 10397 */ |
| 10398 static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ |
| 10399 simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
| 10400 sqlite3_free(c->pToken); |
| 10401 sqlite3_free(c); |
| 10402 return SQLITE_OK; |
| 10403 } |
| 10404 |
| 10405 /* |
| 10406 ** Extract the next token from a tokenization cursor. The cursor must |
| 10407 ** have been opened by a prior call to simpleOpen(). |
| 10408 */ |
| 10409 static int simpleNext( |
| 10410 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
| 10411 const char **ppToken, /* OUT: *ppToken is the token text */ |
| 10412 int *pnBytes, /* OUT: Number of bytes in token */ |
| 10413 int *piStartOffset, /* OUT: Starting offset of token */ |
| 10414 int *piEndOffset, /* OUT: Ending offset of token */ |
| 10415 int *piPosition /* OUT: Position integer of token */ |
| 10416 ){ |
| 10417 simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
| 10418 simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; |
| 10419 unsigned char *p = (unsigned char *)c->pInput; |
| 10420 |
| 10421 while( c->iOffset<c->nBytes ){ |
| 10422 int iStartOffset; |
| 10423 |
| 10424 /* Scan past delimiter characters */ |
| 10425 while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ |
| 10426 c->iOffset++; |
| 10427 } |
| 10428 |
| 10429 /* Count non-delimiter characters. */ |
| 10430 iStartOffset = c->iOffset; |
| 10431 while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ |
| 10432 c->iOffset++; |
| 10433 } |
| 10434 |
| 10435 if( c->iOffset>iStartOffset ){ |
| 10436 int i, n = c->iOffset-iStartOffset; |
| 10437 if( n>c->nTokenAllocated ){ |
| 10438 char *pNew; |
| 10439 c->nTokenAllocated = n+20; |
| 10440 pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); |
| 10441 if( !pNew ) return SQLITE_NOMEM; |
| 10442 c->pToken = pNew; |
| 10443 } |
| 10444 for(i=0; i<n; i++){ |
| 10445 /* TODO(shess) This needs expansion to handle UTF-8 |
| 10446 ** case-insensitivity. |
| 10447 */ |
| 10448 unsigned char ch = p[iStartOffset+i]; |
| 10449 c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); |
| 10450 } |
| 10451 *ppToken = c->pToken; |
| 10452 *pnBytes = n; |
| 10453 *piStartOffset = iStartOffset; |
| 10454 *piEndOffset = c->iOffset; |
| 10455 *piPosition = c->iToken++; |
| 10456 |
| 10457 return SQLITE_OK; |
| 10458 } |
| 10459 } |
| 10460 return SQLITE_DONE; |
| 10461 } |
| 10462 |
| 10463 /* |
| 10464 ** The set of routines that implement the simple tokenizer |
| 10465 */ |
| 10466 static const sqlite3_tokenizer_module simpleTokenizerModule = { |
| 10467 0, |
| 10468 simpleCreate, |
| 10469 simpleDestroy, |
| 10470 simpleOpen, |
| 10471 simpleClose, |
| 10472 simpleNext, |
| 10473 0, |
| 10474 }; |
| 10475 |
| 10476 /* |
| 10477 ** Allocate a new simple tokenizer. Return a pointer to the new |
| 10478 ** tokenizer in *ppModule |
| 10479 */ |
| 10480 SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( |
| 10481 sqlite3_tokenizer_module const**ppModule |
| 10482 ){ |
| 10483 *ppModule = &simpleTokenizerModule; |
| 10484 } |
| 10485 |
| 10486 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10487 |
| 10488 /************** End of fts3_tokenizer1.c *************************************/ |
| 10489 /************** Begin file fts3_tokenize_vtab.c ******************************/ |
| 10490 /* |
| 10491 ** 2013 Apr 22 |
| 10492 ** |
| 10493 ** The author disclaims copyright to this source code. In place of |
| 10494 ** a legal notice, here is a blessing: |
| 10495 ** |
| 10496 ** May you do good and not evil. |
| 10497 ** May you find forgiveness for yourself and forgive others. |
| 10498 ** May you share freely, never taking more than you give. |
| 10499 ** |
| 10500 ****************************************************************************** |
| 10501 ** |
| 10502 ** This file contains code for the "fts3tokenize" virtual table module. |
| 10503 ** An fts3tokenize virtual table is created as follows: |
| 10504 ** |
| 10505 ** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize( |
| 10506 ** <tokenizer-name>, <arg-1>, ... |
| 10507 ** ); |
| 10508 ** |
| 10509 ** The table created has the following schema: |
| 10510 ** |
| 10511 ** CREATE TABLE <tbl>(input, token, start, end, position) |
| 10512 ** |
| 10513 ** When queried, the query must include a WHERE clause of type: |
| 10514 ** |
| 10515 ** input = <string> |
| 10516 ** |
| 10517 ** The virtual table module tokenizes this <string>, using the FTS3 |
| 10518 ** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE |
| 10519 ** statement and returns one row for each token in the result. With |
| 10520 ** fields set as follows: |
| 10521 ** |
| 10522 ** input: Always set to a copy of <string> |
| 10523 ** token: A token from the input. |
| 10524 ** start: Byte offset of the token within the input <string>. |
| 10525 ** end: Byte offset of the byte immediately following the end of the |
| 10526 ** token within the input string. |
| 10527 ** position: Token offset of token within input. |
| 10528 ** |
| 10529 */ |
| 10530 /* #include "fts3Int.h" */ |
| 10531 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 10532 |
| 10533 /* #include <string.h> */ |
| 10534 /* #include <assert.h> */ |
| 10535 |
| 10536 typedef struct Fts3tokTable Fts3tokTable; |
| 10537 typedef struct Fts3tokCursor Fts3tokCursor; |
| 10538 |
| 10539 /* |
| 10540 ** Virtual table structure. |
| 10541 */ |
| 10542 struct Fts3tokTable { |
| 10543 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 10544 const sqlite3_tokenizer_module *pMod; |
| 10545 sqlite3_tokenizer *pTok; |
| 10546 }; |
| 10547 |
| 10548 /* |
| 10549 ** Virtual table cursor structure. |
| 10550 */ |
| 10551 struct Fts3tokCursor { |
| 10552 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 10553 char *zInput; /* Input string */ |
| 10554 sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ |
| 10555 int iRowid; /* Current 'rowid' value */ |
| 10556 const char *zToken; /* Current 'token' value */ |
| 10557 int nToken; /* Size of zToken in bytes */ |
| 10558 int iStart; /* Current 'start' value */ |
| 10559 int iEnd; /* Current 'end' value */ |
| 10560 int iPos; /* Current 'pos' value */ |
| 10561 }; |
| 10562 |
| 10563 /* |
| 10564 ** Query FTS for the tokenizer implementation named zName. |
| 10565 */ |
| 10566 static int fts3tokQueryTokenizer( |
| 10567 Fts3Hash *pHash, |
| 10568 const char *zName, |
| 10569 const sqlite3_tokenizer_module **pp, |
| 10570 char **pzErr |
| 10571 ){ |
| 10572 sqlite3_tokenizer_module *p; |
| 10573 int nName = (int)strlen(zName); |
| 10574 |
| 10575 p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); |
| 10576 if( !p ){ |
| 10577 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); |
| 10578 return SQLITE_ERROR; |
| 10579 } |
| 10580 |
| 10581 *pp = p; |
| 10582 return SQLITE_OK; |
| 10583 } |
| 10584 |
| 10585 /* |
| 10586 ** The second argument, argv[], is an array of pointers to nul-terminated |
| 10587 ** strings. This function makes a copy of the array and strings into a |
| 10588 ** single block of memory. It then dequotes any of the strings that appear |
| 10589 ** to be quoted. |
| 10590 ** |
| 10591 ** If successful, output parameter *pazDequote is set to point at the |
| 10592 ** array of dequoted strings and SQLITE_OK is returned. The caller is |
| 10593 ** responsible for eventually calling sqlite3_free() to free the array |
| 10594 ** in this case. Or, if an error occurs, an SQLite error code is returned. |
| 10595 ** The final value of *pazDequote is undefined in this case. |
| 10596 */ |
| 10597 static int fts3tokDequoteArray( |
| 10598 int argc, /* Number of elements in argv[] */ |
| 10599 const char * const *argv, /* Input array */ |
| 10600 char ***pazDequote /* Output array */ |
| 10601 ){ |
| 10602 int rc = SQLITE_OK; /* Return code */ |
| 10603 if( argc==0 ){ |
| 10604 *pazDequote = 0; |
| 10605 }else{ |
| 10606 int i; |
| 10607 int nByte = 0; |
| 10608 char **azDequote; |
| 10609 |
| 10610 for(i=0; i<argc; i++){ |
| 10611 nByte += (int)(strlen(argv[i]) + 1); |
| 10612 } |
| 10613 |
| 10614 *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); |
| 10615 if( azDequote==0 ){ |
| 10616 rc = SQLITE_NOMEM; |
| 10617 }else{ |
| 10618 char *pSpace = (char *)&azDequote[argc]; |
| 10619 for(i=0; i<argc; i++){ |
| 10620 int n = (int)strlen(argv[i]); |
| 10621 azDequote[i] = pSpace; |
| 10622 memcpy(pSpace, argv[i], n+1); |
| 10623 sqlite3Fts3Dequote(pSpace); |
| 10624 pSpace += (n+1); |
| 10625 } |
| 10626 } |
| 10627 } |
| 10628 |
| 10629 return rc; |
| 10630 } |
| 10631 |
| 10632 /* |
| 10633 ** Schema of the tokenizer table. |
| 10634 */ |
| 10635 #define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)" |
| 10636 |
| 10637 /* |
| 10638 ** This function does all the work for both the xConnect and xCreate methods. |
| 10639 ** These tables have no persistent representation of their own, so xConnect |
| 10640 ** and xCreate are identical operations. |
| 10641 ** |
| 10642 ** argv[0]: module name |
| 10643 ** argv[1]: database name |
| 10644 ** argv[2]: table name |
| 10645 ** argv[3]: first argument (tokenizer name) |
| 10646 */ |
| 10647 static int fts3tokConnectMethod( |
| 10648 sqlite3 *db, /* Database connection */ |
| 10649 void *pHash, /* Hash table of tokenizers */ |
| 10650 int argc, /* Number of elements in argv array */ |
| 10651 const char * const *argv, /* xCreate/xConnect argument array */ |
| 10652 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 10653 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 10654 ){ |
| 10655 Fts3tokTable *pTab = 0; |
| 10656 const sqlite3_tokenizer_module *pMod = 0; |
| 10657 sqlite3_tokenizer *pTok = 0; |
| 10658 int rc; |
| 10659 char **azDequote = 0; |
| 10660 int nDequote; |
| 10661 |
| 10662 rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); |
| 10663 if( rc!=SQLITE_OK ) return rc; |
| 10664 |
| 10665 nDequote = argc-3; |
| 10666 rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); |
| 10667 |
| 10668 if( rc==SQLITE_OK ){ |
| 10669 const char *zModule; |
| 10670 if( nDequote<1 ){ |
| 10671 zModule = "simple"; |
| 10672 }else{ |
| 10673 zModule = azDequote[0]; |
| 10674 } |
| 10675 rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); |
| 10676 } |
| 10677 |
| 10678 assert( (rc==SQLITE_OK)==(pMod!=0) ); |
| 10679 if( rc==SQLITE_OK ){ |
| 10680 const char * const *azArg = (const char * const *)&azDequote[1]; |
| 10681 rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); |
| 10682 } |
| 10683 |
| 10684 if( rc==SQLITE_OK ){ |
| 10685 pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); |
| 10686 if( pTab==0 ){ |
| 10687 rc = SQLITE_NOMEM; |
| 10688 } |
| 10689 } |
| 10690 |
| 10691 if( rc==SQLITE_OK ){ |
| 10692 memset(pTab, 0, sizeof(Fts3tokTable)); |
| 10693 pTab->pMod = pMod; |
| 10694 pTab->pTok = pTok; |
| 10695 *ppVtab = &pTab->base; |
| 10696 }else{ |
| 10697 if( pTok ){ |
| 10698 pMod->xDestroy(pTok); |
| 10699 } |
| 10700 } |
| 10701 |
| 10702 sqlite3_free(azDequote); |
| 10703 return rc; |
| 10704 } |
| 10705 |
| 10706 /* |
| 10707 ** This function does the work for both the xDisconnect and xDestroy methods. |
| 10708 ** These tables have no persistent representation of their own, so xDisconnect |
| 10709 ** and xDestroy are identical operations. |
| 10710 */ |
| 10711 static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ |
| 10712 Fts3tokTable *pTab = (Fts3tokTable *)pVtab; |
| 10713 |
| 10714 pTab->pMod->xDestroy(pTab->pTok); |
| 10715 sqlite3_free(pTab); |
| 10716 return SQLITE_OK; |
| 10717 } |
| 10718 |
| 10719 /* |
| 10720 ** xBestIndex - Analyze a WHERE and ORDER BY clause. |
| 10721 */ |
| 10722 static int fts3tokBestIndexMethod( |
| 10723 sqlite3_vtab *pVTab, |
| 10724 sqlite3_index_info *pInfo |
| 10725 ){ |
| 10726 int i; |
| 10727 UNUSED_PARAMETER(pVTab); |
| 10728 |
| 10729 for(i=0; i<pInfo->nConstraint; i++){ |
| 10730 if( pInfo->aConstraint[i].usable |
| 10731 && pInfo->aConstraint[i].iColumn==0 |
| 10732 && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ |
| 10733 ){ |
| 10734 pInfo->idxNum = 1; |
| 10735 pInfo->aConstraintUsage[i].argvIndex = 1; |
| 10736 pInfo->aConstraintUsage[i].omit = 1; |
| 10737 pInfo->estimatedCost = 1; |
| 10738 return SQLITE_OK; |
| 10739 } |
| 10740 } |
| 10741 |
| 10742 pInfo->idxNum = 0; |
| 10743 assert( pInfo->estimatedCost>1000000.0 ); |
| 10744 |
| 10745 return SQLITE_OK; |
| 10746 } |
| 10747 |
| 10748 /* |
| 10749 ** xOpen - Open a cursor. |
| 10750 */ |
| 10751 static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 10752 Fts3tokCursor *pCsr; |
| 10753 UNUSED_PARAMETER(pVTab); |
| 10754 |
| 10755 pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); |
| 10756 if( pCsr==0 ){ |
| 10757 return SQLITE_NOMEM; |
| 10758 } |
| 10759 memset(pCsr, 0, sizeof(Fts3tokCursor)); |
| 10760 |
| 10761 *ppCsr = (sqlite3_vtab_cursor *)pCsr; |
| 10762 return SQLITE_OK; |
| 10763 } |
| 10764 |
| 10765 /* |
| 10766 ** Reset the tokenizer cursor passed as the only argument. As if it had |
| 10767 ** just been returned by fts3tokOpenMethod(). |
| 10768 */ |
| 10769 static void fts3tokResetCursor(Fts3tokCursor *pCsr){ |
| 10770 if( pCsr->pCsr ){ |
| 10771 Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); |
| 10772 pTab->pMod->xClose(pCsr->pCsr); |
| 10773 pCsr->pCsr = 0; |
| 10774 } |
| 10775 sqlite3_free(pCsr->zInput); |
| 10776 pCsr->zInput = 0; |
| 10777 pCsr->zToken = 0; |
| 10778 pCsr->nToken = 0; |
| 10779 pCsr->iStart = 0; |
| 10780 pCsr->iEnd = 0; |
| 10781 pCsr->iPos = 0; |
| 10782 pCsr->iRowid = 0; |
| 10783 } |
| 10784 |
| 10785 /* |
| 10786 ** xClose - Close a cursor. |
| 10787 */ |
| 10788 static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 10789 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10790 |
| 10791 fts3tokResetCursor(pCsr); |
| 10792 sqlite3_free(pCsr); |
| 10793 return SQLITE_OK; |
| 10794 } |
| 10795 |
| 10796 /* |
| 10797 ** xNext - Advance the cursor to the next row, if any. |
| 10798 */ |
| 10799 static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ |
| 10800 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10801 Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); |
| 10802 int rc; /* Return code */ |
| 10803 |
| 10804 pCsr->iRowid++; |
| 10805 rc = pTab->pMod->xNext(pCsr->pCsr, |
| 10806 &pCsr->zToken, &pCsr->nToken, |
| 10807 &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos |
| 10808 ); |
| 10809 |
| 10810 if( rc!=SQLITE_OK ){ |
| 10811 fts3tokResetCursor(pCsr); |
| 10812 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 10813 } |
| 10814 |
| 10815 return rc; |
| 10816 } |
| 10817 |
| 10818 /* |
| 10819 ** xFilter - Initialize a cursor to point at the start of its data. |
| 10820 */ |
| 10821 static int fts3tokFilterMethod( |
| 10822 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 10823 int idxNum, /* Strategy index */ |
| 10824 const char *idxStr, /* Unused */ |
| 10825 int nVal, /* Number of elements in apVal */ |
| 10826 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 10827 ){ |
| 10828 int rc = SQLITE_ERROR; |
| 10829 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10830 Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); |
| 10831 UNUSED_PARAMETER(idxStr); |
| 10832 UNUSED_PARAMETER(nVal); |
| 10833 |
| 10834 fts3tokResetCursor(pCsr); |
| 10835 if( idxNum==1 ){ |
| 10836 const char *zByte = (const char *)sqlite3_value_text(apVal[0]); |
| 10837 int nByte = sqlite3_value_bytes(apVal[0]); |
| 10838 pCsr->zInput = sqlite3_malloc(nByte+1); |
| 10839 if( pCsr->zInput==0 ){ |
| 10840 rc = SQLITE_NOMEM; |
| 10841 }else{ |
| 10842 memcpy(pCsr->zInput, zByte, nByte); |
| 10843 pCsr->zInput[nByte] = 0; |
| 10844 rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); |
| 10845 if( rc==SQLITE_OK ){ |
| 10846 pCsr->pCsr->pTokenizer = pTab->pTok; |
| 10847 } |
| 10848 } |
| 10849 } |
| 10850 |
| 10851 if( rc!=SQLITE_OK ) return rc; |
| 10852 return fts3tokNextMethod(pCursor); |
| 10853 } |
| 10854 |
| 10855 /* |
| 10856 ** xEof - Return true if the cursor is at EOF, or false otherwise. |
| 10857 */ |
| 10858 static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ |
| 10859 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10860 return (pCsr->zToken==0); |
| 10861 } |
| 10862 |
| 10863 /* |
| 10864 ** xColumn - Return a column value. |
| 10865 */ |
| 10866 static int fts3tokColumnMethod( |
| 10867 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 10868 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 10869 int iCol /* Index of column to read value from */ |
| 10870 ){ |
| 10871 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10872 |
| 10873 /* CREATE TABLE x(input, token, start, end, position) */ |
| 10874 switch( iCol ){ |
| 10875 case 0: |
| 10876 sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); |
| 10877 break; |
| 10878 case 1: |
| 10879 sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); |
| 10880 break; |
| 10881 case 2: |
| 10882 sqlite3_result_int(pCtx, pCsr->iStart); |
| 10883 break; |
| 10884 case 3: |
| 10885 sqlite3_result_int(pCtx, pCsr->iEnd); |
| 10886 break; |
| 10887 default: |
| 10888 assert( iCol==4 ); |
| 10889 sqlite3_result_int(pCtx, pCsr->iPos); |
| 10890 break; |
| 10891 } |
| 10892 return SQLITE_OK; |
| 10893 } |
| 10894 |
| 10895 /* |
| 10896 ** xRowid - Return the current rowid for the cursor. |
| 10897 */ |
| 10898 static int fts3tokRowidMethod( |
| 10899 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 10900 sqlite_int64 *pRowid /* OUT: Rowid value */ |
| 10901 ){ |
| 10902 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10903 *pRowid = (sqlite3_int64)pCsr->iRowid; |
| 10904 return SQLITE_OK; |
| 10905 } |
| 10906 |
| 10907 /* |
| 10908 ** Register the fts3tok module with database connection db. Return SQLITE_OK |
| 10909 ** if successful or an error code if sqlite3_create_module() fails. |
| 10910 */ |
| 10911 SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ |
| 10912 static const sqlite3_module fts3tok_module = { |
| 10913 0, /* iVersion */ |
| 10914 fts3tokConnectMethod, /* xCreate */ |
| 10915 fts3tokConnectMethod, /* xConnect */ |
| 10916 fts3tokBestIndexMethod, /* xBestIndex */ |
| 10917 fts3tokDisconnectMethod, /* xDisconnect */ |
| 10918 fts3tokDisconnectMethod, /* xDestroy */ |
| 10919 fts3tokOpenMethod, /* xOpen */ |
| 10920 fts3tokCloseMethod, /* xClose */ |
| 10921 fts3tokFilterMethod, /* xFilter */ |
| 10922 fts3tokNextMethod, /* xNext */ |
| 10923 fts3tokEofMethod, /* xEof */ |
| 10924 fts3tokColumnMethod, /* xColumn */ |
| 10925 fts3tokRowidMethod, /* xRowid */ |
| 10926 0, /* xUpdate */ |
| 10927 0, /* xBegin */ |
| 10928 0, /* xSync */ |
| 10929 0, /* xCommit */ |
| 10930 0, /* xRollback */ |
| 10931 0, /* xFindFunction */ |
| 10932 0, /* xRename */ |
| 10933 0, /* xSavepoint */ |
| 10934 0, /* xRelease */ |
| 10935 0 /* xRollbackTo */ |
| 10936 }; |
| 10937 int rc; /* Return code */ |
| 10938 |
| 10939 rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); |
| 10940 return rc; |
| 10941 } |
| 10942 |
| 10943 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10944 |
| 10945 /************** End of fts3_tokenize_vtab.c **********************************/ |
| 10946 /************** Begin file fts3_write.c **************************************/ |
| 10947 /* |
| 10948 ** 2009 Oct 23 |
| 10949 ** |
| 10950 ** The author disclaims copyright to this source code. In place of |
| 10951 ** a legal notice, here is a blessing: |
| 10952 ** |
| 10953 ** May you do good and not evil. |
| 10954 ** May you find forgiveness for yourself and forgive others. |
| 10955 ** May you share freely, never taking more than you give. |
| 10956 ** |
| 10957 ****************************************************************************** |
| 10958 ** |
| 10959 ** This file is part of the SQLite FTS3 extension module. Specifically, |
| 10960 ** this file contains code to insert, update and delete rows from FTS3 |
| 10961 ** tables. It also contains code to merge FTS3 b-tree segments. Some |
| 10962 ** of the sub-routines used to merge segments are also used by the query |
| 10963 ** code in fts3.c. |
| 10964 */ |
| 10965 |
| 10966 /* #include "fts3Int.h" */ |
| 10967 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 10968 |
| 10969 /* #include <string.h> */ |
| 10970 /* #include <assert.h> */ |
| 10971 /* #include <stdlib.h> */ |
| 10972 |
| 10973 |
/* NOTE(review): presumably the maximum b-tree height of a segment that may
** still be appended to - confirm against the users of this constant. */
#define FTS_MAX_APPENDABLE_HEIGHT 16

/*
** Number of padding bytes that follow a full-text index node in the buffer
** it is loaded into from disk. For example, a 900 byte node is read into a
** 920 byte allocation.
**
** Because the padding is two maximum-size varints long, up to two varints
** may always be read through a pointer into node data without overreading
** the buffer, even when the node content is corrupt.
*/
#define FTS3_NODE_PADDING (2*FTS3_VARINT_MAX)
| 10987 |
/*
** In some cases b-tree nodes (doclists) are read into memory a piece at a
** time rather than in one go. This saves IO and CPU whenever SQLite stops
** calling the virtual table xNext() method before all query results have
** been retrieved (for example because of a LIMIT clause).
**
** Nodes of FTS3_NODE_CHUNK_THRESHOLD bytes or more are loaded
** incrementally, FTS3_NODE_CHUNKSIZE bytes per chunk. The code is written
** so that the hard lower limit for both values is 1; values that small are
** inefficient but handy for testing.
**
** In SQLITE_TEST builds both values are ordinary global variables so that
** they can be changed at runtime. File fts3_test.c contains a Tcl interface
** to read and write them.
*/
#ifndef SQLITE_TEST
# define FTS3_NODE_CHUNKSIZE       (4*1024)
# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
#else
int test_fts3_node_chunksize = (4*1024);
int test_fts3_node_chunk_threshold = (4*1024)*4;
# define FTS3_NODE_CHUNKSIZE       test_fts3_node_chunksize
# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
#endif
| 11014 |
/*
** Values that may be meaningfully bound to the :1 parameter of statements
** SQL_REPLACE_STAT and SQL_SELECT_STAT. Each one is the "id" of a row in
** the %_stat table.
*/
#define FTS_STAT_DOCTOTAL      0
#define FTS_STAT_INCRMERGEHINT 1
#define FTS_STAT_AUTOINCRMERGE 2
| 11022 |
/*
** If FTS3_LOG_MERGES is defined, fts3LogMerge() calls sqlite3_log() to
** report a merge operation: nMerge is the number of segments merged and
** iAbsLevel the absolute level they are read from. It is intended to be
** invoked for each automatic and incremental merge. This is for debugging
** FTS only and should not usually be turned on in production systems.
**
** When FTS3_LOG_MERGES is not defined, fts3LogMerge() expands to nothing.
*/
#ifdef FTS3_LOG_MERGES
static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
  /* Absolute levels are 64-bit values; log them with %lld rather than
  ** truncating to int, so large language-id/index blocks are reported
  ** correctly. */
  sqlite3_log(SQLITE_OK, "%d-way merge from level %lld", nMerge, iAbsLevel);
}
#else
#define fts3LogMerge(x, y)
#endif
| 11036 |
| 11037 |
| 11038 typedef struct PendingList PendingList; |
| 11039 typedef struct SegmentNode SegmentNode; |
| 11040 typedef struct SegmentWriter SegmentWriter; |
| 11041 |
| 11042 /* |
| 11043 ** An instance of the following data structure is used to build doclists |
| 11044 ** incrementally. See function fts3PendingListAppend() for details. |
| 11045 */ |
| 11046 struct PendingList { |
| 11047 int nData; |
| 11048 char *aData; |
| 11049 int nSpace; |
| 11050 sqlite3_int64 iLastDocid; |
| 11051 sqlite3_int64 iLastCol; |
| 11052 sqlite3_int64 iLastPos; |
| 11053 }; |
| 11054 |
| 11055 |
| 11056 /* |
| 11057 ** Each cursor has a (possibly empty) linked list of the following objects. |
| 11058 */ |
| 11059 struct Fts3DeferredToken { |
| 11060 Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ |
| 11061 int iCol; /* Column token must occur in */ |
| 11062 Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ |
| 11063 PendingList *pList; /* Doclist is assembled here */ |
| 11064 }; |
| 11065 |
| 11066 /* |
| 11067 ** An instance of this structure is used to iterate through the terms on |
| 11068 ** a contiguous set of segment b-tree leaf nodes. Although the details of |
| 11069 ** this structure are only manipulated by code in this file, opaque handles |
| 11070 ** of type Fts3SegReader* are also used by code in fts3.c to iterate through |
| 11071 ** terms when querying the full-text index. See functions: |
| 11072 ** |
| 11073 ** sqlite3Fts3SegReaderNew() |
| 11074 ** sqlite3Fts3SegReaderFree() |
| 11075 ** sqlite3Fts3SegReaderIterate() |
| 11076 ** |
| 11077 ** Methods used to manipulate Fts3SegReader structures: |
| 11078 ** |
| 11079 ** fts3SegReaderNext() |
| 11080 ** fts3SegReaderFirstDocid() |
| 11081 ** fts3SegReaderNextDocid() |
| 11082 */ |
| 11083 struct Fts3SegReader { |
| 11084 int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ |
| 11085 u8 bLookup; /* True for a lookup only */ |
| 11086 u8 rootOnly; /* True for a root-only reader */ |
| 11087 |
| 11088 sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ |
| 11089 sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ |
| 11090 sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ |
| 11091 sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ |
| 11092 |
| 11093 char *aNode; /* Pointer to node data (or NULL) */ |
| 11094 int nNode; /* Size of buffer at aNode (or 0) */ |
| 11095 int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ |
| 11096 sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ |
| 11097 |
| 11098 Fts3HashElem **ppNextElem; |
| 11099 |
| 11100 /* Variables set by fts3SegReaderNext(). These may be read directly |
| 11101 ** by the caller. They are valid from the time SegmentReaderNew() returns |
| 11102 ** until SegmentReaderNext() returns something other than SQLITE_OK |
| 11103 ** (i.e. SQLITE_DONE). |
| 11104 */ |
| 11105 int nTerm; /* Number of bytes in current term */ |
| 11106 char *zTerm; /* Pointer to current term */ |
| 11107 int nTermAlloc; /* Allocated size of zTerm buffer */ |
| 11108 char *aDoclist; /* Pointer to doclist of current entry */ |
| 11109 int nDoclist; /* Size of doclist in current entry */ |
| 11110 |
| 11111 /* The following variables are used by fts3SegReaderNextDocid() to iterate |
| 11112 ** through the current doclist (aDoclist/nDoclist). |
| 11113 */ |
| 11114 char *pOffsetList; |
| 11115 int nOffsetList; /* For descending pending seg-readers only */ |
| 11116 sqlite3_int64 iDocid; |
| 11117 }; |
| 11118 |
| 11119 #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) |
| 11120 #define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) |
| 11121 |
| 11122 /* |
| 11123 ** An instance of this structure is used to create a segment b-tree in the |
| 11124 ** database. The internal details of this type are only accessed by the |
| 11125 ** following functions: |
| 11126 ** |
| 11127 ** fts3SegWriterAdd() |
| 11128 ** fts3SegWriterFlush() |
| 11129 ** fts3SegWriterFree() |
| 11130 */ |
| 11131 struct SegmentWriter { |
| 11132 SegmentNode *pTree; /* Pointer to interior tree structure */ |
| 11133 sqlite3_int64 iFirst; /* First slot in %_segments written */ |
| 11134 sqlite3_int64 iFree; /* Next free slot in %_segments */ |
| 11135 char *zTerm; /* Pointer to previous term buffer */ |
| 11136 int nTerm; /* Number of bytes in zTerm */ |
| 11137 int nMalloc; /* Size of malloc'd buffer at zMalloc */ |
| 11138 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ |
| 11139 int nSize; /* Size of allocation at aData */ |
| 11140 int nData; /* Bytes of data in aData */ |
| 11141 char *aData; /* Pointer to block from malloc() */ |
| 11142 i64 nLeafData; /* Number of bytes of leaf data written */ |
| 11143 }; |
| 11144 |
| 11145 /* |
| 11146 ** Type SegmentNode is used by the following three functions to create |
| 11147 ** the interior part of the segment b+-tree structures (everything except |
| 11148 ** the leaf nodes). These functions and type are only ever used by code |
| 11149 ** within the fts3SegWriterXXX() family of functions described above. |
| 11150 ** |
| 11151 ** fts3NodeAddTerm() |
| 11152 ** fts3NodeWrite() |
| 11153 ** fts3NodeFree() |
| 11154 ** |
| 11155 ** When a b+tree is written to the database (either as a result of a merge |
| 11156 ** or the pending-terms table being flushed), leaves are written into the |
| 11157 ** database file as soon as they are completely populated. The interior of |
| 11158 ** the tree is assembled in memory and written out only once all leaves have |
| 11159 ** been populated and stored. This is Ok, as the b+-tree fanout is usually |
| 11160 ** very large, meaning that the interior of the tree consumes relatively |
| 11161 ** little memory. |
| 11162 */ |
| 11163 struct SegmentNode { |
| 11164 SegmentNode *pParent; /* Parent node (or NULL for root node) */ |
| 11165 SegmentNode *pRight; /* Pointer to right-sibling */ |
| 11166 SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ |
| 11167 int nEntry; /* Number of terms written to node so far */ |
| 11168 char *zTerm; /* Pointer to previous term buffer */ |
| 11169 int nTerm; /* Number of bytes in zTerm */ |
| 11170 int nMalloc; /* Size of malloc'd buffer at zMalloc */ |
| 11171 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ |
| 11172 int nData; /* Bytes of valid data so far */ |
| 11173 char *aData; /* Node data */ |
| 11174 }; |
| 11175 |
/*
** Identifiers for the prepared statements managed by fts3SqlStmt(). Each
** value is passed as the second argument to fts3SqlStmt(), where it is used
** as the index into both the azSql[] table of SQL text and the
** Fts3Table.aStmt[] cache of compiled statements. The numbering must
** therefore stay dense and in step with azSql[].
*/
#define SQL_DELETE_CONTENT             0
#define SQL_IS_EMPTY                   1
#define SQL_DELETE_ALL_CONTENT         2
#define SQL_DELETE_ALL_SEGMENTS        3
#define SQL_DELETE_ALL_SEGDIR          4
#define SQL_DELETE_ALL_DOCSIZE         5
#define SQL_DELETE_ALL_STAT            6
#define SQL_SELECT_CONTENT_BY_ROWID    7
#define SQL_NEXT_SEGMENT_INDEX         8
#define SQL_INSERT_SEGMENTS            9
#define SQL_NEXT_SEGMENTS_ID          10
#define SQL_INSERT_SEGDIR             11
#define SQL_SELECT_LEVEL              12
#define SQL_SELECT_LEVEL_RANGE        13
#define SQL_SELECT_LEVEL_COUNT        14
#define SQL_SELECT_SEGDIR_MAX_LEVEL   15
#define SQL_DELETE_SEGDIR_LEVEL       16
#define SQL_DELETE_SEGMENTS_RANGE     17
#define SQL_CONTENT_INSERT            18
#define SQL_DELETE_DOCSIZE            19
#define SQL_REPLACE_DOCSIZE           20
#define SQL_SELECT_DOCSIZE            21
#define SQL_SELECT_STAT               22
#define SQL_REPLACE_STAT              23

/* Slots 24 and 25 have empty SQL text in azSql[]. */
#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25
#define SQL_DELETE_SEGDIR_RANGE       26
#define SQL_SELECT_ALL_LANGID         27
#define SQL_FIND_MERGE_LEVEL          28
#define SQL_MAX_LEAF_NODE_ESTIMATE    29
#define SQL_DELETE_SEGDIR_ENTRY       30
#define SQL_SHIFT_SEGDIR_ENTRY        31
#define SQL_SELECT_SEGDIR             32
#define SQL_CHOMP_SEGDIR              33
#define SQL_SEGMENT_IS_APPENDABLE     34
#define SQL_SELECT_INDEXES            35
#define SQL_SELECT_MXLEVEL            36

#define SQL_SELECT_LEVEL_RANGE2       37
#define SQL_UPDATE_LEVEL_IDX          38
#define SQL_UPDATE_LEVEL              39
| 11221 |
| 11222 /* |
| 11223 ** This function is used to obtain an SQLite prepared statement handle |
| 11224 ** for the statement identified by the second argument. If successful, |
| 11225 ** *pp is set to the requested statement handle and SQLITE_OK returned. |
| 11226 ** Otherwise, an SQLite error code is returned and *pp is set to 0. |
| 11227 ** |
| 11228 ** If argument apVal is not NULL, then it must point to an array with |
| 11229 ** at least as many entries as the requested statement has bound |
| 11230 ** parameters. The values are bound to the statements parameters before |
| 11231 ** returning. |
| 11232 */ |
| 11233 static int fts3SqlStmt( |
| 11234 Fts3Table *p, /* Virtual table handle */ |
| 11235 int eStmt, /* One of the SQL_XXX constants above */ |
| 11236 sqlite3_stmt **pp, /* OUT: Statement handle */ |
| 11237 sqlite3_value **apVal /* Values to bind to statement */ |
| 11238 ){ |
| 11239 const char *azSql[] = { |
| 11240 /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", |
| 11241 /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", |
| 11242 /* 2 */ "DELETE FROM %Q.'%q_content'", |
| 11243 /* 3 */ "DELETE FROM %Q.'%q_segments'", |
| 11244 /* 4 */ "DELETE FROM %Q.'%q_segdir'", |
| 11245 /* 5 */ "DELETE FROM %Q.'%q_docsize'", |
| 11246 /* 6 */ "DELETE FROM %Q.'%q_stat'", |
| 11247 /* 7 */ "SELECT %s WHERE rowid=?", |
| 11248 /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", |
| 11249 /* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", |
| 11250 /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", |
| 11251 /* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", |
| 11252 |
| 11253 /* Return segments in order from oldest to newest.*/ |
| 11254 /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11255 "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", |
| 11256 /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11257 "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" |
| 11258 "ORDER BY level DESC, idx ASC", |
| 11259 |
| 11260 /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", |
| 11261 /* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", |
| 11262 |
| 11263 /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", |
| 11264 /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", |
| 11265 /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", |
| 11266 /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", |
| 11267 /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", |
| 11268 /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", |
| 11269 /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", |
| 11270 /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", |
| 11271 /* 24 */ "", |
| 11272 /* 25 */ "", |
| 11273 |
| 11274 /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", |
| 11275 /* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", |
| 11276 |
| 11277 /* This statement is used to determine which level to read the input from |
| 11278 ** when performing an incremental merge. It returns the absolute level number |
| 11279 ** of the oldest level in the db that contains at least ? segments. Or, |
| 11280 ** if no level in the FTS index contains more than ? segments, the statement |
| 11281 ** returns zero rows. */ |
| 11282 /* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?" |
| 11283 " ORDER BY (level %% 1024) ASC LIMIT 1", |
| 11284 |
| 11285 /* Estimate the upper limit on the number of leaf nodes in a new segment |
| 11286 ** created by merging the oldest :2 segments from absolute level :1. See |
| 11287 ** function sqlite3Fts3Incrmerge() for details. */ |
| 11288 /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " |
| 11289 " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", |
| 11290 |
| 11291 /* SQL_DELETE_SEGDIR_ENTRY |
| 11292 ** Delete the %_segdir entry on absolute level :1 with index :2. */ |
| 11293 /* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", |
| 11294 |
| 11295 /* SQL_SHIFT_SEGDIR_ENTRY |
| 11296 ** Modify the idx value for the segment with idx=:3 on absolute level :2 |
| 11297 ** to :1. */ |
| 11298 /* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", |
| 11299 |
| 11300 /* SQL_SELECT_SEGDIR |
| 11301 ** Read a single entry from the %_segdir table. The entry from absolute |
| 11302 ** level :1 with index value :2. */ |
| 11303 /* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11304 "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", |
| 11305 |
| 11306 /* SQL_CHOMP_SEGDIR |
| 11307 ** Update the start_block (:1) and root (:2) fields of the %_segdir |
| 11308 ** entry located on absolute level :3 with index :4. */ |
| 11309 /* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" |
| 11310 "WHERE level = ? AND idx = ?", |
| 11311 |
| 11312 /* SQL_SEGMENT_IS_APPENDABLE |
| 11313 ** Return a single row if the segment with end_block=? is appendable. Or |
| 11314 ** no rows otherwise. */ |
| 11315 /* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", |
| 11316 |
| 11317 /* SQL_SELECT_INDEXES |
| 11318 ** Return the list of valid segment indexes for absolute level ? */ |
| 11319 /* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", |
| 11320 |
| 11321 /* SQL_SELECT_MXLEVEL |
| 11322 ** Return the largest relative level in the FTS index or indexes. */ |
| 11323 /* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", |
| 11324 |
| 11325 /* Return segments in order from oldest to newest.*/ |
| 11326 /* 37 */ "SELECT level, idx, end_block " |
| 11327 "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " |
| 11328 "ORDER BY level DESC, idx ASC", |
| 11329 |
| 11330 /* Update statements used while promoting segments */ |
| 11331 /* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " |
| 11332 "WHERE level=? AND idx=?", |
| 11333 /* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" |
| 11334 |
| 11335 }; |
| 11336 int rc = SQLITE_OK; |
| 11337 sqlite3_stmt *pStmt; |
| 11338 |
| 11339 assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); |
| 11340 assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); |
| 11341 |
| 11342 pStmt = p->aStmt[eStmt]; |
| 11343 if( !pStmt ){ |
| 11344 char *zSql; |
| 11345 if( eStmt==SQL_CONTENT_INSERT ){ |
| 11346 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); |
| 11347 }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ |
| 11348 zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); |
| 11349 }else{ |
| 11350 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); |
| 11351 } |
| 11352 if( !zSql ){ |
| 11353 rc = SQLITE_NOMEM; |
| 11354 }else{ |
| 11355 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); |
| 11356 sqlite3_free(zSql); |
| 11357 assert( rc==SQLITE_OK || pStmt==0 ); |
| 11358 p->aStmt[eStmt] = pStmt; |
| 11359 } |
| 11360 } |
| 11361 if( apVal ){ |
| 11362 int i; |
| 11363 int nParam = sqlite3_bind_parameter_count(pStmt); |
| 11364 for(i=0; rc==SQLITE_OK && i<nParam; i++){ |
| 11365 rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); |
| 11366 } |
| 11367 } |
| 11368 *pp = pStmt; |
| 11369 return rc; |
| 11370 } |
| 11371 |
| 11372 |
| 11373 static int fts3SelectDocsize( |
| 11374 Fts3Table *pTab, /* FTS3 table handle */ |
| 11375 sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ |
| 11376 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11377 ){ |
| 11378 sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ |
| 11379 int rc; /* Return code */ |
| 11380 |
| 11381 rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); |
| 11382 if( rc==SQLITE_OK ){ |
| 11383 sqlite3_bind_int64(pStmt, 1, iDocid); |
| 11384 rc = sqlite3_step(pStmt); |
| 11385 if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ |
| 11386 rc = sqlite3_reset(pStmt); |
| 11387 if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; |
| 11388 pStmt = 0; |
| 11389 }else{ |
| 11390 rc = SQLITE_OK; |
| 11391 } |
| 11392 } |
| 11393 |
| 11394 *ppStmt = pStmt; |
| 11395 return rc; |
| 11396 } |
| 11397 |
| 11398 SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( |
| 11399 Fts3Table *pTab, /* Fts3 table handle */ |
| 11400 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11401 ){ |
| 11402 sqlite3_stmt *pStmt = 0; |
| 11403 int rc; |
| 11404 rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); |
| 11405 if( rc==SQLITE_OK ){ |
| 11406 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 11407 if( sqlite3_step(pStmt)!=SQLITE_ROW |
| 11408 || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB |
| 11409 ){ |
| 11410 rc = sqlite3_reset(pStmt); |
| 11411 if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; |
| 11412 pStmt = 0; |
| 11413 } |
| 11414 } |
| 11415 *ppStmt = pStmt; |
| 11416 return rc; |
| 11417 } |
| 11418 |
| 11419 SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( |
| 11420 Fts3Table *pTab, /* Fts3 table handle */ |
| 11421 sqlite3_int64 iDocid, /* Docid to read size data for */ |
| 11422 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11423 ){ |
| 11424 return fts3SelectDocsize(pTab, iDocid, ppStmt); |
| 11425 } |
| 11426 |
| 11427 /* |
| 11428 ** Similar to fts3SqlStmt(). Except, after binding the parameters in |
| 11429 ** array apVal[] to the SQL statement identified by eStmt, the statement |
| 11430 ** is executed. |
| 11431 ** |
| 11432 ** Returns SQLITE_OK if the statement is successfully executed, or an |
| 11433 ** SQLite error code otherwise. |
| 11434 */ |
| 11435 static void fts3SqlExec( |
| 11436 int *pRC, /* Result code */ |
| 11437 Fts3Table *p, /* The FTS3 table */ |
| 11438 int eStmt, /* Index of statement to evaluate */ |
| 11439 sqlite3_value **apVal /* Parameters to bind */ |
| 11440 ){ |
| 11441 sqlite3_stmt *pStmt; |
| 11442 int rc; |
| 11443 if( *pRC ) return; |
| 11444 rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); |
| 11445 if( rc==SQLITE_OK ){ |
| 11446 sqlite3_step(pStmt); |
| 11447 rc = sqlite3_reset(pStmt); |
| 11448 } |
| 11449 *pRC = rc; |
| 11450 } |
| 11451 |
| 11452 |
| 11453 /* |
| 11454 ** This function ensures that the caller has obtained an exclusive |
| 11455 ** shared-cache table-lock on the %_segdir table. This is required before |
| 11456 ** writing data to the fts3 table. If this lock is not acquired first, then |
| 11457 ** the caller may end up attempting to take this lock as part of committing |
| 11458 ** a transaction, causing SQLite to return SQLITE_LOCKED or |
| 11459 ** LOCKED_SHAREDCACHEto a COMMIT command. |
| 11460 ** |
| 11461 ** It is best to avoid this because if FTS3 returns any error when |
| 11462 ** committing a transaction, the whole transaction will be rolled back. |
| 11463 ** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. |
| 11464 ** It can still happen if the user locks the underlying tables directly |
| 11465 ** instead of accessing them via FTS. |
| 11466 */ |
| 11467 static int fts3Writelock(Fts3Table *p){ |
| 11468 int rc = SQLITE_OK; |
| 11469 |
| 11470 if( p->nPendingData==0 ){ |
| 11471 sqlite3_stmt *pStmt; |
| 11472 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); |
| 11473 if( rc==SQLITE_OK ){ |
| 11474 sqlite3_bind_null(pStmt, 1); |
| 11475 sqlite3_step(pStmt); |
| 11476 rc = sqlite3_reset(pStmt); |
| 11477 } |
| 11478 } |
| 11479 |
| 11480 return rc; |
| 11481 } |
| 11482 |
| 11483 /* |
| 11484 ** FTS maintains a separate indexes for each language-id (a 32-bit integer). |
| 11485 ** Within each language id, a separate index is maintained to store the |
| 11486 ** document terms, and each configured prefix size (configured the FTS |
| 11487 ** "prefix=" option). And each index consists of multiple levels ("relative |
| 11488 ** levels"). |
| 11489 ** |
| 11490 ** All three of these values (the language id, the specific index and the |
| 11491 ** level within the index) are encoded in 64-bit integer values stored |
| 11492 ** in the %_segdir table on disk. This function is used to convert three |
| 11493 ** separate component values into the single 64-bit integer value that |
| 11494 ** can be used to query the %_segdir table. |
| 11495 ** |
| 11496 ** Specifically, each language-id/index combination is allocated 1024 |
| 11497 ** 64-bit integer level values ("absolute levels"). The main terms index |
| 11498 ** for language-id 0 is allocate values 0-1023. The first prefix index |
| 11499 ** (if any) for language-id 0 is allocated values 1024-2047. And so on. |
| 11500 ** Language 1 indexes are allocated immediately following language 0. |
| 11501 ** |
| 11502 ** So, for a system with nPrefix prefix indexes configured, the block of |
| 11503 ** absolute levels that corresponds to language-id iLangid and index |
| 11504 ** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). |
| 11505 */ |
| 11506 static sqlite3_int64 getAbsoluteLevel( |
| 11507 Fts3Table *p, /* FTS3 table handle */ |
| 11508 int iLangid, /* Language id */ |
| 11509 int iIndex, /* Index in p->aIndex[] */ |
| 11510 int iLevel /* Level of segments */ |
| 11511 ){ |
| 11512 sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ |
| 11513 assert( iLangid>=0 ); |
| 11514 assert( p->nIndex>0 ); |
| 11515 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 11516 |
| 11517 iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; |
| 11518 return iBase + iLevel; |
| 11519 } |
| 11520 |
| 11521 /* |
| 11522 ** Set *ppStmt to a statement handle that may be used to iterate through |
| 11523 ** all rows in the %_segdir table, from oldest to newest. If successful, |
| 11524 ** return SQLITE_OK. If an error occurs while preparing the statement, |
| 11525 ** return an SQLite error code. |
| 11526 ** |
| 11527 ** There is only ever one instance of this SQL statement compiled for |
| 11528 ** each FTS3 table. |
| 11529 ** |
| 11530 ** The statement returns the following columns from the %_segdir table: |
| 11531 ** |
| 11532 ** 0: idx |
| 11533 ** 1: start_block |
| 11534 ** 2: leaves_end_block |
| 11535 ** 3: end_block |
| 11536 ** 4: root |
| 11537 */ |
| 11538 SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( |
| 11539 Fts3Table *p, /* FTS3 table */ |
| 11540 int iLangid, /* Language being queried */ |
| 11541 int iIndex, /* Index for p->aIndex[] */ |
| 11542 int iLevel, /* Level to select (relative level) */ |
| 11543 sqlite3_stmt **ppStmt /* OUT: Compiled statement */ |
| 11544 ){ |
| 11545 int rc; |
| 11546 sqlite3_stmt *pStmt = 0; |
| 11547 |
| 11548 assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); |
| 11549 assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
| 11550 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 11551 |
| 11552 if( iLevel<0 ){ |
| 11553 /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ |
| 11554 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); |
| 11555 if( rc==SQLITE_OK ){ |
| 11556 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 11557 sqlite3_bind_int64(pStmt, 2, |
| 11558 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 11559 ); |
| 11560 } |
| 11561 }else{ |
| 11562 /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ |
| 11563 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); |
| 11564 if( rc==SQLITE_OK ){ |
| 11565 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); |
| 11566 } |
| 11567 } |
| 11568 *ppStmt = pStmt; |
| 11569 return rc; |
| 11570 } |
| 11571 |
| 11572 |
| 11573 /* |
| 11574 ** Append a single varint to a PendingList buffer. SQLITE_OK is returned |
| 11575 ** if successful, or an SQLite error code otherwise. |
| 11576 ** |
| 11577 ** This function also serves to allocate the PendingList structure itself. |
| 11578 ** For example, to create a new PendingList structure containing two |
| 11579 ** varints: |
| 11580 ** |
| 11581 ** PendingList *p = 0; |
| 11582 ** fts3PendingListAppendVarint(&p, 1); |
| 11583 ** fts3PendingListAppendVarint(&p, 2); |
| 11584 */ |
| 11585 static int fts3PendingListAppendVarint( |
| 11586 PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ |
| 11587 sqlite3_int64 i /* Value to append to data */ |
| 11588 ){ |
| 11589 PendingList *p = *pp; |
| 11590 |
| 11591 /* Allocate or grow the PendingList as required. */ |
| 11592 if( !p ){ |
| 11593 p = sqlite3_malloc(sizeof(*p) + 100); |
| 11594 if( !p ){ |
| 11595 return SQLITE_NOMEM; |
| 11596 } |
| 11597 p->nSpace = 100; |
| 11598 p->aData = (char *)&p[1]; |
| 11599 p->nData = 0; |
| 11600 } |
| 11601 else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ |
| 11602 int nNew = p->nSpace * 2; |
| 11603 p = sqlite3_realloc(p, sizeof(*p) + nNew); |
| 11604 if( !p ){ |
| 11605 sqlite3_free(*pp); |
| 11606 *pp = 0; |
| 11607 return SQLITE_NOMEM; |
| 11608 } |
| 11609 p->nSpace = nNew; |
| 11610 p->aData = (char *)&p[1]; |
| 11611 } |
| 11612 |
| 11613 /* Append the new serialized varint to the end of the list. */ |
| 11614 p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); |
| 11615 p->aData[p->nData] = '\0'; |
| 11616 *pp = p; |
| 11617 return SQLITE_OK; |
| 11618 } |
| 11619 |
| 11620 /* |
| 11621 ** Add a docid/column/position entry to a PendingList structure. Non-zero |
| 11622 ** is returned if the structure is sqlite3_realloced as part of adding |
| 11623 ** the entry. Otherwise, zero. |
| 11624 ** |
| 11625 ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. |
| 11626 ** Zero is always returned in this case. Otherwise, if no OOM error occurs, |
| 11627 ** it is set to SQLITE_OK. |
| 11628 */ |
| 11629 static int fts3PendingListAppend( |
| 11630 PendingList **pp, /* IN/OUT: PendingList structure */ |
| 11631 sqlite3_int64 iDocid, /* Docid for entry to add */ |
| 11632 sqlite3_int64 iCol, /* Column for entry to add */ |
| 11633 sqlite3_int64 iPos, /* Position of term for entry to add */ |
| 11634 int *pRc /* OUT: Return code */ |
| 11635 ){ |
| 11636 PendingList *p = *pp; |
| 11637 int rc = SQLITE_OK; |
| 11638 |
| 11639 assert( !p || p->iLastDocid<=iDocid ); |
| 11640 |
| 11641 if( !p || p->iLastDocid!=iDocid ){ |
| 11642 sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); |
| 11643 if( p ){ |
| 11644 assert( p->nData<p->nSpace ); |
| 11645 assert( p->aData[p->nData]==0 ); |
| 11646 p->nData++; |
| 11647 } |
| 11648 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ |
| 11649 goto pendinglistappend_out; |
| 11650 } |
| 11651 p->iLastCol = -1; |
| 11652 p->iLastPos = 0; |
| 11653 p->iLastDocid = iDocid; |
| 11654 } |
| 11655 if( iCol>0 && p->iLastCol!=iCol ){ |
| 11656 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) |
| 11657 || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) |
| 11658 ){ |
| 11659 goto pendinglistappend_out; |
| 11660 } |
| 11661 p->iLastCol = iCol; |
| 11662 p->iLastPos = 0; |
| 11663 } |
| 11664 if( iCol>=0 ){ |
| 11665 assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); |
| 11666 rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); |
| 11667 if( rc==SQLITE_OK ){ |
| 11668 p->iLastPos = iPos; |
| 11669 } |
| 11670 } |
| 11671 |
| 11672 pendinglistappend_out: |
| 11673 *pRc = rc; |
| 11674 if( p!=*pp ){ |
| 11675 *pp = p; |
| 11676 return 1; |
| 11677 } |
| 11678 return 0; |
| 11679 } |
| 11680 |
| 11681 /* |
| 11682 ** Free a PendingList object allocated by fts3PendingListAppend(). |
| 11683 */ |
| 11684 static void fts3PendingListDelete(PendingList *pList){ |
| 11685 sqlite3_free(pList); |
| 11686 } |
| 11687 |
| 11688 /* |
| 11689 ** Add an entry to one of the pending-terms hash tables. |
| 11690 */ |
| 11691 static int fts3PendingTermsAddOne( |
| 11692 Fts3Table *p, |
| 11693 int iCol, |
| 11694 int iPos, |
| 11695 Fts3Hash *pHash, /* Pending terms hash table to add entry to */ |
| 11696 const char *zToken, |
| 11697 int nToken |
| 11698 ){ |
| 11699 PendingList *pList; |
| 11700 int rc = SQLITE_OK; |
| 11701 |
| 11702 pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); |
| 11703 if( pList ){ |
| 11704 p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); |
| 11705 } |
| 11706 if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ |
| 11707 if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ |
| 11708 /* Malloc failed while inserting the new entry. This can only |
| 11709 ** happen if there was no previous entry for this token. |
| 11710 */ |
| 11711 assert( 0==fts3HashFind(pHash, zToken, nToken) ); |
| 11712 sqlite3_free(pList); |
| 11713 rc = SQLITE_NOMEM; |
| 11714 } |
| 11715 } |
| 11716 if( rc==SQLITE_OK ){ |
| 11717 p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); |
| 11718 } |
| 11719 return rc; |
| 11720 } |
| 11721 |
| 11722 /* |
| 11723 ** Tokenize the nul-terminated string zText and add all tokens to the |
| 11724 ** pending-terms hash-table. The docid used is that currently stored in |
| 11725 ** p->iPrevDocid, and the column is specified by argument iCol. |
| 11726 ** |
| 11727 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. |
| 11728 */ |
| 11729 static int fts3PendingTermsAdd( |
| 11730 Fts3Table *p, /* Table into which text will be inserted */ |
| 11731 int iLangid, /* Language id to use */ |
| 11732 const char *zText, /* Text of document to be inserted */ |
| 11733 int iCol, /* Column into which text is being inserted */ |
| 11734 u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ |
| 11735 ){ |
| 11736 int rc; |
| 11737 int iStart = 0; |
| 11738 int iEnd = 0; |
| 11739 int iPos = 0; |
| 11740 int nWord = 0; |
| 11741 |
| 11742 char const *zToken; |
| 11743 int nToken = 0; |
| 11744 |
| 11745 sqlite3_tokenizer *pTokenizer = p->pTokenizer; |
| 11746 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 11747 sqlite3_tokenizer_cursor *pCsr; |
| 11748 int (*xNext)(sqlite3_tokenizer_cursor *pCursor, |
| 11749 const char**,int*,int*,int*,int*); |
| 11750 |
| 11751 assert( pTokenizer && pModule ); |
| 11752 |
| 11753 /* If the user has inserted a NULL value, this function may be called with |
| 11754 ** zText==0. In this case, add zero token entries to the hash table and |
| 11755 ** return early. */ |
| 11756 if( zText==0 ){ |
| 11757 *pnWord = 0; |
| 11758 return SQLITE_OK; |
| 11759 } |
| 11760 |
| 11761 rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); |
| 11762 if( rc!=SQLITE_OK ){ |
| 11763 return rc; |
| 11764 } |
| 11765 |
| 11766 xNext = pModule->xNext; |
| 11767 while( SQLITE_OK==rc |
| 11768 && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) |
| 11769 ){ |
| 11770 int i; |
| 11771 if( iPos>=nWord ) nWord = iPos+1; |
| 11772 |
| 11773 /* Positions cannot be negative; we use -1 as a terminator internally. |
| 11774 ** Tokens must have a non-zero length. |
| 11775 */ |
| 11776 if( iPos<0 || !zToken || nToken<=0 ){ |
| 11777 rc = SQLITE_ERROR; |
| 11778 break; |
| 11779 } |
| 11780 |
| 11781 /* Add the term to the terms index */ |
| 11782 rc = fts3PendingTermsAddOne( |
| 11783 p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken |
| 11784 ); |
| 11785 |
| 11786 /* Add the term to each of the prefix indexes that it is not too |
| 11787 ** short for. */ |
| 11788 for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 11789 struct Fts3Index *pIndex = &p->aIndex[i]; |
| 11790 if( nToken<pIndex->nPrefix ) continue; |
| 11791 rc = fts3PendingTermsAddOne( |
| 11792 p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix |
| 11793 ); |
| 11794 } |
| 11795 } |
| 11796 |
| 11797 pModule->xClose(pCsr); |
| 11798 *pnWord += nWord; |
| 11799 return (rc==SQLITE_DONE ? SQLITE_OK : rc); |
| 11800 } |
| 11801 |
| 11802 /* |
| 11803 ** Calling this function indicates that subsequent calls to |
| 11804 ** fts3PendingTermsAdd() are to add term/position-list pairs for the |
| 11805 ** contents of the document with docid iDocid. |
| 11806 */ |
| 11807 static int fts3PendingTermsDocid( |
| 11808 Fts3Table *p, /* Full-text table handle */ |
| 11809 int bDelete, /* True if this op is a delete */ |
| 11810 int iLangid, /* Language id of row being written */ |
| 11811 sqlite_int64 iDocid /* Docid of row being written */ |
| 11812 ){ |
| 11813 assert( iLangid>=0 ); |
| 11814 assert( bDelete==1 || bDelete==0 ); |
| 11815 |
| 11816 /* TODO(shess) Explore whether partially flushing the buffer on |
| 11817 ** forced-flush would provide better performance. I suspect that if |
| 11818 ** we ordered the doclists by size and flushed the largest until the |
| 11819 ** buffer was half empty, that would let the less frequent terms |
| 11820 ** generate longer doclists. |
| 11821 */ |
| 11822 if( iDocid<p->iPrevDocid |
| 11823 || (iDocid==p->iPrevDocid && p->bPrevDelete==0) |
| 11824 || p->iPrevLangid!=iLangid |
| 11825 || p->nPendingData>p->nMaxPendingData |
| 11826 ){ |
| 11827 int rc = sqlite3Fts3PendingTermsFlush(p); |
| 11828 if( rc!=SQLITE_OK ) return rc; |
| 11829 } |
| 11830 p->iPrevDocid = iDocid; |
| 11831 p->iPrevLangid = iLangid; |
| 11832 p->bPrevDelete = bDelete; |
| 11833 return SQLITE_OK; |
| 11834 } |
| 11835 |
| 11836 /* |
| 11837 ** Discard the contents of the pending-terms hash tables. |
| 11838 */ |
| 11839 SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ |
| 11840 int i; |
| 11841 for(i=0; i<p->nIndex; i++){ |
| 11842 Fts3HashElem *pElem; |
| 11843 Fts3Hash *pHash = &p->aIndex[i].hPending; |
| 11844 for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ |
| 11845 PendingList *pList = (PendingList *)fts3HashData(pElem); |
| 11846 fts3PendingListDelete(pList); |
| 11847 } |
| 11848 fts3HashClear(pHash); |
| 11849 } |
| 11850 p->nPendingData = 0; |
| 11851 } |
| 11852 |
| 11853 /* |
| 11854 ** This function is called by the xUpdate() method as part of an INSERT |
| 11855 ** operation. It adds entries for each term in the new record to the |
| 11856 ** pendingTerms hash table. |
| 11857 ** |
| 11858 ** Argument apVal is the same as the similarly named argument passed to |
| 11859 ** fts3InsertData(). Parameter iDocid is the docid of the new row. |
| 11860 */ |
| 11861 static int fts3InsertTerms( |
| 11862 Fts3Table *p, |
| 11863 int iLangid, |
| 11864 sqlite3_value **apVal, |
| 11865 u32 *aSz |
| 11866 ){ |
| 11867 int i; /* Iterator variable */ |
| 11868 for(i=2; i<p->nColumn+2; i++){ |
| 11869 int iCol = i-2; |
| 11870 if( p->abNotindexed[iCol]==0 ){ |
| 11871 const char *zText = (const char *)sqlite3_value_text(apVal[i]); |
| 11872 int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); |
| 11873 if( rc!=SQLITE_OK ){ |
| 11874 return rc; |
| 11875 } |
| 11876 aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); |
| 11877 } |
| 11878 } |
| 11879 return SQLITE_OK; |
| 11880 } |
| 11881 |
| 11882 /* |
| 11883 ** This function is called by the xUpdate() method for an INSERT operation. |
| 11884 ** The apVal parameter is passed a copy of the apVal argument passed by |
| 11885 ** SQLite to the xUpdate() method. i.e: |
| 11886 ** |
| 11887 ** apVal[0] Not used for INSERT. |
| 11888 ** apVal[1] rowid |
| 11889 ** apVal[2] Left-most user-defined column |
| 11890 ** ... |
| 11891 ** apVal[p->nColumn+1] Right-most user-defined column |
| 11892 ** apVal[p->nColumn+2] Hidden column with same name as table |
| 11893 ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) |
| 11894 ** apVal[p->nColumn+4] Hidden languageid column |
| 11895 */ |
| 11896 static int fts3InsertData( |
| 11897 Fts3Table *p, /* Full-text table */ |
| 11898 sqlite3_value **apVal, /* Array of values to insert */ |
| 11899 sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ |
| 11900 ){ |
| 11901 int rc; /* Return code */ |
| 11902 sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ |
| 11903 |
| 11904 if( p->zContentTbl ){ |
| 11905 sqlite3_value *pRowid = apVal[p->nColumn+3]; |
| 11906 if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ |
| 11907 pRowid = apVal[1]; |
| 11908 } |
| 11909 if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ |
| 11910 return SQLITE_CONSTRAINT; |
| 11911 } |
| 11912 *piDocid = sqlite3_value_int64(pRowid); |
| 11913 return SQLITE_OK; |
| 11914 } |
| 11915 |
| 11916 /* Locate the statement handle used to insert data into the %_content |
| 11917 ** table. The SQL for this statement is: |
| 11918 ** |
| 11919 ** INSERT INTO %_content VALUES(?, ?, ?, ...) |
| 11920 ** |
| 11921 ** The statement features N '?' variables, where N is the number of user |
| 11922 ** defined columns in the FTS3 table, plus one for the docid field. |
| 11923 */ |
| 11924 rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); |
| 11925 if( rc==SQLITE_OK && p->zLanguageid ){ |
| 11926 rc = sqlite3_bind_int( |
| 11927 pContentInsert, p->nColumn+2, |
| 11928 sqlite3_value_int(apVal[p->nColumn+4]) |
| 11929 ); |
| 11930 } |
| 11931 if( rc!=SQLITE_OK ) return rc; |
| 11932 |
| 11933 /* There is a quirk here. The users INSERT statement may have specified |
| 11934 ** a value for the "rowid" field, for the "docid" field, or for both. |
| 11935 ** Which is a problem, since "rowid" and "docid" are aliases for the |
| 11936 ** same value. For example: |
| 11937 ** |
| 11938 ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); |
| 11939 ** |
| 11940 ** In FTS3, this is an error. It is an error to specify non-NULL values |
| 11941 ** for both docid and some other rowid alias. |
| 11942 */ |
| 11943 if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ |
| 11944 if( SQLITE_NULL==sqlite3_value_type(apVal[0]) |
| 11945 && SQLITE_NULL!=sqlite3_value_type(apVal[1]) |
| 11946 ){ |
| 11947 /* A rowid/docid conflict. */ |
| 11948 return SQLITE_ERROR; |
| 11949 } |
| 11950 rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); |
| 11951 if( rc!=SQLITE_OK ) return rc; |
| 11952 } |
| 11953 |
| 11954 /* Execute the statement to insert the record. Set *piDocid to the |
| 11955 ** new docid value. |
| 11956 */ |
| 11957 sqlite3_step(pContentInsert); |
| 11958 rc = sqlite3_reset(pContentInsert); |
| 11959 |
| 11960 *piDocid = sqlite3_last_insert_rowid(p->db); |
| 11961 return rc; |
| 11962 } |
| 11963 |
| 11964 |
| 11965 |
| 11966 /* |
| 11967 ** Remove all data from the FTS3 table. Clear the hash table containing |
| 11968 ** pending terms. |
| 11969 */ |
| 11970 static int fts3DeleteAll(Fts3Table *p, int bContent){ |
| 11971 int rc = SQLITE_OK; /* Return code */ |
| 11972 |
| 11973 /* Discard the contents of the pending-terms hash table. */ |
| 11974 sqlite3Fts3PendingTermsClear(p); |
| 11975 |
| 11976 /* Delete everything from the shadow tables. Except, leave %_content as |
| 11977 ** is if bContent is false. */ |
| 11978 assert( p->zContentTbl==0 || bContent==0 ); |
| 11979 if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); |
| 11980 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); |
| 11981 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); |
| 11982 if( p->bHasDocsize ){ |
| 11983 fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); |
| 11984 } |
| 11985 if( p->bHasStat ){ |
| 11986 fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); |
| 11987 } |
| 11988 return rc; |
| 11989 } |
| 11990 |
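/*
** Return the language id of the row that statement pSelect currently
** points to. This is the value of result column p->nColumn+1 if the table
** has a languageid column (p->zLanguageid is set), or 0 otherwise.
*/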
| 11994 static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ |
| 11995 int iLangid = 0; |
| 11996 if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); |
| 11997 return iLangid; |
| 11998 } |
| 11999 |
| 12000 /* |
** Argument pRowid holds the docid (an integer) of a row about to be
** deleted. Remove all terms belonging to that row from the full-text
** index.
| 12004 */ |
| 12005 static void fts3DeleteTerms( |
| 12006 int *pRC, /* Result code */ |
| 12007 Fts3Table *p, /* The FTS table to delete from */ |
| 12008 sqlite3_value *pRowid, /* The docid to be deleted */ |
| 12009 u32 *aSz, /* Sizes of deleted document written here */ |
| 12010 int *pbFound /* OUT: Set to true if row really does exist */ |
| 12011 ){ |
| 12012 int rc; |
| 12013 sqlite3_stmt *pSelect; |
| 12014 |
| 12015 assert( *pbFound==0 ); |
| 12016 if( *pRC ) return; |
| 12017 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); |
| 12018 if( rc==SQLITE_OK ){ |
| 12019 if( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 12020 int i; |
| 12021 int iLangid = langidFromSelect(p, pSelect); |
| 12022 i64 iDocid = sqlite3_column_int64(pSelect, 0); |
| 12023 rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid); |
| 12024 for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ |
| 12025 int iCol = i-1; |
| 12026 if( p->abNotindexed[iCol]==0 ){ |
| 12027 const char *zText = (const char *)sqlite3_column_text(pSelect, i); |
| 12028 rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); |
| 12029 aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); |
| 12030 } |
| 12031 } |
| 12032 if( rc!=SQLITE_OK ){ |
| 12033 sqlite3_reset(pSelect); |
| 12034 *pRC = rc; |
| 12035 return; |
| 12036 } |
| 12037 *pbFound = 1; |
| 12038 } |
| 12039 rc = sqlite3_reset(pSelect); |
| 12040 }else{ |
| 12041 sqlite3_reset(pSelect); |
| 12042 } |
| 12043 *pRC = rc; |
| 12044 } |
| 12045 |
| 12046 /* |
| 12047 ** Forward declaration to account for the circular dependency between |
| 12048 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). |
| 12049 */ |
| 12050 static int fts3SegmentMerge(Fts3Table *, int, int, int); |
| 12051 |
| 12052 /* |
| 12053 ** This function allocates a new level iLevel index in the segdir table. |
| 12054 ** Usually, indexes are allocated within a level sequentially starting |
| 12055 ** with 0, so the allocated index is one greater than the value returned |
| 12056 ** by: |
| 12057 ** |
| 12058 ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel |
| 12059 ** |
| 12060 ** However, if there are already FTS3_MERGE_COUNT indexes at the requested |
| 12061 ** level, they are merged into a single level (iLevel+1) segment and the |
| 12062 ** allocated index is 0. |
| 12063 ** |
| 12064 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK |
| 12065 ** returned. Otherwise, an SQLite error code is returned. |
| 12066 */ |
| 12067 static int fts3AllocateSegdirIdx( |
| 12068 Fts3Table *p, |
| 12069 int iLangid, /* Language id */ |
| 12070 int iIndex, /* Index for p->aIndex */ |
| 12071 int iLevel, |
| 12072 int *piIdx |
| 12073 ){ |
| 12074 int rc; /* Return Code */ |
| 12075 sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ |
| 12076 int iNext = 0; /* Result of query pNextIdx */ |
| 12077 |
| 12078 assert( iLangid>=0 ); |
| 12079 assert( p->nIndex>=1 ); |
| 12080 |
| 12081 /* Set variable iNext to the next available segdir index at level iLevel. */ |
| 12082 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); |
| 12083 if( rc==SQLITE_OK ){ |
| 12084 sqlite3_bind_int64( |
| 12085 pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) |
| 12086 ); |
| 12087 if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ |
| 12088 iNext = sqlite3_column_int(pNextIdx, 0); |
| 12089 } |
| 12090 rc = sqlite3_reset(pNextIdx); |
| 12091 } |
| 12092 |
| 12093 if( rc==SQLITE_OK ){ |
| 12094 /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already |
| 12095 ** full, merge all segments in level iLevel into a single iLevel+1 |
| 12096 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, |
| 12097 ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. |
| 12098 */ |
| 12099 if( iNext>=FTS3_MERGE_COUNT ){ |
| 12100 fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); |
| 12101 rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); |
| 12102 *piIdx = 0; |
| 12103 }else{ |
| 12104 *piIdx = iNext; |
| 12105 } |
| 12106 } |
| 12107 |
| 12108 return rc; |
| 12109 } |
| 12110 |
| 12111 /* |
| 12112 ** The %_segments table is declared as follows: |
| 12113 ** |
| 12114 ** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) |
| 12115 ** |
| 12116 ** This function reads data from a single row of the %_segments table. The |
| 12117 ** specific row is identified by the iBlockid parameter. If paBlob is not |
| 12118 ** NULL, then a buffer is allocated using sqlite3_malloc() and populated |
| 12119 ** with the contents of the blob stored in the "block" column of the |
** identified table row. Whether or not paBlob is NULL, *pnBlob is set
| 12121 ** to the size of the blob in bytes before returning. |
| 12122 ** |
| 12123 ** If an error occurs, or the table does not contain the specified row, |
| 12124 ** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If |
| 12125 ** paBlob is non-NULL, then it is the responsibility of the caller to |
| 12126 ** eventually free the returned buffer. |
| 12127 ** |
| 12128 ** This function may leave an open sqlite3_blob* handle in the |
| 12129 ** Fts3Table.pSegments variable. This handle is reused by subsequent calls |
| 12130 ** to this function. The handle may be closed by calling the |
| 12131 ** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy |
| 12132 ** performance improvement, but the blob handle should always be closed |
| 12133 ** before control is returned to the user (to prevent a lock being held |
| 12134 ** on the database file for longer than necessary). Thus, any virtual table |
| 12135 ** method (xFilter etc.) that may directly or indirectly call this function |
| 12136 ** must call sqlite3Fts3SegmentsClose() before returning. |
| 12137 */ |
| 12138 SQLITE_PRIVATE int sqlite3Fts3ReadBlock( |
| 12139 Fts3Table *p, /* FTS3 table handle */ |
| 12140 sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ |
| 12141 char **paBlob, /* OUT: Blob data in malloc'd buffer */ |
| 12142 int *pnBlob, /* OUT: Size of blob data */ |
| 12143 int *pnLoad /* OUT: Bytes actually loaded */ |
| 12144 ){ |
| 12145 int rc; /* Return code */ |
| 12146 |
| 12147 /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ |
| 12148 assert( pnBlob ); |
| 12149 |
| 12150 if( p->pSegments ){ |
| 12151 rc = sqlite3_blob_reopen(p->pSegments, iBlockid); |
| 12152 }else{ |
| 12153 if( 0==p->zSegmentsTbl ){ |
| 12154 p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); |
| 12155 if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; |
| 12156 } |
| 12157 rc = sqlite3_blob_open( |
| 12158 p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments |
| 12159 ); |
| 12160 } |
| 12161 |
| 12162 if( rc==SQLITE_OK ){ |
| 12163 int nByte = sqlite3_blob_bytes(p->pSegments); |
| 12164 *pnBlob = nByte; |
| 12165 if( paBlob ){ |
| 12166 char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); |
| 12167 if( !aByte ){ |
| 12168 rc = SQLITE_NOMEM; |
| 12169 }else{ |
| 12170 if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ |
| 12171 nByte = FTS3_NODE_CHUNKSIZE; |
| 12172 *pnLoad = nByte; |
| 12173 } |
| 12174 rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); |
| 12175 memset(&aByte[nByte], 0, FTS3_NODE_PADDING); |
| 12176 if( rc!=SQLITE_OK ){ |
| 12177 sqlite3_free(aByte); |
| 12178 aByte = 0; |
| 12179 } |
| 12180 } |
| 12181 *paBlob = aByte; |
| 12182 } |
| 12183 } |
| 12184 |
| 12185 return rc; |
| 12186 } |
| 12187 |
| 12188 /* |
| 12189 ** Close the blob handle at p->pSegments, if it is open. See comments above |
| 12190 ** the sqlite3Fts3ReadBlock() function for details. |
| 12191 */ |
| 12192 SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ |
| 12193 sqlite3_blob_close(p->pSegments); |
| 12194 p->pSegments = 0; |
| 12195 } |
| 12196 |
| 12197 static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ |
| 12198 int nRead; /* Number of bytes to read */ |
| 12199 int rc; /* Return code */ |
| 12200 |
| 12201 nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); |
| 12202 rc = sqlite3_blob_read( |
| 12203 pReader->pBlob, |
| 12204 &pReader->aNode[pReader->nPopulate], |
| 12205 nRead, |
| 12206 pReader->nPopulate |
| 12207 ); |
| 12208 |
| 12209 if( rc==SQLITE_OK ){ |
| 12210 pReader->nPopulate += nRead; |
| 12211 memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); |
| 12212 if( pReader->nPopulate==pReader->nNode ){ |
| 12213 sqlite3_blob_close(pReader->pBlob); |
| 12214 pReader->pBlob = 0; |
| 12215 pReader->nPopulate = 0; |
| 12216 } |
| 12217 } |
| 12218 return rc; |
| 12219 } |
| 12220 |
| 12221 static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ |
| 12222 int rc = SQLITE_OK; |
| 12223 assert( !pReader->pBlob |
| 12224 || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) |
| 12225 ); |
| 12226 while( pReader->pBlob && rc==SQLITE_OK |
| 12227 && (pFrom - pReader->aNode + nByte)>pReader->nPopulate |
| 12228 ){ |
| 12229 rc = fts3SegReaderIncrRead(pReader); |
| 12230 } |
| 12231 return rc; |
| 12232 } |
| 12233 |
| 12234 /* |
| 12235 ** Set an Fts3SegReader cursor to point at EOF. |
| 12236 */ |
| 12237 static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ |
| 12238 if( !fts3SegReaderIsRootOnly(pSeg) ){ |
| 12239 sqlite3_free(pSeg->aNode); |
| 12240 sqlite3_blob_close(pSeg->pBlob); |
| 12241 pSeg->pBlob = 0; |
| 12242 } |
| 12243 pSeg->aNode = 0; |
| 12244 } |
| 12245 |
| 12246 /* |
| 12247 ** Move the iterator passed as the first argument to the next term in the |
| 12248 ** segment. If successful, SQLITE_OK is returned. If there is no next term, |
| 12249 ** SQLITE_DONE. Otherwise, an SQLite error code. |
| 12250 */ |
| 12251 static int fts3SegReaderNext( |
| 12252 Fts3Table *p, |
| 12253 Fts3SegReader *pReader, |
| 12254 int bIncr |
| 12255 ){ |
| 12256 int rc; /* Return code of various sub-routines */ |
| 12257 char *pNext; /* Cursor variable */ |
| 12258 int nPrefix; /* Number of bytes in term prefix */ |
| 12259 int nSuffix; /* Number of bytes in term suffix */ |
| 12260 |
| 12261 if( !pReader->aDoclist ){ |
| 12262 pNext = pReader->aNode; |
| 12263 }else{ |
| 12264 pNext = &pReader->aDoclist[pReader->nDoclist]; |
| 12265 } |
| 12266 |
| 12267 if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ |
| 12268 |
| 12269 if( fts3SegReaderIsPending(pReader) ){ |
| 12270 Fts3HashElem *pElem = *(pReader->ppNextElem); |
| 12271 sqlite3_free(pReader->aNode); |
| 12272 pReader->aNode = 0; |
| 12273 if( pElem ){ |
| 12274 char *aCopy; |
| 12275 PendingList *pList = (PendingList *)fts3HashData(pElem); |
| 12276 int nCopy = pList->nData+1; |
| 12277 pReader->zTerm = (char *)fts3HashKey(pElem); |
| 12278 pReader->nTerm = fts3HashKeysize(pElem); |
| 12279 aCopy = (char*)sqlite3_malloc(nCopy); |
| 12280 if( !aCopy ) return SQLITE_NOMEM; |
| 12281 memcpy(aCopy, pList->aData, nCopy); |
| 12282 pReader->nNode = pReader->nDoclist = nCopy; |
| 12283 pReader->aNode = pReader->aDoclist = aCopy; |
| 12284 pReader->ppNextElem++; |
| 12285 assert( pReader->aNode ); |
| 12286 } |
| 12287 return SQLITE_OK; |
| 12288 } |
| 12289 |
| 12290 fts3SegReaderSetEof(pReader); |
| 12291 |
| 12292 /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf |
| 12293 ** blocks have already been traversed. */ |
| 12294 assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); |
| 12295 if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ |
| 12296 return SQLITE_OK; |
| 12297 } |
| 12298 |
| 12299 rc = sqlite3Fts3ReadBlock( |
| 12300 p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, |
| 12301 (bIncr ? &pReader->nPopulate : 0) |
| 12302 ); |
| 12303 if( rc!=SQLITE_OK ) return rc; |
| 12304 assert( pReader->pBlob==0 ); |
| 12305 if( bIncr && pReader->nPopulate<pReader->nNode ){ |
| 12306 pReader->pBlob = p->pSegments; |
| 12307 p->pSegments = 0; |
| 12308 } |
| 12309 pNext = pReader->aNode; |
| 12310 } |
| 12311 |
| 12312 assert( !fts3SegReaderIsPending(pReader) ); |
| 12313 |
| 12314 rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); |
| 12315 if( rc!=SQLITE_OK ) return rc; |
| 12316 |
| 12317 /* Because of the FTS3_NODE_PADDING bytes of padding, the following is |
| 12318 ** safe (no risk of overread) even if the node data is corrupted. */ |
| 12319 pNext += fts3GetVarint32(pNext, &nPrefix); |
| 12320 pNext += fts3GetVarint32(pNext, &nSuffix); |
| 12321 if( nPrefix<0 || nSuffix<=0 |
| 12322 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] |
| 12323 ){ |
| 12324 return FTS_CORRUPT_VTAB; |
| 12325 } |
| 12326 |
| 12327 if( nPrefix+nSuffix>pReader->nTermAlloc ){ |
| 12328 int nNew = (nPrefix+nSuffix)*2; |
| 12329 char *zNew = sqlite3_realloc(pReader->zTerm, nNew); |
| 12330 if( !zNew ){ |
| 12331 return SQLITE_NOMEM; |
| 12332 } |
| 12333 pReader->zTerm = zNew; |
| 12334 pReader->nTermAlloc = nNew; |
| 12335 } |
| 12336 |
| 12337 rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); |
| 12338 if( rc!=SQLITE_OK ) return rc; |
| 12339 |
| 12340 memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); |
| 12341 pReader->nTerm = nPrefix+nSuffix; |
| 12342 pNext += nSuffix; |
| 12343 pNext += fts3GetVarint32(pNext, &pReader->nDoclist); |
| 12344 pReader->aDoclist = pNext; |
| 12345 pReader->pOffsetList = 0; |
| 12346 |
| 12347 /* Check that the doclist does not appear to extend past the end of the |
| 12348 ** b-tree node. And that the final byte of the doclist is 0x00. If either |
| 12349 ** of these statements is untrue, then the data structure is corrupt. |
| 12350 */ |
| 12351 if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] |
| 12352 || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) |
| 12353 ){ |
| 12354 return FTS_CORRUPT_VTAB; |
| 12355 } |
| 12356 return SQLITE_OK; |
| 12357 } |
| 12358 |
| 12359 /* |
| 12360 ** Set the SegReader to point to the first docid in the doclist associated |
| 12361 ** with the current term. |
| 12362 */ |
| 12363 static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ |
| 12364 int rc = SQLITE_OK; |
| 12365 assert( pReader->aDoclist ); |
| 12366 assert( !pReader->pOffsetList ); |
| 12367 if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ |
| 12368 u8 bEof = 0; |
| 12369 pReader->iDocid = 0; |
| 12370 pReader->nOffsetList = 0; |
| 12371 sqlite3Fts3DoclistPrev(0, |
| 12372 pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, |
| 12373 &pReader->iDocid, &pReader->nOffsetList, &bEof |
| 12374 ); |
| 12375 }else{ |
| 12376 rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); |
| 12377 if( rc==SQLITE_OK ){ |
| 12378 int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); |
| 12379 pReader->pOffsetList = &pReader->aDoclist[n]; |
| 12380 } |
| 12381 } |
| 12382 return rc; |
| 12383 } |
| 12384 |
| 12385 /* |
| 12386 ** Advance the SegReader to point to the next docid in the doclist |
| 12387 ** associated with the current term. |
| 12388 ** |
| 12389 ** If arguments ppOffsetList and pnOffsetList are not NULL, then |
| 12390 ** *ppOffsetList is set to point to the first column-offset list |
| 12391 ** in the doclist entry (i.e. immediately past the docid varint). |
| 12392 ** *pnOffsetList is set to the length of the set of column-offset |
** lists, not including the nul-terminator byte.
| 12394 */ |
| 12395 static int fts3SegReaderNextDocid( |
| 12396 Fts3Table *pTab, |
| 12397 Fts3SegReader *pReader, /* Reader to advance to next docid */ |
| 12398 char **ppOffsetList, /* OUT: Pointer to current position-list */ |
| 12399 int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ |
| 12400 ){ |
| 12401 int rc = SQLITE_OK; |
| 12402 char *p = pReader->pOffsetList; |
| 12403 char c = 0; |
| 12404 |
| 12405 assert( p ); |
| 12406 |
| 12407 if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ |
| 12408 /* A pending-terms seg-reader for an FTS4 table that uses order=desc. |
| 12409 ** Pending-terms doclists are always built up in ascending order, so |
| 12410 ** we have to iterate through them backwards here. */ |
| 12411 u8 bEof = 0; |
| 12412 if( ppOffsetList ){ |
| 12413 *ppOffsetList = pReader->pOffsetList; |
| 12414 *pnOffsetList = pReader->nOffsetList - 1; |
| 12415 } |
| 12416 sqlite3Fts3DoclistPrev(0, |
| 12417 pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, |
| 12418 &pReader->nOffsetList, &bEof |
| 12419 ); |
| 12420 if( bEof ){ |
| 12421 pReader->pOffsetList = 0; |
| 12422 }else{ |
| 12423 pReader->pOffsetList = p; |
| 12424 } |
| 12425 }else{ |
| 12426 char *pEnd = &pReader->aDoclist[pReader->nDoclist]; |
| 12427 |
| 12428 /* Pointer p currently points at the first byte of an offset list. The |
| 12429 ** following block advances it to point one byte past the end of |
| 12430 ** the same offset list. */ |
| 12431 while( 1 ){ |
| 12432 |
| 12433 /* The following line of code (and the "p++" below the while() loop) is |
| 12434 ** normally all that is required to move pointer p to the desired |
| 12435 ** position. The exception is if this node is being loaded from disk |
| 12436 ** incrementally and pointer "p" now points to the first byte past |
| 12437 ** the populated part of pReader->aNode[]. |
| 12438 */ |
| 12439 while( *p | c ) c = *p++ & 0x80; |
| 12440 assert( *p==0 ); |
| 12441 |
| 12442 if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; |
| 12443 rc = fts3SegReaderIncrRead(pReader); |
| 12444 if( rc!=SQLITE_OK ) return rc; |
| 12445 } |
| 12446 p++; |
| 12447 |
| 12448 /* If required, populate the output variables with a pointer to and the |
| 12449 ** size of the previous offset-list. |
| 12450 */ |
| 12451 if( ppOffsetList ){ |
| 12452 *ppOffsetList = pReader->pOffsetList; |
| 12453 *pnOffsetList = (int)(p - pReader->pOffsetList - 1); |
| 12454 } |
| 12455 |
| 12456 /* List may have been edited in place by fts3EvalNearTrim() */ |
| 12457 while( p<pEnd && *p==0 ) p++; |
| 12458 |
| 12459 /* If there are no more entries in the doclist, set pOffsetList to |
| 12460 ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and |
| 12461 ** Fts3SegReader.pOffsetList to point to the next offset list before |
| 12462 ** returning. |
| 12463 */ |
| 12464 if( p>=pEnd ){ |
| 12465 pReader->pOffsetList = 0; |
| 12466 }else{ |
| 12467 rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); |
| 12468 if( rc==SQLITE_OK ){ |
| 12469 sqlite3_int64 iDelta; |
| 12470 pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); |
| 12471 if( pTab->bDescIdx ){ |
| 12472 pReader->iDocid -= iDelta; |
| 12473 }else{ |
| 12474 pReader->iDocid += iDelta; |
| 12475 } |
| 12476 } |
| 12477 } |
| 12478 } |
| 12479 |
| 12480 return SQLITE_OK; |
| 12481 } |
| 12482 |
| 12483 |
| 12484 SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( |
| 12485 Fts3Cursor *pCsr, |
| 12486 Fts3MultiSegReader *pMsr, |
| 12487 int *pnOvfl |
| 12488 ){ |
| 12489 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; |
| 12490 int nOvfl = 0; |
| 12491 int ii; |
| 12492 int rc = SQLITE_OK; |
| 12493 int pgsz = p->nPgsz; |
| 12494 |
| 12495 assert( p->bFts4 ); |
| 12496 assert( pgsz>0 ); |
| 12497 |
| 12498 for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){ |
| 12499 Fts3SegReader *pReader = pMsr->apSegment[ii]; |
| 12500 if( !fts3SegReaderIsPending(pReader) |
| 12501 && !fts3SegReaderIsRootOnly(pReader) |
| 12502 ){ |
| 12503 sqlite3_int64 jj; |
| 12504 for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ |
| 12505 int nBlob; |
| 12506 rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); |
| 12507 if( rc!=SQLITE_OK ) break; |
| 12508 if( (nBlob+35)>pgsz ){ |
| 12509 nOvfl += (nBlob + 34)/pgsz; |
| 12510 } |
| 12511 } |
| 12512 } |
| 12513 } |
| 12514 *pnOvfl = nOvfl; |
| 12515 return rc; |
| 12516 } |
| 12517 |
| 12518 /* |
| 12519 ** Free all allocations associated with the iterator passed as the |
| 12520 ** second argument. |
| 12521 */ |
| 12522 SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ |
| 12523 if( pReader ){ |
| 12524 if( !fts3SegReaderIsPending(pReader) ){ |
| 12525 sqlite3_free(pReader->zTerm); |
| 12526 } |
| 12527 if( !fts3SegReaderIsRootOnly(pReader) ){ |
| 12528 sqlite3_free(pReader->aNode); |
| 12529 } |
| 12530 sqlite3_blob_close(pReader->pBlob); |
| 12531 } |
| 12532 sqlite3_free(pReader); |
| 12533 } |
| 12534 |
| 12535 /* |
| 12536 ** Allocate a new SegReader object. |
| 12537 */ |
| 12538 SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( |
| 12539 int iAge, /* Segment "age". */ |
| 12540 int bLookup, /* True for a lookup only */ |
| 12541 sqlite3_int64 iStartLeaf, /* First leaf to traverse */ |
| 12542 sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ |
| 12543 sqlite3_int64 iEndBlock, /* Final block of segment */ |
| 12544 const char *zRoot, /* Buffer containing root node */ |
| 12545 int nRoot, /* Size of buffer containing root node */ |
| 12546 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ |
| 12547 ){ |
| 12548 Fts3SegReader *pReader; /* Newly allocated SegReader object */ |
| 12549 int nExtra = 0; /* Bytes to allocate segment root node */ |
| 12550 |
| 12551 assert( iStartLeaf<=iEndLeaf ); |
| 12552 if( iStartLeaf==0 ){ |
| 12553 nExtra = nRoot + FTS3_NODE_PADDING; |
| 12554 } |
| 12555 |
| 12556 pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); |
| 12557 if( !pReader ){ |
| 12558 return SQLITE_NOMEM; |
| 12559 } |
| 12560 memset(pReader, 0, sizeof(Fts3SegReader)); |
| 12561 pReader->iIdx = iAge; |
| 12562 pReader->bLookup = bLookup!=0; |
| 12563 pReader->iStartBlock = iStartLeaf; |
| 12564 pReader->iLeafEndBlock = iEndLeaf; |
| 12565 pReader->iEndBlock = iEndBlock; |
| 12566 |
| 12567 if( nExtra ){ |
| 12568 /* The entire segment is stored in the root node. */ |
| 12569 pReader->aNode = (char *)&pReader[1]; |
| 12570 pReader->rootOnly = 1; |
| 12571 pReader->nNode = nRoot; |
| 12572 memcpy(pReader->aNode, zRoot, nRoot); |
| 12573 memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); |
| 12574 }else{ |
| 12575 pReader->iCurrentBlock = iStartLeaf-1; |
| 12576 } |
| 12577 *ppReader = pReader; |
| 12578 return SQLITE_OK; |
| 12579 } |
| 12580 |
| 12581 /* |
| 12582 ** This is a comparison function used as a qsort() callback when sorting |
| 12583 ** an array of pending terms by term. This occurs as part of flushing |
| 12584 ** the contents of the pending-terms hash table to the database. |
| 12585 */ |
| 12586 static int SQLITE_CDECL fts3CompareElemByTerm( |
| 12587 const void *lhs, |
| 12588 const void *rhs |
| 12589 ){ |
| 12590 char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); |
| 12591 char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); |
| 12592 int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); |
| 12593 int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); |
| 12594 |
| 12595 int n = (n1<n2 ? n1 : n2); |
| 12596 int c = memcmp(z1, z2, n); |
| 12597 if( c==0 ){ |
| 12598 c = n1 - n2; |
| 12599 } |
| 12600 return c; |
| 12601 } |
| 12602 |
| 12603 /* |
| 12604 ** This function is used to allocate an Fts3SegReader that iterates through |
| 12605 ** a subset of the terms stored in the Fts3Table.pendingTerms array. |
| 12606 ** |
| 12607 ** If the isPrefixIter parameter is zero, then the returned SegReader iterates |
| 12608 ** through each term in the pending-terms table. Or, if isPrefixIter is |
| 12609 ** non-zero, it iterates through each term and its prefixes. For example, if |
| 12610 ** the pending terms hash table contains the terms "sqlite", "mysql" and |
| 12611 ** "firebird", then the iterator visits the following 'terms' (in the order |
| 12612 ** shown): |
| 12613 ** |
| 12614 ** f fi fir fire fireb firebi firebir firebird |
| 12615 ** m my mys mysq mysql |
| 12616 ** s sq sql sqli sqlit sqlite |
| 12617 ** |
| 12618 ** Whereas if isPrefixIter is zero, the terms visited are: |
| 12619 ** |
| 12620 ** firebird mysql sqlite |
| 12621 */ |
| 12622 SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( |
| 12623 Fts3Table *p, /* Virtual table handle */ |
| 12624 int iIndex, /* Index for p->aIndex */ |
| 12625 const char *zTerm, /* Term to search for */ |
| 12626 int nTerm, /* Size of buffer zTerm */ |
| 12627 int bPrefix, /* True for a prefix iterator */ |
| 12628 Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ |
| 12629 ){ |
| 12630 Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ |
| 12631 Fts3HashElem *pE; /* Iterator variable */ |
| 12632 Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ |
| 12633 int nElem = 0; /* Size of array at aElem */ |
| 12634 int rc = SQLITE_OK; /* Return Code */ |
| 12635 Fts3Hash *pHash; |
| 12636 |
| 12637 pHash = &p->aIndex[iIndex].hPending; |
| 12638 if( bPrefix ){ |
| 12639 int nAlloc = 0; /* Size of allocated array at aElem */ |
| 12640 |
| 12641 for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ |
| 12642 char *zKey = (char *)fts3HashKey(pE); |
| 12643 int nKey = fts3HashKeysize(pE); |
| 12644 if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ |
| 12645 if( nElem==nAlloc ){ |
| 12646 Fts3HashElem **aElem2; |
| 12647 nAlloc += 16; |
| 12648 aElem2 = (Fts3HashElem **)sqlite3_realloc( |
| 12649 aElem, nAlloc*sizeof(Fts3HashElem *) |
| 12650 ); |
| 12651 if( !aElem2 ){ |
| 12652 rc = SQLITE_NOMEM; |
| 12653 nElem = 0; |
| 12654 break; |
| 12655 } |
| 12656 aElem = aElem2; |
| 12657 } |
| 12658 |
| 12659 aElem[nElem++] = pE; |
| 12660 } |
| 12661 } |
| 12662 |
| 12663 /* If more than one term matches the prefix, sort the Fts3HashElem |
| 12664 ** objects in term order using qsort(). This uses the same comparison |
| 12665 ** callback as is used when flushing terms to disk. |
| 12666 */ |
| 12667 if( nElem>1 ){ |
| 12668 qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); |
| 12669 } |
| 12670 |
| 12671 }else{ |
| 12672 /* The query is a simple term lookup that matches at most one term in |
| 12673 ** the index. All that is required is a straight hash-lookup. |
| 12674 ** |
| 12675 ** Because the stack address of pE may be accessed via the aElem pointer |
| 12676 ** below, the "Fts3HashElem *pE" must be declared so that it is valid |
| 12677 ** within this entire function, not just this "else{...}" block. |
| 12678 */ |
| 12679 pE = fts3HashFindElem(pHash, zTerm, nTerm); |
| 12680 if( pE ){ |
| 12681 aElem = &pE; |
| 12682 nElem = 1; |
| 12683 } |
| 12684 } |
| 12685 |
| 12686 if( nElem>0 ){ |
| 12687 int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); |
| 12688 pReader = (Fts3SegReader *)sqlite3_malloc(nByte); |
| 12689 if( !pReader ){ |
| 12690 rc = SQLITE_NOMEM; |
| 12691 }else{ |
| 12692 memset(pReader, 0, nByte); |
| 12693 pReader->iIdx = 0x7FFFFFFF; |
| 12694 pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; |
| 12695 memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); |
| 12696 } |
| 12697 } |
| 12698 |
| 12699 if( bPrefix ){ |
| 12700 sqlite3_free(aElem); |
| 12701 } |
| 12702 *ppReader = pReader; |
| 12703 return rc; |
| 12704 } |
| 12705 |
| 12706 /* |
| 12707 ** Compare the entries pointed to by two Fts3SegReader structures. |
| 12708 ** Comparison is as follows: |
| 12709 ** |
| 12710 ** 1) EOF is greater than not EOF. |
| 12711 ** |
| 12712 ** 2) The current terms (if any) are compared using memcmp(). If one |
| 12713 ** term is a prefix of another, the longer term is considered the |
| 12714 ** larger. |
| 12715 ** |
| 12716 ** 3) By segment age. An older segment is considered larger. |
| 12717 */ |
| 12718 static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12719 int rc; |
| 12720 if( pLhs->aNode && pRhs->aNode ){ |
| 12721 int rc2 = pLhs->nTerm - pRhs->nTerm; |
| 12722 if( rc2<0 ){ |
| 12723 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); |
| 12724 }else{ |
| 12725 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); |
| 12726 } |
| 12727 if( rc==0 ){ |
| 12728 rc = rc2; |
| 12729 } |
| 12730 }else{ |
| 12731 rc = (pLhs->aNode==0) - (pRhs->aNode==0); |
| 12732 } |
| 12733 if( rc==0 ){ |
| 12734 rc = pRhs->iIdx - pLhs->iIdx; |
| 12735 } |
| 12736 assert( rc!=0 ); |
| 12737 return rc; |
| 12738 } |
| 12739 |
| 12740 /* |
| 12741 ** A different comparison function for SegReader structures. In this |
| 12742 ** version, it is assumed that each SegReader points to an entry in |
| 12743 ** a doclist for identical terms. Comparison is made as follows: |
| 12744 ** |
| 12745 ** 1) EOF (end of doclist in this case) is greater than not EOF. |
| 12746 ** |
| 12747 ** 2) By current docid. |
| 12748 ** |
| 12749 ** 3) By segment age. An older segment is considered larger. |
| 12750 */ |
| 12751 static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12752 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); |
| 12753 if( rc==0 ){ |
| 12754 if( pLhs->iDocid==pRhs->iDocid ){ |
| 12755 rc = pRhs->iIdx - pLhs->iIdx; |
| 12756 }else{ |
| 12757 rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; |
| 12758 } |
| 12759 } |
| 12760 assert( pLhs->aNode && pRhs->aNode ); |
| 12761 return rc; |
| 12762 } |
| 12763 static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12764 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); |
| 12765 if( rc==0 ){ |
| 12766 if( pLhs->iDocid==pRhs->iDocid ){ |
| 12767 rc = pRhs->iIdx - pLhs->iIdx; |
| 12768 }else{ |
| 12769 rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; |
| 12770 } |
| 12771 } |
| 12772 assert( pLhs->aNode && pRhs->aNode ); |
| 12773 return rc; |
| 12774 } |
| 12775 |
| 12776 /* |
| 12777 ** Compare the term that the Fts3SegReader object passed as the first argument |
| 12778 ** points to with the term specified by arguments zTerm and nTerm. |
| 12779 ** |
| 12780 ** If the pSeg iterator is already at EOF, return 0. Otherwise, return |
| 12781 ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are |
| 12782 ** equal, or +ve if the pSeg term is greater than zTerm/nTerm. |
| 12783 */ |
| 12784 static int fts3SegReaderTermCmp( |
| 12785 Fts3SegReader *pSeg, /* Segment reader object */ |
| 12786 const char *zTerm, /* Term to compare to */ |
| 12787 int nTerm /* Size of term zTerm in bytes */ |
| 12788 ){ |
| 12789 int res = 0; |
| 12790 if( pSeg->aNode ){ |
| 12791 if( pSeg->nTerm>nTerm ){ |
| 12792 res = memcmp(pSeg->zTerm, zTerm, nTerm); |
| 12793 }else{ |
| 12794 res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); |
| 12795 } |
| 12796 if( res==0 ){ |
| 12797 res = pSeg->nTerm-nTerm; |
| 12798 } |
| 12799 } |
| 12800 return res; |
| 12801 } |
| 12802 |
| 12803 /* |
| 12804 ** Argument apSegment is an array of nSegment elements. It is known that |
| 12805 ** the final (nSegment-nSuspect) members are already in sorted order |
| 12806 ** (according to the comparison function provided). This function shuffles |
| 12807 ** the array around until all entries are in sorted order. |
| 12808 */ |
| 12809 static void fts3SegReaderSort( |
| 12810 Fts3SegReader **apSegment, /* Array to sort entries of */ |
| 12811 int nSegment, /* Size of apSegment array */ |
| 12812 int nSuspect, /* Unsorted entry count */ |
| 12813 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ |
| 12814 ){ |
| 12815 int i; /* Iterator variable */ |
| 12816 |
| 12817 assert( nSuspect<=nSegment ); |
| 12818 |
| 12819 if( nSuspect==nSegment ) nSuspect--; |
| 12820 for(i=nSuspect-1; i>=0; i--){ |
| 12821 int j; |
| 12822 for(j=i; j<(nSegment-1); j++){ |
| 12823 Fts3SegReader *pTmp; |
| 12824 if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; |
| 12825 pTmp = apSegment[j+1]; |
| 12826 apSegment[j+1] = apSegment[j]; |
| 12827 apSegment[j] = pTmp; |
| 12828 } |
| 12829 } |
| 12830 |
| 12831 #ifndef NDEBUG |
| 12832 /* Check that the list really is sorted now. */ |
| 12833 for(i=0; i<(nSuspect-1); i++){ |
| 12834 assert( xCmp(apSegment[i], apSegment[i+1])<0 ); |
| 12835 } |
| 12836 #endif |
| 12837 } |
| 12838 |
| 12839 /* |
| 12840 ** Insert a record into the %_segments table. |
| 12841 */ |
| 12842 static int fts3WriteSegment( |
| 12843 Fts3Table *p, /* Virtual table handle */ |
| 12844 sqlite3_int64 iBlock, /* Block id for new block */ |
| 12845 char *z, /* Pointer to buffer containing block data */ |
| 12846 int n /* Size of buffer z in bytes */ |
| 12847 ){ |
| 12848 sqlite3_stmt *pStmt; |
| 12849 int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); |
| 12850 if( rc==SQLITE_OK ){ |
| 12851 sqlite3_bind_int64(pStmt, 1, iBlock); |
| 12852 sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); |
| 12853 sqlite3_step(pStmt); |
| 12854 rc = sqlite3_reset(pStmt); |
| 12855 } |
| 12856 return rc; |
| 12857 } |
| 12858 |
| 12859 /* |
| 12860 ** Find the largest relative level number in the table. If successful, set |
| 12861 ** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, |
| 12862 ** set *pnMax to zero and return an SQLite error code. |
| 12863 */ |
| 12864 SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ |
| 12865 int rc; |
| 12866 int mxLevel = 0; |
| 12867 sqlite3_stmt *pStmt = 0; |
| 12868 |
| 12869 rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); |
| 12870 if( rc==SQLITE_OK ){ |
| 12871 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 12872 mxLevel = sqlite3_column_int(pStmt, 0); |
| 12873 } |
| 12874 rc = sqlite3_reset(pStmt); |
| 12875 } |
| 12876 *pnMax = mxLevel; |
| 12877 return rc; |
| 12878 } |
| 12879 |
| 12880 /* |
| 12881 ** Insert a record into the %_segdir table. |
| 12882 */ |
| 12883 static int fts3WriteSegdir( |
| 12884 Fts3Table *p, /* Virtual table handle */ |
| 12885 sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ |
| 12886 int iIdx, /* Value for "idx" field */ |
| 12887 sqlite3_int64 iStartBlock, /* Value for "start_block" field */ |
| 12888 sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ |
| 12889 sqlite3_int64 iEndBlock, /* Value for "end_block" field */ |
| 12890 sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ |
| 12891 char *zRoot, /* Blob value for "root" field */ |
| 12892 int nRoot /* Number of bytes in buffer zRoot */ |
| 12893 ){ |
| 12894 sqlite3_stmt *pStmt; |
| 12895 int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); |
| 12896 if( rc==SQLITE_OK ){ |
| 12897 sqlite3_bind_int64(pStmt, 1, iLevel); |
| 12898 sqlite3_bind_int(pStmt, 2, iIdx); |
| 12899 sqlite3_bind_int64(pStmt, 3, iStartBlock); |
| 12900 sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); |
| 12901 if( nLeafData==0 ){ |
| 12902 sqlite3_bind_int64(pStmt, 5, iEndBlock); |
| 12903 }else{ |
| 12904 char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); |
| 12905 if( !zEnd ) return SQLITE_NOMEM; |
| 12906 sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); |
| 12907 } |
| 12908 sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); |
| 12909 sqlite3_step(pStmt); |
| 12910 rc = sqlite3_reset(pStmt); |
| 12911 } |
| 12912 return rc; |
| 12913 } |
| 12914 |
/*
** Return the size of the common prefix (if any) shared by zPrev and
** zNext, in bytes. For example,
**
**   fts3PrefixCompress("abc", 3, "abcdef", 6)   // returns 3
**   fts3PrefixCompress("abX", 3, "abcdef", 6)   // returns 2
**   fts3PrefixCompress("abX", 3, "Xbcdef", 6)   // returns 0
**
** The comparison never reads more than nPrev bytes of zPrev or more
** than nNext bytes of zNext, so the return value is at most the smaller
** of nPrev and nNext. Neither buffer needs to be nul-terminated.
*/
static int fts3PrefixCompress(
  const char *zPrev,              /* Buffer containing previous term */
  int nPrev,                      /* Size of buffer zPrev in bytes */
  const char *zNext,              /* Buffer containing next term */
  int nNext                       /* Size of buffer zNext in bytes */
){
  int n;
  /* Bound the scan by both lengths. Without the nNext bound, a zNext
  ** that is a proper prefix of zPrev would be read past its end. */
  for(n=0; n<nPrev && n<nNext && zPrev[n]==zNext[n]; n++);
  return n;
}
| 12934 |
| 12935 /* |
| 12936 ** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger |
| 12937 ** (according to memcmp) than the previous term. |
| 12938 */ |
| 12939 static int fts3NodeAddTerm( |
| 12940 Fts3Table *p, /* Virtual table handle */ |
| 12941 SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */ |
| 12942 int isCopyTerm, /* True if zTerm/nTerm is transient */ |
| 12943 const char *zTerm, /* Pointer to buffer containing term */ |
| 12944 int nTerm /* Size of term in bytes */ |
| 12945 ){ |
| 12946 SegmentNode *pTree = *ppTree; |
| 12947 int rc; |
| 12948 SegmentNode *pNew; |
| 12949 |
| 12950 /* First try to append the term to the current node. Return early if |
| 12951 ** this is possible. |
| 12952 */ |
| 12953 if( pTree ){ |
| 12954 int nData = pTree->nData; /* Current size of node in bytes */ |
| 12955 int nReq = nData; /* Required space after adding zTerm */ |
| 12956 int nPrefix; /* Number of bytes of prefix compression */ |
| 12957 int nSuffix; /* Suffix length */ |
| 12958 |
| 12959 nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); |
| 12960 nSuffix = nTerm-nPrefix; |
| 12961 |
| 12962 nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; |
| 12963 if( nReq<=p->nNodeSize || !pTree->zTerm ){ |
| 12964 |
| 12965 if( nReq>p->nNodeSize ){ |
| 12966 /* An unusual case: this is the first term to be added to the node |
| 12967 ** and the static node buffer (p->nNodeSize bytes) is not large |
| 12968 ** enough. Use a separately malloced buffer instead This wastes |
| 12969 ** p->nNodeSize bytes, but since this scenario only comes about when |
| 12970 ** the database contain two terms that share a prefix of almost 2KB, |
| 12971 ** this is not expected to be a serious problem. |
| 12972 */ |
| 12973 assert( pTree->aData==(char *)&pTree[1] ); |
| 12974 pTree->aData = (char *)sqlite3_malloc(nReq); |
| 12975 if( !pTree->aData ){ |
| 12976 return SQLITE_NOMEM; |
| 12977 } |
| 12978 } |
| 12979 |
| 12980 if( pTree->zTerm ){ |
| 12981 /* There is no prefix-length field for first term in a node */ |
| 12982 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); |
| 12983 } |
| 12984 |
| 12985 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); |
| 12986 memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); |
| 12987 pTree->nData = nData + nSuffix; |
| 12988 pTree->nEntry++; |
| 12989 |
| 12990 if( isCopyTerm ){ |
| 12991 if( pTree->nMalloc<nTerm ){ |
| 12992 char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); |
| 12993 if( !zNew ){ |
| 12994 return SQLITE_NOMEM; |
| 12995 } |
| 12996 pTree->nMalloc = nTerm*2; |
| 12997 pTree->zMalloc = zNew; |
| 12998 } |
| 12999 pTree->zTerm = pTree->zMalloc; |
| 13000 memcpy(pTree->zTerm, zTerm, nTerm); |
| 13001 pTree->nTerm = nTerm; |
| 13002 }else{ |
| 13003 pTree->zTerm = (char *)zTerm; |
| 13004 pTree->nTerm = nTerm; |
| 13005 } |
| 13006 return SQLITE_OK; |
| 13007 } |
| 13008 } |
| 13009 |
| 13010 /* If control flows to here, it was not possible to append zTerm to the |
| 13011 ** current node. Create a new node (a right-sibling of the current node). |
| 13012 ** If this is the first node in the tree, the term is added to it. |
| 13013 ** |
| 13014 ** Otherwise, the term is not added to the new node, it is left empty for |
| 13015 ** now. Instead, the term is inserted into the parent of pTree. If pTree |
| 13016 ** has no parent, one is created here. |
| 13017 */ |
| 13018 pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); |
| 13019 if( !pNew ){ |
| 13020 return SQLITE_NOMEM; |
| 13021 } |
| 13022 memset(pNew, 0, sizeof(SegmentNode)); |
| 13023 pNew->nData = 1 + FTS3_VARINT_MAX; |
| 13024 pNew->aData = (char *)&pNew[1]; |
| 13025 |
| 13026 if( pTree ){ |
| 13027 SegmentNode *pParent = pTree->pParent; |
| 13028 rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); |
| 13029 if( pTree->pParent==0 ){ |
| 13030 pTree->pParent = pParent; |
| 13031 } |
| 13032 pTree->pRight = pNew; |
| 13033 pNew->pLeftmost = pTree->pLeftmost; |
| 13034 pNew->pParent = pParent; |
| 13035 pNew->zMalloc = pTree->zMalloc; |
| 13036 pNew->nMalloc = pTree->nMalloc; |
| 13037 pTree->zMalloc = 0; |
| 13038 }else{ |
| 13039 pNew->pLeftmost = pNew; |
| 13040 rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); |
| 13041 } |
| 13042 |
| 13043 *ppTree = pNew; |
| 13044 return rc; |
| 13045 } |
| 13046 |
| 13047 /* |
| 13048 ** Helper function for fts3NodeWrite(). |
| 13049 */ |
| 13050 static int fts3TreeFinishNode( |
| 13051 SegmentNode *pTree, |
| 13052 int iHeight, |
| 13053 sqlite3_int64 iLeftChild |
| 13054 ){ |
| 13055 int nStart; |
| 13056 assert( iHeight>=1 && iHeight<128 ); |
| 13057 nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); |
| 13058 pTree->aData[nStart] = (char)iHeight; |
| 13059 sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); |
| 13060 return nStart; |
| 13061 } |
| 13062 |
| 13063 /* |
| 13064 ** Write the buffer for the segment node pTree and all of its peers to the |
| 13065 ** database. Then call this function recursively to write the parent of |
| 13066 ** pTree and its peers to the database. |
| 13067 ** |
| 13068 ** Except, if pTree is a root node, do not write it to the database. Instead, |
| 13069 ** set output variables *paRoot and *pnRoot to contain the root node. |
| 13070 ** |
| 13071 ** If successful, SQLITE_OK is returned and output variable *piLast is |
| 13072 ** set to the largest blockid written to the database (or zero if no |
| 13073 ** blocks were written to the db). Otherwise, an SQLite error code is |
| 13074 ** returned. |
| 13075 */ |
| 13076 static int fts3NodeWrite( |
| 13077 Fts3Table *p, /* Virtual table handle */ |
| 13078 SegmentNode *pTree, /* SegmentNode handle */ |
| 13079 int iHeight, /* Height of this node in tree */ |
| 13080 sqlite3_int64 iLeaf, /* Block id of first leaf node */ |
| 13081 sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ |
| 13082 sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ |
| 13083 char **paRoot, /* OUT: Data for root node */ |
| 13084 int *pnRoot /* OUT: Size of root node in bytes */ |
| 13085 ){ |
| 13086 int rc = SQLITE_OK; |
| 13087 |
| 13088 if( !pTree->pParent ){ |
| 13089 /* Root node of the tree. */ |
| 13090 int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); |
| 13091 *piLast = iFree-1; |
| 13092 *pnRoot = pTree->nData - nStart; |
| 13093 *paRoot = &pTree->aData[nStart]; |
| 13094 }else{ |
| 13095 SegmentNode *pIter; |
| 13096 sqlite3_int64 iNextFree = iFree; |
| 13097 sqlite3_int64 iNextLeaf = iLeaf; |
| 13098 for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ |
| 13099 int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); |
| 13100 int nWrite = pIter->nData - nStart; |
| 13101 |
| 13102 rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); |
| 13103 iNextFree++; |
| 13104 iNextLeaf += (pIter->nEntry+1); |
| 13105 } |
| 13106 if( rc==SQLITE_OK ){ |
| 13107 assert( iNextLeaf==iFree ); |
| 13108 rc = fts3NodeWrite( |
| 13109 p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot |
| 13110 ); |
| 13111 } |
| 13112 } |
| 13113 |
| 13114 return rc; |
| 13115 } |
| 13116 |
| 13117 /* |
| 13118 ** Free all memory allocations associated with the tree pTree. |
| 13119 */ |
| 13120 static void fts3NodeFree(SegmentNode *pTree){ |
| 13121 if( pTree ){ |
| 13122 SegmentNode *p = pTree->pLeftmost; |
| 13123 fts3NodeFree(p->pParent); |
| 13124 while( p ){ |
| 13125 SegmentNode *pRight = p->pRight; |
| 13126 if( p->aData!=(char *)&p[1] ){ |
| 13127 sqlite3_free(p->aData); |
| 13128 } |
| 13129 assert( pRight==0 || p->zMalloc==0 ); |
| 13130 sqlite3_free(p->zMalloc); |
| 13131 sqlite3_free(p); |
| 13132 p = pRight; |
| 13133 } |
| 13134 } |
| 13135 } |
| 13136 |
| 13137 /* |
| 13138 ** Add a term to the segment being constructed by the SegmentWriter object |
| 13139 ** *ppWriter. When adding the first term to a segment, *ppWriter should |
| 13140 ** be passed NULL. This function will allocate a new SegmentWriter object |
| 13141 ** and return it via the input/output variable *ppWriter in this case. |
| 13142 ** |
| 13143 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. |
| 13144 */ |
| 13145 static int fts3SegWriterAdd( |
| 13146 Fts3Table *p, /* Virtual table handle */ |
| 13147 SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ |
| 13148 int isCopyTerm, /* True if buffer zTerm must be copied */ |
| 13149 const char *zTerm, /* Pointer to buffer containing term */ |
| 13150 int nTerm, /* Size of term in bytes */ |
| 13151 const char *aDoclist, /* Pointer to buffer containing doclist */ |
| 13152 int nDoclist /* Size of doclist in bytes */ |
| 13153 ){ |
| 13154 int nPrefix; /* Size of term prefix in bytes */ |
| 13155 int nSuffix; /* Size of term suffix in bytes */ |
| 13156 int nReq; /* Number of bytes required on leaf page */ |
| 13157 int nData; |
| 13158 SegmentWriter *pWriter = *ppWriter; |
| 13159 |
| 13160 if( !pWriter ){ |
| 13161 int rc; |
| 13162 sqlite3_stmt *pStmt; |
| 13163 |
| 13164 /* Allocate the SegmentWriter structure */ |
| 13165 pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); |
| 13166 if( !pWriter ) return SQLITE_NOMEM; |
| 13167 memset(pWriter, 0, sizeof(SegmentWriter)); |
| 13168 *ppWriter = pWriter; |
| 13169 |
| 13170 /* Allocate a buffer in which to accumulate data */ |
| 13171 pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); |
| 13172 if( !pWriter->aData ) return SQLITE_NOMEM; |
| 13173 pWriter->nSize = p->nNodeSize; |
| 13174 |
| 13175 /* Find the next free blockid in the %_segments table */ |
| 13176 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); |
| 13177 if( rc!=SQLITE_OK ) return rc; |
| 13178 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13179 pWriter->iFree = sqlite3_column_int64(pStmt, 0); |
| 13180 pWriter->iFirst = pWriter->iFree; |
| 13181 } |
| 13182 rc = sqlite3_reset(pStmt); |
| 13183 if( rc!=SQLITE_OK ) return rc; |
| 13184 } |
| 13185 nData = pWriter->nData; |
| 13186 |
| 13187 nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); |
| 13188 nSuffix = nTerm-nPrefix; |
| 13189 |
| 13190 /* Figure out how many bytes are required by this new entry */ |
| 13191 nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ |
| 13192 sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ |
| 13193 nSuffix + /* Term suffix */ |
| 13194 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ |
| 13195 nDoclist; /* Doclist data */ |
| 13196 |
| 13197 if( nData>0 && nData+nReq>p->nNodeSize ){ |
| 13198 int rc; |
| 13199 |
| 13200 /* The current leaf node is full. Write it out to the database. */ |
| 13201 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); |
| 13202 if( rc!=SQLITE_OK ) return rc; |
| 13203 p->nLeafAdd++; |
| 13204 |
| 13205 /* Add the current term to the interior node tree. The term added to |
| 13206 ** the interior tree must: |
| 13207 ** |
| 13208 ** a) be greater than the largest term on the leaf node just written |
| 13209 ** to the database (still available in pWriter->zTerm), and |
| 13210 ** |
| 13211 ** b) be less than or equal to the term about to be added to the new |
| 13212 ** leaf node (zTerm/nTerm). |
| 13213 ** |
| 13214 ** In other words, it must be the prefix of zTerm 1 byte longer than |
| 13215 ** the common prefix (if any) of zTerm and pWriter->zTerm. |
| 13216 */ |
| 13217 assert( nPrefix<nTerm ); |
| 13218 rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1); |
| 13219 if( rc!=SQLITE_OK ) return rc; |
| 13220 |
| 13221 nData = 0; |
| 13222 pWriter->nTerm = 0; |
| 13223 |
| 13224 nPrefix = 0; |
| 13225 nSuffix = nTerm; |
| 13226 nReq = 1 + /* varint containing prefix size */ |
| 13227 sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ |
| 13228 nTerm + /* Term suffix */ |
| 13229 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ |
| 13230 nDoclist; /* Doclist data */ |
| 13231 } |
| 13232 |
| 13233 /* Increase the total number of bytes written to account for the new entry. */ |
| 13234 pWriter->nLeafData += nReq; |
| 13235 |
| 13236 /* If the buffer currently allocated is too small for this entry, realloc |
| 13237 ** the buffer to make it large enough. |
| 13238 */ |
| 13239 if( nReq>pWriter->nSize ){ |
| 13240 char *aNew = sqlite3_realloc(pWriter->aData, nReq); |
| 13241 if( !aNew ) return SQLITE_NOMEM; |
| 13242 pWriter->aData = aNew; |
| 13243 pWriter->nSize = nReq; |
| 13244 } |
| 13245 assert( nData+nReq<=pWriter->nSize ); |
| 13246 |
| 13247 /* Append the prefix-compressed term and doclist to the buffer. */ |
| 13248 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); |
| 13249 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); |
| 13250 memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); |
| 13251 nData += nSuffix; |
| 13252 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); |
| 13253 memcpy(&pWriter->aData[nData], aDoclist, nDoclist); |
| 13254 pWriter->nData = nData + nDoclist; |
| 13255 |
| 13256 /* Save the current term so that it can be used to prefix-compress the next. |
| 13257 ** If the isCopyTerm parameter is true, then the buffer pointed to by |
| 13258 ** zTerm is transient, so take a copy of the term data. Otherwise, just |
| 13259 ** store a copy of the pointer. |
| 13260 */ |
| 13261 if( isCopyTerm ){ |
| 13262 if( nTerm>pWriter->nMalloc ){ |
| 13263 char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); |
| 13264 if( !zNew ){ |
| 13265 return SQLITE_NOMEM; |
| 13266 } |
| 13267 pWriter->nMalloc = nTerm*2; |
| 13268 pWriter->zMalloc = zNew; |
| 13269 pWriter->zTerm = zNew; |
| 13270 } |
| 13271 assert( pWriter->zTerm==pWriter->zMalloc ); |
| 13272 memcpy(pWriter->zTerm, zTerm, nTerm); |
| 13273 }else{ |
| 13274 pWriter->zTerm = (char *)zTerm; |
| 13275 } |
| 13276 pWriter->nTerm = nTerm; |
| 13277 |
| 13278 return SQLITE_OK; |
| 13279 } |
| 13280 |
| 13281 /* |
| 13282 ** Flush all data associated with the SegmentWriter object pWriter to the |
| 13283 ** database. This function must be called after all terms have been added |
| 13284 ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is |
| 13285 ** returned. Otherwise, an SQLite error code. |
| 13286 */ |
| 13287 static int fts3SegWriterFlush( |
| 13288 Fts3Table *p, /* Virtual table handle */ |
| 13289 SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ |
| 13290 sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ |
| 13291 int iIdx /* Value for 'idx' column of %_segdir */ |
| 13292 ){ |
| 13293 int rc; /* Return code */ |
| 13294 if( pWriter->pTree ){ |
| 13295 sqlite3_int64 iLast = 0; /* Largest block id written to database */ |
| 13296 sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ |
| 13297 char *zRoot = NULL; /* Pointer to buffer containing root node */ |
| 13298 int nRoot = 0; /* Size of buffer zRoot */ |
| 13299 |
| 13300 iLastLeaf = pWriter->iFree; |
| 13301 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); |
| 13302 if( rc==SQLITE_OK ){ |
| 13303 rc = fts3NodeWrite(p, pWriter->pTree, 1, |
| 13304 pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); |
| 13305 } |
| 13306 if( rc==SQLITE_OK ){ |
| 13307 rc = fts3WriteSegdir(p, iLevel, iIdx, |
| 13308 pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); |
| 13309 } |
| 13310 }else{ |
| 13311 /* The entire tree fits on the root node. Write it to the segdir table. */ |
| 13312 rc = fts3WriteSegdir(p, iLevel, iIdx, |
| 13313 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); |
| 13314 } |
| 13315 p->nLeafAdd++; |
| 13316 return rc; |
| 13317 } |
| 13318 |
| 13319 /* |
| 13320 ** Release all memory held by the SegmentWriter object passed as the |
| 13321 ** first argument. |
| 13322 */ |
| 13323 static void fts3SegWriterFree(SegmentWriter *pWriter){ |
| 13324 if( pWriter ){ |
| 13325 sqlite3_free(pWriter->aData); |
| 13326 sqlite3_free(pWriter->zMalloc); |
| 13327 fts3NodeFree(pWriter->pTree); |
| 13328 sqlite3_free(pWriter); |
| 13329 } |
| 13330 } |
| 13331 |
| 13332 /* |
| 13333 ** The first value in the apVal[] array is assumed to contain an integer. |
| 13334 ** This function tests if there exist any documents with docid values that |
| 13335 ** are different from that integer. i.e. if deleting the document with docid |
| 13336 ** pRowid would mean the FTS3 table were empty. |
| 13337 ** |
| 13338 ** If successful, *pisEmpty is set to true if the table is empty except for |
| 13339 ** document pRowid, or false otherwise, and SQLITE_OK is returned. If an |
| 13340 ** error occurs, an SQLite error code is returned. |
| 13341 */ |
| 13342 static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ |
| 13343 sqlite3_stmt *pStmt; |
| 13344 int rc; |
| 13345 if( p->zContentTbl ){ |
| 13346 /* If using the content=xxx option, assume the table is never empty */ |
| 13347 *pisEmpty = 0; |
| 13348 rc = SQLITE_OK; |
| 13349 }else{ |
| 13350 rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); |
| 13351 if( rc==SQLITE_OK ){ |
| 13352 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13353 *pisEmpty = sqlite3_column_int(pStmt, 0); |
| 13354 } |
| 13355 rc = sqlite3_reset(pStmt); |
| 13356 } |
| 13357 } |
| 13358 return rc; |
| 13359 } |
| 13360 |
| 13361 /* |
| 13362 ** Set *pnMax to the largest segment level in the database for the index |
| 13363 ** iIndex. |
| 13364 ** |
| 13365 ** Segment levels are stored in the 'level' column of the %_segdir table. |
| 13366 ** |
| 13367 ** Return SQLITE_OK if successful, or an SQLite error code if not. |
| 13368 */ |
| 13369 static int fts3SegmentMaxLevel( |
| 13370 Fts3Table *p, |
| 13371 int iLangid, |
| 13372 int iIndex, |
| 13373 sqlite3_int64 *pnMax |
| 13374 ){ |
| 13375 sqlite3_stmt *pStmt; |
| 13376 int rc; |
| 13377 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 13378 |
| 13379 /* Set pStmt to the compiled version of: |
| 13380 ** |
| 13381 ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? |
| 13382 ** |
| 13383 ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). |
| 13384 */ |
| 13385 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); |
| 13386 if( rc!=SQLITE_OK ) return rc; |
| 13387 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 13388 sqlite3_bind_int64(pStmt, 2, |
| 13389 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 13390 ); |
| 13391 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13392 *pnMax = sqlite3_column_int64(pStmt, 0); |
| 13393 } |
| 13394 return sqlite3_reset(pStmt); |
| 13395 } |
| 13396 |
| 13397 /* |
| 13398 ** iAbsLevel is an absolute level that may be assumed to exist within |
| 13399 ** the database. This function checks if it is the largest level number |
| 13400 ** within its index. Assuming no error occurs, *pbMax is set to 1 if |
| 13401 ** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK |
| 13402 ** is returned. If an error occurs, an error code is returned and the |
| 13403 ** final value of *pbMax is undefined. |
| 13404 */ |
| 13405 static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ |
| 13406 |
| 13407 /* Set pStmt to the compiled version of: |
| 13408 ** |
| 13409 ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? |
| 13410 ** |
| 13411 ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). |
| 13412 */ |
| 13413 sqlite3_stmt *pStmt; |
| 13414 int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); |
| 13415 if( rc!=SQLITE_OK ) return rc; |
| 13416 sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); |
| 13417 sqlite3_bind_int64(pStmt, 2, |
| 13418 ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL |
| 13419 ); |
| 13420 |
| 13421 *pbMax = 0; |
| 13422 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13423 *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; |
| 13424 } |
| 13425 return sqlite3_reset(pStmt); |
| 13426 } |
| 13427 |
| 13428 /* |
| 13429 ** Delete all entries in the %_segments table associated with the segment |
| 13430 ** opened with seg-reader pSeg. This function does not affect the contents |
| 13431 ** of the %_segdir table. |
| 13432 */ |
| 13433 static int fts3DeleteSegment( |
| 13434 Fts3Table *p, /* FTS table handle */ |
| 13435 Fts3SegReader *pSeg /* Segment to delete */ |
| 13436 ){ |
| 13437 int rc = SQLITE_OK; /* Return code */ |
| 13438 if( pSeg->iStartBlock ){ |
| 13439 sqlite3_stmt *pDelete; /* SQL statement to delete rows */ |
| 13440 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); |
| 13441 if( rc==SQLITE_OK ){ |
| 13442 sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); |
| 13443 sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); |
| 13444 sqlite3_step(pDelete); |
| 13445 rc = sqlite3_reset(pDelete); |
| 13446 } |
| 13447 } |
| 13448 return rc; |
| 13449 } |
| 13450 |
| 13451 /* |
| 13452 ** This function is used after merging multiple segments into a single large |
| 13453 ** segment to delete the old, now redundant, segment b-trees. Specifically, |
| 13454 ** it: |
| 13455 ** |
| 13456 ** 1) Deletes all %_segments entries for the segments associated with |
| 13457 ** each of the SegReader objects in the array passed as the third |
| 13458 ** argument, and |
| 13459 ** |
| 13460 ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir |
| 13461 ** entries regardless of level if (iLevel<0). |
| 13462 ** |
| 13463 ** SQLITE_OK is returned if successful, otherwise an SQLite error code. |
| 13464 */ |
| 13465 static int fts3DeleteSegdir( |
| 13466 Fts3Table *p, /* Virtual table handle */ |
| 13467 int iLangid, /* Language id */ |
| 13468 int iIndex, /* Index for p->aIndex */ |
| 13469 int iLevel, /* Level of %_segdir entries to delete */ |
| 13470 Fts3SegReader **apSegment, /* Array of SegReader objects */ |
| 13471 int nReader /* Size of array apSegment */ |
| 13472 ){ |
| 13473 int rc = SQLITE_OK; /* Return Code */ |
| 13474 int i; /* Iterator variable */ |
| 13475 sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ |
| 13476 |
| 13477 for(i=0; rc==SQLITE_OK && i<nReader; i++){ |
| 13478 rc = fts3DeleteSegment(p, apSegment[i]); |
| 13479 } |
| 13480 if( rc!=SQLITE_OK ){ |
| 13481 return rc; |
| 13482 } |
| 13483 |
| 13484 assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); |
| 13485 if( iLevel==FTS3_SEGCURSOR_ALL ){ |
| 13486 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); |
| 13487 if( rc==SQLITE_OK ){ |
| 13488 sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 13489 sqlite3_bind_int64(pDelete, 2, |
| 13490 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 13491 ); |
| 13492 } |
| 13493 }else{ |
| 13494 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); |
| 13495 if( rc==SQLITE_OK ){ |
| 13496 sqlite3_bind_int64( |
| 13497 pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) |
| 13498 ); |
| 13499 } |
| 13500 } |
| 13501 |
| 13502 if( rc==SQLITE_OK ){ |
| 13503 sqlite3_step(pDelete); |
| 13504 rc = sqlite3_reset(pDelete); |
| 13505 } |
| 13506 |
| 13507 return rc; |
| 13508 } |
| 13509 |
/*
** When this function is called, buffer *ppList (size *pnList bytes) contains
** a position list that may (or may not) feature multiple columns. This
** function adjusts the pointer *ppList and the length *pnList so that they
** identify the subset of the position list that corresponds to column iCol.
**
** If there are no entries in the input position list for column iCol, then
** *pnList is set to zero (or, for a malformed list that overruns its
** buffer, to a non-positive value) before returning.
**
** If parameter bZero is non-zero, then any part of the input list following
** the end of the output list is zeroed before returning.
**
** Robustness: a corrupt list can cause the scan to step past the end of
** the buffer. The loop therefore stops as soon as the remaining length is
** zero or negative, and the zeroing step only runs when there is a
** strictly positive number of trailing bytes.
*/
static void fts3ColumnFilter(
  int iCol,                       /* Column to filter on */
  int bZero,                      /* Zero out anything following *ppList */
  char **ppList,                  /* IN/OUT: Pointer to position list */
  int *pnList                     /* IN/OUT: Size of buffer *ppList in bytes */
){
  char *pList = *ppList;
  int nList = *pnList;
  char *pEnd = &pList[nList];
  int iCurrent = 0;
  char *p = pList;

  assert( iCol>=0 );
  while( 1 ){
    /* Advance p over the entries of the current column. The scan stops at
    ** a byte with value 0x00 or 0x01 that is not the continuation of a
    ** multi-byte varint (c carries the previous byte's 0x80 flag). */
    char c = 0;
    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;

    if( iCol==iCurrent ){
      nList = (int)(p - pList);
      break;
    }

    nList -= (int)(p - pList);
    pList = p;
    if( nList<=0 ){
      /* End of list (or overrun of a corrupt list): column not present. */
      break;
    }

    /* Skip the one-byte column marker and read the next column number. */
    p = &pList[1];
    p += fts3GetVarint32(p, &iCurrent);
  }

  if( bZero && (pEnd - &pList[nList])>0 ){
    memset(&pList[nList], 0, pEnd - &pList[nList]);
  }
  *ppList = pList;
  *pnList = nList;
}
| 13559 |
| 13560 /* |
| 13561 ** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any |
| 13562 ** existing data). Grow the buffer if required. |
| 13563 ** |
| 13564 ** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered |
| 13565 ** trying to resize the buffer, return SQLITE_NOMEM. |
| 13566 */ |
| 13567 static int fts3MsrBufferData( |
| 13568 Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ |
| 13569 char *pList, |
| 13570 int nList |
| 13571 ){ |
| 13572 if( nList>pMsr->nBuffer ){ |
| 13573 char *pNew; |
| 13574 pMsr->nBuffer = nList*2; |
| 13575 pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); |
| 13576 if( !pNew ) return SQLITE_NOMEM; |
| 13577 pMsr->aBuffer = pNew; |
| 13578 } |
| 13579 |
| 13580 memcpy(pMsr->aBuffer, pList, nList); |
| 13581 return SQLITE_OK; |
| 13582 } |
| 13583 |
| 13584 SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( |
| 13585 Fts3Table *p, /* Virtual table handle */ |
| 13586 Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ |
| 13587 sqlite3_int64 *piDocid, /* OUT: Docid value */ |
| 13588 char **paPoslist, /* OUT: Pointer to position list */ |
| 13589 int *pnPoslist /* OUT: Size of position list in bytes */ |
| 13590 ){ |
| 13591 int nMerge = pMsr->nAdvance; |
| 13592 Fts3SegReader **apSegment = pMsr->apSegment; |
| 13593 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13594 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13595 ); |
| 13596 |
| 13597 if( nMerge==0 ){ |
| 13598 *paPoslist = 0; |
| 13599 return SQLITE_OK; |
| 13600 } |
| 13601 |
| 13602 while( 1 ){ |
| 13603 Fts3SegReader *pSeg; |
| 13604 pSeg = pMsr->apSegment[0]; |
| 13605 |
| 13606 if( pSeg->pOffsetList==0 ){ |
| 13607 *paPoslist = 0; |
| 13608 break; |
| 13609 }else{ |
| 13610 int rc; |
| 13611 char *pList; |
| 13612 int nList; |
| 13613 int j; |
| 13614 sqlite3_int64 iDocid = apSegment[0]->iDocid; |
| 13615 |
| 13616 rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); |
| 13617 j = 1; |
| 13618 while( rc==SQLITE_OK |
| 13619 && j<nMerge |
| 13620 && apSegment[j]->pOffsetList |
| 13621 && apSegment[j]->iDocid==iDocid |
| 13622 ){ |
| 13623 rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); |
| 13624 j++; |
| 13625 } |
| 13626 if( rc!=SQLITE_OK ) return rc; |
| 13627 fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); |
| 13628 |
| 13629 if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ |
| 13630 rc = fts3MsrBufferData(pMsr, pList, nList+1); |
| 13631 if( rc!=SQLITE_OK ) return rc; |
| 13632 assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); |
| 13633 pList = pMsr->aBuffer; |
| 13634 } |
| 13635 |
| 13636 if( pMsr->iColFilter>=0 ){ |
| 13637 fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); |
| 13638 } |
| 13639 |
| 13640 if( nList>0 ){ |
| 13641 *paPoslist = pList; |
| 13642 *piDocid = iDocid; |
| 13643 *pnPoslist = nList; |
| 13644 break; |
| 13645 } |
| 13646 } |
| 13647 } |
| 13648 |
| 13649 return SQLITE_OK; |
| 13650 } |
| 13651 |
| 13652 static int fts3SegReaderStart( |
| 13653 Fts3Table *p, /* Virtual table handle */ |
| 13654 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13655 const char *zTerm, /* Term searched for (or NULL) */ |
| 13656 int nTerm /* Length of zTerm in bytes */ |
| 13657 ){ |
| 13658 int i; |
| 13659 int nSeg = pCsr->nSegment; |
| 13660 |
| 13661 /* If the Fts3SegFilter defines a specific term (or term prefix) to search |
| 13662 ** for, then advance each segment iterator until it points to a term of |
| 13663 ** equal or greater value than the specified term. This prevents many |
| 13664 ** unnecessary merge/sort operations for the case where single segment |
| 13665 ** b-tree leaf nodes contain more than one term. |
| 13666 */ |
| 13667 for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){ |
| 13668 int res = 0; |
| 13669 Fts3SegReader *pSeg = pCsr->apSegment[i]; |
| 13670 do { |
| 13671 int rc = fts3SegReaderNext(p, pSeg, 0); |
| 13672 if( rc!=SQLITE_OK ) return rc; |
| 13673 }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); |
| 13674 |
| 13675 if( pSeg->bLookup && res!=0 ){ |
| 13676 fts3SegReaderSetEof(pSeg); |
| 13677 } |
| 13678 } |
| 13679 fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); |
| 13680 |
| 13681 return SQLITE_OK; |
| 13682 } |
| 13683 |
| 13684 SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( |
| 13685 Fts3Table *p, /* Virtual table handle */ |
| 13686 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13687 Fts3SegFilter *pFilter /* Restrictions on range of iteration */ |
| 13688 ){ |
| 13689 pCsr->pFilter = pFilter; |
| 13690 return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); |
| 13691 } |
| 13692 |
| 13693 SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( |
| 13694 Fts3Table *p, /* Virtual table handle */ |
| 13695 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13696 int iCol, /* Column to match on. */ |
| 13697 const char *zTerm, /* Term to iterate through a doclist for */ |
| 13698 int nTerm /* Number of bytes in zTerm */ |
| 13699 ){ |
| 13700 int i; |
| 13701 int rc; |
| 13702 int nSegment = pCsr->nSegment; |
| 13703 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13704 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13705 ); |
| 13706 |
| 13707 assert( pCsr->pFilter==0 ); |
| 13708 assert( zTerm && nTerm>0 ); |
| 13709 |
| 13710 /* Advance each segment iterator until it points to the term zTerm/nTerm. */ |
| 13711 rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); |
| 13712 if( rc!=SQLITE_OK ) return rc; |
| 13713 |
| 13714 /* Determine how many of the segments actually point to zTerm/nTerm. */ |
| 13715 for(i=0; i<nSegment; i++){ |
| 13716 Fts3SegReader *pSeg = pCsr->apSegment[i]; |
| 13717 if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ |
| 13718 break; |
| 13719 } |
| 13720 } |
| 13721 pCsr->nAdvance = i; |
| 13722 |
| 13723 /* Advance each of the segments to point to the first docid. */ |
| 13724 for(i=0; i<pCsr->nAdvance; i++){ |
| 13725 rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); |
| 13726 if( rc!=SQLITE_OK ) return rc; |
| 13727 } |
| 13728 fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); |
| 13729 |
| 13730 assert( iCol<0 || iCol<p->nColumn ); |
| 13731 pCsr->iColFilter = iCol; |
| 13732 |
| 13733 return SQLITE_OK; |
| 13734 } |
| 13735 |
| 13736 /* |
| 13737 ** This function is called on a MultiSegReader that has been started using |
| 13738 ** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also |
| 13739 ** have been made. Calling this function puts the MultiSegReader in such |
| 13740 ** a state that if the next two calls are: |
| 13741 ** |
| 13742 ** sqlite3Fts3SegReaderStart() |
| 13743 ** sqlite3Fts3SegReaderStep() |
| 13744 ** |
| 13745 ** then the entire doclist for the term is available in |
| 13746 ** MultiSegReader.aDoclist/nDoclist. |
| 13747 */ |
| 13748 SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ |
| 13749 int i; /* Used to iterate through segment-readers */ |
| 13750 |
| 13751 assert( pCsr->zTerm==0 ); |
| 13752 assert( pCsr->nTerm==0 ); |
| 13753 assert( pCsr->aDoclist==0 ); |
| 13754 assert( pCsr->nDoclist==0 ); |
| 13755 |
| 13756 pCsr->nAdvance = 0; |
| 13757 pCsr->bRestart = 1; |
| 13758 for(i=0; i<pCsr->nSegment; i++){ |
| 13759 pCsr->apSegment[i]->pOffsetList = 0; |
| 13760 pCsr->apSegment[i]->nOffsetList = 0; |
| 13761 pCsr->apSegment[i]->iDocid = 0; |
| 13762 } |
| 13763 |
| 13764 return SQLITE_OK; |
| 13765 } |
| 13766 |
| 13767 |
| 13768 SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( |
| 13769 Fts3Table *p, /* Virtual table handle */ |
| 13770 Fts3MultiSegReader *pCsr /* Cursor object */ |
| 13771 ){ |
| 13772 int rc = SQLITE_OK; |
| 13773 |
| 13774 int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); |
| 13775 int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); |
| 13776 int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); |
| 13777 int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); |
| 13778 int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); |
| 13779 int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); |
| 13780 |
| 13781 Fts3SegReader **apSegment = pCsr->apSegment; |
| 13782 int nSegment = pCsr->nSegment; |
| 13783 Fts3SegFilter *pFilter = pCsr->pFilter; |
| 13784 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13785 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13786 ); |
| 13787 |
| 13788 if( pCsr->nSegment==0 ) return SQLITE_OK; |
| 13789 |
| 13790 do { |
| 13791 int nMerge; |
| 13792 int i; |
| 13793 |
| 13794 /* Advance the first pCsr->nAdvance entries in the apSegment[] array |
| 13795 ** forward. Then sort the list in order of current term again. |
| 13796 */ |
| 13797 for(i=0; i<pCsr->nAdvance; i++){ |
| 13798 Fts3SegReader *pSeg = apSegment[i]; |
| 13799 if( pSeg->bLookup ){ |
| 13800 fts3SegReaderSetEof(pSeg); |
| 13801 }else{ |
| 13802 rc = fts3SegReaderNext(p, pSeg, 0); |
| 13803 } |
| 13804 if( rc!=SQLITE_OK ) return rc; |
| 13805 } |
| 13806 fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); |
| 13807 pCsr->nAdvance = 0; |
| 13808 |
| 13809 /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ |
| 13810 assert( rc==SQLITE_OK ); |
| 13811 if( apSegment[0]->aNode==0 ) break; |
| 13812 |
| 13813 pCsr->nTerm = apSegment[0]->nTerm; |
| 13814 pCsr->zTerm = apSegment[0]->zTerm; |
| 13815 |
| 13816 /* If this is a prefix-search, and if the term that apSegment[0] points |
| 13817 ** to does not share a suffix with pFilter->zTerm/nTerm, then all |
| 13818 ** required callbacks have been made. In this case exit early. |
| 13819 ** |
| 13820 ** Similarly, if this is a search for an exact match, and the first term |
| 13821 ** of segment apSegment[0] is not a match, exit early. |
| 13822 */ |
| 13823 if( pFilter->zTerm && !isScan ){ |
| 13824 if( pCsr->nTerm<pFilter->nTerm |
| 13825 || (!isPrefix && pCsr->nTerm>pFilter->nTerm) |
| 13826 || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) |
| 13827 ){ |
| 13828 break; |
| 13829 } |
| 13830 } |
| 13831 |
| 13832 nMerge = 1; |
| 13833 while( nMerge<nSegment |
| 13834 && apSegment[nMerge]->aNode |
| 13835 && apSegment[nMerge]->nTerm==pCsr->nTerm |
| 13836 && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) |
| 13837 ){ |
| 13838 nMerge++; |
| 13839 } |
| 13840 |
| 13841 assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); |
| 13842 if( nMerge==1 |
| 13843 && !isIgnoreEmpty |
| 13844 && !isFirst |
| 13845 && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) |
| 13846 ){ |
| 13847 pCsr->nDoclist = apSegment[0]->nDoclist; |
| 13848 if( fts3SegReaderIsPending(apSegment[0]) ){ |
| 13849 rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); |
| 13850 pCsr->aDoclist = pCsr->aBuffer; |
| 13851 }else{ |
| 13852 pCsr->aDoclist = apSegment[0]->aDoclist; |
| 13853 } |
| 13854 if( rc==SQLITE_OK ) rc = SQLITE_ROW; |
| 13855 }else{ |
| 13856 int nDoclist = 0; /* Size of doclist */ |
| 13857 sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ |
| 13858 |
| 13859 /* The current term of the first nMerge entries in the array |
| 13860 ** of Fts3SegReader objects is the same. The doclists must be merged |
| 13861 ** and a single term returned with the merged doclist. |
| 13862 */ |
| 13863 for(i=0; i<nMerge; i++){ |
| 13864 fts3SegReaderFirstDocid(p, apSegment[i]); |
| 13865 } |
| 13866 fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp); |
| 13867 while( apSegment[0]->pOffsetList ){ |
| 13868 int j; /* Number of segments that share a docid */ |
| 13869 char *pList = 0; |
| 13870 int nList = 0; |
| 13871 int nByte; |
| 13872 sqlite3_int64 iDocid = apSegment[0]->iDocid; |
| 13873 fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); |
| 13874 j = 1; |
| 13875 while( j<nMerge |
| 13876 && apSegment[j]->pOffsetList |
| 13877 && apSegment[j]->iDocid==iDocid |
| 13878 ){ |
| 13879 fts3SegReaderNextDocid(p, apSegment[j], 0, 0); |
| 13880 j++; |
| 13881 } |
| 13882 |
| 13883 if( isColFilter ){ |
| 13884 fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); |
| 13885 } |
| 13886 |
| 13887 if( !isIgnoreEmpty || nList>0 ){ |
| 13888 |
| 13889 /* Calculate the 'docid' delta value to write into the merged |
| 13890 ** doclist. */ |
| 13891 sqlite3_int64 iDelta; |
| 13892 if( p->bDescIdx && nDoclist>0 ){ |
| 13893 iDelta = iPrev - iDocid; |
| 13894 }else{ |
| 13895 iDelta = iDocid - iPrev; |
| 13896 } |
| 13897 assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); |
| 13898 assert( nDoclist>0 || iDelta==iDocid ); |
| 13899 |
| 13900 nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); |
| 13901 if( nDoclist+nByte>pCsr->nBuffer ){ |
| 13902 char *aNew; |
| 13903 pCsr->nBuffer = (nDoclist+nByte)*2; |
| 13904 aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); |
| 13905 if( !aNew ){ |
| 13906 return SQLITE_NOMEM; |
| 13907 } |
| 13908 pCsr->aBuffer = aNew; |
| 13909 } |
| 13910 |
| 13911 if( isFirst ){ |
| 13912 char *a = &pCsr->aBuffer[nDoclist]; |
| 13913 int nWrite; |
| 13914 |
| 13915 nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); |
| 13916 if( nWrite ){ |
| 13917 iPrev = iDocid; |
| 13918 nDoclist += nWrite; |
| 13919 } |
| 13920 }else{ |
| 13921 nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); |
| 13922 iPrev = iDocid; |
| 13923 if( isRequirePos ){ |
| 13924 memcpy(&pCsr->aBuffer[nDoclist], pList, nList); |
| 13925 nDoclist += nList; |
| 13926 pCsr->aBuffer[nDoclist++] = '\0'; |
| 13927 } |
| 13928 } |
| 13929 } |
| 13930 |
| 13931 fts3SegReaderSort(apSegment, nMerge, j, xCmp); |
| 13932 } |
| 13933 if( nDoclist>0 ){ |
| 13934 pCsr->aDoclist = pCsr->aBuffer; |
| 13935 pCsr->nDoclist = nDoclist; |
| 13936 rc = SQLITE_ROW; |
| 13937 } |
| 13938 } |
| 13939 pCsr->nAdvance = nMerge; |
| 13940 }while( rc==SQLITE_OK ); |
| 13941 |
| 13942 return rc; |
| 13943 } |
| 13944 |
| 13945 |
| 13946 SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( |
| 13947 Fts3MultiSegReader *pCsr /* Cursor object */ |
| 13948 ){ |
| 13949 if( pCsr ){ |
| 13950 int i; |
| 13951 for(i=0; i<pCsr->nSegment; i++){ |
| 13952 sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); |
| 13953 } |
| 13954 sqlite3_free(pCsr->apSegment); |
| 13955 sqlite3_free(pCsr->aBuffer); |
| 13956 |
| 13957 pCsr->nSegment = 0; |
| 13958 pCsr->apSegment = 0; |
| 13959 pCsr->aBuffer = 0; |
| 13960 } |
| 13961 } |
| 13962 |
| 13963 /* |
| 13964 ** Decode the "end_block" field, selected by column iCol of the SELECT |
| 13965 ** statement passed as the first argument. |
| 13966 ** |
| 13967 ** The "end_block" field may contain either an integer, or a text field |
| 13968 ** containing the text representation of two non-negative integers separated |
| 13969 ** by one or more space (0x20) characters. In the first case, set *piEndBlock |
| 13970 ** to the integer value and *pnByte to zero before returning. In the second, |
| 13971 ** set *piEndBlock to the first value and *pnByte to the second. |
| 13972 */ |
| 13973 static void fts3ReadEndBlockField( |
| 13974 sqlite3_stmt *pStmt, |
| 13975 int iCol, |
| 13976 i64 *piEndBlock, |
| 13977 i64 *pnByte |
| 13978 ){ |
| 13979 const unsigned char *zText = sqlite3_column_text(pStmt, iCol); |
| 13980 if( zText ){ |
| 13981 int i; |
| 13982 int iMul = 1; |
| 13983 i64 iVal = 0; |
| 13984 for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ |
| 13985 iVal = iVal*10 + (zText[i] - '0'); |
| 13986 } |
| 13987 *piEndBlock = iVal; |
| 13988 while( zText[i]==' ' ) i++; |
| 13989 iVal = 0; |
| 13990 if( zText[i]=='-' ){ |
| 13991 i++; |
| 13992 iMul = -1; |
| 13993 } |
| 13994 for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ |
| 13995 iVal = iVal*10 + (zText[i] - '0'); |
| 13996 } |
| 13997 *pnByte = (iVal * (i64)iMul); |
| 13998 } |
| 13999 } |
| 14000 |
| 14001 |
| 14002 /* |
| 14003 ** A segment of size nByte bytes has just been written to absolute level |
| 14004 ** iAbsLevel. Promote any segments that should be promoted as a result. |
| 14005 */ |
| 14006 static int fts3PromoteSegments( |
| 14007 Fts3Table *p, /* FTS table handle */ |
| 14008 sqlite3_int64 iAbsLevel, /* Absolute level just updated */ |
| 14009 sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ |
| 14010 ){ |
| 14011 int rc = SQLITE_OK; |
| 14012 sqlite3_stmt *pRange; |
| 14013 |
| 14014 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); |
| 14015 |
| 14016 if( rc==SQLITE_OK ){ |
| 14017 int bOk = 0; |
| 14018 i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; |
| 14019 i64 nLimit = (nByte*3)/2; |
| 14020 |
| 14021 /* Loop through all entries in the %_segdir table corresponding to |
| 14022 ** segments in this index on levels greater than iAbsLevel. If there is |
| 14023 ** at least one such segment, and it is possible to determine that all |
| 14024 ** such segments are smaller than nLimit bytes in size, they will be |
| 14025 ** promoted to level iAbsLevel. */ |
| 14026 sqlite3_bind_int64(pRange, 1, iAbsLevel+1); |
| 14027 sqlite3_bind_int64(pRange, 2, iLast); |
| 14028 while( SQLITE_ROW==sqlite3_step(pRange) ){ |
| 14029 i64 nSize = 0, dummy; |
| 14030 fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); |
| 14031 if( nSize<=0 || nSize>nLimit ){ |
| 14032 /* If nSize==0, then the %_segdir.end_block field does not not |
| 14033 ** contain a size value. This happens if it was written by an |
| 14034 ** old version of FTS. In this case it is not possible to determine |
| 14035 ** the size of the segment, and so segment promotion does not |
| 14036 ** take place. */ |
| 14037 bOk = 0; |
| 14038 break; |
| 14039 } |
| 14040 bOk = 1; |
| 14041 } |
| 14042 rc = sqlite3_reset(pRange); |
| 14043 |
| 14044 if( bOk ){ |
| 14045 int iIdx = 0; |
| 14046 sqlite3_stmt *pUpdate1 = 0; |
| 14047 sqlite3_stmt *pUpdate2 = 0; |
| 14048 |
| 14049 if( rc==SQLITE_OK ){ |
| 14050 rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); |
| 14051 } |
| 14052 if( rc==SQLITE_OK ){ |
| 14053 rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); |
| 14054 } |
| 14055 |
| 14056 if( rc==SQLITE_OK ){ |
| 14057 |
| 14058 /* Loop through all %_segdir entries for segments in this index with |
| 14059 ** levels equal to or greater than iAbsLevel. As each entry is visited, |
| 14060 ** updated it to set (level = -1) and (idx = N), where N is 0 for the |
| 14061 ** oldest segment in the range, 1 for the next oldest, and so on. |
| 14062 ** |
| 14063 ** In other words, move all segments being promoted to level -1, |
| 14064 ** setting the "idx" fields as appropriate to keep them in the same |
| 14065 ** order. The contents of level -1 (which is never used, except |
| 14066 ** transiently here), will be moved back to level iAbsLevel below. */ |
| 14067 sqlite3_bind_int64(pRange, 1, iAbsLevel); |
| 14068 while( SQLITE_ROW==sqlite3_step(pRange) ){ |
| 14069 sqlite3_bind_int(pUpdate1, 1, iIdx++); |
| 14070 sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); |
| 14071 sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); |
| 14072 sqlite3_step(pUpdate1); |
| 14073 rc = sqlite3_reset(pUpdate1); |
| 14074 if( rc!=SQLITE_OK ){ |
| 14075 sqlite3_reset(pRange); |
| 14076 break; |
| 14077 } |
| 14078 } |
| 14079 } |
| 14080 if( rc==SQLITE_OK ){ |
| 14081 rc = sqlite3_reset(pRange); |
| 14082 } |
| 14083 |
| 14084 /* Move level -1 to level iAbsLevel */ |
| 14085 if( rc==SQLITE_OK ){ |
| 14086 sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); |
| 14087 sqlite3_step(pUpdate2); |
| 14088 rc = sqlite3_reset(pUpdate2); |
| 14089 } |
| 14090 } |
| 14091 } |
| 14092 |
| 14093 |
| 14094 return rc; |
| 14095 } |
| 14096 |
/*
** Merge all level iLevel segments in the database into a single
** iLevel+1 segment. Or, if iLevel==FTS3_SEGCURSOR_ALL, merge all segments
** into a single segment with a level equal to the numerically largest
** level currently present in the database for this index/langid.
**
** iLevel may also be FTS3_SEGCURSOR_PENDING (asserted below to be -1), in
** which case the output is written at level iLevel+1==0 and no %_segdir
** entries are deleted (this is how sqlite3Fts3PendingTermsFlush() uses
** this function).
**
** If this function is called with iLevel==FTS3_SEGCURSOR_ALL, but there
** is only one segment in the database, SQLITE_DONE is returned
** immediately. If the cursor opened on the input reports zero segments,
** nothing is written and the result of opening the cursor is returned.
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
**
** The segment writer and the segment cursor are released on every exit
** path via the "finished" label.
*/
static int fts3SegmentMerge(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language id to merge */
  int iIndex,                     /* Index in p->aIndex[] to merge */
  int iLevel                      /* Level to merge */
){
  int rc;                         /* Return code */
  int iIdx = 0;                   /* Index of new segment */
  sqlite3_int64 iNewLevel = 0;    /* Level/index to create new segment at */
  SegmentWriter *pWriter = 0;     /* Used to write the new, merged, segment */
  Fts3SegFilter filter;           /* Segment term filter condition */
  Fts3MultiSegReader csr;         /* Cursor to iterate through level(s) */
  int bIgnoreEmpty = 0;           /* True to ignore empty segments */
  i64 iMaxLevel = 0;              /* Max level number for this index/langid */

  assert( iLevel==FTS3_SEGCURSOR_ALL
       || iLevel==FTS3_SEGCURSOR_PENDING
       || iLevel>=0
  );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Open a cursor over the input segments. Nothing to do if there are none. */
  rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
  if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;

  /* iMaxLevel is only looked up when merging on-disk segments. For a
  ** pending-terms flush it keeps its initial value of 0. */
  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
    if( rc!=SQLITE_OK ) goto finished;
  }

  if( iLevel==FTS3_SEGCURSOR_ALL ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the numerically
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0. */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    iNewLevel = iMaxLevel;
    bIgnoreEmpty = 1;

  }else{
    /* This call is to merge all segments at level iLevel. find the next
    ** available segment index at level iLevel+1. The call to
    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
    ** a single iLevel+2 segment if necessary. */
    assert( FTS3_SEGCURSOR_PENDING==-1 );
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
    bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
  }
  if( rc!=SQLITE_OK ) goto finished;

  assert( csr.nSegment>0 );
  assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
  assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );

  /* Position lists are always requested. Empty entries are skipped only
  ** when bIgnoreEmpty was set above. */
  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);

  /* Copy every term/doclist produced by the cursor into the new segment.
  ** The loop ends with rc==SQLITE_OK once the cursor is exhausted, or with
  ** an error code from either the reader or the writer. */
  rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  while( SQLITE_OK==rc ){
    rc = sqlite3Fts3SegReaderStep(p, &csr);
    if( rc!=SQLITE_ROW ) break;
    rc = fts3SegWriterAdd(p, &pWriter, 1,
        csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
  }
  if( rc!=SQLITE_OK ) goto finished;
  assert( pWriter || bIgnoreEmpty );

  /* The input segments are removed before the output segment is flushed.
  ** A pending-terms flush has no %_segdir entries to remove. */
  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    rc = fts3DeleteSegdir(
        p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
    );
    if( rc!=SQLITE_OK ) goto finished;
  }
  /* pWriter is still NULL if no term was ever added, in which case no
  ** output segment is written. */
  if( pWriter ){
    rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
    if( rc==SQLITE_OK ){
      if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
        rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
      }
    }
  }

 finished:
  /* Common exit: release the writer and the cursor on all paths. */
  fts3SegWriterFree(pWriter);
  sqlite3Fts3SegReaderFinish(&csr);
  return rc;
}
| 14200 |
| 14201 |
| 14202 /* |
| 14203 ** Flush the contents of pendingTerms to level 0 segments. |
| 14204 */ |
| 14205 SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ |
| 14206 int rc = SQLITE_OK; |
| 14207 int i; |
| 14208 |
| 14209 for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 14210 rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); |
| 14211 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 14212 } |
| 14213 sqlite3Fts3PendingTermsClear(p); |
| 14214 |
| 14215 /* Determine the auto-incr-merge setting if unknown. If enabled, |
| 14216 ** estimate the number of leaf blocks of content to be written |
| 14217 */ |
| 14218 if( rc==SQLITE_OK && p->bHasStat |
| 14219 && p->nAutoincrmerge==0xff && p->nLeafAdd>0 |
| 14220 ){ |
| 14221 sqlite3_stmt *pStmt = 0; |
| 14222 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); |
| 14223 if( rc==SQLITE_OK ){ |
| 14224 sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); |
| 14225 rc = sqlite3_step(pStmt); |
| 14226 if( rc==SQLITE_ROW ){ |
| 14227 p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); |
| 14228 if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; |
| 14229 }else if( rc==SQLITE_DONE ){ |
| 14230 p->nAutoincrmerge = 0; |
| 14231 } |
| 14232 rc = sqlite3_reset(pStmt); |
| 14233 } |
| 14234 } |
| 14235 return rc; |
| 14236 } |
| 14237 |
| 14238 /* |
| 14239 ** Encode N integers as varints into a blob. |
| 14240 */ |
| 14241 static void fts3EncodeIntArray( |
| 14242 int N, /* The number of integers to encode */ |
| 14243 u32 *a, /* The integer values */ |
| 14244 char *zBuf, /* Write the BLOB here */ |
| 14245 int *pNBuf /* Write number of bytes if zBuf[] used here */ |
| 14246 ){ |
| 14247 int i, j; |
| 14248 for(i=j=0; i<N; i++){ |
| 14249 j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]); |
| 14250 } |
| 14251 *pNBuf = j; |
| 14252 } |
| 14253 |
| 14254 /* |
| 14255 ** Decode a blob of varints into N integers |
| 14256 */ |
| 14257 static void fts3DecodeIntArray( |
| 14258 int N, /* The number of integers to decode */ |
| 14259 u32 *a, /* Write the integer values */ |
| 14260 const char *zBuf, /* The BLOB containing the varints */ |
| 14261 int nBuf /* size of the BLOB */ |
| 14262 ){ |
| 14263 int i, j; |
| 14264 UNUSED_PARAMETER(nBuf); |
| 14265 for(i=j=0; i<N; i++){ |
| 14266 sqlite3_int64 x; |
| 14267 j += sqlite3Fts3GetVarint(&zBuf[j], &x); |
| 14268 assert(j<=nBuf); |
| 14269 a[i] = (u32)(x & 0xffffffff); |
| 14270 } |
| 14271 } |
| 14272 |
| 14273 /* |
| 14274 ** Insert the sizes (in tokens) for each column of the document |
| 14275 ** with docid equal to p->iPrevDocid. The sizes are encoded as |
| 14276 ** a blob of varints. |
| 14277 */ |
| 14278 static void fts3InsertDocsize( |
| 14279 int *pRC, /* Result code */ |
| 14280 Fts3Table *p, /* Table into which to insert */ |
| 14281 u32 *aSz /* Sizes of each column, in tokens */ |
| 14282 ){ |
| 14283 char *pBlob; /* The BLOB encoding of the document size */ |
| 14284 int nBlob; /* Number of bytes in the BLOB */ |
| 14285 sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ |
| 14286 int rc; /* Result code from subfunctions */ |
| 14287 |
| 14288 if( *pRC ) return; |
| 14289 pBlob = sqlite3_malloc( 10*p->nColumn ); |
| 14290 if( pBlob==0 ){ |
| 14291 *pRC = SQLITE_NOMEM; |
| 14292 return; |
| 14293 } |
| 14294 fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); |
| 14295 rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); |
| 14296 if( rc ){ |
| 14297 sqlite3_free(pBlob); |
| 14298 *pRC = rc; |
| 14299 return; |
| 14300 } |
| 14301 sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); |
| 14302 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); |
| 14303 sqlite3_step(pStmt); |
| 14304 *pRC = sqlite3_reset(pStmt); |
| 14305 } |
| 14306 |
| 14307 /* |
| 14308 ** Record 0 of the %_stat table contains a blob consisting of N varints, |
| 14309 ** where N is the number of user defined columns in the fts3 table plus |
| 14310 ** two. If nCol is the number of user defined columns, then values of the |
| 14311 ** varints are set as follows: |
| 14312 ** |
| 14313 ** Varint 0: Total number of rows in the table. |
| 14314 ** |
| 14315 ** Varint 1..nCol: For each column, the total number of tokens stored in |
| 14316 ** the column for all rows of the table. |
| 14317 ** |
| 14318 ** Varint 1+nCol: The total size, in bytes, of all text values in all |
| 14319 ** columns of all rows of the table. |
| 14320 ** |
| 14321 */ |
| 14322 static void fts3UpdateDocTotals( |
| 14323 int *pRC, /* The result code */ |
| 14324 Fts3Table *p, /* Table being updated */ |
| 14325 u32 *aSzIns, /* Size increases */ |
| 14326 u32 *aSzDel, /* Size decreases */ |
| 14327 int nChng /* Change in the number of documents */ |
| 14328 ){ |
| 14329 char *pBlob; /* Storage for BLOB written into %_stat */ |
| 14330 int nBlob; /* Size of BLOB written into %_stat */ |
| 14331 u32 *a; /* Array of integers that becomes the BLOB */ |
| 14332 sqlite3_stmt *pStmt; /* Statement for reading and writing */ |
| 14333 int i; /* Loop counter */ |
| 14334 int rc; /* Result code from subfunctions */ |
| 14335 |
| 14336 const int nStat = p->nColumn+2; |
| 14337 |
| 14338 if( *pRC ) return; |
| 14339 a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); |
| 14340 if( a==0 ){ |
| 14341 *pRC = SQLITE_NOMEM; |
| 14342 return; |
| 14343 } |
| 14344 pBlob = (char*)&a[nStat]; |
| 14345 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); |
| 14346 if( rc ){ |
| 14347 sqlite3_free(a); |
| 14348 *pRC = rc; |
| 14349 return; |
| 14350 } |
| 14351 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 14352 if( sqlite3_step(pStmt)==SQLITE_ROW ){ |
| 14353 fts3DecodeIntArray(nStat, a, |
| 14354 sqlite3_column_blob(pStmt, 0), |
| 14355 sqlite3_column_bytes(pStmt, 0)); |
| 14356 }else{ |
| 14357 memset(a, 0, sizeof(u32)*(nStat) ); |
| 14358 } |
| 14359 rc = sqlite3_reset(pStmt); |
| 14360 if( rc!=SQLITE_OK ){ |
| 14361 sqlite3_free(a); |
| 14362 *pRC = rc; |
| 14363 return; |
| 14364 } |
| 14365 if( nChng<0 && a[0]<(u32)(-nChng) ){ |
| 14366 a[0] = 0; |
| 14367 }else{ |
| 14368 a[0] += nChng; |
| 14369 } |
| 14370 for(i=0; i<p->nColumn+1; i++){ |
| 14371 u32 x = a[i+1]; |
| 14372 if( x+aSzIns[i] < aSzDel[i] ){ |
| 14373 x = 0; |
| 14374 }else{ |
| 14375 x = x + aSzIns[i] - aSzDel[i]; |
| 14376 } |
| 14377 a[i+1] = x; |
| 14378 } |
| 14379 fts3EncodeIntArray(nStat, a, pBlob, &nBlob); |
| 14380 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); |
| 14381 if( rc ){ |
| 14382 sqlite3_free(a); |
| 14383 *pRC = rc; |
| 14384 return; |
| 14385 } |
| 14386 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 14387 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); |
| 14388 sqlite3_step(pStmt); |
| 14389 *pRC = sqlite3_reset(pStmt); |
| 14390 sqlite3_free(a); |
| 14391 } |
| 14392 |
| 14393 /* |
| 14394 ** Merge the entire database so that there is one segment for each |
| 14395 ** iIndex/iLangid combination. |
| 14396 */ |
| 14397 static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ |
| 14398 int bSeenDone = 0; |
| 14399 int rc; |
| 14400 sqlite3_stmt *pAllLangid = 0; |
| 14401 |
| 14402 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); |
| 14403 if( rc==SQLITE_OK ){ |
| 14404 int rc2; |
| 14405 sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); |
| 14406 sqlite3_bind_int(pAllLangid, 2, p->nIndex); |
| 14407 while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ |
| 14408 int i; |
| 14409 int iLangid = sqlite3_column_int(pAllLangid, 0); |
| 14410 for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 14411 rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); |
| 14412 if( rc==SQLITE_DONE ){ |
| 14413 bSeenDone = 1; |
| 14414 rc = SQLITE_OK; |
| 14415 } |
| 14416 } |
| 14417 } |
| 14418 rc2 = sqlite3_reset(pAllLangid); |
| 14419 if( rc==SQLITE_OK ) rc = rc2; |
| 14420 } |
| 14421 |
| 14422 sqlite3Fts3SegmentsClose(p); |
| 14423 sqlite3Fts3PendingTermsClear(p); |
| 14424 |
| 14425 return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; |
| 14426 } |
| 14427 |
| 14428 /* |
| 14429 ** This function is called when the user executes the following statement: |
| 14430 ** |
| 14431 ** INSERT INTO <tbl>(<tbl>) VALUES('rebuild'); |
| 14432 ** |
| 14433 ** The entire FTS index is discarded and rebuilt. If the table is one |
| 14434 ** created using the content=xxx option, then the new index is based on |
| 14435 ** the current contents of the xxx table. Otherwise, it is rebuilt based |
| 14436 ** on the contents of the %_content table. |
| 14437 */ |
| 14438 static int fts3DoRebuild(Fts3Table *p){ |
| 14439 int rc; /* Return Code */ |
| 14440 |
| 14441 rc = fts3DeleteAll(p, 0); |
| 14442 if( rc==SQLITE_OK ){ |
| 14443 u32 *aSz = 0; |
| 14444 u32 *aSzIns = 0; |
| 14445 u32 *aSzDel = 0; |
| 14446 sqlite3_stmt *pStmt = 0; |
| 14447 int nEntry = 0; |
| 14448 |
| 14449 /* Compose and prepare an SQL statement to loop through the content table */ |
| 14450 char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); |
| 14451 if( !zSql ){ |
| 14452 rc = SQLITE_NOMEM; |
| 14453 }else{ |
| 14454 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 14455 sqlite3_free(zSql); |
| 14456 } |
| 14457 |
| 14458 if( rc==SQLITE_OK ){ |
| 14459 int nByte = sizeof(u32) * (p->nColumn+1)*3; |
| 14460 aSz = (u32 *)sqlite3_malloc(nByte); |
| 14461 if( aSz==0 ){ |
| 14462 rc = SQLITE_NOMEM; |
| 14463 }else{ |
| 14464 memset(aSz, 0, nByte); |
| 14465 aSzIns = &aSz[p->nColumn+1]; |
| 14466 aSzDel = &aSzIns[p->nColumn+1]; |
| 14467 } |
| 14468 } |
| 14469 |
| 14470 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 14471 int iCol; |
| 14472 int iLangid = langidFromSelect(p, pStmt); |
| 14473 rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0)); |
| 14474 memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); |
| 14475 for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ |
| 14476 if( p->abNotindexed[iCol]==0 ){ |
| 14477 const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); |
| 14478 rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); |
| 14479 aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); |
| 14480 } |
| 14481 } |
| 14482 if( p->bHasDocsize ){ |
| 14483 fts3InsertDocsize(&rc, p, aSz); |
| 14484 } |
| 14485 if( rc!=SQLITE_OK ){ |
| 14486 sqlite3_finalize(pStmt); |
| 14487 pStmt = 0; |
| 14488 }else{ |
| 14489 nEntry++; |
| 14490 for(iCol=0; iCol<=p->nColumn; iCol++){ |
| 14491 aSzIns[iCol] += aSz[iCol]; |
| 14492 } |
| 14493 } |
| 14494 } |
| 14495 if( p->bFts4 ){ |
| 14496 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); |
| 14497 } |
| 14498 sqlite3_free(aSz); |
| 14499 |
| 14500 if( pStmt ){ |
| 14501 int rc2 = sqlite3_finalize(pStmt); |
| 14502 if( rc==SQLITE_OK ){ |
| 14503 rc = rc2; |
| 14504 } |
| 14505 } |
| 14506 } |
| 14507 |
| 14508 return rc; |
| 14509 } |
| 14510 |
| 14511 |
| 14512 /* |
| 14513 ** This function opens a cursor used to read the input data for an |
| 14514 ** incremental merge operation. Specifically, it opens a cursor to scan |
| 14515 ** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute |
| 14516 ** level iAbsLevel. |
| 14517 */ |
| 14518 static int fts3IncrmergeCsr( |
| 14519 Fts3Table *p, /* FTS3 table handle */ |
| 14520 sqlite3_int64 iAbsLevel, /* Absolute level to open */ |
| 14521 int nSeg, /* Number of segments to merge */ |
| 14522 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 14523 ){ |
| 14524 int rc; /* Return Code */ |
| 14525 sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ |
| 14526 int nByte; /* Bytes allocated at pCsr->apSegment[] */ |
| 14527 |
| 14528 /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ |
| 14529 memset(pCsr, 0, sizeof(*pCsr)); |
| 14530 nByte = sizeof(Fts3SegReader *) * nSeg; |
| 14531 pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); |
| 14532 |
| 14533 if( pCsr->apSegment==0 ){ |
| 14534 rc = SQLITE_NOMEM; |
| 14535 }else{ |
| 14536 memset(pCsr->apSegment, 0, nByte); |
| 14537 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); |
| 14538 } |
| 14539 if( rc==SQLITE_OK ){ |
| 14540 int i; |
| 14541 int rc2; |
| 14542 sqlite3_bind_int64(pStmt, 1, iAbsLevel); |
| 14543 assert( pCsr->nSegment==0 ); |
| 14544 for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){ |
| 14545 rc = sqlite3Fts3SegReaderNew(i, 0, |
| 14546 sqlite3_column_int64(pStmt, 1), /* segdir.start_block */ |
| 14547 sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */ |
| 14548 sqlite3_column_int64(pStmt, 3), /* segdir.end_block */ |
| 14549 sqlite3_column_blob(pStmt, 4), /* segdir.root */ |
| 14550 sqlite3_column_bytes(pStmt, 4), /* segdir.root */ |
| 14551 &pCsr->apSegment[i] |
| 14552 ); |
| 14553 pCsr->nSegment++; |
| 14554 } |
| 14555 rc2 = sqlite3_reset(pStmt); |
| 14556 if( rc==SQLITE_OK ) rc = rc2; |
| 14557 } |
| 14558 |
| 14559 return rc; |
| 14560 } |
| 14561 |
/* Shorthand type names for the structures defined below, which are used
** to read and write segment b-tree nodes during an incremental merge. */
typedef struct IncrmergeWriter IncrmergeWriter;
typedef struct NodeWriter NodeWriter;
typedef struct Blob Blob;
typedef struct NodeReader NodeReader;
| 14566 |
/*
** An instance of the following structure is used as a dynamic buffer
** to build up nodes or other blobs of data in.
**
** The function blobGrowBuffer() is used to extend the allocation. It
** updates a and nAlloc only; maintaining n is left to the code that
** writes into the buffer.
*/
struct Blob {
  char *a;                        /* Pointer to allocation */
  int n;                          /* Number of valid bytes of data in a[] */
  int nAlloc;                     /* Allocated size of a[] (nAlloc>=n) */
};
| 14578 |
/*
** This structure is used to build up buffers containing segment b-tree
** nodes (blocks). One NodeWriter exists for each layer of the tree being
** written (see IncrmergeWriter.aNodeWriter[]).
*/
struct NodeWriter {
  sqlite3_int64 iBlock;           /* Current block id */
  Blob key;                       /* Last key written to the current block */
  Blob block;                     /* Current block image */
};
| 14588 |
/*
** An object of this type contains the state required to create or append
** to an appendable b-tree segment.
**
** aNodeWriter[0] is the writer for leaf nodes (fts3IncrmergeAppend()).
** aNodeWriter[1] and above hold the internal nodes of successive layers
** (fts3IncrmergePush()).
*/
struct IncrmergeWriter {
  int nLeafEst;                   /* Space allocated for leaf blocks */
  int nWork;                      /* Number of leaf pages flushed */
  sqlite3_int64 iAbsLevel;        /* Absolute level of input segments */
  int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */
  sqlite3_int64 iStart;           /* Block number of first allocated block */
  sqlite3_int64 iEnd;             /* Block number of last allocated block */
  sqlite3_int64 nLeafData;        /* Bytes of leaf page data so far */
  u8 bNoLeafData;                 /* If true, store 0 for segment size */
  NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];  /* One per tree layer */
};
| 14604 |
/*
** An object of the following type is used to read data from a single
** FTS segment node. See the following functions:
**
**     nodeReaderInit()
**     nodeReaderNext()
**     nodeReaderRelease()
**
** nodeReaderNext() sets aNode to NULL once the last entry on the node has
** been passed. iChild is left at 0 when reading a leaf node, and the
** doclist fields are only populated in that case.
*/
struct NodeReader {
  const char *aNode;              /* Node image being read; NULL at EOF */
  int nNode;                      /* Size of aNode[] in bytes */
  int iOff;                       /* Current offset within aNode[] */

  /* Output variables. Containing the current node entry. */
  sqlite3_int64 iChild;           /* Pointer to child node */
  Blob term;                      /* Current term */
  const char *aDoclist;           /* Pointer to doclist */
  int nDoclist;                   /* Size of doclist in bytes */
};
| 14624 |
| 14625 /* |
| 14626 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 14627 ** Otherwise, if the allocation at pBlob->a is not already at least nMin |
| 14628 ** bytes in size, extend (realloc) it to be so. |
| 14629 ** |
| 14630 ** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a |
| 14631 ** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc |
| 14632 ** to reflect the new size of the pBlob->a[] buffer. |
| 14633 */ |
| 14634 static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ |
| 14635 if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ |
| 14636 int nAlloc = nMin; |
| 14637 char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); |
| 14638 if( a ){ |
| 14639 pBlob->nAlloc = nAlloc; |
| 14640 pBlob->a = a; |
| 14641 }else{ |
| 14642 *pRc = SQLITE_NOMEM; |
| 14643 } |
| 14644 } |
| 14645 } |
| 14646 |
| 14647 /* |
| 14648 ** Attempt to advance the node-reader object passed as the first argument to |
| 14649 ** the next entry on the node. |
| 14650 ** |
| 14651 ** Return an error code if an error occurs (SQLITE_NOMEM is possible). |
| 14652 ** Otherwise return SQLITE_OK. If there is no next entry on the node |
| 14653 ** (e.g. because the current entry is the last) set NodeReader->aNode to |
| 14654 ** NULL to indicate EOF. Otherwise, populate the NodeReader structure output |
| 14655 ** variables for the new entry. |
| 14656 */ |
| 14657 static int nodeReaderNext(NodeReader *p){ |
| 14658 int bFirst = (p->term.n==0); /* True for first term on the node */ |
| 14659 int nPrefix = 0; /* Bytes to copy from previous term */ |
| 14660 int nSuffix = 0; /* Bytes to append to the prefix */ |
| 14661 int rc = SQLITE_OK; /* Return code */ |
| 14662 |
| 14663 assert( p->aNode ); |
| 14664 if( p->iChild && bFirst==0 ) p->iChild++; |
| 14665 if( p->iOff>=p->nNode ){ |
| 14666 /* EOF */ |
| 14667 p->aNode = 0; |
| 14668 }else{ |
| 14669 if( bFirst==0 ){ |
| 14670 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); |
| 14671 } |
| 14672 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); |
| 14673 |
| 14674 blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); |
| 14675 if( rc==SQLITE_OK ){ |
| 14676 memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); |
| 14677 p->term.n = nPrefix+nSuffix; |
| 14678 p->iOff += nSuffix; |
| 14679 if( p->iChild==0 ){ |
| 14680 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); |
| 14681 p->aDoclist = &p->aNode[p->iOff]; |
| 14682 p->iOff += p->nDoclist; |
| 14683 } |
| 14684 } |
| 14685 } |
| 14686 |
| 14687 assert( p->iOff<=p->nNode ); |
| 14688 |
| 14689 return rc; |
| 14690 } |
| 14691 |
| 14692 /* |
| 14693 ** Release all dynamic resources held by node-reader object *p. |
| 14694 */ |
| 14695 static void nodeReaderRelease(NodeReader *p){ |
| 14696 sqlite3_free(p->term.a); |
| 14697 } |
| 14698 |
| 14699 /* |
| 14700 ** Initialize a node-reader object to read the node in buffer aNode/nNode. |
| 14701 ** |
| 14702 ** If successful, SQLITE_OK is returned and the NodeReader object set to |
| 14703 ** point to the first entry on the node (if any). Otherwise, an SQLite |
| 14704 ** error code is returned. |
| 14705 */ |
| 14706 static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ |
| 14707 memset(p, 0, sizeof(NodeReader)); |
| 14708 p->aNode = aNode; |
| 14709 p->nNode = nNode; |
| 14710 |
| 14711 /* Figure out if this is a leaf or an internal node. */ |
| 14712 if( p->aNode[0] ){ |
| 14713 /* An internal node. */ |
| 14714 p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); |
| 14715 }else{ |
| 14716 p->iOff = 1; |
| 14717 } |
| 14718 |
| 14719 return nodeReaderNext(p); |
| 14720 } |
| 14721 |
| 14722 /* |
| 14723 ** This function is called while writing an FTS segment each time a leaf o |
| 14724 ** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed |
| 14725 ** to be greater than the largest key on the node just written, but smaller |
| 14726 ** than or equal to the first key that will be written to the next leaf |
| 14727 ** node. |
| 14728 ** |
| 14729 ** The block id of the leaf node just written to disk may be found in |
| 14730 ** (pWriter->aNodeWriter[0].iBlock) when this function is called. |
| 14731 */ |
| 14732 static int fts3IncrmergePush( |
| 14733 Fts3Table *p, /* Fts3 table handle */ |
| 14734 IncrmergeWriter *pWriter, /* Writer object */ |
| 14735 const char *zTerm, /* Term to write to internal node */ |
| 14736 int nTerm /* Bytes at zTerm */ |
| 14737 ){ |
| 14738 sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; |
| 14739 int iLayer; |
| 14740 |
| 14741 assert( nTerm>0 ); |
| 14742 for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){ |
| 14743 sqlite3_int64 iNextPtr = 0; |
| 14744 NodeWriter *pNode = &pWriter->aNodeWriter[iLayer]; |
| 14745 int rc = SQLITE_OK; |
| 14746 int nPrefix; |
| 14747 int nSuffix; |
| 14748 int nSpace; |
| 14749 |
| 14750 /* Figure out how much space the key will consume if it is written to |
| 14751 ** the current node of layer iLayer. Due to the prefix compression, |
| 14752 ** the space required changes depending on which node the key is to |
| 14753 ** be added to. */ |
| 14754 nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); |
| 14755 nSuffix = nTerm - nPrefix; |
| 14756 nSpace = sqlite3Fts3VarintLen(nPrefix); |
| 14757 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14758 |
| 14759 if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ |
| 14760 /* If the current node of layer iLayer contains zero keys, or if adding |
| 14761 ** the key to it will not cause it to grow to larger than nNodeSize |
| 14762 ** bytes in size, write the key here. */ |
| 14763 |
| 14764 Blob *pBlk = &pNode->block; |
| 14765 if( pBlk->n==0 ){ |
| 14766 blobGrowBuffer(pBlk, p->nNodeSize, &rc); |
| 14767 if( rc==SQLITE_OK ){ |
| 14768 pBlk->a[0] = (char)iLayer; |
| 14769 pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); |
| 14770 } |
| 14771 } |
| 14772 blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); |
| 14773 blobGrowBuffer(&pNode->key, nTerm, &rc); |
| 14774 |
| 14775 if( rc==SQLITE_OK ){ |
| 14776 if( pNode->key.n ){ |
| 14777 pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); |
| 14778 } |
| 14779 pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); |
| 14780 memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); |
| 14781 pBlk->n += nSuffix; |
| 14782 |
| 14783 memcpy(pNode->key.a, zTerm, nTerm); |
| 14784 pNode->key.n = nTerm; |
| 14785 } |
| 14786 }else{ |
| 14787 /* Otherwise, flush the current node of layer iLayer to disk. |
| 14788 ** Then allocate a new, empty sibling node. The key will be written |
| 14789 ** into the parent of this node. */ |
| 14790 rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); |
| 14791 |
| 14792 assert( pNode->block.nAlloc>=p->nNodeSize ); |
| 14793 pNode->block.a[0] = (char)iLayer; |
| 14794 pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); |
| 14795 |
| 14796 iNextPtr = pNode->iBlock; |
| 14797 pNode->iBlock++; |
| 14798 pNode->key.n = 0; |
| 14799 } |
| 14800 |
| 14801 if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; |
| 14802 iPtr = iNextPtr; |
| 14803 } |
| 14804 |
| 14805 assert( 0 ); |
| 14806 return 0; |
| 14807 } |
| 14808 |
| 14809 /* |
| 14810 ** Append a term and (optionally) doclist to the FTS segment node currently |
| 14811 ** stored in blob *pNode. The node need not contain any terms, but the |
| 14812 ** header must be written before this function is called. |
| 14813 ** |
| 14814 ** A node header is a single 0x00 byte for a leaf node, or a height varint |
| 14815 ** followed by the left-hand-child varint for an internal node. |
| 14816 ** |
| 14817 ** The term to be appended is passed via arguments zTerm/nTerm. For a |
| 14818 ** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal |
| 14819 ** node, both aDoclist and nDoclist must be passed 0. |
| 14820 ** |
| 14821 ** If the size of the value in blob pPrev is zero, then this is the first |
| 14822 ** term written to the node. Otherwise, pPrev contains a copy of the |
| 14823 ** previous term. Before this function returns, it is updated to contain a |
| 14824 ** copy of zTerm/nTerm. |
| 14825 ** |
| 14826 ** It is assumed that the buffer associated with pNode is already large |
| 14827 ** enough to accommodate the new entry. The buffer associated with pPrev |
| 14828 ** is extended by this function if requrired. |
| 14829 ** |
| 14830 ** If an error (i.e. OOM condition) occurs, an SQLite error code is |
| 14831 ** returned. Otherwise, SQLITE_OK. |
| 14832 */ |
| 14833 static int fts3AppendToNode( |
| 14834 Blob *pNode, /* Current node image to append to */ |
| 14835 Blob *pPrev, /* Buffer containing previous term written */ |
| 14836 const char *zTerm, /* New term to write */ |
| 14837 int nTerm, /* Size of zTerm in bytes */ |
| 14838 const char *aDoclist, /* Doclist (or NULL) to write */ |
| 14839 int nDoclist /* Size of aDoclist in bytes */ |
| 14840 ){ |
| 14841 int rc = SQLITE_OK; /* Return code */ |
| 14842 int bFirst = (pPrev->n==0); /* True if this is the first term written */ |
| 14843 int nPrefix; /* Size of term prefix in bytes */ |
| 14844 int nSuffix; /* Size of term suffix in bytes */ |
| 14845 |
| 14846 /* Node must have already been started. There must be a doclist for a |
| 14847 ** leaf node, and there must not be a doclist for an internal node. */ |
| 14848 assert( pNode->n>0 ); |
| 14849 assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); |
| 14850 |
| 14851 blobGrowBuffer(pPrev, nTerm, &rc); |
| 14852 if( rc!=SQLITE_OK ) return rc; |
| 14853 |
| 14854 nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); |
| 14855 nSuffix = nTerm - nPrefix; |
| 14856 memcpy(pPrev->a, zTerm, nTerm); |
| 14857 pPrev->n = nTerm; |
| 14858 |
| 14859 if( bFirst==0 ){ |
| 14860 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); |
| 14861 } |
| 14862 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); |
| 14863 memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); |
| 14864 pNode->n += nSuffix; |
| 14865 |
| 14866 if( aDoclist ){ |
| 14867 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); |
| 14868 memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); |
| 14869 pNode->n += nDoclist; |
| 14870 } |
| 14871 |
| 14872 assert( pNode->n<=pNode->nAlloc ); |
| 14873 |
| 14874 return SQLITE_OK; |
| 14875 } |
| 14876 |
| 14877 /* |
| 14878 ** Append the current term and doclist pointed to by cursor pCsr to the |
| 14879 ** appendable b-tree segment opened for writing by pWriter. |
| 14880 ** |
| 14881 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. |
| 14882 */ |
| 14883 static int fts3IncrmergeAppend( |
| 14884 Fts3Table *p, /* Fts3 table handle */ |
| 14885 IncrmergeWriter *pWriter, /* Writer object */ |
| 14886 Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ |
| 14887 ){ |
| 14888 const char *zTerm = pCsr->zTerm; |
| 14889 int nTerm = pCsr->nTerm; |
| 14890 const char *aDoclist = pCsr->aDoclist; |
| 14891 int nDoclist = pCsr->nDoclist; |
| 14892 int rc = SQLITE_OK; /* Return code */ |
| 14893 int nSpace; /* Total space in bytes required on leaf */ |
| 14894 int nPrefix; /* Size of prefix shared with previous term */ |
| 14895 int nSuffix; /* Size of suffix (nTerm - nPrefix) */ |
| 14896 NodeWriter *pLeaf; /* Object used to write leaf nodes */ |
| 14897 |
| 14898 pLeaf = &pWriter->aNodeWriter[0]; |
| 14899 nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); |
| 14900 nSuffix = nTerm - nPrefix; |
| 14901 |
| 14902 nSpace = sqlite3Fts3VarintLen(nPrefix); |
| 14903 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14904 nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; |
| 14905 |
| 14906 /* If the current block is not empty, and if adding this term/doclist |
| 14907 ** to the current block would make it larger than Fts3Table.nNodeSize |
| 14908 ** bytes, write this block out to the database. */ |
| 14909 if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ |
| 14910 rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); |
| 14911 pWriter->nWork++; |
| 14912 |
| 14913 /* Add the current term to the parent node. The term added to the |
| 14914 ** parent must: |
| 14915 ** |
| 14916 ** a) be greater than the largest term on the leaf node just written |
| 14917 ** to the database (still available in pLeaf->key), and |
| 14918 ** |
| 14919 ** b) be less than or equal to the term about to be added to the new |
| 14920 ** leaf node (zTerm/nTerm). |
| 14921 ** |
| 14922 ** In other words, it must be the prefix of zTerm 1 byte longer than |
| 14923 ** the common prefix (if any) of zTerm and pWriter->zTerm. |
| 14924 */ |
| 14925 if( rc==SQLITE_OK ){ |
| 14926 rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); |
| 14927 } |
| 14928 |
| 14929 /* Advance to the next output block */ |
| 14930 pLeaf->iBlock++; |
| 14931 pLeaf->key.n = 0; |
| 14932 pLeaf->block.n = 0; |
| 14933 |
| 14934 nSuffix = nTerm; |
| 14935 nSpace = 1; |
| 14936 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14937 nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; |
| 14938 } |
| 14939 |
| 14940 pWriter->nLeafData += nSpace; |
| 14941 blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); |
| 14942 if( rc==SQLITE_OK ){ |
| 14943 if( pLeaf->block.n==0 ){ |
| 14944 pLeaf->block.n = 1; |
| 14945 pLeaf->block.a[0] = '\0'; |
| 14946 } |
| 14947 rc = fts3AppendToNode( |
| 14948 &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist |
| 14949 ); |
| 14950 } |
| 14951 |
| 14952 return rc; |
| 14953 } |
| 14954 |
| 14955 /* |
| 14956 ** This function is called to release all dynamic resources held by the |
| 14957 ** merge-writer object pWriter, and if no error has occurred, to flush |
| 14958 ** all outstanding node buffers held by pWriter to disk. |
| 14959 ** |
| 14960 ** If *pRc is not SQLITE_OK when this function is called, then no attempt |
| 14961 ** is made to write any data to disk. Instead, this function serves only |
| 14962 ** to release outstanding resources. |
| 14963 ** |
| 14964 ** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while |
| 14965 ** flushing buffers to disk, *pRc is set to an SQLite error code before |
| 14966 ** returning. |
| 14967 */ |
| 14968 static void fts3IncrmergeRelease( |
| 14969 Fts3Table *p, /* FTS3 table handle */ |
| 14970 IncrmergeWriter *pWriter, /* Merge-writer object */ |
| 14971 int *pRc /* IN/OUT: Error code */ |
| 14972 ){ |
| 14973 int i; /* Used to iterate through non-root layers */ |
| 14974 int iRoot; /* Index of root in pWriter->aNodeWriter */ |
| 14975 NodeWriter *pRoot; /* NodeWriter for root node */ |
| 14976 int rc = *pRc; /* Error code */ |
| 14977 |
| 14978 /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment |
| 14979 ** root node. If the segment fits entirely on a single leaf node, iRoot |
| 14980 ** will be set to 0. If the root node is the parent of the leaves, iRoot |
| 14981 ** will be 1. And so on. */ |
| 14982 for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ |
| 14983 NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; |
| 14984 if( pNode->block.n>0 ) break; |
| 14985 assert( *pRc || pNode->block.nAlloc==0 ); |
| 14986 assert( *pRc || pNode->key.nAlloc==0 ); |
| 14987 sqlite3_free(pNode->block.a); |
| 14988 sqlite3_free(pNode->key.a); |
| 14989 } |
| 14990 |
| 14991 /* Empty output segment. This is a no-op. */ |
| 14992 if( iRoot<0 ) return; |
| 14993 |
| 14994 /* The entire output segment fits on a single node. Normally, this means |
| 14995 ** the node would be stored as a blob in the "root" column of the %_segdir |
| 14996 ** table. However, this is not permitted in this case. The problem is that |
| 14997 ** space has already been reserved in the %_segments table, and so the |
| 14998 ** start_block and end_block fields of the %_segdir table must be populated. |
| 14999 ** And, by design or by accident, released versions of FTS cannot handle |
| 15000 ** segments that fit entirely on the root node with start_block!=0. |
| 15001 ** |
| 15002 ** Instead, create a synthetic root node that contains nothing but a |
| 15003 ** pointer to the single content node. So that the segment consists of a |
| 15004 ** single leaf and a single interior (root) node. |
| 15005 ** |
| 15006 ** Todo: Better might be to defer allocating space in the %_segments |
| 15007 ** table until we are sure it is needed. |
| 15008 */ |
| 15009 if( iRoot==0 ){ |
| 15010 Blob *pBlock = &pWriter->aNodeWriter[1].block; |
| 15011 blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); |
| 15012 if( rc==SQLITE_OK ){ |
| 15013 pBlock->a[0] = 0x01; |
| 15014 pBlock->n = 1 + sqlite3Fts3PutVarint( |
| 15015 &pBlock->a[1], pWriter->aNodeWriter[0].iBlock |
| 15016 ); |
| 15017 } |
| 15018 iRoot = 1; |
| 15019 } |
| 15020 pRoot = &pWriter->aNodeWriter[iRoot]; |
| 15021 |
| 15022 /* Flush all currently outstanding nodes to disk. */ |
| 15023 for(i=0; i<iRoot; i++){ |
| 15024 NodeWriter *pNode = &pWriter->aNodeWriter[i]; |
| 15025 if( pNode->block.n>0 && rc==SQLITE_OK ){ |
| 15026 rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); |
| 15027 } |
| 15028 sqlite3_free(pNode->block.a); |
| 15029 sqlite3_free(pNode->key.a); |
| 15030 } |
| 15031 |
| 15032 /* Write the %_segdir record. */ |
| 15033 if( rc==SQLITE_OK ){ |
| 15034 rc = fts3WriteSegdir(p, |
| 15035 pWriter->iAbsLevel+1, /* level */ |
| 15036 pWriter->iIdx, /* idx */ |
| 15037 pWriter->iStart, /* start_block */ |
| 15038 pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ |
| 15039 pWriter->iEnd, /* end_block */ |
| 15040 (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ |
| 15041 pRoot->block.a, pRoot->block.n /* root */ |
| 15042 ); |
| 15043 } |
| 15044 sqlite3_free(pRoot->block.a); |
| 15045 sqlite3_free(pRoot->key.a); |
| 15046 |
| 15047 *pRc = rc; |
| 15048 } |
| 15049 |
/*
** Compare the term in buffer zLhs (size in bytes nLhs) with that in
** zRhs (size in bytes nRhs) using memcmp(). If one term is a prefix of
** the other, the shorter term is considered to be the smaller one.
**
** Either buffer pointer may be NULL, in which case no bytes are compared
** and the result depends only on the two lengths. (Callers can pass the
** term of a node-reader that never read a term.)
**
** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
** if it is greater.
*/
static int fts3TermCmp(
  const char *zLhs, int nLhs,     /* LHS of comparison */
  const char *zRhs, int nRhs      /* RHS of comparison */
){
  int nCmp = (nLhs<nRhs ? nLhs : nRhs);   /* Bytes common to both terms */
  int res = 0;

  /* Passing a NULL pointer to memcmp() is undefined behaviour even when
  ** the length is zero, so only call it when there is something to
  ** compare and both buffers exist. */
  if( nCmp>0 && zLhs!=0 && zRhs!=0 ){
    res = memcmp(zLhs, zRhs, (size_t)nCmp);
  }
  if( res==0 ) res = nLhs - nRhs;

  return res;
}
| 15070 |
| 15071 |
| 15072 /* |
| 15073 ** Query to see if the entry in the %_segments table with blockid iEnd is |
| 15074 ** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before |
| 15075 ** returning. Otherwise, set *pbRes to 0. |
| 15076 ** |
| 15077 ** Or, if an error occurs while querying the database, return an SQLite |
| 15078 ** error code. The final value of *pbRes is undefined in this case. |
| 15079 ** |
| 15080 ** This is used to test if a segment is an "appendable" segment. If it |
| 15081 ** is, then a NULL entry has been inserted into the %_segments table |
| 15082 ** with blockid %_segdir.end_block. |
| 15083 */ |
| 15084 static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ |
| 15085 int bRes = 0; /* Result to set *pbRes to */ |
| 15086 sqlite3_stmt *pCheck = 0; /* Statement to query database with */ |
| 15087 int rc; /* Return code */ |
| 15088 |
| 15089 rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); |
| 15090 if( rc==SQLITE_OK ){ |
| 15091 sqlite3_bind_int64(pCheck, 1, iEnd); |
| 15092 if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; |
| 15093 rc = sqlite3_reset(pCheck); |
| 15094 } |
| 15095 |
| 15096 *pbRes = bRes; |
| 15097 return rc; |
| 15098 } |
| 15099 |
| 15100 /* |
| 15101 ** This function is called when initializing an incremental-merge operation. |
| 15102 ** It checks if the existing segment with index value iIdx at absolute level |
| 15103 ** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the |
| 15104 ** merge-writer object *pWriter is initialized to write to it. |
| 15105 ** |
| 15106 ** An existing segment can be appended to by an incremental merge if: |
| 15107 ** |
| 15108 ** * It was initially created as an appendable segment (with all required |
| 15109 ** space pre-allocated), and |
| 15110 ** |
| 15111 ** * The first key read from the input (arguments zKey and nKey) is |
| 15112 ** greater than the largest key currently stored in the potential |
| 15113 ** output segment. |
| 15114 */ |
| 15115 static int fts3IncrmergeLoad( |
| 15116 Fts3Table *p, /* Fts3 table handle */ |
| 15117 sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ |
| 15118 int iIdx, /* Index of candidate output segment */ |
| 15119 const char *zKey, /* First key to write */ |
| 15120 int nKey, /* Number of bytes in nKey */ |
| 15121 IncrmergeWriter *pWriter /* Populate this object */ |
| 15122 ){ |
| 15123 int rc; /* Return code */ |
| 15124 sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ |
| 15125 |
| 15126 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); |
| 15127 if( rc==SQLITE_OK ){ |
| 15128 sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ |
| 15129 sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ |
| 15130 sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ |
| 15131 const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ |
| 15132 int nRoot = 0; /* Size of aRoot[] in bytes */ |
| 15133 int rc2; /* Return code from sqlite3_reset() */ |
| 15134 int bAppendable = 0; /* Set to true if segment is appendable */ |
| 15135 |
| 15136 /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ |
| 15137 sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); |
| 15138 sqlite3_bind_int(pSelect, 2, iIdx); |
| 15139 if( sqlite3_step(pSelect)==SQLITE_ROW ){ |
| 15140 iStart = sqlite3_column_int64(pSelect, 1); |
| 15141 iLeafEnd = sqlite3_column_int64(pSelect, 2); |
| 15142 fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); |
| 15143 if( pWriter->nLeafData<0 ){ |
| 15144 pWriter->nLeafData = pWriter->nLeafData * -1; |
| 15145 } |
| 15146 pWriter->bNoLeafData = (pWriter->nLeafData==0); |
| 15147 nRoot = sqlite3_column_bytes(pSelect, 4); |
| 15148 aRoot = sqlite3_column_blob(pSelect, 4); |
| 15149 }else{ |
| 15150 return sqlite3_reset(pSelect); |
| 15151 } |
| 15152 |
| 15153 /* Check for the zero-length marker in the %_segments table */ |
| 15154 rc = fts3IsAppendable(p, iEnd, &bAppendable); |
| 15155 |
| 15156 /* Check that zKey/nKey is larger than the largest key the candidate */ |
| 15157 if( rc==SQLITE_OK && bAppendable ){ |
| 15158 char *aLeaf = 0; |
| 15159 int nLeaf = 0; |
| 15160 |
| 15161 rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); |
| 15162 if( rc==SQLITE_OK ){ |
| 15163 NodeReader reader; |
| 15164 for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); |
| 15165 rc==SQLITE_OK && reader.aNode; |
| 15166 rc = nodeReaderNext(&reader) |
| 15167 ){ |
| 15168 assert( reader.aNode ); |
| 15169 } |
| 15170 if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ |
| 15171 bAppendable = 0; |
| 15172 } |
| 15173 nodeReaderRelease(&reader); |
| 15174 } |
| 15175 sqlite3_free(aLeaf); |
| 15176 } |
| 15177 |
| 15178 if( rc==SQLITE_OK && bAppendable ){ |
| 15179 /* It is possible to append to this segment. Set up the IncrmergeWriter |
| 15180 ** object to do so. */ |
| 15181 int i; |
| 15182 int nHeight = (int)aRoot[0]; |
| 15183 NodeWriter *pNode; |
| 15184 |
| 15185 pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; |
| 15186 pWriter->iStart = iStart; |
| 15187 pWriter->iEnd = iEnd; |
| 15188 pWriter->iAbsLevel = iAbsLevel; |
| 15189 pWriter->iIdx = iIdx; |
| 15190 |
| 15191 for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ |
| 15192 pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; |
| 15193 } |
| 15194 |
| 15195 pNode = &pWriter->aNodeWriter[nHeight]; |
| 15196 pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; |
| 15197 blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); |
| 15198 if( rc==SQLITE_OK ){ |
| 15199 memcpy(pNode->block.a, aRoot, nRoot); |
| 15200 pNode->block.n = nRoot; |
| 15201 } |
| 15202 |
| 15203 for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ |
| 15204 NodeReader reader; |
| 15205 pNode = &pWriter->aNodeWriter[i]; |
| 15206 |
| 15207 rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); |
| 15208 while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); |
| 15209 blobGrowBuffer(&pNode->key, reader.term.n, &rc); |
| 15210 if( rc==SQLITE_OK ){ |
| 15211 memcpy(pNode->key.a, reader.term.a, reader.term.n); |
| 15212 pNode->key.n = reader.term.n; |
| 15213 if( i>0 ){ |
| 15214 char *aBlock = 0; |
| 15215 int nBlock = 0; |
| 15216 pNode = &pWriter->aNodeWriter[i-1]; |
| 15217 pNode->iBlock = reader.iChild; |
| 15218 rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); |
| 15219 blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); |
| 15220 if( rc==SQLITE_OK ){ |
| 15221 memcpy(pNode->block.a, aBlock, nBlock); |
| 15222 pNode->block.n = nBlock; |
| 15223 } |
| 15224 sqlite3_free(aBlock); |
| 15225 } |
| 15226 } |
| 15227 nodeReaderRelease(&reader); |
| 15228 } |
| 15229 } |
| 15230 |
| 15231 rc2 = sqlite3_reset(pSelect); |
| 15232 if( rc==SQLITE_OK ) rc = rc2; |
| 15233 } |
| 15234 |
| 15235 return rc; |
| 15236 } |
| 15237 |
| 15238 /* |
| 15239 ** Determine the largest segment index value that exists within absolute |
| 15240 ** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus |
| 15241 ** one before returning SQLITE_OK. Or, if there are no segments at all |
| 15242 ** within level iAbsLevel, set *piIdx to zero. |
| 15243 ** |
| 15244 ** If an error occurs, return an SQLite error code. The final value of |
| 15245 ** *piIdx is undefined in this case. |
| 15246 */ |
| 15247 static int fts3IncrmergeOutputIdx( |
| 15248 Fts3Table *p, /* FTS Table handle */ |
| 15249 sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ |
| 15250 int *piIdx /* OUT: Next free index at iAbsLevel+1 */ |
| 15251 ){ |
| 15252 int rc; |
| 15253 sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ |
| 15254 |
| 15255 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); |
| 15256 if( rc==SQLITE_OK ){ |
| 15257 sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); |
| 15258 sqlite3_step(pOutputIdx); |
| 15259 *piIdx = sqlite3_column_int(pOutputIdx, 0); |
| 15260 rc = sqlite3_reset(pOutputIdx); |
| 15261 } |
| 15262 |
| 15263 return rc; |
| 15264 } |
| 15265 |
| 15266 /* |
| 15267 ** Allocate an appendable output segment on absolute level iAbsLevel+1 |
| 15268 ** with idx value iIdx. |
| 15269 ** |
| 15270 ** In the %_segdir table, a segment is defined by the values in three |
| 15271 ** columns: |
| 15272 ** |
| 15273 ** start_block |
| 15274 ** leaves_end_block |
| 15275 ** end_block |
| 15276 ** |
| 15277 ** When an appendable segment is allocated, it is estimated that the |
| 15278 ** maximum number of leaf blocks that may be required is the sum of the |
| 15279 ** number of leaf blocks consumed by the input segments, plus the number |
| 15280 ** of input segments, multiplied by two. This value is stored in stack |
| 15281 ** variable nLeafEst. |
| 15282 ** |
| 15283 ** A total of 16*nLeafEst blocks are allocated when an appendable segment |
| 15284 ** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous |
| 15285 ** array of leaf nodes starts at the first block allocated. The array |
| 15286 ** of interior nodes that are parents of the leaf nodes start at block |
| 15287 ** (start_block + (1 + end_block - start_block) / 16). And so on. |
| 15288 ** |
| 15289 ** In the actual code below, the value "16" is replaced with the |
| 15290 ** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. |
| 15291 */ |
| 15292 static int fts3IncrmergeWriter( |
| 15293 Fts3Table *p, /* Fts3 table handle */ |
| 15294 sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ |
| 15295 int iIdx, /* Index of new output segment */ |
| 15296 Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ |
| 15297 IncrmergeWriter *pWriter /* Populate this object */ |
| 15298 ){ |
| 15299 int rc; /* Return Code */ |
| 15300 int i; /* Iterator variable */ |
| 15301 int nLeafEst = 0; /* Blocks allocated for leaf nodes */ |
| 15302 sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ |
| 15303 sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ |
| 15304 |
| 15305 /* Calculate nLeafEst. */ |
| 15306 rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); |
| 15307 if( rc==SQLITE_OK ){ |
| 15308 sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); |
| 15309 sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); |
| 15310 if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ |
| 15311 nLeafEst = sqlite3_column_int(pLeafEst, 0); |
| 15312 } |
| 15313 rc = sqlite3_reset(pLeafEst); |
| 15314 } |
| 15315 if( rc!=SQLITE_OK ) return rc; |
| 15316 |
| 15317 /* Calculate the first block to use in the output segment */ |
| 15318 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); |
| 15319 if( rc==SQLITE_OK ){ |
| 15320 if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ |
| 15321 pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); |
| 15322 pWriter->iEnd = pWriter->iStart - 1; |
| 15323 pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; |
| 15324 } |
| 15325 rc = sqlite3_reset(pFirstBlock); |
| 15326 } |
| 15327 if( rc!=SQLITE_OK ) return rc; |
| 15328 |
| 15329 /* Insert the marker in the %_segments table to make sure nobody tries |
| 15330 ** to steal the space just allocated. This is also used to identify |
| 15331 ** appendable segments. */ |
| 15332 rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); |
| 15333 if( rc!=SQLITE_OK ) return rc; |
| 15334 |
| 15335 pWriter->iAbsLevel = iAbsLevel; |
| 15336 pWriter->nLeafEst = nLeafEst; |
| 15337 pWriter->iIdx = iIdx; |
| 15338 |
| 15339 /* Set up the array of NodeWriter objects */ |
| 15340 for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ |
| 15341 pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; |
| 15342 } |
| 15343 return SQLITE_OK; |
| 15344 } |
| 15345 |
| 15346 /* |
| 15347 ** Remove an entry from the %_segdir table. This involves running the |
| 15348 ** following two statements: |
| 15349 ** |
| 15350 ** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx |
| 15351 ** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx |
| 15352 ** |
| 15353 ** The DELETE statement removes the specific %_segdir level. The UPDATE |
| 15354 ** statement ensures that the remaining segments have contiguously allocated |
| 15355 ** idx values. |
| 15356 */ |
| 15357 static int fts3RemoveSegdirEntry( |
| 15358 Fts3Table *p, /* FTS3 table handle */ |
| 15359 sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ |
| 15360 int iIdx /* Index of %_segdir entry to delete */ |
| 15361 ){ |
| 15362 int rc; /* Return code */ |
| 15363 sqlite3_stmt *pDelete = 0; /* DELETE statement */ |
| 15364 |
| 15365 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); |
| 15366 if( rc==SQLITE_OK ){ |
| 15367 sqlite3_bind_int64(pDelete, 1, iAbsLevel); |
| 15368 sqlite3_bind_int(pDelete, 2, iIdx); |
| 15369 sqlite3_step(pDelete); |
| 15370 rc = sqlite3_reset(pDelete); |
| 15371 } |
| 15372 |
| 15373 return rc; |
| 15374 } |
| 15375 |
| 15376 /* |
| 15377 ** One or more segments have just been removed from absolute level iAbsLevel. |
| 15378 ** Update the 'idx' values of the remaining segments in the level so that |
| 15379 ** the idx values are a contiguous sequence starting from 0. |
| 15380 */ |
| 15381 static int fts3RepackSegdirLevel( |
| 15382 Fts3Table *p, /* FTS3 table handle */ |
| 15383 sqlite3_int64 iAbsLevel /* Absolute level to repack */ |
| 15384 ){ |
| 15385 int rc; /* Return code */ |
| 15386 int *aIdx = 0; /* Array of remaining idx values */ |
| 15387 int nIdx = 0; /* Valid entries in aIdx[] */ |
| 15388 int nAlloc = 0; /* Allocated size of aIdx[] */ |
| 15389 int i; /* Iterator variable */ |
| 15390 sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ |
| 15391 sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ |
| 15392 |
| 15393 rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); |
| 15394 if( rc==SQLITE_OK ){ |
| 15395 int rc2; |
| 15396 sqlite3_bind_int64(pSelect, 1, iAbsLevel); |
| 15397 while( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 15398 if( nIdx>=nAlloc ){ |
| 15399 int *aNew; |
| 15400 nAlloc += 16; |
| 15401 aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); |
| 15402 if( !aNew ){ |
| 15403 rc = SQLITE_NOMEM; |
| 15404 break; |
| 15405 } |
| 15406 aIdx = aNew; |
| 15407 } |
| 15408 aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); |
| 15409 } |
| 15410 rc2 = sqlite3_reset(pSelect); |
| 15411 if( rc==SQLITE_OK ) rc = rc2; |
| 15412 } |
| 15413 |
| 15414 if( rc==SQLITE_OK ){ |
| 15415 rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); |
| 15416 } |
| 15417 if( rc==SQLITE_OK ){ |
| 15418 sqlite3_bind_int64(pUpdate, 2, iAbsLevel); |
| 15419 } |
| 15420 |
| 15421 assert( p->bIgnoreSavepoint==0 ); |
| 15422 p->bIgnoreSavepoint = 1; |
| 15423 for(i=0; rc==SQLITE_OK && i<nIdx; i++){ |
| 15424 if( aIdx[i]!=i ){ |
| 15425 sqlite3_bind_int(pUpdate, 3, aIdx[i]); |
| 15426 sqlite3_bind_int(pUpdate, 1, i); |
| 15427 sqlite3_step(pUpdate); |
| 15428 rc = sqlite3_reset(pUpdate); |
| 15429 } |
| 15430 } |
| 15431 p->bIgnoreSavepoint = 0; |
| 15432 |
| 15433 sqlite3_free(aIdx); |
| 15434 return rc; |
| 15435 } |
| 15436 |
| 15437 static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ |
| 15438 pNode->a[0] = (char)iHeight; |
| 15439 if( iChild ){ |
| 15440 assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); |
| 15441 pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); |
| 15442 }else{ |
| 15443 assert( pNode->nAlloc>=1 ); |
| 15444 pNode->n = 1; |
| 15445 } |
| 15446 } |
| 15447 |
| 15448 /* |
| 15449 ** The first two arguments are a pointer to and the size of a segment b-tree |
| 15450 ** node. The node may be a leaf or an internal node. |
| 15451 ** |
| 15452 ** This function creates a new node image in blob object *pNew by copying |
| 15453 ** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) |
| 15454 ** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. |
| 15455 */ |
| 15456 static int fts3TruncateNode( |
| 15457 const char *aNode, /* Current node image */ |
| 15458 int nNode, /* Size of aNode in bytes */ |
| 15459 Blob *pNew, /* OUT: Write new node image here */ |
| 15460 const char *zTerm, /* Omit all terms smaller than this */ |
| 15461 int nTerm, /* Size of zTerm in bytes */ |
| 15462 sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ |
| 15463 ){ |
| 15464 NodeReader reader; /* Reader object */ |
| 15465 Blob prev = {0, 0, 0}; /* Previous term written to new node */ |
| 15466 int rc = SQLITE_OK; /* Return code */ |
| 15467 int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ |
| 15468 |
| 15469 /* Allocate required output space */ |
| 15470 blobGrowBuffer(pNew, nNode, &rc); |
| 15471 if( rc!=SQLITE_OK ) return rc; |
| 15472 pNew->n = 0; |
| 15473 |
| 15474 /* Populate new node buffer */ |
| 15475 for(rc = nodeReaderInit(&reader, aNode, nNode); |
| 15476 rc==SQLITE_OK && reader.aNode; |
| 15477 rc = nodeReaderNext(&reader) |
| 15478 ){ |
| 15479 if( pNew->n==0 ){ |
| 15480 int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); |
| 15481 if( res<0 || (bLeaf==0 && res==0) ) continue; |
| 15482 fts3StartNode(pNew, (int)aNode[0], reader.iChild); |
| 15483 *piBlock = reader.iChild; |
| 15484 } |
| 15485 rc = fts3AppendToNode( |
| 15486 pNew, &prev, reader.term.a, reader.term.n, |
| 15487 reader.aDoclist, reader.nDoclist |
| 15488 ); |
| 15489 if( rc!=SQLITE_OK ) break; |
| 15490 } |
| 15491 if( pNew->n==0 ){ |
| 15492 fts3StartNode(pNew, (int)aNode[0], reader.iChild); |
| 15493 *piBlock = reader.iChild; |
| 15494 } |
| 15495 assert( pNew->n<=pNew->nAlloc ); |
| 15496 |
| 15497 nodeReaderRelease(&reader); |
| 15498 sqlite3_free(prev.a); |
| 15499 return rc; |
| 15500 } |
| 15501 |
| 15502 /* |
| 15503 ** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute |
| 15504 ** level iAbsLevel. This may involve deleting entries from the %_segments |
| 15505 ** table, and modifying existing entries in both the %_segments and %_segdir |
| 15506 ** tables. |
| 15507 ** |
| 15508 ** SQLITE_OK is returned if the segment is updated successfully. Or an |
| 15509 ** SQLite error code otherwise. |
| 15510 */ |
| 15511 static int fts3TruncateSegment( |
| 15512 Fts3Table *p, /* FTS3 table handle */ |
| 15513 sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ |
| 15514 int iIdx, /* Index within level of segment to modify */ |
| 15515 const char *zTerm, /* Remove terms smaller than this */ |
| 15516 int nTerm /* Number of bytes in buffer zTerm */ |
| 15517 ){ |
| 15518 int rc = SQLITE_OK; /* Return code */ |
| 15519 Blob root = {0,0,0}; /* New root page image */ |
| 15520 Blob block = {0,0,0}; /* Buffer used for any other block */ |
| 15521 sqlite3_int64 iBlock = 0; /* Block id */ |
| 15522 sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ |
| 15523 sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ |
| 15524 sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ |
| 15525 |
| 15526 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); |
| 15527 if( rc==SQLITE_OK ){ |
| 15528 int rc2; /* sqlite3_reset() return code */ |
| 15529 sqlite3_bind_int64(pFetch, 1, iAbsLevel); |
| 15530 sqlite3_bind_int(pFetch, 2, iIdx); |
| 15531 if( SQLITE_ROW==sqlite3_step(pFetch) ){ |
| 15532 const char *aRoot = sqlite3_column_blob(pFetch, 4); |
| 15533 int nRoot = sqlite3_column_bytes(pFetch, 4); |
| 15534 iOldStart = sqlite3_column_int64(pFetch, 1); |
| 15535 rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); |
| 15536 } |
| 15537 rc2 = sqlite3_reset(pFetch); |
| 15538 if( rc==SQLITE_OK ) rc = rc2; |
| 15539 } |
| 15540 |
| 15541 while( rc==SQLITE_OK && iBlock ){ |
| 15542 char *aBlock = 0; |
| 15543 int nBlock = 0; |
| 15544 iNewStart = iBlock; |
| 15545 |
| 15546 rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); |
| 15547 if( rc==SQLITE_OK ){ |
| 15548 rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); |
| 15549 } |
| 15550 if( rc==SQLITE_OK ){ |
| 15551 rc = fts3WriteSegment(p, iNewStart, block.a, block.n); |
| 15552 } |
| 15553 sqlite3_free(aBlock); |
| 15554 } |
| 15555 |
| 15556 /* Variable iNewStart now contains the first valid leaf node. */ |
| 15557 if( rc==SQLITE_OK && iNewStart ){ |
| 15558 sqlite3_stmt *pDel = 0; |
| 15559 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); |
| 15560 if( rc==SQLITE_OK ){ |
| 15561 sqlite3_bind_int64(pDel, 1, iOldStart); |
| 15562 sqlite3_bind_int64(pDel, 2, iNewStart-1); |
| 15563 sqlite3_step(pDel); |
| 15564 rc = sqlite3_reset(pDel); |
| 15565 } |
| 15566 } |
| 15567 |
| 15568 if( rc==SQLITE_OK ){ |
| 15569 sqlite3_stmt *pChomp = 0; |
| 15570 rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); |
| 15571 if( rc==SQLITE_OK ){ |
| 15572 sqlite3_bind_int64(pChomp, 1, iNewStart); |
| 15573 sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); |
| 15574 sqlite3_bind_int64(pChomp, 3, iAbsLevel); |
| 15575 sqlite3_bind_int(pChomp, 4, iIdx); |
| 15576 sqlite3_step(pChomp); |
| 15577 rc = sqlite3_reset(pChomp); |
| 15578 } |
| 15579 } |
| 15580 |
| 15581 sqlite3_free(root.a); |
| 15582 sqlite3_free(block.a); |
| 15583 return rc; |
| 15584 } |
| 15585 |
| 15586 /* |
| 15587 ** This function is called after an incrmental-merge operation has run to |
| 15588 ** merge (or partially merge) two or more segments from absolute level |
| 15589 ** iAbsLevel. |
| 15590 ** |
| 15591 ** Each input segment is either removed from the db completely (if all of |
| 15592 ** its data was copied to the output segment by the incrmerge operation) |
| 15593 ** or modified in place so that it no longer contains those entries that |
| 15594 ** have been duplicated in the output segment. |
| 15595 */ |
| 15596 static int fts3IncrmergeChomp( |
| 15597 Fts3Table *p, /* FTS table handle */ |
| 15598 sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ |
| 15599 Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ |
| 15600 int *pnRem /* Number of segments not deleted */ |
| 15601 ){ |
| 15602 int i; |
| 15603 int nRem = 0; |
| 15604 int rc = SQLITE_OK; |
| 15605 |
| 15606 for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ |
| 15607 Fts3SegReader *pSeg = 0; |
| 15608 int j; |
| 15609 |
| 15610 /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding |
| 15611 ** somewhere in the pCsr->apSegment[] array. */ |
| 15612 for(j=0; ALWAYS(j<pCsr->nSegment); j++){ |
| 15613 pSeg = pCsr->apSegment[j]; |
| 15614 if( pSeg->iIdx==i ) break; |
| 15615 } |
| 15616 assert( j<pCsr->nSegment && pSeg->iIdx==i ); |
| 15617 |
| 15618 if( pSeg->aNode==0 ){ |
| 15619 /* Seg-reader is at EOF. Remove the entire input segment. */ |
| 15620 rc = fts3DeleteSegment(p, pSeg); |
| 15621 if( rc==SQLITE_OK ){ |
| 15622 rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); |
| 15623 } |
| 15624 *pnRem = 0; |
| 15625 }else{ |
| 15626 /* The incremental merge did not copy all the data from this |
| 15627 ** segment to the upper level. The segment is modified in place |
| 15628 ** so that it contains no keys smaller than zTerm/nTerm. */ |
| 15629 const char *zTerm = pSeg->zTerm; |
| 15630 int nTerm = pSeg->nTerm; |
| 15631 rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); |
| 15632 nRem++; |
| 15633 } |
| 15634 } |
| 15635 |
| 15636 if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ |
| 15637 rc = fts3RepackSegdirLevel(p, iAbsLevel); |
| 15638 } |
| 15639 |
| 15640 *pnRem = nRem; |
| 15641 return rc; |
| 15642 } |
| 15643 |
| 15644 /* |
| 15645 ** Store an incr-merge hint in the database. |
| 15646 */ |
| 15647 static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ |
| 15648 sqlite3_stmt *pReplace = 0; |
| 15649 int rc; /* Return code */ |
| 15650 |
| 15651 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); |
| 15652 if( rc==SQLITE_OK ){ |
| 15653 sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); |
| 15654 sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); |
| 15655 sqlite3_step(pReplace); |
| 15656 rc = sqlite3_reset(pReplace); |
| 15657 } |
| 15658 |
| 15659 return rc; |
| 15660 } |
| 15661 |
| 15662 /* |
| 15663 ** Load an incr-merge hint from the database. The incr-merge hint, if one |
| 15664 ** exists, is stored in the rowid==1 row of the %_stat table. |
| 15665 ** |
| 15666 ** If successful, populate blob *pHint with the value read from the %_stat |
| 15667 ** table and return SQLITE_OK. Otherwise, if an error occurs, return an |
| 15668 ** SQLite error code. |
| 15669 */ |
| 15670 static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ |
| 15671 sqlite3_stmt *pSelect = 0; |
| 15672 int rc; |
| 15673 |
| 15674 pHint->n = 0; |
| 15675 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); |
| 15676 if( rc==SQLITE_OK ){ |
| 15677 int rc2; |
| 15678 sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); |
| 15679 if( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 15680 const char *aHint = sqlite3_column_blob(pSelect, 0); |
| 15681 int nHint = sqlite3_column_bytes(pSelect, 0); |
| 15682 if( aHint ){ |
| 15683 blobGrowBuffer(pHint, nHint, &rc); |
| 15684 if( rc==SQLITE_OK ){ |
| 15685 memcpy(pHint->a, aHint, nHint); |
| 15686 pHint->n = nHint; |
| 15687 } |
| 15688 } |
| 15689 } |
| 15690 rc2 = sqlite3_reset(pSelect); |
| 15691 if( rc==SQLITE_OK ) rc = rc2; |
| 15692 } |
| 15693 |
| 15694 return rc; |
| 15695 } |
| 15696 |
| 15697 /* |
| 15698 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 15699 ** Otherwise, append an entry to the hint stored in blob *pHint. Each entry |
| 15700 ** consists of two varints, the absolute level number of the input segments |
| 15701 ** and the number of input segments. |
| 15702 ** |
| 15703 ** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, |
| 15704 ** set *pRc to an SQLite error code before returning. |
| 15705 */ |
| 15706 static void fts3IncrmergeHintPush( |
| 15707 Blob *pHint, /* Hint blob to append to */ |
| 15708 i64 iAbsLevel, /* First varint to store in hint */ |
| 15709 int nInput, /* Second varint to store in hint */ |
| 15710 int *pRc /* IN/OUT: Error code */ |
| 15711 ){ |
| 15712 blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); |
| 15713 if( *pRc==SQLITE_OK ){ |
| 15714 pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); |
| 15715 pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); |
| 15716 } |
| 15717 } |
| 15718 |
| 15719 /* |
| 15720 ** Read the last entry (most recently pushed) from the hint blob *pHint |
| 15721 ** and then remove the entry. Write the two values read to *piAbsLevel and |
| 15722 ** *pnInput before returning. |
| 15723 ** |
| 15724 ** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does |
| 15725 ** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. |
| 15726 */ |
| 15727 static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ |
| 15728 const int nHint = pHint->n; |
| 15729 int i; |
| 15730 |
| 15731 i = pHint->n-2; |
| 15732 while( i>0 && (pHint->a[i-1] & 0x80) ) i--; |
| 15733 while( i>0 && (pHint->a[i-1] & 0x80) ) i--; |
| 15734 |
| 15735 pHint->n = i; |
| 15736 i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); |
| 15737 i += fts3GetVarint32(&pHint->a[i], pnInput); |
| 15738 if( i!=nHint ) return FTS_CORRUPT_VTAB; |
| 15739 |
| 15740 return SQLITE_OK; |
| 15741 } |
| 15742 |
| 15743 |
| 15744 /* |
| 15745 ** Attempt an incremental merge that writes nMerge leaf blocks. |
| 15746 ** |
| 15747 ** Incremental merges happen nMin segments at a time. The segments |
| 15748 ** to be merged are the nMin oldest segments (the ones with the smallest |
| 15749 ** values for the _segdir.idx field) in the highest level that contains |
| 15750 ** at least nMin segments. Multiple merges might occur in an attempt to |
| 15751 ** write the quota of nMerge leaf blocks. |
| 15752 */ |
| 15753 SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ |
| 15754 int rc; /* Return code */ |
| 15755 int nRem = nMerge; /* Number of leaf pages yet to be written */ |
| 15756 Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ |
| 15757 Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ |
| 15758 IncrmergeWriter *pWriter; /* Writer object */ |
| 15759 int nSeg = 0; /* Number of input segments */ |
| 15760 sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ |
| 15761 Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ |
| 15762 int bDirtyHint = 0; /* True if blob 'hint' has been modified */ |
| 15763 |
| 15764 /* Allocate space for the cursor, filter and writer objects */ |
| 15765 const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); |
| 15766 pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); |
| 15767 if( !pWriter ) return SQLITE_NOMEM; |
| 15768 pFilter = (Fts3SegFilter *)&pWriter[1]; |
| 15769 pCsr = (Fts3MultiSegReader *)&pFilter[1]; |
| 15770 |
| 15771 rc = fts3IncrmergeHintLoad(p, &hint); |
| 15772 while( rc==SQLITE_OK && nRem>0 ){ |
| 15773 const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; |
| 15774 sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ |
| 15775 int bUseHint = 0; /* True if attempting to append */ |
| 15776 int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ |
| 15777 |
| 15778 /* Search the %_segdir table for the absolute level with the smallest |
| 15779 ** relative level number that contains at least nMin segments, if any. |
| 15780 ** If one is found, set iAbsLevel to the absolute level number and |
| 15781 ** nSeg to nMin. If no level with at least nMin segments can be found, |
| 15782 ** set nSeg to -1. |
| 15783 */ |
| 15784 rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); |
| 15785 sqlite3_bind_int(pFindLevel, 1, nMin); |
| 15786 if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ |
| 15787 iAbsLevel = sqlite3_column_int64(pFindLevel, 0); |
| 15788 nSeg = nMin; |
| 15789 }else{ |
| 15790 nSeg = -1; |
| 15791 } |
| 15792 rc = sqlite3_reset(pFindLevel); |
| 15793 |
| 15794 /* If the hint read from the %_stat table is not empty, check if the |
| 15795 ** last entry in it specifies a relative level smaller than or equal |
| 15796 ** to the level identified by the block above (if any). If so, this |
| 15797 ** iteration of the loop will work on merging at the hinted level. |
| 15798 */ |
| 15799 if( rc==SQLITE_OK && hint.n ){ |
| 15800 int nHint = hint.n; |
| 15801 sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ |
| 15802 int nHintSeg = 0; /* Hint number of segments */ |
| 15803 |
| 15804 rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); |
| 15805 if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ |
| 15806 iAbsLevel = iHintAbsLevel; |
| 15807 nSeg = nHintSeg; |
| 15808 bUseHint = 1; |
| 15809 bDirtyHint = 1; |
| 15810 }else{ |
| 15811 /* This undoes the effect of the HintPop() above - so that no entry |
| 15812 ** is removed from the hint blob. */ |
| 15813 hint.n = nHint; |
| 15814 } |
| 15815 } |
| 15816 |
| 15817 /* If nSeg is less that zero, then there is no level with at least |
| 15818 ** nMin segments and no hint in the %_stat table. No work to do. |
| 15819 ** Exit early in this case. */ |
| 15820 if( nSeg<0 ) break; |
| 15821 |
| 15822 /* Open a cursor to iterate through the contents of the oldest nSeg |
| 15823 ** indexes of absolute level iAbsLevel. If this cursor is opened using |
| 15824 ** the 'hint' parameters, it is possible that there are less than nSeg |
| 15825 ** segments available in level iAbsLevel. In this case, no work is |
| 15826 ** done on iAbsLevel - fall through to the next iteration of the loop |
| 15827 ** to start work on some other level. */ |
| 15828 memset(pWriter, 0, nAlloc); |
| 15829 pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; |
| 15830 |
| 15831 if( rc==SQLITE_OK ){ |
| 15832 rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); |
| 15833 assert( bUseHint==1 || bUseHint==0 ); |
| 15834 if( iIdx==0 || (bUseHint && iIdx==1) ){ |
| 15835 int bIgnore = 0; |
| 15836 rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); |
| 15837 if( bIgnore ){ |
| 15838 pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; |
| 15839 } |
| 15840 } |
| 15841 } |
| 15842 |
| 15843 if( rc==SQLITE_OK ){ |
| 15844 rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); |
| 15845 } |
| 15846 if( SQLITE_OK==rc && pCsr->nSegment==nSeg |
| 15847 && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) |
| 15848 && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) |
| 15849 ){ |
| 15850 if( bUseHint && iIdx>0 ){ |
| 15851 const char *zKey = pCsr->zTerm; |
| 15852 int nKey = pCsr->nTerm; |
| 15853 rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); |
| 15854 }else{ |
| 15855 rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); |
| 15856 } |
| 15857 |
| 15858 if( rc==SQLITE_OK && pWriter->nLeafEst ){ |
| 15859 fts3LogMerge(nSeg, iAbsLevel); |
| 15860 do { |
| 15861 rc = fts3IncrmergeAppend(p, pWriter, pCsr); |
| 15862 if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); |
| 15863 if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; |
| 15864 }while( rc==SQLITE_ROW ); |
| 15865 |
| 15866 /* Update or delete the input segments */ |
| 15867 if( rc==SQLITE_OK ){ |
| 15868 nRem -= (1 + pWriter->nWork); |
| 15869 rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); |
| 15870 if( nSeg!=0 ){ |
| 15871 bDirtyHint = 1; |
| 15872 fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); |
| 15873 } |
| 15874 } |
| 15875 } |
| 15876 |
| 15877 if( nSeg!=0 ){ |
| 15878 pWriter->nLeafData = pWriter->nLeafData * -1; |
| 15879 } |
| 15880 fts3IncrmergeRelease(p, pWriter, &rc); |
| 15881 if( nSeg==0 && pWriter->bNoLeafData==0 ){ |
| 15882 fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); |
| 15883 } |
| 15884 } |
| 15885 |
| 15886 sqlite3Fts3SegReaderFinish(pCsr); |
| 15887 } |
| 15888 |
| 15889 /* Write the hint values into the %_stat table for the next incr-merger */ |
| 15890 if( bDirtyHint && rc==SQLITE_OK ){ |
| 15891 rc = fts3IncrmergeHintStore(p, &hint); |
| 15892 } |
| 15893 |
| 15894 sqlite3_free(pWriter); |
| 15895 sqlite3_free(hint.a); |
| 15896 return rc; |
| 15897 } |
| 15898 |
/*
** Convert the decimal text beginning at *pz into a non-negative integer
** and return its value. Advance *pz to point to the first character that
** was not consumed.
**
** Digits stop being consumed once the accumulated value reaches
** 214748363, the point beyond which appending another digit could
** overflow a signed 32-bit integer. Any digits left over remain at *pz,
** so a caller that requires the whole string to be consumed sees an
** over-long number as trailing garbage. If *pz does not begin with a
** digit, zero is returned and *pz is left unchanged.
*/
static int fts3Getint(const char **pz){
  const int iStop = 214748363;    /* 10*i+9 fits in 32 bits for all i<iStop */
  const char *z = *pz;
  int i = 0;
  while( (*z)>='0' && (*z)<='9' && i<iStop ){
    i = 10*i + (*z - '0');
    z++;
  }
  *pz = z;
  return i;
}
| 15911 |
| 15912 /* |
| 15913 ** Process statements of the form: |
| 15914 ** |
| 15915 ** INSERT INTO table(table) VALUES('merge=A,B'); |
| 15916 ** |
| 15917 ** A and B are integers that decode to be the number of leaf pages |
| 15918 ** written for the merge, and the minimum number of segments on a level |
| 15919 ** before it will be selected for a merge, respectively. |
| 15920 */ |
| 15921 static int fts3DoIncrmerge( |
| 15922 Fts3Table *p, /* FTS3 table handle */ |
| 15923 const char *zParam /* Nul-terminated string containing "A,B" */ |
| 15924 ){ |
| 15925 int rc; |
| 15926 int nMin = (FTS3_MERGE_COUNT / 2); |
| 15927 int nMerge = 0; |
| 15928 const char *z = zParam; |
| 15929 |
| 15930 /* Read the first integer value */ |
| 15931 nMerge = fts3Getint(&z); |
| 15932 |
| 15933 /* If the first integer value is followed by a ',', read the second |
| 15934 ** integer value. */ |
| 15935 if( z[0]==',' && z[1]!='\0' ){ |
| 15936 z++; |
| 15937 nMin = fts3Getint(&z); |
| 15938 } |
| 15939 |
| 15940 if( z[0]!='\0' || nMin<2 ){ |
| 15941 rc = SQLITE_ERROR; |
| 15942 }else{ |
| 15943 rc = SQLITE_OK; |
| 15944 if( !p->bHasStat ){ |
| 15945 assert( p->bFts4==0 ); |
| 15946 sqlite3Fts3CreateStatTable(&rc, p); |
| 15947 } |
| 15948 if( rc==SQLITE_OK ){ |
| 15949 rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); |
| 15950 } |
| 15951 sqlite3Fts3SegmentsClose(p); |
| 15952 } |
| 15953 return rc; |
| 15954 } |
| 15955 |
| 15956 /* |
| 15957 ** Process statements of the form: |
| 15958 ** |
| 15959 ** INSERT INTO table(table) VALUES('automerge=X'); |
| 15960 ** |
| 15961 ** where X is an integer. X==0 means to turn automerge off. X!=0 means |
| 15962 ** turn it on. The setting is persistent. |
| 15963 */ |
| 15964 static int fts3DoAutoincrmerge( |
| 15965 Fts3Table *p, /* FTS3 table handle */ |
| 15966 const char *zParam /* Nul-terminated string containing boolean */ |
| 15967 ){ |
| 15968 int rc = SQLITE_OK; |
| 15969 sqlite3_stmt *pStmt = 0; |
| 15970 p->nAutoincrmerge = fts3Getint(&zParam); |
| 15971 if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ |
| 15972 p->nAutoincrmerge = 8; |
| 15973 } |
| 15974 if( !p->bHasStat ){ |
| 15975 assert( p->bFts4==0 ); |
| 15976 sqlite3Fts3CreateStatTable(&rc, p); |
| 15977 if( rc ) return rc; |
| 15978 } |
| 15979 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); |
| 15980 if( rc ) return rc; |
| 15981 sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); |
| 15982 sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); |
| 15983 sqlite3_step(pStmt); |
| 15984 rc = sqlite3_reset(pStmt); |
| 15985 return rc; |
| 15986 } |
| 15987 |
/*
** Return a 64-bit checksum for a single FTS index entry.
**
** The checksum starts out as the docid. Each of the language id, index
** number, column number, position and then every byte of the term is
** folded in, in that order, using the step:
**
**     cksum = cksum*9 + value
**
** All arithmetic is unsigned 64-bit and wraps on overflow.
*/
static u64 fts3ChecksumEntry(
  const char *zTerm,              /* Pointer to buffer containing term */
  int nTerm,                      /* Size of zTerm in bytes */
  int iLangid,                    /* Language id for current row */
  int iIndex,                     /* Index (0..Fts3Table.nIndex-1) */
  i64 iDocid,                     /* Docid for current row. */
  int iCol,                       /* Column number */
  int iPos                        /* Position */
){
  u64 h = (u64)iDocid;            /* Running checksum */
  int iByte = 0;                  /* Offset of next term byte to fold in */

  h = h*9 + iLangid;
  h = h*9 + iIndex;
  h = h*9 + iCol;
  h = h*9 + iPos;
  while( iByte<nTerm ){
    h = h*9 + zTerm[iByte];
    iByte++;
  }

  return h;
}
| 16012 |
| 16013 /* |
| 16014 ** Return a checksum of all entries in the FTS index that correspond to |
| 16015 ** language id iLangid. The checksum is calculated by XORing the checksums |
| 16016 ** of each individual entry (see fts3ChecksumEntry()) together. |
| 16017 ** |
| 16018 ** If successful, the checksum value is returned and *pRc set to SQLITE_OK. |
| 16019 ** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The |
| 16020 ** return value is undefined in this case. |
| 16021 */ |
| 16022 static u64 fts3ChecksumIndex( |
| 16023 Fts3Table *p, /* FTS3 table handle */ |
| 16024 int iLangid, /* Language id to return cksum for */ |
| 16025 int iIndex, /* Index to cksum (0..p->nIndex-1) */ |
| 16026 int *pRc /* OUT: Return code */ |
| 16027 ){ |
| 16028 Fts3SegFilter filter; |
| 16029 Fts3MultiSegReader csr; |
| 16030 int rc; |
| 16031 u64 cksum = 0; |
| 16032 |
| 16033 assert( *pRc==SQLITE_OK ); |
| 16034 |
| 16035 memset(&filter, 0, sizeof(filter)); |
| 16036 memset(&csr, 0, sizeof(csr)); |
| 16037 filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; |
| 16038 filter.flags |= FTS3_SEGMENT_SCAN; |
| 16039 |
| 16040 rc = sqlite3Fts3SegReaderCursor( |
| 16041 p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr |
| 16042 ); |
| 16043 if( rc==SQLITE_OK ){ |
| 16044 rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); |
| 16045 } |
| 16046 |
| 16047 if( rc==SQLITE_OK ){ |
| 16048 while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ |
| 16049 char *pCsr = csr.aDoclist; |
| 16050 char *pEnd = &pCsr[csr.nDoclist]; |
| 16051 |
| 16052 i64 iDocid = 0; |
| 16053 i64 iCol = 0; |
| 16054 i64 iPos = 0; |
| 16055 |
| 16056 pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); |
| 16057 while( pCsr<pEnd ){ |
| 16058 i64 iVal = 0; |
| 16059 pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); |
| 16060 if( pCsr<pEnd ){ |
| 16061 if( iVal==0 || iVal==1 ){ |
| 16062 iCol = 0; |
| 16063 iPos = 0; |
| 16064 if( iVal ){ |
| 16065 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); |
| 16066 }else{ |
| 16067 pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); |
| 16068 iDocid += iVal; |
| 16069 } |
| 16070 }else{ |
| 16071 iPos += (iVal - 2); |
| 16072 cksum = cksum ^ fts3ChecksumEntry( |
| 16073 csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, |
| 16074 (int)iCol, (int)iPos |
| 16075 ); |
| 16076 } |
| 16077 } |
| 16078 } |
| 16079 } |
| 16080 } |
| 16081 sqlite3Fts3SegReaderFinish(&csr); |
| 16082 |
| 16083 *pRc = rc; |
| 16084 return cksum; |
| 16085 } |
| 16086 |
| 16087 /* |
| 16088 ** Check if the contents of the FTS index match the current contents of the |
| 16089 ** content table. If no error occurs and the contents do match, set *pbOk |
| 16090 ** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk |
| 16091 ** to false before returning. |
| 16092 ** |
| 16093 ** If an error occurs (e.g. an OOM or IO error), return an SQLite error |
| 16094 ** code. The final value of *pbOk is undefined in this case. |
| 16095 */ |
| 16096 static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ |
| 16097 int rc = SQLITE_OK; /* Return code */ |
| 16098 u64 cksum1 = 0; /* Checksum based on FTS index contents */ |
| 16099 u64 cksum2 = 0; /* Checksum based on %_content contents */ |
| 16100 sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ |
| 16101 |
| 16102 /* This block calculates the checksum according to the FTS index. */ |
| 16103 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); |
| 16104 if( rc==SQLITE_OK ){ |
| 16105 int rc2; |
| 16106 sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); |
| 16107 sqlite3_bind_int(pAllLangid, 2, p->nIndex); |
| 16108 while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ |
| 16109 int iLangid = sqlite3_column_int(pAllLangid, 0); |
| 16110 int i; |
| 16111 for(i=0; i<p->nIndex; i++){ |
| 16112 cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); |
| 16113 } |
| 16114 } |
| 16115 rc2 = sqlite3_reset(pAllLangid); |
| 16116 if( rc==SQLITE_OK ) rc = rc2; |
| 16117 } |
| 16118 |
| 16119 /* This block calculates the checksum according to the %_content table */ |
| 16120 if( rc==SQLITE_OK ){ |
| 16121 sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; |
| 16122 sqlite3_stmt *pStmt = 0; |
| 16123 char *zSql; |
| 16124 |
| 16125 zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); |
| 16126 if( !zSql ){ |
| 16127 rc = SQLITE_NOMEM; |
| 16128 }else{ |
| 16129 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 16130 sqlite3_free(zSql); |
| 16131 } |
| 16132 |
| 16133 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 16134 i64 iDocid = sqlite3_column_int64(pStmt, 0); |
| 16135 int iLang = langidFromSelect(p, pStmt); |
| 16136 int iCol; |
| 16137 |
| 16138 for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ |
| 16139 if( p->abNotindexed[iCol]==0 ){ |
| 16140 const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); |
| 16141 int nText = sqlite3_column_bytes(pStmt, iCol+1); |
| 16142 sqlite3_tokenizer_cursor *pT = 0; |
| 16143 |
| 16144 rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); |
| 16145 while( rc==SQLITE_OK ){ |
| 16146 char const *zToken; /* Buffer containing token */ |
| 16147 int nToken = 0; /* Number of bytes in token */ |
| 16148 int iDum1 = 0, iDum2 = 0; /* Dummy variables */ |
| 16149 int iPos = 0; /* Position of token in zText */ |
| 16150 |
| 16151 rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); |
| 16152 if( rc==SQLITE_OK ){ |
| 16153 int i; |
| 16154 cksum2 = cksum2 ^ fts3ChecksumEntry( |
| 16155 zToken, nToken, iLang, 0, iDocid, iCol, iPos |
| 16156 ); |
| 16157 for(i=1; i<p->nIndex; i++){ |
| 16158 if( p->aIndex[i].nPrefix<=nToken ){ |
| 16159 cksum2 = cksum2 ^ fts3ChecksumEntry( |
| 16160 zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos |
| 16161 ); |
| 16162 } |
| 16163 } |
| 16164 } |
| 16165 } |
| 16166 if( pT ) pModule->xClose(pT); |
| 16167 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 16168 } |
| 16169 } |
| 16170 } |
| 16171 |
| 16172 sqlite3_finalize(pStmt); |
| 16173 } |
| 16174 |
| 16175 *pbOk = (cksum1==cksum2); |
| 16176 return rc; |
| 16177 } |
| 16178 |
| 16179 /* |
| 16180 ** Run the integrity-check. If no error occurs and the current contents of |
| 16181 ** the FTS index are correct, return SQLITE_OK. Or, if the contents of the |
| 16182 ** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. |
| 16183 ** |
| 16184 ** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite |
| 16185 ** error code. |
| 16186 ** |
| 16187 ** The integrity-check works as follows. For each token and indexed token |
| 16188 ** prefix in the document set, a 64-bit checksum is calculated (by code |
| 16189 ** in fts3ChecksumEntry()) based on the following: |
| 16190 ** |
| 16191 ** + The index number (0 for the main index, 1 for the first prefix |
| 16192 ** index etc.), |
| 16193 ** + The token (or token prefix) text itself, |
| 16194 ** + The language-id of the row it appears in, |
| 16195 ** + The docid of the row it appears in, |
| 16196 ** + The column it appears in, and |
| 16197 ** + The tokens position within that column. |
| 16198 ** |
| 16199 ** The checksums for all entries in the index are XORed together to create |
| 16200 ** a single checksum for the entire index. |
| 16201 ** |
| 16202 ** The integrity-check code calculates the same checksum in two ways: |
| 16203 ** |
| 16204 ** 1. By scanning the contents of the FTS index, and |
| 16205 ** 2. By scanning and tokenizing the content table. |
| 16206 ** |
| 16207 ** If the two checksums are identical, the integrity-check is deemed to have |
| 16208 ** passed. |
| 16209 */ |
| 16210 static int fts3DoIntegrityCheck( |
| 16211 Fts3Table *p /* FTS3 table handle */ |
| 16212 ){ |
| 16213 int rc; |
| 16214 int bOk = 0; |
| 16215 rc = fts3IntegrityCheck(p, &bOk); |
| 16216 if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; |
| 16217 return rc; |
| 16218 } |
| 16219 |
| 16220 /* |
| 16221 ** Handle a 'special' INSERT of the form: |
| 16222 ** |
| 16223 ** "INSERT INTO tbl(tbl) VALUES(<expr>)" |
| 16224 ** |
| 16225 ** Argument pVal contains the result of <expr>. Currently the only |
| 16226 ** meaningful value to insert is the text 'optimize'. |
| 16227 */ |
| 16228 static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ |
| 16229 int rc; /* Return Code */ |
| 16230 const char *zVal = (const char *)sqlite3_value_text(pVal); |
| 16231 int nVal = sqlite3_value_bytes(pVal); |
| 16232 |
| 16233 if( !zVal ){ |
| 16234 return SQLITE_NOMEM; |
| 16235 }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ |
| 16236 rc = fts3DoOptimize(p, 0); |
| 16237 }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ |
| 16238 rc = fts3DoRebuild(p); |
| 16239 }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ |
| 16240 rc = fts3DoIntegrityCheck(p); |
| 16241 }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ |
| 16242 rc = fts3DoIncrmerge(p, &zVal[6]); |
| 16243 }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ |
| 16244 rc = fts3DoAutoincrmerge(p, &zVal[10]); |
| 16245 #ifdef SQLITE_TEST |
| 16246 }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ |
| 16247 p->nNodeSize = atoi(&zVal[9]); |
| 16248 rc = SQLITE_OK; |
| 16249 }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ |
| 16250 p->nMaxPendingData = atoi(&zVal[11]); |
| 16251 rc = SQLITE_OK; |
| 16252 }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ |
| 16253 p->bNoIncrDoclist = atoi(&zVal[21]); |
| 16254 rc = SQLITE_OK; |
| 16255 #endif |
| 16256 }else{ |
| 16257 rc = SQLITE_ERROR; |
| 16258 } |
| 16259 |
| 16260 return rc; |
| 16261 } |
| 16262 |
| 16263 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 16264 /* |
| 16265 ** Delete all cached deferred doclists. Deferred doclists are cached |
| 16266 ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. |
| 16267 */ |
| 16268 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ |
| 16269 Fts3DeferredToken *pDef; |
| 16270 for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ |
| 16271 fts3PendingListDelete(pDef->pList); |
| 16272 pDef->pList = 0; |
| 16273 } |
| 16274 } |
| 16275 |
| 16276 /* |
| 16277 ** Free all entries in the pCsr->pDeffered list. Entries are added to |
| 16278 ** this list using sqlite3Fts3DeferToken(). |
| 16279 */ |
| 16280 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ |
| 16281 Fts3DeferredToken *pDef; |
| 16282 Fts3DeferredToken *pNext; |
| 16283 for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ |
| 16284 pNext = pDef->pNext; |
| 16285 fts3PendingListDelete(pDef->pList); |
| 16286 sqlite3_free(pDef); |
| 16287 } |
| 16288 pCsr->pDeferred = 0; |
| 16289 } |
| 16290 |
| 16291 /* |
| 16292 ** Generate deferred-doclists for all tokens in the pCsr->pDeferred list |
| 16293 ** based on the row that pCsr currently points to. |
| 16294 ** |
| 16295 ** A deferred-doclist is like any other doclist with position information |
| 16296 ** included, except that it only contains entries for a single row of the |
| 16297 ** table, not for all rows. |
| 16298 */ |
| 16299 SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ |
| 16300 int rc = SQLITE_OK; /* Return code */ |
| 16301 if( pCsr->pDeferred ){ |
| 16302 int i; /* Used to iterate through table columns */ |
| 16303 sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ |
| 16304 Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ |
| 16305 |
| 16306 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 16307 sqlite3_tokenizer *pT = p->pTokenizer; |
| 16308 sqlite3_tokenizer_module const *pModule = pT->pModule; |
| 16309 |
| 16310 assert( pCsr->isRequireSeek==0 ); |
| 16311 iDocid = sqlite3_column_int64(pCsr->pStmt, 0); |
| 16312 |
| 16313 for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ |
| 16314 if( p->abNotindexed[i]==0 ){ |
| 16315 const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); |
| 16316 sqlite3_tokenizer_cursor *pTC = 0; |
| 16317 |
| 16318 rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); |
| 16319 while( rc==SQLITE_OK ){ |
| 16320 char const *zToken; /* Buffer containing token */ |
| 16321 int nToken = 0; /* Number of bytes in token */ |
| 16322 int iDum1 = 0, iDum2 = 0; /* Dummy variables */ |
| 16323 int iPos = 0; /* Position of token in zText */ |
| 16324 |
| 16325 rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); |
| 16326 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ |
| 16327 Fts3PhraseToken *pPT = pDef->pToken; |
| 16328 if( (pDef->iCol>=p->nColumn || pDef->iCol==i) |
| 16329 && (pPT->bFirst==0 || iPos==0) |
| 16330 && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) |
| 16331 && (0==memcmp(zToken, pPT->z, pPT->n)) |
| 16332 ){ |
| 16333 fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); |
| 16334 } |
| 16335 } |
| 16336 } |
| 16337 if( pTC ) pModule->xClose(pTC); |
| 16338 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 16339 } |
| 16340 } |
| 16341 |
| 16342 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ |
| 16343 if( pDef->pList ){ |
| 16344 rc = fts3PendingListAppendVarint(&pDef->pList, 0); |
| 16345 } |
| 16346 } |
| 16347 } |
| 16348 |
| 16349 return rc; |
| 16350 } |
| 16351 |
| 16352 SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( |
| 16353 Fts3DeferredToken *p, |
| 16354 char **ppData, |
| 16355 int *pnData |
| 16356 ){ |
| 16357 char *pRet; |
| 16358 int nSkip; |
| 16359 sqlite3_int64 dummy; |
| 16360 |
| 16361 *ppData = 0; |
| 16362 *pnData = 0; |
| 16363 |
| 16364 if( p->pList==0 ){ |
| 16365 return SQLITE_OK; |
| 16366 } |
| 16367 |
| 16368 pRet = (char *)sqlite3_malloc(p->pList->nData); |
| 16369 if( !pRet ) return SQLITE_NOMEM; |
| 16370 |
| 16371 nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); |
| 16372 *pnData = p->pList->nData - nSkip; |
| 16373 *ppData = pRet; |
| 16374 |
| 16375 memcpy(pRet, &p->pList->aData[nSkip], *pnData); |
| 16376 return SQLITE_OK; |
| 16377 } |
| 16378 |
| 16379 /* |
| 16380 ** Add an entry for token pToken to the pCsr->pDeferred list. |
| 16381 */ |
| 16382 SQLITE_PRIVATE int sqlite3Fts3DeferToken( |
| 16383 Fts3Cursor *pCsr, /* Fts3 table cursor */ |
| 16384 Fts3PhraseToken *pToken, /* Token to defer */ |
| 16385 int iCol /* Column that token must appear in (or -1) */ |
| 16386 ){ |
| 16387 Fts3DeferredToken *pDeferred; |
| 16388 pDeferred = sqlite3_malloc(sizeof(*pDeferred)); |
| 16389 if( !pDeferred ){ |
| 16390 return SQLITE_NOMEM; |
| 16391 } |
| 16392 memset(pDeferred, 0, sizeof(*pDeferred)); |
| 16393 pDeferred->pToken = pToken; |
| 16394 pDeferred->pNext = pCsr->pDeferred; |
| 16395 pDeferred->iCol = iCol; |
| 16396 pCsr->pDeferred = pDeferred; |
| 16397 |
| 16398 assert( pToken->pDeferred==0 ); |
| 16399 pToken->pDeferred = pDeferred; |
| 16400 |
| 16401 return SQLITE_OK; |
| 16402 } |
| 16403 #endif |
| 16404 |
| 16405 /* |
| 16406 ** SQLite value pRowid contains the rowid of a row that may or may not be |
| 16407 ** present in the FTS3 table. If it is, delete it and adjust the contents |
| 16408 ** of subsiduary data structures accordingly. |
| 16409 */ |
| 16410 static int fts3DeleteByRowid( |
| 16411 Fts3Table *p, |
| 16412 sqlite3_value *pRowid, |
| 16413 int *pnChng, /* IN/OUT: Decrement if row is deleted */ |
| 16414 u32 *aSzDel |
| 16415 ){ |
| 16416 int rc = SQLITE_OK; /* Return code */ |
| 16417 int bFound = 0; /* True if *pRowid really is in the table */ |
| 16418 |
| 16419 fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); |
| 16420 if( bFound && rc==SQLITE_OK ){ |
| 16421 int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ |
| 16422 rc = fts3IsEmpty(p, pRowid, &isEmpty); |
| 16423 if( rc==SQLITE_OK ){ |
| 16424 if( isEmpty ){ |
| 16425 /* Deleting this row means the whole table is empty. In this case |
| 16426 ** delete the contents of all three tables and throw away any |
| 16427 ** data in the pendingTerms hash table. */ |
| 16428 rc = fts3DeleteAll(p, 1); |
| 16429 *pnChng = 0; |
| 16430 memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); |
| 16431 }else{ |
| 16432 *pnChng = *pnChng - 1; |
| 16433 if( p->zContentTbl==0 ){ |
| 16434 fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); |
| 16435 } |
| 16436 if( p->bHasDocsize ){ |
| 16437 fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); |
| 16438 } |
| 16439 } |
| 16440 } |
| 16441 } |
| 16442 |
| 16443 return rc; |
| 16444 } |
| 16445 |
| 16446 /* |
| 16447 ** This function does the work for the xUpdate method of FTS3 virtual |
| 16448 ** tables. The schema of the virtual table being: |
| 16449 ** |
| 16450 ** CREATE TABLE <table name>( |
| 16451 ** <user columns>, |
| 16452 ** <table name> HIDDEN, |
| 16453 ** docid HIDDEN, |
| 16454 ** <langid> HIDDEN |
| 16455 ** ); |
| 16456 ** |
| 16457 ** |
| 16458 */ |
| 16459 SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( |
| 16460 sqlite3_vtab *pVtab, /* FTS3 vtab object */ |
| 16461 int nArg, /* Size of argument array */ |
| 16462 sqlite3_value **apVal, /* Array of arguments */ |
| 16463 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ |
| 16464 ){ |
| 16465 Fts3Table *p = (Fts3Table *)pVtab; |
| 16466 int rc = SQLITE_OK; /* Return Code */ |
| 16467 int isRemove = 0; /* True for an UPDATE or DELETE */ |
| 16468 u32 *aSzIns = 0; /* Sizes of inserted documents */ |
| 16469 u32 *aSzDel = 0; /* Sizes of deleted documents */ |
| 16470 int nChng = 0; /* Net change in number of documents */ |
| 16471 int bInsertDone = 0; |
| 16472 |
| 16473 /* At this point it must be known if the %_stat table exists or not. |
| 16474 ** So bHasStat may not be 2. */ |
| 16475 assert( p->bHasStat==0 || p->bHasStat==1 ); |
| 16476 |
| 16477 assert( p->pSegments==0 ); |
| 16478 assert( |
| 16479 nArg==1 /* DELETE operations */ |
| 16480 || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ |
| 16481 ); |
| 16482 |
| 16483 /* Check for a "special" INSERT operation. One of the form: |
| 16484 ** |
| 16485 ** INSERT INTO xyz(xyz) VALUES('command'); |
| 16486 */ |
| 16487 if( nArg>1 |
| 16488 && sqlite3_value_type(apVal[0])==SQLITE_NULL |
| 16489 && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL |
| 16490 ){ |
| 16491 rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); |
| 16492 goto update_out; |
| 16493 } |
| 16494 |
| 16495 if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ |
| 16496 rc = SQLITE_CONSTRAINT; |
| 16497 goto update_out; |
| 16498 } |
| 16499 |
| 16500 /* Allocate space to hold the change in document sizes */ |
| 16501 aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); |
| 16502 if( aSzDel==0 ){ |
| 16503 rc = SQLITE_NOMEM; |
| 16504 goto update_out; |
| 16505 } |
| 16506 aSzIns = &aSzDel[p->nColumn+1]; |
| 16507 memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); |
| 16508 |
| 16509 rc = fts3Writelock(p); |
| 16510 if( rc!=SQLITE_OK ) goto update_out; |
| 16511 |
| 16512 /* If this is an INSERT operation, or an UPDATE that modifies the rowid |
| 16513 ** value, then this operation requires constraint handling. |
| 16514 ** |
| 16515 ** If the on-conflict mode is REPLACE, this means that the existing row |
| 16516 ** should be deleted from the database before inserting the new row. Or, |
| 16517 ** if the on-conflict mode is other than REPLACE, then this method must |
| 16518 ** detect the conflict and return SQLITE_CONSTRAINT before beginning to |
| 16519 ** modify the database file. |
| 16520 */ |
| 16521 if( nArg>1 && p->zContentTbl==0 ){ |
| 16522 /* Find the value object that holds the new rowid value. */ |
| 16523 sqlite3_value *pNewRowid = apVal[3+p->nColumn]; |
| 16524 if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ |
| 16525 pNewRowid = apVal[1]; |
| 16526 } |
| 16527 |
| 16528 if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( |
| 16529 sqlite3_value_type(apVal[0])==SQLITE_NULL |
| 16530 || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) |
| 16531 )){ |
| 16532 /* The new rowid is not NULL (in this case the rowid will be |
| 16533 ** automatically assigned and there is no chance of a conflict), and |
| 16534 ** the statement is either an INSERT or an UPDATE that modifies the |
| 16535 ** rowid column. So if the conflict mode is REPLACE, then delete any |
| 16536 ** existing row with rowid=pNewRowid. |
| 16537 ** |
| 16538 ** Or, if the conflict mode is not REPLACE, insert the new record into |
| 16539 ** the %_content table. If we hit the duplicate rowid constraint (or any |
| 16540 ** other error) while doing so, return immediately. |
| 16541 ** |
| 16542 ** This branch may also run if pNewRowid contains a value that cannot |
| 16543 ** be losslessly converted to an integer. In this case, the eventual |
| 16544 ** call to fts3InsertData() (either just below or further on in this |
| 16545 ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is |
| 16546 ** invoked, it will delete zero rows (since no row will have |
| 16547 ** docid=$pNewRowid if $pNewRowid is not an integer value). |
| 16548 */ |
| 16549 if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ |
| 16550 rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); |
| 16551 }else{ |
| 16552 rc = fts3InsertData(p, apVal, pRowid); |
| 16553 bInsertDone = 1; |
| 16554 } |
| 16555 } |
| 16556 } |
| 16557 if( rc!=SQLITE_OK ){ |
| 16558 goto update_out; |
| 16559 } |
| 16560 |
| 16561 /* If this is a DELETE or UPDATE operation, remove the old record. */ |
| 16562 if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ |
| 16563 assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); |
| 16564 rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); |
| 16565 isRemove = 1; |
| 16566 } |
| 16567 |
| 16568 /* If this is an INSERT or UPDATE operation, insert the new record. */ |
| 16569 if( nArg>1 && rc==SQLITE_OK ){ |
| 16570 int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); |
| 16571 if( bInsertDone==0 ){ |
| 16572 rc = fts3InsertData(p, apVal, pRowid); |
| 16573 if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ |
| 16574 rc = FTS_CORRUPT_VTAB; |
| 16575 } |
| 16576 } |
| 16577 if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ |
| 16578 rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid); |
| 16579 } |
| 16580 if( rc==SQLITE_OK ){ |
| 16581 assert( p->iPrevDocid==*pRowid ); |
| 16582 rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); |
| 16583 } |
| 16584 if( p->bHasDocsize ){ |
| 16585 fts3InsertDocsize(&rc, p, aSzIns); |
| 16586 } |
| 16587 nChng++; |
| 16588 } |
| 16589 |
| 16590 if( p->bFts4 ){ |
| 16591 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); |
| 16592 } |
| 16593 |
| 16594 update_out: |
| 16595 sqlite3_free(aSzDel); |
| 16596 sqlite3Fts3SegmentsClose(p); |
| 16597 return rc; |
| 16598 } |
| 16599 |
| 16600 /* |
| 16601 ** Flush any data in the pending-terms hash table to disk. If successful, |
| 16602 ** merge all segments in the database (including the new segment, if |
| 16603 ** there was any data to flush) into a single segment. |
| 16604 */ |
| 16605 SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ |
| 16606 int rc; |
| 16607 rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); |
| 16608 if( rc==SQLITE_OK ){ |
| 16609 rc = fts3DoOptimize(p, 1); |
| 16610 if( rc==SQLITE_OK || rc==SQLITE_DONE ){ |
| 16611 int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); |
| 16612 if( rc2!=SQLITE_OK ) rc = rc2; |
| 16613 }else{ |
| 16614 sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); |
| 16615 sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); |
| 16616 } |
| 16617 } |
| 16618 sqlite3Fts3SegmentsClose(p); |
| 16619 return rc; |
| 16620 } |
| 16621 |
| 16622 #endif |
| 16623 |
| 16624 /************** End of fts3_write.c ******************************************/ |
| 16625 /************** Begin file fts3_snippet.c ************************************/ |
| 16626 /* |
| 16627 ** 2009 Oct 23 |
| 16628 ** |
| 16629 ** The author disclaims copyright to this source code. In place of |
| 16630 ** a legal notice, here is a blessing: |
| 16631 ** |
| 16632 ** May you do good and not evil. |
| 16633 ** May you find forgiveness for yourself and forgive others. |
| 16634 ** May you share freely, never taking more than you give. |
| 16635 ** |
| 16636 ****************************************************************************** |
| 16637 */ |
| 16638 |
| 16639 /* #include "fts3Int.h" */ |
| 16640 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 16641 |
| 16642 /* #include <string.h> */ |
| 16643 /* #include <assert.h> */ |
| 16644 |
| 16645 /* |
| 16646 ** Characters that may appear in the second argument to matchinfo(). The trailing comment on each line gives the number of values that character contributes; nCol and nPhrase are the quantities stored in MatchInfo.nCol and MatchInfo.nPhrase. |
| 16647 */ |
| 16648 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ |
| 16649 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ |
| 16650 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ |
| 16651 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ |
| 16652 #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ |
| 16653 #define FTS3_MATCHINFO_LCS 's' /* nCol values */ |
| 16654 #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ |
| 16655 #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ |
| 16656 #define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values -- NOTE(review): the public matchinfo() documentation gives ((nCol+31)/32)*nPhrase for 'b'; confirm against the size computation */ |
| 16657 |
| 16658 /* |
| 16659 ** The default value for the second argument to matchinfo(): the 'p', 'c' and 'x' items, in that order. |
| 16660 */ |
| 16661 #define FTS3_MATCHINFO_DEFAULT "pcx" |
| 16662 |
| 16663 |
| 16664 /* |
| 16665 ** Used as an fts3ExprIterate() context when loading phrase doclists to |
| 16666 ** Fts3Expr.aDoclist[]/nDoclist. The callback fts3ExprLoadDoclistsCb() bumps nPhrase once per phrase visited and adds that phrase's token count to nToken. |
| 16667 */ |
| 16668 typedef struct LoadDoclistCtx LoadDoclistCtx; |
| 16669 struct LoadDoclistCtx { |
| 16670 Fts3Cursor *pCsr; /* FTS3 Cursor (set by fts3ExprLoadDoclists()) */ |
| 16671 int nPhrase; /* Number of phrases seen so far */ |
| 16672 int nToken; /* Number of tokens seen so far, summed over all phrases */ |
| 16673 }; |
| 16674 |
| 16675 /* |
| 16676 ** The following types are used as part of the implementation of the |
| 16677 ** fts3BestSnippet() routine. SnippetIter walks candidate snippets, SnippetPhrase holds the per-phrase position-list iterators, and SnippetFragment describes the snippet finally selected. |
| 16678 */ |
| 16679 typedef struct SnippetIter SnippetIter; |
| 16680 typedef struct SnippetPhrase SnippetPhrase; |
| 16681 typedef struct SnippetFragment SnippetFragment; |
| 16682 |
| 16683 struct SnippetIter { |
| 16684 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ |
| 16685 int iCol; /* Extract snippet from this column */ |
| 16686 int nSnippet; /* Requested snippet length (in tokens) */ |
| 16687 int nPhrase; /* Number of phrases in query */ |
| 16688 SnippetPhrase *aPhrase; /* Array of size nPhrase */ |
| 16689 int iCurrent; /* First token of current snippet; negative until fts3SnippetNextCandidate() has been called once */ |
| 16690 }; |
| 16691 |
| 16692 struct SnippetPhrase { |
| 16693 int nToken; /* Number of tokens in phrase */ |
| 16694 char *pList; /* Pointer to start of phrase position list (NULL if none was found) */ |
| 16695 int iHead; /* Next value in position list */ |
| 16696 char *pHead; /* Position list data following iHead; set to NULL by fts3SnippetAdvance() at end of list */ |
| 16697 int iTail; /* Next value in trailing position list */ |
| 16698 char *pTail; /* Position list data following iTail; set to NULL by fts3SnippetAdvance() at end of list */ |
| 16699 }; |
| 16700 |
| 16701 struct SnippetFragment { |
| 16702 int iCol; /* Column snippet is extracted from */ |
| 16703 int iPos; /* Index of first token in snippet */ |
| 16704 u64 covered; /* Mask of query phrases covered (one bit per phrase index) */ |
| 16705 u64 hlmask; /* Mask of snippet terms to highlight (bit k refers to token iPos+k) */ |
| 16706 }; |
| 16707 |
| 16708 /* |
| 16709 ** This type is used as an fts3ExprIterate() context object while |
| 16710 ** accumulating the data returned by the matchinfo() function. |
| 16711 */ |
| 16712 typedef struct MatchInfo MatchInfo; |
| 16713 struct MatchInfo { |
| 16714 Fts3Cursor *pCursor; /* FTS3 Cursor */ |
| 16715 int nCol; /* Number of columns in table */ |
| 16716 int nPhrase; /* Number of matchable phrases in query */ |
| 16717 sqlite3_int64 nDoc; /* Number of docs in database */ |
| 16718 char flag; /* NOTE(review): presumably the FTS3_MATCHINFO_* character currently being processed -- confirm against the code that sets it */ |
| 16719 u32 *aMatchinfo; /* Pre-allocated buffer */ |
| 16720 }; |
| 16721 |
| 16722 /* |
| 16723 ** An instance of this structure is used to manage a pair of buffers, each |
| 16724 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below |
| 16725 ** for details. The object is allocated by fts3MIBufferNew() with extra space after aMatchinfo[] for both buffers and for a copy of the format string, and is released once all three aRef[] entries are zero. |
| 16726 */ |
| 16727 struct MatchinfoBuffer { |
| 16728 u8 aRef[3]; /* [0]: set by fts3MIBufferNew(), cleared by sqlite3Fts3MIBufferFree(); [1],[2]: set while the first/second buffer is handed out by fts3MIBufferAlloc() */ |
| 16729 int nElem; /* Number of u32 elements in each of the two buffers */ |
| 16730 int bGlobal; /* Set if global data is loaded */ |
| 16731 char *zMatchinfo; /* Copy of the matchinfo format string, stored in the same allocation after the buffers */ |
| 16732 u32 aMatchinfo[1]; /* [0]: byte offset of [1] from the start of the object; [1..nElem]: first buffer; [nElem+1]: byte offset of the second buffer; [nElem+2..2*nElem+1]: second buffer */ |
| 16733 }; |
| 16734 |
| 16735 |
| 16736 /* |
| 16737 ** The snippet() and offsets() functions both return text values. An instance |
| 16738 ** of the following structure is used to accumulate those values while the |
| 16739 ** functions are running. See fts3StringAppend() for details. |
| 16740 */ |
| 16741 typedef struct StrBuffer StrBuffer; |
| 16742 struct StrBuffer { |
| 16743 char *z; /* Pointer to buffer containing string; nul-terminated by each fts3StringAppend() call */ |
| 16744 int n; /* Length of z in bytes (excl. nul-term) */ |
| 16745 int nAlloc; /* Allocated size of buffer z in bytes */ |
| 16746 }; |
| 16747 |
| 16748 |
| 16749 /************************************************************************* |
| 16750 ** Start of MatchinfoBuffer code. |
| 16751 */ |
| 16752 |
| 16753 /* |
| 16754 ** Allocate a two-slot MatchinfoBuffer object. |
| 16755 */ |
| 16756 static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ |
| 16757 MatchinfoBuffer *pRet; |
| 16758 int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); |
| 16759 int nStr = (int)strlen(zMatchinfo); |
| 16760 |
| 16761 pRet = sqlite3_malloc(nByte + nStr+1); |
| 16762 if( pRet ){ |
| 16763 memset(pRet, 0, nByte); |
| 16764 pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; |
| 16765 pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); |
| 16766 pRet->nElem = nElem; |
| 16767 pRet->zMatchinfo = ((char*)pRet) + nByte; |
| 16768 memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); |
| 16769 pRet->aRef[0] = 1; |
| 16770 } |
| 16771 |
| 16772 return pRet; |
| 16773 } |
| 16774 |
| 16775 static void fts3MIBufferFree(void *p){ |
| 16776 MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); |
| 16777 |
| 16778 assert( (u32*)p==&pBuf->aMatchinfo[1] |
| 16779 || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] |
| 16780 ); |
| 16781 if( (u32*)p==&pBuf->aMatchinfo[1] ){ |
| 16782 pBuf->aRef[1] = 0; |
| 16783 }else{ |
| 16784 pBuf->aRef[2] = 0; |
| 16785 } |
| 16786 |
| 16787 if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ |
| 16788 sqlite3_free(pBuf); |
| 16789 } |
| 16790 } |
| 16791 |
| 16792 static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ |
| 16793 void (*xRet)(void*) = 0; |
| 16794 u32 *aOut = 0; |
| 16795 |
| 16796 if( p->aRef[1]==0 ){ |
| 16797 p->aRef[1] = 1; |
| 16798 aOut = &p->aMatchinfo[1]; |
| 16799 xRet = fts3MIBufferFree; |
| 16800 } |
| 16801 else if( p->aRef[2]==0 ){ |
| 16802 p->aRef[2] = 1; |
| 16803 aOut = &p->aMatchinfo[p->nElem+2]; |
| 16804 xRet = fts3MIBufferFree; |
| 16805 }else{ |
| 16806 aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); |
| 16807 if( aOut ){ |
| 16808 xRet = sqlite3_free; |
| 16809 if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); |
| 16810 } |
| 16811 } |
| 16812 |
| 16813 *paOut = aOut; |
| 16814 return xRet; |
| 16815 } |
| 16816 |
| 16817 static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ |
| 16818 p->bGlobal = 1; |
| 16819 memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); |
| 16820 } |
| 16821 |
| 16822 /* |
| 16823 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() |
| 16824 */ |
| 16825 SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ |
| 16826 if( p ){ |
| 16827 assert( p->aRef[0]==1 ); |
| 16828 p->aRef[0] = 0; |
| 16829 if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ |
| 16830 sqlite3_free(p); |
| 16831 } |
| 16832 } |
| 16833 } |
| 16834 |
| 16835 /* |
| 16836 ** End of MatchinfoBuffer code. |
| 16837 *************************************************************************/ |
| 16838 |
| 16839 |
/*
** Step a position-list iterator forward by one element.
**
** A position list is a list of unique integers in ascending order. Each
** element is stored as an FTS3 varint whose value is the difference between
** the element and its predecessor, plus two. For example the position-list
**
**         4 9 113
**
** is stored as the three varints
**
**         6 7 106
**
** On entry *pp points at the varint for the next element and *piPos holds
** the value of the previous element (0 before the first). On return *piPos
** holds the value of the element just read and *pp points at the varint
** that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *zCur = *pp;                 /* Start of the varint to decode */
  int iDelta;                       /* Decoded varint (difference + 2) */
  int nVarint;                      /* Bytes consumed by the varint */

  nVarint = fts3GetVarint32(zCur, &iDelta);
  *pp = zCur + nVarint;
  *piPos += (iDelta-2);
}
| 16865 |
| 16866 /* |
| 16867 ** Helper function for fts3ExprIterate() (see below). |
| 16868 */ |
| 16869 static int fts3ExprIterate2( |
| 16870 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 16871 int *piPhrase, /* Pointer to phrase counter */ |
| 16872 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 16873 void *pCtx /* Second argument to pass to callback */ |
| 16874 ){ |
| 16875 int rc; /* Return code */ |
| 16876 int eType = pExpr->eType; /* Type of expression node pExpr */ |
| 16877 |
| 16878 if( eType!=FTSQUERY_PHRASE ){ |
| 16879 assert( pExpr->pLeft && pExpr->pRight ); |
| 16880 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); |
| 16881 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ |
| 16882 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); |
| 16883 } |
| 16884 }else{ |
| 16885 rc = x(pExpr, *piPhrase, pCtx); |
| 16886 (*piPhrase)++; |
| 16887 } |
| 16888 return rc; |
| 16889 } |
| 16890 |
| 16891 /* |
| 16892 ** Iterate through all phrase nodes in an FTS3 query, except those that |
| 16893 ** are part of a sub-tree that is the right-hand-side of a NOT operator. |
| 16894 ** For each phrase node found, the supplied callback function is invoked. |
| 16895 ** |
| 16896 ** If the callback function returns anything other than SQLITE_OK, |
| 16897 ** the iteration is abandoned and the error code returned immediately. |
| 16898 ** Otherwise, SQLITE_OK is returned after a callback has been made for |
| 16899 ** all eligible phrase nodes. |
| 16900 */ |
| 16901 static int fts3ExprIterate( |
| 16902 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 16903 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 16904 void *pCtx /* Second argument to pass to callback */ |
| 16905 ){ |
| 16906 int iPhrase = 0; /* Variable used as the phrase counter */ |
| 16907 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); |
| 16908 } |
| 16909 |
| 16910 |
| 16911 /* |
| 16912 ** This is an fts3ExprIterate() callback used while loading the doclists |
| 16913 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also |
| 16914 ** fts3ExprLoadDoclists(). |
| 16915 */ |
| 16916 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 16917 int rc = SQLITE_OK; |
| 16918 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 16919 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; |
| 16920 |
| 16921 UNUSED_PARAMETER(iPhrase); |
| 16922 |
| 16923 p->nPhrase++; |
| 16924 p->nToken += pPhrase->nToken; |
| 16925 |
| 16926 return rc; |
| 16927 } |
| 16928 |
| 16929 /* |
| 16930 ** Load the doclists for each phrase in the query associated with FTS3 cursor |
| 16931 ** pCsr. |
| 16932 ** |
| 16933 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable |
| 16934 ** phrases in the expression (all phrases except those directly or |
| 16935 ** indirectly descended from the right-hand-side of a NOT operator). If |
| 16936 ** pnToken is not NULL, then it is set to the number of tokens in all |
| 16937 ** matchable phrases of the expression. |
| 16938 */ |
| 16939 static int fts3ExprLoadDoclists( |
| 16940 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ |
| 16941 int *pnPhrase, /* OUT: Number of phrases in query */ |
| 16942 int *pnToken /* OUT: Number of tokens in query */ |
| 16943 ){ |
| 16944 int rc; /* Return Code */ |
| 16945 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ |
| 16946 sCtx.pCsr = pCsr; |
| 16947 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); |
| 16948 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; |
| 16949 if( pnToken ) *pnToken = sCtx.nToken; |
| 16950 return rc; |
| 16951 } |
| 16952 |
| 16953 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 16954 (*(int *)ctx)++; |
| 16955 pExpr->iPhrase = iPhrase; |
| 16956 return SQLITE_OK; |
| 16957 } |
| 16958 static int fts3ExprPhraseCount(Fts3Expr *pExpr){ |
| 16959 int nPhrase = 0; |
| 16960 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); |
| 16961 return nPhrase; |
| 16962 } |
| 16963 |
/*
** Advance the position-list iterator (*ppIter, *piIter) until its current
** value is greater than or equal to iNext.
**
** If *ppIter is NULL on entry the iterator is already exhausted and nothing
** happens. If the end of the list is reached before a large enough value
** is found, *ppIter is set to NULL and *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *zPos = *ppIter;
  int iPos;

  if( zPos==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*zPos & 0xFE)==0 ){
      /* A byte of 0x00 or 0x01 marks the end of this position list. */
      zPos = 0;
      iPos = -1;
      break;
    }
    fts3GetDeltaPosition(&zPos, &iPos);
  }

  *piIter = iPos;
  *ppIter = zPos;
}
| 16987 |
| 16988 /* |
| 16989 ** Advance the snippet iterator to the next candidate snippet. |
| 16990 */ |
| 16991 static int fts3SnippetNextCandidate(SnippetIter *pIter){ |
| 16992 int i; /* Loop counter */ |
| 16993 |
| 16994 if( pIter->iCurrent<0 ){ |
| 16995 /* The SnippetIter object has just been initialized. The first snippet |
| 16996 ** candidate always starts at offset 0 (even if this candidate has a |
| 16997 ** score of 0.0). |
| 16998 */ |
| 16999 pIter->iCurrent = 0; |
| 17000 |
| 17001 /* Advance the 'head' iterator of each phrase to the first offset that |
| 17002 ** is greater than or equal to (iNext+nSnippet). |
| 17003 */ |
| 17004 for(i=0; i<pIter->nPhrase; i++){ |
| 17005 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17006 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); |
| 17007 } |
| 17008 }else{ |
| 17009 int iStart; |
| 17010 int iEnd = 0x7FFFFFFF; |
| 17011 |
| 17012 for(i=0; i<pIter->nPhrase; i++){ |
| 17013 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17014 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ |
| 17015 iEnd = pPhrase->iHead; |
| 17016 } |
| 17017 } |
| 17018 if( iEnd==0x7FFFFFFF ){ |
| 17019 return 1; |
| 17020 } |
| 17021 |
| 17022 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; |
| 17023 for(i=0; i<pIter->nPhrase; i++){ |
| 17024 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17025 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); |
| 17026 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); |
| 17027 } |
| 17028 } |
| 17029 |
| 17030 return 0; |
| 17031 } |
| 17032 |
| 17033 /* |
| 17034 ** Retrieve information about the current candidate snippet of snippet |
| 17035 ** iterator pIter. |
| 17036 */ |
| 17037 static void fts3SnippetDetails( |
| 17038 SnippetIter *pIter, /* Snippet iterator */ |
| 17039 u64 mCovered, /* Bitmask of phrases already covered */ |
| 17040 int *piToken, /* OUT: First token of proposed snippet */ |
| 17041 int *piScore, /* OUT: "Score" for this snippet */ |
| 17042 u64 *pmCover, /* OUT: Bitmask of phrases covered */ |
| 17043 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ |
| 17044 ){ |
| 17045 int iStart = pIter->iCurrent; /* First token of snippet */ |
| 17046 int iScore = 0; /* Score of this snippet */ |
| 17047 int i; /* Loop counter */ |
| 17048 u64 mCover = 0; /* Mask of phrases covered by this snippet */ |
| 17049 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ |
| 17050 |
| 17051 for(i=0; i<pIter->nPhrase; i++){ |
| 17052 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17053 if( pPhrase->pTail ){ |
| 17054 char *pCsr = pPhrase->pTail; |
| 17055 int iCsr = pPhrase->iTail; |
| 17056 |
| 17057 while( iCsr<(iStart+pIter->nSnippet) ){ |
| 17058 int j; |
| 17059 u64 mPhrase = (u64)1 << i; |
| 17060 u64 mPos = (u64)1 << (iCsr - iStart); |
| 17061 assert( iCsr>=iStart ); |
| 17062 if( (mCover|mCovered)&mPhrase ){ |
| 17063 iScore++; |
| 17064 }else{ |
| 17065 iScore += 1000; |
| 17066 } |
| 17067 mCover |= mPhrase; |
| 17068 |
| 17069 for(j=0; j<pPhrase->nToken; j++){ |
| 17070 mHighlight |= (mPos>>j); |
| 17071 } |
| 17072 |
| 17073 if( 0==(*pCsr & 0x0FE) ) break; |
| 17074 fts3GetDeltaPosition(&pCsr, &iCsr); |
| 17075 } |
| 17076 } |
| 17077 } |
| 17078 |
| 17079 /* Set the output variables before returning. */ |
| 17080 *piToken = iStart; |
| 17081 *piScore = iScore; |
| 17082 *pmCover = mCover; |
| 17083 *pmHighlight = mHighlight; |
| 17084 } |
| 17085 |
| 17086 /* |
| 17087 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). |
| 17088 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. |
| 17089 */ |
| 17090 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 17091 SnippetIter *p = (SnippetIter *)ctx; |
| 17092 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; |
| 17093 char *pCsr; |
| 17094 int rc; |
| 17095 |
| 17096 pPhrase->nToken = pExpr->pPhrase->nToken; |
| 17097 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); |
| 17098 assert( rc==SQLITE_OK || pCsr==0 ); |
| 17099 if( pCsr ){ |
| 17100 int iFirst = 0; |
| 17101 pPhrase->pList = pCsr; |
| 17102 fts3GetDeltaPosition(&pCsr, &iFirst); |
| 17103 assert( iFirst>=0 ); |
| 17104 pPhrase->pHead = pCsr; |
| 17105 pPhrase->pTail = pCsr; |
| 17106 pPhrase->iHead = iFirst; |
| 17107 pPhrase->iTail = iFirst; |
| 17108 }else{ |
| 17109 assert( rc!=SQLITE_OK || ( |
| 17110 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 |
| 17111 )); |
| 17112 } |
| 17113 |
| 17114 return rc; |
| 17115 } |
| 17116 |
| 17117 /* |
| 17118 ** Select the fragment of text consisting of nFragment contiguous tokens |
| 17119 ** from column iCol that represent the "best" snippet. The best snippet |
| 17120 ** is the snippet with the highest score, where scores are calculated |
| 17121 ** by adding: |
| 17122 ** |
| 17123 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. |
| 17124 ** |
| 17125 ** (b) +1000 points for the first occurrence of each matchable phrase in |
| 17126 ** the snippet for which the corresponding mCovered bit is not set. |
| 17127 ** |
| 17128 ** The selected snippet parameters are stored in structure *pFragment before |
| 17129 ** returning. The score of the selected snippet is stored in *piScore |
| 17130 ** before returning. |
| 17131 */ |
| 17132 static int fts3BestSnippet( |
| 17133 int nSnippet, /* Desired snippet length */ |
| 17134 Fts3Cursor *pCsr, /* Cursor to create snippet for */ |
| 17135 int iCol, /* Index of column to create snippet from */ |
| 17136 u64 mCovered, /* Mask of phrases already covered */ |
| 17137 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ |
| 17138 SnippetFragment *pFragment, /* OUT: Best snippet found */ |
| 17139 int *piScore /* OUT: Score of snippet pFragment */ |
| 17140 ){ |
| 17141 int rc; /* Return Code */ |
| 17142 int nList; /* Number of phrases in expression */ |
| 17143 SnippetIter sIter; /* Iterates through snippet candidates */ |
| 17144 int nByte; /* Number of bytes of space to allocate */ |
| 17145 int iBestScore = -1; /* Best snippet score found so far */ |
| 17146 int i; /* Loop counter */ |
| 17147 |
| 17148 memset(&sIter, 0, sizeof(sIter)); |
| 17149 |
| 17150 /* Iterate through the phrases in the expression to count them. The same |
| 17151 ** callback makes sure the doclists are loaded for each phrase. |
| 17152 */ |
| 17153 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); |
| 17154 if( rc!=SQLITE_OK ){ |
| 17155 return rc; |
| 17156 } |
| 17157 |
| 17158 /* Now that it is known how many phrases there are, allocate and zero |
| 17159 ** the required space using malloc(). |
| 17160 */ |
| 17161 nByte = sizeof(SnippetPhrase) * nList; |
| 17162 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); |
| 17163 if( !sIter.aPhrase ){ |
| 17164 return SQLITE_NOMEM; |
| 17165 } |
| 17166 memset(sIter.aPhrase, 0, nByte); |
| 17167 |
| 17168 /* Initialize the contents of the SnippetIter object. Then iterate through |
| 17169 ** the set of phrases in the expression to populate the aPhrase[] array. |
| 17170 */ |
| 17171 sIter.pCsr = pCsr; |
| 17172 sIter.iCol = iCol; |
| 17173 sIter.nSnippet = nSnippet; |
| 17174 sIter.nPhrase = nList; |
| 17175 sIter.iCurrent = -1; |
| 17176 rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); |
| 17177 if( rc==SQLITE_OK ){ |
| 17178 |
| 17179 /* Set the *pmSeen output variable. */ |
| 17180 for(i=0; i<nList; i++){ |
| 17181 if( sIter.aPhrase[i].pHead ){ |
| 17182 *pmSeen |= (u64)1 << i; |
| 17183 } |
| 17184 } |
| 17185 |
| 17186 /* Loop through all candidate snippets. Store the best snippet in |
| 17187 ** *pFragment. Store its associated 'score' in iBestScore. |
| 17188 */ |
| 17189 pFragment->iCol = iCol; |
| 17190 while( !fts3SnippetNextCandidate(&sIter) ){ |
| 17191 int iPos; |
| 17192 int iScore; |
| 17193 u64 mCover; |
| 17194 u64 mHighlite; |
| 17195 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); |
| 17196 assert( iScore>=0 ); |
| 17197 if( iScore>iBestScore ){ |
| 17198 pFragment->iPos = iPos; |
| 17199 pFragment->hlmask = mHighlite; |
| 17200 pFragment->covered = mCover; |
| 17201 iBestScore = iScore; |
| 17202 } |
| 17203 } |
| 17204 |
| 17205 *piScore = iBestScore; |
| 17206 } |
| 17207 sqlite3_free(sIter.aPhrase); |
| 17208 return rc; |
| 17209 } |
| 17210 |
| 17211 |
| 17212 /* |
| 17213 ** Append a string to the string-buffer passed as the first argument. |
| 17214 ** |
| 17215 ** If nAppend is negative, then the length of the string zAppend is |
| 17216 ** determined using strlen(). |
| 17217 */ |
| 17218 static int fts3StringAppend( |
| 17219 StrBuffer *pStr, /* Buffer to append to */ |
| 17220 const char *zAppend, /* Pointer to data to append to buffer */ |
| 17221 int nAppend /* Size of zAppend in bytes (or -1) */ |
| 17222 ){ |
| 17223 if( nAppend<0 ){ |
| 17224 nAppend = (int)strlen(zAppend); |
| 17225 } |
| 17226 |
| 17227 /* If there is insufficient space allocated at StrBuffer.z, use realloc() |
| 17228 ** to grow the buffer until so that it is big enough to accomadate the |
| 17229 ** appended data. |
| 17230 */ |
| 17231 if( pStr->n+nAppend+1>=pStr->nAlloc ){ |
| 17232 int nAlloc = pStr->nAlloc+nAppend+100; |
| 17233 char *zNew = sqlite3_realloc(pStr->z, nAlloc); |
| 17234 if( !zNew ){ |
| 17235 return SQLITE_NOMEM; |
| 17236 } |
| 17237 pStr->z = zNew; |
| 17238 pStr->nAlloc = nAlloc; |
| 17239 } |
| 17240 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); |
| 17241 |
| 17242 /* Append the data to the string buffer. */ |
| 17243 memcpy(&pStr->z[pStr->n], zAppend, nAppend); |
| 17244 pStr->n += nAppend; |
| 17245 pStr->z[pStr->n] = '\0'; |
| 17246 |
| 17247 return SQLITE_OK; |
| 17248 } |
| 17249 |
| 17250 /* |
| 17251 ** The fts3BestSnippet() function often selects snippets that end with a |
| 17252 ** query term. That is, the final term of the snippet is always a term |
| 17253 ** that requires highlighting. For example, if 'X' is a highlighted term |
| 17254 ** and '.' is a non-highlighted term, BestSnippet() may select: |
| 17255 ** |
| 17256 ** ........X.....X |
| 17257 ** |
| 17258 ** This function "shifts" the beginning of the snippet forward in the |
| 17259 ** document so that there are approximately the same number of |
| 17260 ** non-highlighted terms to the right of the final highlighted term as there |
| 17261 ** are to the left of the first highlighted term. For example, to this: |
| 17262 ** |
| 17263 ** ....X.....X.... |
| 17264 ** |
| 17265 ** This is done as part of extracting the snippet text, not when selecting |
| 17266 ** the snippet. Snippet selection is done based on doclists only, so there |
| 17267 ** is no way for fts3BestSnippet() to know whether or not the document |
| 17268 ** actually contains terms that follow the final highlighted term. |
| 17269 */ |
| 17270 static int fts3SnippetShift( |
| 17271 Fts3Table *pTab, /* FTS3 table snippet comes from */ |
| 17272 int iLangid, /* Language id to use in tokenizing */ |
| 17273 int nSnippet, /* Number of tokens desired for snippet */ |
| 17274 const char *zDoc, /* Document text to extract snippet from */ |
| 17275 int nDoc, /* Size of buffer zDoc in bytes */ |
| 17276 int *piPos, /* IN/OUT: First token of snippet */ |
| 17277 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ |
| 17278 ){ |
| 17279 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ |
| 17280 |
| 17281 if( hlmask ){ |
| 17282 int nLeft; /* Tokens to the left of first highlight */ |
| 17283 int nRight; /* Tokens to the right of last highlight */ |
| 17284 int nDesired; /* Ideal number of tokens to shift forward */ |
| 17285 |
| 17286 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); |
| 17287 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); |
| 17288 nDesired = (nLeft-nRight)/2; |
| 17289 |
| 17290 /* Ideally, the start of the snippet should be pushed forward in the |
| 17291 ** document nDesired tokens. This block checks if there are actually |
| 17292 ** nDesired tokens to the right of the snippet. If so, *piPos and |
| 17293 ** *pHlMask are updated to shift the snippet nDesired tokens to the |
| 17294 ** right. Otherwise, the snippet is shifted by the number of tokens |
| 17295 ** available. |
| 17296 */ |
| 17297 if( nDesired>0 ){ |
| 17298 int nShift; /* Number of tokens to shift snippet by */ |
| 17299 int iCurrent = 0; /* Token counter */ |
| 17300 int rc; /* Return Code */ |
| 17301 sqlite3_tokenizer_module *pMod; |
| 17302 sqlite3_tokenizer_cursor *pC; |
| 17303 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 17304 |
| 17305 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) |
| 17306 ** or more tokens in zDoc/nDoc. |
| 17307 */ |
| 17308 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); |
| 17309 if( rc!=SQLITE_OK ){ |
| 17310 return rc; |
| 17311 } |
| 17312 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ |
| 17313 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; |
| 17314 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); |
| 17315 } |
| 17316 pMod->xClose(pC); |
| 17317 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } |
| 17318 |
| 17319 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; |
| 17320 assert( nShift<=nDesired ); |
| 17321 if( nShift>0 ){ |
| 17322 *piPos += nShift; |
| 17323 *pHlmask = hlmask >> nShift; |
| 17324 } |
| 17325 } |
| 17326 } |
| 17327 return SQLITE_OK; |
| 17328 } |
| 17329 |
| 17330 /* |
| 17331 ** Extract the snippet text for fragment pFragment from cursor pCsr and |
| 17332 ** append it to string buffer pOut. |
| 17333 */ |
| 17334 static int fts3SnippetText( |
| 17335 Fts3Cursor *pCsr, /* FTS3 Cursor */ |
| 17336 SnippetFragment *pFragment, /* Snippet to extract */ |
| 17337 int iFragment, /* Fragment number */ |
| 17338 int isLast, /* True for final fragment in snippet */ |
| 17339 int nSnippet, /* Number of tokens in extracted snippet */ |
| 17340 const char *zOpen, /* String inserted before highlighted term */ |
| 17341 const char *zClose, /* String inserted after highlighted term */ |
| 17342 const char *zEllipsis, /* String inserted between snippets */ |
| 17343 StrBuffer *pOut /* Write output here */ |
| 17344 ){ |
| 17345 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17346 int rc; /* Return code */ |
| 17347 const char *zDoc; /* Document text to extract snippet from */ |
| 17348 int nDoc; /* Size of zDoc in bytes */ |
| 17349 int iCurrent = 0; /* Current token number of document */ |
| 17350 int iEnd = 0; /* Byte offset of end of current token */ |
| 17351 int isShiftDone = 0; /* True after snippet is shifted */ |
| 17352 int iPos = pFragment->iPos; /* First token of snippet */ |
| 17353 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ |
| 17354 int iCol = pFragment->iCol+1; /* Query column to extract text from */ |
| 17355 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ |
| 17356 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ |
| 17357 |
| 17358 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); |
| 17359 if( zDoc==0 ){ |
| 17360 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ |
| 17361 return SQLITE_NOMEM; |
| 17362 } |
| 17363 return SQLITE_OK; |
| 17364 } |
| 17365 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); |
| 17366 |
| 17367 /* Open a token cursor on the document. */ |
| 17368 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 17369 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); |
| 17370 if( rc!=SQLITE_OK ){ |
| 17371 return rc; |
| 17372 } |
| 17373 |
| 17374 while( rc==SQLITE_OK ){ |
| 17375 const char *ZDUMMY; /* Dummy argument used with tokenizer */ |
| 17376 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ |
| 17377 int iBegin = 0; /* Offset in zDoc of start of token */ |
| 17378 int iFin = 0; /* Offset in zDoc of end of token */ |
| 17379 int isHighlight = 0; /* True for highlighted terms */ |
| 17380 |
| 17381 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere |
| 17382 ** in the FTS code the variable that the third argument to xNext points to |
| 17383 ** is initialized to zero before the first (*but not necessarily |
| 17384 ** subsequent*) call to xNext(). This is done for a particular application |
| 17385 ** that needs to know whether or not the tokenizer is being used for |
| 17386 ** snippet generation or for some other purpose. |
| 17387 ** |
| 17388 ** Extreme care is required when writing code to depend on this |
| 17389 ** initialization. It is not a documented part of the tokenizer interface. |
| 17390 ** If a tokenizer is used directly by any code outside of FTS, this |
| 17391 ** convention might not be respected. */ |
| 17392 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); |
| 17393 if( rc!=SQLITE_OK ){ |
| 17394 if( rc==SQLITE_DONE ){ |
| 17395 /* Special case - the last token of the snippet is also the last token |
| 17396 ** of the column. Append any punctuation that occurred between the end |
| 17397 ** of the previous token and the end of the document to the output. |
| 17398 ** Then break out of the loop. */ |
| 17399 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); |
| 17400 } |
| 17401 break; |
| 17402 } |
| 17403 if( iCurrent<iPos ){ continue; } |
| 17404 |
| 17405 if( !isShiftDone ){ |
| 17406 int n = nDoc - iBegin; |
| 17407 rc = fts3SnippetShift( |
| 17408 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask |
| 17409 ); |
| 17410 isShiftDone = 1; |
| 17411 |
| 17412 /* Now that the shift has been done, check if the initial "..." are |
| 17413 ** required. They are required if (a) this is not the first fragment, |
| 17414 ** or (b) this fragment does not begin at position 0 of its column. |
| 17415 */ |
| 17416 if( rc==SQLITE_OK ){ |
| 17417 if( iPos>0 || iFragment>0 ){ |
| 17418 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 17419 }else if( iBegin ){ |
| 17420 rc = fts3StringAppend(pOut, zDoc, iBegin); |
| 17421 } |
| 17422 } |
| 17423 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; |
| 17424 } |
| 17425 |
| 17426 if( iCurrent>=(iPos+nSnippet) ){ |
| 17427 if( isLast ){ |
| 17428 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 17429 } |
| 17430 break; |
| 17431 } |
| 17432 |
| 17433 /* Set isHighlight to true if this term should be highlighted. */ |
| 17434 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; |
| 17435 |
| 17436 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); |
| 17437 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); |
| 17438 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); |
| 17439 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); |
| 17440 |
| 17441 iEnd = iFin; |
| 17442 } |
| 17443 |
| 17444 pMod->xClose(pC); |
| 17445 return rc; |
| 17446 } |
| 17447 |
| 17448 |
/*
** Count the entries in a column-list (a delta-encoded list of term
** offsets within a single column of a single row).
**
** On entry *ppCollist must point at the first byte of the first varint of
** the column-list. On return it points at the byte that terminates the
** list: either the 0x00 that ends the position-list or the 0x01 that
** introduces the column number of the next column.
**
** The return value is the number of varints (entries) that were skipped.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;
  int nCount = 0;
  int bContinued = 0;      /* True if the previous byte had bit 0x80 set */

  for(;;){
    unsigned char b = (unsigned char)*p;

    /* A 0x00 or 0x01 byte terminates the list, but only when it is not
    ** the final byte of a multi-byte varint. */
    if( !bContinued && (b & 0xFE)==0 ) break;

    p++;
    bContinued = (b & 0x80)!=0;
    if( !bContinued ) nCount++;   /* Last byte of a varint: one entry */
  }

  *ppCollist = p;
  return nCount;
}
| 17476 |
| 17477 /* |
| 17478 ** This function gathers 'y' or 'b' data for a single phrase. |
| 17479 */ |
| 17480 static void fts3ExprLHits( |
| 17481 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17482 MatchInfo *p /* Matchinfo context */ |
| 17483 ){ |
| 17484 Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; |
| 17485 int iStart; |
| 17486 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 17487 char *pIter = pPhrase->doclist.pList; |
| 17488 int iCol = 0; |
| 17489 |
| 17490 assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); |
| 17491 if( p->flag==FTS3_MATCHINFO_LHITS ){ |
| 17492 iStart = pExpr->iPhrase * p->nCol; |
| 17493 }else{ |
| 17494 iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); |
| 17495 } |
| 17496 |
| 17497 while( 1 ){ |
| 17498 int nHit = fts3ColumnlistCount(&pIter); |
| 17499 if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ |
| 17500 if( p->flag==FTS3_MATCHINFO_LHITS ){ |
| 17501 p->aMatchinfo[iStart + iCol] = (u32)nHit; |
| 17502 }else if( nHit ){ |
| 17503 p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); |
| 17504 } |
| 17505 } |
| 17506 assert( *pIter==0x00 || *pIter==0x01 ); |
| 17507 if( *pIter!=0x01 ) break; |
| 17508 pIter++; |
| 17509 pIter += fts3GetVarint32(pIter, &iCol); |
| 17510 } |
| 17511 } |
| 17512 |
| 17513 /* |
| 17514 ** Gather the results for matchinfo directives 'y' and 'b'. |
| 17515 */ |
| 17516 static void fts3ExprLHitGather( |
| 17517 Fts3Expr *pExpr, |
| 17518 MatchInfo *p |
| 17519 ){ |
| 17520 assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); |
| 17521 if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ |
| 17522 if( pExpr->pLeft ){ |
| 17523 fts3ExprLHitGather(pExpr->pLeft, p); |
| 17524 fts3ExprLHitGather(pExpr->pRight, p); |
| 17525 }else{ |
| 17526 fts3ExprLHits(pExpr, p); |
| 17527 } |
| 17528 } |
| 17529 } |
| 17530 |
| 17531 /* |
| 17532 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats |
| 17533 ** for a single query. |
| 17534 ** |
| 17535 ** fts3ExprIterate() callback to load the 'global' elements of a |
| 17536 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements |
| 17537 ** of the matchinfo array that are constant for all rows returned by the |
| 17538 ** current query. |
| 17539 ** |
| 17540 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This |
| 17541 ** function populates Matchinfo.aMatchinfo[] as follows: |
| 17542 ** |
| 17543 ** for(iCol=0; iCol<nCol; iCol++){ |
| 17544 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; |
| 17545 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; |
| 17546 ** } |
| 17547 ** |
| 17548 ** where X is the number of matches for phrase iPhrase is column iCol of all |
| 17549 ** rows of the table. Y is the number of rows for which column iCol contains |
| 17550 ** at least one instance of phrase iPhrase. |
| 17551 ** |
| 17552 ** If the phrase pExpr consists entirely of deferred tokens, then all X and |
| 17553 ** Y values are set to nDoc, where nDoc is the number of documents in the |
| 17554 ** file system. This is done because the full-text index doclist is required |
| 17555 ** to calculate these values properly, and the full-text index doclist is |
| 17556 ** not available for deferred tokens. |
| 17557 */ |
| 17558 static int fts3ExprGlobalHitsCb( |
| 17559 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17560 int iPhrase, /* Phrase number (numbered from zero) */ |
| 17561 void *pCtx /* Pointer to MatchInfo structure */ |
| 17562 ){ |
| 17563 MatchInfo *p = (MatchInfo *)pCtx; |
| 17564 return sqlite3Fts3EvalPhraseStats( |
| 17565 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] |
| 17566 ); |
| 17567 } |
| 17568 |
| 17569 /* |
| 17570 ** fts3ExprIterate() callback used to collect the "local" part of the |
| 17571 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the |
| 17572 ** array that are different for each row returned by the query. |
| 17573 */ |
| 17574 static int fts3ExprLocalHitsCb( |
| 17575 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17576 int iPhrase, /* Phrase number */ |
| 17577 void *pCtx /* Pointer to MatchInfo structure */ |
| 17578 ){ |
| 17579 int rc = SQLITE_OK; |
| 17580 MatchInfo *p = (MatchInfo *)pCtx; |
| 17581 int iStart = iPhrase * p->nCol * 3; |
| 17582 int i; |
| 17583 |
| 17584 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ |
| 17585 char *pCsr; |
| 17586 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); |
| 17587 if( pCsr ){ |
| 17588 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); |
| 17589 }else{ |
| 17590 p->aMatchinfo[iStart+i*3] = 0; |
| 17591 } |
| 17592 } |
| 17593 |
| 17594 return rc; |
| 17595 } |
| 17596 |
| 17597 static int fts3MatchinfoCheck( |
| 17598 Fts3Table *pTab, |
| 17599 char cArg, |
| 17600 char **pzErr |
| 17601 ){ |
| 17602 if( (cArg==FTS3_MATCHINFO_NPHRASE) |
| 17603 || (cArg==FTS3_MATCHINFO_NCOL) |
| 17604 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) |
| 17605 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) |
| 17606 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) |
| 17607 || (cArg==FTS3_MATCHINFO_LCS) |
| 17608 || (cArg==FTS3_MATCHINFO_HITS) |
| 17609 || (cArg==FTS3_MATCHINFO_LHITS) |
| 17610 || (cArg==FTS3_MATCHINFO_LHITS_BM) |
| 17611 ){ |
| 17612 return SQLITE_OK; |
| 17613 } |
| 17614 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); |
| 17615 return SQLITE_ERROR; |
| 17616 } |
| 17617 |
| 17618 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ |
| 17619 int nVal; /* Number of integers output by cArg */ |
| 17620 |
| 17621 switch( cArg ){ |
| 17622 case FTS3_MATCHINFO_NDOC: |
| 17623 case FTS3_MATCHINFO_NPHRASE: |
| 17624 case FTS3_MATCHINFO_NCOL: |
| 17625 nVal = 1; |
| 17626 break; |
| 17627 |
| 17628 case FTS3_MATCHINFO_AVGLENGTH: |
| 17629 case FTS3_MATCHINFO_LENGTH: |
| 17630 case FTS3_MATCHINFO_LCS: |
| 17631 nVal = pInfo->nCol; |
| 17632 break; |
| 17633 |
| 17634 case FTS3_MATCHINFO_LHITS: |
| 17635 nVal = pInfo->nCol * pInfo->nPhrase; |
| 17636 break; |
| 17637 |
| 17638 case FTS3_MATCHINFO_LHITS_BM: |
| 17639 nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); |
| 17640 break; |
| 17641 |
| 17642 default: |
| 17643 assert( cArg==FTS3_MATCHINFO_HITS ); |
| 17644 nVal = pInfo->nCol * pInfo->nPhrase * 3; |
| 17645 break; |
| 17646 } |
| 17647 |
| 17648 return nVal; |
| 17649 } |
| 17650 |
| 17651 static int fts3MatchinfoSelectDoctotal( |
| 17652 Fts3Table *pTab, |
| 17653 sqlite3_stmt **ppStmt, |
| 17654 sqlite3_int64 *pnDoc, |
| 17655 const char **paLen |
| 17656 ){ |
| 17657 sqlite3_stmt *pStmt; |
| 17658 const char *a; |
| 17659 sqlite3_int64 nDoc; |
| 17660 |
| 17661 if( !*ppStmt ){ |
| 17662 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); |
| 17663 if( rc!=SQLITE_OK ) return rc; |
| 17664 } |
| 17665 pStmt = *ppStmt; |
| 17666 assert( sqlite3_data_count(pStmt)==1 ); |
| 17667 |
| 17668 a = sqlite3_column_blob(pStmt, 0); |
| 17669 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 17670 if( nDoc==0 ) return FTS_CORRUPT_VTAB; |
| 17671 *pnDoc = (u32)nDoc; |
| 17672 |
| 17673 if( paLen ) *paLen = a; |
| 17674 return SQLITE_OK; |
| 17675 } |
| 17676 |
| 17677 /* |
| 17678 ** An instance of the following structure is used to store state while |
| 17679 ** iterating through a multi-column position-list corresponding to the |
| 17680 ** hits for a single phrase on a single row in order to calculate the |
| 17681 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. |
| 17682 */ |
| 17683 typedef struct LcsIterator LcsIterator; |
| 17684 struct LcsIterator { |
| 17685 Fts3Expr *pExpr; /* Pointer to phrase expression */ |
| 17686 int iPosOffset; /* Tokens count up to end of this phrase */ |
| 17687 char *pRead; /* Cursor used to iterate through aDoclist */ |
| 17688 int iPos; /* Current position */ |
| 17689 }; |
| 17690 |
/*
** Sentinel value meaning that an LCS iterator has finished iterating
** through all offsets for all columns. NOTE(review): no use of this macro
** is visible in this part of the file, and LcsIterator has no iCol member;
** confirm whether it is still needed.
**
** The definition must not end in a semicolon, otherwise the macro cannot
** be used inside an expression.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
| 17696 |
| 17697 static int fts3MatchinfoLcsCb( |
| 17698 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17699 int iPhrase, /* Phrase number (numbered from zero) */ |
| 17700 void *pCtx /* Pointer to MatchInfo structure */ |
| 17701 ){ |
| 17702 LcsIterator *aIter = (LcsIterator *)pCtx; |
| 17703 aIter[iPhrase].pExpr = pExpr; |
| 17704 return SQLITE_OK; |
| 17705 } |
| 17706 |
| 17707 /* |
| 17708 ** Advance the iterator passed as an argument to the next position. Return |
| 17709 ** 1 if the iterator is at EOF or if it now points to the start of the |
| 17710 ** position list for the next column. |
| 17711 */ |
| 17712 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ |
| 17713 char *pRead = pIter->pRead; |
| 17714 sqlite3_int64 iRead; |
| 17715 int rc = 0; |
| 17716 |
| 17717 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 17718 if( iRead==0 || iRead==1 ){ |
| 17719 pRead = 0; |
| 17720 rc = 1; |
| 17721 }else{ |
| 17722 pIter->iPos += (int)(iRead-2); |
| 17723 } |
| 17724 |
| 17725 pIter->pRead = pRead; |
| 17726 return rc; |
| 17727 } |
| 17728 |
| 17729 /* |
| 17730 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. |
| 17731 ** |
| 17732 ** If the call is successful, the longest-common-substring lengths for each |
| 17733 ** column are written into the first nCol elements of the pInfo->aMatchinfo[] |
| 17734 ** array before returning. SQLITE_OK is returned in this case. |
| 17735 ** |
| 17736 ** Otherwise, if an error occurs, an SQLite error code is returned and the |
| 17737 ** data written to the first nCol elements of pInfo->aMatchinfo[] is |
| 17738 ** undefined. |
| 17739 */ |
| 17740 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ |
| 17741 LcsIterator *aIter; |
| 17742 int i; |
| 17743 int iCol; |
| 17744 int nToken = 0; |
| 17745 |
| 17746 /* Allocate and populate the array of LcsIterator objects. The array |
| 17747 ** contains one element for each matchable phrase in the query. |
| 17748 **/ |
| 17749 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); |
| 17750 if( !aIter ) return SQLITE_NOMEM; |
| 17751 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); |
| 17752 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); |
| 17753 |
| 17754 for(i=0; i<pInfo->nPhrase; i++){ |
| 17755 LcsIterator *pIter = &aIter[i]; |
| 17756 nToken -= pIter->pExpr->pPhrase->nToken; |
| 17757 pIter->iPosOffset = nToken; |
| 17758 } |
| 17759 |
| 17760 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17761 int nLcs = 0; /* LCS value for this column */ |
| 17762 int nLive = 0; /* Number of iterators in aIter not at EOF */ |
| 17763 |
| 17764 for(i=0; i<pInfo->nPhrase; i++){ |
| 17765 int rc; |
| 17766 LcsIterator *pIt = &aIter[i]; |
| 17767 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); |
| 17768 if( rc!=SQLITE_OK ) return rc; |
| 17769 if( pIt->pRead ){ |
| 17770 pIt->iPos = pIt->iPosOffset; |
| 17771 fts3LcsIteratorAdvance(&aIter[i]); |
| 17772 nLive++; |
| 17773 } |
| 17774 } |
| 17775 |
| 17776 while( nLive>0 ){ |
| 17777 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ |
| 17778 int nThisLcs = 0; /* LCS for the current iterator positions */ |
| 17779 |
| 17780 for(i=0; i<pInfo->nPhrase; i++){ |
| 17781 LcsIterator *pIter = &aIter[i]; |
| 17782 if( pIter->pRead==0 ){ |
| 17783 /* This iterator is already at EOF for this column. */ |
| 17784 nThisLcs = 0; |
| 17785 }else{ |
| 17786 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ |
| 17787 pAdv = pIter; |
| 17788 } |
| 17789 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
| 17790 nThisLcs++; |
| 17791 }else{ |
| 17792 nThisLcs = 1; |
| 17793 } |
| 17794 if( nThisLcs>nLcs ) nLcs = nThisLcs; |
| 17795 } |
| 17796 } |
| 17797 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; |
| 17798 } |
| 17799 |
| 17800 pInfo->aMatchinfo[iCol] = nLcs; |
| 17801 } |
| 17802 |
| 17803 sqlite3_free(aIter); |
| 17804 return SQLITE_OK; |
| 17805 } |
| 17806 |
| 17807 /* |
| 17808 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to |
| 17809 ** be returned by the matchinfo() function. Argument zArg contains the |
| 17810 ** format string passed as the second argument to matchinfo (or the |
| 17811 ** default value "pcx" if no second argument was specified). The format |
| 17812 ** string has already been validated and the pInfo->aMatchinfo[] array |
| 17813 ** is guaranteed to be large enough for the output. |
| 17814 ** |
| 17815 ** If bGlobal is true, then populate all fields of the matchinfo() output. |
| 17816 ** If it is false, then assume that those fields that do not change between |
| 17817 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) |
| 17818 ** have already been populated. |
| 17819 ** |
| 17820 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 17821 ** occurs. If a value other than SQLITE_OK is returned, the state the |
| 17822 ** pInfo->aMatchinfo[] buffer is left in is undefined. |
| 17823 */ |
| 17824 static int fts3MatchinfoValues( |
| 17825 Fts3Cursor *pCsr, /* FTS3 cursor object */ |
| 17826 int bGlobal, /* True to grab the global stats */ |
| 17827 MatchInfo *pInfo, /* Matchinfo context object */ |
| 17828 const char *zArg /* Matchinfo format string */ |
| 17829 ){ |
| 17830 int rc = SQLITE_OK; |
| 17831 int i; |
| 17832 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17833 sqlite3_stmt *pSelect = 0; |
| 17834 |
| 17835 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ |
| 17836 pInfo->flag = zArg[i]; |
| 17837 switch( zArg[i] ){ |
| 17838 case FTS3_MATCHINFO_NPHRASE: |
| 17839 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; |
| 17840 break; |
| 17841 |
| 17842 case FTS3_MATCHINFO_NCOL: |
| 17843 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
| 17844 break; |
| 17845 |
| 17846 case FTS3_MATCHINFO_NDOC: |
| 17847 if( bGlobal ){ |
| 17848 sqlite3_int64 nDoc = 0; |
| 17849 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); |
| 17850 pInfo->aMatchinfo[0] = (u32)nDoc; |
| 17851 } |
| 17852 break; |
| 17853 |
| 17854 case FTS3_MATCHINFO_AVGLENGTH: |
| 17855 if( bGlobal ){ |
| 17856 sqlite3_int64 nDoc; /* Number of rows in table */ |
| 17857 const char *a; /* Aggregate column length array */ |
| 17858 |
| 17859 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); |
| 17860 if( rc==SQLITE_OK ){ |
| 17861 int iCol; |
| 17862 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17863 u32 iVal; |
| 17864 sqlite3_int64 nToken; |
| 17865 a += sqlite3Fts3GetVarint(a, &nToken); |
| 17866 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); |
| 17867 pInfo->aMatchinfo[iCol] = iVal; |
| 17868 } |
| 17869 } |
| 17870 } |
| 17871 break; |
| 17872 |
| 17873 case FTS3_MATCHINFO_LENGTH: { |
| 17874 sqlite3_stmt *pSelectDocsize = 0; |
| 17875 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); |
| 17876 if( rc==SQLITE_OK ){ |
| 17877 int iCol; |
| 17878 const char *a = sqlite3_column_blob(pSelectDocsize, 0); |
| 17879 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17880 sqlite3_int64 nToken; |
| 17881 a += sqlite3Fts3GetVarint(a, &nToken); |
| 17882 pInfo->aMatchinfo[iCol] = (u32)nToken; |
| 17883 } |
| 17884 } |
| 17885 sqlite3_reset(pSelectDocsize); |
| 17886 break; |
| 17887 } |
| 17888 |
| 17889 case FTS3_MATCHINFO_LCS: |
| 17890 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 17891 if( rc==SQLITE_OK ){ |
| 17892 rc = fts3MatchinfoLcs(pCsr, pInfo); |
| 17893 } |
| 17894 break; |
| 17895 |
| 17896 case FTS3_MATCHINFO_LHITS_BM: |
| 17897 case FTS3_MATCHINFO_LHITS: { |
| 17898 int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); |
| 17899 memset(pInfo->aMatchinfo, 0, nZero); |
| 17900 fts3ExprLHitGather(pCsr->pExpr, pInfo); |
| 17901 break; |
| 17902 } |
| 17903 |
| 17904 default: { |
| 17905 Fts3Expr *pExpr; |
| 17906 assert( zArg[i]==FTS3_MATCHINFO_HITS ); |
| 17907 pExpr = pCsr->pExpr; |
| 17908 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 17909 if( rc!=SQLITE_OK ) break; |
| 17910 if( bGlobal ){ |
| 17911 if( pCsr->pDeferred ){ |
| 17912 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); |
| 17913 if( rc!=SQLITE_OK ) break; |
| 17914 } |
| 17915 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); |
| 17916 sqlite3Fts3EvalTestDeferred(pCsr, &rc); |
| 17917 if( rc!=SQLITE_OK ) break; |
| 17918 } |
| 17919 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); |
| 17920 break; |
| 17921 } |
| 17922 } |
| 17923 |
| 17924 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); |
| 17925 } |
| 17926 |
| 17927 sqlite3_reset(pSelect); |
| 17928 return rc; |
| 17929 } |
| 17930 |
| 17931 |
| 17932 /* |
| 17933 ** Populate pCsr->aMatchinfo[] with data for the current row. The |
| 17934 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). |
| 17935 */ |
| 17936 static void fts3GetMatchinfo( |
| 17937 sqlite3_context *pCtx, /* Return results here */ |
| 17938 Fts3Cursor *pCsr, /* FTS3 Cursor object */ |
| 17939 const char *zArg /* Second argument to matchinfo() function */ |
| 17940 ){ |
| 17941 MatchInfo sInfo; |
| 17942 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17943 int rc = SQLITE_OK; |
| 17944 int bGlobal = 0; /* Collect 'global' stats as well as local */ |
| 17945 |
| 17946 u32 *aOut = 0; |
| 17947 void (*xDestroyOut)(void*) = 0; |
| 17948 |
| 17949 memset(&sInfo, 0, sizeof(MatchInfo)); |
| 17950 sInfo.pCursor = pCsr; |
| 17951 sInfo.nCol = pTab->nColumn; |
| 17952 |
| 17953 /* If there is cached matchinfo() data, but the format string for the |
| 17954 ** cache does not match the format string for this request, discard |
| 17955 ** the cached data. */ |
| 17956 if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ |
| 17957 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 17958 pCsr->pMIBuffer = 0; |
| 17959 } |
| 17960 |
| 17961 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the |
| 17962 ** matchinfo function has been called for this query. In this case |
| 17963 ** allocate the array used to accumulate the matchinfo data and |
| 17964 ** initialize those elements that are constant for every row. |
| 17965 */ |
| 17966 if( pCsr->pMIBuffer==0 ){ |
| 17967 int nMatchinfo = 0; /* Number of u32 elements in match-info */ |
| 17968 int i; /* Used to iterate through zArg */ |
| 17969 |
| 17970 /* Determine the number of phrases in the query */ |
| 17971 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); |
| 17972 sInfo.nPhrase = pCsr->nPhrase; |
| 17973 |
| 17974 /* Determine the number of integers in the buffer returned by this call. */ |
| 17975 for(i=0; zArg[i]; i++){ |
| 17976 char *zErr = 0; |
| 17977 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ |
| 17978 sqlite3_result_error(pCtx, zErr, -1); |
| 17979 sqlite3_free(zErr); |
| 17980 return; |
| 17981 } |
| 17982 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); |
| 17983 } |
| 17984 |
| 17985 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ |
| 17986 pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); |
| 17987 if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; |
| 17988 |
| 17989 pCsr->isMatchinfoNeeded = 1; |
| 17990 bGlobal = 1; |
| 17991 } |
| 17992 |
| 17993 if( rc==SQLITE_OK ){ |
| 17994 xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); |
| 17995 if( xDestroyOut==0 ){ |
| 17996 rc = SQLITE_NOMEM; |
| 17997 } |
| 17998 } |
| 17999 |
| 18000 if( rc==SQLITE_OK ){ |
| 18001 sInfo.aMatchinfo = aOut; |
| 18002 sInfo.nPhrase = pCsr->nPhrase; |
| 18003 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); |
| 18004 if( bGlobal ){ |
| 18005 fts3MIBufferSetGlobal(pCsr->pMIBuffer); |
| 18006 } |
| 18007 } |
| 18008 |
| 18009 if( rc!=SQLITE_OK ){ |
| 18010 sqlite3_result_error_code(pCtx, rc); |
| 18011 if( xDestroyOut ) xDestroyOut(aOut); |
| 18012 }else{ |
| 18013 int n = pCsr->pMIBuffer->nElem * sizeof(u32); |
| 18014 sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); |
| 18015 } |
| 18016 } |
| 18017 |
| 18018 /* |
| 18019 ** Implementation of snippet() function. |
| 18020 */ |
| 18021 SQLITE_PRIVATE void sqlite3Fts3Snippet( |
| 18022 sqlite3_context *pCtx, /* SQLite function call context */ |
| 18023 Fts3Cursor *pCsr, /* Cursor object */ |
| 18024 const char *zStart, /* Snippet start text - "<b>" */ |
| 18025 const char *zEnd, /* Snippet end text - "</b>" */ |
| 18026 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ |
| 18027 int iCol, /* Extract snippet from this column */ |
| 18028 int nToken /* Approximate number of tokens in snippet */ |
| 18029 ){ |
| 18030 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18031 int rc = SQLITE_OK; |
| 18032 int i; |
| 18033 StrBuffer res = {0, 0, 0}; |
| 18034 |
| 18035 /* The returned text includes up to four fragments of text extracted from |
| 18036 ** the data in the current row. The first iteration of the for(...) loop |
| 18037 ** below attempts to locate a single fragment of text nToken tokens in |
| 18038 ** size that contains at least one instance of all phrases in the query |
| 18039 ** expression that appear in the current row. If such a fragment of text |
| 18040 ** cannot be found, the second iteration of the loop attempts to locate |
| 18041 ** a pair of fragments, and so on. |
| 18042 */ |
| 18043 int nSnippet = 0; /* Number of fragments in this snippet */ |
| 18044 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ |
| 18045 int nFToken = -1; /* Number of tokens in each fragment */ |
| 18046 |
| 18047 if( !pCsr->pExpr ){ |
| 18048 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 18049 return; |
| 18050 } |
| 18051 |
| 18052 for(nSnippet=1; 1; nSnippet++){ |
| 18053 |
| 18054 int iSnip; /* Loop counter 0..nSnippet-1 */ |
| 18055 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ |
| 18056 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ |
| 18057 |
| 18058 if( nToken>=0 ){ |
| 18059 nFToken = (nToken+nSnippet-1) / nSnippet; |
| 18060 }else{ |
| 18061 nFToken = -1 * nToken; |
| 18062 } |
| 18063 |
| 18064 for(iSnip=0; iSnip<nSnippet; iSnip++){ |
| 18065 int iBestScore = -1; /* Best score of columns checked so far */ |
| 18066 int iRead; /* Used to iterate through columns */ |
| 18067 SnippetFragment *pFragment = &aSnippet[iSnip]; |
| 18068 |
| 18069 memset(pFragment, 0, sizeof(*pFragment)); |
| 18070 |
| 18071 /* Loop through all columns of the table being considered for snippets. |
| 18072 ** If the iCol argument to this function was negative, this means all |
| 18073 ** columns of the FTS3 table. Otherwise, only column iCol is considered. |
| 18074 */ |
| 18075 for(iRead=0; iRead<pTab->nColumn; iRead++){ |
| 18076 SnippetFragment sF = {0, 0, 0, 0}; |
| 18077 int iS = 0; |
| 18078 if( iCol>=0 && iRead!=iCol ) continue; |
| 18079 |
| 18080 /* Find the best snippet of nFToken tokens in column iRead. */ |
| 18081 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); |
| 18082 if( rc!=SQLITE_OK ){ |
| 18083 goto snippet_out; |
| 18084 } |
| 18085 if( iS>iBestScore ){ |
| 18086 *pFragment = sF; |
| 18087 iBestScore = iS; |
| 18088 } |
| 18089 } |
| 18090 |
| 18091 mCovered |= pFragment->covered; |
| 18092 } |
| 18093 |
| 18094 /* If all query phrases seen by fts3BestSnippet() are present in at least |
| 18095 ** one of the nSnippet snippet fragments, break out of the loop. |
| 18096 */ |
| 18097 assert( (mCovered&mSeen)==mCovered ); |
| 18098 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; |
| 18099 } |
| 18100 |
| 18101 assert( nFToken>0 ); |
| 18102 |
| 18103 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ |
| 18104 rc = fts3SnippetText(pCsr, &aSnippet[i], |
| 18105 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res |
| 18106 ); |
| 18107 } |
| 18108 |
| 18109 snippet_out: |
| 18110 sqlite3Fts3SegmentsClose(pTab); |
| 18111 if( rc!=SQLITE_OK ){ |
| 18112 sqlite3_result_error_code(pCtx, rc); |
| 18113 sqlite3_free(res.z); |
| 18114 }else{ |
| 18115 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); |
| 18116 } |
| 18117 } |
| 18118 |
| 18119 |
| 18120 typedef struct TermOffset TermOffset; |
| 18121 typedef struct TermOffsetCtx TermOffsetCtx; |
| 18122 |
| 18123 struct TermOffset { |
| 18124 char *pList; /* Position-list */ |
| 18125 int iPos; /* Position just read from pList */ |
| 18126 int iOff; /* Offset of this term from read positions */ |
| 18127 }; |
| 18128 |
| 18129 struct TermOffsetCtx { |
| 18130 Fts3Cursor *pCsr; |
| 18131 int iCol; /* Column of table to populate aTerm for */ |
| 18132 int iTerm; |
| 18133 sqlite3_int64 iDocid; |
| 18134 TermOffset *aTerm; |
| 18135 }; |
| 18136 |
| 18137 /* |
| 18138 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). |
| 18139 */ |
| 18140 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 18141 TermOffsetCtx *p = (TermOffsetCtx *)ctx; |
| 18142 int nTerm; /* Number of tokens in phrase */ |
| 18143 int iTerm; /* For looping through nTerm phrase terms */ |
| 18144 char *pList; /* Pointer to position list for phrase */ |
| 18145 int iPos = 0; /* First position in position-list */ |
| 18146 int rc; |
| 18147 |
| 18148 UNUSED_PARAMETER(iPhrase); |
| 18149 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); |
| 18150 nTerm = pExpr->pPhrase->nToken; |
| 18151 if( pList ){ |
| 18152 fts3GetDeltaPosition(&pList, &iPos); |
| 18153 assert( iPos>=0 ); |
| 18154 } |
| 18155 |
| 18156 for(iTerm=0; iTerm<nTerm; iTerm++){ |
| 18157 TermOffset *pT = &p->aTerm[p->iTerm++]; |
| 18158 pT->iOff = nTerm-iTerm-1; |
| 18159 pT->pList = pList; |
| 18160 pT->iPos = iPos; |
| 18161 } |
| 18162 |
| 18163 return rc; |
| 18164 } |
| 18165 |
| 18166 /* |
| 18167 ** Implementation of offsets() function. |
| 18168 */ |
| 18169 SQLITE_PRIVATE void sqlite3Fts3Offsets( |
| 18170 sqlite3_context *pCtx, /* SQLite function call context */ |
| 18171 Fts3Cursor *pCsr /* Cursor object */ |
| 18172 ){ |
| 18173 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18174 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; |
| 18175 int rc; /* Return Code */ |
| 18176 int nToken; /* Number of tokens in query */ |
| 18177 int iCol; /* Column currently being processed */ |
| 18178 StrBuffer res = {0, 0, 0}; /* Result string */ |
| 18179 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ |
| 18180 |
| 18181 if( !pCsr->pExpr ){ |
| 18182 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 18183 return; |
| 18184 } |
| 18185 |
| 18186 memset(&sCtx, 0, sizeof(sCtx)); |
| 18187 assert( pCsr->isRequireSeek==0 ); |
| 18188 |
| 18189 /* Count the number of terms in the query */ |
| 18190 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); |
| 18191 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18192 |
| 18193 /* Allocate the array of TermOffset iterators. */ |
| 18194 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); |
| 18195 if( 0==sCtx.aTerm ){ |
| 18196 rc = SQLITE_NOMEM; |
| 18197 goto offsets_out; |
| 18198 } |
| 18199 sCtx.iDocid = pCsr->iPrevId; |
| 18200 sCtx.pCsr = pCsr; |
| 18201 |
| 18202 /* Loop through the table columns, appending offset information to |
| 18203 ** string-buffer res for each column. |
| 18204 */ |
| 18205 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 18206 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ |
| 18207 const char *ZDUMMY; /* Dummy argument used with xNext() */ |
| 18208 int NDUMMY = 0; /* Dummy argument used with xNext() */ |
| 18209 int iStart = 0; |
| 18210 int iEnd = 0; |
| 18211 int iCurrent = 0; |
| 18212 const char *zDoc; |
| 18213 int nDoc; |
| 18214 |
| 18215 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is |
| 18216 ** no way that this operation can fail, so the return code from |
| 18217 ** fts3ExprIterate() can be discarded. |
| 18218 */ |
| 18219 sCtx.iCol = iCol; |
| 18220 sCtx.iTerm = 0; |
| 18221 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); |
| 18222 |
| 18223 /* Retreive the text stored in column iCol. If an SQL NULL is stored |
| 18224 ** in column iCol, jump immediately to the next iteration of the loop. |
| 18225 ** If an OOM occurs while retrieving the data (this can happen if SQLite |
| 18226 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM |
| 18227 ** to the caller. |
| 18228 */ |
| 18229 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); |
| 18230 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
| 18231 if( zDoc==0 ){ |
| 18232 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ |
| 18233 continue; |
| 18234 } |
| 18235 rc = SQLITE_NOMEM; |
| 18236 goto offsets_out; |
| 18237 } |
| 18238 |
| 18239 /* Initialize a tokenizer iterator to iterate through column iCol. */ |
| 18240 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, |
| 18241 zDoc, nDoc, &pC |
| 18242 ); |
| 18243 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18244 |
| 18245 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 18246 while( rc==SQLITE_OK ){ |
| 18247 int i; /* Used to loop through terms */ |
| 18248 int iMinPos = 0x7FFFFFFF; /* Position of next token */ |
| 18249 TermOffset *pTerm = 0; /* TermOffset associated with next token */ |
| 18250 |
| 18251 for(i=0; i<nToken; i++){ |
| 18252 TermOffset *pT = &sCtx.aTerm[i]; |
| 18253 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ |
| 18254 iMinPos = pT->iPos-pT->iOff; |
| 18255 pTerm = pT; |
| 18256 } |
| 18257 } |
| 18258 |
| 18259 if( !pTerm ){ |
| 18260 /* All offsets for this column have been gathered. */ |
| 18261 rc = SQLITE_DONE; |
| 18262 }else{ |
| 18263 assert( iCurrent<=iMinPos ); |
| 18264 if( 0==(0xFE&*pTerm->pList) ){ |
| 18265 pTerm->pList = 0; |
| 18266 }else{ |
| 18267 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); |
| 18268 } |
| 18269 while( rc==SQLITE_OK && iCurrent<iMinPos ){ |
| 18270 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 18271 } |
| 18272 if( rc==SQLITE_OK ){ |
| 18273 char aBuffer[64]; |
| 18274 sqlite3_snprintf(sizeof(aBuffer), aBuffer, |
| 18275 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart |
| 18276 ); |
| 18277 rc = fts3StringAppend(&res, aBuffer, -1); |
| 18278 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ |
| 18279 rc = FTS_CORRUPT_VTAB; |
| 18280 } |
| 18281 } |
| 18282 } |
| 18283 if( rc==SQLITE_DONE ){ |
| 18284 rc = SQLITE_OK; |
| 18285 } |
| 18286 |
| 18287 pMod->xClose(pC); |
| 18288 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18289 } |
| 18290 |
| 18291 offsets_out: |
| 18292 sqlite3_free(sCtx.aTerm); |
| 18293 assert( rc!=SQLITE_DONE ); |
| 18294 sqlite3Fts3SegmentsClose(pTab); |
| 18295 if( rc!=SQLITE_OK ){ |
| 18296 sqlite3_result_error_code(pCtx, rc); |
| 18297 sqlite3_free(res.z); |
| 18298 }else{ |
| 18299 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); |
| 18300 } |
| 18301 return; |
| 18302 } |
| 18303 |
| 18304 /* |
| 18305 ** Implementation of matchinfo() function. |
| 18306 */ |
| 18307 SQLITE_PRIVATE void sqlite3Fts3Matchinfo( |
| 18308 sqlite3_context *pContext, /* Function call context */ |
| 18309 Fts3Cursor *pCsr, /* FTS3 table cursor */ |
| 18310 const char *zArg /* Second arg to matchinfo() function */ |
| 18311 ){ |
| 18312 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18313 const char *zFormat; |
| 18314 |
| 18315 if( zArg ){ |
| 18316 zFormat = zArg; |
| 18317 }else{ |
| 18318 zFormat = FTS3_MATCHINFO_DEFAULT; |
| 18319 } |
| 18320 |
| 18321 if( !pCsr->pExpr ){ |
| 18322 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); |
| 18323 return; |
| 18324 }else{ |
| 18325 /* Retrieve matchinfo() data. */ |
| 18326 fts3GetMatchinfo(pContext, pCsr, zFormat); |
| 18327 sqlite3Fts3SegmentsClose(pTab); |
| 18328 } |
| 18329 } |
| 18330 |
| 18331 #endif |
| 18332 |
| 18333 /************** End of fts3_snippet.c ****************************************/ |
| 18334 /************** Begin file fts3_unicode.c ************************************/ |
| 18335 /* |
| 18336 ** 2012 May 24 |
| 18337 ** |
| 18338 ** The author disclaims copyright to this source code. In place of |
| 18339 ** a legal notice, here is a blessing: |
| 18340 ** |
| 18341 ** May you do good and not evil. |
| 18342 ** May you find forgiveness for yourself and forgive others. |
| 18343 ** May you share freely, never taking more than you give. |
| 18344 ** |
| 18345 ****************************************************************************** |
| 18346 ** |
| 18347 ** Implementation of the "unicode" full-text-search tokenizer. |
| 18348 */ |
| 18349 |
| 18350 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 18351 |
| 18352 /* #include "fts3Int.h" */ |
| 18353 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 18354 |
| 18355 /* #include <assert.h> */ |
| 18356 /* #include <stdlib.h> */ |
| 18357 /* #include <stdio.h> */ |
| 18358 /* #include <string.h> */ |
| 18359 |
| 18360 /* #include "fts3_tokenizer.h" */ |
| 18361 |
| 18362 /* |
| 18363 ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied |
| 18364 ** from the sqlite3 source file utf.c. If this file is compiled as part |
| 18365 ** of the amalgamation, they are not required. |
| 18366 */ |
| 18367 #ifndef SQLITE_AMALGAMATION |
| 18368 |
/*
** Lookup table indexed by (lead byte - 0xc0). READ_UTF8 uses it to obtain
** the initial payload bits of a multi-byte character from its first byte.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
  /* 0xc0..0xcf */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xd0..0xdf */
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0xe0..0xef */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xf0..0xf7 */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  /* 0xf8..0xff */
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
| 18379 |
/*
** Read one character from the byte pointer zIn into c, advancing zIn past
** it. Continuation bytes (10xxxxxx) are consumed until zIn reaches zTerm
** or a non-continuation byte is seen. If the decoded value is below 0x80,
** lies in 0xD800..0xDFFF, or is 0xFFFE or 0xFFFF, c is set to 0xFFFD.
**
** This macro expands to more than one statement and evaluates its
** arguments several times.
*/
#define READ_UTF8(zIn, zTerm, c)                                      \
  c = *(zIn++);                                                       \
  if( c>=0xc0 ){                                                      \
    c = sqlite3Utf8Trans1[c-0xc0];                                    \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){                       \
      c = (c<<6) + (0x3f & *(zIn++));                                 \
    }                                                                 \
    if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){ \
      c = 0xFFFD;                                                     \
    }                                                                 \
  }
| 18391 |
/*
** Append the UTF-8 encoding of codepoint c at zOut, advancing zOut past
** the bytes written. One byte is written for c<0x80, two for c<0x800,
** three for c<0x10000 and four otherwise.
**
** The macro evaluates its arguments several times.
*/
#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (u8)(c&0xFF);                            \
  }else if( c<0x00800 ){                               \
    *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);                \
    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
  }else if( c<0x10000 ){                               \
    *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);               \
    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
  }else{                                               \
    *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);             \
    *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);             \
    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
  }                                                    \
}
| 18411 |
| 18412 #endif /* ifndef SQLITE_AMALGAMATION */ |
| 18413 |
| 18414 typedef struct unicode_tokenizer unicode_tokenizer; |
| 18415 typedef struct unicode_cursor unicode_cursor; |
| 18416 |
| 18417 struct unicode_tokenizer { |
| 18418 sqlite3_tokenizer base; |
| 18419 int bRemoveDiacritic; |
| 18420 int nException; |
| 18421 int *aiException; |
| 18422 }; |
| 18423 |
| 18424 struct unicode_cursor { |
| 18425 sqlite3_tokenizer_cursor base; |
| 18426 const unsigned char *aInput; /* Input text being tokenized */ |
| 18427 int nInput; /* Size of aInput[] in bytes */ |
| 18428 int iOff; /* Current offset within aInput[] */ |
| 18429 int iToken; /* Index of next token to be returned */ |
| 18430 char *zToken; /* storage for current token */ |
| 18431 int nAlloc; /* space allocated at zToken */ |
| 18432 }; |
| 18433 |
| 18434 |
| 18435 /* |
| 18436 ** Destroy a tokenizer allocated by unicodeCreate(). |
| 18437 */ |
| 18438 static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ |
| 18439 if( pTokenizer ){ |
| 18440 unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; |
| 18441 sqlite3_free(p->aiException); |
| 18442 sqlite3_free(p); |
| 18443 } |
| 18444 return SQLITE_OK; |
| 18445 } |
| 18446 |
| 18447 /* |
| 18448 ** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE |
| 18449 ** statement has specified that the tokenizer for this table shall consider |
| 18450 ** all characters in string zIn/nIn to be separators (if bAlnum==0) or |
| 18451 ** token characters (if bAlnum==1). |
| 18452 ** |
| 18453 ** For each codepoint in the zIn/nIn string, this function checks if the |
| 18454 ** sqlite3FtsUnicodeIsalnum() function already returns the desired result. |
| 18455 ** If so, no action is taken. Otherwise, the codepoint is added to the |
| 18456 ** unicode_tokenizer.aiException[] array. For the purposes of tokenization, |
| 18457 ** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all |
| 18458 ** codepoints in the aiException[] array. |
| 18459 ** |
| 18460 ** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() |
| 18461 ** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. |
| 18462 ** It is not possible to change the behavior of the tokenizer with respect |
| 18463 ** to these codepoints. |
| 18464 */ |
| 18465 static int unicodeAddExceptions( |
| 18466 unicode_tokenizer *p, /* Tokenizer to add exceptions to */ |
| 18467 int bAlnum, /* Replace Isalnum() return value with this */ |
| 18468 const char *zIn, /* Array of characters to make exceptions */ |
| 18469 int nIn /* Length of z in bytes */ |
| 18470 ){ |
| 18471 const unsigned char *z = (const unsigned char *)zIn; |
| 18472 const unsigned char *zTerm = &z[nIn]; |
| 18473 int iCode; |
| 18474 int nEntry = 0; |
| 18475 |
| 18476 assert( bAlnum==0 || bAlnum==1 ); |
| 18477 |
| 18478 while( z<zTerm ){ |
| 18479 READ_UTF8(z, zTerm, iCode); |
| 18480 assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); |
| 18481 if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum |
| 18482 && sqlite3FtsUnicodeIsdiacritic(iCode)==0 |
| 18483 ){ |
| 18484 nEntry++; |
| 18485 } |
| 18486 } |
| 18487 |
| 18488 if( nEntry ){ |
| 18489 int *aNew; /* New aiException[] array */ |
| 18490 int nNew; /* Number of valid entries in array aNew[] */ |
| 18491 |
| 18492 aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int)); |
| 18493 if( aNew==0 ) return SQLITE_NOMEM; |
| 18494 nNew = p->nException; |
| 18495 |
| 18496 z = (const unsigned char *)zIn; |
| 18497 while( z<zTerm ){ |
| 18498 READ_UTF8(z, zTerm, iCode); |
| 18499 if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum |
| 18500 && sqlite3FtsUnicodeIsdiacritic(iCode)==0 |
| 18501 ){ |
| 18502 int i, j; |
| 18503 for(i=0; i<nNew && aNew[i]<iCode; i++); |
| 18504 for(j=nNew; j>i; j--) aNew[j] = aNew[j-1]; |
| 18505 aNew[i] = iCode; |
| 18506 nNew++; |
| 18507 } |
| 18508 } |
| 18509 p->aiException = aNew; |
| 18510 p->nException = nNew; |
| 18511 } |
| 18512 |
| 18513 return SQLITE_OK; |
| 18514 } |
| 18515 |
| 18516 /* |
| 18517 ** Return true if the p->aiException[] array contains the value iCode. |
| 18518 */ |
| 18519 static int unicodeIsException(unicode_tokenizer *p, int iCode){ |
| 18520 if( p->nException>0 ){ |
| 18521 int *a = p->aiException; |
| 18522 int iLo = 0; |
| 18523 int iHi = p->nException-1; |
| 18524 |
| 18525 while( iHi>=iLo ){ |
| 18526 int iTest = (iHi + iLo) / 2; |
| 18527 if( iCode==a[iTest] ){ |
| 18528 return 1; |
| 18529 }else if( iCode>a[iTest] ){ |
| 18530 iLo = iTest+1; |
| 18531 }else{ |
| 18532 iHi = iTest-1; |
| 18533 } |
| 18534 } |
| 18535 } |
| 18536 |
| 18537 return 0; |
| 18538 } |
| 18539 |
| 18540 /* |
| 18541 ** Return true if, for the purposes of tokenization, codepoint iCode is |
| 18542 ** considered a token character (not a separator). |
| 18543 */ |
| 18544 static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ |
| 18545 assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); |
| 18546 return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); |
| 18547 } |
| 18548 |
| 18549 /* |
| 18550 ** Create a new tokenizer instance. |
| 18551 */ |
| 18552 static int unicodeCreate( |
| 18553 int nArg, /* Size of array argv[] */ |
| 18554 const char * const *azArg, /* Tokenizer creation arguments */ |
| 18555 sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ |
| 18556 ){ |
| 18557 unicode_tokenizer *pNew; /* New tokenizer object */ |
| 18558 int i; |
| 18559 int rc = SQLITE_OK; |
| 18560 |
| 18561 pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); |
| 18562 if( pNew==NULL ) return SQLITE_NOMEM; |
| 18563 memset(pNew, 0, sizeof(unicode_tokenizer)); |
| 18564 pNew->bRemoveDiacritic = 1; |
| 18565 |
| 18566 for(i=0; rc==SQLITE_OK && i<nArg; i++){ |
| 18567 const char *z = azArg[i]; |
| 18568 int n = (int)strlen(z); |
| 18569 |
| 18570 if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){ |
| 18571 pNew->bRemoveDiacritic = 1; |
| 18572 } |
| 18573 else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ |
| 18574 pNew->bRemoveDiacritic = 0; |
| 18575 } |
| 18576 else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ |
| 18577 rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); |
| 18578 } |
| 18579 else if( n>=11 && memcmp("separators=", z, 11)==0 ){ |
| 18580 rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); |
| 18581 } |
| 18582 else{ |
| 18583 /* Unrecognized argument */ |
| 18584 rc = SQLITE_ERROR; |
| 18585 } |
| 18586 } |
| 18587 |
| 18588 if( rc!=SQLITE_OK ){ |
| 18589 unicodeDestroy((sqlite3_tokenizer *)pNew); |
| 18590 pNew = 0; |
| 18591 } |
| 18592 *pp = (sqlite3_tokenizer *)pNew; |
| 18593 return rc; |
| 18594 } |
| 18595 |
| 18596 /* |
| 18597 ** Prepare to begin tokenizing a particular string. The input |
| 18598 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 18599 ** used to incrementally tokenize this string is returned in |
| 18600 ** *ppCursor. |
| 18601 */ |
| 18602 static int unicodeOpen( |
| 18603 sqlite3_tokenizer *p, /* The tokenizer */ |
| 18604 const char *aInput, /* Input string */ |
| 18605 int nInput, /* Size of string aInput in bytes */ |
| 18606 sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ |
| 18607 ){ |
| 18608 unicode_cursor *pCsr; |
| 18609 |
| 18610 pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); |
| 18611 if( pCsr==0 ){ |
| 18612 return SQLITE_NOMEM; |
| 18613 } |
| 18614 memset(pCsr, 0, sizeof(unicode_cursor)); |
| 18615 |
| 18616 pCsr->aInput = (const unsigned char *)aInput; |
| 18617 if( aInput==0 ){ |
| 18618 pCsr->nInput = 0; |
| 18619 }else if( nInput<0 ){ |
| 18620 pCsr->nInput = (int)strlen(aInput); |
| 18621 }else{ |
| 18622 pCsr->nInput = nInput; |
| 18623 } |
| 18624 |
| 18625 *pp = &pCsr->base; |
| 18626 UNUSED_PARAMETER(p); |
| 18627 return SQLITE_OK; |
| 18628 } |
| 18629 |
| 18630 /* |
| 18631 ** Close a tokenization cursor previously opened by a call to |
| 18632 ** simpleOpen() above. |
| 18633 */ |
| 18634 static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ |
| 18635 unicode_cursor *pCsr = (unicode_cursor *) pCursor; |
| 18636 sqlite3_free(pCsr->zToken); |
| 18637 sqlite3_free(pCsr); |
| 18638 return SQLITE_OK; |
| 18639 } |
| 18640 |
| 18641 /* |
| 18642 ** Extract the next token from a tokenization cursor. The cursor must |
| 18643 ** have been opened by a prior call to simpleOpen(). |
| 18644 */ |
| 18645 static int unicodeNext( |
| 18646 sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ |
| 18647 const char **paToken, /* OUT: Token text */ |
| 18648 int *pnToken, /* OUT: Number of bytes at *paToken */ |
| 18649 int *piStart, /* OUT: Starting offset of token */ |
| 18650 int *piEnd, /* OUT: Ending offset of token */ |
| 18651 int *piPos /* OUT: Position integer of token */ |
| 18652 ){ |
| 18653 unicode_cursor *pCsr = (unicode_cursor *)pC; |
| 18654 unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); |
| 18655 int iCode = 0; |
| 18656 char *zOut; |
| 18657 const unsigned char *z = &pCsr->aInput[pCsr->iOff]; |
| 18658 const unsigned char *zStart = z; |
| 18659 const unsigned char *zEnd; |
| 18660 const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; |
| 18661 |
| 18662 /* Scan past any delimiter characters before the start of the next token. |
| 18663 ** Return SQLITE_DONE early if this takes us all the way to the end of |
| 18664 ** the input. */ |
| 18665 while( z<zTerm ){ |
| 18666 READ_UTF8(z, zTerm, iCode); |
| 18667 if( unicodeIsAlnum(p, iCode) ) break; |
| 18668 zStart = z; |
| 18669 } |
| 18670 if( zStart>=zTerm ) return SQLITE_DONE; |
| 18671 |
| 18672 zOut = pCsr->zToken; |
| 18673 do { |
| 18674 int iOut; |
| 18675 |
| 18676 /* Grow the output buffer if required. */ |
| 18677 if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ |
| 18678 char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); |
| 18679 if( !zNew ) return SQLITE_NOMEM; |
| 18680 zOut = &zNew[zOut - pCsr->zToken]; |
| 18681 pCsr->zToken = zNew; |
| 18682 pCsr->nAlloc += 64; |
| 18683 } |
| 18684 |
| 18685 /* Write the folded case of the last character read to the output */ |
| 18686 zEnd = z; |
| 18687 iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); |
| 18688 if( iOut ){ |
| 18689 WRITE_UTF8(zOut, iOut); |
| 18690 } |
| 18691 |
| 18692 /* If the cursor is not at EOF, read the next character */ |
| 18693 if( z>=zTerm ) break; |
| 18694 READ_UTF8(z, zTerm, iCode); |
| 18695 }while( unicodeIsAlnum(p, iCode) |
| 18696 || sqlite3FtsUnicodeIsdiacritic(iCode) |
| 18697 ); |
| 18698 |
| 18699 /* Set the output variables and return. */ |
| 18700 pCsr->iOff = (int)(z - pCsr->aInput); |
| 18701 *paToken = pCsr->zToken; |
| 18702 *pnToken = (int)(zOut - pCsr->zToken); |
| 18703 *piStart = (int)(zStart - pCsr->aInput); |
| 18704 *piEnd = (int)(zEnd - pCsr->aInput); |
| 18705 *piPos = pCsr->iToken++; |
| 18706 return SQLITE_OK; |
| 18707 } |
| 18708 |
| 18709 /* |
| 18710 ** Set *ppModule to a pointer to the sqlite3_tokenizer_module |
| 18711 ** structure for the unicode tokenizer. |
| 18712 */ |
| 18713 SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const *
*ppModule){ |
| 18714 static const sqlite3_tokenizer_module module = { |
| 18715 0, |
| 18716 unicodeCreate, |
| 18717 unicodeDestroy, |
| 18718 unicodeOpen, |
| 18719 unicodeClose, |
| 18720 unicodeNext, |
| 18721 0, |
| 18722 }; |
| 18723 *ppModule = &module; |
| 18724 } |
| 18725 |
| 18726 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 18727 #endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ |
| 18728 |
| 18729 /************** End of fts3_unicode.c ****************************************/ |
| 18730 /************** Begin file fts3_unicode2.c ***********************************/ |
| 18731 /* |
| 18732 ** 2012 May 25 |
| 18733 ** |
| 18734 ** The author disclaims copyright to this source code. In place of |
| 18735 ** a legal notice, here is a blessing: |
| 18736 ** |
| 18737 ** May you do good and not evil. |
| 18738 ** May you find forgiveness for yourself and forgive others. |
| 18739 ** May you share freely, never taking more than you give. |
| 18740 ** |
| 18741 ****************************************************************************** |
| 18742 */ |
| 18743 |
| 18744 /* |
| 18745 ** DO NOT EDIT THIS MACHINE GENERATED FILE. |
| 18746 */ |
| 18747 |
| 18748 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 18749 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) |
| 18750 |
| 18751 /* #include <assert.h> */ |
| 18752 |
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** Values outside the range 0 .. (1<<22)-1, including negative values,
** are reported as true (1). The return value is always 0 or 1.
*/
SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
    0x380400F0,
  };
  /* One bit per ASCII codepoint; a set bit marks a non-alphanumeric. */
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  /* The comparisons are done on the unsigned value so that a negative
  ** argument can never be used to index aAscii[]. The mask is built from
  ** an unsigned 1 because shifting a signed 1 left by 31 is undefined. */
  if( (unsigned int)c<128 ){
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( (unsigned int)c<(1<<22) ){
    /* Find the last entry whose range starts at or before codepoint c. */
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    /* c is alphanumeric unless it falls inside the range found. */
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  return 1;
}
| 18882 |
| 18883 |
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). Codepoints not covered by the table are returned unchanged.
** The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int remove_diacritic(int c){
  /* Each aDia[] entry is ((C<<3) + N) and covers the N+1 codepoints
  ** C..C+N. All of them map to the character at the same index in
  ** aChar[]. The tables are static const so that they are not rebuilt on
  ** every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Find the last entry whose range starts at or before codepoint c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  /* If c lies beyond the end of that range it has no mapping. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
| 18936 |
| 18937 |
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
**
** Only codepoints 768..817 are ever reported as diacritics. The value
** returned for a diacritic is non-zero but not necessarily 1.
*/
SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;    /* Bit i set: 768+i is a diacritic */
  unsigned int mask1 = 0x000361F8;    /* Bit i set: 800+i is a diacritic */
  if( c<768 || c>817 ) return 0;
  /* The shift count reaches 31 when c==799, so an unsigned 1 is shifted.
  ** Shifting a signed 1 into the sign bit is undefined behavior. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
| 18950 |
| 18951 |
| 18952 /* |
| 18953 ** Interpret the argument as a unicode codepoint. If the codepoint |
| 18954 ** is an upper case character that has a lower case equivalent, |
| 18955 ** return the codepoint corresponding to the lower case version. |
| 18956 ** Otherwise, return a copy of the argument. |
| 18957 ** |
| 18958 ** The results are undefined if the value passed to this function |
| 18959 ** is less than zero. |
| 18960 */ |
| 18961 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ |
| 18962 /* Each entry in the following array defines a rule for folding a range |
| 18963 ** of codepoints to lower case. The rule applies to a range of nRange |
| 18964 ** codepoints starting at codepoint iCode. |
| 18965 ** |
| 18966 ** If the least significant bit in flags is clear, then the rule applies |
| 18967 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and |
| 18968 ** need to be folded). Or, if it is set, then the rule only applies to |
| 18969 ** every second codepoint in the range, starting with codepoint C. |
| 18970 ** |
| 18971 ** The 7 most significant bits in flags are an index into the aiOff[] |
| 18972 ** array. If a specific codepoint C does require folding, then its lower |
| 18973 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). |
| 18974 ** |
| 18975 ** The contents of this array are generated by parsing the CaseFolding.txt |
| 18976 ** file distributed as part of the "Unicode Character Database". See |
| 18977 ** http://www.unicode.org for details. |
| 18978 */ |
| 18979 static const struct TableEntry { |
| 18980 unsigned short iCode; |
| 18981 unsigned char flags; |
| 18982 unsigned char nRange; |
| 18983 } aEntry[] = { |
| 18984 {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, |
| 18985 {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, |
| 18986 {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, |
| 18987 {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, |
| 18988 {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, |
| 18989 {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, |
| 18990 {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, |
| 18991 {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, |
| 18992 {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, |
| 18993 {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, |
| 18994 {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, |
| 18995 {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, |
| 18996 {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, |
| 18997 {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, |
| 18998 {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, |
| 18999 {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, |
| 19000 {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, |
| 19001 {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, |
| 19002 {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, |
| 19003 {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, |
| 19004 {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, |
| 19005 {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, |
| 19006 {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, |
| 19007 {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, |
| 19008 {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, |
| 19009 {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, |
| 19010 {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, |
| 19011 {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, |
| 19012 {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, |
| 19013 {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, |
| 19014 {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, |
| 19015 {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, |
| 19016 {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, |
| 19017 {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, |
| 19018 {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, |
| 19019 {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, |
| 19020 {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, |
| 19021 {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, |
| 19022 {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, |
| 19023 {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, |
| 19024 {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, |
| 19025 {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, |
| 19026 {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, |
| 19027 {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, |
| 19028 {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, |
| 19029 {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, |
| 19030 {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, |
| 19031 {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, |
| 19032 {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, |
| 19033 {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, |
| 19034 {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, |
| 19035 {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, |
| 19036 {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, |
| 19037 {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, |
| 19038 {65313, 14, 26}, |
| 19039 }; |
| 19040 static const unsigned short aiOff[] = { |
| 19041 1, 2, 8, 15, 16, 26, 28, 32, |
| 19042 37, 38, 40, 48, 63, 64, 69, 71, |
| 19043 79, 80, 116, 202, 203, 205, 206, 207, |
| 19044 209, 210, 211, 213, 214, 217, 218, 219, |
| 19045 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, |
| 19046 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, |
| 19047 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, |
| 19048 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, |
| 19049 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, |
| 19050 65514, 65521, 65527, 65528, 65529, |
| 19051 }; |
| 19052 |
| 19053 int ret = c; |
| 19054 |
| 19055 assert( c>=0 ); |
| 19056 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); |
| 19057 |
| 19058 if( c<128 ){ |
| 19059 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); |
| 19060 }else if( c<65536 ){ |
| 19061 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; |
| 19062 int iLo = 0; |
| 19063 int iRes = -1; |
| 19064 |
| 19065 while( iHi>=iLo ){ |
| 19066 int iTest = (iHi + iLo) / 2; |
| 19067 int cmp = (c - aEntry[iTest].iCode); |
| 19068 if( cmp>=0 ){ |
| 19069 iRes = iTest; |
| 19070 iLo = iTest+1; |
| 19071 }else{ |
| 19072 iHi = iTest-1; |
| 19073 } |
| 19074 } |
| 19075 assert( iRes<0 || c>=aEntry[iRes].iCode ); |
| 19076 |
| 19077 if( iRes>=0 ){ |
| 19078 const struct TableEntry *p = &aEntry[iRes]; |
| 19079 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ |
| 19080 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; |
| 19081 assert( ret>0 ); |
| 19082 } |
| 19083 } |
| 19084 |
| 19085 if( bRemoveDiacritic ) ret = remove_diacritic(ret); |
| 19086 } |
| 19087 |
| 19088 else if( c>=66560 && c<66600 ){ |
| 19089 ret = c + 40; |
| 19090 } |
| 19091 |
| 19092 return ret; |
| 19093 } |
| 19094 #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ |
| 19095 #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ |
| 19096 |
| 19097 /************** End of fts3_unicode2.c ***************************************/ |
| 19098 /************** Begin file rtree.c *******************************************/ |
| 19099 /* |
| 19100 ** 2001 September 15 |
| 19101 ** |
| 19102 ** The author disclaims copyright to this source code. In place of |
| 19103 ** a legal notice, here is a blessing: |
| 19104 ** |
| 19105 ** May you do good and not evil. |
| 19106 ** May you find forgiveness for yourself and forgive others. |
| 19107 ** May you share freely, never taking more than you give. |
| 19108 ** |
| 19109 ************************************************************************* |
| 19110 ** This file contains code for implementations of the r-tree and r*-tree |
| 19111 ** algorithms packaged as an SQLite virtual table module. |
| 19112 */ |
| 19113 |
| 19114 /* |
| 19115 ** Database Format of R-Tree Tables |
| 19116 ** -------------------------------- |
| 19117 ** |
| 19118 ** The data structure for a single virtual r-tree table is stored in three |
| 19119 ** native SQLite tables declared as follows. In each case, the '%' character |
| 19120 ** in the table name is replaced with the user-supplied name of the r-tree |
| 19121 ** table. |
| 19122 ** |
| 19123 ** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) |
| 19124 ** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) |
| 19125 ** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) |
| 19126 ** |
| 19127 ** The data for each node of the r-tree structure is stored in the %_node |
| 19128 ** table. For each node that is not the root node of the r-tree, there is |
| 19129 ** an entry in the %_parent table associating the node with its parent. |
| 19130 ** And for each row of data in the table, there is an entry in the %_rowid |
| 19131 ** table that maps from the entries rowid to the id of the node that it |
| 19132 ** is stored on. |
| 19133 ** |
| 19134 ** The root node of an r-tree always exists, even if the r-tree table is |
| 19135 ** empty. The nodeno of the root node is always 1. All other nodes in the |
| 19136 ** table must be the same size as the root node. The content of each node |
| 19137 ** is formatted as follows: |
| 19138 ** |
| 19139 ** 1. If the node is the root node (node 1), then the first 2 bytes |
| 19140 ** of the node contain the tree depth as a big-endian integer. |
| 19141 ** For non-root nodes, the first 2 bytes are left unused. |
| 19142 ** |
| 19143 ** 2. The next 2 bytes contain the number of entries currently |
| 19144 ** stored in the node. |
| 19145 ** |
| 19146 ** 3. The remainder of the node contains the node entries. Each entry |
| 19147 ** consists of a single 8-byte integer followed by an even number |
| 19148 ** of 4-byte coordinates. For leaf nodes the integer is the rowid |
| 19149 ** of a record. For internal nodes it is the node number of a |
| 19150 ** child page. |
| 19151 */ |
| 19152 |
| 19153 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) |
| 19154 |
| 19155 #ifndef SQLITE_CORE |
| 19156 /* #include "sqlite3ext.h" */ |
| 19157 SQLITE_EXTENSION_INIT1 |
| 19158 #else |
| 19159 /* #include "sqlite3.h" */ |
| 19160 #endif |
| 19161 |
| 19162 /* #include <string.h> */ |
| 19163 /* #include <assert.h> */ |
| 19164 /* #include <stdio.h> */ |
| 19165 |
| 19166 #ifndef SQLITE_AMALGAMATION |
| 19167 #include "sqlite3rtree.h" |
| 19168 typedef sqlite3_int64 i64; |
| 19169 typedef unsigned char u8; |
| 19170 typedef unsigned short u16; |
| 19171 typedef unsigned int u32; |
| 19172 #endif |
| 19173 |
| 19174 /* The following macro is used to suppress compiler warnings. |
| 19175 */ |
| 19176 #ifndef UNUSED_PARAMETER |
| 19177 # define UNUSED_PARAMETER(x) (void)(x) |
| 19178 #endif |
| 19179 |
| 19180 typedef struct Rtree Rtree; |
| 19181 typedef struct RtreeCursor RtreeCursor; |
| 19182 typedef struct RtreeNode RtreeNode; |
| 19183 typedef struct RtreeCell RtreeCell; |
| 19184 typedef struct RtreeConstraint RtreeConstraint; |
| 19185 typedef struct RtreeMatchArg RtreeMatchArg; |
| 19186 typedef struct RtreeGeomCallback RtreeGeomCallback; |
| 19187 typedef union RtreeCoord RtreeCoord; |
| 19188 typedef struct RtreeSearchPoint RtreeSearchPoint; |
| 19189 |
| 19190 /* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ |
| 19191 #define RTREE_MAX_DIMENSIONS 5 |
| 19192 |
| 19193 /* Size of hash table Rtree.aHash. This hash table is not expected to |
| 19194 ** ever contain very many entries, so a fixed number of buckets is |
| 19195 ** used. |
| 19196 */ |
| 19197 #define HASHSIZE 97 |
| 19198 |
| 19199 /* The xBestIndex method of this virtual table requires an estimate of |
| 19200 ** the number of rows in the virtual table to calculate the costs of |
| 19201 ** various strategies. If possible, this estimate is loaded from the |
| 19202 ** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). |
| 19203 ** Otherwise, if no sqlite_stat1 entry is available, use |
| 19204 ** RTREE_DEFAULT_ROWEST. |
| 19205 */ |
| 19206 #define RTREE_DEFAULT_ROWEST 1048576 |
| 19207 #define RTREE_MIN_ROWEST 100 |
| 19208 |
| 19209 /* |
| 19210 ** An rtree virtual-table object. |
| 19211 */ |
| 19212 struct Rtree { |
| 19213 sqlite3_vtab base; /* Base class. Must be first */ |
| 19214 sqlite3 *db; /* Host database connection */ |
| 19215 int iNodeSize; /* Size in bytes of each node in the node table */ |
| 19216 u8 nDim; /* Number of dimensions */ |
| 19217 u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ |
| 19218 u8 nBytesPerCell; /* Bytes consumed per cell */ |
| 19219 int iDepth; /* Current depth of the r-tree structure */ |
| 19220 char *zDb; /* Name of database containing r-tree table */ |
| 19221 char *zName; /* Name of r-tree table */ |
| 19222 int nBusy; /* Current number of users of this structure */ |
| 19223 i64 nRowEst; /* Estimated number of rows in this table */ |
| 19224 |
| 19225 /* List of nodes removed during a CondenseTree operation. List is |
| 19226 ** linked together via the pointer normally used for hash chains - |
| 19227 ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree |
| 19228 ** headed by the node (leaf nodes have RtreeNode.iNode==0). |
| 19229 */ |
| 19230 RtreeNode *pDeleted; |
| 19231 int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ |
| 19232 |
| 19233 /* Statements to read/write/delete a record from xxx_node */ |
| 19234 sqlite3_stmt *pReadNode; |
| 19235 sqlite3_stmt *pWriteNode; |
| 19236 sqlite3_stmt *pDeleteNode; |
| 19237 |
| 19238 /* Statements to read/write/delete a record from xxx_rowid */ |
| 19239 sqlite3_stmt *pReadRowid; |
| 19240 sqlite3_stmt *pWriteRowid; |
| 19241 sqlite3_stmt *pDeleteRowid; |
| 19242 |
| 19243 /* Statements to read/write/delete a record from xxx_parent */ |
| 19244 sqlite3_stmt *pReadParent; |
| 19245 sqlite3_stmt *pWriteParent; |
| 19246 sqlite3_stmt *pDeleteParent; |
| 19247 |
| 19248 RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ |
| 19249 }; |
| 19250 |
| 19251 /* Possible values for Rtree.eCoordType: */ |
| 19252 #define RTREE_COORD_REAL32 0 |
| 19253 #define RTREE_COORD_INT32 1 |
| 19254 |
| 19255 /* |
| 19256 ** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will |
| 19257 ** only deal with integer coordinates. No floating point operations |
| 19258 ** will be done. |
| 19259 */ |
| 19260 #ifdef SQLITE_RTREE_INT_ONLY |
| 19261 typedef sqlite3_int64 RtreeDValue; /* High accuracy coordinate */ |
| 19262 typedef int RtreeValue; /* Low accuracy coordinate */ |
| 19263 # define RTREE_ZERO 0 |
| 19264 #else |
| 19265 typedef double RtreeDValue; /* High accuracy coordinate */ |
| 19266 typedef float RtreeValue; /* Low accuracy coordinate */ |
| 19267 # define RTREE_ZERO 0.0 |
| 19268 #endif |
| 19269 |
| 19270 /* |
| 19271 ** When doing a search of an r-tree, instances of the following structure |
| 19272 ** record intermediate results from the tree walk. |
| 19273 ** |
| 19274 ** The id is always a node-id. For iLevel>=1 the id is the node-id of |
| 19275 ** the node that the RtreeSearchPoint represents. When iLevel==0, however, |
| 19276 ** the id is of the parent node and the cell that RtreeSearchPoint |
| 19277 ** represents is the iCell-th entry in the parent node. |
| 19278 */ |
| 19279 struct RtreeSearchPoint { |
| 19280 RtreeDValue rScore; /* The score for this node. Smallest goes first. */ |
| 19281 sqlite3_int64 id; /* Node ID */ |
| 19282 u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ |
| 19283 u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ |
| 19284 u8 iCell; /* Cell index within the node */ |
| 19285 }; |
| 19286 |
| 19287 /* |
| 19288 ** The minimum number of cells allowed for a node is a third of the |
| 19289 ** maximum. In Gutman's notation: |
| 19290 ** |
| 19291 ** m = M/3 |
| 19292 ** |
| 19293 ** If an R*-tree "Reinsert" operation is required, the same number of |
| 19294 ** cells are removed from the overfull node and reinserted into the tree. |
| 19295 */ |
| 19296 #define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) |
| 19297 #define RTREE_REINSERT(p) RTREE_MINCELLS(p) |
| 19298 #define RTREE_MAXCELLS 51 |
| 19299 |
| 19300 /* |
| 19301 ** The smallest possible node-size is (512-64)==448 bytes. And the largest |
| 19302 ** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). |
| 19303 ** Therefore all non-root nodes must contain at least 3 entries. Since |
| 19304 ** 3^40 (about 1.2e19) is of the same order as 2^64, an r-tree structure |
| 19305 ** is taken to always have a depth of 40 or less. |
| 19306 */ |
| 19307 #define RTREE_MAX_DEPTH 40 |
| 19308 |
| 19309 |
| 19310 /* |
| 19311 ** Number of entries in the cursor RtreeNode cache. The first entry is |
| 19312 ** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining |
| 19313 ** entries cache the RtreeNode for the first elements of the priority queue. |
| 19314 */ |
| 19315 #define RTREE_CACHE_SZ 5 |
| 19316 |
| 19317 /* |
| 19318 ** An rtree cursor object. |
| 19319 */ |
| 19320 struct RtreeCursor { |
| 19321 sqlite3_vtab_cursor base; /* Base class. Must be first */ |
| 19322 u8 atEOF; /* True if at end of search */ |
| 19323 u8 bPoint; /* True if sPoint is valid */ |
| 19324 int iStrategy; /* Copy of idxNum search parameter */ |
| 19325 int nConstraint; /* Number of entries in aConstraint */ |
| 19326 RtreeConstraint *aConstraint; /* Search constraints. */ |
| 19327 int nPointAlloc; /* Number of slots allocated for aPoint[] */ |
| 19328 int nPoint; /* Number of slots used in aPoint[] */ |
| 19329 int mxLevel; /* iLevel value for root of the tree */ |
| 19330 RtreeSearchPoint *aPoint; /* Priority queue for search points */ |
| 19331 RtreeSearchPoint sPoint; /* Cached next search point */ |
| 19332 RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ |
| 19333 u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ |
| 19334 }; |
| 19335 |
| 19336 /* Return the Rtree of a RtreeCursor */ |
| 19337 #define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) |
| 19338 |
| 19339 /* |
| 19340 ** A coordinate can be either a floating point number or a integer. All |
| 19341 ** coordinates within a single R-Tree are always of the same time. |
| 19342 */ |
| 19343 union RtreeCoord { |
| 19344 RtreeValue f; /* Floating point value */ |
| 19345 int i; /* Integer value */ |
| 19346 u32 u; /* Unsigned for byte-order conversions */ |
| 19347 }; |
| 19348 |
/*
** The argument is an RtreeCoord. The macro evaluates to the value held
** in that RtreeCoord converted to an RtreeDValue (double or int64).
**
** Unless SQLITE_RTREE_INT_ONLY is defined, the expansion refers to a
** local variable named pRtree, which must point to the Rtree structure
** that the RtreeCoord belongs to.
**
** The argument is parenthesized in the expansion so that any expression
** of type RtreeCoord (for example "*pCoord") can be passed safely.
*/
#ifdef SQLITE_RTREE_INT_ONLY
# define DCOORD(coord) ((RtreeDValue)(coord).i)
#else
# define DCOORD(coord) (                           \
    (pRtree->eCoordType==RTREE_COORD_REAL32) ?      \
      ((double)(coord).f) :                         \
      ((double)(coord).i)                           \
  )
#endif
| 19364 |
| 19365 /* |
| 19366 ** A search constraint. |
| 19367 */ |
| 19368 struct RtreeConstraint { |
| 19369 int iCoord; /* Index of constrained coordinate */ |
| 19370 int op; /* Constraining operation */ |
| 19371 union { |
| 19372 RtreeDValue rValue; /* Constraint value. */ |
| 19373 int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); |
| 19374 int (*xQueryFunc)(sqlite3_rtree_query_info*); |
| 19375 } u; |
| 19376 sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ |
| 19377 }; |
| 19378 |
| 19379 /* Possible values for RtreeConstraint.op */ |
| 19380 #define RTREE_EQ 0x41 /* A */ |
| 19381 #define RTREE_LE 0x42 /* B */ |
| 19382 #define RTREE_LT 0x43 /* C */ |
| 19383 #define RTREE_GE 0x44 /* D */ |
| 19384 #define RTREE_GT 0x45 /* E */ |
| 19385 #define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ |
| 19386 #define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ |
| 19387 |
| 19388 |
| 19389 /* |
| 19390 ** An rtree structure node. |
| 19391 */ |
| 19392 struct RtreeNode { |
| 19393 RtreeNode *pParent; /* Parent node */ |
| 19394 i64 iNode; /* The node number */ |
| 19395 int nRef; /* Number of references to this node */ |
| 19396 int isDirty; /* True if the node needs to be written to disk */ |
| 19397 u8 *zData; /* Content of the node, as should be on disk */ |
| 19398 RtreeNode *pNext; /* Next node in this hash collision chain */ |
| 19399 }; |
| 19400 |
| 19401 /* Return the number of cells in a node */ |
| 19402 #define NCELL(pNode) readInt16(&(pNode)->zData[2]) |
| 19403 |
| 19404 /* |
| 19405 ** A single cell from a node, deserialized |
| 19406 */ |
| 19407 struct RtreeCell { |
| 19408 i64 iRowid; /* Node or entry ID */ |
| 19409 RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ |
| 19410 }; |
| 19411 |
| 19412 |
| 19413 /* |
| 19414 ** This object becomes the sqlite3_user_data() for the SQL functions |
| 19415 ** that are created by sqlite3_rtree_geometry_callback() and |
| 19416 ** sqlite3_rtree_query_callback() and which appear on the right of MATCH |
| 19417 ** operators in order to constrain a search. |
| 19418 ** |
| 19419 ** xGeom and xQueryFunc are the callback functions. Exactly one of |
| 19420 ** xGeom and xQueryFunc fields is non-NULL, depending on whether the |
| 19421 ** SQL function was created using sqlite3_rtree_geometry_callback() or |
| 19422 ** sqlite3_rtree_query_callback(). |
| 19423 ** |
| 19424 ** This object is deleted automatically by the destructor mechanism in |
| 19425 ** sqlite3_create_function_v2(). |
| 19426 */ |
| 19427 struct RtreeGeomCallback { |
| 19428 int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); |
| 19429 int (*xQueryFunc)(sqlite3_rtree_query_info*); |
| 19430 void (*xDestructor)(void*); |
| 19431 void *pContext; |
| 19432 }; |
| 19433 |
| 19434 |
| 19435 /* |
| 19436 ** Value for the first field of every RtreeMatchArg object. The MATCH |
| 19437 ** operator tests that the first field of a blob operand matches this |
| 19438 ** value to avoid operating on invalid blobs (which could cause a segfault). |
| 19439 */ |
| 19440 #define RTREE_GEOMETRY_MAGIC 0x891245AB |
| 19441 |
| 19442 /* |
| 19443 ** An instance of this structure (in the form of a BLOB) is returned by |
| 19444 ** the SQL functions that sqlite3_rtree_geometry_callback() and |
| 19445 ** sqlite3_rtree_query_callback() create, and is read as the right-hand |
| 19446 ** operand to the MATCH operator of an R-Tree. |
| 19447 */ |
| 19448 struct RtreeMatchArg { |
| 19449 u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ |
| 19450 RtreeGeomCallback cb; /* Info about the callback functions */ |
| 19451 int nParam; /* Number of parameters to the SQL function */ |
| 19452 sqlite3_value **apSqlParam; /* Original SQL parameter values */ |
| 19453 RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ |
| 19454 }; |
| 19455 |
| 19456 #ifndef MAX |
| 19457 # define MAX(x,y) ((x) < (y) ? (y) : (x)) |
| 19458 #endif |
| 19459 #ifndef MIN |
| 19460 # define MIN(x,y) ((x) > (y) ? (y) : (x)) |
| 19461 #endif |
| 19462 |
| 19463 /* |
| 19464 ** Functions to deserialize a 16 bit integer, 32 bit real number and |
| 19465 ** 64 bit integer. The deserialized value is returned. |
| 19466 */ |
/* Decode the two bytes at p as a big-endian 16-bit value (0..65535). */
static int readInt16(u8 *p){
  int hi = p[0];
  int lo = p[1];
  return (hi<<8) + lo;
}
| 19470 static void readCoord(u8 *p, RtreeCoord *pCoord){ |
| 19471 pCoord->u = ( |
| 19472 (((u32)p[0]) << 24) + |
| 19473 (((u32)p[1]) << 16) + |
| 19474 (((u32)p[2]) << 8) + |
| 19475 (((u32)p[3]) << 0) |
| 19476 ); |
| 19477 } |
/*
** Decode the eight bytes at p as a big-endian 64-bit integer and return
** it as a signed value.
**
** The bytes are assembled in an unsigned 64-bit accumulator. Shifting a
** signed i64 left by 56 bits is undefined behavior whenever the first
** byte has its top bit set, which is the case for every negative value.
*/
static i64 readInt64(u8 *p){
  sqlite3_uint64 v = 0;
  int k;
  for(k=0; k<8; k++){
    v = (v<<8) | (sqlite3_uint64)p[k];
  }
  return (i64)v;
}
| 19490 |
| 19491 /* |
| 19492 ** Functions to serialize a 16 bit integer, 32 bit real number and |
| 19493 ** 64 bit integer. The value returned is the number of bytes written |
| 19494 ** to the argument buffer (always 2, 4 and 8 respectively). |
| 19495 */ |
/*
** Store the low 16 bits of i at p in big-endian byte order. Always
** returns 2, the number of bytes written.
*/
static int writeInt16(u8 *p, int i){
  int hi = (i>>8) & 0xFF;
  int lo = i & 0xFF;
  p[0] = hi;
  p[1] = lo;
  return 2;
}
| 19501 static int writeCoord(u8 *p, RtreeCoord *pCoord){ |
| 19502 u32 i; |
| 19503 assert( sizeof(RtreeCoord)==4 ); |
| 19504 assert( sizeof(u32)==4 ); |
| 19505 i = pCoord->u; |
| 19506 p[0] = (i>>24)&0xFF; |
| 19507 p[1] = (i>>16)&0xFF; |
| 19508 p[2] = (i>> 8)&0xFF; |
| 19509 p[3] = (i>> 0)&0xFF; |
| 19510 return 4; |
| 19511 } |
/*
** Store the 64-bit integer i at p in big-endian byte order. Always
** returns 8, the number of bytes written.
*/
static int writeInt64(u8 *p, i64 i){
  int shift;
  for(shift=56; shift>=0; shift-=8){
    *p++ = (i>>shift)&0xFF;
  }
  return 8;
}
| 19523 |
| 19524 /* |
| 19525 ** Increment the reference count of node p. |
| 19526 */ |
| 19527 static void nodeReference(RtreeNode *p){ |
| 19528 if( p ){ |
| 19529 p->nRef++; |
| 19530 } |
| 19531 } |
| 19532 |
| 19533 /* |
| 19534 ** Clear the content of node p (set all bytes to 0x00). |
| 19535 */ |
| 19536 static void nodeZero(Rtree *pRtree, RtreeNode *p){ |
| 19537 memset(&p->zData[2], 0, pRtree->iNodeSize-2); |
| 19538 p->isDirty = 1; |
| 19539 } |
| 19540 |
/*
** Given a node number iNode, return the index of the bucket to use for
** it in the Rtree.aHash table. The result is always in the range
** 0..HASHSIZE-1.
**
** The remainder is computed on the unsigned value. With a signed
** operand, a negative iNode would give a negative remainder, and so an
** out-of-bounds index into aHash[]. Non-negative node numbers hash
** exactly as iNode % HASHSIZE.
*/
static int nodeHash(i64 iNode){
  return (int)(((sqlite3_uint64)iNode) % HASHSIZE);
}
| 19548 |
| 19549 /* |
| 19550 ** Search the node hash table for node iNode. If found, return a pointer |
| 19551 ** to it. Otherwise, return 0. |
| 19552 */ |
| 19553 static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ |
| 19554 RtreeNode *p; |
| 19555 for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); |
| 19556 return p; |
| 19557 } |
| 19558 |
| 19559 /* |
| 19560 ** Add node pNode to the node hash table. |
| 19561 */ |
| 19562 static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ |
| 19563 int iHash; |
| 19564 assert( pNode->pNext==0 ); |
| 19565 iHash = nodeHash(pNode->iNode); |
| 19566 pNode->pNext = pRtree->aHash[iHash]; |
| 19567 pRtree->aHash[iHash] = pNode; |
| 19568 } |
| 19569 |
| 19570 /* |
| 19571 ** Remove node pNode from the node hash table. |
| 19572 */ |
| 19573 static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ |
| 19574 RtreeNode **pp; |
| 19575 if( pNode->iNode!=0 ){ |
| 19576 pp = &pRtree->aHash[nodeHash(pNode->iNode)]; |
| 19577 for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } |
| 19578 *pp = pNode->pNext; |
| 19579 pNode->pNext = 0; |
| 19580 } |
| 19581 } |
| 19582 |
| 19583 /* |
| 19584 ** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), |
| 19585 ** indicating that node has not yet been assigned a node number. It is |
| 19586 ** assigned a node number when nodeWrite() is called to write the |
| 19587 ** node contents out to the database. |
| 19588 */ |
| 19589 static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ |
| 19590 RtreeNode *pNode; |
| 19591 pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); |
| 19592 if( pNode ){ |
| 19593 memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); |
| 19594 pNode->zData = (u8 *)&pNode[1]; |
| 19595 pNode->nRef = 1; |
| 19596 pNode->pParent = pParent; |
| 19597 pNode->isDirty = 1; |
| 19598 nodeReference(pParent); |
| 19599 } |
| 19600 return pNode; |
| 19601 } |
| 19602 |
| 19603 /* |
| 19604 ** Obtain a reference to an r-tree node. |
| 19605 */ |
| 19606 static int nodeAcquire( |
| 19607 Rtree *pRtree, /* R-tree structure */ |
| 19608 i64 iNode, /* Node number to load */ |
| 19609 RtreeNode *pParent, /* Either the parent node or NULL */ |
| 19610 RtreeNode **ppNode /* OUT: Acquired node */ |
| 19611 ){ |
| 19612 int rc; |
| 19613 int rc2 = SQLITE_OK; |
| 19614 RtreeNode *pNode; |
| 19615 |
| 19616 /* Check if the requested node is already in the hash table. If so, |
| 19617 ** increase its reference count and return it. |
| 19618 */ |
| 19619 if( (pNode = nodeHashLookup(pRtree, iNode)) ){ |
| 19620 assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); |
| 19621 if( pParent && !pNode->pParent ){ |
| 19622 nodeReference(pParent); |
| 19623 pNode->pParent = pParent; |
| 19624 } |
| 19625 pNode->nRef++; |
| 19626 *ppNode = pNode; |
| 19627 return SQLITE_OK; |
| 19628 } |
| 19629 |
| 19630 sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); |
| 19631 rc = sqlite3_step(pRtree->pReadNode); |
| 19632 if( rc==SQLITE_ROW ){ |
| 19633 const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); |
| 19634 if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ |
| 19635 pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); |
| 19636 if( !pNode ){ |
| 19637 rc2 = SQLITE_NOMEM; |
| 19638 }else{ |
| 19639 pNode->pParent = pParent; |
| 19640 pNode->zData = (u8 *)&pNode[1]; |
| 19641 pNode->nRef = 1; |
| 19642 pNode->iNode = iNode; |
| 19643 pNode->isDirty = 0; |
| 19644 pNode->pNext = 0; |
| 19645 memcpy(pNode->zData, zBlob, pRtree->iNodeSize); |
| 19646 nodeReference(pParent); |
| 19647 } |
| 19648 } |
| 19649 } |
| 19650 rc = sqlite3_reset(pRtree->pReadNode); |
| 19651 if( rc==SQLITE_OK ) rc = rc2; |
| 19652 |
| 19653 /* If the root node was just loaded, set pRtree->iDepth to the height |
| 19654 ** of the r-tree structure. A height of zero means all data is stored on |
| 19655 ** the root node. A height of one means the children of the root node |
| 19656 ** are the leaves, and so on. If the depth as specified on the root node |
| 19657 ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. |
| 19658 */ |
| 19659 if( pNode && iNode==1 ){ |
| 19660 pRtree->iDepth = readInt16(pNode->zData); |
| 19661 if( pRtree->iDepth>RTREE_MAX_DEPTH ){ |
| 19662 rc = SQLITE_CORRUPT_VTAB; |
| 19663 } |
| 19664 } |
| 19665 |
| 19666 /* If no error has occurred so far, check if the "number of entries" |
| 19667 ** field on the node is too large. If so, set the return code to |
| 19668 ** SQLITE_CORRUPT_VTAB. |
| 19669 */ |
| 19670 if( pNode && rc==SQLITE_OK ){ |
| 19671 if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ |
| 19672 rc = SQLITE_CORRUPT_VTAB; |
| 19673 } |
| 19674 } |
| 19675 |
| 19676 if( rc==SQLITE_OK ){ |
| 19677 if( pNode!=0 ){ |
| 19678 nodeHashInsert(pRtree, pNode); |
| 19679 }else{ |
| 19680 rc = SQLITE_CORRUPT_VTAB; |
| 19681 } |
| 19682 *ppNode = pNode; |
| 19683 }else{ |
| 19684 sqlite3_free(pNode); |
| 19685 *ppNode = 0; |
| 19686 } |
| 19687 |
| 19688 return rc; |
| 19689 } |
| 19690 |
| 19691 /* |
| 19692 ** Overwrite cell iCell of node pNode with the contents of pCell. |
| 19693 */ |
| 19694 static void nodeOverwriteCell( |
| 19695 Rtree *pRtree, /* The overall R-Tree */ |
| 19696 RtreeNode *pNode, /* The node into which the cell is to be written */ |
| 19697 RtreeCell *pCell, /* The cell to write */ |
| 19698 int iCell /* Index into pNode into which pCell is written */ |
| 19699 ){ |
| 19700 int ii; |
| 19701 u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; |
| 19702 p += writeInt64(p, pCell->iRowid); |
| 19703 for(ii=0; ii<(pRtree->nDim*2); ii++){ |
| 19704 p += writeCoord(p, &pCell->aCoord[ii]); |
| 19705 } |
| 19706 pNode->isDirty = 1; |
| 19707 } |
| 19708 |
| 19709 /* |
| 19710 ** Remove the cell with index iCell from node pNode. |
| 19711 */ |
| 19712 static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ |
| 19713 u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; |
| 19714 u8 *pSrc = &pDst[pRtree->nBytesPerCell]; |
| 19715 int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; |
| 19716 memmove(pDst, pSrc, nByte); |
| 19717 writeInt16(&pNode->zData[2], NCELL(pNode)-1); |
| 19718 pNode->isDirty = 1; |
| 19719 } |
| 19720 |
| 19721 /* |
| 19722 ** Insert the contents of cell pCell into node pNode. If the insert |
| 19723 ** is successful, return SQLITE_OK. |
| 19724 ** |
| 19725 ** If there is not enough free space in pNode, return SQLITE_FULL. |
| 19726 */ |
| 19727 static int nodeInsertCell( |
| 19728 Rtree *pRtree, /* The overall R-Tree */ |
| 19729 RtreeNode *pNode, /* Write new cell into this node */ |
| 19730 RtreeCell *pCell /* The cell to be inserted */ |
| 19731 ){ |
| 19732 int nCell; /* Current number of cells in pNode */ |
| 19733 int nMaxCell; /* Maximum number of cells for pNode */ |
| 19734 |
| 19735 nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; |
| 19736 nCell = NCELL(pNode); |
| 19737 |
| 19738 assert( nCell<=nMaxCell ); |
| 19739 if( nCell<nMaxCell ){ |
| 19740 nodeOverwriteCell(pRtree, pNode, pCell, nCell); |
| 19741 writeInt16(&pNode->zData[2], nCell+1); |
| 19742 pNode->isDirty = 1; |
| 19743 } |
| 19744 |
| 19745 return (nCell==nMaxCell); |
| 19746 } |
| 19747 |
| 19748 /* |
| 19749 ** If the node is dirty, write it out to the database. |
| 19750 */ |
| 19751 static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ |
| 19752 int rc = SQLITE_OK; |
| 19753 if( pNode->isDirty ){ |
| 19754 sqlite3_stmt *p = pRtree->pWriteNode; |
| 19755 if( pNode->iNode ){ |
| 19756 sqlite3_bind_int64(p, 1, pNode->iNode); |
| 19757 }else{ |
| 19758 sqlite3_bind_null(p, 1); |
| 19759 } |
| 19760 sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); |
| 19761 sqlite3_step(p); |
| 19762 pNode->isDirty = 0; |
| 19763 rc = sqlite3_reset(p); |
| 19764 if( pNode->iNode==0 && rc==SQLITE_OK ){ |
| 19765 pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); |
| 19766 nodeHashInsert(pRtree, pNode); |
| 19767 } |
| 19768 } |
| 19769 return rc; |
| 19770 } |
| 19771 |
| 19772 /* |
| 19773 ** Release a reference to a node. If the node is dirty and the reference |
| 19774 ** count drops to zero, the node data is written to the database. |
| 19775 */ |
| 19776 static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ |
| 19777 int rc = SQLITE_OK; |
| 19778 if( pNode ){ |
| 19779 assert( pNode->nRef>0 ); |
| 19780 pNode->nRef--; |
| 19781 if( pNode->nRef==0 ){ |
| 19782 if( pNode->iNode==1 ){ |
| 19783 pRtree->iDepth = -1; |
| 19784 } |
| 19785 if( pNode->pParent ){ |
| 19786 rc = nodeRelease(pRtree, pNode->pParent); |
| 19787 } |
| 19788 if( rc==SQLITE_OK ){ |
| 19789 rc = nodeWrite(pRtree, pNode); |
| 19790 } |
| 19791 nodeHashDelete(pRtree, pNode); |
| 19792 sqlite3_free(pNode); |
| 19793 } |
| 19794 } |
| 19795 return rc; |
| 19796 } |
| 19797 |
| 19798 /* |
| 19799 ** Return the 64-bit integer value associated with cell iCell of |
| 19800 ** node pNode. If pNode is a leaf node, this is a rowid. If it is |
| 19801 ** an internal node, then the 64-bit integer is a child page number. |
| 19802 */ |
| 19803 static i64 nodeGetRowid( |
| 19804 Rtree *pRtree, /* The overall R-Tree */ |
| 19805 RtreeNode *pNode, /* The node from which to extract the ID */ |
| 19806 int iCell /* The cell index from which to extract the ID */ |
| 19807 ){ |
| 19808 assert( iCell<NCELL(pNode) ); |
| 19809 return readInt64(&pNode->zData[4 + pRtree->nBytesPerCell*iCell]); |
| 19810 } |
| 19811 |
| 19812 /* |
| 19813 ** Return coordinate iCoord from cell iCell in node pNode. |
| 19814 */ |
| 19815 static void nodeGetCoord( |
| 19816 Rtree *pRtree, /* The overall R-Tree */ |
| 19817 RtreeNode *pNode, /* The node from which to extract a coordinate */ |
| 19818 int iCell, /* The index of the cell within the node */ |
| 19819 int iCoord, /* Which coordinate to extract */ |
| 19820 RtreeCoord *pCoord /* OUT: Space to write result to */ |
| 19821 ){ |
| 19822 readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); |
| 19823 } |
| 19824 |
| 19825 /* |
| 19826 ** Deserialize cell iCell of node pNode. Populate the structure pointed |
| 19827 ** to by pCell with the results. |
| 19828 */ |
| 19829 static void nodeGetCell( |
| 19830 Rtree *pRtree, /* The overall R-Tree */ |
| 19831 RtreeNode *pNode, /* The node containing the cell to be read */ |
| 19832 int iCell, /* Index of the cell within the node */ |
| 19833 RtreeCell *pCell /* OUT: Write the cell contents here */ |
| 19834 ){ |
| 19835 u8 *pData; |
| 19836 RtreeCoord *pCoord; |
| 19837 int ii; |
| 19838 pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); |
| 19839 pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); |
| 19840 pCoord = pCell->aCoord; |
| 19841 for(ii=0; ii<pRtree->nDim*2; ii++){ |
| 19842 readCoord(&pData[ii*4], &pCoord[ii]); |
| 19843 } |
| 19844 } |
| 19845 |
| 19846 |
| 19847 /* Forward declaration for the function that does the work of |
| 19848 ** the virtual table module xCreate() and xConnect() methods. |
| 19849 */ |
| 19850 static int rtreeInit( |
| 19851 sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int |
| 19852 ); |
| 19853 |
| 19854 /* |
| 19855 ** Rtree virtual table module xCreate method. |
| 19856 */ |
| 19857 static int rtreeCreate( |
| 19858 sqlite3 *db, |
| 19859 void *pAux, |
| 19860 int argc, const char *const*argv, |
| 19861 sqlite3_vtab **ppVtab, |
| 19862 char **pzErr |
| 19863 ){ |
| 19864 return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); |
| 19865 } |
| 19866 |
| 19867 /* |
| 19868 ** Rtree virtual table module xConnect method. |
| 19869 */ |
| 19870 static int rtreeConnect( |
| 19871 sqlite3 *db, |
| 19872 void *pAux, |
| 19873 int argc, const char *const*argv, |
| 19874 sqlite3_vtab **ppVtab, |
| 19875 char **pzErr |
| 19876 ){ |
| 19877 return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); |
| 19878 } |
| 19879 |
| 19880 /* |
| 19881 ** Increment the r-tree reference count. |
| 19882 */ |
| 19883 static void rtreeReference(Rtree *pRtree){ |
| 19884 pRtree->nBusy++; |
| 19885 } |
| 19886 |
| 19887 /* |
| 19888 ** Decrement the r-tree reference count. When the reference count reaches |
| 19889 ** zero the structure is deleted. |
| 19890 */ |
| 19891 static void rtreeRelease(Rtree *pRtree){ |
| 19892 pRtree->nBusy--; |
| 19893 if( pRtree->nBusy==0 ){ |
| 19894 sqlite3_finalize(pRtree->pReadNode); |
| 19895 sqlite3_finalize(pRtree->pWriteNode); |
| 19896 sqlite3_finalize(pRtree->pDeleteNode); |
| 19897 sqlite3_finalize(pRtree->pReadRowid); |
| 19898 sqlite3_finalize(pRtree->pWriteRowid); |
| 19899 sqlite3_finalize(pRtree->pDeleteRowid); |
| 19900 sqlite3_finalize(pRtree->pReadParent); |
| 19901 sqlite3_finalize(pRtree->pWriteParent); |
| 19902 sqlite3_finalize(pRtree->pDeleteParent); |
| 19903 sqlite3_free(pRtree); |
| 19904 } |
| 19905 } |
| 19906 |
| 19907 /* |
| 19908 ** Rtree virtual table module xDisconnect method. |
| 19909 */ |
| 19910 static int rtreeDisconnect(sqlite3_vtab *pVtab){ |
| 19911 rtreeRelease((Rtree *)pVtab); |
| 19912 return SQLITE_OK; |
| 19913 } |
| 19914 |
| 19915 /* |
| 19916 ** Rtree virtual table module xDestroy method. |
| 19917 */ |
| 19918 static int rtreeDestroy(sqlite3_vtab *pVtab){ |
| 19919 Rtree *pRtree = (Rtree *)pVtab; |
| 19920 int rc; |
| 19921 char *zCreate = sqlite3_mprintf( |
| 19922 "DROP TABLE '%q'.'%q_node';" |
| 19923 "DROP TABLE '%q'.'%q_rowid';" |
| 19924 "DROP TABLE '%q'.'%q_parent';", |
| 19925 pRtree->zDb, pRtree->zName, |
| 19926 pRtree->zDb, pRtree->zName, |
| 19927 pRtree->zDb, pRtree->zName |
| 19928 ); |
| 19929 if( !zCreate ){ |
| 19930 rc = SQLITE_NOMEM; |
| 19931 }else{ |
| 19932 rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); |
| 19933 sqlite3_free(zCreate); |
| 19934 } |
| 19935 if( rc==SQLITE_OK ){ |
| 19936 rtreeRelease(pRtree); |
| 19937 } |
| 19938 |
| 19939 return rc; |
| 19940 } |
| 19941 |
| 19942 /* |
| 19943 ** Rtree virtual table module xOpen method. |
| 19944 */ |
| 19945 static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ |
| 19946 int rc = SQLITE_NOMEM; |
| 19947 RtreeCursor *pCsr; |
| 19948 |
| 19949 pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); |
| 19950 if( pCsr ){ |
| 19951 memset(pCsr, 0, sizeof(RtreeCursor)); |
| 19952 pCsr->base.pVtab = pVTab; |
| 19953 rc = SQLITE_OK; |
| 19954 } |
| 19955 *ppCursor = (sqlite3_vtab_cursor *)pCsr; |
| 19956 |
| 19957 return rc; |
| 19958 } |
| 19959 |
| 19960 |
| 19961 /* |
| 19962 ** Free the RtreeCursor.aConstraint[] array and its contents. |
| 19963 */ |
| 19964 static void freeCursorConstraints(RtreeCursor *pCsr){ |
| 19965 if( pCsr->aConstraint ){ |
| 19966 int i; /* Used to iterate through constraint array */ |
| 19967 for(i=0; i<pCsr->nConstraint; i++){ |
| 19968 sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; |
| 19969 if( pInfo ){ |
| 19970 if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); |
| 19971 sqlite3_free(pInfo); |
| 19972 } |
| 19973 } |
| 19974 sqlite3_free(pCsr->aConstraint); |
| 19975 pCsr->aConstraint = 0; |
| 19976 } |
| 19977 } |
| 19978 |
| 19979 /* |
| 19980 ** Rtree virtual table module xClose method. |
| 19981 */ |
| 19982 static int rtreeClose(sqlite3_vtab_cursor *cur){ |
| 19983 Rtree *pRtree = (Rtree *)(cur->pVtab); |
| 19984 int ii; |
| 19985 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 19986 freeCursorConstraints(pCsr); |
| 19987 sqlite3_free(pCsr->aPoint); |
| 19988 for(ii=0; ii<RTREE_CACHE_SZ; ii++) nodeRelease(pRtree, pCsr->aNode[ii]); |
| 19989 sqlite3_free(pCsr); |
| 19990 return SQLITE_OK; |
| 19991 } |
| 19992 |
| 19993 /* |
| 19994 ** Rtree virtual table module xEof method. |
| 19995 ** |
| 19996 ** Return non-zero if the cursor does not currently point to a valid |
| 19997 ** record (i.e if the scan has finished), or zero otherwise. |
| 19998 */ |
| 19999 static int rtreeEof(sqlite3_vtab_cursor *cur){ |
| 20000 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 20001 return pCsr->atEOF; |
| 20002 } |
| 20003 |
/*
** Convert raw bits from the on-disk RTree record into a coordinate value.
** The on-disk format is big-endian and needs to be converted for little-
** endian platforms.  The on-disk record stores integer coordinates if
** eInt is true and it stores 32-bit floating point records if eInt is
** false.  a[] is the four bytes of the on-disk record to be decoded.
** Store the results in "r".
**
** There are three versions of this macro, one each for little-endian and
** big-endian processors and a third generic implementation.  The endian-
** specific implementations are much faster and are preferred if the
** processor endianness is known at compile-time.  The SQLITE_BYTEORDER
** macro is part of sqliteInt.h and hence the endian-specific
** implementation will only be used if this module is compiled as part
** of the amalgamation.
**
** Every macro argument is parenthesized in the expansion so that
** expressions such as "pData+4" may be passed for a[]. The expansion
** declares a local variable named "c"; arguments must not refer to a
** variable of that name in the caller.
*/
#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
#define RTREE_DECODE_COORD(eInt, a, r) {                              \
    RtreeCoord c;    /* Coordinate decoded */                         \
    memcpy(&c.u,(a),4);                                               \
    c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)|                         \
          ((c.u&0xff)<<24)|((c.u&0xff00)<<8);                         \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f;   \
}
#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
#define RTREE_DECODE_COORD(eInt, a, r) {                              \
    RtreeCoord c;    /* Coordinate decoded */                         \
    memcpy(&c.u,(a),4);                                               \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f;   \
}
#else
#define RTREE_DECODE_COORD(eInt, a, r) {                              \
    RtreeCoord c;    /* Coordinate decoded */                         \
    c.u = ((u32)(a)[0]<<24) + ((u32)(a)[1]<<16)                       \
         +((u32)(a)[2]<<8)  + (a)[3];                                 \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f;   \
}
#endif
| 20042 |
| 20043 /* |
| 20044 ** Check the RTree node or entry given by pCellData and p against the MATCH |
| 20045 ** constraint pConstraint. |
| 20046 */ |
| 20047 static int rtreeCallbackConstraint( |
| 20048 RtreeConstraint *pConstraint, /* The constraint to test */ |
| 20049 int eInt, /* True if RTree holding integer coordinates */ |
| 20050 u8 *pCellData, /* Raw cell content */ |
| 20051 RtreeSearchPoint *pSearch, /* Container of this cell */ |
| 20052 sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ |
| 20053 int *peWithin /* OUT: visibility of the cell */ |
| 20054 ){ |
| 20055 int i; /* Loop counter */ |
| 20056 sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ |
| 20057 int nCoord = pInfo->nCoord; /* No. of coordinates */ |
| 20058 int rc; /* Callback return code */ |
| 20059 sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ |
| 20060 |
| 20061 assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); |
| 20062 assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); |
| 20063 |
| 20064 if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ |
| 20065 pInfo->iRowid = readInt64(pCellData); |
| 20066 } |
| 20067 pCellData += 8; |
| 20068 for(i=0; i<nCoord; i++, pCellData += 4){ |
| 20069 RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]); |
| 20070 } |
| 20071 if( pConstraint->op==RTREE_MATCH ){ |
| 20072 rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, |
| 20073 nCoord, aCoord, &i); |
| 20074 if( i==0 ) *peWithin = NOT_WITHIN; |
| 20075 *prScore = RTREE_ZERO; |
| 20076 }else{ |
| 20077 pInfo->aCoord = aCoord; |
| 20078 pInfo->iLevel = pSearch->iLevel - 1; |
| 20079 pInfo->rScore = pInfo->rParentScore = pSearch->rScore; |
| 20080 pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; |
| 20081 rc = pConstraint->u.xQueryFunc(pInfo); |
| 20082 if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; |
| 20083 if( pInfo->rScore<*prScore || *prScore<RTREE_ZERO ){ |
| 20084 *prScore = pInfo->rScore; |
| 20085 } |
| 20086 } |
| 20087 return rc; |
| 20088 } |
| 20089 |
| 20090 /* |
| 20091 ** Check the internal RTree node given by pCellData against constraint p. |
| 20092 ** If this constraint cannot be satisfied by any child within the node, |
| 20093 ** set *peWithin to NOT_WITHIN. |
| 20094 */ |
| 20095 static void rtreeNonleafConstraint( |
| 20096 RtreeConstraint *p, /* The constraint to test */ |
| 20097 int eInt, /* True if RTree holds integer coordinates */ |
| 20098 u8 *pCellData, /* Raw cell content as appears on disk */ |
| 20099 int *peWithin /* Adjust downward, as appropriate */ |
| 20100 ){ |
| 20101 sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ |
| 20102 |
| 20103 /* p->iCoord might point to either a lower or upper bound coordinate |
| 20104 ** in a coordinate pair. But make pCellData point to the lower bound. |
| 20105 */ |
| 20106 pCellData += 8 + 4*(p->iCoord&0xfe); |
| 20107 |
| 20108 assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE |
| 20109 || p->op==RTREE_GT || p->op==RTREE_EQ ); |
| 20110 switch( p->op ){ |
| 20111 case RTREE_LE: |
| 20112 case RTREE_LT: |
| 20113 case RTREE_EQ: |
| 20114 RTREE_DECODE_COORD(eInt, pCellData, val); |
| 20115 /* val now holds the lower bound of the coordinate pair */ |
| 20116 if( p->u.rValue>=val ) return; |
| 20117 if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ |
| 20118 /* Fall through for the RTREE_EQ case */ |
| 20119 |
| 20120 default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ |
| 20121 pCellData += 4; |
| 20122 RTREE_DECODE_COORD(eInt, pCellData, val); |
| 20123 /* val now holds the upper bound of the coordinate pair */ |
| 20124 if( p->u.rValue<=val ) return; |
| 20125 } |
| 20126 *peWithin = NOT_WITHIN; |
| 20127 } |
| 20128 |
| 20129 /* |
| 20130 ** Check the leaf RTree cell given by pCellData against constraint p. |
| 20131 ** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. |
| 20132 ** If the constraint is satisfied, leave *peWithin unchanged. |
| 20133 ** |
| 20134 ** The constraint is of the form: xN op $val |
| 20135 ** |
| 20136 ** The op is given by p->op. The xN is p->iCoord-th coordinate in |
| 20137 ** pCellData. $val is given by p->u.rValue. |
| 20138 */ |
| 20139 static void rtreeLeafConstraint( |
| 20140 RtreeConstraint *p, /* The constraint to test */ |
| 20141 int eInt, /* True if RTree holds integer coordinates */ |
| 20142 u8 *pCellData, /* Raw cell content as appears on disk */ |
| 20143 int *peWithin /* Adjust downward, as appropriate */ |
| 20144 ){ |
| 20145 RtreeDValue xN; /* Coordinate value converted to a double */ |
| 20146 |
| 20147 assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE |
| 20148 || p->op==RTREE_GT || p->op==RTREE_EQ ); |
| 20149 pCellData += 8 + p->iCoord*4; |
| 20150 RTREE_DECODE_COORD(eInt, pCellData, xN); |
| 20151 switch( p->op ){ |
| 20152 case RTREE_LE: if( xN <= p->u.rValue ) return; break; |
| 20153 case RTREE_LT: if( xN < p->u.rValue ) return; break; |
| 20154 case RTREE_GE: if( xN >= p->u.rValue ) return; break; |
| 20155 case RTREE_GT: if( xN > p->u.rValue ) return; break; |
| 20156 default: if( xN == p->u.rValue ) return; break; |
| 20157 } |
| 20158 *peWithin = NOT_WITHIN; |
| 20159 } |
| 20160 |
| 20161 /* |
| 20162 ** One of the cells in node pNode is guaranteed to have a 64-bit |
| 20163 ** integer value equal to iRowid. Return the index of this cell. |
| 20164 */ |
| 20165 static int nodeRowidIndex( |
| 20166 Rtree *pRtree, |
| 20167 RtreeNode *pNode, |
| 20168 i64 iRowid, |
| 20169 int *piIndex |
| 20170 ){ |
| 20171 int ii; |
| 20172 int nCell = NCELL(pNode); |
| 20173 assert( nCell<200 ); |
| 20174 for(ii=0; ii<nCell; ii++){ |
| 20175 if( nodeGetRowid(pRtree, pNode, ii)==iRowid ){ |
| 20176 *piIndex = ii; |
| 20177 return SQLITE_OK; |
| 20178 } |
| 20179 } |
| 20180 return SQLITE_CORRUPT_VTAB; |
| 20181 } |
| 20182 |
| 20183 /* |
| 20184 ** Return the index of the cell containing a pointer to node pNode |
| 20185 ** in its parent. If pNode is the root node, return -1. |
| 20186 */ |
| 20187 static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){ |
| 20188 RtreeNode *pParent = pNode->pParent; |
| 20189 if( pParent ){ |
| 20190 return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); |
| 20191 } |
| 20192 *piIndex = -1; |
| 20193 return SQLITE_OK; |
| 20194 } |
| 20195 |
| 20196 /* |
| 20197 ** Compare two search points. Return negative, zero, or positive if the first |
| 20198 ** is less than, equal to, or greater than the second. |
| 20199 ** |
| 20200 ** The rScore is the primary key. Smaller rScore values come first. |
| 20201 ** If the rScore is a tie, then use iLevel as the tie breaker with smaller |
| 20202 ** iLevel values coming first. In this way, if rScore is the same for all |
| 20203 ** SearchPoints, then iLevel becomes the deciding factor and the result |
| 20204 ** is a depth-first search, which is the desired default behavior. |
| 20205 */ |
| 20206 static int rtreeSearchPointCompare( |
| 20207 const RtreeSearchPoint *pA, |
| 20208 const RtreeSearchPoint *pB |
| 20209 ){ |
| 20210 if( pA->rScore<pB->rScore ) return -1; |
| 20211 if( pA->rScore>pB->rScore ) return +1; |
| 20212 if( pA->iLevel<pB->iLevel ) return -1; |
| 20213 if( pA->iLevel>pB->iLevel ) return +1; |
| 20214 return 0; |
| 20215 } |
| 20216 |
| 20217 /* |
| 20218 ** Interchange to search points in a cursor. |
| 20219 */ |
| 20220 static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ |
| 20221 RtreeSearchPoint t = p->aPoint[i]; |
| 20222 assert( i<j ); |
| 20223 p->aPoint[i] = p->aPoint[j]; |
| 20224 p->aPoint[j] = t; |
| 20225 i++; j++; |
| 20226 if( i<RTREE_CACHE_SZ ){ |
| 20227 if( j>=RTREE_CACHE_SZ ){ |
| 20228 nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); |
| 20229 p->aNode[i] = 0; |
| 20230 }else{ |
| 20231 RtreeNode *pTemp = p->aNode[i]; |
| 20232 p->aNode[i] = p->aNode[j]; |
| 20233 p->aNode[j] = pTemp; |
| 20234 } |
| 20235 } |
| 20236 } |
| 20237 |
| 20238 /* |
| 20239 ** Return the search point with the lowest current score. |
| 20240 */ |
| 20241 static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ |
| 20242 return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; |
| 20243 } |
| 20244 |
| 20245 /* |
| 20246 ** Get the RtreeNode for the search point with the lowest score. |
| 20247 */ |
| 20248 static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ |
| 20249 sqlite3_int64 id; |
| 20250 int ii = 1 - pCur->bPoint; |
| 20251 assert( ii==0 || ii==1 ); |
| 20252 assert( pCur->bPoint || pCur->nPoint ); |
| 20253 if( pCur->aNode[ii]==0 ){ |
| 20254 assert( pRC!=0 ); |
| 20255 id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; |
| 20256 *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); |
| 20257 } |
| 20258 return pCur->aNode[ii]; |
| 20259 } |
| 20260 |
| 20261 /* |
| 20262 ** Push a new element onto the priority queue |
| 20263 */ |
| 20264 static RtreeSearchPoint *rtreeEnqueue( |
| 20265 RtreeCursor *pCur, /* The cursor */ |
| 20266 RtreeDValue rScore, /* Score for the new search point */ |
| 20267 u8 iLevel /* Level for the new search point */ |
| 20268 ){ |
| 20269 int i, j; |
| 20270 RtreeSearchPoint *pNew; |
| 20271 if( pCur->nPoint>=pCur->nPointAlloc ){ |
| 20272 int nNew = pCur->nPointAlloc*2 + 8; |
| 20273 pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); |
| 20274 if( pNew==0 ) return 0; |
| 20275 pCur->aPoint = pNew; |
| 20276 pCur->nPointAlloc = nNew; |
| 20277 } |
| 20278 i = pCur->nPoint++; |
| 20279 pNew = pCur->aPoint + i; |
| 20280 pNew->rScore = rScore; |
| 20281 pNew->iLevel = iLevel; |
| 20282 assert( iLevel<=RTREE_MAX_DEPTH ); |
| 20283 while( i>0 ){ |
| 20284 RtreeSearchPoint *pParent; |
| 20285 j = (i-1)/2; |
| 20286 pParent = pCur->aPoint + j; |
| 20287 if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; |
| 20288 rtreeSearchPointSwap(pCur, j, i); |
| 20289 i = j; |
| 20290 pNew = pParent; |
| 20291 } |
| 20292 return pNew; |
| 20293 } |
| 20294 |
| 20295 /* |
| 20296 ** Allocate a new RtreeSearchPoint and return a pointer to it. Return |
| 20297 ** NULL if malloc fails. |
| 20298 */ |
| 20299 static RtreeSearchPoint *rtreeSearchPointNew( |
| 20300 RtreeCursor *pCur, /* The cursor */ |
| 20301 RtreeDValue rScore, /* Score for the new search point */ |
| 20302 u8 iLevel /* Level for the new search point */ |
| 20303 ){ |
| 20304 RtreeSearchPoint *pNew, *pFirst; |
| 20305 pFirst = rtreeSearchPointFirst(pCur); |
| 20306 pCur->anQueue[iLevel]++; |
| 20307 if( pFirst==0 |
| 20308 || pFirst->rScore>rScore |
| 20309 || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) |
| 20310 ){ |
| 20311 if( pCur->bPoint ){ |
| 20312 int ii; |
| 20313 pNew = rtreeEnqueue(pCur, rScore, iLevel); |
| 20314 if( pNew==0 ) return 0; |
| 20315 ii = (int)(pNew - pCur->aPoint) + 1; |
| 20316 if( ii<RTREE_CACHE_SZ ){ |
| 20317 assert( pCur->aNode[ii]==0 ); |
| 20318 pCur->aNode[ii] = pCur->aNode[0]; |
| 20319 }else{ |
| 20320 nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); |
| 20321 } |
| 20322 pCur->aNode[0] = 0; |
| 20323 *pNew = pCur->sPoint; |
| 20324 } |
| 20325 pCur->sPoint.rScore = rScore; |
| 20326 pCur->sPoint.iLevel = iLevel; |
| 20327 pCur->bPoint = 1; |
| 20328 return &pCur->sPoint; |
| 20329 }else{ |
| 20330 return rtreeEnqueue(pCur, rScore, iLevel); |
| 20331 } |
| 20332 } |
| 20333 |
#if 0
/* Debugging aids that print the RtreeSearchPoint queue. Disabled by
** default; change the "#if 0" above to enable them. */

/* Print one search point. idx is its index in aPoint[], or -1 for sPoint.
** The cached node pointer is appended when a cache slot exists. */
static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){
  const int iNode = idx + 1;
  if( idx>=0 ){
    printf("%2d", idx);
  }else{
    printf(" s");
  }
  printf(" %d.%05lld.%02d %g %d",
    p->iLevel, p->id, p->iCell, p->rScore, p->eWithin
  );
  if( iNode>=RTREE_CACHE_SZ ){
    printf("\n");
    return;
  }
  printf(" %p\n", pCur->aNode[iNode]);
}

/* Print the whole queue of pCur, labelled with zPrefix. */
static void traceQueue(RtreeCursor *pCur, const char *zPrefix){
  int ii = 0;
  printf("=== %9s ", zPrefix);
  if( pCur->bPoint ){
    tracePoint(&pCur->sPoint, -1, pCur);
  }
  while( ii<pCur->nPoint ){
    if( ii>0 || pCur->bPoint ) printf(" ");
    tracePoint(&pCur->aPoint[ii], ii, pCur);
    ii++;
  }
}
# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B)
#else
/* Tracing disabled: the macro expands to nothing. */
# define RTREE_QUEUE_TRACE(A,B) /* no-op */
#endif
| 20363 |
| 20364 /* Remove the search point with the lowest current score. |
| 20365 */ |
| 20366 static void rtreeSearchPointPop(RtreeCursor *p){ |
| 20367 int i, j, k, n; |
| 20368 i = 1 - p->bPoint; |
| 20369 assert( i==0 || i==1 ); |
| 20370 if( p->aNode[i] ){ |
| 20371 nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); |
| 20372 p->aNode[i] = 0; |
| 20373 } |
| 20374 if( p->bPoint ){ |
| 20375 p->anQueue[p->sPoint.iLevel]--; |
| 20376 p->bPoint = 0; |
| 20377 }else if( p->nPoint ){ |
| 20378 p->anQueue[p->aPoint[0].iLevel]--; |
| 20379 n = --p->nPoint; |
| 20380 p->aPoint[0] = p->aPoint[n]; |
| 20381 if( n<RTREE_CACHE_SZ-1 ){ |
| 20382 p->aNode[1] = p->aNode[n+1]; |
| 20383 p->aNode[n+1] = 0; |
| 20384 } |
| 20385 i = 0; |
| 20386 while( (j = i*2+1)<n ){ |
| 20387 k = j+1; |
| 20388 if( k<n && rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[j])<0 ){ |
| 20389 if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ |
| 20390 rtreeSearchPointSwap(p, i, k); |
| 20391 i = k; |
| 20392 }else{ |
| 20393 break; |
| 20394 } |
| 20395 }else{ |
| 20396 if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ |
| 20397 rtreeSearchPointSwap(p, i, j); |
| 20398 i = j; |
| 20399 }else{ |
| 20400 break; |
| 20401 } |
| 20402 } |
| 20403 } |
| 20404 } |
| 20405 } |
| 20406 |
| 20407 |
| 20408 /* |
| 20409 ** Continue the search on cursor pCur until the front of the queue |
| 20410 ** contains an entry suitable for returning as a result-set row, |
| 20411 ** or until the RtreeSearchPoint queue is empty, indicating that the |
| 20412 ** query has completed. |
| 20413 */ |
| 20414 static int rtreeStepToLeaf(RtreeCursor *pCur){ |
| 20415 RtreeSearchPoint *p; |
| 20416 Rtree *pRtree = RTREE_OF_CURSOR(pCur); |
| 20417 RtreeNode *pNode; |
| 20418 int eWithin; |
| 20419 int rc = SQLITE_OK; |
| 20420 int nCell; |
| 20421 int nConstraint = pCur->nConstraint; |
| 20422 int ii; |
| 20423 int eInt; |
| 20424 RtreeSearchPoint x; |
| 20425 |
| 20426 eInt = pRtree->eCoordType==RTREE_COORD_INT32; |
| 20427 while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ |
| 20428 pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); |
| 20429 if( rc ) return rc; |
| 20430 nCell = NCELL(pNode); |
| 20431 assert( nCell<200 ); |
| 20432 while( p->iCell<nCell ){ |
| 20433 sqlite3_rtree_dbl rScore = (sqlite3_rtree_dbl)-1; |
| 20434 u8 *pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell); |
| 20435 eWithin = FULLY_WITHIN; |
| 20436 for(ii=0; ii<nConstraint; ii++){ |
| 20437 RtreeConstraint *pConstraint = pCur->aConstraint + ii; |
| 20438 if( pConstraint->op>=RTREE_MATCH ){ |
| 20439 rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, |
| 20440 &rScore, &eWithin); |
| 20441 if( rc ) return rc; |
| 20442 }else if( p->iLevel==1 ){ |
| 20443 rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); |
| 20444 }else{ |
| 20445 rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); |
| 20446 } |
| 20447 if( eWithin==NOT_WITHIN ) break; |
| 20448 } |
| 20449 p->iCell++; |
| 20450 if( eWithin==NOT_WITHIN ) continue; |
| 20451 x.iLevel = p->iLevel - 1; |
| 20452 if( x.iLevel ){ |
| 20453 x.id = readInt64(pCellData); |
| 20454 x.iCell = 0; |
| 20455 }else{ |
| 20456 x.id = p->id; |
| 20457 x.iCell = p->iCell - 1; |
| 20458 } |
| 20459 if( p->iCell>=nCell ){ |
| 20460 RTREE_QUEUE_TRACE(pCur, "POP-S:"); |
| 20461 rtreeSearchPointPop(pCur); |
| 20462 } |
| 20463 if( rScore<RTREE_ZERO ) rScore = RTREE_ZERO; |
| 20464 p = rtreeSearchPointNew(pCur, rScore, x.iLevel); |
| 20465 if( p==0 ) return SQLITE_NOMEM; |
| 20466 p->eWithin = eWithin; |
| 20467 p->id = x.id; |
| 20468 p->iCell = x.iCell; |
| 20469 RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); |
| 20470 break; |
| 20471 } |
| 20472 if( p->iCell>=nCell ){ |
| 20473 RTREE_QUEUE_TRACE(pCur, "POP-Se:"); |
| 20474 rtreeSearchPointPop(pCur); |
| 20475 } |
| 20476 } |
| 20477 pCur->atEOF = p==0; |
| 20478 return SQLITE_OK; |
| 20479 } |
| 20480 |
| 20481 /* |
| 20482 ** Rtree virtual table module xNext method. |
| 20483 */ |
| 20484 static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ |
| 20485 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20486 int rc = SQLITE_OK; |
| 20487 |
| 20488 /* Move to the next entry that matches the configured constraints. */ |
| 20489 RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); |
| 20490 rtreeSearchPointPop(pCsr); |
| 20491 rc = rtreeStepToLeaf(pCsr); |
| 20492 return rc; |
| 20493 } |
| 20494 |
| 20495 /* |
| 20496 ** Rtree virtual table module xRowid method. |
| 20497 */ |
| 20498 static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ |
| 20499 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20500 RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); |
| 20501 int rc = SQLITE_OK; |
| 20502 RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); |
| 20503 if( rc==SQLITE_OK && p ){ |
| 20504 *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); |
| 20505 } |
| 20506 return rc; |
| 20507 } |
| 20508 |
| 20509 /* |
| 20510 ** Rtree virtual table module xColumn method. |
| 20511 */ |
| 20512 static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ |
| 20513 Rtree *pRtree = (Rtree *)cur->pVtab; |
| 20514 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 20515 RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); |
| 20516 RtreeCoord c; |
| 20517 int rc = SQLITE_OK; |
| 20518 RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); |
| 20519 |
| 20520 if( rc ) return rc; |
| 20521 if( p==0 ) return SQLITE_OK; |
| 20522 if( i==0 ){ |
| 20523 sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); |
| 20524 }else{ |
| 20525 if( rc ) return rc; |
| 20526 nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); |
| 20527 #ifndef SQLITE_RTREE_INT_ONLY |
| 20528 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 20529 sqlite3_result_double(ctx, c.f); |
| 20530 }else |
| 20531 #endif |
| 20532 { |
| 20533 assert( pRtree->eCoordType==RTREE_COORD_INT32 ); |
| 20534 sqlite3_result_int(ctx, c.i); |
| 20535 } |
| 20536 } |
| 20537 return SQLITE_OK; |
| 20538 } |
| 20539 |
| 20540 /* |
| 20541 ** Use nodeAcquire() to obtain the leaf node containing the record with |
| 20542 ** rowid iRowid. If successful, set *ppLeaf to point to the node and |
| 20543 ** return SQLITE_OK. If there is no such record in the table, set |
| 20544 ** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf |
| 20545 ** to zero and return an SQLite error code. |
| 20546 */ |
| 20547 static int findLeafNode( |
| 20548 Rtree *pRtree, /* RTree to search */ |
| 20549 i64 iRowid, /* The rowid searching for */ |
| 20550 RtreeNode **ppLeaf, /* Write the node here */ |
| 20551 sqlite3_int64 *piNode /* Write the node-id here */ |
| 20552 ){ |
| 20553 int rc; |
| 20554 *ppLeaf = 0; |
| 20555 sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); |
| 20556 if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ |
| 20557 i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); |
| 20558 if( piNode ) *piNode = iNode; |
| 20559 rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); |
| 20560 sqlite3_reset(pRtree->pReadRowid); |
| 20561 }else{ |
| 20562 rc = sqlite3_reset(pRtree->pReadRowid); |
| 20563 } |
| 20564 return rc; |
| 20565 } |
| 20566 |
| 20567 /* |
| 20568 ** This function is called to configure the RtreeConstraint object passed |
| 20569 ** as the second argument for a MATCH constraint. The value passed as the |
| 20570 ** first argument to this function is the right-hand operand to the MATCH |
| 20571 ** operator. |
| 20572 */ |
| 20573 static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ |
| 20574 RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ |
| 20575 sqlite3_rtree_query_info *pInfo; /* Callback information */ |
| 20576 int nBlob; /* Size of the geometry function blob */ |
| 20577 int nExpected; /* Expected size of the BLOB */ |
| 20578 |
| 20579 /* Check that value is actually a blob. */ |
| 20580 if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; |
| 20581 |
| 20582 /* Check that the blob is roughly the right size. */ |
| 20583 nBlob = sqlite3_value_bytes(pValue); |
| 20584 if( nBlob<(int)sizeof(RtreeMatchArg) ){ |
| 20585 return SQLITE_ERROR; |
| 20586 } |
| 20587 |
| 20588 pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); |
| 20589 if( !pInfo ) return SQLITE_NOMEM; |
| 20590 memset(pInfo, 0, sizeof(*pInfo)); |
| 20591 pBlob = (RtreeMatchArg*)&pInfo[1]; |
| 20592 |
| 20593 memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); |
| 20594 nExpected = (int)(sizeof(RtreeMatchArg) + |
| 20595 pBlob->nParam*sizeof(sqlite3_value*) + |
| 20596 (pBlob->nParam-1)*sizeof(RtreeDValue)); |
| 20597 if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ |
| 20598 sqlite3_free(pInfo); |
| 20599 return SQLITE_ERROR; |
| 20600 } |
| 20601 pInfo->pContext = pBlob->cb.pContext; |
| 20602 pInfo->nParam = pBlob->nParam; |
| 20603 pInfo->aParam = pBlob->aParam; |
| 20604 pInfo->apSqlParam = pBlob->apSqlParam; |
| 20605 |
| 20606 if( pBlob->cb.xGeom ){ |
| 20607 pCons->u.xGeom = pBlob->cb.xGeom; |
| 20608 }else{ |
| 20609 pCons->op = RTREE_QUERY; |
| 20610 pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; |
| 20611 } |
| 20612 pCons->pInfo = pInfo; |
| 20613 return SQLITE_OK; |
| 20614 } |
| 20615 |
| 20616 /* |
| 20617 ** Rtree virtual table module xFilter method. |
| 20618 */ |
| 20619 static int rtreeFilter( |
| 20620 sqlite3_vtab_cursor *pVtabCursor, |
| 20621 int idxNum, const char *idxStr, |
| 20622 int argc, sqlite3_value **argv |
| 20623 ){ |
| 20624 Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; |
| 20625 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20626 RtreeNode *pRoot = 0; |
| 20627 int ii; |
| 20628 int rc = SQLITE_OK; |
| 20629 int iCell = 0; |
| 20630 |
| 20631 rtreeReference(pRtree); |
| 20632 |
| 20633 /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ |
| 20634 freeCursorConstraints(pCsr); |
| 20635 sqlite3_free(pCsr->aPoint); |
| 20636 memset(pCsr, 0, sizeof(RtreeCursor)); |
| 20637 pCsr->base.pVtab = (sqlite3_vtab*)pRtree; |
| 20638 |
| 20639 pCsr->iStrategy = idxNum; |
| 20640 if( idxNum==1 ){ |
| 20641 /* Special case - lookup by rowid. */ |
| 20642 RtreeNode *pLeaf; /* Leaf on which the required cell resides */ |
| 20643 RtreeSearchPoint *p; /* Search point for the the leaf */ |
| 20644 i64 iRowid = sqlite3_value_int64(argv[0]); |
| 20645 i64 iNode = 0; |
| 20646 rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); |
| 20647 if( rc==SQLITE_OK && pLeaf!=0 ){ |
| 20648 p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); |
| 20649 assert( p!=0 ); /* Always returns pCsr->sPoint */ |
| 20650 pCsr->aNode[0] = pLeaf; |
| 20651 p->id = iNode; |
| 20652 p->eWithin = PARTLY_WITHIN; |
| 20653 rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); |
| 20654 p->iCell = iCell; |
| 20655 RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); |
| 20656 }else{ |
| 20657 pCsr->atEOF = 1; |
| 20658 } |
| 20659 }else{ |
| 20660 /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array |
| 20661 ** with the configured constraints. |
| 20662 */ |
| 20663 rc = nodeAcquire(pRtree, 1, 0, &pRoot); |
| 20664 if( rc==SQLITE_OK && argc>0 ){ |
| 20665 pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); |
| 20666 pCsr->nConstraint = argc; |
| 20667 if( !pCsr->aConstraint ){ |
| 20668 rc = SQLITE_NOMEM; |
| 20669 }else{ |
| 20670 memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); |
| 20671 memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); |
| 20672 assert( (idxStr==0 && argc==0) |
| 20673 || (idxStr && (int)strlen(idxStr)==argc*2) ); |
| 20674 for(ii=0; ii<argc; ii++){ |
| 20675 RtreeConstraint *p = &pCsr->aConstraint[ii]; |
| 20676 p->op = idxStr[ii*2]; |
| 20677 p->iCoord = idxStr[ii*2+1]-'0'; |
| 20678 if( p->op>=RTREE_MATCH ){ |
| 20679 /* A MATCH operator. The right-hand-side must be a blob that |
| 20680 ** can be cast into an RtreeMatchArg object. One created using |
| 20681 ** an sqlite3_rtree_geometry_callback() SQL user function. |
| 20682 */ |
| 20683 rc = deserializeGeometry(argv[ii], p); |
| 20684 if( rc!=SQLITE_OK ){ |
| 20685 break; |
| 20686 } |
| 20687 p->pInfo->nCoord = pRtree->nDim*2; |
| 20688 p->pInfo->anQueue = pCsr->anQueue; |
| 20689 p->pInfo->mxLevel = pRtree->iDepth + 1; |
| 20690 }else{ |
| 20691 #ifdef SQLITE_RTREE_INT_ONLY |
| 20692 p->u.rValue = sqlite3_value_int64(argv[ii]); |
| 20693 #else |
| 20694 p->u.rValue = sqlite3_value_double(argv[ii]); |
| 20695 #endif |
| 20696 } |
| 20697 } |
| 20698 } |
| 20699 } |
| 20700 if( rc==SQLITE_OK ){ |
| 20701 RtreeSearchPoint *pNew; |
| 20702 pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1); |
| 20703 if( pNew==0 ) return SQLITE_NOMEM; |
| 20704 pNew->id = 1; |
| 20705 pNew->iCell = 0; |
| 20706 pNew->eWithin = PARTLY_WITHIN; |
| 20707 assert( pCsr->bPoint==1 ); |
| 20708 pCsr->aNode[0] = pRoot; |
| 20709 pRoot = 0; |
| 20710 RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); |
| 20711 rc = rtreeStepToLeaf(pCsr); |
| 20712 } |
| 20713 } |
| 20714 |
| 20715 nodeRelease(pRtree, pRoot); |
| 20716 rtreeRelease(pRtree); |
| 20717 return rc; |
| 20718 } |
| 20719 |
| 20720 /* |
| 20721 ** Set the pIdxInfo->estimatedRows variable to nRow. Unless this |
| 20722 ** extension is currently being used by a version of SQLite too old to |
| 20723 ** support estimatedRows. In that case this function is a no-op. |
| 20724 */ |
| 20725 static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ |
| 20726 #if SQLITE_VERSION_NUMBER>=3008002 |
| 20727 if( sqlite3_libversion_number()>=3008002 ){ |
| 20728 pIdxInfo->estimatedRows = nRow; |
| 20729 } |
| 20730 #endif |
| 20731 } |
| 20732 |
| 20733 /* |
| 20734 ** Rtree virtual table module xBestIndex method. There are three |
| 20735 ** table scan strategies to choose from (in order from most to |
| 20736 ** least desirable): |
| 20737 ** |
| 20738 ** idxNum idxStr Strategy |
| 20739 ** ------------------------------------------------ |
| 20740 ** 1 Unused Direct lookup by rowid. |
| 20741 ** 2 See below R-tree query or full-table scan. |
| 20742 ** ------------------------------------------------ |
| 20743 ** |
| 20744 ** If strategy 1 is used, then idxStr is not meaningful. If strategy |
| 20745 ** 2 is used, idxStr is formatted to contain 2 bytes for each |
| 20746 ** constraint used. The first two bytes of idxStr correspond to |
| 20747 ** the constraint in sqlite3_index_info.aConstraintUsage[] with |
| 20748 ** (argvIndex==1) etc. |
| 20749 ** |
| 20750 ** The first of each pair of bytes in idxStr identifies the constraint |
| 20751 ** operator as follows: |
| 20752 ** |
| 20753 ** Operator Byte Value |
| 20754 ** ---------------------- |
| 20755 ** = 0x41 ('A') |
| 20756 ** <= 0x42 ('B') |
| 20757 ** < 0x43 ('C') |
| 20758 ** >= 0x44 ('D') |
| 20759 ** > 0x45 ('E') |
| 20760 ** MATCH 0x46 ('F') |
| 20761 ** ---------------------- |
| 20762 ** |
| 20763 ** The second of each pair of bytes identifies the coordinate column |
| 20764 ** to which the constraint applies. The leftmost coordinate column |
| 20765 ** is 'a', the second from the left 'b' etc. |
| 20766 */ |
| 20767 static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ |
| 20768 Rtree *pRtree = (Rtree*)tab; |
| 20769 int rc = SQLITE_OK; |
| 20770 int ii; |
| 20771 int bMatch = 0; /* True if there exists a MATCH constraint */ |
| 20772 i64 nRow; /* Estimated rows returned by this scan */ |
| 20773 |
| 20774 int iIdx = 0; |
| 20775 char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; |
| 20776 memset(zIdxStr, 0, sizeof(zIdxStr)); |
| 20777 |
| 20778 /* Check if there exists a MATCH constraint - even an unusable one. If there |
| 20779 ** is, do not consider the lookup-by-rowid plan as using such a plan would |
| 20780 ** require the VDBE to evaluate the MATCH constraint, which is not currently |
| 20781 ** possible. */ |
| 20782 for(ii=0; ii<pIdxInfo->nConstraint; ii++){ |
| 20783 if( pIdxInfo->aConstraint[ii].op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |
| 20784 bMatch = 1; |
| 20785 } |
| 20786 } |
| 20787 |
| 20788 assert( pIdxInfo->idxStr==0 ); |
| 20789 for(ii=0; ii<pIdxInfo->nConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ |
| 20790 struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; |
| 20791 |
| 20792 if( bMatch==0 && p->usable |
| 20793 && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ |
| 20794 ){ |
| 20795 /* We have an equality constraint on the rowid. Use strategy 1. */ |
| 20796 int jj; |
| 20797 for(jj=0; jj<ii; jj++){ |
| 20798 pIdxInfo->aConstraintUsage[jj].argvIndex = 0; |
| 20799 pIdxInfo->aConstraintUsage[jj].omit = 0; |
| 20800 } |
| 20801 pIdxInfo->idxNum = 1; |
| 20802 pIdxInfo->aConstraintUsage[ii].argvIndex = 1; |
| 20803 pIdxInfo->aConstraintUsage[jj].omit = 1; |
| 20804 |
| 20805 /* This strategy involves a two rowid lookups on an B-Tree structures |
| 20806 ** and then a linear search of an R-Tree node. This should be |
| 20807 ** considered almost as quick as a direct rowid lookup (for which |
| 20808 ** sqlite uses an internal cost of 0.0). It is expected to return |
| 20809 ** a single row. |
| 20810 */ |
| 20811 pIdxInfo->estimatedCost = 30.0; |
| 20812 setEstimatedRows(pIdxInfo, 1); |
| 20813 return SQLITE_OK; |
| 20814 } |
| 20815 |
| 20816 if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ |
| 20817 u8 op; |
| 20818 switch( p->op ){ |
| 20819 case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; |
| 20820 case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; |
| 20821 case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; |
| 20822 case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; |
| 20823 case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; |
| 20824 default: |
| 20825 assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); |
| 20826 op = RTREE_MATCH; |
| 20827 break; |
| 20828 } |
| 20829 zIdxStr[iIdx++] = op; |
| 20830 zIdxStr[iIdx++] = p->iColumn - 1 + '0'; |
| 20831 pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); |
| 20832 pIdxInfo->aConstraintUsage[ii].omit = 1; |
| 20833 } |
| 20834 } |
| 20835 |
| 20836 pIdxInfo->idxNum = 2; |
| 20837 pIdxInfo->needToFreeIdxStr = 1; |
| 20838 if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ |
| 20839 return SQLITE_NOMEM; |
| 20840 } |
| 20841 |
| 20842 nRow = pRtree->nRowEst / (iIdx + 1); |
| 20843 pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; |
| 20844 setEstimatedRows(pIdxInfo, nRow); |
| 20845 |
| 20846 return rc; |
| 20847 } |
| 20848 |
| 20849 /* |
| 20850 ** Return the N-dimensional volumn of the cell stored in *p. |
| 20851 */ |
| 20852 static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ |
| 20853 RtreeDValue area = (RtreeDValue)1; |
| 20854 int ii; |
| 20855 for(ii=0; ii<(pRtree->nDim*2); ii+=2){ |
| 20856 area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]))); |
| 20857 } |
| 20858 return area; |
| 20859 } |
| 20860 |
| 20861 /* |
| 20862 ** Return the margin length of cell p. The margin length is the sum |
| 20863 ** of the objects size in each dimension. |
| 20864 */ |
| 20865 static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ |
| 20866 RtreeDValue margin = (RtreeDValue)0; |
| 20867 int ii; |
| 20868 for(ii=0; ii<(pRtree->nDim*2); ii+=2){ |
| 20869 margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); |
| 20870 } |
| 20871 return margin; |
| 20872 } |
| 20873 |
| 20874 /* |
| 20875 ** Store the union of cells p1 and p2 in p1. |
| 20876 */ |
| 20877 static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ |
| 20878 int ii; |
| 20879 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 20880 for(ii=0; ii<(pRtree->nDim*2); ii+=2){ |
| 20881 p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); |
| 20882 p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); |
| 20883 } |
| 20884 }else{ |
| 20885 for(ii=0; ii<(pRtree->nDim*2); ii+=2){ |
| 20886 p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); |
| 20887 p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); |
| 20888 } |
| 20889 } |
| 20890 } |
| 20891 |
| 20892 /* |
| 20893 ** Return true if the area covered by p2 is a subset of the area covered |
| 20894 ** by p1. False otherwise. |
| 20895 */ |
| 20896 static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ |
| 20897 int ii; |
| 20898 int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); |
| 20899 for(ii=0; ii<(pRtree->nDim*2); ii+=2){ |
| 20900 RtreeCoord *a1 = &p1->aCoord[ii]; |
| 20901 RtreeCoord *a2 = &p2->aCoord[ii]; |
| 20902 if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f)) |
| 20903 || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i)) |
| 20904 ){ |
| 20905 return 0; |
| 20906 } |
| 20907 } |
| 20908 return 1; |
| 20909 } |
| 20910 |
| 20911 /* |
| 20912 ** Return the amount cell p would grow by if it were unioned with pCell. |
| 20913 */ |
| 20914 static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ |
| 20915 RtreeDValue area; |
| 20916 RtreeCell cell; |
| 20917 memcpy(&cell, p, sizeof(RtreeCell)); |
| 20918 area = cellArea(pRtree, &cell); |
| 20919 cellUnion(pRtree, &cell, pCell); |
| 20920 return (cellArea(pRtree, &cell)-area); |
| 20921 } |
| 20922 |
| 20923 static RtreeDValue cellOverlap( |
| 20924 Rtree *pRtree, |
| 20925 RtreeCell *p, |
| 20926 RtreeCell *aCell, |
| 20927 int nCell |
| 20928 ){ |
| 20929 int ii; |
| 20930 RtreeDValue overlap = RTREE_ZERO; |
| 20931 for(ii=0; ii<nCell; ii++){ |
| 20932 int jj; |
| 20933 RtreeDValue o = (RtreeDValue)1; |
| 20934 for(jj=0; jj<(pRtree->nDim*2); jj+=2){ |
| 20935 RtreeDValue x1, x2; |
| 20936 x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); |
| 20937 x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); |
| 20938 if( x2<x1 ){ |
| 20939 o = (RtreeDValue)0; |
| 20940 break; |
| 20941 }else{ |
| 20942 o = o * (x2-x1); |
| 20943 } |
| 20944 } |
| 20945 overlap += o; |
| 20946 } |
| 20947 return overlap; |
| 20948 } |
| 20949 |
| 20950 |
| 20951 /* |
| 20952 ** This function implements the ChooseLeaf algorithm from Gutman[84]. |
| 20953 ** ChooseSubTree in r*tree terminology. |
| 20954 */ |
| 20955 static int ChooseLeaf( |
| 20956 Rtree *pRtree, /* Rtree table */ |
| 20957 RtreeCell *pCell, /* Cell to insert into rtree */ |
| 20958 int iHeight, /* Height of sub-tree rooted at pCell */ |
| 20959 RtreeNode **ppLeaf /* OUT: Selected leaf page */ |
| 20960 ){ |
| 20961 int rc; |
| 20962 int ii; |
| 20963 RtreeNode *pNode; |
| 20964 rc = nodeAcquire(pRtree, 1, 0, &pNode); |
| 20965 |
| 20966 for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ |
| 20967 int iCell; |
| 20968 sqlite3_int64 iBest = 0; |
| 20969 |
| 20970 RtreeDValue fMinGrowth = RTREE_ZERO; |
| 20971 RtreeDValue fMinArea = RTREE_ZERO; |
| 20972 |
| 20973 int nCell = NCELL(pNode); |
| 20974 RtreeCell cell; |
| 20975 RtreeNode *pChild; |
| 20976 |
| 20977 RtreeCell *aCell = 0; |
| 20978 |
| 20979 /* Select the child node which will be enlarged the least if pCell |
| 20980 ** is inserted into it. Resolve ties by choosing the entry with |
| 20981 ** the smallest area. |
| 20982 */ |
| 20983 for(iCell=0; iCell<nCell; iCell++){ |
| 20984 int bBest = 0; |
| 20985 RtreeDValue growth; |
| 20986 RtreeDValue area; |
| 20987 nodeGetCell(pRtree, pNode, iCell, &cell); |
| 20988 growth = cellGrowth(pRtree, &cell, pCell); |
| 20989 area = cellArea(pRtree, &cell); |
| 20990 if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){ |
| 20991 bBest = 1; |
| 20992 } |
| 20993 if( bBest ){ |
| 20994 fMinGrowth = growth; |
| 20995 fMinArea = area; |
| 20996 iBest = cell.iRowid; |
| 20997 } |
| 20998 } |
| 20999 |
| 21000 sqlite3_free(aCell); |
| 21001 rc = nodeAcquire(pRtree, iBest, pNode, &pChild); |
| 21002 nodeRelease(pRtree, pNode); |
| 21003 pNode = pChild; |
| 21004 } |
| 21005 |
| 21006 *ppLeaf = pNode; |
| 21007 return rc; |
| 21008 } |
| 21009 |
| 21010 /* |
| 21011 ** A cell with the same content as pCell has just been inserted into |
| 21012 ** the node pNode. This function updates the bounding box cells in |
| 21013 ** all ancestor elements. |
| 21014 */ |
| 21015 static int AdjustTree( |
| 21016 Rtree *pRtree, /* Rtree table */ |
| 21017 RtreeNode *pNode, /* Adjust ancestry of this node. */ |
| 21018 RtreeCell *pCell /* This cell was just inserted */ |
| 21019 ){ |
| 21020 RtreeNode *p = pNode; |
| 21021 while( p->pParent ){ |
| 21022 RtreeNode *pParent = p->pParent; |
| 21023 RtreeCell cell; |
| 21024 int iCell; |
| 21025 |
| 21026 if( nodeParentIndex(pRtree, p, &iCell) ){ |
| 21027 return SQLITE_CORRUPT_VTAB; |
| 21028 } |
| 21029 |
| 21030 nodeGetCell(pRtree, pParent, iCell, &cell); |
| 21031 if( !cellContains(pRtree, &cell, pCell) ){ |
| 21032 cellUnion(pRtree, &cell, pCell); |
| 21033 nodeOverwriteCell(pRtree, pParent, &cell, iCell); |
| 21034 } |
| 21035 |
| 21036 p = pParent; |
| 21037 } |
| 21038 return SQLITE_OK; |
| 21039 } |
| 21040 |
| 21041 /* |
| 21042 ** Write mapping (iRowid->iNode) to the <rtree>_rowid table. |
| 21043 */ |
| 21044 static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ |
| 21045 sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); |
| 21046 sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); |
| 21047 sqlite3_step(pRtree->pWriteRowid); |
| 21048 return sqlite3_reset(pRtree->pWriteRowid); |
| 21049 } |
| 21050 |
| 21051 /* |
| 21052 ** Write mapping (iNode->iPar) to the <rtree>_parent table. |
| 21053 */ |
| 21054 static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ |
| 21055 sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); |
| 21056 sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); |
| 21057 sqlite3_step(pRtree->pWriteParent); |
| 21058 return sqlite3_reset(pRtree->pWriteParent); |
| 21059 } |
| 21060 |
| 21061 static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); |
| 21062 |
| 21063 |
| 21064 /* |
| 21065 ** Arguments aIdx, aDistance and aSpare all point to arrays of size |
| 21066 ** nIdx. The aIdx array contains the set of integers from 0 to |
| 21067 ** (nIdx-1) in no particular order. This function sorts the values |
| 21068 ** in aIdx according to the indexed values in aDistance. For |
| 21069 ** example, assuming the inputs: |
| 21070 ** |
| 21071 ** aIdx = { 0, 1, 2, 3 } |
| 21072 ** aDistance = { 5.0, 2.0, 7.0, 6.0 } |
| 21073 ** |
| 21074 ** this function sets the aIdx array to contain: |
| 21075 ** |
| 21076 ** aIdx = { 0, 1, 2, 3 } |
| 21077 ** |
| 21078 ** The aSpare array is used as temporary working space by the |
| 21079 ** sorting algorithm. |
| 21080 */ |
| 21081 static void SortByDistance( |
| 21082 int *aIdx, |
| 21083 int nIdx, |
| 21084 RtreeDValue *aDistance, |
| 21085 int *aSpare |
| 21086 ){ |
| 21087 if( nIdx>1 ){ |
| 21088 int iLeft = 0; |
| 21089 int iRight = 0; |
| 21090 |
| 21091 int nLeft = nIdx/2; |
| 21092 int nRight = nIdx-nLeft; |
| 21093 int *aLeft = aIdx; |
| 21094 int *aRight = &aIdx[nLeft]; |
| 21095 |
| 21096 SortByDistance(aLeft, nLeft, aDistance, aSpare); |
| 21097 SortByDistance(aRight, nRight, aDistance, aSpare); |
| 21098 |
| 21099 memcpy(aSpare, aLeft, sizeof(int)*nLeft); |
| 21100 aLeft = aSpare; |
| 21101 |
| 21102 while( iLeft<nLeft || iRight<nRight ){ |
| 21103 if( iLeft==nLeft ){ |
| 21104 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21105 iRight++; |
| 21106 }else if( iRight==nRight ){ |
| 21107 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21108 iLeft++; |
| 21109 }else{ |
| 21110 RtreeDValue fLeft = aDistance[aLeft[iLeft]]; |
| 21111 RtreeDValue fRight = aDistance[aRight[iRight]]; |
| 21112 if( fLeft<fRight ){ |
| 21113 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21114 iLeft++; |
| 21115 }else{ |
| 21116 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21117 iRight++; |
| 21118 } |
| 21119 } |
| 21120 } |
| 21121 |
| 21122 #if 0 |
| 21123 /* Check that the sort worked */ |
| 21124 { |
| 21125 int jj; |
| 21126 for(jj=1; jj<nIdx; jj++){ |
| 21127 RtreeDValue left = aDistance[aIdx[jj-1]]; |
| 21128 RtreeDValue right = aDistance[aIdx[jj]]; |
| 21129 assert( left<=right ); |
| 21130 } |
| 21131 } |
| 21132 #endif |
| 21133 } |
| 21134 } |
| 21135 |
| 21136 /* |
| 21137 ** Arguments aIdx, aCell and aSpare all point to arrays of size |
| 21138 ** nIdx. The aIdx array contains the set of integers from 0 to |
| 21139 ** (nIdx-1) in no particular order. This function sorts the values |
| 21140 ** in aIdx according to dimension iDim of the cells in aCell. The |
| 21141 ** minimum value of dimension iDim is considered first, the |
| 21142 ** maximum used to break ties. |
| 21143 ** |
| 21144 ** The aSpare array is used as temporary working space by the |
| 21145 ** sorting algorithm. |
| 21146 */ |
| 21147 static void SortByDimension( |
| 21148 Rtree *pRtree, |
| 21149 int *aIdx, |
| 21150 int nIdx, |
| 21151 int iDim, |
| 21152 RtreeCell *aCell, |
| 21153 int *aSpare |
| 21154 ){ |
| 21155 if( nIdx>1 ){ |
| 21156 |
| 21157 int iLeft = 0; |
| 21158 int iRight = 0; |
| 21159 |
| 21160 int nLeft = nIdx/2; |
| 21161 int nRight = nIdx-nLeft; |
| 21162 int *aLeft = aIdx; |
| 21163 int *aRight = &aIdx[nLeft]; |
| 21164 |
| 21165 SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); |
| 21166 SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); |
| 21167 |
| 21168 memcpy(aSpare, aLeft, sizeof(int)*nLeft); |
| 21169 aLeft = aSpare; |
| 21170 while( iLeft<nLeft || iRight<nRight ){ |
| 21171 RtreeDValue xleft1 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2]); |
| 21172 RtreeDValue xleft2 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2+1]); |
| 21173 RtreeDValue xright1 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2]); |
| 21174 RtreeDValue xright2 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2+1]); |
| 21175 if( (iLeft!=nLeft) && ((iRight==nRight) |
| 21176 || (xleft1<xright1) |
| 21177 || (xleft1==xright1 && xleft2<xright2) |
| 21178 )){ |
| 21179 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21180 iLeft++; |
| 21181 }else{ |
| 21182 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21183 iRight++; |
| 21184 } |
| 21185 } |
| 21186 |
| 21187 #if 0 |
| 21188 /* Check that the sort worked */ |
| 21189 { |
| 21190 int jj; |
| 21191 for(jj=1; jj<nIdx; jj++){ |
| 21192 RtreeDValue xleft1 = aCell[aIdx[jj-1]].aCoord[iDim*2]; |
| 21193 RtreeDValue xleft2 = aCell[aIdx[jj-1]].aCoord[iDim*2+1]; |
| 21194 RtreeDValue xright1 = aCell[aIdx[jj]].aCoord[iDim*2]; |
| 21195 RtreeDValue xright2 = aCell[aIdx[jj]].aCoord[iDim*2+1]; |
| 21196 assert( xleft1<=xright1 && (xleft1<xright1 || xleft2<=xright2) ); |
| 21197 } |
| 21198 } |
| 21199 #endif |
| 21200 } |
| 21201 } |
| 21202 |
| 21203 /* |
| 21204 ** Implementation of the R*-tree variant of SplitNode from Beckman[1990]. |
| 21205 */ |
| 21206 static int splitNodeStartree( |
| 21207 Rtree *pRtree, |
| 21208 RtreeCell *aCell, |
| 21209 int nCell, |
| 21210 RtreeNode *pLeft, |
| 21211 RtreeNode *pRight, |
| 21212 RtreeCell *pBboxLeft, |
| 21213 RtreeCell *pBboxRight |
| 21214 ){ |
| 21215 int **aaSorted; |
| 21216 int *aSpare; |
| 21217 int ii; |
| 21218 |
| 21219 int iBestDim = 0; |
| 21220 int iBestSplit = 0; |
| 21221 RtreeDValue fBestMargin = RTREE_ZERO; |
| 21222 |
| 21223 int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); |
| 21224 |
| 21225 aaSorted = (int **)sqlite3_malloc(nByte); |
| 21226 if( !aaSorted ){ |
| 21227 return SQLITE_NOMEM; |
| 21228 } |
| 21229 |
| 21230 aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; |
| 21231 memset(aaSorted, 0, nByte); |
| 21232 for(ii=0; ii<pRtree->nDim; ii++){ |
| 21233 int jj; |
| 21234 aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; |
| 21235 for(jj=0; jj<nCell; jj++){ |
| 21236 aaSorted[ii][jj] = jj; |
| 21237 } |
| 21238 SortByDimension(pRtree, aaSorted[ii], nCell, ii, aCell, aSpare); |
| 21239 } |
| 21240 |
| 21241 for(ii=0; ii<pRtree->nDim; ii++){ |
| 21242 RtreeDValue margin = RTREE_ZERO; |
| 21243 RtreeDValue fBestOverlap = RTREE_ZERO; |
| 21244 RtreeDValue fBestArea = RTREE_ZERO; |
| 21245 int iBestLeft = 0; |
| 21246 int nLeft; |
| 21247 |
| 21248 for( |
| 21249 nLeft=RTREE_MINCELLS(pRtree); |
| 21250 nLeft<=(nCell-RTREE_MINCELLS(pRtree)); |
| 21251 nLeft++ |
| 21252 ){ |
| 21253 RtreeCell left; |
| 21254 RtreeCell right; |
| 21255 int kk; |
| 21256 RtreeDValue overlap; |
| 21257 RtreeDValue area; |
| 21258 |
| 21259 memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); |
| 21260 memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); |
| 21261 for(kk=1; kk<(nCell-1); kk++){ |
| 21262 if( kk<nLeft ){ |
| 21263 cellUnion(pRtree, &left, &aCell[aaSorted[ii][kk]]); |
| 21264 }else{ |
| 21265 cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]); |
| 21266 } |
| 21267 } |
| 21268 margin += cellMargin(pRtree, &left); |
| 21269 margin += cellMargin(pRtree, &right); |
| 21270 overlap = cellOverlap(pRtree, &left, &right, 1); |
| 21271 area = cellArea(pRtree, &left) + cellArea(pRtree, &right); |
| 21272 if( (nLeft==RTREE_MINCELLS(pRtree)) |
| 21273 || (overlap<fBestOverlap) |
| 21274 || (overlap==fBestOverlap && area<fBestArea) |
| 21275 ){ |
| 21276 iBestLeft = nLeft; |
| 21277 fBestOverlap = overlap; |
| 21278 fBestArea = area; |
| 21279 } |
| 21280 } |
| 21281 |
| 21282 if( ii==0 || margin<fBestMargin ){ |
| 21283 iBestDim = ii; |
| 21284 fBestMargin = margin; |
| 21285 iBestSplit = iBestLeft; |
| 21286 } |
| 21287 } |
| 21288 |
| 21289 memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell)); |
| 21290 memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell)); |
| 21291 for(ii=0; ii<nCell; ii++){ |
| 21292 RtreeNode *pTarget = (ii<iBestSplit)?pLeft:pRight; |
| 21293 RtreeCell *pBbox = (ii<iBestSplit)?pBboxLeft:pBboxRight; |
| 21294 RtreeCell *pCell = &aCell[aaSorted[iBestDim][ii]]; |
| 21295 nodeInsertCell(pRtree, pTarget, pCell); |
| 21296 cellUnion(pRtree, pBbox, pCell); |
| 21297 } |
| 21298 |
| 21299 sqlite3_free(aaSorted); |
| 21300 return SQLITE_OK; |
| 21301 } |
| 21302 |
| 21303 |
| 21304 static int updateMapping( |
| 21305 Rtree *pRtree, |
| 21306 i64 iRowid, |
| 21307 RtreeNode *pNode, |
| 21308 int iHeight |
| 21309 ){ |
| 21310 int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); |
| 21311 xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); |
| 21312 if( iHeight>0 ){ |
| 21313 RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); |
| 21314 if( pChild ){ |
| 21315 nodeRelease(pRtree, pChild->pParent); |
| 21316 nodeReference(pNode); |
| 21317 pChild->pParent = pNode; |
| 21318 } |
| 21319 } |
| 21320 return xSetMapping(pRtree, iRowid, pNode->iNode); |
| 21321 } |
| 21322 |
| 21323 static int SplitNode( |
| 21324 Rtree *pRtree, |
| 21325 RtreeNode *pNode, |
| 21326 RtreeCell *pCell, |
| 21327 int iHeight |
| 21328 ){ |
| 21329 int i; |
| 21330 int newCellIsRight = 0; |
| 21331 |
| 21332 int rc = SQLITE_OK; |
| 21333 int nCell = NCELL(pNode); |
| 21334 RtreeCell *aCell; |
| 21335 int *aiUsed; |
| 21336 |
| 21337 RtreeNode *pLeft = 0; |
| 21338 RtreeNode *pRight = 0; |
| 21339 |
| 21340 RtreeCell leftbbox; |
| 21341 RtreeCell rightbbox; |
| 21342 |
| 21343 /* Allocate an array and populate it with a copy of pCell and |
| 21344 ** all cells from node pLeft. Then zero the original node. |
| 21345 */ |
| 21346 aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); |
| 21347 if( !aCell ){ |
| 21348 rc = SQLITE_NOMEM; |
| 21349 goto splitnode_out; |
| 21350 } |
| 21351 aiUsed = (int *)&aCell[nCell+1]; |
| 21352 memset(aiUsed, 0, sizeof(int)*(nCell+1)); |
| 21353 for(i=0; i<nCell; i++){ |
| 21354 nodeGetCell(pRtree, pNode, i, &aCell[i]); |
| 21355 } |
| 21356 nodeZero(pRtree, pNode); |
| 21357 memcpy(&aCell[nCell], pCell, sizeof(RtreeCell)); |
| 21358 nCell++; |
| 21359 |
| 21360 if( pNode->iNode==1 ){ |
| 21361 pRight = nodeNew(pRtree, pNode); |
| 21362 pLeft = nodeNew(pRtree, pNode); |
| 21363 pRtree->iDepth++; |
| 21364 pNode->isDirty = 1; |
| 21365 writeInt16(pNode->zData, pRtree->iDepth); |
| 21366 }else{ |
| 21367 pLeft = pNode; |
| 21368 pRight = nodeNew(pRtree, pLeft->pParent); |
| 21369 nodeReference(pLeft); |
| 21370 } |
| 21371 |
| 21372 if( !pLeft || !pRight ){ |
| 21373 rc = SQLITE_NOMEM; |
| 21374 goto splitnode_out; |
| 21375 } |
| 21376 |
| 21377 memset(pLeft->zData, 0, pRtree->iNodeSize); |
| 21378 memset(pRight->zData, 0, pRtree->iNodeSize); |
| 21379 |
| 21380 rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, |
| 21381 &leftbbox, &rightbbox); |
| 21382 if( rc!=SQLITE_OK ){ |
| 21383 goto splitnode_out; |
| 21384 } |
| 21385 |
| 21386 /* Ensure both child nodes have node numbers assigned to them by calling |
| 21387 ** nodeWrite(). Node pRight always needs a node number, as it was created |
| 21388 ** by nodeNew() above. But node pLeft sometimes already has a node number. |
| 21389 ** In this case avoid the all to nodeWrite(). |
| 21390 */ |
| 21391 if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) |
| 21392 || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) |
| 21393 ){ |
| 21394 goto splitnode_out; |
| 21395 } |
| 21396 |
| 21397 rightbbox.iRowid = pRight->iNode; |
| 21398 leftbbox.iRowid = pLeft->iNode; |
| 21399 |
| 21400 if( pNode->iNode==1 ){ |
| 21401 rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); |
| 21402 if( rc!=SQLITE_OK ){ |
| 21403 goto splitnode_out; |
| 21404 } |
| 21405 }else{ |
| 21406 RtreeNode *pParent = pLeft->pParent; |
| 21407 int iCell; |
| 21408 rc = nodeParentIndex(pRtree, pLeft, &iCell); |
| 21409 if( rc==SQLITE_OK ){ |
| 21410 nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); |
| 21411 rc = AdjustTree(pRtree, pParent, &leftbbox); |
| 21412 } |
| 21413 if( rc!=SQLITE_OK ){ |
| 21414 goto splitnode_out; |
| 21415 } |
| 21416 } |
| 21417 if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ |
| 21418 goto splitnode_out; |
| 21419 } |
| 21420 |
| 21421 for(i=0; i<NCELL(pRight); i++){ |
| 21422 i64 iRowid = nodeGetRowid(pRtree, pRight, i); |
| 21423 rc = updateMapping(pRtree, iRowid, pRight, iHeight); |
| 21424 if( iRowid==pCell->iRowid ){ |
| 21425 newCellIsRight = 1; |
| 21426 } |
| 21427 if( rc!=SQLITE_OK ){ |
| 21428 goto splitnode_out; |
| 21429 } |
| 21430 } |
| 21431 if( pNode->iNode==1 ){ |
| 21432 for(i=0; i<NCELL(pLeft); i++){ |
| 21433 i64 iRowid = nodeGetRowid(pRtree, pLeft, i); |
| 21434 rc = updateMapping(pRtree, iRowid, pLeft, iHeight); |
| 21435 if( rc!=SQLITE_OK ){ |
| 21436 goto splitnode_out; |
| 21437 } |
| 21438 } |
| 21439 }else if( newCellIsRight==0 ){ |
| 21440 rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight); |
| 21441 } |
| 21442 |
| 21443 if( rc==SQLITE_OK ){ |
| 21444 rc = nodeRelease(pRtree, pRight); |
| 21445 pRight = 0; |
| 21446 } |
| 21447 if( rc==SQLITE_OK ){ |
| 21448 rc = nodeRelease(pRtree, pLeft); |
| 21449 pLeft = 0; |
| 21450 } |
| 21451 |
| 21452 splitnode_out: |
| 21453 nodeRelease(pRtree, pRight); |
| 21454 nodeRelease(pRtree, pLeft); |
| 21455 sqlite3_free(aCell); |
| 21456 return rc; |
| 21457 } |
| 21458 |
| 21459 /* |
| 21460 ** If node pLeaf is not the root of the r-tree and its pParent pointer is |
| 21461 ** still NULL, load all ancestor nodes of pLeaf into memory and populate |
| 21462 ** the pLeaf->pParent chain all the way up to the root node. |
| 21463 ** |
| 21464 ** This operation is required when a row is deleted (or updated - an update |
| 21465 ** is implemented as a delete followed by an insert). SQLite provides the |
| 21466 ** rowid of the row to delete, which can be used to find the leaf on which |
| 21467 ** the entry resides (argument pLeaf). Once the leaf is located, this |
| 21468 ** function is called to determine its ancestry. |
| 21469 */ |
| 21470 static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ |
| 21471 int rc = SQLITE_OK; |
| 21472 RtreeNode *pChild = pLeaf; |
| 21473 while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ |
| 21474 int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ |
| 21475 sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); |
| 21476 rc = sqlite3_step(pRtree->pReadParent); |
| 21477 if( rc==SQLITE_ROW ){ |
| 21478 RtreeNode *pTest; /* Used to test for reference loops */ |
| 21479 i64 iNode; /* Node number of parent node */ |
| 21480 |
| 21481 /* Before setting pChild->pParent, test that we are not creating a |
| 21482 ** loop of references (as we would if, say, pChild==pParent). We don't |
| 21483 ** want to do this as it leads to a memory leak when trying to delete |
| 21484 ** the referenced counted node structures. |
| 21485 */ |
| 21486 iNode = sqlite3_column_int64(pRtree->pReadParent, 0); |
| 21487 for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); |
| 21488 if( !pTest ){ |
| 21489 rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); |
| 21490 } |
| 21491 } |
| 21492 rc = sqlite3_reset(pRtree->pReadParent); |
| 21493 if( rc==SQLITE_OK ) rc = rc2; |
| 21494 if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; |
| 21495 pChild = pChild->pParent; |
| 21496 } |
| 21497 return rc; |
| 21498 } |
| 21499 |
| 21500 static int deleteCell(Rtree *, RtreeNode *, int, int); |
| 21501 |
| 21502 static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ |
| 21503 int rc; |
| 21504 int rc2; |
| 21505 RtreeNode *pParent = 0; |
| 21506 int iCell; |
| 21507 |
| 21508 assert( pNode->nRef==1 ); |
| 21509 |
| 21510 /* Remove the entry in the parent cell. */ |
| 21511 rc = nodeParentIndex(pRtree, pNode, &iCell); |
| 21512 if( rc==SQLITE_OK ){ |
| 21513 pParent = pNode->pParent; |
| 21514 pNode->pParent = 0; |
| 21515 rc = deleteCell(pRtree, pParent, iCell, iHeight+1); |
| 21516 } |
| 21517 rc2 = nodeRelease(pRtree, pParent); |
| 21518 if( rc==SQLITE_OK ){ |
| 21519 rc = rc2; |
| 21520 } |
| 21521 if( rc!=SQLITE_OK ){ |
| 21522 return rc; |
| 21523 } |
| 21524 |
| 21525 /* Remove the xxx_node entry. */ |
| 21526 sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); |
| 21527 sqlite3_step(pRtree->pDeleteNode); |
| 21528 if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ |
| 21529 return rc; |
| 21530 } |
| 21531 |
| 21532 /* Remove the xxx_parent entry. */ |
| 21533 sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); |
| 21534 sqlite3_step(pRtree->pDeleteParent); |
| 21535 if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ |
| 21536 return rc; |
| 21537 } |
| 21538 |
| 21539 /* Remove the node from the in-memory hash table and link it into |
| 21540 ** the Rtree.pDeleted list. Its contents will be re-inserted later on. |
| 21541 */ |
| 21542 nodeHashDelete(pRtree, pNode); |
| 21543 pNode->iNode = iHeight; |
| 21544 pNode->pNext = pRtree->pDeleted; |
| 21545 pNode->nRef++; |
| 21546 pRtree->pDeleted = pNode; |
| 21547 |
| 21548 return SQLITE_OK; |
| 21549 } |
| 21550 |
| 21551 static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ |
| 21552 RtreeNode *pParent = pNode->pParent; |
| 21553 int rc = SQLITE_OK; |
| 21554 if( pParent ){ |
| 21555 int ii; |
| 21556 int nCell = NCELL(pNode); |
| 21557 RtreeCell box; /* Bounding box for pNode */ |
| 21558 nodeGetCell(pRtree, pNode, 0, &box); |
| 21559 for(ii=1; ii<nCell; ii++){ |
| 21560 RtreeCell cell; |
| 21561 nodeGetCell(pRtree, pNode, ii, &cell); |
| 21562 cellUnion(pRtree, &box, &cell); |
| 21563 } |
| 21564 box.iRowid = pNode->iNode; |
| 21565 rc = nodeParentIndex(pRtree, pNode, &ii); |
| 21566 if( rc==SQLITE_OK ){ |
| 21567 nodeOverwriteCell(pRtree, pParent, &box, ii); |
| 21568 rc = fixBoundingBox(pRtree, pParent); |
| 21569 } |
| 21570 } |
| 21571 return rc; |
| 21572 } |
| 21573 |
| 21574 /* |
| 21575 ** Delete the cell at index iCell of node pNode. After removing the |
| 21576 ** cell, adjust the r-tree data structure if required. |
| 21577 */ |
| 21578 static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ |
| 21579 RtreeNode *pParent; |
| 21580 int rc; |
| 21581 |
| 21582 if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ |
| 21583 return rc; |
| 21584 } |
| 21585 |
| 21586 /* Remove the cell from the node. This call just moves bytes around |
| 21587 ** the in-memory node image, so it cannot fail. |
| 21588 */ |
| 21589 nodeDeleteCell(pRtree, pNode, iCell); |
| 21590 |
| 21591 /* If the node is not the tree root and now has less than the minimum |
| 21592 ** number of cells, remove it from the tree. Otherwise, update the |
| 21593 ** cell in the parent node so that it tightly contains the updated |
| 21594 ** node. |
| 21595 */ |
| 21596 pParent = pNode->pParent; |
| 21597 assert( pParent || pNode->iNode==1 ); |
| 21598 if( pParent ){ |
| 21599 if( NCELL(pNode)<RTREE_MINCELLS(pRtree) ){ |
| 21600 rc = removeNode(pRtree, pNode, iHeight); |
| 21601 }else{ |
| 21602 rc = fixBoundingBox(pRtree, pNode); |
| 21603 } |
| 21604 } |
| 21605 |
| 21606 return rc; |
| 21607 } |
| 21608 |
/*
** Handle the overflow of node pNode caused by adding cell pCell, using
** re-insertion instead of a split. The existing cells of pNode plus pCell
** are ordered with SortByDistance(). The first
** nCell-(RTREE_MINCELLS+1) cells in that order are written back into
** pNode; each remaining cell is inserted again through ChooseLeaf() and
** rtreeInsertCell() at height iHeight.
**
** Returns SQLITE_OK, SQLITE_NOMEM if the scratch buffer cannot be
** allocated, or the first error reported by a helper.
*/
static int Reinsert(
  Rtree *pRtree,
  RtreeNode *pNode,
  RtreeCell *pCell,
  int iHeight
){
  int *aOrder;
  int *aSpare;
  RtreeCell *aCell;
  RtreeDValue *aDistance;
  int nCell;
  RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS];
  int iDim;
  int ii;
  int rc = SQLITE_OK;
  int n;

  memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS);

  /* nCell counts the node's current cells plus the new one. n is nCell
  ** rounded up to an even number and is the per-array slot count used
  ** below (presumably so that aDistance, which follows 2*n ints, stays
  ** suitably aligned - TODO confirm).
  */
  nCell = NCELL(pNode)+1;
  n = (nCell+1)&(~1);

  /* Allocate the buffers used by this operation. The allocation is
  ** relinquished before this function returns.
  */
  aCell = (RtreeCell *)sqlite3_malloc(n * (
    sizeof(RtreeCell) +         /* aCell array */
    sizeof(int) +               /* aOrder array */
    sizeof(int) +               /* aSpare array */
    sizeof(RtreeDValue)         /* aDistance array */
  ));
  if( !aCell ){
    return SQLITE_NOMEM;
  }
  /* Carve the single allocation into four consecutive arrays. */
  aOrder = (int *)&aCell[n];
  aSpare = (int *)&aOrder[n];
  aDistance = (RtreeDValue *)&aSpare[n];

  /* Load every cell (pCell goes in the last slot), initialise aOrder[] to
  ** the identity permutation and accumulate, per dimension, the sum of
  ** both coordinates of every cell.
  */
  for(ii=0; ii<nCell; ii++){
    if( ii==(nCell-1) ){
      memcpy(&aCell[ii], pCell, sizeof(RtreeCell));
    }else{
      nodeGetCell(pRtree, pNode, ii, &aCell[ii]);
    }
    aOrder[ii] = ii;
    for(iDim=0; iDim<pRtree->nDim; iDim++){
      aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]);
      aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]);
    }
  }
  /* Turn the sums into the mean coordinate for each dimension. */
  for(iDim=0; iDim<pRtree->nDim; iDim++){
    aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2));
  }

  /* Compute the sort key for each cell as a sum of squares over all
  ** dimensions.
  **
  ** NOTE(review): "coord" is the second coordinate minus the first, i.e.
  ** the cell's extent in that dimension, not its mid-point, so the key is
  ** not the distance between cell centre and aCenterCoord[]. Changing it
  ** would alter which cells get re-inserted - confirm the intent before
  ** touching this.
  */
  for(ii=0; ii<nCell; ii++){
    aDistance[ii] = RTREE_ZERO;
    for(iDim=0; iDim<pRtree->nDim; iDim++){
      RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) -
                               DCOORD(aCell[ii].aCoord[iDim*2]));
      aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]);
    }
  }

  /* Order aOrder[] by aDistance[] (aSpare is scratch space; sort direction
  ** is defined by SortByDistance - TODO confirm), then empty pNode so that
  ** it can be refilled.
  */
  SortByDistance(aOrder, nCell, aDistance, aSpare);
  nodeZero(pRtree, pNode);

  /* Put the leading cells of the sorted order back into pNode. Only the
  ** newly added cell needs its rowid->node or child->parent mapping
  ** written, since the other cells were already stored in pNode.
  */
  for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){
    RtreeCell *p = &aCell[aOrder[ii]];
    nodeInsertCell(pRtree, pNode, p);
    if( p->iRowid==pCell->iRowid ){
      if( iHeight==0 ){
        rc = rowidWrite(pRtree, p->iRowid, pNode->iNode);
      }else{
        rc = parentWrite(pRtree, p->iRowid, pNode->iNode);
      }
    }
  }
  if( rc==SQLITE_OK ){
    rc = fixBoundingBox(pRtree, pNode);
  }
  /* ii carries over from the loop above: the remaining cells are inserted
  ** into the tree afresh.
  */
  for(; rc==SQLITE_OK && ii<nCell; ii++){
    /* Find a node to store this cell in. iHeight is the height of the
    ** sub-tree headed by the cell.
    */
    RtreeNode *pInsert;
    RtreeCell *p = &aCell[aOrder[ii]];
    rc = ChooseLeaf(pRtree, p, iHeight, &pInsert);
    if( rc==SQLITE_OK ){
      int rc2;
      rc = rtreeInsertCell(pRtree, pInsert, p, iHeight);
      rc2 = nodeRelease(pRtree, pInsert);
      if( rc==SQLITE_OK ){
        rc = rc2;
      }
    }
  }

  sqlite3_free(aCell);
  return rc;
}
| 21709 |
| 21710 /* |
| 21711 ** Insert cell pCell into node pNode. Node pNode is the head of a |
| 21712 ** subtree iHeight high (leaf nodes have iHeight==0). |
| 21713 */ |
| 21714 static int rtreeInsertCell( |
| 21715 Rtree *pRtree, |
| 21716 RtreeNode *pNode, |
| 21717 RtreeCell *pCell, |
| 21718 int iHeight |
| 21719 ){ |
| 21720 int rc = SQLITE_OK; |
| 21721 if( iHeight>0 ){ |
| 21722 RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); |
| 21723 if( pChild ){ |
| 21724 nodeRelease(pRtree, pChild->pParent); |
| 21725 nodeReference(pNode); |
| 21726 pChild->pParent = pNode; |
| 21727 } |
| 21728 } |
| 21729 if( nodeInsertCell(pRtree, pNode, pCell) ){ |
| 21730 if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ |
| 21731 rc = SplitNode(pRtree, pNode, pCell, iHeight); |
| 21732 }else{ |
| 21733 pRtree->iReinsertHeight = iHeight; |
| 21734 rc = Reinsert(pRtree, pNode, pCell, iHeight); |
| 21735 } |
| 21736 }else{ |
| 21737 rc = AdjustTree(pRtree, pNode, pCell); |
| 21738 if( rc==SQLITE_OK ){ |
| 21739 if( iHeight==0 ){ |
| 21740 rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); |
| 21741 }else{ |
| 21742 rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); |
| 21743 } |
| 21744 } |
| 21745 } |
| 21746 return rc; |
| 21747 } |
| 21748 |
| 21749 static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ |
| 21750 int ii; |
| 21751 int rc = SQLITE_OK; |
| 21752 int nCell = NCELL(pNode); |
| 21753 |
| 21754 for(ii=0; rc==SQLITE_OK && ii<nCell; ii++){ |
| 21755 RtreeNode *pInsert; |
| 21756 RtreeCell cell; |
| 21757 nodeGetCell(pRtree, pNode, ii, &cell); |
| 21758 |
| 21759 /* Find a node to store this cell in. pNode->iNode currently contains |
| 21760 ** the height of the sub-tree headed by the cell. |
| 21761 */ |
| 21762 rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); |
| 21763 if( rc==SQLITE_OK ){ |
| 21764 int rc2; |
| 21765 rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); |
| 21766 rc2 = nodeRelease(pRtree, pInsert); |
| 21767 if( rc==SQLITE_OK ){ |
| 21768 rc = rc2; |
| 21769 } |
| 21770 } |
| 21771 } |
| 21772 return rc; |
| 21773 } |
| 21774 |
| 21775 /* |
| 21776 ** Select a currently unused rowid for a new r-tree record. |
| 21777 */ |
| 21778 static int newRowid(Rtree *pRtree, i64 *piRowid){ |
| 21779 int rc; |
| 21780 sqlite3_bind_null(pRtree->pWriteRowid, 1); |
| 21781 sqlite3_bind_null(pRtree->pWriteRowid, 2); |
| 21782 sqlite3_step(pRtree->pWriteRowid); |
| 21783 rc = sqlite3_reset(pRtree->pWriteRowid); |
| 21784 *piRowid = sqlite3_last_insert_rowid(pRtree->db); |
| 21785 return rc; |
| 21786 } |
| 21787 |
| 21788 /* |
| 21789 ** Remove the entry with rowid=iDelete from the r-tree structure. |
| 21790 */ |
| 21791 static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ |
| 21792 int rc; /* Return code */ |
| 21793 RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ |
| 21794 int iCell; /* Index of iDelete cell in pLeaf */ |
| 21795 RtreeNode *pRoot; /* Root node of rtree structure */ |
| 21796 |
| 21797 |
| 21798 /* Obtain a reference to the root node to initialize Rtree.iDepth */ |
| 21799 rc = nodeAcquire(pRtree, 1, 0, &pRoot); |
| 21800 |
| 21801 /* Obtain a reference to the leaf node that contains the entry |
| 21802 ** about to be deleted. |
| 21803 */ |
| 21804 if( rc==SQLITE_OK ){ |
| 21805 rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); |
| 21806 } |
| 21807 |
| 21808 /* Delete the cell in question from the leaf node. */ |
| 21809 if( rc==SQLITE_OK ){ |
| 21810 int rc2; |
| 21811 rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); |
| 21812 if( rc==SQLITE_OK ){ |
| 21813 rc = deleteCell(pRtree, pLeaf, iCell, 0); |
| 21814 } |
| 21815 rc2 = nodeRelease(pRtree, pLeaf); |
| 21816 if( rc==SQLITE_OK ){ |
| 21817 rc = rc2; |
| 21818 } |
| 21819 } |
| 21820 |
| 21821 /* Delete the corresponding entry in the <rtree>_rowid table. */ |
| 21822 if( rc==SQLITE_OK ){ |
| 21823 sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); |
| 21824 sqlite3_step(pRtree->pDeleteRowid); |
| 21825 rc = sqlite3_reset(pRtree->pDeleteRowid); |
| 21826 } |
| 21827 |
| 21828 /* Check if the root node now has exactly one child. If so, remove |
| 21829 ** it, schedule the contents of the child for reinsertion and |
| 21830 ** reduce the tree height by one. |
| 21831 ** |
| 21832 ** This is equivalent to copying the contents of the child into |
| 21833 ** the root node (the operation that Gutman's paper says to perform |
| 21834 ** in this scenario). |
| 21835 */ |
| 21836 if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ |
| 21837 int rc2; |
| 21838 RtreeNode *pChild; |
| 21839 i64 iChild = nodeGetRowid(pRtree, pRoot, 0); |
| 21840 rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); |
| 21841 if( rc==SQLITE_OK ){ |
| 21842 rc = removeNode(pRtree, pChild, pRtree->iDepth-1); |
| 21843 } |
| 21844 rc2 = nodeRelease(pRtree, pChild); |
| 21845 if( rc==SQLITE_OK ) rc = rc2; |
| 21846 if( rc==SQLITE_OK ){ |
| 21847 pRtree->iDepth--; |
| 21848 writeInt16(pRoot->zData, pRtree->iDepth); |
| 21849 pRoot->isDirty = 1; |
| 21850 } |
| 21851 } |
| 21852 |
| 21853 /* Re-insert the contents of any underfull nodes removed from the tree. */ |
| 21854 for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ |
| 21855 if( rc==SQLITE_OK ){ |
| 21856 rc = reinsertNodeContent(pRtree, pLeaf); |
| 21857 } |
| 21858 pRtree->pDeleted = pLeaf->pNext; |
| 21859 sqlite3_free(pLeaf); |
| 21860 } |
| 21861 |
| 21862 /* Release the reference to the root node. */ |
| 21863 if( rc==SQLITE_OK ){ |
| 21864 rc = nodeRelease(pRtree, pRoot); |
| 21865 }else{ |
| 21866 nodeRelease(pRtree, pRoot); |
| 21867 } |
| 21868 |
| 21869 return rc; |
| 21870 } |
| 21871 |
| 21872 /* |
| 21873 ** Rounding constants for float->double conversion. |
| 21874 */ |
| 21875 #define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ |
| 21876 #define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ |
| 21877 |
| 21878 #if !defined(SQLITE_RTREE_INT_ONLY) |
| 21879 /* |
| 21880 ** Convert an sqlite3_value into an RtreeValue (presumably a float) |
| 21881 ** while taking care to round toward negative or positive, respectively. |
| 21882 */ |
| 21883 static RtreeValue rtreeValueDown(sqlite3_value *v){ |
| 21884 double d = sqlite3_value_double(v); |
| 21885 float f = (float)d; |
| 21886 if( f>d ){ |
| 21887 f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); |
| 21888 } |
| 21889 return f; |
| 21890 } |
| 21891 static RtreeValue rtreeValueUp(sqlite3_value *v){ |
| 21892 double d = sqlite3_value_double(v); |
| 21893 float f = (float)d; |
| 21894 if( f<d ){ |
| 21895 f = (float)(d*(d<0 ? RNDTOWARDS : RNDAWAY)); |
| 21896 } |
| 21897 return f; |
| 21898 } |
| 21899 #endif /* !defined(SQLITE_RTREE_INT_ONLY) */ |
| 21900 |
| 21901 |
| 21902 /* |
| 21903 ** The xUpdate method for rtree module virtual tables. |
| 21904 */ |
| 21905 static int rtreeUpdate( |
| 21906 sqlite3_vtab *pVtab, |
| 21907 int nData, |
| 21908 sqlite3_value **azData, |
| 21909 sqlite_int64 *pRowid |
| 21910 ){ |
| 21911 Rtree *pRtree = (Rtree *)pVtab; |
| 21912 int rc = SQLITE_OK; |
| 21913 RtreeCell cell; /* New cell to insert if nData>1 */ |
| 21914 int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ |
| 21915 |
| 21916 rtreeReference(pRtree); |
| 21917 assert(nData>=1); |
| 21918 |
| 21919 cell.iRowid = 0; /* Used only to suppress a compiler warning */ |
| 21920 |
| 21921 /* Constraint handling. A write operation on an r-tree table may return |
| 21922 ** SQLITE_CONSTRAINT for two reasons: |
| 21923 ** |
| 21924 ** 1. A duplicate rowid value, or |
| 21925 ** 2. The supplied data violates the "x2>=x1" constraint. |
| 21926 ** |
| 21927 ** In the first case, if the conflict-handling mode is REPLACE, then |
| 21928 ** the conflicting row can be removed before proceeding. In the second |
| 21929 ** case, SQLITE_CONSTRAINT must be returned regardless of the |
| 21930 ** conflict-handling mode specified by the user. |
| 21931 */ |
| 21932 if( nData>1 ){ |
| 21933 int ii; |
| 21934 |
| 21935 /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. |
| 21936 ** |
| 21937 ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared |
| 21938 ** with "column" that are interpreted as table constraints. |
| 21939 ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); |
| 21940 ** This problem was discovered after years of use, so we silently ignore |
| 21941 ** these kinds of misdeclared tables to avoid breaking any legacy. |
| 21942 */ |
| 21943 assert( nData<=(pRtree->nDim*2 + 3) ); |
| 21944 |
| 21945 #ifndef SQLITE_RTREE_INT_ONLY |
| 21946 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 21947 for(ii=0; ii<nData-4; ii+=2){ |
| 21948 cell.aCoord[ii].f = rtreeValueDown(azData[ii+3]); |
| 21949 cell.aCoord[ii+1].f = rtreeValueUp(azData[ii+4]); |
| 21950 if( cell.aCoord[ii].f>cell.aCoord[ii+1].f ){ |
| 21951 rc = SQLITE_CONSTRAINT; |
| 21952 goto constraint; |
| 21953 } |
| 21954 } |
| 21955 }else |
| 21956 #endif |
| 21957 { |
| 21958 for(ii=0; ii<nData-4; ii+=2){ |
| 21959 cell.aCoord[ii].i = sqlite3_value_int(azData[ii+3]); |
| 21960 cell.aCoord[ii+1].i = sqlite3_value_int(azData[ii+4]); |
| 21961 if( cell.aCoord[ii].i>cell.aCoord[ii+1].i ){ |
| 21962 rc = SQLITE_CONSTRAINT; |
| 21963 goto constraint; |
| 21964 } |
| 21965 } |
| 21966 } |
| 21967 |
| 21968 /* If a rowid value was supplied, check if it is already present in |
| 21969 ** the table. If so, the constraint has failed. */ |
| 21970 if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){ |
| 21971 cell.iRowid = sqlite3_value_int64(azData[2]); |
| 21972 if( sqlite3_value_type(azData[0])==SQLITE_NULL |
| 21973 || sqlite3_value_int64(azData[0])!=cell.iRowid |
| 21974 ){ |
| 21975 int steprc; |
| 21976 sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); |
| 21977 steprc = sqlite3_step(pRtree->pReadRowid); |
| 21978 rc = sqlite3_reset(pRtree->pReadRowid); |
| 21979 if( SQLITE_ROW==steprc ){ |
| 21980 if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ |
| 21981 rc = rtreeDeleteRowid(pRtree, cell.iRowid); |
| 21982 }else{ |
| 21983 rc = SQLITE_CONSTRAINT; |
| 21984 goto constraint; |
| 21985 } |
| 21986 } |
| 21987 } |
| 21988 bHaveRowid = 1; |
| 21989 } |
| 21990 } |
| 21991 |
| 21992 /* If azData[0] is not an SQL NULL value, it is the rowid of a |
| 21993 ** record to delete from the r-tree table. The following block does |
| 21994 ** just that. |
| 21995 */ |
| 21996 if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){ |
| 21997 rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0])); |
| 21998 } |
| 21999 |
| 22000 /* If the azData[] array contains more than one element, elements |
| 22001 ** (azData[2]..azData[argc-1]) contain a new record to insert into |
| 22002 ** the r-tree structure. |
| 22003 */ |
| 22004 if( rc==SQLITE_OK && nData>1 ){ |
| 22005 /* Insert the new record into the r-tree */ |
| 22006 RtreeNode *pLeaf = 0; |
| 22007 |
| 22008 /* Figure out the rowid of the new row. */ |
| 22009 if( bHaveRowid==0 ){ |
| 22010 rc = newRowid(pRtree, &cell.iRowid); |
| 22011 } |
| 22012 *pRowid = cell.iRowid; |
| 22013 |
| 22014 if( rc==SQLITE_OK ){ |
| 22015 rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); |
| 22016 } |
| 22017 if( rc==SQLITE_OK ){ |
| 22018 int rc2; |
| 22019 pRtree->iReinsertHeight = -1; |
| 22020 rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); |
| 22021 rc2 = nodeRelease(pRtree, pLeaf); |
| 22022 if( rc==SQLITE_OK ){ |
| 22023 rc = rc2; |
| 22024 } |
| 22025 } |
| 22026 } |
| 22027 |
| 22028 constraint: |
| 22029 rtreeRelease(pRtree); |
| 22030 return rc; |
| 22031 } |
| 22032 |
| 22033 /* |
| 22034 ** The xRename method for rtree module virtual tables. |
| 22035 */ |
| 22036 static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ |
| 22037 Rtree *pRtree = (Rtree *)pVtab; |
| 22038 int rc = SQLITE_NOMEM; |
| 22039 char *zSql = sqlite3_mprintf( |
| 22040 "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" |
| 22041 "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" |
| 22042 "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" |
| 22043 , pRtree->zDb, pRtree->zName, zNewName |
| 22044 , pRtree->zDb, pRtree->zName, zNewName |
| 22045 , pRtree->zDb, pRtree->zName, zNewName |
| 22046 ); |
| 22047 if( zSql ){ |
| 22048 rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); |
| 22049 sqlite3_free(zSql); |
| 22050 } |
| 22051 return rc; |
| 22052 } |
| 22053 |
| 22054 /* |
| 22055 ** This function populates the pRtree->nRowEst variable with an estimate |
| 22056 ** of the number of rows in the virtual table. If possible, this is based |
| 22057 ** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. |
| 22058 */ |
| 22059 static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ |
| 22060 const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; |
| 22061 char *zSql; |
| 22062 sqlite3_stmt *p; |
| 22063 int rc; |
| 22064 i64 nRow = 0; |
| 22065 |
| 22066 zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); |
| 22067 if( zSql==0 ){ |
| 22068 rc = SQLITE_NOMEM; |
| 22069 }else{ |
| 22070 rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); |
| 22071 if( rc==SQLITE_OK ){ |
| 22072 if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); |
| 22073 rc = sqlite3_finalize(p); |
| 22074 }else if( rc!=SQLITE_NOMEM ){ |
| 22075 rc = SQLITE_OK; |
| 22076 } |
| 22077 |
| 22078 if( rc==SQLITE_OK ){ |
| 22079 if( nRow==0 ){ |
| 22080 pRtree->nRowEst = RTREE_DEFAULT_ROWEST; |
| 22081 }else{ |
| 22082 pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); |
| 22083 } |
| 22084 } |
| 22085 sqlite3_free(zSql); |
| 22086 } |
| 22087 |
| 22088 return rc; |
| 22089 } |
| 22090 |
/*
** Method table for the r-tree virtual table module. Entries are
** positional and follow the slot names given in the trailing comments.
** Slots holding 0 (the transaction, savepoint and function-overloading
** hooks) have no implementation in this module.
*/
static sqlite3_module rtreeModule = {
  0,                          /* iVersion */
  rtreeCreate,                /* xCreate - create a table */
  rtreeConnect,               /* xConnect - connect to an existing table */
  rtreeBestIndex,             /* xBestIndex - Determine search strategy */
  rtreeDisconnect,            /* xDisconnect - Disconnect from a table */
  rtreeDestroy,               /* xDestroy - Drop a table */
  rtreeOpen,                  /* xOpen - open a cursor */
  rtreeClose,                 /* xClose - close a cursor */
  rtreeFilter,                /* xFilter - configure scan constraints */
  rtreeNext,                  /* xNext - advance a cursor */
  rtreeEof,                   /* xEof */
  rtreeColumn,                /* xColumn - read data */
  rtreeRowid,                 /* xRowid - read data */
  rtreeUpdate,                /* xUpdate - write data */
  0,                          /* xBegin - begin transaction */
  0,                          /* xSync - sync transaction */
  0,                          /* xCommit - commit transaction */
  0,                          /* xRollback - rollback transaction */
  0,                          /* xFindFunction - function overloading */
  rtreeRename,                /* xRename - rename the table */
  0,                          /* xSavepoint */
  0,                          /* xRelease */
  0                           /* xRollbackTo */
};
| 22116 |
| 22117 static int rtreeSqlInit( |
| 22118 Rtree *pRtree, |
| 22119 sqlite3 *db, |
| 22120 const char *zDb, |
| 22121 const char *zPrefix, |
| 22122 int isCreate |
| 22123 ){ |
| 22124 int rc = SQLITE_OK; |
| 22125 |
| 22126 #define N_STATEMENT 9 |
| 22127 static const char *azSql[N_STATEMENT] = { |
| 22128 /* Read and write the xxx_node table */ |
| 22129 "SELECT data FROM '%q'.'%q_node' WHERE nodeno = :1", |
| 22130 "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)", |
| 22131 "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1", |
| 22132 |
| 22133 /* Read and write the xxx_rowid table */ |
| 22134 "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1", |
| 22135 "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)", |
| 22136 "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1", |
| 22137 |
| 22138 /* Read and write the xxx_parent table */ |
| 22139 "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1", |
| 22140 "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)", |
| 22141 "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1" |
| 22142 }; |
| 22143 sqlite3_stmt **appStmt[N_STATEMENT]; |
| 22144 int i; |
| 22145 |
| 22146 pRtree->db = db; |
| 22147 |
| 22148 if( isCreate ){ |
| 22149 char *zCreate = sqlite3_mprintf( |
| 22150 "CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);" |
| 22151 "CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);" |
| 22152 "CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY," |
| 22153 " parentnode INTEGER);" |
| 22154 "INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))", |
| 22155 zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize |
| 22156 ); |
| 22157 if( !zCreate ){ |
| 22158 return SQLITE_NOMEM; |
| 22159 } |
| 22160 rc = sqlite3_exec(db, zCreate, 0, 0, 0); |
| 22161 sqlite3_free(zCreate); |
| 22162 if( rc!=SQLITE_OK ){ |
| 22163 return rc; |
| 22164 } |
| 22165 } |
| 22166 |
| 22167 appStmt[0] = &pRtree->pReadNode; |
| 22168 appStmt[1] = &pRtree->pWriteNode; |
| 22169 appStmt[2] = &pRtree->pDeleteNode; |
| 22170 appStmt[3] = &pRtree->pReadRowid; |
| 22171 appStmt[4] = &pRtree->pWriteRowid; |
| 22172 appStmt[5] = &pRtree->pDeleteRowid; |
| 22173 appStmt[6] = &pRtree->pReadParent; |
| 22174 appStmt[7] = &pRtree->pWriteParent; |
| 22175 appStmt[8] = &pRtree->pDeleteParent; |
| 22176 |
| 22177 rc = rtreeQueryStat1(db, pRtree); |
| 22178 for(i=0; i<N_STATEMENT && rc==SQLITE_OK; i++){ |
| 22179 char *zSql = sqlite3_mprintf(azSql[i], zDb, zPrefix); |
| 22180 if( zSql ){ |
| 22181 rc = sqlite3_prepare_v2(db, zSql, -1, appStmt[i], 0); |
| 22182 }else{ |
| 22183 rc = SQLITE_NOMEM; |
| 22184 } |
| 22185 sqlite3_free(zSql); |
| 22186 } |
| 22187 |
| 22188 return rc; |
| 22189 } |
| 22190 |
| 22191 /* |
| 22192 ** The second argument to this function contains the text of an SQL statement |
| 22193 ** that returns a single integer value. The statement is compiled and executed |
| 22194 ** using database connection db. If successful, the integer value returned |
| 22195 ** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error |
| 22196 ** code is returned and the value of *piVal after returning is not defined. |
| 22197 */ |
| 22198 static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ |
| 22199 int rc = SQLITE_NOMEM; |
| 22200 if( zSql ){ |
| 22201 sqlite3_stmt *pStmt = 0; |
| 22202 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 22203 if( rc==SQLITE_OK ){ |
| 22204 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 22205 *piVal = sqlite3_column_int(pStmt, 0); |
| 22206 } |
| 22207 rc = sqlite3_finalize(pStmt); |
| 22208 } |
| 22209 } |
| 22210 return rc; |
| 22211 } |
| 22212 |
| 22213 /* |
| 22214 ** This function is called from within the xConnect() or xCreate() method to |
| 22215 ** determine the node-size used by the rtree table being created or connected |
| 22216 ** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. |
| 22217 ** Otherwise, an SQLite error code is returned. |
| 22218 ** |
| 22219 ** If this function is being called as part of an xConnect(), then the rtree |
| 22220 ** table already exists. In this case the node-size is determined by inspecting |
| 22221 ** the root node of the tree. |
| 22222 ** |
| 22223 ** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. |
| 22224 ** This ensures that each node is stored on a single database page. If the |
| 22225 ** database page-size is so large that more than RTREE_MAXCELLS entries |
| 22226 ** would fit in a single node, use a smaller node-size. |
| 22227 */ |
| 22228 static int getNodeSize( |
| 22229 sqlite3 *db, /* Database handle */ |
| 22230 Rtree *pRtree, /* Rtree handle */ |
| 22231 int isCreate, /* True for xCreate, false for xConnect */ |
| 22232 char **pzErr /* OUT: Error message, if any */ |
| 22233 ){ |
| 22234 int rc; |
| 22235 char *zSql; |
| 22236 if( isCreate ){ |
| 22237 int iPageSize = 0; |
| 22238 zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); |
| 22239 rc = getIntFromStmt(db, zSql, &iPageSize); |
| 22240 if( rc==SQLITE_OK ){ |
| 22241 pRtree->iNodeSize = iPageSize-64; |
| 22242 if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){ |
| 22243 pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; |
| 22244 } |
| 22245 }else{ |
| 22246 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22247 } |
| 22248 }else{ |
| 22249 zSql = sqlite3_mprintf( |
| 22250 "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", |
| 22251 pRtree->zDb, pRtree->zName |
| 22252 ); |
| 22253 rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); |
| 22254 if( rc!=SQLITE_OK ){ |
| 22255 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22256 } |
| 22257 } |
| 22258 |
| 22259 sqlite3_free(zSql); |
| 22260 return rc; |
| 22261 } |
| 22262 |
| 22263 /* |
| 22264 ** This function is the implementation of both the xConnect and xCreate |
| 22265 ** methods of the r-tree virtual table. |
| 22266 ** |
| 22267 ** argv[0] -> module name |
| 22268 ** argv[1] -> database name |
| 22269 ** argv[2] -> table name |
| 22270 ** argv[...] -> column names... |
| 22271 */ |
| 22272 static int rtreeInit( |
| 22273 sqlite3 *db, /* Database connection */ |
| 22274 void *pAux, /* One of the RTREE_COORD_* constants */ |
| 22275 int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ |
| 22276 sqlite3_vtab **ppVtab, /* OUT: New virtual table */ |
| 22277 char **pzErr, /* OUT: Error message, if any */ |
| 22278 int isCreate /* True for xCreate, false for xConnect */ |
| 22279 ){ |
| 22280 int rc = SQLITE_OK; |
| 22281 Rtree *pRtree; |
| 22282 int nDb; /* Length of string argv[1] */ |
| 22283 int nName; /* Length of string argv[2] */ |
| 22284 int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); |
| 22285 |
| 22286 const char *aErrMsg[] = { |
| 22287 0, /* 0 */ |
| 22288 "Wrong number of columns for an rtree table", /* 1 */ |
| 22289 "Too few columns for an rtree table", /* 2 */ |
| 22290 "Too many columns for an rtree table" /* 3 */ |
| 22291 }; |
| 22292 |
| 22293 int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2; |
| 22294 if( aErrMsg[iErr] ){ |
| 22295 *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); |
| 22296 return SQLITE_ERROR; |
| 22297 } |
| 22298 |
| 22299 sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); |
| 22300 |
| 22301 /* Allocate the sqlite3_vtab structure */ |
| 22302 nDb = (int)strlen(argv[1]); |
| 22303 nName = (int)strlen(argv[2]); |
| 22304 pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2); |
| 22305 if( !pRtree ){ |
| 22306 return SQLITE_NOMEM; |
| 22307 } |
| 22308 memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); |
| 22309 pRtree->nBusy = 1; |
| 22310 pRtree->base.pModule = &rtreeModule; |
| 22311 pRtree->zDb = (char *)&pRtree[1]; |
| 22312 pRtree->zName = &pRtree->zDb[nDb+1]; |
| 22313 pRtree->nDim = (argc-4)/2; |
| 22314 pRtree->nBytesPerCell = 8 + pRtree->nDim*4*2; |
| 22315 pRtree->eCoordType = eCoordType; |
| 22316 memcpy(pRtree->zDb, argv[1], nDb); |
| 22317 memcpy(pRtree->zName, argv[2], nName); |
| 22318 |
| 22319 /* Figure out the node size to use. */ |
| 22320 rc = getNodeSize(db, pRtree, isCreate, pzErr); |
| 22321 |
| 22322 /* Create/Connect to the underlying relational database schema. If |
| 22323 ** that is successful, call sqlite3_declare_vtab() to configure |
| 22324 ** the r-tree table schema. |
| 22325 */ |
| 22326 if( rc==SQLITE_OK ){ |
| 22327 if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){ |
| 22328 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22329 }else{ |
| 22330 char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]); |
| 22331 char *zTmp; |
| 22332 int ii; |
| 22333 for(ii=4; zSql && ii<argc; ii++){ |
| 22334 zTmp = zSql; |
| 22335 zSql = sqlite3_mprintf("%s, %s", zTmp, argv[ii]); |
| 22336 sqlite3_free(zTmp); |
| 22337 } |
| 22338 if( zSql ){ |
| 22339 zTmp = zSql; |
| 22340 zSql = sqlite3_mprintf("%s);", zTmp); |
| 22341 sqlite3_free(zTmp); |
| 22342 } |
| 22343 if( !zSql ){ |
| 22344 rc = SQLITE_NOMEM; |
| 22345 }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){ |
| 22346 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22347 } |
| 22348 sqlite3_free(zSql); |
| 22349 } |
| 22350 } |
| 22351 |
| 22352 if( rc==SQLITE_OK ){ |
| 22353 *ppVtab = (sqlite3_vtab *)pRtree; |
| 22354 }else{ |
| 22355 assert( *ppVtab==0 ); |
| 22356 assert( pRtree->nBusy==1 ); |
| 22357 rtreeRelease(pRtree); |
| 22358 } |
| 22359 return rc; |
| 22360 } |
| 22361 |
| 22362 |
| 22363 /* |
| 22364 ** Implementation of a scalar function that decodes r-tree nodes to |
| 22365 ** human readable strings. This can be used for debugging and analysis. |
| 22366 ** |
| 22367 ** The scalar function takes two arguments: (1) the number of dimensions |
| 22368 ** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing |
| 22369 ** an r-tree node. For a two-dimensional r-tree structure called "rt", to |
| 22370 ** deserialize all nodes, a statement like: |
| 22371 ** |
| 22372 ** SELECT rtreenode(2, data) FROM rt_node; |
| 22373 ** |
| 22374 ** The human readable string takes the form of a Tcl list with one |
| 22375 ** entry for each cell in the r-tree node. Each entry is itself a |
| 22376 ** list, containing the 8-byte rowid/pageno followed by the |
| 22377 ** <num-dimension>*2 coordinates. |
| 22378 */ |
| 22379 static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ |
| 22380 char *zText = 0; |
| 22381 RtreeNode node; |
| 22382 Rtree tree; |
| 22383 int ii; |
| 22384 |
| 22385 UNUSED_PARAMETER(nArg); |
| 22386 memset(&node, 0, sizeof(RtreeNode)); |
| 22387 memset(&tree, 0, sizeof(Rtree)); |
| 22388 tree.nDim = sqlite3_value_int(apArg[0]); |
| 22389 tree.nBytesPerCell = 8 + 8 * tree.nDim; |
| 22390 node.zData = (u8 *)sqlite3_value_blob(apArg[1]); |
| 22391 |
| 22392 for(ii=0; ii<NCELL(&node); ii++){ |
| 22393 char zCell[512]; |
| 22394 int nCell = 0; |
| 22395 RtreeCell cell; |
| 22396 int jj; |
| 22397 |
| 22398 nodeGetCell(&tree, &node, ii, &cell); |
| 22399 sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid); |
| 22400 nCell = (int)strlen(zCell); |
| 22401 for(jj=0; jj<tree.nDim*2; jj++){ |
| 22402 #ifndef SQLITE_RTREE_INT_ONLY |
| 22403 sqlite3_snprintf(512-nCell,&zCell[nCell], " %g", |
| 22404 (double)cell.aCoord[jj].f); |
| 22405 #else |
| 22406 sqlite3_snprintf(512-nCell,&zCell[nCell], " %d", |
| 22407 cell.aCoord[jj].i); |
| 22408 #endif |
| 22409 nCell = (int)strlen(zCell); |
| 22410 } |
| 22411 |
| 22412 if( zText ){ |
| 22413 char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell); |
| 22414 sqlite3_free(zText); |
| 22415 zText = zTextNew; |
| 22416 }else{ |
| 22417 zText = sqlite3_mprintf("{%s}", zCell); |
| 22418 } |
| 22419 } |
| 22420 |
| 22421 sqlite3_result_text(ctx, zText, -1, sqlite3_free); |
| 22422 } |
| 22423 |
| 22424 /* This routine implements an SQL function that returns the "depth" parameter |
| 22425 ** from the front of a blob that is an r-tree node. For example: |
| 22426 ** |
| 22427 ** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; |
| 22428 ** |
| 22429 ** The depth value is 0 for all nodes other than the root node, and the root |
| 22430 ** node always has nodeno=1, so the example above is the primary use for this |
| 22431 ** routine. This routine is intended for testing and analysis only. |
| 22432 */ |
| 22433 static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ |
| 22434 UNUSED_PARAMETER(nArg); |
| 22435 if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB |
| 22436 || sqlite3_value_bytes(apArg[0])<2 |
| 22437 ){ |
| 22438 sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); |
| 22439 }else{ |
| 22440 u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]); |
| 22441 sqlite3_result_int(ctx, readInt16(zBlob)); |
| 22442 } |
| 22443 } |
| 22444 |
| 22445 /* |
| 22446 ** Register the r-tree module with database handle db. This creates the |
| 22447 ** virtual table module "rtree" and the debugging/analysis scalar |
| 22448 ** function "rtreenode". |
| 22449 */ |
| 22450 SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){ |
| 22451 const int utf8 = SQLITE_UTF8; |
| 22452 int rc; |
| 22453 |
| 22454 rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); |
| 22455 if( rc==SQLITE_OK ){ |
| 22456 rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); |
| 22457 } |
| 22458 if( rc==SQLITE_OK ){ |
| 22459 #ifdef SQLITE_RTREE_INT_ONLY |
| 22460 void *c = (void *)RTREE_COORD_INT32; |
| 22461 #else |
| 22462 void *c = (void *)RTREE_COORD_REAL32; |
| 22463 #endif |
| 22464 rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0); |
| 22465 } |
| 22466 if( rc==SQLITE_OK ){ |
| 22467 void *c = (void *)RTREE_COORD_INT32; |
| 22468 rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); |
| 22469 } |
| 22470 |
| 22471 return rc; |
| 22472 } |
| 22473 |
| 22474 /* |
| 22475 ** This routine deletes the RtreeGeomCallback object that was attached |
| 22476 ** one of the SQL functions create by sqlite3_rtree_geometry_callback() |
| 22477 ** or sqlite3_rtree_query_callback(). In other words, this routine is the |
| 22478 ** destructor for an RtreeGeomCallback objecct. This routine is called when |
| 22479 ** the corresponding SQL function is deleted. |
| 22480 */ |
| 22481 static void rtreeFreeCallback(void *p){ |
| 22482 RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p; |
| 22483 if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext); |
| 22484 sqlite3_free(p); |
| 22485 } |
| 22486 |
| 22487 /* |
| 22488 ** This routine frees the BLOB that is returned by geomCallback(). |
| 22489 */ |
| 22490 static void rtreeMatchArgFree(void *pArg){ |
| 22491 int i; |
| 22492 RtreeMatchArg *p = (RtreeMatchArg*)pArg; |
| 22493 for(i=0; i<p->nParam; i++){ |
| 22494 sqlite3_value_free(p->apSqlParam[i]); |
| 22495 } |
| 22496 sqlite3_free(p); |
| 22497 } |
| 22498 |
| 22499 /* |
| 22500 ** Each call to sqlite3_rtree_geometry_callback() or |
| 22501 ** sqlite3_rtree_query_callback() creates an ordinary SQLite |
| 22502 ** scalar function that is implemented by this routine. |
| 22503 ** |
| 22504 ** All this function does is construct an RtreeMatchArg object that |
| 22505 ** contains the geometry-checking callback routines and a list of |
| 22506 ** parameters to this function, then return that RtreeMatchArg object |
| 22507 ** as a BLOB. |
| 22508 ** |
| 22509 ** The R-Tree MATCH operator will read the returned BLOB, deserialize |
| 22510 ** the RtreeMatchArg object, and use the RtreeMatchArg object to figure |
| 22511 ** out which elements of the R-Tree should be returned by the query. |
| 22512 */ |
| 22513 static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ |
| 22514 RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); |
| 22515 RtreeMatchArg *pBlob; |
| 22516 int nBlob; |
| 22517 int memErr = 0; |
| 22518 |
| 22519 nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue) |
| 22520 + nArg*sizeof(sqlite3_value*); |
| 22521 pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); |
| 22522 if( !pBlob ){ |
| 22523 sqlite3_result_error_nomem(ctx); |
| 22524 }else{ |
| 22525 int i; |
| 22526 pBlob->magic = RTREE_GEOMETRY_MAGIC; |
| 22527 pBlob->cb = pGeomCtx[0]; |
| 22528 pBlob->apSqlParam = (sqlite3_value**)&pBlob->aParam[nArg]; |
| 22529 pBlob->nParam = nArg; |
| 22530 for(i=0; i<nArg; i++){ |
| 22531 pBlob->apSqlParam[i] = sqlite3_value_dup(aArg[i]); |
| 22532 if( pBlob->apSqlParam[i]==0 ) memErr = 1; |
| 22533 #ifdef SQLITE_RTREE_INT_ONLY |
| 22534 pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); |
| 22535 #else |
| 22536 pBlob->aParam[i] = sqlite3_value_double(aArg[i]); |
| 22537 #endif |
| 22538 } |
| 22539 if( memErr ){ |
| 22540 sqlite3_result_error_nomem(ctx); |
| 22541 rtreeMatchArgFree(pBlob); |
| 22542 }else{ |
| 22543 sqlite3_result_blob(ctx, pBlob, nBlob, rtreeMatchArgFree); |
| 22544 } |
| 22545 } |
| 22546 } |
| 22547 |
| 22548 /* |
| 22549 ** Register a new geometry function for use with the r-tree MATCH operator. |
| 22550 */ |
| 22551 SQLITE_API int SQLITE_STDCALL sqlite3_rtree_geometry_callback( |
| 22552 sqlite3 *db, /* Register SQL function on this connection */ |
| 22553 const char *zGeom, /* Name of the new SQL function */ |
| 22554 int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ |
| 22555 void *pContext /* Extra data associated with the callback */ |
| 22556 ){ |
| 22557 RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ |
| 22558 |
| 22559 /* Allocate and populate the context object. */ |
| 22560 pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); |
| 22561 if( !pGeomCtx ) return SQLITE_NOMEM; |
| 22562 pGeomCtx->xGeom = xGeom; |
| 22563 pGeomCtx->xQueryFunc = 0; |
| 22564 pGeomCtx->xDestructor = 0; |
| 22565 pGeomCtx->pContext = pContext; |
| 22566 return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, |
| 22567 (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback |
| 22568 ); |
| 22569 } |
| 22570 |
| 22571 /* |
| 22572 ** Register a new 2nd-generation geometry function for use with the |
| 22573 ** r-tree MATCH operator. |
| 22574 */ |
| 22575 SQLITE_API int SQLITE_STDCALL sqlite3_rtree_query_callback( |
| 22576 sqlite3 *db, /* Register SQL function on this connection */ |
| 22577 const char *zQueryFunc, /* Name of new SQL function */ |
| 22578 int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ |
| 22579 void *pContext, /* Extra data passed into the callback */ |
| 22580 void (*xDestructor)(void*) /* Destructor for the extra data */ |
| 22581 ){ |
| 22582 RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ |
| 22583 |
| 22584 /* Allocate and populate the context object. */ |
| 22585 pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); |
| 22586 if( !pGeomCtx ) return SQLITE_NOMEM; |
| 22587 pGeomCtx->xGeom = 0; |
| 22588 pGeomCtx->xQueryFunc = xQueryFunc; |
| 22589 pGeomCtx->xDestructor = xDestructor; |
| 22590 pGeomCtx->pContext = pContext; |
| 22591 return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, |
| 22592 (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback |
| 22593 ); |
| 22594 } |
| 22595 |
| 22596 #if !SQLITE_CORE |
| 22597 #ifdef _WIN32 |
| 22598 __declspec(dllexport) |
| 22599 #endif |
| 22600 SQLITE_API int SQLITE_STDCALL sqlite3_rtree_init( |
| 22601 sqlite3 *db, |
| 22602 char **pzErrMsg, |
| 22603 const sqlite3_api_routines *pApi |
| 22604 ){ |
| 22605 SQLITE_EXTENSION_INIT2(pApi) |
| 22606 return sqlite3RtreeInit(db); |
| 22607 } |
| 22608 #endif |
| 22609 |
| 22610 #endif |
| 22611 |
| 22612 /************** End of rtree.c ***********************************************/ |
| 22613 /************** Begin file icu.c *********************************************/ |
| 22614 /* |
| 22615 ** 2007 May 6 |
| 22616 ** |
| 22617 ** The author disclaims copyright to this source code. In place of |
| 22618 ** a legal notice, here is a blessing: |
| 22619 ** |
| 22620 ** May you do good and not evil. |
| 22621 ** May you find forgiveness for yourself and forgive others. |
| 22622 ** May you share freely, never taking more than you give. |
| 22623 ** |
| 22624 ************************************************************************* |
| 22625 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ |
| 22626 ** |
| 22627 ** This file implements an integration between the ICU library |
| 22628 ** ("International Components for Unicode", an open-source library |
| 22629 ** for handling unicode data) and SQLite. The integration uses |
| 22630 ** ICU to provide the following to SQLite: |
| 22631 ** |
| 22632 ** * An implementation of the SQL regexp() function (and hence REGEXP |
| 22633 ** operator) using the ICU uregex_XX() APIs. |
| 22634 ** |
| 22635 ** * Implementations of the SQL scalar upper() and lower() functions |
| 22636 ** for case mapping. |
| 22637 ** |
| 22638 ** * Integration of ICU and SQLite collation sequences. |
| 22639 ** |
| 22640 ** * An implementation of the LIKE operator that uses ICU to |
| 22641 ** provide case-independent matching. |
| 22642 */ |
| 22643 |
| 22644 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) |
| 22645 |
| 22646 /* Include ICU headers */ |
| 22647 #include <unicode/utypes.h> |
| 22648 #include <unicode/uregex.h> |
| 22649 #include <unicode/ustring.h> |
| 22650 #include <unicode/ucol.h> |
| 22651 |
| 22652 /* #include <assert.h> */ |
| 22653 |
| 22654 #ifndef SQLITE_CORE |
| 22655 /* #include "sqlite3ext.h" */ |
| 22656 SQLITE_EXTENSION_INIT1 |
| 22657 #else |
| 22658 /* #include "sqlite3.h" */ |
| 22659 #endif |
| 22660 |
| 22661 /* |
| 22662 ** Maximum length (in bytes) of the pattern in a LIKE or GLOB |
| 22663 ** operator. |
| 22664 */ |
| 22665 #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH |
| 22666 # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 |
| 22667 #endif |
| 22668 |
/*
** Thin wrapper around sqlite3_free(). sqlite3_free() may be a macro,
** so this real function exists to be passed where a function pointer
** (for example a destructor callback) is required.
*/
static void xFree(
  void *p                         /* Allocation to release */
){
  sqlite3_free(p);
}
| 22675 |
| 22676 /* |
| 22677 ** Compare two UTF-8 strings for equality where the first string is |
| 22678 ** a "LIKE" expression. Return true (1) if they are the same and |
| 22679 ** false (0) if they are different. |
| 22680 */ |
| 22681 static int icuLikeCompare( |
| 22682 const uint8_t *zPattern, /* LIKE pattern */ |
| 22683 const uint8_t *zString, /* The UTF-8 string to compare against */ |
| 22684 const UChar32 uEsc /* The escape character */ |
| 22685 ){ |
| 22686 static const int MATCH_ONE = (UChar32)'_'; |
| 22687 static const int MATCH_ALL = (UChar32)'%'; |
| 22688 |
| 22689 int iPattern = 0; /* Current byte index in zPattern */ |
| 22690 int iString = 0; /* Current byte index in zString */ |
| 22691 |
| 22692 int prevEscape = 0; /* True if the previous character was uEsc */ |
| 22693 |
| 22694 while( zPattern[iPattern]!=0 ){ |
| 22695 |
| 22696 /* Read (and consume) the next character from the input pattern. */ |
| 22697 UChar32 uPattern; |
| 22698 U8_NEXT_UNSAFE(zPattern, iPattern, uPattern); |
| 22699 |
| 22700 /* There are now 4 possibilities: |
| 22701 ** |
| 22702 ** 1. uPattern is an unescaped match-all character "%", |
| 22703 ** 2. uPattern is an unescaped match-one character "_", |
| 22704 ** 3. uPattern is an unescaped escape character, or |
| 22705 ** 4. uPattern is to be handled as an ordinary character |
| 22706 */ |
| 22707 if( !prevEscape && uPattern==MATCH_ALL ){ |
| 22708 /* Case 1. */ |
| 22709 uint8_t c; |
| 22710 |
| 22711 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a |
| 22712 ** MATCH_ALL. For each MATCH_ONE, skip one character in the |
| 22713 ** test string. |
| 22714 */ |
| 22715 while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ |
| 22716 if( c==MATCH_ONE ){ |
| 22717 if( zString[iString]==0 ) return 0; |
| 22718 U8_FWD_1_UNSAFE(zString, iString); |
| 22719 } |
| 22720 iPattern++; |
| 22721 } |
| 22722 |
| 22723 if( zPattern[iPattern]==0 ) return 1; |
| 22724 |
| 22725 while( zString[iString] ){ |
| 22726 if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ |
| 22727 return 1; |
| 22728 } |
| 22729 U8_FWD_1_UNSAFE(zString, iString); |
| 22730 } |
| 22731 return 0; |
| 22732 |
| 22733 }else if( !prevEscape && uPattern==MATCH_ONE ){ |
| 22734 /* Case 2. */ |
| 22735 if( zString[iString]==0 ) return 0; |
| 22736 U8_FWD_1_UNSAFE(zString, iString); |
| 22737 |
| 22738 }else if( !prevEscape && uPattern==uEsc){ |
| 22739 /* Case 3. */ |
| 22740 prevEscape = 1; |
| 22741 |
| 22742 }else{ |
| 22743 /* Case 4. */ |
| 22744 UChar32 uString; |
| 22745 U8_NEXT_UNSAFE(zString, iString, uString); |
| 22746 uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); |
| 22747 uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); |
| 22748 if( uString!=uPattern ){ |
| 22749 return 0; |
| 22750 } |
| 22751 prevEscape = 0; |
| 22752 } |
| 22753 } |
| 22754 |
| 22755 return zString[iString]==0; |
| 22756 } |
| 22757 |
| 22758 /* |
| 22759 ** Implementation of the like() SQL function. This function implements |
| 22760 ** the build-in LIKE operator. The first argument to the function is the |
| 22761 ** pattern and the second argument is the string. So, the SQL statements: |
| 22762 ** |
| 22763 ** A LIKE B |
| 22764 ** |
| 22765 ** is implemented as like(B, A). If there is an escape character E, |
| 22766 ** |
| 22767 ** A LIKE B ESCAPE E |
| 22768 ** |
| 22769 ** is mapped to like(B, A, E). |
| 22770 */ |
| 22771 static void icuLikeFunc( |
| 22772 sqlite3_context *context, |
| 22773 int argc, |
| 22774 sqlite3_value **argv |
| 22775 ){ |
| 22776 const unsigned char *zA = sqlite3_value_text(argv[0]); |
| 22777 const unsigned char *zB = sqlite3_value_text(argv[1]); |
| 22778 UChar32 uEsc = 0; |
| 22779 |
| 22780 /* Limit the length of the LIKE or GLOB pattern to avoid problems |
| 22781 ** of deep recursion and N*N behavior in patternCompare(). |
| 22782 */ |
| 22783 if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ |
| 22784 sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); |
| 22785 return; |
| 22786 } |
| 22787 |
| 22788 |
| 22789 if( argc==3 ){ |
| 22790 /* The escape character string must consist of a single UTF-8 character. |
| 22791 ** Otherwise, return an error. |
| 22792 */ |
| 22793 int nE= sqlite3_value_bytes(argv[2]); |
| 22794 const unsigned char *zE = sqlite3_value_text(argv[2]); |
| 22795 int i = 0; |
| 22796 if( zE==0 ) return; |
| 22797 U8_NEXT(zE, i, nE, uEsc); |
| 22798 if( i!=nE){ |
| 22799 sqlite3_result_error(context, |
| 22800 "ESCAPE expression must be a single character", -1); |
| 22801 return; |
| 22802 } |
| 22803 } |
| 22804 |
| 22805 if( zA && zB ){ |
| 22806 sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); |
| 22807 } |
| 22808 } |
| 22809 |
| 22810 /* |
| 22811 ** This function is called when an ICU function called from within |
| 22812 ** the implementation of an SQL scalar function returns an error. |
| 22813 ** |
| 22814 ** The scalar function context passed as the first argument is |
| 22815 ** loaded with an error message based on the following two args. |
| 22816 */ |
| 22817 static void icuFunctionError( |
| 22818 sqlite3_context *pCtx, /* SQLite scalar function context */ |
| 22819 const char *zName, /* Name of ICU function that failed */ |
| 22820 UErrorCode e /* Error code returned by ICU function */ |
| 22821 ){ |
| 22822 char zBuf[128]; |
| 22823 sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); |
| 22824 zBuf[127] = '\0'; |
| 22825 sqlite3_result_error(pCtx, zBuf, -1); |
| 22826 } |
| 22827 |
| 22828 /* |
| 22829 ** Function to delete compiled regexp objects. Registered as |
| 22830 ** a destructor function with sqlite3_set_auxdata(). |
| 22831 */ |
| 22832 static void icuRegexpDelete(void *p){ |
| 22833 URegularExpression *pExpr = (URegularExpression *)p; |
| 22834 uregex_close(pExpr); |
| 22835 } |
| 22836 |
| 22837 /* |
| 22838 ** Implementation of SQLite REGEXP operator. This scalar function takes |
| 22839 ** two arguments. The first is a regular expression pattern to compile |
| 22840 ** the second is a string to match against that pattern. If either |
| 22841 ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result |
| 22842 ** is 1 if the string matches the pattern, or 0 otherwise. |
| 22843 ** |
| 22844 ** SQLite maps the regexp() function to the regexp() operator such |
| 22845 ** that the following two are equivalent: |
| 22846 ** |
| 22847 ** zString REGEXP zPattern |
| 22848 ** regexp(zPattern, zString) |
| 22849 ** |
| 22850 ** Uses the following ICU regexp APIs: |
| 22851 ** |
| 22852 ** uregex_open() |
| 22853 ** uregex_matches() |
| 22854 ** uregex_close() |
| 22855 */ |
| 22856 static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ |
| 22857 UErrorCode status = U_ZERO_ERROR; |
| 22858 URegularExpression *pExpr; |
| 22859 UBool res; |
| 22860 const UChar *zString = sqlite3_value_text16(apArg[1]); |
| 22861 |
| 22862 (void)nArg; /* Unused parameter */ |
| 22863 |
| 22864 /* If the left hand side of the regexp operator is NULL, |
| 22865 ** then the result is also NULL. |
| 22866 */ |
| 22867 if( !zString ){ |
| 22868 return; |
| 22869 } |
| 22870 |
| 22871 pExpr = sqlite3_get_auxdata(p, 0); |
| 22872 if( !pExpr ){ |
| 22873 const UChar *zPattern = sqlite3_value_text16(apArg[0]); |
| 22874 if( !zPattern ){ |
| 22875 return; |
| 22876 } |
| 22877 pExpr = uregex_open(zPattern, -1, 0, 0, &status); |
| 22878 |
| 22879 if( U_SUCCESS(status) ){ |
| 22880 sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); |
| 22881 }else{ |
| 22882 assert(!pExpr); |
| 22883 icuFunctionError(p, "uregex_open", status); |
| 22884 return; |
| 22885 } |
| 22886 } |
| 22887 |
| 22888 /* Configure the text that the regular expression operates on. */ |
| 22889 uregex_setText(pExpr, zString, -1, &status); |
| 22890 if( !U_SUCCESS(status) ){ |
| 22891 icuFunctionError(p, "uregex_setText", status); |
| 22892 return; |
| 22893 } |
| 22894 |
| 22895 /* Attempt the match */ |
| 22896 res = uregex_matches(pExpr, 0, &status); |
| 22897 if( !U_SUCCESS(status) ){ |
| 22898 icuFunctionError(p, "uregex_matches", status); |
| 22899 return; |
| 22900 } |
| 22901 |
| 22902 /* Set the text that the regular expression operates on to a NULL |
| 22903 ** pointer. This is not really necessary, but it is tidier than |
| 22904 ** leaving the regular expression object configured with an invalid |
| 22905 ** pointer after this function returns. |
| 22906 */ |
| 22907 uregex_setText(pExpr, 0, 0, &status); |
| 22908 |
| 22909 /* Return 1 or 0. */ |
| 22910 sqlite3_result_int(p, res ? 1 : 0); |
| 22911 } |
| 22912 |
| 22913 /* |
| 22914 ** Implementations of scalar functions for case mapping - upper() and |
| 22915 ** lower(). Function upper() converts its input to upper-case (ABC). |
| 22916 ** Function lower() converts to lower-case (abc). |
| 22917 ** |
| 22918 ** ICU provides two types of case mapping, "general" case mapping and |
| 22919 ** "language specific". Refer to ICU documentation for the differences |
| 22920 ** between the two. |
| 22921 ** |
| 22922 ** To utilise "general" case mapping, the upper() or lower() scalar |
| 22923 ** functions are invoked with one argument: |
| 22924 ** |
| 22925 ** upper('ABC') -> 'abc' |
| 22926 ** lower('abc') -> 'ABC' |
| 22927 ** |
| 22928 ** To access ICU "language specific" case mapping, upper() or lower() |
| 22929 ** should be invoked with two arguments. The second argument is the name |
| 22930 ** of the locale to use. Passing an empty string ("") or SQL NULL value |
| 22931 ** as the second argument is the same as invoking the 1 argument version |
| 22932 ** of upper() or lower(). |
| 22933 ** |
| 22934 ** lower('I', 'en_us') -> 'i' |
| 22935 ** lower('I', 'tr_tr') -> 'ı' (small dotless i) |
| 22936 ** |
| 22937 ** http://www.icu-project.org/userguide/posix.html#case_mappings |
| 22938 */ |
| 22939 static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ |
| 22940 const UChar *zInput; |
| 22941 UChar *zOutput; |
| 22942 int nInput; |
| 22943 int nOutput; |
| 22944 |
| 22945 UErrorCode status = U_ZERO_ERROR; |
| 22946 const char *zLocale = 0; |
| 22947 |
| 22948 assert(nArg==1 || nArg==2); |
| 22949 if( nArg==2 ){ |
| 22950 zLocale = (const char *)sqlite3_value_text(apArg[1]); |
| 22951 } |
| 22952 |
| 22953 zInput = sqlite3_value_text16(apArg[0]); |
| 22954 if( !zInput ){ |
| 22955 return; |
| 22956 } |
| 22957 nInput = sqlite3_value_bytes16(apArg[0]); |
| 22958 |
| 22959 nOutput = nInput * 2 + 2; |
| 22960 zOutput = sqlite3_malloc(nOutput); |
| 22961 if( !zOutput ){ |
| 22962 return; |
| 22963 } |
| 22964 |
| 22965 if( sqlite3_user_data(p) ){ |
| 22966 u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); |
| 22967 }else{ |
| 22968 u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); |
| 22969 } |
| 22970 |
| 22971 if( !U_SUCCESS(status) ){ |
| 22972 icuFunctionError(p, "u_strToLower()/u_strToUpper", status); |
| 22973 return; |
| 22974 } |
| 22975 |
| 22976 sqlite3_result_text16(p, zOutput, -1, xFree); |
| 22977 } |
| 22978 |
| 22979 /* |
| 22980 ** Collation sequence destructor function. The pCtx argument points to |
| 22981 ** a UCollator structure previously allocated using ucol_open(). |
| 22982 */ |
| 22983 static void icuCollationDel(void *pCtx){ |
| 22984 UCollator *p = (UCollator *)pCtx; |
| 22985 ucol_close(p); |
| 22986 } |
| 22987 |
| 22988 /* |
| 22989 ** Collation sequence comparison function. The pCtx argument points to |
| 22990 ** a UCollator structure previously allocated using ucol_open(). |
| 22991 */ |
| 22992 static int icuCollationColl( |
| 22993 void *pCtx, |
| 22994 int nLeft, |
| 22995 const void *zLeft, |
| 22996 int nRight, |
| 22997 const void *zRight |
| 22998 ){ |
| 22999 UCollationResult res; |
| 23000 UCollator *p = (UCollator *)pCtx; |
| 23001 res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); |
| 23002 switch( res ){ |
| 23003 case UCOL_LESS: return -1; |
| 23004 case UCOL_GREATER: return +1; |
| 23005 case UCOL_EQUAL: return 0; |
| 23006 } |
| 23007 assert(!"Unexpected return value from ucol_strcoll()"); |
| 23008 return 0; |
| 23009 } |
| 23010 |
| 23011 /* |
| 23012 ** Implementation of the scalar function icu_load_collation(). |
| 23013 ** |
| 23014 ** This scalar function is used to add ICU collation based collation |
| 23015 ** types to an SQLite database connection. It is intended to be called |
| 23016 ** as follows: |
| 23017 ** |
| 23018 ** SELECT icu_load_collation(<locale>, <collation-name>); |
| 23019 ** |
| 23020 ** Where <locale> is a string containing an ICU locale identifier (i.e. |
| 23021 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the |
| 23022 ** collation sequence to create. |
| 23023 */ |
| 23024 static void icuLoadCollation( |
| 23025 sqlite3_context *p, |
| 23026 int nArg, |
| 23027 sqlite3_value **apArg |
| 23028 ){ |
| 23029 sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); |
| 23030 UErrorCode status = U_ZERO_ERROR; |
| 23031 const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ |
| 23032 const char *zName; /* SQL Collation sequence name (eg. "japanese") */ |
| 23033 UCollator *pUCollator; /* ICU library collation object */ |
| 23034 int rc; /* Return code from sqlite3_create_collation_x() */ |
| 23035 |
| 23036 assert(nArg==2); |
| 23037 (void)nArg; /* Unused parameter */ |
| 23038 zLocale = (const char *)sqlite3_value_text(apArg[0]); |
| 23039 zName = (const char *)sqlite3_value_text(apArg[1]); |
| 23040 |
| 23041 if( !zLocale || !zName ){ |
| 23042 return; |
| 23043 } |
| 23044 |
| 23045 pUCollator = ucol_open(zLocale, &status); |
| 23046 if( !U_SUCCESS(status) ){ |
| 23047 icuFunctionError(p, "ucol_open", status); |
| 23048 return; |
| 23049 } |
| 23050 assert(p); |
| 23051 |
| 23052 rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, |
| 23053 icuCollationColl, icuCollationDel |
| 23054 ); |
| 23055 if( rc!=SQLITE_OK ){ |
| 23056 ucol_close(pUCollator); |
| 23057 sqlite3_result_error(p, "Error registering collation function", -1); |
| 23058 } |
| 23059 } |
| 23060 |
| 23061 /* |
| 23062 ** Register the ICU extension functions with database db. |
| 23063 */ |
| 23064 SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ |
| 23065 struct IcuScalar { |
| 23066 const char *zName; /* Function name */ |
| 23067 int nArg; /* Number of arguments */ |
| 23068 int enc; /* Optimal text encoding */ |
| 23069 void *pContext; /* sqlite3_user_data() context */ |
| 23070 void (*xFunc)(sqlite3_context*,int,sqlite3_value**); |
| 23071 } scalars[] = { |
| 23072 {"regexp", 2, SQLITE_ANY, 0, icuRegexpFunc}, |
| 23073 |
| 23074 {"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16}, |
| 23075 {"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16}, |
| 23076 {"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16}, |
| 23077 {"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16}, |
| 23078 |
| 23079 {"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16}, |
| 23080 {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, |
| 23081 {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, |
| 23082 {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, |
| 23083 |
| 23084 {"like", 2, SQLITE_UTF8, 0, icuLikeFunc}, |
| 23085 {"like", 3, SQLITE_UTF8, 0, icuLikeFunc}, |
| 23086 |
| 23087 {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, |
| 23088 }; |
| 23089 |
| 23090 int rc = SQLITE_OK; |
| 23091 int i; |
| 23092 |
| 23093 for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ |
| 23094 struct IcuScalar *p = &scalars[i]; |
| 23095 rc = sqlite3_create_function( |
| 23096 db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0 |
| 23097 ); |
| 23098 } |
| 23099 |
| 23100 return rc; |
| 23101 } |
| 23102 |
| 23103 #if !SQLITE_CORE |
| 23104 #ifdef _WIN32 |
| 23105 __declspec(dllexport) |
| 23106 #endif |
| 23107 SQLITE_API int SQLITE_STDCALL sqlite3_icu_init( |
| 23108 sqlite3 *db, |
| 23109 char **pzErrMsg, |
| 23110 const sqlite3_api_routines *pApi |
| 23111 ){ |
| 23112 SQLITE_EXTENSION_INIT2(pApi) |
| 23113 return sqlite3IcuInit(db); |
| 23114 } |
| 23115 #endif |
| 23116 |
| 23117 #endif |
| 23118 |
| 23119 /************** End of icu.c *************************************************/ |
| 23120 /************** Begin file fts3_icu.c ****************************************/ |
| 23121 /* |
| 23122 ** 2007 June 22 |
| 23123 ** |
| 23124 ** The author disclaims copyright to this source code. In place of |
| 23125 ** a legal notice, here is a blessing: |
| 23126 ** |
| 23127 ** May you do good and not evil. |
| 23128 ** May you find forgiveness for yourself and forgive others. |
| 23129 ** May you share freely, never taking more than you give. |
| 23130 ** |
| 23131 ************************************************************************* |
| 23132 ** This file implements a tokenizer for fts3 based on the ICU library. |
| 23133 */ |
| 23134 /* #include "fts3Int.h" */ |
| 23135 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 23136 #ifdef SQLITE_ENABLE_ICU |
| 23137 |
| 23138 /* #include <assert.h> */ |
| 23139 /* #include <string.h> */ |
| 23140 /* #include "fts3_tokenizer.h" */ |
| 23141 |
| 23142 #include <unicode/ubrk.h> |
| 23143 /* #include <unicode/ucol.h> */ |
| 23144 /* #include <unicode/ustring.h> */ |
| 23145 #include <unicode/utf16.h> |
| 23146 |
| 23147 typedef struct IcuTokenizer IcuTokenizer; |
| 23148 typedef struct IcuCursor IcuCursor; |
| 23149 |
| 23150 struct IcuTokenizer { |
| 23151 sqlite3_tokenizer base; |
| 23152 char *zLocale; |
| 23153 }; |
| 23154 |
| 23155 struct IcuCursor { |
| 23156 sqlite3_tokenizer_cursor base; |
| 23157 |
| 23158 UBreakIterator *pIter; /* ICU break-iterator object */ |
| 23159 int nChar; /* Number of UChar elements in pInput */ |
| 23160 UChar *aChar; /* Copy of input using utf-16 encoding */ |
| 23161 int *aOffset; /* Offsets of each character in utf-8 input */ |
| 23162 |
| 23163 int nBuffer; |
| 23164 char *zBuffer; |
| 23165 |
| 23166 int iToken; |
| 23167 }; |
| 23168 |
| 23169 /* |
| 23170 ** Create a new tokenizer instance. |
| 23171 */ |
| 23172 static int icuCreate( |
| 23173 int argc, /* Number of entries in argv[] */ |
| 23174 const char * const *argv, /* Tokenizer creation arguments */ |
| 23175 sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ |
| 23176 ){ |
| 23177 IcuTokenizer *p; |
| 23178 int n = 0; |
| 23179 |
| 23180 if( argc>0 ){ |
| 23181 n = strlen(argv[0])+1; |
| 23182 } |
| 23183 p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); |
| 23184 if( !p ){ |
| 23185 return SQLITE_NOMEM; |
| 23186 } |
| 23187 memset(p, 0, sizeof(IcuTokenizer)); |
| 23188 |
| 23189 if( n ){ |
| 23190 p->zLocale = (char *)&p[1]; |
| 23191 memcpy(p->zLocale, argv[0], n); |
| 23192 } |
| 23193 |
| 23194 *ppTokenizer = (sqlite3_tokenizer *)p; |
| 23195 |
| 23196 return SQLITE_OK; |
| 23197 } |
| 23198 |
| 23199 /* |
| 23200 ** Destroy a tokenizer |
| 23201 */ |
| 23202 static int icuDestroy(sqlite3_tokenizer *pTokenizer){ |
| 23203 IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| 23204 sqlite3_free(p); |
| 23205 return SQLITE_OK; |
| 23206 } |
| 23207 |
| 23208 /* |
| 23209 ** Prepare to begin tokenizing a particular string. The input |
| 23210 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 23211 ** used to incrementally tokenize this string is returned in |
| 23212 ** *ppCursor. |
| 23213 */ |
| 23214 static int icuOpen( |
| 23215 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 23216 const char *zInput, /* Input string */ |
| 23217 int nInput, /* Length of zInput in bytes */ |
| 23218 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 23219 ){ |
| 23220 IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| 23221 IcuCursor *pCsr; |
| 23222 |
| 23223 const int32_t opt = U_FOLD_CASE_DEFAULT; |
| 23224 UErrorCode status = U_ZERO_ERROR; |
| 23225 int nChar; |
| 23226 |
| 23227 UChar32 c; |
| 23228 int iInput = 0; |
| 23229 int iOut = 0; |
| 23230 |
| 23231 *ppCursor = 0; |
| 23232 |
| 23233 if( zInput==0 ){ |
| 23234 nInput = 0; |
| 23235 zInput = ""; |
| 23236 }else if( nInput<0 ){ |
| 23237 nInput = strlen(zInput); |
| 23238 } |
| 23239 nChar = nInput+1; |
| 23240 pCsr = (IcuCursor *)sqlite3_malloc( |
| 23241 sizeof(IcuCursor) + /* IcuCursor */ |
| 23242 ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ |
| 23243 (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ |
| 23244 ); |
| 23245 if( !pCsr ){ |
| 23246 return SQLITE_NOMEM; |
| 23247 } |
| 23248 memset(pCsr, 0, sizeof(IcuCursor)); |
| 23249 pCsr->aChar = (UChar *)&pCsr[1]; |
| 23250 pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; |
| 23251 |
| 23252 pCsr->aOffset[iOut] = iInput; |
| 23253 U8_NEXT(zInput, iInput, nInput, c); |
| 23254 while( c>0 ){ |
| 23255 int isError = 0; |
| 23256 c = u_foldCase(c, opt); |
| 23257 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); |
| 23258 if( isError ){ |
| 23259 sqlite3_free(pCsr); |
| 23260 return SQLITE_ERROR; |
| 23261 } |
| 23262 pCsr->aOffset[iOut] = iInput; |
| 23263 |
| 23264 if( iInput<nInput ){ |
| 23265 U8_NEXT(zInput, iInput, nInput, c); |
| 23266 }else{ |
| 23267 c = 0; |
| 23268 } |
| 23269 } |
| 23270 |
| 23271 pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); |
| 23272 if( !U_SUCCESS(status) ){ |
| 23273 sqlite3_free(pCsr); |
| 23274 return SQLITE_ERROR; |
| 23275 } |
| 23276 pCsr->nChar = iOut; |
| 23277 |
| 23278 ubrk_first(pCsr->pIter); |
| 23279 *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; |
| 23280 return SQLITE_OK; |
| 23281 } |
| 23282 |
| 23283 /* |
| 23284 ** Close a tokenization cursor previously opened by a call to icuOpen(). |
| 23285 */ |
| 23286 static int icuClose(sqlite3_tokenizer_cursor *pCursor){ |
| 23287 IcuCursor *pCsr = (IcuCursor *)pCursor; |
| 23288 ubrk_close(pCsr->pIter); |
| 23289 sqlite3_free(pCsr->zBuffer); |
| 23290 sqlite3_free(pCsr); |
| 23291 return SQLITE_OK; |
| 23292 } |
| 23293 |
| 23294 /* |
| 23295 ** Extract the next token from a tokenization cursor. |
| 23296 */ |
| 23297 static int icuNext( |
| 23298 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
| 23299 const char **ppToken, /* OUT: *ppToken is the token text */ |
| 23300 int *pnBytes, /* OUT: Number of bytes in token */ |
| 23301 int *piStartOffset, /* OUT: Starting offset of token */ |
| 23302 int *piEndOffset, /* OUT: Ending offset of token */ |
| 23303 int *piPosition /* OUT: Position integer of token */ |
| 23304 ){ |
| 23305 IcuCursor *pCsr = (IcuCursor *)pCursor; |
| 23306 |
| 23307 int iStart = 0; |
| 23308 int iEnd = 0; |
| 23309 int nByte = 0; |
| 23310 |
| 23311 while( iStart==iEnd ){ |
| 23312 UChar32 c; |
| 23313 |
| 23314 iStart = ubrk_current(pCsr->pIter); |
| 23315 iEnd = ubrk_next(pCsr->pIter); |
| 23316 if( iEnd==UBRK_DONE ){ |
| 23317 return SQLITE_DONE; |
| 23318 } |
| 23319 |
| 23320 while( iStart<iEnd ){ |
| 23321 int iWhite = iStart; |
| 23322 U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); |
| 23323 if( u_isspace(c) ){ |
| 23324 iStart = iWhite; |
| 23325 }else{ |
| 23326 break; |
| 23327 } |
| 23328 } |
| 23329 assert(iStart<=iEnd); |
| 23330 } |
| 23331 |
| 23332 do { |
| 23333 UErrorCode status = U_ZERO_ERROR; |
| 23334 if( nByte ){ |
| 23335 char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); |
| 23336 if( !zNew ){ |
| 23337 return SQLITE_NOMEM; |
| 23338 } |
| 23339 pCsr->zBuffer = zNew; |
| 23340 pCsr->nBuffer = nByte; |
| 23341 } |
| 23342 |
| 23343 u_strToUTF8( |
| 23344 pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ |
| 23345 &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ |
| 23346 &status /* Output success/failure */ |
| 23347 ); |
| 23348 } while( nByte>pCsr->nBuffer ); |
| 23349 |
| 23350 *ppToken = pCsr->zBuffer; |
| 23351 *pnBytes = nByte; |
| 23352 *piStartOffset = pCsr->aOffset[iStart]; |
| 23353 *piEndOffset = pCsr->aOffset[iEnd]; |
| 23354 *piPosition = pCsr->iToken++; |
| 23355 |
| 23356 return SQLITE_OK; |
| 23357 } |
| 23358 |
| 23359 /* |
| 23360 ** The set of routines that implement the simple tokenizer |
| 23361 */ |
| 23362 static const sqlite3_tokenizer_module icuTokenizerModule = { |
| 23363 0, /* iVersion */ |
| 23364 icuCreate, /* xCreate */ |
| 23365 icuDestroy, /* xCreate */ |
| 23366 icuOpen, /* xOpen */ |
| 23367 icuClose, /* xClose */ |
| 23368 icuNext, /* xNext */ |
| 23369 0, /* xLanguageid */ |
| 23370 }; |
| 23371 |
| 23372 /* |
| 23373 ** Set *ppModule to point at the implementation of the ICU tokenizer. |
| 23374 */ |
| 23375 SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( |
| 23376 sqlite3_tokenizer_module const**ppModule |
| 23377 ){ |
| 23378 *ppModule = &icuTokenizerModule; |
| 23379 } |
| 23380 |
| 23381 #endif /* defined(SQLITE_ENABLE_ICU) */ |
| 23382 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 23383 |
| 23384 /************** End of fts3_icu.c ********************************************/ |
| 23385 |
| 23386 /* Chain include. */ |
| 23387 #include "sqlite3.07.c" |
OLD | NEW |