OLD | NEW |
(Empty) | |
| 1 /************** Begin file fts3.c ********************************************/ |
| 2 /* |
| 3 ** 2006 Oct 10 |
| 4 ** |
| 5 ** The author disclaims copyright to this source code. In place of |
| 6 ** a legal notice, here is a blessing: |
| 7 ** |
| 8 ** May you do good and not evil. |
| 9 ** May you find forgiveness for yourself and forgive others. |
| 10 ** May you share freely, never taking more than you give. |
| 11 ** |
| 12 ****************************************************************************** |
| 13 ** |
| 14 ** This is an SQLite module implementing full-text search. |
| 15 */ |
| 16 |
| 17 /* |
| 18 ** The code in this file is only compiled if: |
| 19 ** |
| 20 ** * The FTS3 module is being built as an extension |
| 21 ** (in which case SQLITE_CORE is not defined), or |
| 22 ** |
| 23 ** * The FTS3 module is being built into the core of |
| 24 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 25 */ |
| 26 |
| 27 /* The full-text index is stored in a series of b+tree (-like) |
| 28 ** structures called segments which map terms to doclists. The |
| 29 ** structures are like b+trees in layout, but are constructed from the |
| 30 ** bottom up in optimal fashion and are not updatable. Since trees |
| 31 ** are built from the bottom up, things will be described from the |
| 32 ** bottom up. |
| 33 ** |
| 34 ** |
| 35 **** Varints **** |
| 36 ** The basic unit of encoding is a variable-length integer called a |
| 37 ** varint. We encode variable-length integers in little-endian order |
| 38 ** using seven bits per byte as follows: |
| 39 ** |
| 40 ** KEY: |
| 41 ** A = 0xxxxxxx 7 bits of data and one flag bit |
| 42 ** B = 1xxxxxxx 7 bits of data and one flag bit |
| 43 ** |
| 44 ** 7 bits - A |
| 45 ** 14 bits - BA |
| 46 ** 21 bits - BBA |
| 47 ** and so on. |
| 48 ** |
| 49 ** This is similar in concept to how sqlite encodes "varints" but |
| 50 ** the encoding is not the same. SQLite varints are big-endian |
| 51 ** and are limited to 9 bytes in length whereas FTS3 varints are |
| 52 ** little-endian and can be up to 10 bytes in length (in theory). |
| 53 ** |
| 54 ** Example encodings: |
| 55 ** |
| 56 ** 1: 0x01 |
| 57 ** 127: 0x7f |
| 58 ** 128: 0x80 0x01 |
| 59 ** |
| 60 ** |
| 61 **** Document lists **** |
| 62 ** A doclist (document list) holds a docid-sorted list of hits for a |
| 63 ** given term. Doclists hold docids and associated token positions. |
| 64 ** A docid is the unique integer identifier for a single document. |
| 65 ** A position is the index of a word within the document. The first |
| 66 ** word of the document has a position of 0. |
| 67 ** |
| 68 ** FTS3 used to optionally store character offsets using a compile-time |
| 69 ** option. But that functionality is no longer supported. |
| 70 ** |
| 71 ** A doclist is stored like this: |
| 72 ** |
| 73 ** array { |
| 74 ** varint docid; (delta from previous docid) |
| 75 ** array { (position list for column 0) |
| 76 ** varint position; (2 more than the delta from previous position) |
| 77 ** } |
| 78 ** array { |
| 79 ** varint POS_COLUMN; (marks start of position list for new column) |
| 80 ** varint column; (index of new column) |
| 81 ** array { |
| 82 ** varint position; (2 more than the delta from previous position) |
| 83 ** } |
| 84 ** } |
| 85 ** varint POS_END; (marks end of positions for this document) |
| 86 ** } |
| 87 ** |
| 88 ** Here, array { X } means zero or more occurrences of X, adjacent in |
| 89 ** memory. A "position" is an index of a token in the token stream |
| 90 ** generated by the tokenizer. Note that POS_END and POS_COLUMN occur |
| 91 ** in the same logical place as the position element, and act as sentinels |
| 92 ** ending a position list array. POS_END is 0. POS_COLUMN is 1. |
| 93 ** The position numbers are not stored literally but rather as two more |
| 94 ** than the difference from the prior position, or just the position plus |
| 95 ** 2 for the first position. Example: |
| 96 ** |
| 97 ** label: A B C D E F G H I J K |
| 98 ** value: 123 5 9 1 1 14 35 0 234 72 0 |
| 99 ** |
| 100 ** The 123 value is the first docid. For column zero in this document |
| 101 ** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1 |
| 102 ** at D signals the start of a new column; the 1 at E indicates that the |
| 103 ** new column is column number 1. There are two positions at 12 and 45 |
| 104 ** (14-2 and 35-2+12). The 0 at H indicates the end-of-document. The |
| 105 ** 234 at I is the delta to next docid (357). It has one position 70 |
| 106 ** (72-2) and then terminates with the 0 at K. |
| 107 ** |
| 108 ** A "position-list" is the list of positions for multiple columns for |
| 109 ** a single docid. A "column-list" is the set of positions for a single |
| 110 ** column. Hence, a position-list consists of one or more column-lists, |
| 111 ** a document record consists of a docid followed by a position-list and |
| 112 ** a doclist consists of one or more document records. |
| 113 ** |
| 114 ** A bare doclist omits the position information, becoming an |
| 115 ** array of varint-encoded docids. |
| 116 ** |
| 117 **** Segment leaf nodes **** |
| 118 ** Segment leaf nodes store terms and doclists, ordered by term. Leaf |
| 119 ** nodes are written using LeafWriter, and read using LeafReader (to |
| 120 ** iterate through a single leaf node's data) and LeavesReader (to |
| 121 ** iterate through a segment's entire leaf layer). Leaf nodes have |
| 122 ** the format: |
| 123 ** |
| 124 ** varint iHeight; (height from leaf level, always 0) |
| 125 ** varint nTerm; (length of first term) |
| 126 ** char pTerm[nTerm]; (content of first term) |
| 127 ** varint nDoclist; (length of term's associated doclist) |
| 128 ** char pDoclist[nDoclist]; (content of doclist) |
| 129 ** array { |
| 130 ** (further terms are delta-encoded) |
| 131 ** varint nPrefix; (length of prefix shared with previous term) |
| 132 ** varint nSuffix; (length of unshared suffix) |
| 133 ** char pTermSuffix[nSuffix];(unshared suffix of next term) |
| 134 ** varint nDoclist; (length of term's associated doclist) |
| 135 ** char pDoclist[nDoclist]; (content of doclist) |
| 136 ** } |
| 137 ** |
| 138 ** Here, array { X } means zero or more occurrences of X, adjacent in |
| 139 ** memory. |
| 140 ** |
| 141 ** Leaf nodes are broken into blocks which are stored contiguously in |
| 142 ** the %_segments table in sorted order. This means that when the end |
| 143 ** of a node is reached, the next term is in the node with the next |
| 144 ** greater node id. |
| 145 ** |
| 146 ** New data is spilled to a new leaf node when the current node |
| 147 ** exceeds LEAF_MAX bytes (default 2048). New data which itself is |
| 148 ** larger than STANDALONE_MIN (default 1024) is placed in a standalone |
| 149 ** node (a leaf node with a single term and doclist). The goal of |
| 150 ** these settings is to pack together groups of small doclists while |
| 151 ** making it efficient to directly access large doclists. The |
| 152 ** assumption is that large doclists represent terms which are more |
| 153 ** likely to be query targets. |
| 154 ** |
| 155 ** TODO(shess) It may be useful for blocking decisions to be more |
| 156 ** dynamic. For instance, it may make more sense to have a 2.5k leaf |
| 157 ** node rather than splitting into 2k and .5k nodes. My intuition is |
| 158 ** that this might extend through 2x or 4x the pagesize. |
| 159 ** |
| 160 ** |
| 161 **** Segment interior nodes **** |
| 162 ** Segment interior nodes store blockids for subtree nodes and terms |
| 163 ** to describe what data is stored by each subtree. Interior |
| 164 ** nodes are written using InteriorWriter, and read using |
| 165 ** InteriorReader. InteriorWriters are created as needed when |
| 166 ** SegmentWriter creates new leaf nodes, or when an interior node |
| 167 ** itself grows too big and must be split. The format of interior |
| 168 ** nodes: |
| 169 ** |
| 170 ** varint iHeight; (height from leaf level, always >0) |
| 171 ** varint iBlockid; (block id of node's leftmost subtree) |
| 172 ** optional { |
| 173 ** varint nTerm; (length of first term) |
| 174 ** char pTerm[nTerm]; (content of first term) |
| 175 ** array { |
| 176 ** (further terms are delta-encoded) |
| 177 ** varint nPrefix; (length of shared prefix with previous term) |
| 178 ** varint nSuffix; (length of unshared suffix) |
| 179 ** char pTermSuffix[nSuffix]; (unshared suffix of next term) |
| 180 ** } |
| 181 ** } |
| 182 ** |
| 183 ** Here, optional { X } means an optional element, while array { X } |
| 184 ** means zero or more occurrences of X, adjacent in memory. |
| 185 ** |
| 186 ** An interior node encodes n terms separating n+1 subtrees. The |
| 187 ** subtree blocks are contiguous, so only the first subtree's blockid |
| 188 ** is encoded. The subtree at iBlockid will contain all terms less |
| 189 ** than the first term encoded (or all terms if no term is encoded). |
| 190 ** Otherwise, for terms greater than or equal to pTerm[i] but less |
| 191 ** than pTerm[i+1], the subtree for that term will be rooted at |
| 192 ** iBlockid+i. Interior nodes only store enough term data to |
| 193 ** distinguish adjacent children (if the rightmost term of the left |
| 194 ** child is "something", and the leftmost term of the right child is |
| 195 ** "wicked", only "w" is stored). |
| 196 ** |
| 197 ** New data is spilled to a new interior node at the same height when |
| 198 ** the current node exceeds INTERIOR_MAX bytes (default 2048). |
| 199 ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing |
| 200 ** interior nodes and making the tree too skinny. The interior nodes |
| 201 ** at a given height are naturally tracked by interior nodes at |
| 202 ** height+1, and so on. |
| 203 ** |
| 204 ** |
| 205 **** Segment directory **** |
| 206 ** The segment directory in table %_segdir stores meta-information for |
| 207 ** merging and deleting segments, and also the root node of the |
| 208 ** segment's tree. |
| 209 ** |
| 210 ** The root node is the top node of the segment's tree after encoding |
| 211 ** the entire segment, restricted to ROOT_MAX bytes (default 1024). |
| 212 ** This could be either a leaf node or an interior node. If the top |
| 213 ** node requires more than ROOT_MAX bytes, it is flushed to %_segments |
| 214 ** and a new root interior node is generated (which should always fit |
| 215 ** within ROOT_MAX because it only needs space for 2 varints, the |
| 216 ** height and the blockid of the previous root). |
| 217 ** |
| 218 ** The meta-information in the segment directory is: |
| 219 ** level - segment level (see below) |
| 220 ** idx - index within level |
| 221 ** - (level,idx uniquely identify a segment) |
| 222 ** start_block - first leaf node |
| 223 ** leaves_end_block - last leaf node |
| 224 ** end_block - last block (including interior nodes) |
| 225 ** root - contents of root node |
| 226 ** |
| 227 ** If the root node is a leaf node, then start_block, |
| 228 ** leaves_end_block, and end_block are all 0. |
| 229 ** |
| 230 ** |
| 231 **** Segment merging **** |
| 232 ** To amortize update costs, segments are grouped into levels and |
| 233 ** merged in batches. Each increase in level represents exponentially |
| 234 ** more documents. |
| 235 ** |
| 236 ** New documents (actually, document updates) are tokenized and |
| 237 ** written individually (using LeafWriter) to a level 0 segment, with |
| 238 ** incrementing idx. When idx reaches MERGE_COUNT (default 16), all |
| 239 ** level 0 segments are merged into a single level 1 segment. Level 1 |
| 240 ** is populated like level 0, and eventually MERGE_COUNT level 1 |
| 241 ** segments are merged to a single level 2 segment (representing |
| 242 ** MERGE_COUNT^2 updates), and so on. |
| 243 ** |
| 244 ** A segment merge traverses all segments at a given level in |
| 245 ** parallel, performing a straightforward sorted merge. Since segment |
| 246 ** leaf nodes are written into the %_segments table in order, this |
| 247 ** merge traverses the underlying sqlite disk structures efficiently. |
| 248 ** After the merge, all segment blocks from the merged level are |
| 249 ** deleted. |
| 250 ** |
| 251 ** MERGE_COUNT controls how often we merge segments. 16 seems to be |
| 252 ** somewhat of a sweet spot for insertion performance. 32 and 64 show |
| 253 ** very similar performance numbers to 16 on insertion, though they're |
| 254 ** a tiny bit slower (perhaps due to more overhead in merge-time |
| 255 ** sorting). 8 is about 20% slower than 16, 4 about 50% slower than |
| 256 ** 16, 2 about 66% slower than 16. |
| 257 ** |
| 258 ** At query time, high MERGE_COUNT increases the number of segments |
| 259 ** which need to be scanned and merged. For instance, with 100k docs |
| 260 ** inserted: |
| 261 ** |
| 262 ** MERGE_COUNT segments |
| 263 ** 16 25 |
| 264 ** 8 12 |
| 265 ** 4 10 |
| 266 ** 2 6 |
| 267 ** |
| 268 ** This appears to have only a moderate impact on queries for very |
| 269 ** frequent terms (which are somewhat dominated by segment merge |
| 270 ** costs), and infrequent and non-existent terms still seem to be fast |
| 271 ** even with many segments. |
| 272 ** |
| 273 ** TODO(shess) That said, it would be nice to have a better query-side |
| 274 ** argument for MERGE_COUNT of 16. Also, it is possible/likely that |
| 275 ** optimizations to things like doclist merging will swing the sweet |
| 276 ** spot around. |
| 277 ** |
| 278 ** |
| 279 ** |
| 280 **** Handling of deletions and updates **** |
| 281 ** Since we're using a segmented structure, with no docid-oriented |
| 282 ** index into the term index, we clearly cannot simply update the term |
| 283 ** index when a document is deleted or updated. For deletions, we |
| 284 ** write an empty doclist (varint(docid) varint(POS_END)), for updates |
| 285 ** we simply write the new doclist. Segment merges overwrite older |
| 286 ** data for a particular docid with newer data, so deletes or updates |
| 287 ** will eventually overtake the earlier data and knock it out. The |
| 288 ** query logic likewise merges doclists so that newer data knocks out |
| 289 ** older data. |
| 290 */ |
| 291 #define CHROMIUM_FTS3_CHANGES 1 |
| 292 |
| 293 /************** Include fts3Int.h in the middle of fts3.c ********************/ |
| 294 /************** Begin file fts3Int.h *****************************************/ |
| 295 /* |
| 296 ** 2009 Nov 12 |
| 297 ** |
| 298 ** The author disclaims copyright to this source code. In place of |
| 299 ** a legal notice, here is a blessing: |
| 300 ** |
| 301 ** May you do good and not evil. |
| 302 ** May you find forgiveness for yourself and forgive others. |
| 303 ** May you share freely, never taking more than you give. |
| 304 ** |
| 305 ****************************************************************************** |
| 306 ** |
| 307 */ |
| 308 #ifndef _FTSINT_H |
| 309 #define _FTSINT_H |
| 310 |
| 311 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) |
| 312 # define NDEBUG 1 |
| 313 #endif |
| 314 |
| 315 /* FTS3/FTS4 require virtual tables */ |
| 316 #ifdef SQLITE_OMIT_VIRTUALTABLE |
| 317 # undef SQLITE_ENABLE_FTS3 |
| 318 # undef SQLITE_ENABLE_FTS4 |
| 319 #endif |
| 320 |
| 321 /* |
| 322 ** FTS4 is really an extension for FTS3. It is enabled using the |
| 323 ** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow |
| 324 ** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3. |
| 325 */ |
| 326 #if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) |
| 327 # define SQLITE_ENABLE_FTS3 |
| 328 #endif |
| 329 |
| 330 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 331 |
| 332 /* If not building as part of the core, include sqlite3ext.h. */ |
| 333 #ifndef SQLITE_CORE |
| 334 /* # include "sqlite3ext.h" */ |
| 335 SQLITE_EXTENSION_INIT3 |
| 336 #endif |
| 337 |
| 338 /* #include "sqlite3.h" */ |
| 339 /************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ |
| 340 /************** Begin file fts3_tokenizer.h **********************************/ |
| 341 /* |
| 342 ** 2006 July 10 |
| 343 ** |
| 344 ** The author disclaims copyright to this source code. |
| 345 ** |
| 346 ************************************************************************* |
| 347 ** Defines the interface to tokenizers used by fulltext-search. There |
| 348 ** are three basic components: |
| 349 ** |
| 350 ** sqlite3_tokenizer_module is a singleton defining the tokenizer |
| 351 ** interface functions. This is essentially the class structure for |
| 352 ** tokenizers. |
| 353 ** |
| 354 ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps |
| 355 ** including customization information defined at creation time. |
| 356 ** |
| 357 ** sqlite3_tokenizer_cursor is generated by a tokenizer to generate |
| 358 ** tokens from a particular input. |
| 359 */ |
| 360 #ifndef _FTS3_TOKENIZER_H_ |
| 361 #define _FTS3_TOKENIZER_H_ |
| 362 |
| 363 /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. |
| 364 ** If tokenizers are to be allowed to call sqlite3_*() functions, then |
| 365 ** we will need a way to register the API consistently. |
| 366 */ |
| 367 /* #include "sqlite3.h" */ |
| 368 |
| 369 /* |
| 370 ** Structures used by the tokenizer interface. When a new tokenizer |
| 371 ** implementation is registered, the caller provides a pointer to |
| 372 ** an sqlite3_tokenizer_module containing pointers to the callback |
| 373 ** functions that make up an implementation. |
| 374 ** |
| 375 ** When an fts3 table is created, it passes any arguments passed to |
| 376 ** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the |
| 377 ** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer |
| 378 ** implementation. The xCreate() function in turn returns an |
| 379 ** sqlite3_tokenizer structure representing the specific tokenizer to |
| 380 ** be used for the fts3 table (customized by the tokenizer clause arguments). |
| 381 ** |
| 382 ** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() |
| 383 ** method is called. It returns an sqlite3_tokenizer_cursor object |
| 384 ** that may be used to tokenize a specific input buffer based on |
| 385 ** the tokenization rules supplied by a specific sqlite3_tokenizer |
| 386 ** object. |
| 387 */ |
| 388 typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; |
| 389 typedef struct sqlite3_tokenizer sqlite3_tokenizer; |
| 390 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; |
| 391 |
| 392 struct sqlite3_tokenizer_module { |
| 393 |
| 394 /* |
| 395 ** Structure version. Always 0 or 1; xLanguageid is only available if >=1. |
| 396 */ |
| 397 int iVersion; |
| 398 |
| 399 /* |
| 400 ** Create a new tokenizer. The values in the argv[] array are the |
| 401 ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL |
| 402 ** TABLE statement that created the fts3 table. For example, if |
| 403 ** the following SQL is executed: |
| 404 ** |
| 405 ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2) |
| 406 ** |
| 407 ** then argc is set to 2, and the argv[] array contains pointers |
| 408 ** to the strings "arg1" and "arg2". |
| 409 ** |
| 410 ** This method should return either SQLITE_OK (0), or an SQLite error |
| 411 ** code. If SQLITE_OK is returned, then *ppTokenizer should be set |
| 412 ** to point at the newly created tokenizer structure. The generic |
| 413 ** sqlite3_tokenizer.pModule variable should not be initialized by |
| 414 ** this callback. The caller will do so. |
| 415 */ |
| 416 int (*xCreate)( |
| 417 int argc, /* Size of argv array */ |
| 418 const char *const*argv, /* Tokenizer argument strings */ |
| 419 sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ |
| 420 ); |
| 421 |
| 422 /* |
| 423 ** Destroy an existing tokenizer. The fts3 module calls this method |
| 424 ** exactly once for each successful call to xCreate(). |
| 425 */ |
| 426 int (*xDestroy)(sqlite3_tokenizer *pTokenizer); |
| 427 |
| 428 /* |
| 429 ** Create a tokenizer cursor to tokenize an input buffer. The caller |
| 430 ** is responsible for ensuring that the input buffer remains valid |
| 431 ** until the cursor is closed (using the xClose() method). |
| 432 */ |
| 433 int (*xOpen)( |
| 434 sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ |
| 435 const char *pInput, int nBytes, /* Input buffer */ |
| 436 sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ |
| 437 ); |
| 438 |
| 439 /* |
| 440 ** Destroy an existing tokenizer cursor. The fts3 module calls this |
| 441 ** method exactly once for each successful call to xOpen(). |
| 442 */ |
| 443 int (*xClose)(sqlite3_tokenizer_cursor *pCursor); |
| 444 |
| 445 /* |
| 446 ** Retrieve the next token from the tokenizer cursor pCursor. This |
| 447 ** method should either return SQLITE_OK and set the values of the |
| 448 ** "OUT" variables identified below, or SQLITE_DONE to indicate that |
| 449 ** the end of the buffer has been reached, or an SQLite error code. |
| 450 ** |
| 451 ** *ppToken should be set to point at a buffer containing the |
| 452 ** normalized version of the token (i.e. after any case-folding and/or |
| 453 ** stemming has been performed). *pnBytes should be set to the length |
| 454 ** of this buffer in bytes. The input text that generated the token is |
| 455 ** identified by the byte offsets returned in *piStartOffset and |
| 456 ** *piEndOffset. *piStartOffset should be set to the index of the first |
| 457 ** byte of the token in the input buffer. *piEndOffset should be set |
| 458 ** to the index of the first byte just past the end of the token in |
| 459 ** the input buffer. |
| 460 ** |
| 461 ** The buffer *ppToken is set to point at is managed by the tokenizer |
| 462 ** implementation. It is only required to be valid until the next call |
| 463 ** to xNext() or xClose(). |
| 464 */ |
| 465 /* TODO(shess) current implementation requires pInput to be |
| 466 ** nul-terminated. This should either be fixed, or pInput/nBytes |
| 467 ** should be converted to zInput. |
| 468 */ |
| 469 int (*xNext)( |
| 470 sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ |
| 471 const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ |
| 472 int *piStartOffset, /* OUT: Byte offset of token in input buffer */ |
| 473 int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ |
| 474 int *piPosition /* OUT: Number of tokens returned before this one */ |
| 475 ); |
| 476 |
| 477 /*********************************************************************** |
| 478 ** Methods below this point are only available if iVersion>=1. |
| 479 */ |
| 480 |
| 481 /* |
| 482 ** Configure the language id of a tokenizer cursor (iVersion>=1 only). |
| 483 */ |
| 484 int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid); |
| 485 }; |
| 486 |
| 487 struct sqlite3_tokenizer { |
| 488 const sqlite3_tokenizer_module *pModule; /* Module; set by xCreate()'s caller */ |
| 489 /* Tokenizer implementations will typically add additional fields */ |
| 490 }; |
| 491 |
| 492 struct sqlite3_tokenizer_cursor { |
| 493 sqlite3_tokenizer *pTokenizer; /* Tokenizer that generated this cursor */ |
| 494 /* Tokenizer implementations will typically add additional fields */ |
| 495 }; |
| 496 |
| 497 int fts3_global_term_cnt(int iTerm, int iCol); |
| 498 int fts3_term_cnt(int iTerm, int iCol); |
| 499 |
| 500 |
| 501 #endif /* _FTS3_TOKENIZER_H_ */ |
| 502 |
| 503 /************** End of fts3_tokenizer.h **************************************/ |
| 504 /************** Continuing where we left off in fts3Int.h ********************/ |
| 505 /************** Include fts3_hash.h in the middle of fts3Int.h ***************/ |
| 506 /************** Begin file fts3_hash.h ***************************************/ |
| 507 /* |
| 508 ** 2001 September 22 |
| 509 ** |
| 510 ** The author disclaims copyright to this source code. In place of |
| 511 ** a legal notice, here is a blessing: |
| 512 ** |
| 513 ** May you do good and not evil. |
| 514 ** May you find forgiveness for yourself and forgive others. |
| 515 ** May you share freely, never taking more than you give. |
| 516 ** |
| 517 ************************************************************************* |
| 518 ** This is the header file for the generic hash-table implementation |
| 519 ** used in SQLite. We've modified it slightly to serve as a standalone |
| 520 ** hash table implementation for the full-text indexing module. |
| 521 ** |
| 522 */ |
| 523 #ifndef _FTS3_HASH_H_ |
| 524 #define _FTS3_HASH_H_ |
| 525 |
| 526 /* Forward declarations of structures. */ |
| 527 typedef struct Fts3Hash Fts3Hash; |
| 528 typedef struct Fts3HashElem Fts3HashElem; |
| 529 |
| 530 /* A complete hash table is an instance of the following structure. |
| 531 ** The internals of this structure are intended to be opaque -- client |
| 532 ** code should not attempt to access or modify the fields of this structure |
| 533 ** directly. Change this structure only by using the routines below. |
| 534 ** However, many of the "procedures" and "functions" for modifying and |
| 535 ** accessing this structure are really macros, so we can't really make |
| 536 ** this structure opaque. |
| 537 */ |
| 538 struct Fts3Hash { |
| 539 char keyClass; /* FTS3_HASH_STRING or FTS3_HASH_BINARY */ |
| 540 char copyKey; /* True if copy of key made on insert */ |
| 541 int count; /* Number of entries in this table */ |
| 542 Fts3HashElem *first; /* First element of the doubly-linked element list */ |
| 543 int htsize; /* Number of buckets in the hash table */ |
| 544 struct _fts3ht { /* the hash table */ |
| 545 int count; /* Number of entries with this hash */ |
| 546 Fts3HashElem *chain; /* Pointer to first entry with this hash */ |
| 547 } *ht; |
| 548 }; |
| 549 |
| 550 /* Each element in the hash table is an instance of the following |
| 551 ** structure. All elements are stored on a single doubly-linked list. |
| 552 ** |
| 553 ** Again, this structure is intended to be opaque, but it can't really |
| 554 ** be opaque because it is used by macros. |
| 555 */ |
| 556 struct Fts3HashElem { |
| 557 Fts3HashElem *next, *prev; /* Neighbors on the list of all elements */ |
| 558 void *data; /* Data associated with this element */ |
| 559 void *pKey; int nKey; /* Key for this element and its size in bytes */ |
| 560 }; |
| 561 |
| 562 /* |
| 563 ** There are 2 different modes of operation for a hash table: |
| 564 ** |
| 565 ** FTS3_HASH_STRING pKey points to a string that is nKey bytes long |
| 566 ** (including the null-terminator, if any). Case |
| 567 ** is respected in comparisons. |
| 568 ** |
| 569 ** FTS3_HASH_BINARY pKey points to binary data nKey bytes long. |
| 570 ** memcmp() is used to compare keys. |
| 571 ** |
| 572 ** A copy of the key is made if the copyKey parameter to fts3HashInit is 1. |
| 573 */ |
| 574 #define FTS3_HASH_STRING 1 |
| 575 #define FTS3_HASH_BINARY 2 |
| 576 |
| 577 /* |
| 578 ** Access routines. To delete, insert a NULL pointer. |
| 579 */ |
| 580 SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copy
Key); |
| 581 SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey
, void *pData); |
| 582 SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int
nKey); |
| 583 SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*); |
| 584 SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const voi
d *, int); |
| 585 |
| 586 /* |
| 587 ** Shorthand for the functions above |
| 588 */ |
| 589 #define fts3HashInit sqlite3Fts3HashInit |
| 590 #define fts3HashInsert sqlite3Fts3HashInsert |
| 591 #define fts3HashFind sqlite3Fts3HashFind |
| 592 #define fts3HashClear sqlite3Fts3HashClear |
| 593 #define fts3HashFindElem sqlite3Fts3HashFindElem |
| 594 |
| 595 /* |
| 596 ** Macros for looping over all elements of a hash table. The idiom is |
| 597 ** like this: |
| 598 ** |
| 599 ** Fts3Hash h; |
| 600 ** Fts3HashElem *p; |
| 601 ** ... |
| 602 ** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){ |
| 603 ** SomeStructure *pData = fts3HashData(p); |
| 604 ** // do something with pData |
| 605 ** } |
| 606 */ |
| 607 #define fts3HashFirst(H) ((H)->first) |
| 608 #define fts3HashNext(E) ((E)->next) |
| 609 #define fts3HashData(E) ((E)->data) |
| 610 #define fts3HashKey(E) ((E)->pKey) |
| 611 #define fts3HashKeysize(E) ((E)->nKey) |
| 612 |
| 613 /* |
| 614 ** Number of entries in a hash table |
| 615 */ |
| 616 #define fts3HashCount(H) ((H)->count) |
| 617 |
| 618 #endif /* _FTS3_HASH_H_ */ |
| 619 |
| 620 /************** End of fts3_hash.h *******************************************/ |
| 621 /************** Continuing where we left off in fts3Int.h ********************/ |
| 622 |
| 623 /* |
| 624 ** This constant determines the maximum depth of an FTS expression tree |
| 625 ** that the library will create and use. FTS uses recursion to perform |
| 626 ** various operations on the query tree, so the disadvantage of a large |
| 627 ** limit is that it may allow very large queries to use large amounts |
| 628 ** of stack space (perhaps causing a stack overflow). |
| 629 */ |
| 630 #ifndef SQLITE_FTS3_MAX_EXPR_DEPTH |
| 631 # define SQLITE_FTS3_MAX_EXPR_DEPTH 12 |
| 632 #endif |
| 633 |
| 634 |
| 635 /* |
| 636 ** This constant controls how often segments are merged. Once there are |
| 637 ** FTS3_MERGE_COUNT segments of level N, they are merged into a single |
| 638 ** segment of level N+1. |
| 639 */ |
| 640 #define FTS3_MERGE_COUNT 16 |
| 641 |
| 642 /* |
| 643 ** This is the maximum amount of data (in bytes) to store in the |
| 644 ** Fts3Table.pendingTerms hash table. Normally, the hash table is |
| 645 ** populated as documents are inserted/updated/deleted in a transaction |
| 646 ** and used to create a new segment when the transaction is committed. |
| 647 ** However if this limit is reached midway through a transaction, a new |
| 648 ** segment is created and the hash table cleared immediately. |
| 649 */ |
| 650 #define FTS3_MAX_PENDING_DATA (1*1024*1024) |
| 651 |
| 652 /* |
| 653 ** Macro to return the number of elements in an array. SQLite has a |
| 654 ** similar macro called ArraySize(). Use a different name to avoid |
| 655 ** a collision when building an amalgamation with built-in FTS3. |
| 656 */ |
| 657 #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) |
| 658 |
| 659 |
| 660 #ifndef MIN |
| 661 # define MIN(x,y) ((x)<(y)?(x):(y)) |
| 662 #endif |
| 663 #ifndef MAX |
| 664 # define MAX(x,y) ((x)>(y)?(x):(y)) |
| 665 #endif |
| 666 |
| 667 /* |
| 668 ** Maximum length of a varint encoded integer. The varint format is different |
| 669 ** from that used by SQLite, so the maximum length is 10, not 9. |
| 670 */ |
| 671 #define FTS3_VARINT_MAX 10 |
| 672 |
| 673 /* |
| 674 ** FTS4 virtual tables may maintain multiple indexes - one index of all terms |
| 675 ** in the document set and zero or more prefix indexes. All indexes are stored |
| 676 ** as one or more b+-trees in the %_segments and %_segdir tables. |
| 677 ** |
| 678 ** It is possible to determine which index a b+-tree belongs to based on the |
| 679 ** value stored in the "%_segdir.level" column. Given this value L, the index |
| 680 ** that the b+-tree belongs to is (L>>10). In other words, all b+-trees with |
| 681 ** level values between 0 and 1023 (inclusive) belong to index 0, all levels |
| 682 ** between 1024 and 2047 to index 1, and so on. |
| 683 ** |
| 684 ** It is considered impossible for an index to use more than 1024 levels. In |
| 685 ** theory though this may happen, but only after at least |
| 686 ** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. |
| 687 */ |
| 688 #define FTS3_SEGDIR_MAXLEVEL 1024 |
| 689 #define FTS3_SEGDIR_MAXLEVEL_STR "1024" |
| 690 |
| 691 /* |
| 692 ** The testcase() macro is only used by the amalgamation. If undefined, |
| 693 ** make it a no-op. |
| 694 */ |
| 695 #ifndef testcase |
| 696 # define testcase(X) |
| 697 #endif |
| 698 |
| 699 /* |
| 700 ** Terminator values for position-lists and column-lists. |
| 701 */ |
| 702 #define POS_COLUMN (1) /* Column-list terminator */ |
| 703 #define POS_END (0) /* Position-list terminator */ |
| 704 |
| 705 /* |
| 706 ** This section provides definitions to allow the |
| 707 ** FTS3 extension to be compiled outside of the |
| 708 ** amalgamation. |
| 709 */ |
| 710 #ifndef SQLITE_AMALGAMATION |
| 711 /* |
| 712 ** Macros indicating that conditional expressions are always true or |
| 713 ** false. |
| 714 */ |
| 715 #ifdef SQLITE_COVERAGE_TEST |
| 716 # define ALWAYS(x) (1) |
| 717 # define NEVER(X) (0) |
| 718 #elif defined(SQLITE_DEBUG) |
| 719 # define ALWAYS(x) sqlite3Fts3Always((x)!=0) |
| 720 # define NEVER(x) sqlite3Fts3Never((x)!=0) |
| 721 SQLITE_PRIVATE int sqlite3Fts3Always(int b); |
| 722 SQLITE_PRIVATE int sqlite3Fts3Never(int b); |
| 723 #else |
| 724 # define ALWAYS(x) (x) |
| 725 # define NEVER(x) (x) |
| 726 #endif |
| 727 |
| 728 /* |
| 729 ** Internal types used by SQLite. |
| 730 */ |
| 731 typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ |
| 732 typedef short int i16; /* 2-byte (or larger) signed integer */ |
| 733 typedef unsigned int u32; /* 4-byte unsigned integer */ |
| 734 typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ |
| 735 typedef sqlite3_int64 i64; /* 8-byte signed integer */ |
| 736 |
| 737 /* |
| 738 ** Macro used to suppress compiler warnings for unused parameters. |
| 739 */ |
| 740 #define UNUSED_PARAMETER(x) (void)(x) |
| 741 |
| 742 /* |
| 743 ** Activate assert() only if SQLITE_DEBUG is enabled. |
| 744 */ |
| 745 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) |
| 746 # define NDEBUG 1 |
| 747 #endif |
| 748 |
| 749 /* |
| 750 ** The TESTONLY macro is used to enclose variable declarations or |
| 751 ** other bits of code that are needed to support the arguments |
| 752 ** within testcase() and assert() macros. |
| 753 */ |
| 754 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) |
| 755 # define TESTONLY(X) X |
| 756 #else |
| 757 # define TESTONLY(X) |
| 758 #endif |
| 759 |
| 760 #endif /* SQLITE_AMALGAMATION */ |
| 761 |
| 762 #ifdef SQLITE_DEBUG |
| 763 SQLITE_PRIVATE int sqlite3Fts3Corrupt(void); |
| 764 # define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() |
| 765 #else |
| 766 # define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB |
| 767 #endif |
| 768 |
| 769 typedef struct Fts3Table Fts3Table; |
| 770 typedef struct Fts3Cursor Fts3Cursor; |
| 771 typedef struct Fts3Expr Fts3Expr; |
| 772 typedef struct Fts3Phrase Fts3Phrase; |
| 773 typedef struct Fts3PhraseToken Fts3PhraseToken; |
| 774 |
| 775 typedef struct Fts3Doclist Fts3Doclist; |
| 776 typedef struct Fts3SegFilter Fts3SegFilter; |
| 777 typedef struct Fts3DeferredToken Fts3DeferredToken; |
| 778 typedef struct Fts3SegReader Fts3SegReader; |
| 779 typedef struct Fts3MultiSegReader Fts3MultiSegReader; |
| 780 |
| 781 typedef struct MatchinfoBuffer MatchinfoBuffer; |
| 782 |
| 783 /* |
| 784 ** A connection to a fulltext index is an instance of the following |
| 785 ** structure. The xCreate and xConnect methods create an instance |
| 786 ** of this structure and xDestroy and xDisconnect free that instance. |
| 787 ** All other methods receive a pointer to the structure as one of their |
| 788 ** arguments. |
| 789 */ |
| 790 struct Fts3Table { |
| 791 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 792 sqlite3 *db; /* The database connection */ |
| 793 const char *zDb; /* logical database name */ |
| 794 const char *zName; /* virtual table name */ |
| 795 int nColumn; /* number of named columns in virtual table */ |
| 796 char **azColumn; /* column names. malloced */ |
| 797 u8 *abNotindexed; /* True for 'notindexed' columns */ |
| 798 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ |
| 799 char *zContentTbl; /* content=xxx option, or NULL */ |
| 800 char *zLanguageid; /* languageid=xxx option, or NULL */ |
| 801 int nAutoincrmerge; /* Value configured by 'automerge' */ |
| 802 u32 nLeafAdd; /* Number of leaf blocks added this trans */ |
| 803 |
| 804 /* Precompiled statements used by the implementation. Each of these |
| 805 ** statements is run and reset within a single virtual table API call. |
| 806 */ |
| 807 sqlite3_stmt *aStmt[40]; |
| 808 sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */ |
| 809 |
| 810 char *zReadExprlist; |
| 811 char *zWriteExprlist; |
| 812 |
| 813 int nNodeSize; /* Soft limit for node size */ |
| 814 u8 bFts4; /* True for FTS4, false for FTS3 */ |
| 815 u8 bHasStat; /* True if %_stat table exists (2==unknown) */ |
| 816 u8 bHasDocsize; /* True if %_docsize table exists */ |
| 817 u8 bDescIdx; /* True if doclists are in reverse order */ |
| 818 u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ |
| 819 int nPgsz; /* Page size for host database */ |
| 820 char *zSegmentsTbl; /* Name of %_segments table */ |
| 821 sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ |
| 822 |
| 823 /* |
| 824 ** The following array of hash tables is used to buffer pending index |
| 825 ** updates during transactions. All pending updates buffered at any one |
| 826 ** time must share a common language-id (see the FTS4 langid= feature). |
| 827 ** The current language id is stored in variable iPrevLangid. |
| 828 ** |
| 829 ** A single FTS4 table may have multiple full-text indexes. For each index |
| 830 ** there is an entry in the aIndex[] array. Index 0 is an index of all the |
| 831 ** terms that appear in the document set. Each subsequent index in aIndex[] |
| 832 ** is an index of prefixes of a specific length. |
| 833 ** |
| 834 ** Variable nPendingData contains an estimate of the memory consumed by the |
| 835 ** pending data structures, including hash table overhead, but not including |
| 836 ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash |
| 837 ** tables are flushed to disk. Variable iPrevDocid is the docid of the most |
| 838 ** recently inserted record. |
| 839 */ |
| 840 int nIndex; /* Size of aIndex[] */ |
| 841 struct Fts3Index { |
| 842 int nPrefix; /* Prefix length (0 for main terms index) */ |
| 843 Fts3Hash hPending; /* Pending terms table for this index */ |
| 844 } *aIndex; |
| 845 int nMaxPendingData; /* Max pending data before flush to disk */ |
| 846 int nPendingData; /* Current bytes of pending data */ |
| 847 sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ |
| 848 int iPrevLangid; /* Langid of recently inserted document */ |
| 849 int bPrevDelete; /* True if last operation was a delete */ |
| 850 |
| 851 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) |
| 852 /* State variables used for validating that the transaction control |
| 853 ** methods of the virtual table are called at appropriate times. These |
| 854 ** values do not contribute to FTS functionality; they are used for |
| 855 ** verifying the operation of the SQLite core. |
| 856 */ |
| 857 int inTransaction; /* True after xBegin but before xCommit/xRollback */ |
| 858 int mxSavepoint; /* Largest valid xSavepoint integer */ |
| 859 #endif |
| 860 |
| 861 #ifdef SQLITE_TEST |
| 862 /* True to disable the incremental doclist optimization. This is controlled |
| 863 ** by special insert command 'test-no-incr-doclist'. */ |
| 864 int bNoIncrDoclist; |
| 865 #endif |
| 866 }; |
| 867 |
| 868 /* |
| 869 ** When the core wants to read from the virtual table, it creates a |
| 870 ** virtual table cursor (an instance of the following structure) using |
| 871 ** the xOpen method. Cursors are destroyed using the xClose method. |
| 872 */ |
| 873 struct Fts3Cursor { |
| 874 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 875 i16 eSearch; /* Search strategy (see below) */ |
| 876 u8 isEof; /* True if at End Of Results */ |
| 877 u8 isRequireSeek; /* True if must seek pStmt to %_content row */ |
| 878 u8 bSeekStmt; /* True if pStmt is a seek */ |
| 879 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ |
| 880 Fts3Expr *pExpr; /* Parsed MATCH query string */ |
| 881 int iLangid; /* Language being queried for */ |
| 882 int nPhrase; /* Number of matchable phrases in query */ |
| 883 Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ |
| 884 sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ |
| 885 char *pNextId; /* Pointer into the body of aDoclist */ |
| 886 char *aDoclist; /* List of docids for full-text queries */ |
| 887 int nDoclist; /* Size of buffer at aDoclist */ |
| 888 u8 bDesc; /* True to sort in descending order */ |
| 889 int eEvalmode; /* An FTS3_EVAL_XX constant */ |
| 890 int nRowAvg; /* Average size of database rows, in pages */ |
| 891 sqlite3_int64 nDoc; /* Documents in table */ |
| 892 i64 iMinDocid; /* Minimum docid to return */ |
| 893 i64 iMaxDocid; /* Maximum docid to return */ |
| 894 int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ |
| 895 MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ |
| 896 }; |
| 897 |
| 898 #define FTS3_EVAL_FILTER 0 |
| 899 #define FTS3_EVAL_NEXT 1 |
| 900 #define FTS3_EVAL_MATCHINFO 2 |
| 901 |
| 902 /* |
| 903 ** The Fts3Cursor.eSearch member is always set to one of the following. |
| 904 ** Actually, Fts3Cursor.eSearch can be greater than or equal to |
| 905 ** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index |
| 906 ** of the column to be searched. For example, in |
| 907 ** |
| 908 ** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); |
| 909 ** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; |
| 910 ** |
| 911 ** Because the LHS of the MATCH operator is 2nd column "b", |
| 912 ** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, |
| 913 ** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" |
| 914 ** indicating that all columns should be searched, |
| 915 ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. |
| 916 */ |
| 917 #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ |
| 918 #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ |
| 919 #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ |
| 920 |
| 921 /* |
| 922 ** The lower 16-bits of the sqlite3_index_info.idxNum value set by |
| 923 ** the xBestIndex() method contains the Fts3Cursor.eSearch value described |
| 924 ** above. The upper 16-bits contain a combination of the following |
| 925 ** bits, used to describe extra constraints on full-text searches. |
| 926 */ |
| 927 #define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ |
| 928 #define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ |
| 929 #define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ |
| 930 |
| 931 struct Fts3Doclist { |
| 932 char *aAll; /* Array containing doclist (or NULL) */ |
| 933 int nAll; /* Size of aAll[] in bytes */ |
| 934 char *pNextDocid; /* Pointer to next docid */ |
| 935 |
| 936 sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ |
| 937 int bFreeList; /* True if pList should be sqlite3_free()d */ |
| 938 char *pList; /* Pointer to position list following iDocid */ |
| 939 int nList; /* Length of position list */ |
| 940 }; |
| 941 |
| 942 /* |
| 943 ** A "phrase" is a sequence of one or more tokens that must match in |
| 944 ** sequence. A single token is the base case and the most common case. |
| 945 ** For a sequence of tokens contained in double-quotes (i.e. "one two three") |
| 946 ** nToken will be the number of tokens in the string. |
| 947 */ |
| 948 struct Fts3PhraseToken { |
| 949 char *z; /* Text of the token */ |
| 950 int n; /* Number of bytes in buffer z */ |
| 951 int isPrefix; /* True if token ends with a "*" character */ |
| 952 int bFirst; /* True if token must appear at position 0 */ |
| 953 |
| 954 /* Variables above this point are populated when the expression is |
| 955 ** parsed (by code in fts3_expr.c). Below this point the variables are |
| 956 ** used when evaluating the expression. */ |
| 957 Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ |
| 958 Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ |
| 959 }; |
| 960 |
| 961 struct Fts3Phrase { |
| 962 /* Cache of doclist for this phrase. */ |
| 963 Fts3Doclist doclist; |
| 964 int bIncr; /* True if doclist is loaded incrementally */ |
| 965 int iDoclistToken; |
| 966 |
| 967 /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an |
| 968 ** OR condition. */ |
| 969 char *pOrPoslist; |
| 970 i64 iOrDocid; |
| 971 |
| 972 /* Variables below this point are populated by fts3_expr.c when parsing |
| 973 ** a MATCH expression. Everything above is part of the evaluation phase. |
| 974 */ |
| 975 int nToken; /* Number of tokens in the phrase */ |
| 976 int iColumn; /* Index of column this phrase must match */ |
| 977 Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ |
| 978 }; |
| 979 |
| 980 /* |
| 981 ** A tree of these objects forms the RHS of a MATCH operator. |
| 982 ** |
| 983 ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist |
| 984 ** points to a malloced buffer, size nDoclist bytes, containing the results |
| 985 ** of this phrase query in FTS3 doclist format. As usual, the initial |
| 986 ** "Length" field found in doclists stored on disk is omitted from this |
| 987 ** buffer. |
| 988 ** |
| 989 ** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global |
| 990 ** matchinfo data. If it is not NULL, it points to an array of size nCol*3, |
| 991 ** where nCol is the number of columns in the queried FTS table. The array |
| 992 ** is populated as follows: |
| 993 ** |
| 994 ** aMI[iCol*3 + 0] = Undefined |
| 995 ** aMI[iCol*3 + 1] = Number of occurrences |
| 996 ** aMI[iCol*3 + 2] = Number of rows containing at least one instance |
| 997 ** |
| 998 ** The aMI array is allocated using sqlite3_malloc(). It should be freed |
| 999 ** when the expression node is. |
| 1000 */ |
| 1001 struct Fts3Expr { |
| 1002 int eType; /* One of the FTSQUERY_XXX values defined below */ |
| 1003 int nNear; /* Valid if eType==FTSQUERY_NEAR */ |
| 1004 Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ |
| 1005 Fts3Expr *pLeft; /* Left operand */ |
| 1006 Fts3Expr *pRight; /* Right operand */ |
| 1007 Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ |
| 1008 |
| 1009 /* The following are used by the fts3_eval.c module. */ |
| 1010 sqlite3_int64 iDocid; /* Current docid */ |
| 1011 u8 bEof; /* True this expression is at EOF already */ |
| 1012 u8 bStart; /* True if iDocid is valid */ |
| 1013 u8 bDeferred; /* True if this expression is entirely deferred */ |
| 1014 |
| 1015 /* The following are used by the fts3_snippet.c module. */ |
| 1016 int iPhrase; /* Index of this phrase in matchinfo() results */ |
| 1017 u32 *aMI; /* See above */ |
| 1018 }; |
| 1019 |
| 1020 /* |
| 1021 ** Candidate values for Fts3Expr.eType. Note that the order of the first |
| 1022 ** four values is in order of precedence when parsing expressions. For |
| 1023 ** example, the following: |
| 1024 ** |
| 1025 ** "a OR b AND c NOT d NEAR e" |
| 1026 ** |
| 1027 ** is equivalent to: |
| 1028 ** |
| 1029 ** "a OR (b AND (c NOT (d NEAR e)))" |
| 1030 */ |
| 1031 #define FTSQUERY_NEAR 1 |
| 1032 #define FTSQUERY_NOT 2 |
| 1033 #define FTSQUERY_AND 3 |
| 1034 #define FTSQUERY_OR 4 |
| 1035 #define FTSQUERY_PHRASE 5 |
| 1036 |
| 1037 |
| 1038 /* fts3_write.c */ |
| 1039 SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sql
ite3_int64*); |
| 1040 SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *); |
| 1041 SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *); |
| 1042 SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *); |
| 1043 SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, |
| 1044 sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); |
| 1045 SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( |
| 1046 Fts3Table*,int,const char*,int,int,Fts3SegReader**); |
| 1047 SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *); |
| 1048 SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt
**); |
| 1049 SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*
, int*); |
| 1050 |
| 1051 SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); |
| 1052 SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_
stmt **); |
| 1053 |
| 1054 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 1055 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); |
| 1056 SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); |
| 1057 SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); |
| 1058 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); |
| 1059 SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, in
t *); |
| 1060 #else |
| 1061 # define sqlite3Fts3FreeDeferredTokens(x) |
| 1062 # define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK |
| 1063 # define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK |
| 1064 # define sqlite3Fts3FreeDeferredDoclists(x) |
| 1065 # define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK |
| 1066 #endif |
| 1067 |
| 1068 SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *); |
| 1069 SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *); |
| 1070 |
| 1071 /* Special values interpreted by sqlite3Fts3SegReaderCursor() */ |
| 1072 #define FTS3_SEGCURSOR_PENDING -1 |
| 1073 #define FTS3_SEGCURSOR_ALL -2 |
| 1074 |
| 1075 SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Ft
s3SegFilter*); |
| 1076 SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); |
| 1077 SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); |
| 1078 |
| 1079 SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, |
| 1080 int, int, int, const char *, int, int, int, Fts3MultiSegReader *); |
| 1081 |
| 1082 /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ |
| 1083 #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 |
| 1084 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 |
| 1085 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 |
| 1086 #define FTS3_SEGMENT_PREFIX 0x00000008 |
| 1087 #define FTS3_SEGMENT_SCAN 0x00000010 |
| 1088 #define FTS3_SEGMENT_FIRST 0x00000020 |
| 1089 |
| 1090 /* Type passed as 4th argument to SegmentReaderIterate() */ |
| 1091 struct Fts3SegFilter { |
| 1092 const char *zTerm; /* Term buffer; presumably the term to match - TODO confirm */ |
| 1093 int nTerm; /* Presumably the size of zTerm in bytes - TODO confirm */ |
| 1094 int iCol; /* Column number; presumably for COLUMN_FILTER - TODO confirm */ |
| 1095 int flags; /* FTS3_SEGMENT_XXX flags; presumably a bitmask - TODO confirm */ |
| 1096 }; |
| 1097 |
| 1098 struct Fts3MultiSegReader { |
| 1099 /* Used internally by sqlite3Fts3SegReaderXXX() calls */ |
| 1100 Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ |
| 1101 int nSegment; /* Size of apSegment array */ |
| 1102 int nAdvance; /* How many seg-readers to advance */ |
| 1103 Fts3SegFilter *pFilter; /* Pointer to filter object */ |
| 1104 char *aBuffer; /* Buffer to merge doclists in */ |
| 1105 int nBuffer; /* Allocated size of aBuffer[] in bytes */ |
| 1106 |
| 1107 int iColFilter; /* If >=0, filter for this column */ |
| 1108 int bRestart; |
| 1109 |
| 1110 /* Used by fts3.c only. */ |
| 1111 int nCost; /* Cost of running iterator */ |
| 1112 int bLookup; /* True if a lookup of a single entry. */ |
| 1113 |
| 1114 /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ |
| 1115 char *zTerm; /* Pointer to term buffer */ |
| 1116 int nTerm; /* Size of zTerm in bytes */ |
| 1117 char *aDoclist; /* Pointer to doclist buffer */ |
| 1118 int nDoclist; /* Size of aDoclist[] in bytes */ |
| 1119 }; |
| 1120 |
| 1121 SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int); |
| 1122 |
| 1123 #define fts3GetVarint32(p, piVal) ( \ |
| 1124 (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ |
| 1125 ) |
| 1126 |
| 1127 /* fts3.c */ |
| 1128 SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char**,const char*,...); |
| 1129 SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64); |
| 1130 SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); |
| 1131 SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *); |
| 1132 SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64); |
| 1133 SQLITE_PRIVATE void sqlite3Fts3Dequote(char *); |
| 1134 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,i
nt*,u8*); |
| 1135 SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); |
| 1136 SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); |
| 1137 SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); |
| 1138 SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); |
| 1139 |
| 1140 /* fts3_tokenizer.c */ |
| 1141 SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); |
| 1142 SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *)
; |
| 1143 SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, |
| 1144 sqlite3_tokenizer **, char ** |
| 1145 ); |
| 1146 SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char); |
| 1147 |
| 1148 /* fts3_snippet.c */ |
| 1149 SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); |
| 1150 SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const ch
ar *, |
| 1151 const char *, const char *, int, int |
| 1152 ); |
| 1153 SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const
char *); |
| 1154 SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); |
| 1155 |
| 1156 /* fts3_expr.c */ |
| 1157 SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, |
| 1158 char **, int, int, int, const char *, int, Fts3Expr **, char ** |
| 1159 ); |
| 1160 SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); |
| 1161 #ifdef SQLITE_TEST |
| 1162 SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); |
| 1163 SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db); |
| 1164 #endif |
| 1165 |
| 1166 SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char
*, int, |
| 1167 sqlite3_tokenizer_cursor ** |
| 1168 ); |
| 1169 |
| 1170 /* fts3_aux.c */ |
| 1171 SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db); |
| 1172 |
| 1173 SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); |
| 1174 |
| 1175 SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( |
| 1176 Fts3Table*, Fts3MultiSegReader*, int, const char*, int); |
| 1177 SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( |
| 1178 Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); |
| 1179 SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iC
ol, char **); |
| 1180 SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *)
; |
| 1181 SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); |
| 1182 |
| 1183 /* fts3_tokenize_vtab.c */ |
| 1184 SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); |
| 1185 |
| 1186 /* fts3_unicode2.c (functions generated by parsing unicode text files) */ |
| 1187 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 1188 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); |
| 1189 SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); |
| 1190 SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); |
| 1191 #endif |
| 1192 |
| 1193 #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ |
| 1194 #endif /* _FTSINT_H */ |
| 1195 |
| 1196 /************** End of fts3Int.h *********************************************/ |
| 1197 /************** Continuing where we left off in fts3.c ***********************/ |
| 1198 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 1199 |
| 1200 #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) |
| 1201 # define SQLITE_CORE 1 |
| 1202 #endif |
| 1203 |
| 1204 /* #include <assert.h> */ |
| 1205 /* #include <stdlib.h> */ |
| 1206 /* #include <stddef.h> */ |
| 1207 /* #include <stdio.h> */ |
| 1208 /* #include <string.h> */ |
| 1209 /* #include <stdarg.h> */ |
| 1210 |
| 1211 /* #include "fts3.h" */ |
| 1212 #ifndef SQLITE_CORE |
| 1213 /* # include "sqlite3ext.h" */ |
| 1214 SQLITE_EXTENSION_INIT1 |
| 1215 #endif |
| 1216 |
| 1217 static int fts3EvalNext(Fts3Cursor *pCsr); |
| 1218 static int fts3EvalStart(Fts3Cursor *pCsr); |
| 1219 static int fts3TermSegReaderCursor( |
| 1220 Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); |
| 1221 |
| 1222 #ifndef SQLITE_AMALGAMATION |
| 1223 # if defined(SQLITE_DEBUG) |
| 1224 SQLITE_PRIVATE int sqlite3Fts3Always(int b) { assert( b ); return b; } |
| 1225 SQLITE_PRIVATE int sqlite3Fts3Never(int b) { assert( !b ); return b; } |
| 1226 # endif |
| 1227 #endif |
| 1228 |
| 1229 /* |
| 1230 ** Write a 64-bit variable-length integer to memory starting at p[0]. |
| 1231 ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. |
| 1232 ** The number of bytes written is returned. |
| 1233 */ |
| 1234 SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ |
| 1235 unsigned char *q = (unsigned char *) p; |
| 1236 sqlite_uint64 vu = v; |
| 1237 do{ |
| 1238 *q++ = (unsigned char) ((vu & 0x7f) | 0x80); |
| 1239 vu >>= 7; |
| 1240 }while( vu!=0 ); |
| 1241 q[-1] &= 0x7f; /* turn off high bit in final byte */ |
| 1242 assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); |
| 1243 return (int) (q - (unsigned char *)p); |
| 1244 } |
| 1245 |
/*
** Helper macros for the unrolled varint decoders that follow.
**
** GETVARINT_INIT loads the first input byte into accumulator v.
** GETVARINT_STEP keeps the bits of v selected by mask1 and ORs in the next
** input byte shifted left by "shift". In both cases ptr is advanced by one
** byte and, if the bits of v selected by mask2 are all clear, v is stored
** in var and the ENCLOSING FUNCTION returns ret.
**
** The input byte is converted to unsigned before it is shifted, so a
** negative (signed) char is never left-shifted, which would be undefined
** behaviour. Arguments are parenthesized and each body is wrapped in
** do{}while(0) so that an invocation followed by ';' is exactly one
** statement. The shift and mask1 arguments of GETVARINT_INIT are unused;
** they are accepted so both macros share one argument list.
*/
#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \
  do{ \
    (v) = ((v) & (mask1)) | ( (unsigned int)(*(ptr)++) << (shift) ); \
    if( ((v) & (mask2))==0 ){ (var) = (v); return (ret); } \
  }while( 0 )
#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \
  do{ \
    (v) = (*(ptr)++); \
    if( ((v) & (mask2))==0 ){ (var) = (v); return (ret); } \
  }while( 0 )
| 1252 |
| 1253 /* |
| 1254 ** Read a 64-bit variable-length integer from memory starting at p[0]. |
| 1255 ** Return the number of bytes read, or 0 on error. |
| 1256 ** The value is stored in *v. |
| 1257 */ |
| 1258 SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ |
| 1259 const char *pStart = p; |
| 1260 u32 a; |
| 1261 u64 b; |
| 1262 int shift; |
| 1263 |
| 1264 GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1); |
| 1265 GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2); |
| 1266 GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3); |
| 1267 GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); |
| 1268 b = (a & 0x0FFFFFFF ); |
| 1269 |
| 1270 for(shift=28; shift<=63; shift+=7){ |
| 1271 u64 c = *p++; |
| 1272 b += (c&0x7F) << shift; |
| 1273 if( (c & 0x80)==0 ) break; |
| 1274 } |
| 1275 *v = b; |
| 1276 return (int)(p - pStart); |
| 1277 } |
| 1278 |
| 1279 /* |
| 1280 ** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a |
| 1281 ** 32-bit integer before it is returned. |
| 1282 */ |
| 1283 SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *p, int *pi){ |
| 1284 u32 a; |
| 1285 |
| 1286 #ifndef fts3GetVarint32 |
| 1287 GETVARINT_INIT(a, p, 0, 0x00, 0x80, *pi, 1); |
| 1288 #else |
| 1289 a = (*p++); |
| 1290 assert( a & 0x80 ); |
| 1291 #endif |
| 1292 |
| 1293 GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *pi, 2); |
| 1294 GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *pi, 3); |
| 1295 GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4); |
| 1296 a = (a & 0x0FFFFFFF ); |
| 1297 *pi = (int)(a | ((u32)(*p & 0x0F) << 28)); |
| 1298 return 5; |
| 1299 } |
| 1300 |
| 1301 /* |
| 1302 ** Return the number of bytes required to encode v as a varint |
| 1303 */ |
| 1304 SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){ |
| 1305 int i = 0; |
| 1306 do{ |
| 1307 i++; |
| 1308 v >>= 7; |
| 1309 }while( v!=0 ); |
| 1310 return i; |
| 1311 } |
| 1312 |
| 1313 /* |
| 1314 ** Convert an SQL-style quoted string into a normal string by removing |
| 1315 ** the quote characters. The conversion is done in-place. If the |
| 1316 ** input does not begin with a quote character, then this routine |
| 1317 ** is a no-op. |
| 1318 ** |
| 1319 ** Examples: |
| 1320 ** |
| 1321 ** "abc" becomes abc |
| 1322 ** 'xyz' becomes xyz |
| 1323 ** [pqr] becomes pqr |
| 1324 ** `mno` becomes mno |
| 1325 ** |
| 1326 */ |
| 1327 SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){ |
| 1328 char quote; /* Quote character (if any ) */ |
| 1329 |
| 1330 quote = z[0]; |
| 1331 if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ |
| 1332 int iIn = 1; /* Index of next byte to read from input */ |
| 1333 int iOut = 0; /* Index of next byte to write to output */ |
| 1334 |
| 1335 /* If the first byte was a '[', then the close-quote character is a ']' */ |
| 1336 if( quote=='[' ) quote = ']'; |
| 1337 |
| 1338 while( z[iIn] ){ |
| 1339 if( z[iIn]==quote ){ |
| 1340 if( z[iIn+1]!=quote ) break; |
| 1341 z[iOut++] = quote; |
| 1342 iIn += 2; |
| 1343 }else{ |
| 1344 z[iOut++] = z[iIn++]; |
| 1345 } |
| 1346 } |
| 1347 z[iOut] = '\0'; |
| 1348 } |
| 1349 } |
| 1350 |
| 1351 /* |
| 1352 ** Read a single varint from the doclist at *pp and advance *pp to point |
| 1353 ** to the first byte past the end of the varint. Add the value of the varint |
| 1354 ** to *pVal. |
| 1355 */ |
| 1356 static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ |
| 1357 sqlite3_int64 iVal; |
| 1358 *pp += sqlite3Fts3GetVarint(*pp, &iVal); |
| 1359 *pVal += iVal; |
| 1360 } |
| 1361 |
| 1362 /* |
| 1363 ** When this function is called, *pp points to the first byte following a |
| 1364 ** varint that is part of a doclist (or position-list, or any other list |
| 1365 ** of varints). This function moves *pp to point to the start of that varint, |
| 1366 ** and sets *pVal by the varint value. |
| 1367 ** |
| 1368 ** Argument pStart points to the first byte of the doclist that the |
| 1369 ** varint is part of. |
| 1370 */ |
| 1371 static void fts3GetReverseVarint( |
| 1372 char **pp, |
| 1373 char *pStart, |
| 1374 sqlite3_int64 *pVal |
| 1375 ){ |
| 1376 sqlite3_int64 iVal; |
| 1377 char *p; |
| 1378 |
| 1379 /* Pointer p now points at the first byte past the varint we are |
| 1380 ** interested in. So, unless the doclist is corrupt, the 0x80 bit is |
| 1381 ** clear on character p[-1]. */ |
| 1382 for(p = (*pp)-2; p>=pStart && *p&0x80; p--); |
| 1383 p++; |
| 1384 *pp = p; |
| 1385 |
| 1386 sqlite3Fts3GetVarint(p, &iVal); |
| 1387 *pVal = iVal; |
| 1388 } |
| 1389 |
| 1390 /* |
| 1391 ** The xDisconnect() virtual table method. |
| 1392 */ |
| 1393 static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ |
| 1394 Fts3Table *p = (Fts3Table *)pVtab; |
| 1395 int i; |
| 1396 |
| 1397 assert( p->nPendingData==0 ); |
| 1398 assert( p->pSegments==0 ); |
| 1399 |
| 1400 /* Free any prepared statements held */ |
| 1401 sqlite3_finalize(p->pSeekStmt); |
| 1402 for(i=0; i<SizeofArray(p->aStmt); i++){ |
| 1403 sqlite3_finalize(p->aStmt[i]); |
| 1404 } |
| 1405 sqlite3_free(p->zSegmentsTbl); |
| 1406 sqlite3_free(p->zReadExprlist); |
| 1407 sqlite3_free(p->zWriteExprlist); |
| 1408 sqlite3_free(p->zContentTbl); |
| 1409 sqlite3_free(p->zLanguageid); |
| 1410 |
| 1411 /* Invoke the tokenizer destructor to free the tokenizer. */ |
| 1412 p->pTokenizer->pModule->xDestroy(p->pTokenizer); |
| 1413 |
| 1414 sqlite3_free(p); |
| 1415 return SQLITE_OK; |
| 1416 } |
| 1417 |
| 1418 /* |
| 1419 ** Write an error message into *pzErr |
| 1420 */ |
| 1421 SQLITE_PRIVATE void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){ |
| 1422 va_list ap; |
| 1423 sqlite3_free(*pzErr); |
| 1424 va_start(ap, zFormat); |
| 1425 *pzErr = sqlite3_vmprintf(zFormat, ap); |
| 1426 va_end(ap); |
| 1427 } |
| 1428 |
| 1429 /* |
| 1430 ** Construct one or more SQL statements from the format string given |
| 1431 ** and then evaluate those statements. The success code is written |
| 1432 ** into *pRc. |
| 1433 ** |
| 1434 ** If *pRc is initially non-zero then this routine is a no-op. |
| 1435 */ |
| 1436 static void fts3DbExec( |
| 1437 int *pRc, /* Success code */ |
| 1438 sqlite3 *db, /* Database in which to run SQL */ |
| 1439 const char *zFormat, /* Format string for SQL */ |
| 1440 ... /* Arguments to the format string */ |
| 1441 ){ |
| 1442 va_list ap; |
| 1443 char *zSql; |
| 1444 if( *pRc ) return; |
| 1445 va_start(ap, zFormat); |
| 1446 zSql = sqlite3_vmprintf(zFormat, ap); |
| 1447 va_end(ap); |
| 1448 if( zSql==0 ){ |
| 1449 *pRc = SQLITE_NOMEM; |
| 1450 }else{ |
| 1451 *pRc = sqlite3_exec(db, zSql, 0, 0, 0); |
| 1452 sqlite3_free(zSql); |
| 1453 } |
| 1454 } |
| 1455 |
| 1456 /* |
| 1457 ** The xDestroy() virtual table method. |
| 1458 */ |
| 1459 static int fts3DestroyMethod(sqlite3_vtab *pVtab){ |
| 1460 Fts3Table *p = (Fts3Table *)pVtab; |
| 1461 int rc = SQLITE_OK; /* Return code */ |
| 1462 const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ |
| 1463 sqlite3 *db = p->db; /* Database handle */ |
| 1464 |
| 1465 /* Drop the shadow tables */ |
| 1466 if( p->zContentTbl==0 ){ |
| 1467 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); |
| 1468 } |
| 1469 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); |
| 1470 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); |
| 1471 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); |
| 1472 fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); |
| 1473 |
| 1474 /* If everything has worked, invoke fts3DisconnectMethod() to free the |
| 1475 ** memory associated with the Fts3Table structure and return SQLITE_OK. |
| 1476 ** Otherwise, return an SQLite error code. |
| 1477 */ |
| 1478 return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); |
| 1479 } |
| 1480 |
| 1481 |
| 1482 /* |
| 1483 ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table |
| 1484 ** passed as the first argument. This is done as part of the xConnect() |
| 1485 ** and xCreate() methods. |
| 1486 ** |
| 1487 ** If *pRc is non-zero when this function is called, it is a no-op. |
| 1488 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc |
| 1489 ** before returning. |
| 1490 */ |
| 1491 static void fts3DeclareVtab(int *pRc, Fts3Table *p){ |
| 1492 if( *pRc==SQLITE_OK ){ |
| 1493 int i; /* Iterator variable */ |
| 1494 int rc; /* Return code */ |
| 1495 char *zSql; /* SQL statement passed to declare_vtab() */ |
| 1496 char *zCols; /* List of user defined columns */ |
| 1497 const char *zLanguageid; |
| 1498 |
| 1499 zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); |
| 1500 sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); |
| 1501 |
| 1502 /* Create a list of user columns for the virtual table */ |
| 1503 zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); |
| 1504 for(i=1; zCols && i<p->nColumn; i++){ |
| 1505 zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); |
| 1506 } |
| 1507 |
| 1508 /* Create the whole "CREATE TABLE" statement to pass to SQLite */ |
| 1509 zSql = sqlite3_mprintf( |
| 1510 "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", |
| 1511 zCols, p->zName, zLanguageid |
| 1512 ); |
| 1513 if( !zCols || !zSql ){ |
| 1514 rc = SQLITE_NOMEM; |
| 1515 }else{ |
| 1516 rc = sqlite3_declare_vtab(p->db, zSql); |
| 1517 } |
| 1518 |
| 1519 sqlite3_free(zSql); |
| 1520 sqlite3_free(zCols); |
| 1521 *pRc = rc; |
| 1522 } |
| 1523 } |
| 1524 |
| 1525 /* |
| 1526 ** Create the %_stat table if it does not already exist. |
| 1527 */ |
| 1528 SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ |
| 1529 fts3DbExec(pRc, p->db, |
| 1530 "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" |
| 1531 "(id INTEGER PRIMARY KEY, value BLOB);", |
| 1532 p->zDb, p->zName |
| 1533 ); |
| 1534 if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; |
| 1535 } |
| 1536 |
| 1537 /* |
| 1538 ** Create the backing store tables (%_content, %_segments and %_segdir) |
| 1539 ** required by the FTS3 table passed as the only argument. This is done |
| 1540 ** as part of the vtab xCreate() method. |
| 1541 ** |
| 1542 ** If the p->bHasDocsize boolean is true (indicating that this is an |
| 1543 ** FTS4 table, not an FTS3 table) then also create the %_docsize and |
| 1544 ** %_stat tables required by FTS4. |
| 1545 */ |
| 1546 static int fts3CreateTables(Fts3Table *p){ |
| 1547 int rc = SQLITE_OK; /* Return code */ |
| 1548 int i; /* Iterator variable */ |
| 1549 sqlite3 *db = p->db; /* The database connection */ |
| 1550 |
| 1551 if( p->zContentTbl==0 ){ |
| 1552 const char *zLanguageid = p->zLanguageid; |
| 1553 char *zContentCols; /* Columns of %_content table */ |
| 1554 |
| 1555 /* Create a list of user columns for the content table */ |
| 1556 zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); |
| 1557 for(i=0; zContentCols && i<p->nColumn; i++){ |
| 1558 char *z = p->azColumn[i]; |
| 1559 zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); |
| 1560 } |
| 1561 if( zLanguageid && zContentCols ){ |
| 1562 zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); |
| 1563 } |
| 1564 if( zContentCols==0 ) rc = SQLITE_NOMEM; |
| 1565 |
| 1566 /* Create the content table */ |
| 1567 fts3DbExec(&rc, db, |
| 1568 "CREATE TABLE %Q.'%q_content'(%s)", |
| 1569 p->zDb, p->zName, zContentCols |
| 1570 ); |
| 1571 sqlite3_free(zContentCols); |
| 1572 } |
| 1573 |
| 1574 /* Create other tables */ |
| 1575 fts3DbExec(&rc, db, |
| 1576 "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", |
| 1577 p->zDb, p->zName |
| 1578 ); |
| 1579 fts3DbExec(&rc, db, |
| 1580 "CREATE TABLE %Q.'%q_segdir'(" |
| 1581 "level INTEGER," |
| 1582 "idx INTEGER," |
| 1583 "start_block INTEGER," |
| 1584 "leaves_end_block INTEGER," |
| 1585 "end_block INTEGER," |
| 1586 "root BLOB," |
| 1587 "PRIMARY KEY(level, idx)" |
| 1588 ");", |
| 1589 p->zDb, p->zName |
| 1590 ); |
| 1591 if( p->bHasDocsize ){ |
| 1592 fts3DbExec(&rc, db, |
| 1593 "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", |
| 1594 p->zDb, p->zName |
| 1595 ); |
| 1596 } |
| 1597 assert( p->bHasStat==p->bFts4 ); |
| 1598 if( p->bHasStat ){ |
| 1599 sqlite3Fts3CreateStatTable(&rc, p); |
| 1600 } |
| 1601 return rc; |
| 1602 } |
| 1603 |
| 1604 /* |
| 1605 ** Store the current database page-size in bytes in p->nPgsz. |
| 1606 ** |
| 1607 ** If *pRc is non-zero when this function is called, it is a no-op. |
| 1608 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc |
| 1609 ** before returning. |
| 1610 */ |
| 1611 static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ |
| 1612 if( *pRc==SQLITE_OK ){ |
| 1613 int rc; /* Return code */ |
| 1614 char *zSql; /* SQL text "PRAGMA %Q.page_size" */ |
| 1615 sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ |
| 1616 |
| 1617 zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); |
| 1618 if( !zSql ){ |
| 1619 rc = SQLITE_NOMEM; |
| 1620 }else{ |
| 1621 rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); |
| 1622 if( rc==SQLITE_OK ){ |
| 1623 sqlite3_step(pStmt); |
| 1624 p->nPgsz = sqlite3_column_int(pStmt, 0); |
| 1625 rc = sqlite3_finalize(pStmt); |
| 1626 }else if( rc==SQLITE_AUTH ){ |
| 1627 p->nPgsz = 1024; |
| 1628 rc = SQLITE_OK; |
| 1629 } |
| 1630 } |
| 1631 assert( p->nPgsz>0 || rc!=SQLITE_OK ); |
| 1632 sqlite3_free(zSql); |
| 1633 *pRc = rc; |
| 1634 } |
| 1635 } |
| 1636 |
/*
** Test whether module argument z is a "special" FTS4 argument, i.e. a
** column specification of the form:
**
**   <key>=<value>
**
** There may not be whitespace surrounding the "=" character. The <value>
** term may be quoted, but the <key> may not.
**
** If z contains no '=' character, 0 is returned and neither output is
** written. Otherwise 1 is returned, *pnKey is set to the number of bytes
** preceding the first '=', and *pzValue is set to a dequoted copy of the
** text following it. The copy is obtained from sqlite3_mprintf(); it is
** NULL if that allocation failed (the function still returns 1).
*/
static int fts3IsSpecialColumn(
  const char *z,
  int *pnKey,
  char **pzValue
){
  const char *zEq;                /* Scans forward to the first '=' */
  char *zCopy;                    /* Dequoted copy of the value */

  for(zEq=z; *zEq!='='; zEq++){
    if( *zEq=='\0' ) return 0;
  }

  *pnKey = (int)(zEq-z);
  zCopy = sqlite3_mprintf("%s", &zEq[1]);
  if( zCopy!=0 ){
    sqlite3Fts3Dequote(zCopy);
  }
  *pzValue = zCopy;
  return 1;
}
| 1666 |
| 1667 /* |
| 1668 ** Append the output of a printf() style formatting to an existing string. |
| 1669 */ |
| 1670 static void fts3Appendf( |
| 1671 int *pRc, /* IN/OUT: Error code */ |
| 1672 char **pz, /* IN/OUT: Pointer to string buffer */ |
| 1673 const char *zFormat, /* Printf format string to append */ |
| 1674 ... /* Arguments for printf format string */ |
| 1675 ){ |
| 1676 if( *pRc==SQLITE_OK ){ |
| 1677 va_list ap; |
| 1678 char *z; |
| 1679 va_start(ap, zFormat); |
| 1680 z = sqlite3_vmprintf(zFormat, ap); |
| 1681 va_end(ap); |
| 1682 if( z && *pz ){ |
| 1683 char *z2 = sqlite3_mprintf("%s%s", *pz, z); |
| 1684 sqlite3_free(z); |
| 1685 z = z2; |
| 1686 } |
| 1687 if( z==0 ) *pRc = SQLITE_NOMEM; |
| 1688 sqlite3_free(*pz); |
| 1689 *pz = z; |
| 1690 } |
| 1691 } |
| 1692 |
/*
** Return a copy of input string zInput enclosed in double-quotes (") and
** with every double-quote character it contains doubled. For example:
**
**     fts3QuoteId("un \"zip\"")   ->    "un \"\"zip\"\""
**
** The buffer is sized for the worst case (every input byte doubled, plus
** the two enclosing quotes and the nul-terminator) and is obtained from
** sqlite3_malloc(). It is the caller's responsibility to release it with
** sqlite3_free(). NULL is returned if the allocation fails.
*/
static char *fts3QuoteId(char const *zInput){
  int nAlloc = 2 + (int)strlen(zInput)*2 + 1;
  char *zQuoted = sqlite3_malloc(nAlloc);
  const char *zIn;                /* Read cursor within zInput */
  char *zOut;                     /* Write cursor within zQuoted */

  if( zQuoted==0 ) return 0;

  zOut = zQuoted;
  *zOut++ = '"';
  for(zIn=zInput; *zIn; zIn++){
    if( *zIn=='"' ){
      *zOut++ = '"';              /* Escape by doubling */
    }
    *zOut++ = *zIn;
  }
  *zOut++ = '"';
  *zOut = '\0';
  return zQuoted;
}
| 1721 |
| 1722 /* |
| 1723 ** Return a list of comma separated SQL expressions and a FROM clause that |
| 1724 ** could be used in a SELECT statement such as the following: |
| 1725 ** |
| 1726 ** SELECT <list of expressions> FROM %_content AS x ... |
| 1727 ** |
| 1728 ** to return the docid, followed by each column of text data in order |
| 1729 ** from left to write. If parameter zFunc is not NULL, then instead of |
| 1730 ** being returned directly each column of text data is passed to an SQL |
| 1731 ** function named zFunc first. For example, if zFunc is "unzip" and the |
| 1732 ** table has the three user-defined columns "a", "b", and "c", the following |
| 1733 ** string is returned: |
| 1734 ** |
| 1735 ** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" |
| 1736 ** |
| 1737 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It |
| 1738 ** is the responsibility of the caller to eventually free it. |
| 1739 ** |
| 1740 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and |
| 1741 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered |
| 1742 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If |
| 1743 ** no error occurs, *pRc is left unmodified. |
| 1744 */ |
| 1745 static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ |
| 1746 char *zRet = 0; |
| 1747 char *zFree = 0; |
| 1748 char *zFunction; |
| 1749 int i; |
| 1750 |
| 1751 if( p->zContentTbl==0 ){ |
| 1752 if( !zFunc ){ |
| 1753 zFunction = ""; |
| 1754 }else{ |
| 1755 zFree = zFunction = fts3QuoteId(zFunc); |
| 1756 } |
| 1757 fts3Appendf(pRc, &zRet, "docid"); |
| 1758 for(i=0; i<p->nColumn; i++){ |
| 1759 fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); |
| 1760 } |
| 1761 if( p->zLanguageid ){ |
| 1762 fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); |
| 1763 } |
| 1764 sqlite3_free(zFree); |
| 1765 }else{ |
| 1766 fts3Appendf(pRc, &zRet, "rowid"); |
| 1767 for(i=0; i<p->nColumn; i++){ |
| 1768 fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); |
| 1769 } |
| 1770 if( p->zLanguageid ){ |
| 1771 fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); |
| 1772 } |
| 1773 } |
| 1774 fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", |
| 1775 p->zDb, |
| 1776 (p->zContentTbl ? p->zContentTbl : p->zName), |
| 1777 (p->zContentTbl ? "" : "_content") |
| 1778 ); |
| 1779 return zRet; |
| 1780 } |
| 1781 |
| 1782 /* |
| 1783 ** Return a list of N comma separated question marks, where N is the number |
| 1784 ** of columns in the %_content table (one for the docid plus one for each |
| 1785 ** user-defined text column). |
| 1786 ** |
| 1787 ** If argument zFunc is not NULL, then all but the first question mark |
| 1788 ** is preceded by zFunc and an open bracket, and followed by a closed |
| 1789 ** bracket. For example, if zFunc is "zip" and the FTS3 table has three |
| 1790 ** user-defined text columns, the following string is returned: |
| 1791 ** |
| 1792 ** "?, zip(?), zip(?), zip(?)" |
| 1793 ** |
| 1794 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It |
| 1795 ** is the responsibility of the caller to eventually free it. |
| 1796 ** |
| 1797 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and |
| 1798 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered |
| 1799 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If |
| 1800 ** no error occurs, *pRc is left unmodified. |
| 1801 */ |
| 1802 static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ |
| 1803 char *zRet = 0; |
| 1804 char *zFree = 0; |
| 1805 char *zFunction; |
| 1806 int i; |
| 1807 |
| 1808 if( !zFunc ){ |
| 1809 zFunction = ""; |
| 1810 }else{ |
| 1811 zFree = zFunction = fts3QuoteId(zFunc); |
| 1812 } |
| 1813 fts3Appendf(pRc, &zRet, "?"); |
| 1814 for(i=0; i<p->nColumn; i++){ |
| 1815 fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); |
| 1816 } |
| 1817 if( p->zLanguageid ){ |
| 1818 fts3Appendf(pRc, &zRet, ", ?"); |
| 1819 } |
| 1820 sqlite3_free(zFree); |
| 1821 return zRet; |
| 1822 } |
| 1823 |
| 1824 /* |
| 1825 ** This function interprets the string at (*pp) as a non-negative integer |
| 1826 ** value. It reads the integer and sets *pnOut to the value read, then |
| 1827 ** sets *pp to point to the byte immediately following the last byte of |
| 1828 ** the integer value. |
| 1829 ** |
| 1830 ** Only decimal digits ('0'..'9') may be part of an integer value. |
| 1831 ** |
| 1832 ** If *pp does not being with a decimal digit SQLITE_ERROR is returned and |
| 1833 ** the output value undefined. Otherwise SQLITE_OK is returned. |
| 1834 ** |
| 1835 ** This function is used when parsing the "prefix=" FTS4 parameter. |
| 1836 */ |
| 1837 static int fts3GobbleInt(const char **pp, int *pnOut){ |
| 1838 const int MAX_NPREFIX = 10000000; |
| 1839 const char *p; /* Iterator pointer */ |
| 1840 int nInt = 0; /* Output value */ |
| 1841 |
| 1842 for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ |
| 1843 nInt = nInt * 10 + (p[0] - '0'); |
| 1844 if( nInt>MAX_NPREFIX ){ |
| 1845 nInt = 0; |
| 1846 break; |
| 1847 } |
| 1848 } |
| 1849 if( p==*pp ) return SQLITE_ERROR; |
| 1850 *pnOut = nInt; |
| 1851 *pp = p; |
| 1852 return SQLITE_OK; |
| 1853 } |
| 1854 |
| 1855 /* |
| 1856 ** This function is called to allocate an array of Fts3Index structures |
| 1857 ** representing the indexes maintained by the current FTS table. FTS tables |
| 1858 ** always maintain the main "terms" index, but may also maintain one or |
| 1859 ** more "prefix" indexes, depending on the value of the "prefix=" parameter |
| 1860 ** (if any) specified as part of the CREATE VIRTUAL TABLE statement. |
| 1861 ** |
| 1862 ** Argument zParam is passed the value of the "prefix=" option if one was |
| 1863 ** specified, or NULL otherwise. |
| 1864 ** |
| 1865 ** If no error occurs, SQLITE_OK is returned and *apIndex set to point to |
| 1866 ** the allocated array. *pnIndex is set to the number of elements in the |
| 1867 ** array. If an error does occur, an SQLite error code is returned. |
| 1868 ** |
| 1869 ** Regardless of whether or not an error is returned, it is the responsibility |
| 1870 ** of the caller to call sqlite3_free() on the output array to free it. |
| 1871 */ |
| 1872 static int fts3PrefixParameter( |
| 1873 const char *zParam, /* ABC in prefix=ABC parameter to parse */ |
| 1874 int *pnIndex, /* OUT: size of *apIndex[] array */ |
| 1875 struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ |
| 1876 ){ |
| 1877 struct Fts3Index *aIndex; /* Allocated array */ |
| 1878 int nIndex = 1; /* Number of entries in array */ |
| 1879 |
| 1880 if( zParam && zParam[0] ){ |
| 1881 const char *p; |
| 1882 nIndex++; |
| 1883 for(p=zParam; *p; p++){ |
| 1884 if( *p==',' ) nIndex++; |
| 1885 } |
| 1886 } |
| 1887 |
| 1888 aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); |
| 1889 *apIndex = aIndex; |
| 1890 if( !aIndex ){ |
| 1891 return SQLITE_NOMEM; |
| 1892 } |
| 1893 |
| 1894 memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); |
| 1895 if( zParam ){ |
| 1896 const char *p = zParam; |
| 1897 int i; |
| 1898 for(i=1; i<nIndex; i++){ |
| 1899 int nPrefix = 0; |
| 1900 if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR; |
| 1901 assert( nPrefix>=0 ); |
| 1902 if( nPrefix==0 ){ |
| 1903 nIndex--; |
| 1904 i--; |
| 1905 }else{ |
| 1906 aIndex[i].nPrefix = nPrefix; |
| 1907 } |
| 1908 p++; |
| 1909 } |
| 1910 } |
| 1911 |
| 1912 *pnIndex = nIndex; |
| 1913 return SQLITE_OK; |
| 1914 } |
| 1915 |
| 1916 /* |
| 1917 ** This function is called when initializing an FTS4 table that uses the |
| 1918 ** content=xxx option. It determines the number of and names of the columns |
| 1919 ** of the new FTS4 table. |
| 1920 ** |
| 1921 ** The third argument passed to this function is the value passed to the |
| 1922 ** config=xxx option (i.e. "xxx"). This function queries the database for |
| 1923 ** a table of that name. If found, the output variables are populated |
| 1924 ** as follows: |
| 1925 ** |
| 1926 ** *pnCol: Set to the number of columns table xxx has, |
| 1927 ** |
| 1928 ** *pnStr: Set to the total amount of space required to store a copy |
| 1929 ** of each columns name, including the nul-terminator. |
| 1930 ** |
| 1931 ** *pazCol: Set to point to an array of *pnCol strings. Each string is |
| 1932 ** the name of the corresponding column in table xxx. The array |
| 1933 ** and its contents are allocated using a single allocation. It |
| 1934 ** is the responsibility of the caller to free this allocation |
| 1935 ** by eventually passing the *pazCol value to sqlite3_free(). |
| 1936 ** |
| 1937 ** If the table cannot be found, an error code is returned and the output |
| 1938 ** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is |
| 1939 ** returned (and the output variables are undefined). |
| 1940 */ |
| 1941 static int fts3ContentColumns( |
| 1942 sqlite3 *db, /* Database handle */ |
| 1943 const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ |
| 1944 const char *zTbl, /* Name of content table */ |
| 1945 const char ***pazCol, /* OUT: Malloc'd array of column names */ |
| 1946 int *pnCol, /* OUT: Size of array *pazCol */ |
| 1947 int *pnStr, /* OUT: Bytes of string content */ |
| 1948 char **pzErr /* OUT: error message */ |
| 1949 ){ |
| 1950 int rc = SQLITE_OK; /* Return code */ |
| 1951 char *zSql; /* "SELECT *" statement on zTbl */ |
| 1952 sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ |
| 1953 |
| 1954 zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); |
| 1955 if( !zSql ){ |
| 1956 rc = SQLITE_NOMEM; |
| 1957 }else{ |
| 1958 rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); |
| 1959 if( rc!=SQLITE_OK ){ |
| 1960 sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db)); |
| 1961 } |
| 1962 } |
| 1963 sqlite3_free(zSql); |
| 1964 |
| 1965 if( rc==SQLITE_OK ){ |
| 1966 const char **azCol; /* Output array */ |
| 1967 int nStr = 0; /* Size of all column names (incl. 0x00) */ |
| 1968 int nCol; /* Number of table columns */ |
| 1969 int i; /* Used to iterate through columns */ |
| 1970 |
| 1971 /* Loop through the returned columns. Set nStr to the number of bytes of |
| 1972 ** space required to store a copy of each column name, including the |
| 1973 ** nul-terminator byte. */ |
| 1974 nCol = sqlite3_column_count(pStmt); |
| 1975 for(i=0; i<nCol; i++){ |
| 1976 const char *zCol = sqlite3_column_name(pStmt, i); |
| 1977 nStr += (int)strlen(zCol) + 1; |
| 1978 } |
| 1979 |
| 1980 /* Allocate and populate the array to return. */ |
| 1981 azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr); |
| 1982 if( azCol==0 ){ |
| 1983 rc = SQLITE_NOMEM; |
| 1984 }else{ |
| 1985 char *p = (char *)&azCol[nCol]; |
| 1986 for(i=0; i<nCol; i++){ |
| 1987 const char *zCol = sqlite3_column_name(pStmt, i); |
| 1988 int n = (int)strlen(zCol)+1; |
| 1989 memcpy(p, zCol, n); |
| 1990 azCol[i] = p; |
| 1991 p += n; |
| 1992 } |
| 1993 } |
| 1994 sqlite3_finalize(pStmt); |
| 1995 |
| 1996 /* Set the output variables. */ |
| 1997 *pnCol = nCol; |
| 1998 *pnStr = nStr; |
| 1999 *pazCol = azCol; |
| 2000 } |
| 2001 |
| 2002 return rc; |
| 2003 } |
| 2004 |
| 2005 /* |
| 2006 ** This function is the implementation of both the xConnect and xCreate |
| 2007 ** methods of the FTS3 virtual table. |
| 2008 ** |
| 2009 ** The argv[] array contains the following: |
| 2010 ** |
| 2011 ** argv[0] -> module name ("fts3" or "fts4") |
| 2012 ** argv[1] -> database name |
| 2013 ** argv[2] -> table name |
| 2014 ** argv[...] -> "column name" and other module argument fields. |
| 2015 */ |
| 2016 static int fts3InitVtab( |
| 2017 int isCreate, /* True for xCreate, false for xConnect */ |
| 2018 sqlite3 *db, /* The SQLite database connection */ |
| 2019 void *pAux, /* Hash table containing tokenizers */ |
| 2020 int argc, /* Number of elements in argv array */ |
| 2021 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2022 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
| 2023 char **pzErr /* Write any error message here */ |
| 2024 ){ |
| 2025 Fts3Hash *pHash = (Fts3Hash *)pAux; |
| 2026 Fts3Table *p = 0; /* Pointer to allocated vtab */ |
| 2027 int rc = SQLITE_OK; /* Return code */ |
| 2028 int i; /* Iterator variable */ |
| 2029 int nByte; /* Size of allocation used for *p */ |
| 2030 int iCol; /* Column index */ |
| 2031 int nString = 0; /* Bytes required to hold all column names */ |
| 2032 int nCol = 0; /* Number of columns in the FTS table */ |
| 2033 char *zCsr; /* Space for holding column names */ |
| 2034 int nDb; /* Bytes required to hold database name */ |
| 2035 int nName; /* Bytes required to hold table name */ |
| 2036 int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ |
| 2037 const char **aCol; /* Array of column names */ |
| 2038 sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ |
| 2039 |
| 2040 int nIndex = 0; /* Size of aIndex[] array */ |
| 2041 struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ |
| 2042 |
| 2043 /* The results of parsing supported FTS4 key=value options: */ |
| 2044 int bNoDocsize = 0; /* True to omit %_docsize table */ |
| 2045 int bDescIdx = 0; /* True to store descending indexes */ |
| 2046 char *zPrefix = 0; /* Prefix parameter value (or NULL) */ |
| 2047 char *zCompress = 0; /* compress=? parameter (or NULL) */ |
| 2048 char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ |
| 2049 char *zContent = 0; /* content=? parameter (or NULL) */ |
| 2050 char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ |
| 2051 char **azNotindexed = 0; /* The set of notindexed= columns */ |
| 2052 int nNotindexed = 0; /* Size of azNotindexed[] array */ |
| 2053 |
| 2054 assert( strlen(argv[0])==4 ); |
| 2055 assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) |
| 2056 || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) |
| 2057 ); |
| 2058 |
| 2059 nDb = (int)strlen(argv[1]) + 1; |
| 2060 nName = (int)strlen(argv[2]) + 1; |
| 2061 |
| 2062 nByte = sizeof(const char *) * (argc-2); |
| 2063 aCol = (const char **)sqlite3_malloc(nByte); |
| 2064 if( aCol ){ |
| 2065 memset((void*)aCol, 0, nByte); |
| 2066 azNotindexed = (char **)sqlite3_malloc(nByte); |
| 2067 } |
| 2068 if( azNotindexed ){ |
| 2069 memset(azNotindexed, 0, nByte); |
| 2070 } |
| 2071 if( !aCol || !azNotindexed ){ |
| 2072 rc = SQLITE_NOMEM; |
| 2073 goto fts3_init_out; |
| 2074 } |
| 2075 |
| 2076 /* Loop through all of the arguments passed by the user to the FTS3/4 |
| 2077 ** module (i.e. all the column names and special arguments). This loop |
| 2078 ** does the following: |
| 2079 ** |
| 2080 ** + Figures out the number of columns the FTSX table will have, and |
| 2081 ** the number of bytes of space that must be allocated to store copies |
| 2082 ** of the column names. |
| 2083 ** |
| 2084 ** + If there is a tokenizer specification included in the arguments, |
| 2085 ** initializes the tokenizer pTokenizer. |
| 2086 */ |
| 2087 for(i=3; rc==SQLITE_OK && i<argc; i++){ |
| 2088 char const *z = argv[i]; |
| 2089 int nKey; |
| 2090 char *zVal; |
| 2091 |
| 2092 /* Check if this is a tokenizer specification */ |
| 2093 if( !pTokenizer |
| 2094 && strlen(z)>8 |
| 2095 && 0==sqlite3_strnicmp(z, "tokenize", 8) |
| 2096 && 0==sqlite3Fts3IsIdChar(z[8]) |
| 2097 ){ |
| 2098 rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); |
| 2099 } |
| 2100 |
| 2101 /* Check if it is an FTS4 special argument. */ |
| 2102 else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ |
| 2103 struct Fts4Option { |
| 2104 const char *zOpt; |
| 2105 int nOpt; |
| 2106 } aFts4Opt[] = { |
| 2107 { "matchinfo", 9 }, /* 0 -> MATCHINFO */ |
| 2108 { "prefix", 6 }, /* 1 -> PREFIX */ |
| 2109 { "compress", 8 }, /* 2 -> COMPRESS */ |
| 2110 { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ |
| 2111 { "order", 5 }, /* 4 -> ORDER */ |
| 2112 { "content", 7 }, /* 5 -> CONTENT */ |
| 2113 { "languageid", 10 }, /* 6 -> LANGUAGEID */ |
| 2114 { "notindexed", 10 } /* 7 -> NOTINDEXED */ |
| 2115 }; |
| 2116 |
| 2117 int iOpt; |
| 2118 if( !zVal ){ |
| 2119 rc = SQLITE_NOMEM; |
| 2120 }else{ |
| 2121 for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){ |
| 2122 struct Fts4Option *pOp = &aFts4Opt[iOpt]; |
| 2123 if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ |
| 2124 break; |
| 2125 } |
| 2126 } |
| 2127 if( iOpt==SizeofArray(aFts4Opt) ){ |
| 2128 sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z); |
| 2129 rc = SQLITE_ERROR; |
| 2130 }else{ |
| 2131 switch( iOpt ){ |
| 2132 case 0: /* MATCHINFO */ |
| 2133 if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ |
| 2134 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal); |
| 2135 rc = SQLITE_ERROR; |
| 2136 } |
| 2137 bNoDocsize = 1; |
| 2138 break; |
| 2139 |
| 2140 case 1: /* PREFIX */ |
| 2141 sqlite3_free(zPrefix); |
| 2142 zPrefix = zVal; |
| 2143 zVal = 0; |
| 2144 break; |
| 2145 |
| 2146 case 2: /* COMPRESS */ |
| 2147 sqlite3_free(zCompress); |
| 2148 zCompress = zVal; |
| 2149 zVal = 0; |
| 2150 break; |
| 2151 |
| 2152 case 3: /* UNCOMPRESS */ |
| 2153 sqlite3_free(zUncompress); |
| 2154 zUncompress = zVal; |
| 2155 zVal = 0; |
| 2156 break; |
| 2157 |
| 2158 case 4: /* ORDER */ |
| 2159 if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) |
| 2160 && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) |
| 2161 ){ |
| 2162 sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal); |
| 2163 rc = SQLITE_ERROR; |
| 2164 } |
| 2165 bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); |
| 2166 break; |
| 2167 |
| 2168 case 5: /* CONTENT */ |
| 2169 sqlite3_free(zContent); |
| 2170 zContent = zVal; |
| 2171 zVal = 0; |
| 2172 break; |
| 2173 |
| 2174 case 6: /* LANGUAGEID */ |
| 2175 assert( iOpt==6 ); |
| 2176 sqlite3_free(zLanguageid); |
| 2177 zLanguageid = zVal; |
| 2178 zVal = 0; |
| 2179 break; |
| 2180 |
| 2181 case 7: /* NOTINDEXED */ |
| 2182 azNotindexed[nNotindexed++] = zVal; |
| 2183 zVal = 0; |
| 2184 break; |
| 2185 } |
| 2186 } |
| 2187 sqlite3_free(zVal); |
| 2188 } |
| 2189 } |
| 2190 |
| 2191 /* Otherwise, the argument is a column name. */ |
| 2192 else { |
| 2193 nString += (int)(strlen(z) + 1); |
| 2194 aCol[nCol++] = z; |
| 2195 } |
| 2196 } |
| 2197 |
| 2198 /* If a content=xxx option was specified, the following: |
| 2199 ** |
| 2200 ** 1. Ignore any compress= and uncompress= options. |
| 2201 ** |
| 2202 ** 2. If no column names were specified as part of the CREATE VIRTUAL |
| 2203 ** TABLE statement, use all columns from the content table. |
| 2204 */ |
| 2205 if( rc==SQLITE_OK && zContent ){ |
| 2206 sqlite3_free(zCompress); |
| 2207 sqlite3_free(zUncompress); |
| 2208 zCompress = 0; |
| 2209 zUncompress = 0; |
| 2210 if( nCol==0 ){ |
| 2211 sqlite3_free((void*)aCol); |
| 2212 aCol = 0; |
| 2213 rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr); |
| 2214 |
| 2215 /* If a languageid= option was specified, remove the language id |
| 2216 ** column from the aCol[] array. */ |
| 2217 if( rc==SQLITE_OK && zLanguageid ){ |
| 2218 int j; |
| 2219 for(j=0; j<nCol; j++){ |
| 2220 if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){ |
| 2221 int k; |
| 2222 for(k=j; k<nCol; k++) aCol[k] = aCol[k+1]; |
| 2223 nCol--; |
| 2224 break; |
| 2225 } |
| 2226 } |
| 2227 } |
| 2228 } |
| 2229 } |
| 2230 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2231 |
| 2232 if( nCol==0 ){ |
| 2233 assert( nString==0 ); |
| 2234 aCol[0] = "content"; |
| 2235 nString = 8; |
| 2236 nCol = 1; |
| 2237 } |
| 2238 |
| 2239 if( pTokenizer==0 ){ |
| 2240 rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr); |
| 2241 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2242 } |
| 2243 assert( pTokenizer ); |
| 2244 |
| 2245 rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); |
| 2246 if( rc==SQLITE_ERROR ){ |
| 2247 assert( zPrefix ); |
| 2248 sqlite3Fts3ErrMsg(pzErr, "error parsing prefix parameter: %s", zPrefix); |
| 2249 } |
| 2250 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2251 |
| 2252 /* Allocate and populate the Fts3Table structure. */ |
| 2253 nByte = sizeof(Fts3Table) + /* Fts3Table */ |
| 2254 nCol * sizeof(char *) + /* azColumn */ |
| 2255 nIndex * sizeof(struct Fts3Index) + /* aIndex */ |
| 2256 nCol * sizeof(u8) + /* abNotindexed */ |
| 2257 nName + /* zName */ |
| 2258 nDb + /* zDb */ |
| 2259 nString; /* Space for azColumn strings */ |
| 2260 p = (Fts3Table*)sqlite3_malloc(nByte); |
| 2261 if( p==0 ){ |
| 2262 rc = SQLITE_NOMEM; |
| 2263 goto fts3_init_out; |
| 2264 } |
| 2265 memset(p, 0, nByte); |
| 2266 p->db = db; |
| 2267 p->nColumn = nCol; |
| 2268 p->nPendingData = 0; |
| 2269 p->azColumn = (char **)&p[1]; |
| 2270 p->pTokenizer = pTokenizer; |
| 2271 p->nMaxPendingData = FTS3_MAX_PENDING_DATA; |
| 2272 p->bHasDocsize = (isFts4 && bNoDocsize==0); |
| 2273 p->bHasStat = (u8)isFts4; |
| 2274 p->bFts4 = (u8)isFts4; |
| 2275 p->bDescIdx = (u8)bDescIdx; |
| 2276 p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ |
| 2277 p->zContentTbl = zContent; |
| 2278 p->zLanguageid = zLanguageid; |
| 2279 zContent = 0; |
| 2280 zLanguageid = 0; |
| 2281 TESTONLY( p->inTransaction = -1 ); |
| 2282 TESTONLY( p->mxSavepoint = -1 ); |
| 2283 |
| 2284 p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; |
| 2285 memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); |
| 2286 p->nIndex = nIndex; |
| 2287 for(i=0; i<nIndex; i++){ |
| 2288 fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1); |
| 2289 } |
| 2290 p->abNotindexed = (u8 *)&p->aIndex[nIndex]; |
| 2291 |
| 2292 /* Fill in the zName and zDb fields of the vtab structure. */ |
| 2293 zCsr = (char *)&p->abNotindexed[nCol]; |
| 2294 p->zName = zCsr; |
| 2295 memcpy(zCsr, argv[2], nName); |
| 2296 zCsr += nName; |
| 2297 p->zDb = zCsr; |
| 2298 memcpy(zCsr, argv[1], nDb); |
| 2299 zCsr += nDb; |
| 2300 |
| 2301 /* Fill in the azColumn array */ |
| 2302 for(iCol=0; iCol<nCol; iCol++){ |
| 2303 char *z; |
| 2304 int n = 0; |
| 2305 z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n); |
| 2306 memcpy(zCsr, z, n); |
| 2307 zCsr[n] = '\0'; |
| 2308 sqlite3Fts3Dequote(zCsr); |
| 2309 p->azColumn[iCol] = zCsr; |
| 2310 zCsr += n+1; |
| 2311 assert( zCsr <= &((char *)p)[nByte] ); |
| 2312 } |
| 2313 |
| 2314 /* Fill in the abNotindexed array */ |
| 2315 for(iCol=0; iCol<nCol; iCol++){ |
| 2316 int n = (int)strlen(p->azColumn[iCol]); |
| 2317 for(i=0; i<nNotindexed; i++){ |
| 2318 char *zNot = azNotindexed[i]; |
| 2319 if( zNot && n==(int)strlen(zNot) |
| 2320 && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n) |
| 2321 ){ |
| 2322 p->abNotindexed[iCol] = 1; |
| 2323 sqlite3_free(zNot); |
| 2324 azNotindexed[i] = 0; |
| 2325 } |
| 2326 } |
| 2327 } |
| 2328 for(i=0; i<nNotindexed; i++){ |
| 2329 if( azNotindexed[i] ){ |
| 2330 sqlite3Fts3ErrMsg(pzErr, "no such column: %s", azNotindexed[i]); |
| 2331 rc = SQLITE_ERROR; |
| 2332 } |
| 2333 } |
| 2334 |
| 2335 if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){ |
| 2336 char const *zMiss = (zCompress==0 ? "compress" : "uncompress"); |
| 2337 rc = SQLITE_ERROR; |
| 2338 sqlite3Fts3ErrMsg(pzErr, "missing %s parameter in fts4 constructor", zMiss); |
| 2339 } |
| 2340 p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc); |
| 2341 p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); |
| 2342 if( rc!=SQLITE_OK ) goto fts3_init_out; |
| 2343 |
| 2344 /* If this is an xCreate call, create the underlying tables in the |
| 2345 ** database. TODO: For xConnect(), it could verify that said tables exist. |
| 2346 */ |
| 2347 if( isCreate ){ |
| 2348 rc = fts3CreateTables(p); |
| 2349 } |
| 2350 |
| 2351 /* Check to see if a legacy fts3 table has been "upgraded" by the |
| 2352 ** addition of a %_stat table so that it can use incremental merge. |
| 2353 */ |
| 2354 if( !isFts4 && !isCreate ){ |
| 2355 p->bHasStat = 2; |
| 2356 } |
| 2357 |
| 2358 /* Figure out the page-size for the database. This is required in order to |
| 2359 ** estimate the cost of loading large doclists from the database. */ |
| 2360 fts3DatabasePageSize(&rc, p); |
| 2361 p->nNodeSize = p->nPgsz-35; |
| 2362 |
| 2363 /* Declare the table schema to SQLite. */ |
| 2364 fts3DeclareVtab(&rc, p); |
| 2365 |
| 2366 fts3_init_out: |
| 2367 sqlite3_free(zPrefix); |
| 2368 sqlite3_free(aIndex); |
| 2369 sqlite3_free(zCompress); |
| 2370 sqlite3_free(zUncompress); |
| 2371 sqlite3_free(zContent); |
| 2372 sqlite3_free(zLanguageid); |
| 2373 for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]); |
| 2374 sqlite3_free((void *)aCol); |
| 2375 sqlite3_free((void *)azNotindexed); |
| 2376 if( rc!=SQLITE_OK ){ |
| 2377 if( p ){ |
| 2378 fts3DisconnectMethod((sqlite3_vtab *)p); |
| 2379 }else if( pTokenizer ){ |
| 2380 pTokenizer->pModule->xDestroy(pTokenizer); |
| 2381 } |
| 2382 }else{ |
| 2383 assert( p->pSegments==0 ); |
| 2384 *ppVTab = &p->base; |
| 2385 } |
| 2386 return rc; |
| 2387 } |
| 2388 |
| 2389 /* |
| 2390 ** The xConnect() and xCreate() methods for the virtual table. All the |
| 2391 ** work is done in function fts3InitVtab(). |
| 2392 */ |
| 2393 static int fts3ConnectMethod( |
| 2394 sqlite3 *db, /* Database connection */ |
| 2395 void *pAux, /* Pointer to tokenizer hash table */ |
| 2396 int argc, /* Number of elements in argv array */ |
| 2397 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2398 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 2399 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 2400 ){ |
| 2401 return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); |
| 2402 } |
| 2403 static int fts3CreateMethod( |
| 2404 sqlite3 *db, /* Database connection */ |
| 2405 void *pAux, /* Pointer to tokenizer hash table */ |
| 2406 int argc, /* Number of elements in argv array */ |
| 2407 const char * const *argv, /* xCreate/xConnect argument array */ |
| 2408 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 2409 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 2410 ){ |
| 2411 return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); |
| 2412 } |
| 2413 |
| 2414 /* |
| 2415 ** Set the pIdxInfo->estimatedRows variable to nRow. Unless this |
| 2416 ** extension is currently being used by a version of SQLite too old to |
| 2417 ** support estimatedRows. In that case this function is a no-op. |
| 2418 */ |
| 2419 static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ |
| 2420 #if SQLITE_VERSION_NUMBER>=3008002 |
| 2421 if( sqlite3_libversion_number()>=3008002 ){ |
| 2422 pIdxInfo->estimatedRows = nRow; |
| 2423 } |
| 2424 #endif |
| 2425 } |
| 2426 |
| 2427 /* |
| 2428 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this |
| 2429 ** extension is currently being used by a version of SQLite too old to |
| 2430 ** support index-info flags. In that case this function is a no-op. |
| 2431 */ |
| 2432 static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){ |
| 2433 #if SQLITE_VERSION_NUMBER>=3008012 |
| 2434 if( sqlite3_libversion_number()>=3008012 ){ |
| 2435 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; |
| 2436 } |
| 2437 #endif |
| 2438 } |
| 2439 |
| 2440 /* |
| 2441 ** Implementation of the xBestIndex method for FTS3 tables. There |
| 2442 ** are three possible strategies, in order of preference: |
| 2443 ** |
| 2444 ** 1. Direct lookup by rowid or docid. |
| 2445 ** 2. Full-text search using a MATCH operator on a non-docid column. |
| 2446 ** 3. Linear scan of %_content table. |
| 2447 */ |
| 2448 static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |
| 2449 Fts3Table *p = (Fts3Table *)pVTab; |
| 2450 int i; /* Iterator variable */ |
| 2451 int iCons = -1; /* Index of constraint to use */ |
| 2452 |
| 2453 int iLangidCons = -1; /* Index of langid=x constraint, if present */ |
| 2454 int iDocidGe = -1; /* Index of docid>=x constraint, if present */ |
| 2455 int iDocidLe = -1; /* Index of docid<=x constraint, if present */ |
| 2456 int iIdx; |
| 2457 |
| 2458 /* By default use a full table scan. This is an expensive option, |
| 2459 ** so search through the constraints to see if a more efficient |
| 2460 ** strategy is possible. |
| 2461 */ |
| 2462 pInfo->idxNum = FTS3_FULLSCAN_SEARCH; |
| 2463 pInfo->estimatedCost = 5000000; |
| 2464 for(i=0; i<pInfo->nConstraint; i++){ |
| 2465 int bDocid; /* True if this constraint is on docid */ |
| 2466 struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; |
| 2467 if( pCons->usable==0 ){ |
| 2468 if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |
| 2469 /* There exists an unusable MATCH constraint. This means that if |
| 2470 ** the planner does elect to use the results of this call as part |
| 2471 ** of the overall query plan the user will see an "unable to use |
| 2472 ** function MATCH in the requested context" error. To discourage |
| 2473 ** this, return a very high cost here. */ |
| 2474 pInfo->idxNum = FTS3_FULLSCAN_SEARCH; |
| 2475 pInfo->estimatedCost = 1e50; |
| 2476 fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); |
| 2477 return SQLITE_OK; |
| 2478 } |
| 2479 continue; |
| 2480 } |
| 2481 |
| 2482 bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); |
| 2483 |
| 2484 /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ |
| 2485 if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ |
| 2486 pInfo->idxNum = FTS3_DOCID_SEARCH; |
| 2487 pInfo->estimatedCost = 1.0; |
| 2488 iCons = i; |
| 2489 } |
| 2490 |
| 2491 /* A MATCH constraint. Use a full-text search. |
| 2492 ** |
| 2493 ** If there is more than one MATCH constraint available, use the first |
| 2494 ** one encountered. If there is both a MATCH constraint and a direct |
| 2495 ** rowid/docid lookup, prefer the MATCH strategy. This is done even |
| 2496 ** though the rowid/docid lookup is faster than a MATCH query, selecting |
| 2497 ** it would lead to an "unable to use function MATCH in the requested |
| 2498 ** context" error. |
| 2499 */ |
| 2500 if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH |
| 2501 && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn |
| 2502 ){ |
| 2503 pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; |
| 2504 pInfo->estimatedCost = 2.0; |
| 2505 iCons = i; |
| 2506 } |
| 2507 |
| 2508 /* Equality constraint on the langid column */ |
| 2509 if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ |
| 2510 && pCons->iColumn==p->nColumn + 2 |
| 2511 ){ |
| 2512 iLangidCons = i; |
| 2513 } |
| 2514 |
| 2515 if( bDocid ){ |
| 2516 switch( pCons->op ){ |
| 2517 case SQLITE_INDEX_CONSTRAINT_GE: |
| 2518 case SQLITE_INDEX_CONSTRAINT_GT: |
| 2519 iDocidGe = i; |
| 2520 break; |
| 2521 |
| 2522 case SQLITE_INDEX_CONSTRAINT_LE: |
| 2523 case SQLITE_INDEX_CONSTRAINT_LT: |
| 2524 iDocidLe = i; |
| 2525 break; |
| 2526 } |
| 2527 } |
| 2528 } |
| 2529 |
| 2530 /* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */ |
| 2531 if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo); |
| 2532 |
| 2533 iIdx = 1; |
| 2534 if( iCons>=0 ){ |
| 2535 pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; |
| 2536 pInfo->aConstraintUsage[iCons].omit = 1; |
| 2537 } |
| 2538 if( iLangidCons>=0 ){ |
| 2539 pInfo->idxNum |= FTS3_HAVE_LANGID; |
| 2540 pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; |
| 2541 } |
| 2542 if( iDocidGe>=0 ){ |
| 2543 pInfo->idxNum |= FTS3_HAVE_DOCID_GE; |
| 2544 pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; |
| 2545 } |
| 2546 if( iDocidLe>=0 ){ |
| 2547 pInfo->idxNum |= FTS3_HAVE_DOCID_LE; |
| 2548 pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; |
| 2549 } |
| 2550 |
| 2551 /* Regardless of the strategy selected, FTS can deliver rows in rowid (or |
| 2552 ** docid) order. Both ascending and descending are possible. |
| 2553 */ |
| 2554 if( pInfo->nOrderBy==1 ){ |
| 2555 struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; |
| 2556 if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ |
| 2557 if( pOrder->desc ){ |
| 2558 pInfo->idxStr = "DESC"; |
| 2559 }else{ |
| 2560 pInfo->idxStr = "ASC"; |
| 2561 } |
| 2562 pInfo->orderByConsumed = 1; |
| 2563 } |
| 2564 } |
| 2565 |
| 2566 assert( p->pSegments==0 ); |
| 2567 return SQLITE_OK; |
| 2568 } |
| 2569 |
| 2570 /* |
| 2571 ** Implementation of xOpen method. |
| 2572 */ |
| 2573 static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 2574 sqlite3_vtab_cursor *pCsr; /* Allocated cursor */ |
| 2575 |
| 2576 UNUSED_PARAMETER(pVTab); |
| 2577 |
| 2578 /* Allocate a buffer large enough for an Fts3Cursor structure. If the |
| 2579 ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, |
| 2580 ** if the allocation fails, return SQLITE_NOMEM. |
| 2581 */ |
| 2582 *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); |
| 2583 if( !pCsr ){ |
| 2584 return SQLITE_NOMEM; |
| 2585 } |
| 2586 memset(pCsr, 0, sizeof(Fts3Cursor)); |
| 2587 return SQLITE_OK; |
| 2588 } |
| 2589 |
| 2590 /* |
| 2591 ** Finalize the statement handle at pCsr->pStmt. |
| 2592 ** |
| 2593 ** Or, if that statement handle is one created by fts3CursorSeekStmt(), |
| 2594 ** and the Fts3Table.pSeekStmt slot is currently NULL, save the statement |
| 2595 ** pointer there instead of finalizing it. |
| 2596 */ |
| 2597 static void fts3CursorFinalizeStmt(Fts3Cursor *pCsr){ |
| 2598 if( pCsr->bSeekStmt ){ |
| 2599 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 2600 if( p->pSeekStmt==0 ){ |
| 2601 p->pSeekStmt = pCsr->pStmt; |
| 2602 sqlite3_reset(pCsr->pStmt); |
| 2603 pCsr->pStmt = 0; |
| 2604 } |
| 2605 pCsr->bSeekStmt = 0; |
| 2606 } |
| 2607 sqlite3_finalize(pCsr->pStmt); |
| 2608 } |
| 2609 |
| 2610 /* |
| 2611 ** Close the cursor. For additional information see the documentation |
| 2612 ** on the xClose method of the virtual table interface. |
| 2613 */ |
| 2614 static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 2615 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 2616 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 2617 fts3CursorFinalizeStmt(pCsr); |
| 2618 sqlite3Fts3ExprFree(pCsr->pExpr); |
| 2619 sqlite3Fts3FreeDeferredTokens(pCsr); |
| 2620 sqlite3_free(pCsr->aDoclist); |
| 2621 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 2622 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 2623 sqlite3_free(pCsr); |
| 2624 return SQLITE_OK; |
| 2625 } |
| 2626 |
| 2627 /* |
| 2628 ** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then |
| 2629 ** compose and prepare an SQL statement of the form: |
| 2630 ** |
| 2631 ** "SELECT <columns> FROM %_content WHERE rowid = ?" |
| 2632 ** |
| 2633 ** (or the equivalent for a content=xxx table) and set pCsr->pStmt to |
| 2634 ** it. If an error occurs, return an SQLite error code. |
| 2635 */ |
| 2636 static int fts3CursorSeekStmt(Fts3Cursor *pCsr){ |
| 2637 int rc = SQLITE_OK; |
| 2638 if( pCsr->pStmt==0 ){ |
| 2639 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 2640 char *zSql; |
| 2641 if( p->pSeekStmt ){ |
| 2642 pCsr->pStmt = p->pSeekStmt; |
| 2643 p->pSeekStmt = 0; |
| 2644 }else{ |
| 2645 zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); |
| 2646 if( !zSql ) return SQLITE_NOMEM; |
| 2647 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); |
| 2648 sqlite3_free(zSql); |
| 2649 } |
| 2650 if( rc==SQLITE_OK ) pCsr->bSeekStmt = 1; |
| 2651 } |
| 2652 return rc; |
| 2653 } |
| 2654 |
| 2655 /* |
| 2656 ** Position the pCsr->pStmt statement so that it is on the row |
| 2657 ** of the %_content table that contains the last match. Return |
| 2658 ** SQLITE_OK on success. |
| 2659 */ |
| 2660 static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ |
| 2661 int rc = SQLITE_OK; |
| 2662 if( pCsr->isRequireSeek ){ |
| 2663 rc = fts3CursorSeekStmt(pCsr); |
| 2664 if( rc==SQLITE_OK ){ |
| 2665 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); |
| 2666 pCsr->isRequireSeek = 0; |
| 2667 if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ |
| 2668 return SQLITE_OK; |
| 2669 }else{ |
| 2670 rc = sqlite3_reset(pCsr->pStmt); |
| 2671 if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ |
| 2672 /* If no row was found and no error has occurred, then the %_content |
| 2673 ** table is missing a row that is present in the full-text index. |
| 2674 ** The data structures are corrupt. */ |
| 2675 rc = FTS_CORRUPT_VTAB; |
| 2676 pCsr->isEof = 1; |
| 2677 } |
| 2678 } |
| 2679 } |
| 2680 } |
| 2681 |
| 2682 if( rc!=SQLITE_OK && pContext ){ |
| 2683 sqlite3_result_error_code(pContext, rc); |
| 2684 } |
| 2685 return rc; |
| 2686 } |
| 2687 |
| 2688 /* |
| 2689 ** This function is used to process a single interior node when searching |
| 2690 ** a b-tree for a term or term prefix. The node data is passed to this |
| 2691 ** function via the zNode/nNode parameters. The term to search for is |
| 2692 ** passed in zTerm/nTerm. |
| 2693 ** |
| 2694 ** If piFirst is not NULL, then this function sets *piFirst to the blockid |
| 2695 ** of the child node that heads the sub-tree that may contain the term. |
| 2696 ** |
| 2697 ** If piLast is not NULL, then *piLast is set to the right-most child node |
| 2698 ** that heads a sub-tree that may contain a term for which zTerm/nTerm is |
| 2699 ** a prefix. |
| 2700 ** |
| 2701 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. |
| 2702 */ |
| 2703 static int fts3ScanInteriorNode( |
| 2704 const char *zTerm, /* Term to select leaves for */ |
| 2705 int nTerm, /* Size of term zTerm in bytes */ |
| 2706 const char *zNode, /* Buffer containing segment interior node */ |
| 2707 int nNode, /* Size of buffer at zNode */ |
| 2708 sqlite3_int64 *piFirst, /* OUT: Selected child node */ |
| 2709 sqlite3_int64 *piLast /* OUT: Selected child node */ |
| 2710 ){ |
| 2711 int rc = SQLITE_OK; /* Return code */ |
| 2712 const char *zCsr = zNode; /* Cursor to iterate through node */ |
| 2713 const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ |
| 2714 char *zBuffer = 0; /* Buffer to load terms into */ |
| 2715 int nAlloc = 0; /* Size of allocated buffer */ |
| 2716 int isFirstTerm = 1; /* True when processing first term on page */ |
| 2717 sqlite3_int64 iChild; /* Block id of child node to descend to */ |
| 2718 |
| 2719 /* Skip over the 'height' varint that occurs at the start of every |
| 2720 ** interior node. Then load the blockid of the left-child of the b-tree |
| 2721 ** node into variable iChild. |
| 2722 ** |
| 2723 ** Even if the data structure on disk is corrupted, this (reading two |
| 2724 ** varints from the buffer) does not risk an overread. If zNode is a |
| 2725 ** root node, then the buffer comes from a SELECT statement. SQLite does |
| 2726 ** not make this guarantee explicitly, but in practice there are always |
| 2727 ** either more than 20 bytes of allocated space following the nNode bytes of |
| 2728 ** contents, or two zero bytes. Or, if the node is read from the %_segments |
| 2729 ** table, then there are always 20 bytes of zeroed padding following the |
| 2730 ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). |
| 2731 */ |
| 2732 zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); |
| 2733 zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); |
| 2734 if( zCsr>zEnd ){ |
| 2735 return FTS_CORRUPT_VTAB; |
| 2736 } |
| 2737 |
| 2738 while( zCsr<zEnd && (piFirst || piLast) ){ |
| 2739 int cmp; /* memcmp() result */ |
| 2740 int nSuffix; /* Size of term suffix */ |
| 2741 int nPrefix = 0; /* Size of term prefix */ |
| 2742 int nBuffer; /* Total term size */ |
| 2743 |
| 2744 /* Load the next term on the node into zBuffer. Use realloc() to expand |
| 2745 ** the size of zBuffer if required. */ |
| 2746 if( !isFirstTerm ){ |
| 2747 zCsr += fts3GetVarint32(zCsr, &nPrefix); |
| 2748 } |
| 2749 isFirstTerm = 0; |
| 2750 zCsr += fts3GetVarint32(zCsr, &nSuffix); |
| 2751 |
| 2752 /* NOTE(shess): Previous code checked for negative nPrefix and |
| 2753 ** nSuffix and suffix overrunning zEnd. Additionally corrupt if |
| 2754 ** the prefix is longer than the previous term, or if the suffix |
| 2755 ** causes overflow. |
| 2756 */ |
| 2757 if( nPrefix<0 || nSuffix<0 /* || nPrefix>nBuffer */ |
| 2758 || &zCsr[nSuffix]<zCsr || &zCsr[nSuffix]>zEnd ){ |
| 2759 rc = FTS_CORRUPT_VTAB; |
| 2760 goto finish_scan; |
| 2761 } |
| 2762 if( nPrefix+nSuffix>nAlloc ){ |
| 2763 char *zNew; |
| 2764 nAlloc = (nPrefix+nSuffix) * 2; |
| 2765 zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); |
| 2766 if( !zNew ){ |
| 2767 rc = SQLITE_NOMEM; |
| 2768 goto finish_scan; |
| 2769 } |
| 2770 zBuffer = zNew; |
| 2771 } |
| 2772 assert( zBuffer ); |
| 2773 memcpy(&zBuffer[nPrefix], zCsr, nSuffix); |
| 2774 nBuffer = nPrefix + nSuffix; |
| 2775 zCsr += nSuffix; |
| 2776 |
| 2777 /* Compare the term we are searching for with the term just loaded from |
| 2778 ** the interior node. If the specified term is greater than or equal |
| 2779 ** to the term from the interior node, then all terms on the sub-tree |
| 2780 ** headed by node iChild are smaller than zTerm. No need to search |
| 2781 ** iChild. |
| 2782 ** |
| 2783 ** If the interior node term is larger than the specified term, then |
| 2784 ** the tree headed by iChild may contain the specified term. |
| 2785 */ |
| 2786 cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); |
| 2787 if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ |
| 2788 *piFirst = iChild; |
| 2789 piFirst = 0; |
| 2790 } |
| 2791 |
| 2792 if( piLast && cmp<0 ){ |
| 2793 *piLast = iChild; |
| 2794 piLast = 0; |
| 2795 } |
| 2796 |
| 2797 iChild++; |
| 2798 }; |
| 2799 |
| 2800 if( piFirst ) *piFirst = iChild; |
| 2801 if( piLast ) *piLast = iChild; |
| 2802 |
| 2803 finish_scan: |
| 2804 sqlite3_free(zBuffer); |
| 2805 return rc; |
| 2806 } |
| 2807 |
| 2808 |
| 2809 /* |
| 2810 ** The buffer pointed to by argument zNode (size nNode bytes) contains an |
| 2811 ** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) |
| 2812 ** contains a term. This function searches the sub-tree headed by the zNode |
| 2813 ** node for the range of leaf nodes that may contain the specified term |
| 2814 ** or terms for which the specified term is a prefix. |
| 2815 ** |
| 2816 ** If piLeaf is not NULL, then *piLeaf is set to the blockid of the |
| 2817 ** left-most leaf node in the tree that may contain the specified term. |
| 2818 ** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the |
| 2819 ** right-most leaf node that may contain a term for which the specified |
| 2820 ** term is a prefix. |
| 2821 ** |
| 2822 ** It is possible that the range of returned leaf nodes does not contain |
| 2823 ** the specified term or any terms for which it is a prefix. However, if the |
| 2824 ** segment does contain any such terms, they are stored within the identified |
| 2825 ** range. Because this function only inspects interior segment nodes (and |
| 2826 ** never loads leaf nodes into memory), it is not possible to be sure. |
| 2827 ** |
| 2828 ** If an error occurs, an error code other than SQLITE_OK is returned. |
| 2829 */ |
| 2830 static int fts3SelectLeaf( |
| 2831 Fts3Table *p, /* Virtual table handle */ |
| 2832 const char *zTerm, /* Term to select leaves for */ |
| 2833 int nTerm, /* Size of term zTerm in bytes */ |
| 2834 const char *zNode, /* Buffer containing segment interior node */ |
| 2835 int nNode, /* Size of buffer at zNode */ |
| 2836 sqlite3_int64 *piLeaf, /* Selected leaf node */ |
| 2837 sqlite3_int64 *piLeaf2 /* Selected leaf node */ |
| 2838 ){ |
| 2839 int rc = SQLITE_OK; /* Return code */ |
| 2840 int iHeight; /* Height of this node in tree */ |
| 2841 |
| 2842 assert( piLeaf || piLeaf2 ); |
| 2843 |
| 2844 fts3GetVarint32(zNode, &iHeight); |
| 2845 rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); |
| 2846 assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); |
| 2847 |
| 2848 if( rc==SQLITE_OK && iHeight>1 ){ |
| 2849 char *zBlob = 0; /* Blob read from %_segments table */ |
| 2850 int nBlob = 0; /* Size of zBlob in bytes */ |
| 2851 |
| 2852 if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ |
| 2853 rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); |
| 2854 if( rc==SQLITE_OK ){ |
| 2855 rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); |
| 2856 } |
| 2857 sqlite3_free(zBlob); |
| 2858 piLeaf = 0; |
| 2859 zBlob = 0; |
| 2860 } |
| 2861 |
| 2862 if( rc==SQLITE_OK ){ |
| 2863 rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); |
| 2864 } |
| 2865 if( rc==SQLITE_OK ){ |
| 2866 rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); |
| 2867 } |
| 2868 sqlite3_free(zBlob); |
| 2869 } |
| 2870 |
| 2871 return rc; |
| 2872 } |
| 2873 |
| 2874 /* |
| 2875 ** This function is used to create delta-encoded serialized lists of FTS3 |
| 2876 ** varints. Each call to this function appends a single varint to a list. |
| 2877 */ |
| 2878 static void fts3PutDeltaVarint( |
| 2879 char **pp, /* IN/OUT: Output pointer */ |
| 2880 sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ |
| 2881 sqlite3_int64 iVal /* Write this value to the list */ |
| 2882 ){ |
| 2883 assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); |
| 2884 *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); |
| 2885 *piPrev = iVal; |
| 2886 } |
| 2887 |
/*
** On entry, *ppPoslist must point to the start of a position-list. On
** return, *ppPoslist points to the first byte after the position-list.
**
** A position list is the list of (delta encoded) positions and columns
** belonging to a single document record of a doclist. In other words,
** this routine advances *ppPoslist so that it points to the next docid
** in the doclist, or to the first byte past the end of the doclist.
**
** If pp is not NULL, the position list, including its terminator byte,
** is also copied to *pp, and *pp is advanced to the first byte past the
** last byte copied.
*/
static void fts3PoslistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the position list */
  char *pEnd = pStart;            /* Scans forward to the terminator */
  int bMore = 0;                  /* True if previous byte had 0x80 set */

  /* A position list ends with a POS_END (0) byte, which is a zero encoded
  ** as an FTS3 varint. But a 0x00 byte that directly follows a byte with
  ** the 0x80 bit set is not a varint 0; it is the tail of some other,
  ** multi-byte, value. So scan forward to the first 0x00 byte that is not
  ** preceded by a byte with the 0x80 bit set.
  */
  while( bMore || *pEnd!=0 ){
    bMore = ((*pEnd & 0x80)!=0);
    pEnd++;
    testcase( bMore && (*pEnd)==0 );
  }
  pEnd++;                         /* Step over the POS_END terminator */

  if( pp ){
    int nCopy = (int)(pEnd - pStart);
    memcpy(*pp, pStart, nCopy);
    *pp += nCopy;
  }
  *ppPoslist = pEnd;
}
| 2931 |
/*
** On entry, *ppPoslist must point to the start of a column-list. On
** return, *ppPoslist points to the terminator byte (POS_COLUMN or
** POS_END) of that column-list.
**
** A column-list is the list of delta-encoded positions for a single
** column within a single document within a doclist. It is terminated
** by either a POS_COLUMN varint (1) or a POS_END varint (0).
**
** If pp is not NULL, the contents of the column-list are also copied to
** *pp, and *pp is advanced to the first byte past the last byte copied.
** The POS_COLUMN or POS_END terminator is not copied.
*/
static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the column list */
  char *pEnd = pStart;            /* Scans forward to the terminator */
  int bMore = 0;                  /* True if previous byte had 0x80 set */

  /* The terminator is a 0x01 or 0x00 byte that is not part of a
  ** multi-byte varint, i.e. one that does not directly follow a byte with
  ** the 0x80 bit set.
  */
  while( bMore || (*pEnd & 0xFE)!=0 ){
    bMore = ((*pEnd & 0x80)!=0);
    pEnd++;
    testcase( bMore && ((*pEnd)&0xfe)==0 );
  }

  if( pp ){
    int nCopy = (int)(pEnd - pStart);
    memcpy(*pp, pStart, nCopy);
    *pp += nCopy;
  }
  *ppPoslist = pEnd;
}
| 2969 |
| 2970 /* |
| 2971 ** Value used to signify the end of a position-list. This is safe because |
| 2972 ** it is not possible to have a document with 2^31 terms. |
| 2973 */ |
| 2974 #define POSITION_LIST_END 0x7fffffff |
| 2975 |
| 2976 /* |
| 2977 ** This function is used to help parse position-lists. When this function is |
| 2978 ** called, *pp may point to the start of the next varint in the position-list |
| 2979 ** being parsed, or it may point to 1 byte past the end of the position-list |
| 2980 ** (in which case **pp will be a terminator bytes POS_END (0) or |
| 2981 ** (1)). |
| 2982 ** |
| 2983 ** If *pp points past the end of the current position-list, set *pi to |
| 2984 ** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, |
| 2985 ** increment the current value of *pi by the value read, and set *pp to |
| 2986 ** point to the next value before returning. |
| 2987 ** |
| 2988 ** Before calling this routine *pi must be initialized to the value of |
| 2989 ** the previous position, or zero if we are reading the first position |
| 2990 ** in the position-list. Because positions are delta-encoded, the value |
| 2991 ** of the previous position is needed in order to compute the value of |
| 2992 ** the next position. |
| 2993 */ |
| 2994 static void fts3ReadNextPos( |
| 2995 char **pp, /* IN/OUT: Pointer into position-list buffer */ |
| 2996 sqlite3_int64 *pi /* IN/OUT: Value read from position-list */ |
| 2997 ){ |
| 2998 if( (**pp)&0xFE ){ |
| 2999 fts3GetDeltaVarint(pp, pi); |
| 3000 *pi -= 2; |
| 3001 }else{ |
| 3002 *pi = POSITION_LIST_END; |
| 3003 } |
| 3004 } |
| 3005 |
/*
** If iCol is not 0, write a POS_COLUMN (1) byte followed by the value of
** iCol encoded as a varint to *pp. This starts a new column list.
**
** *pp is advanced to the byte just after the last byte written. If iCol
** is 0 nothing is written and *pp is not modified. The return value is
** the total number of bytes written (0 if iCol==0).
*/
static int fts3PutColNumber(char **pp, int iCol){
  char *pOut;                     /* Output pointer */
  int nWritten;                   /* Number of bytes written */

  if( iCol==0 ){
    return 0;
  }
  pOut = *pp;
  pOut[0] = 0x01;
  nWritten = 1 + sqlite3Fts3PutVarint(&pOut[1], iCol);
  *pp = &pOut[nWritten];
  return nWritten;
}
| 3025 |
| 3026 /* |
| 3027 ** Compute the union of two position lists. The output written |
| 3028 ** into *pp contains all positions of both *pp1 and *pp2 in sorted |
| 3029 ** order and with any duplicates removed. All pointers are |
| 3030 ** updated appropriately. The caller is responsible for insuring |
| 3031 ** that there is enough space in *pp to hold the complete output. |
| 3032 */ |
| 3033 static void fts3PoslistMerge( |
| 3034 char **pp, /* Output buffer */ |
| 3035 char **pp1, /* Left input list */ |
| 3036 char **pp2 /* Right input list */ |
| 3037 ){ |
| 3038 char *p = *pp; |
| 3039 char *p1 = *pp1; |
| 3040 char *p2 = *pp2; |
| 3041 |
| 3042 while( *p1 || *p2 ){ |
| 3043 int iCol1; /* The current column index in pp1 */ |
| 3044 int iCol2; /* The current column index in pp2 */ |
| 3045 |
| 3046 if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); |
| 3047 else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; |
| 3048 else iCol1 = 0; |
| 3049 |
| 3050 if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); |
| 3051 else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; |
| 3052 else iCol2 = 0; |
| 3053 |
| 3054 if( iCol1==iCol2 ){ |
| 3055 sqlite3_int64 i1 = 0; /* Last position from pp1 */ |
| 3056 sqlite3_int64 i2 = 0; /* Last position from pp2 */ |
| 3057 sqlite3_int64 iPrev = 0; |
| 3058 int n = fts3PutColNumber(&p, iCol1); |
| 3059 p1 += n; |
| 3060 p2 += n; |
| 3061 |
| 3062 /* At this point, both p1 and p2 point to the start of column-lists |
| 3063 ** for the same column (the column with index iCol1 and iCol2). |
| 3064 ** A column-list is a list of non-negative delta-encoded varints, each |
| 3065 ** incremented by 2 before being stored. Each list is terminated by a |
| 3066 ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists |
| 3067 ** and writes the results to buffer p. p is left pointing to the byte |
| 3068 ** after the list written. No terminator (POS_END or POS_COLUMN) is |
| 3069 ** written to the output. |
| 3070 */ |
| 3071 fts3GetDeltaVarint(&p1, &i1); |
| 3072 fts3GetDeltaVarint(&p2, &i2); |
| 3073 do { |
| 3074 fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2); |
| 3075 iPrev -= 2; |
| 3076 if( i1==i2 ){ |
| 3077 fts3ReadNextPos(&p1, &i1); |
| 3078 fts3ReadNextPos(&p2, &i2); |
| 3079 }else if( i1<i2 ){ |
| 3080 fts3ReadNextPos(&p1, &i1); |
| 3081 }else{ |
| 3082 fts3ReadNextPos(&p2, &i2); |
| 3083 } |
| 3084 }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END ); |
| 3085 }else if( iCol1<iCol2 ){ |
| 3086 p1 += fts3PutColNumber(&p, iCol1); |
| 3087 fts3ColumnlistCopy(&p, &p1); |
| 3088 }else{ |
| 3089 p2 += fts3PutColNumber(&p, iCol2); |
| 3090 fts3ColumnlistCopy(&p, &p2); |
| 3091 } |
| 3092 } |
| 3093 |
| 3094 *p++ = POS_END; |
| 3095 *pp = p; |
| 3096 *pp1 = p1 + 1; |
| 3097 *pp2 = p2 + 1; |
| 3098 } |
| 3099 |
| 3100 /* |
| 3101 ** This function is used to merge two position lists into one. When it is |
| 3102 ** called, *pp1 and *pp2 must both point to position lists. A position-list is |
| 3103 ** the part of a doclist that follows each document id. For example, if a row |
| 3104 ** contains: |
| 3105 ** |
| 3106 ** 'a b c'|'x y z'|'a b b a' |
| 3107 ** |
| 3108 ** Then the position list for this row for token 'b' would consist of: |
| 3109 ** |
| 3110 ** 0x02 0x01 0x02 0x03 0x03 0x00 |
| 3111 ** |
| 3112 ** When this function returns, both *pp1 and *pp2 are left pointing to the |
| 3113 ** byte following the 0x00 terminator of their respective position lists. |
| 3114 ** |
| 3115 ** If isSaveLeft is 0, an entry is added to the output position list for |
| 3116 ** each position in *pp2 for which there exists one or more positions in |
| 3117 ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. |
| 3118 ** when the *pp1 token appears before the *pp2 token, but not more than nToken |
| 3119 ** slots before it. |
| 3120 ** |
| 3121 ** e.g. nToken==1 searches for adjacent positions. |
| ** |
| ** If isSaveLeft is non-zero, the position saved for each match is the one |
| ** read from *pp1 instead of the one read from *pp2. If isExact is non-zero, |
| ** only pairs for which pos(*pp2)==pos(*pp1)+nToken are treated as matches. |
| ** isSaveLeft and isExact are never both set (see the assert() below). |
| ** |
| ** If at least one position is written, the output list is terminated with |
| ** a 0x00 byte, *pp is advanced to point just past it and 1 is returned. |
| ** Otherwise *pp is left unchanged and 0 is returned. |
| 3122 */ |
| 3123 static int fts3PoslistPhraseMerge( |
| 3124 char **pp, /* IN/OUT: Preallocated output buffer */ |
| 3125 int nToken, /* Maximum difference in token positions */ |
| 3126 int isSaveLeft, /* Save the left position */ |
| 3127 int isExact, /* True to match only if *pp1 is exactly nToken before *pp2 */ |
| 3128 char **pp1, /* IN/OUT: Left input list */ |
| 3129 char **pp2 /* IN/OUT: Right input list */ |
| 3130 ){ |
| 3131 char *p = *pp; |
| 3132 char *p1 = *pp1; |
| 3133 char *p2 = *pp2; |
| 3134 int iCol1 = 0; |
| 3135 int iCol2 = 0; |
| 3136 |
| 3137 /* Never set both isSaveLeft and isExact for the same invocation. */ |
| 3138 assert( isSaveLeft==0 || isExact==0 ); |
| 3139 |
| 3140 assert( p!=0 && *p1!=0 && *p2!=0 ); |
| 3141 if( *p1==POS_COLUMN ){ |
| 3142 p1++; |
| 3143 p1 += fts3GetVarint32(p1, &iCol1); |
| 3144 } |
| 3145 if( *p2==POS_COLUMN ){ |
| 3146 p2++; |
| 3147 p2 += fts3GetVarint32(p2, &iCol2); |
| 3148 } |
| 3149 |
| 3150 while( 1 ){ |
| 3151 if( iCol1==iCol2 ){ /* Both lists are positioned on the same column */ |
| 3152 char *pSave = p; /* Output position to restore if nothing matches */ |
| 3153 sqlite3_int64 iPrev = 0; |
| 3154 sqlite3_int64 iPos1 = 0; |
| 3155 sqlite3_int64 iPos2 = 0; |
| 3156 |
| 3157 if( iCol1 ){ /* No column header is written for column 0 */ |
| 3158 *p++ = POS_COLUMN; |
| 3159 p += sqlite3Fts3PutVarint(p, iCol1); |
| 3160 } |
| 3161 |
| 3162 assert( *p1!=POS_END && *p1!=POS_COLUMN ); |
| 3163 assert( *p2!=POS_END && *p2!=POS_COLUMN ); |
| 3164 fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; /* Remove the +2 bias */ |
| 3165 fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; /* Remove the +2 bias */ |
| 3166 |
| 3167 while( 1 ){ |
| 3168 if( iPos2==iPos1+nToken |
| 3169 || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) |
| 3170 ){ |
| 3171 sqlite3_int64 iSave; |
| 3172 iSave = isSaveLeft ? iPos1 : iPos2; |
| 3173 fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; |
| 3174 pSave = 0; /* A match was written: keep this column's output */ |
| 3175 assert( p ); |
| 3176 } |
| 3177 if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ |
| 3178 if( (*p2&0xFE)==0 ) break; /* Next byte is 0x00 or 0x01: list done */ |
| 3179 fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; |
| 3180 }else{ |
| 3181 if( (*p1&0xFE)==0 ) break; /* Next byte is 0x00 or 0x01: list done */ |
| 3182 fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; |
| 3183 } |
| 3184 } |
| 3185 |
| 3186 if( pSave ){ /* No match in this column: discard any column header */ |
| 3187 assert( pp && p ); |
| 3188 p = pSave; |
| 3189 } |
| 3190 |
| 3191 fts3ColumnlistCopy(0, &p1); /* Skip the rest of this column-list */ |
| 3192 fts3ColumnlistCopy(0, &p2); /* Skip the rest of this column-list */ |
| 3193 assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); |
| 3194 if( 0==*p1 || 0==*p2 ) break; |
| 3195 |
| 3196 p1++; |
| 3197 p1 += fts3GetVarint32(p1, &iCol1); |
| 3198 p2++; |
| 3199 p2 += fts3GetVarint32(p2, &iCol2); |
| 3200 } |
| 3201 |
| 3202 /* Advance pointer p1 or p2 (whichever corresponds to the smaller of |
| 3203 ** iCol1 and iCol2) so that it points to either the 0x00 that marks the |
| 3204 ** end of the position list, or the 0x01 that precedes the next |
| 3205 ** column-number in the position list. |
| 3206 */ |
| 3207 else if( iCol1<iCol2 ){ |
| 3208 fts3ColumnlistCopy(0, &p1); |
| 3209 if( 0==*p1 ) break; |
| 3210 p1++; |
| 3211 p1 += fts3GetVarint32(p1, &iCol1); |
| 3212 }else{ |
| 3213 fts3ColumnlistCopy(0, &p2); |
| 3214 if( 0==*p2 ) break; |
| 3215 p2++; |
| 3216 p2 += fts3GetVarint32(p2, &iCol2); |
| 3217 } |
| 3218 } |
| 3219 |
| 3220 fts3PoslistCopy(0, &p2); /* Move past the end of the right-hand list */ |
| 3221 fts3PoslistCopy(0, &p1); /* Move past the end of the left-hand list */ |
| 3222 *pp1 = p1; |
| 3223 *pp2 = p2; |
| 3224 if( *pp==p ){ /* Nothing was written to the output */ |
| 3225 return 0; |
| 3226 } |
| 3227 *p++ = 0x00; |
| 3228 *pp = p; |
| 3229 return 1; |
| 3230 } |
| 3231 |
/*
** Merge two position-lists as required by the NEAR operator. The two
** input lists belong to the left and right phrases of an expression such
** as:
**
**     "phrase 1" NEAR "phrase number 2"
**
** *pp1 is the position list for the left-hand phrase and *pp2 the list
** for the right-hand phrase. As elsewhere, each position is the offset of
** the last token of the phrase ("1" and "2" in the example).
**
** Two passes are made using fts3PoslistPhraseMerge():
**
**   1. Left-before-right: collects positions from *pp2 that follow a
**      position in *pp1 by no more than nRight tokens. Results are
**      written to the start of aTmp.
**
**   2. Right-before-left: the inputs are rewound and swapped, and the
**      positions from *pp2 that precede a position in *pp1 by no more
**      than nLeft tokens are collected. Results are written to aTmp
**      directly after the output of pass 1.
**
** The union of the two result sets is then written to *pp, so that the
** output is a copy of *pp2 with every entry that is not sufficiently
** NEAR an entry of *pp1 removed.
**
** Returns 1 if anything was written to *pp, or 0 if neither pass produced
** a match (in which case *pp is not written to).
*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *pLeftStart = *pp1;        /* Start of left list, for rewinding */
  char *pRightStart = *pp2;       /* Start of right list, for rewinding */
  char *pFwdEnd = aTmp;           /* End of pass 1 output */
  char *aRev;                     /* Start of pass 2 output */
  char *pRevEnd;                  /* End of pass 2 output */
  int bFwd;                       /* True if pass 1 wrote anything */
  int bRev;                       /* True if pass 2 wrote anything */

  /* Pass 1: left phrase before right phrase. */
  fts3PoslistPhraseMerge(&pFwdEnd, nRight, 0, 0, pp1, pp2);

  /* Pass 2: rewind both inputs, swap their roles and save the "left"
  ** position, which is now the one read from *pp2. */
  aRev = pFwdEnd;
  pRevEnd = pFwdEnd;
  *pp1 = pLeftStart;
  *pp2 = pRightStart;
  fts3PoslistPhraseMerge(&pRevEnd, nLeft, 1, 0, pp2, pp1);

  bFwd = (pFwdEnd!=aTmp);
  bRev = (pRevEnd!=aRev);

  if( bFwd && bRev ){
    fts3PoslistMerge(pp, &aTmp, &aRev);
    return 1;
  }
  if( bFwd ){
    fts3PoslistCopy(pp, &aTmp);
    return 1;
  }
  if( bRev ){
    fts3PoslistCopy(pp, &aRev);
    return 1;
  }
  return 0;
}
| 3280 |
/*
** An instance of this structure is used to merge together the (potentially
** large number of) doclists for each term that matches a prefix query.
** See function fts3TermSelectMerge() for details.
**
** Entry i of aaOutput[] is either NULL (slot unused) or a pointer to a
** doclist buffer that is anOutput[i] bytes in size.
*/
typedef struct TermSelect {
  char *aaOutput[16];             /* Malloc'd output buffers */
  int anOutput[16];               /* Size of each output buffer in bytes */
} TermSelect;
| 3291 |
| 3292 /* |
| 3293 ** This function is used to read a single varint from a buffer. Parameter |
| 3294 ** pEnd points 1 byte past the end of the buffer. When this function is |
| 3295 ** called, if *pp points to pEnd or greater, then the end of the buffer |
| 3296 ** has been reached. In this case *pp is set to 0 and the function returns. |
| 3297 ** |
| 3298 ** If *pp does not point to or past pEnd, then a single varint is read |
| 3299 ** from *pp. *pp is then set to point 1 byte past the end of the read varint. |
| 3300 ** |
| 3301 ** If bDescIdx is false, the value read is added to *pVal before returning. |
| 3302 ** If it is true, the value read is subtracted from *pVal before this |
| 3303 ** function returns. |
| 3304 */ |
| 3305 static void fts3GetDeltaVarint3( |
| 3306 char **pp, /* IN/OUT: Point to read varint from */ |
| 3307 char *pEnd, /* End of buffer */ |
| 3308 int bDescIdx, /* True if docids are descending */ |
| 3309 sqlite3_int64 *pVal /* IN/OUT: Integer value */ |
| 3310 ){ |
| 3311 if( *pp>=pEnd ){ |
| 3312 *pp = 0; |
| 3313 }else{ |
| 3314 sqlite3_int64 iVal; |
| 3315 *pp += sqlite3Fts3GetVarint(*pp, &iVal); |
| 3316 if( bDescIdx ){ |
| 3317 *pVal -= iVal; |
| 3318 }else{ |
| 3319 *pVal += iVal; |
| 3320 } |
| 3321 } |
| 3322 } |
| 3323 |
| 3324 /* |
| 3325 ** This function is used to write a single varint to a buffer. The varint |
| 3326 ** is written to *pp. Before returning, *pp is set to point 1 byte past the |
| 3327 ** end of the value written. |
| 3328 ** |
| 3329 ** If *pbFirst is zero when this function is called, the value written to |
| 3330 ** the buffer is that of parameter iVal. |
| 3331 ** |
| 3332 ** If *pbFirst is non-zero when this function is called, then the value |
| 3333 ** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) |
| 3334 ** (if bDescIdx is non-zero). |
| 3335 ** |
| 3336 ** Before returning, this function always sets *pbFirst to 1 and *piPrev |
| 3337 ** to the value of parameter iVal. |
| 3338 */ |
| 3339 static void fts3PutDeltaVarint3( |
| 3340 char **pp, /* IN/OUT: Output pointer */ |
| 3341 int bDescIdx, /* True for descending docids */ |
| 3342 sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ |
| 3343 int *pbFirst, /* IN/OUT: True after first int written */ |
| 3344 sqlite3_int64 iVal /* Write this value to the list */ |
| 3345 ){ |
| 3346 sqlite3_int64 iWrite; |
| 3347 if( bDescIdx==0 || *pbFirst==0 ){ |
| 3348 iWrite = iVal - *piPrev; |
| 3349 }else{ |
| 3350 iWrite = *piPrev - iVal; |
| 3351 } |
| 3352 assert( *pbFirst || *piPrev==0 ); |
| 3353 assert( *pbFirst==0 || iWrite>0 ); |
| 3354 *pp += sqlite3Fts3PutVarint(*pp, iWrite); |
| 3355 *piPrev = iVal; |
| 3356 *pbFirst = 1; |
| 3357 } |
| 3358 |
| 3359 |
/*
** This macro is used by various functions that merge doclists. The two
** arguments are 64-bit docid values. If the value of the stack variable
** bDescDoclist is 0 when this macro is invoked, then it evaluates to a
** negative, zero or positive integer if i1 is less than, equal to or
** greater than i2, respectively. Otherwise (bDescDoclist!=0) the sign of
** the result is reversed.
**
** The result is computed by comparing the two values rather than by
** subtracting them, since (i1-i2) overflows - and so may have the wrong
** sign - when the two docids are far apart. Only the sign of the result
** is meaningful; the magnitude is always 0 or 1.
**
** Each argument is evaluated more than once, so arguments must not have
** side effects.
**
** Using this makes it easier to write code that can merge doclists that are
** sorted in either ascending or descending order.
*/
#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (((i1)>(i2)) - ((i1)<(i2))))
| 3370 |
| 3371 /* |
| 3372 ** This function does an "OR" merge of two doclists (output contains all |
| 3373 ** positions contained in either argument doclist). If the docids in the |
| 3374 ** input doclists are sorted in ascending order, parameter bDescDoclist |
| 3375 ** should be false. If they are sorted in descending order, it should be |
| 3376 ** passed a non-zero value. |
| 3377 ** |
| 3378 ** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer |
| 3379 ** containing the output doclist and SQLITE_OK is returned. In this case |
| 3380 ** *pnOut is set to the number of bytes in the output doclist. |
| 3381 ** |
| 3382 ** If an error occurs, an SQLite error code is returned. The output values |
| 3383 ** are undefined in this case. |
| 3384 */ |
| 3385 static int fts3DoclistOrMerge( |
| 3386 int bDescDoclist, /* True if arguments are desc */ |
| 3387 char *a1, int n1, /* First doclist */ |
| 3388 char *a2, int n2, /* Second doclist */ |
| 3389 char **paOut, int *pnOut /* OUT: Malloc'd doclist */ |
| 3390 ){ |
| 3391 sqlite3_int64 i1 = 0; |
| 3392 sqlite3_int64 i2 = 0; |
| 3393 sqlite3_int64 iPrev = 0; |
| 3394 char *pEnd1 = &a1[n1]; |
| 3395 char *pEnd2 = &a2[n2]; |
| 3396 char *p1 = a1; |
| 3397 char *p2 = a2; |
| 3398 char *p; |
| 3399 char *aOut; |
| 3400 int bFirstOut = 0; |
| 3401 |
| 3402 *paOut = 0; |
| 3403 *pnOut = 0; |
| 3404 |
| 3405 /* Allocate space for the output. Both the input and output doclists |
| 3406 ** are delta encoded. If they are in ascending order (bDescDoclist==0), |
| 3407 ** then the first docid in each list is simply encoded as a varint. For |
| 3408 ** each subsequent docid, the varint stored is the difference between the |
| 3409 ** current and previous docid (a positive number - since the list is in |
| 3410 ** ascending order). |
| 3411 ** |
| 3412 ** The first docid written to the output is therefore encoded using the |
| 3413 ** same number of bytes as it is in whichever of the input lists it is |
| 3414 ** read from. And each subsequent docid read from the same input list |
| 3415 ** consumes either the same or less bytes as it did in the input (since |
| 3416 ** the difference between it and the previous value in the output must |
| 3417 ** be a positive value less than or equal to the delta value read from |
| 3418 ** the input list). The same argument applies to all but the first docid |
| 3419 ** read from the 'other' list. And to the contents of all position lists |
| 3420 ** that will be copied and merged from the input to the output. |
| 3421 ** |
| 3422 ** However, if the first docid copied to the output is a negative number, |
| 3423 ** then the encoding of the first docid from the 'other' input list may |
| 3424 ** be larger in the output than it was in the input (since the delta value |
| 3425 ** may be a larger positive integer than the actual docid). |
| 3426 ** |
| 3427 ** The space required to store the output is therefore the sum of the |
| 3428 ** sizes of the two inputs, plus enough space for exactly one of the input |
| 3429 ** docids to grow. |
| 3430 ** |
| 3431 ** A symmetric argument may be made if the doclists are in descending |
| 3432 ** order. |
| 3433 */ |
| 3434 aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); |
| 3435 if( !aOut ) return SQLITE_NOMEM; |
| 3436 |
| 3437 p = aOut; |
| 3438 fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); /* i1 is 0: first docid read as-is */ |
| 3439 fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); /* i2 is 0: first docid read as-is */ |
| 3440 while( p1 || p2 ){ /* p1/p2 are set to 0 once their list is exhausted */ |
| 3441 sqlite3_int64 iDiff = DOCID_CMP(i1, i2); |
| 3442 |
| 3443 if( p2 && p1 && iDiff==0 ){ /* Same docid in both lists: merge positions */ |
| 3444 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3445 fts3PoslistMerge(&p, &p1, &p2); |
| 3446 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3447 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3448 }else if( !p2 || (p1 && iDiff<0) ){ /* Next entry comes from list 1 */ |
| 3449 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3450 fts3PoslistCopy(&p, &p1); |
| 3451 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3452 }else{ /* Next entry comes from list 2 */ |
| 3453 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); |
| 3454 fts3PoslistCopy(&p, &p2); |
| 3455 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3456 } |
| 3457 } |
| 3458 |
| 3459 *paOut = aOut; |
| 3460 *pnOut = (int)(p-aOut); |
| 3461 assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); |
| 3462 return SQLITE_OK; |
| 3463 } |
| 3464 |
| 3465 /* |
| 3466 ** This function does a "phrase" merge of two doclists. In a phrase merge, |
| 3467 ** the output contains a copy of each position from the right-hand input |
| 3468 ** doclist for which there is a position in the left-hand input doclist |
| 3469 ** exactly nDist tokens before it. |
| 3470 ** |
| 3471 ** If the docids in the input doclists are sorted in ascending order, |
| 3472 ** parameter bDescDoclist should be false. If they are sorted in descending |
| 3473 ** order, it should be passed a non-zero value. |
| 3474 ** |
| 3475 ** The right-hand input doclist is overwritten by this function. |
| ** |
| ** If bDescDoclist is zero, the output is written in place over the buffer |
| ** *paRight. Otherwise a new buffer is allocated for the output, the |
| ** original *paRight buffer is freed using sqlite3_free() and *paRight is |
| ** set to point to the new buffer. In both cases *pnRight is set to the |
| ** size of the output in bytes. |
| ** |
| ** SQLITE_OK is returned if successful, or SQLITE_NOMEM if the output |
| ** buffer cannot be allocated (in which case *paRight and *pnRight are |
| ** not modified). |
| 3476 */ |
| 3477 static int fts3DoclistPhraseMerge( |
| 3478 int bDescDoclist, /* True if arguments are desc */ |
| 3479 int nDist, /* Distance from left to right (1=adjacent) */ |
| 3480 char *aLeft, int nLeft, /* Left doclist */ |
| 3481 char **paRight, int *pnRight /* IN/OUT: Right/output doclist */ |
| 3482 ){ |
| 3483 sqlite3_int64 i1 = 0; |
| 3484 sqlite3_int64 i2 = 0; |
| 3485 sqlite3_int64 iPrev = 0; |
| 3486 char *aRight = *paRight; |
| 3487 char *pEnd1 = &aLeft[nLeft]; |
| 3488 char *pEnd2 = &aRight[*pnRight]; |
| 3489 char *p1 = aLeft; |
| 3490 char *p2 = aRight; |
| 3491 char *p; |
| 3492 int bFirstOut = 0; |
| 3493 char *aOut; |
| 3494 |
| 3495 assert( nDist>0 ); |
| 3496 if( bDescDoclist ){ /* Descending: write the output to a separate buffer */ |
| 3497 aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX); |
| 3498 if( aOut==0 ) return SQLITE_NOMEM; |
| 3499 }else{ |
| 3500 aOut = aRight; /* Ascending: write the output over the right-hand input */ |
| 3501 } |
| 3502 p = aOut; |
| 3503 |
| 3504 fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); /* i1 is 0: first docid read as-is */ |
| 3505 fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); /* i2 is 0: first docid read as-is */ |
| 3506 |
| 3507 while( p1 && p2 ){ /* Stop as soon as either input is exhausted */ |
| 3508 sqlite3_int64 iDiff = DOCID_CMP(i1, i2); |
| 3509 if( iDiff==0 ){ |
| 3510 char *pSave = p; |
| 3511 sqlite3_int64 iPrevSave = iPrev; |
| 3512 int bFirstOutSave = bFirstOut; |
| 3513 |
| 3514 fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); |
| 3515 if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ |
| /* No phrase match for this docid: undo the docid written above. */ |
| 3516 p = pSave; |
| 3517 iPrev = iPrevSave; |
| 3518 bFirstOut = bFirstOutSave; |
| 3519 } |
| 3520 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3521 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3522 }else if( iDiff<0 ){ /* Docid present in the left list only: skip it */ |
| 3523 fts3PoslistCopy(0, &p1); |
| 3524 fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); |
| 3525 }else{ /* Docid present in the right list only: skip it */ |
| 3526 fts3PoslistCopy(0, &p2); |
| 3527 fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); |
| 3528 } |
| 3529 } |
| 3530 |
| 3531 *pnRight = (int)(p - aOut); |
| 3532 if( bDescDoclist ){ /* Replace the caller's buffer with the new one */ |
| 3533 sqlite3_free(aRight); |
| 3534 *paRight = aOut; |
| 3535 } |
| 3536 |
| 3537 return SQLITE_OK; |
| 3538 } |
| 3539 |
| 3540 /* |
| 3541 ** Argument pList points to a position list nList bytes in size. This |
| 3542 ** function checks to see if the position list contains any entries for |
| 3543 ** a token in position 0 (of any column). If so, it writes argument iDelta |
| 3544 ** to the output buffer pOut, followed by a position list consisting only |
| 3545 ** of the entries from pList at position 0, and terminated by an 0x00 byte. |
| 3546 ** The value returned is the number of bytes written to pOut (if any). |
| 3547 */ |
| 3548 SQLITE_PRIVATE int sqlite3Fts3FirstFilter( |
| 3549 sqlite3_int64 iDelta, /* Varint that may be written to pOut */ |
| 3550 char *pList, /* Position list (no 0x00 term) */ |
| 3551 int nList, /* Size of pList in bytes */ |
| 3552 char *pOut /* Write output here */ |
| 3553 ){ |
| 3554 int nOut = 0; |
| 3555 int bWritten = 0; /* True once iDelta has been written */ |
| 3556 char *p = pList; |
| 3557 char *pEnd = &pList[nList]; |
| 3558 |
| 3559 if( *p!=0x01 ){ |
| 3560 if( *p==0x02 ){ |
| 3561 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); |
| 3562 pOut[nOut++] = 0x02; |
| 3563 bWritten = 1; |
| 3564 } |
| 3565 fts3ColumnlistCopy(0, &p); |
| 3566 } |
| 3567 |
| 3568 while( p<pEnd && *p==0x01 ){ |
| 3569 sqlite3_int64 iCol; |
| 3570 p++; |
| 3571 p += sqlite3Fts3GetVarint(p, &iCol); |
| 3572 if( *p==0x02 ){ |
| 3573 if( bWritten==0 ){ |
| 3574 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); |
| 3575 bWritten = 1; |
| 3576 } |
| 3577 pOut[nOut++] = 0x01; |
| 3578 nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol); |
| 3579 pOut[nOut++] = 0x02; |
| 3580 } |
| 3581 fts3ColumnlistCopy(0, &p); |
| 3582 } |
| 3583 if( bWritten ){ |
| 3584 pOut[nOut++] = 0x00; |
| 3585 } |
| 3586 |
| 3587 return nOut; |
| 3588 } |
| 3589 |
| 3590 |
| 3591 /* |
| 3592 ** Merge all doclists in the TermSelect.aaOutput[] array into a single |
| 3593 ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all |
| 3594 ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. |
| 3595 ** |
| 3596 ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is |
| 3597 ** the responsibility of the caller to free any doclists left in the |
| 3598 ** TermSelect.aaOutput[] array. |
| 3599 */ |
| 3600 static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ |
| 3601 char *aOut = 0; |
| 3602 int nOut = 0; |
| 3603 int i; |
| 3604 |
| 3605 /* Loop through the doclists in the aaOutput[] array. Merge them all |
| 3606 ** into a single doclist. |
| 3607 */ |
| 3608 for(i=0; i<SizeofArray(pTS->aaOutput); i++){ |
| 3609 if( pTS->aaOutput[i] ){ |
| 3610 if( !aOut ){ |
| 3611 aOut = pTS->aaOutput[i]; |
| 3612 nOut = pTS->anOutput[i]; |
| 3613 pTS->aaOutput[i] = 0; |
| 3614 }else{ |
| 3615 int nNew; |
| 3616 char *aNew; |
| 3617 |
| 3618 int rc = fts3DoclistOrMerge(p->bDescIdx, |
| 3619 pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew |
| 3620 ); |
| 3621 if( rc!=SQLITE_OK ){ |
| 3622 sqlite3_free(aOut); |
| 3623 return rc; |
| 3624 } |
| 3625 |
| 3626 sqlite3_free(pTS->aaOutput[i]); |
| 3627 sqlite3_free(aOut); |
| 3628 pTS->aaOutput[i] = 0; |
| 3629 aOut = aNew; |
| 3630 nOut = nNew; |
| 3631 } |
| 3632 } |
| 3633 } |
| 3634 |
| 3635 pTS->aaOutput[0] = aOut; |
| 3636 pTS->anOutput[0] = nOut; |
| 3637 return SQLITE_OK; |
| 3638 } |
| 3639 |
| 3640 /* |
| 3641 ** Merge the doclist aDoclist/nDoclist into the TermSelect object passed |
| 3642 ** as the first argument. The merge is an "OR" merge (see function |
| 3643 ** fts3DoclistOrMerge() for details). |
| 3644 ** |
| 3645 ** This function is called with the doclist for each term that matches |
| 3646 ** a queried prefix. It merges all these doclists into one, the doclist |
| 3647 ** for the specified prefix. Since there can be a very large number of |
| 3648 ** doclists to merge, the merging is done pair-wise using the TermSelect |
| 3649 ** object. |
| 3650 ** |
| 3651 ** This function returns SQLITE_OK if the merge is successful, or an |
| 3652 ** SQLite error code (SQLITE_NOMEM) if an error occurs. |
| 3653 */ |
| 3654 static int fts3TermSelectMerge( |
| 3655 Fts3Table *p, /* FTS table handle */ |
| 3656 TermSelect *pTS, /* TermSelect object to merge into */ |
| 3657 char *aDoclist, /* Pointer to doclist */ |
| 3658 int nDoclist /* Size of aDoclist in bytes */ |
| 3659 ){ |
| 3660 if( pTS->aaOutput[0]==0 ){ |
| 3661 /* If this is the first term selected, copy the doclist to the output |
| 3662 ** buffer using memcpy(). |
| 3663 ** |
| 3664 ** Add FTS3_VARINT_MAX bytes of unused space to the end of the |
| 3665 ** allocation. This is so as to ensure that the buffer is big enough |
| 3666 ** to hold the current doclist AND'd with any other doclist. If the |
| 3667 ** doclists are stored in order=ASC order, this padding would not be |
| 3668 ** required (since the size of [doclistA AND doclistB] is always less |
| 3669 ** than or equal to the size of [doclistA] in that case). But this is |
| 3670 ** not true for order=DESC. For example, a doclist containing (1, -1) |
| 3671 ** may be smaller than (-1), as in the first example the -1 may be stored |
| 3672 ** as a single-byte delta, whereas in the second it must be stored as a |
| 3673 ** FTS3_VARINT_MAX byte varint. |
| 3674 ** |
| 3675 ** Similar padding is added in the fts3DoclistOrMerge() function. |
| 3676 */ |
| 3677 pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); |
| 3678 pTS->anOutput[0] = nDoclist; |
| 3679 if( pTS->aaOutput[0] ){ |
| 3680 memcpy(pTS->aaOutput[0], aDoclist, nDoclist); |
| 3681 }else{ |
| 3682 return SQLITE_NOMEM; |
| 3683 } |
| 3684 }else{ |
| 3685 char *aMerge = aDoclist; |
| 3686 int nMerge = nDoclist; |
| 3687 int iOut; |
| 3688 |
| 3689 for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ |
| 3690 if( pTS->aaOutput[iOut]==0 ){ |
| 3691 assert( iOut>0 ); |
| 3692 pTS->aaOutput[iOut] = aMerge; |
| 3693 pTS->anOutput[iOut] = nMerge; |
| 3694 break; |
| 3695 }else{ |
| 3696 char *aNew; |
| 3697 int nNew; |
| 3698 |
| 3699 int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, |
| 3700 pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew |
| 3701 ); |
| 3702 if( rc!=SQLITE_OK ){ |
| 3703 if( aMerge!=aDoclist ) sqlite3_free(aMerge); |
| 3704 return rc; |
| 3705 } |
| 3706 |
| 3707 if( aMerge!=aDoclist ) sqlite3_free(aMerge); |
| 3708 sqlite3_free(pTS->aaOutput[iOut]); |
| 3709 pTS->aaOutput[iOut] = 0; |
| 3710 |
| 3711 aMerge = aNew; |
| 3712 nMerge = nNew; |
| 3713 if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ |
| 3714 pTS->aaOutput[iOut] = aMerge; |
| 3715 pTS->anOutput[iOut] = nMerge; |
| 3716 } |
| 3717 } |
| 3718 } |
| 3719 } |
| 3720 return SQLITE_OK; |
| 3721 } |
| 3722 |
| 3723 /* |
| 3724 ** Append SegReader object pNew to the end of the pCsr->apSegment[] array. |
| 3725 */ |
| 3726 static int fts3SegReaderCursorAppend( |
| 3727 Fts3MultiSegReader *pCsr, |
| 3728 Fts3SegReader *pNew |
| 3729 ){ |
| 3730 if( (pCsr->nSegment%16)==0 ){ |
| 3731 Fts3SegReader **apNew; |
| 3732 int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); |
| 3733 apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); |
| 3734 if( !apNew ){ |
| 3735 sqlite3Fts3SegReaderFree(pNew); |
| 3736 return SQLITE_NOMEM; |
| 3737 } |
| 3738 pCsr->apSegment = apNew; |
| 3739 } |
| 3740 pCsr->apSegment[pCsr->nSegment++] = pNew; |
| 3741 return SQLITE_OK; |
| 3742 } |
| 3743 |
| 3744 /* |
| 3745 ** Add seg-reader objects to the Fts3MultiSegReader object passed as the |
| 3746 ** 9th argument. |
| 3747 ** |
| 3748 ** This function returns SQLITE_OK if successful, or an SQLite error code |
| 3749 ** otherwise. |
| 3750 */ |
| 3751 static int fts3SegReaderCursor( |
| 3752 Fts3Table *p, /* FTS3 table handle */ |
| 3753 int iLangid, /* Language id */ |
| 3754 int iIndex, /* Index to search (from 0 to p->nIndex-1) */ |
| 3755 int iLevel, /* Level of segments to scan */ |
| 3756 const char *zTerm, /* Term to query for */ |
| 3757 int nTerm, /* Size of zTerm in bytes */ |
| 3758 int isPrefix, /* True for a prefix search */ |
| 3759 int isScan, /* True to scan from zTerm to EOF */ |
| 3760 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 3761 ){ |
| 3762 int rc = SQLITE_OK; /* Error code */ |
| 3763 sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ |
| 3764 int rc2; /* Result of sqlite3_reset() */ |
| 3765 |
| 3766 /* If iLevel is less than 0 and this is not a scan, include a seg-reader |
| 3767 ** for the pending-terms. If this is a scan, then this call must be being |
| 3768 ** made by an fts4aux module, not an FTS table. In this case calling |
| 3769 ** Fts3SegReaderPending might segfault, as the data structures used by |
| 3770 ** fts4aux are not completely populated. So it's easiest to filter these |
| 3771 ** calls out here. The test applied below is that p->aIndex is non-NULL. */ |
| 3772 if( iLevel<0 && p->aIndex ){ |
| 3773 Fts3SegReader *pSeg = 0; |
| 3774 rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan,
&pSeg); |
| 3775 if( rc==SQLITE_OK && pSeg ){ |
| 3776 rc = fts3SegReaderCursorAppend(pCsr, pSeg); |
| 3777 } |
| 3778 } |
| 3779 |
| 3780 if( iLevel!=FTS3_SEGCURSOR_PENDING ){ /* Add the sqlite3Fts3AllSegdirs() rows */ |
| 3781 if( rc==SQLITE_OK ){ |
| 3782 rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); |
| 3783 } |
| 3784 |
| 3785 while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ |
| 3786 Fts3SegReader *pSeg = 0; |
| 3787 |
| 3788 /* Read the values returned by the SELECT into local variables. */ |
| 3789 sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); |
| 3790 sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); |
| 3791 sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); |
| 3792 int nRoot = sqlite3_column_bytes(pStmt, 4); |
| 3793 char const *zRoot = sqlite3_column_blob(pStmt, 4); |
| 3794 |
| 3795 /* If zTerm is not NULL, and this segment is not stored entirely on its |
| 3796 ** root node, the range of leaves scanned can be reduced. Do this. */ |
| 3797 if( iStartBlock && zTerm ){ |
| 3798 sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); |
| 3799 rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); |
| 3800 if( rc!=SQLITE_OK ) goto finished; |
| 3801 if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; |
| 3802 } |
| 3803 |
| 3804 rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, |
| 3805 (isPrefix==0 && isScan==0), |
| 3806 iStartBlock, iLeavesEndBlock, |
| 3807 iEndBlock, zRoot, nRoot, &pSeg |
| 3808 ); |
| 3809 if( rc!=SQLITE_OK ) goto finished; |
| 3810 rc = fts3SegReaderCursorAppend(pCsr, pSeg); |
| 3811 } |
| 3812 } |
| 3813 |
| 3814 finished: |
| 3815 rc2 = sqlite3_reset(pStmt); /* pStmt is still 0 if AllSegdirs was not called */ |
| 3816 if( rc==SQLITE_DONE ) rc = rc2; /* Loop completed: return the reset result */ |
| 3817 |
| 3818 return rc; |
| 3819 } |
| 3820 |
| 3821 /* |
| 3822 ** Set up a cursor object for iterating through a full-text index or a |
| 3823 ** single level therein. |
| 3824 */ |
| 3825 SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor( |
| 3826 Fts3Table *p, /* FTS3 table handle */ |
| 3827 int iLangid, /* Language-id to search */ |
| 3828 int iIndex, /* Index to search (from 0 to p->nIndex-1) */ |
| 3829 int iLevel, /* Level of segments to scan */ |
| 3830 const char *zTerm, /* Term to query for */ |
| 3831 int nTerm, /* Size of zTerm in bytes */ |
| 3832 int isPrefix, /* True for a prefix search */ |
| 3833 int isScan, /* True to scan from zTerm to EOF */ |
| 3834 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 3835 ){ |
| 3836 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 3837 assert( iLevel==FTS3_SEGCURSOR_ALL |
| 3838 || iLevel==FTS3_SEGCURSOR_PENDING |
| 3839 || iLevel>=0 |
| 3840 ); |
| 3841 assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
| 3842 assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 ); |
| 3843 assert( isPrefix==0 || isScan==0 ); |
| 3844 |
| 3845 memset(pCsr, 0, sizeof(Fts3MultiSegReader)); |
| 3846 return fts3SegReaderCursor( |
| 3847 p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr |
| 3848 ); |
| 3849 } |
| 3850 |
| 3851 /* |
| 3852 ** In addition to its current configuration, have the Fts3MultiSegReader |
| 3853 ** passed as the 4th argument also scan the doclist for term zTerm/nTerm. |
| 3854 ** |
| 3855 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 3856 */ |
| 3857 static int fts3SegReaderCursorAddZero( |
| 3858 Fts3Table *p, /* FTS virtual table handle */ |
| 3859 int iLangid, |
| 3860 const char *zTerm, /* Term to scan doclist of */ |
| 3861 int nTerm, /* Number of bytes in zTerm */ |
| 3862 Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ |
| 3863 ){ |
| 3864 return fts3SegReaderCursor(p, |
| 3865 iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr |
| 3866 ); |
| 3867 } |
| 3868 |
| 3869 /* |
| 3870 ** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, |
| 3871 ** if isPrefix is true, to scan the doclist for all terms for which |
| 3872 ** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write |
| 3873 ** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return |
| 3874 ** an SQLite error code. |
| 3875 ** |
| 3876 ** It is the responsibility of the caller to free this object by eventually |
| 3877 ** passing it to fts3SegReaderCursorFree() |
| 3878 ** |
| 3879 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 3880 ** Output parameter *ppSegcsr is set to 0 if an error occurs. |
| 3881 */ |
| 3882 static int fts3TermSegReaderCursor( |
| 3883 Fts3Cursor *pCsr, /* Virtual table cursor handle */ |
| 3884 const char *zTerm, /* Term to query for */ |
| 3885 int nTerm, /* Size of zTerm in bytes */ |
| 3886 int isPrefix, /* True for a prefix search */ |
| 3887 Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ |
| 3888 ){ |
| 3889 Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ |
| 3890 int rc = SQLITE_NOMEM; /* Return code */ |
| 3891 |
| 3892 pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); |
| 3893 if( pSegcsr ){ |
| 3894 int i; |
| 3895 int bFound = 0; /* True once an index has been found */ |
| 3896 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 3897 |
| 3898 if( isPrefix ){ |
| 3899 for(i=1; bFound==0 && i<p->nIndex; i++){ |
| 3900 if( p->aIndex[i].nPrefix==nTerm ){ |
| 3901 bFound = 1; |
| 3902 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3903 i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr |
| 3904 ); |
| 3905 pSegcsr->bLookup = 1; |
| 3906 } |
| 3907 } |
| 3908 |
| 3909 for(i=1; bFound==0 && i<p->nIndex; i++){ |
| 3910 if( p->aIndex[i].nPrefix==nTerm+1 ){ |
| 3911 bFound = 1; |
| 3912 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3913 i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr |
| 3914 ); |
| 3915 if( rc==SQLITE_OK ){ |
| 3916 rc = fts3SegReaderCursorAddZero( |
| 3917 p, pCsr->iLangid, zTerm, nTerm, pSegcsr |
| 3918 ); |
| 3919 } |
| 3920 } |
| 3921 } |
| 3922 } |
| 3923 |
| 3924 if( bFound==0 ){ |
| 3925 rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, |
| 3926 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr |
| 3927 ); |
| 3928 pSegcsr->bLookup = !isPrefix; |
| 3929 } |
| 3930 } |
| 3931 |
| 3932 *ppSegcsr = pSegcsr; |
| 3933 return rc; |
| 3934 } |
| 3935 |
| 3936 /* |
| 3937 ** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). |
| 3938 */ |
| 3939 static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ |
| 3940 sqlite3Fts3SegReaderFinish(pSegcsr); |
| 3941 sqlite3_free(pSegcsr); |
| 3942 } |
| 3943 |
| 3944 /* |
| 3945 ** This function retrieves the doclist for the specified term (or term |
| 3946 ** prefix) from the database. |
| 3947 */ |
| 3948 static int fts3TermSelect( |
| 3949 Fts3Table *p, /* Virtual table handle */ |
| 3950 Fts3PhraseToken *pTok, /* Token to query for */ |
| 3951 int iColumn, /* Column to query (or -ve for all columns) */ |
| 3952 int *pnOut, /* OUT: Size of buffer at *ppOut */ |
| 3953 char **ppOut /* OUT: Malloced result buffer */ |
| 3954 ){ |
| 3955 int rc; /* Return code */ |
| 3956 Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ |
| 3957 TermSelect tsc; /* Object for pair-wise doclist merging */ |
| 3958 Fts3SegFilter filter; /* Segment term filter configuration */ |
| 3959 |
| 3960 pSegcsr = pTok->pSegcsr; |
| 3961 memset(&tsc, 0, sizeof(TermSelect)); |
| 3962 |
| 3963 filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS |
| 3964 | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) |
| 3965 | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) |
| 3966 | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); |
| 3967 filter.iCol = iColumn; |
| 3968 filter.zTerm = pTok->z; |
| 3969 filter.nTerm = pTok->n; |
| 3970 |
| 3971 rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); |
| 3972 while( SQLITE_OK==rc |
| 3973 && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) |
| 3974 ){ |
| 3975 rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); |
| 3976 } |
| 3977 |
| 3978 if( rc==SQLITE_OK ){ |
| 3979 rc = fts3TermSelectFinishMerge(p, &tsc); |
| 3980 } |
| 3981 if( rc==SQLITE_OK ){ |
| 3982 *ppOut = tsc.aaOutput[0]; |
| 3983 *pnOut = tsc.anOutput[0]; |
| 3984 }else{ |
| 3985 int i; |
| 3986 for(i=0; i<SizeofArray(tsc.aaOutput); i++){ |
| 3987 sqlite3_free(tsc.aaOutput[i]); |
| 3988 } |
| 3989 } |
| 3990 |
| 3991 fts3SegReaderCursorFree(pSegcsr); |
| 3992 pTok->pSegcsr = 0; |
| 3993 return rc; |
| 3994 } |
| 3995 |
/*
** Return the number of docids in the doclist stored in buffer aList[],
** which is nList bytes in size. If aList is NULL, zero is returned.
**
** Each entry is skipped by stepping over the docid varint and then
** calling fts3PoslistCopy() with a NULL output to step over the
** position list that follows it.
*/
static int fts3DoclistCountDocids(char *aList, int nList){
  int nDoc = 0;                   /* Return value */
  char *pCur;                     /* Cursor */
  char *pEnd;                     /* Pointer to one byte after EOF */

  if( aList==0 ) return 0;

  pEnd = &aList[nList];
  for(pCur=aList; pCur<pEnd; nDoc++){
    char c;
    do{ c = *pCur++; }while( c & 0x80 );   /* Skip docid varint */
    fts3PoslistCopy(0, &pCur);             /* Skip over position list */
  }
  return nDoc;
}
| 4019 |
| 4020 /* |
| 4021 ** Advance the cursor to the next row in the %_content table that |
| 4022 ** matches the search criteria. For a MATCH search, this will be |
| 4023 ** the next row that matches. For a full-table scan, this will be |
| 4024 ** simply the next row in the %_content table. For a docid lookup, |
| 4025 ** this routine simply sets the EOF flag. |
| 4026 ** |
| 4027 ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned |
| 4028 ** even if we reach end-of-file. The fts3EofMethod() will be called |
| 4029 ** subsequently to determine whether or not an EOF was hit. |
| 4030 */ |
| 4031 static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ |
| 4032 int rc; |
| 4033 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 4034 if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ |
| 4035 if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ |
| 4036 pCsr->isEof = 1; |
| 4037 rc = sqlite3_reset(pCsr->pStmt); |
| 4038 }else{ |
| 4039 pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); |
| 4040 rc = SQLITE_OK; |
| 4041 } |
| 4042 }else{ |
| 4043 rc = fts3EvalNext((Fts3Cursor *)pCursor); |
| 4044 } |
| 4045 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 4046 return rc; |
| 4047 } |
| 4048 |
/*
** Copied from sqliteInt.h; defined here only when this file is not
** compiled as part of the amalgamation.
**
** LARGEST_INT64 and SMALLEST_INT64 are the largest and smallest values
** of a 64-bit signed integer. The expressions are written so that they
** work correctly on both 32-bit and 64-bit compilers.
*/
#ifndef SQLITE_AMALGAMATION
# define LARGEST_INT64  ((((sqlite3_int64)0x7fffffff)<<32)|0xffffffff)
# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
#endif
| 4060 |
| 4061 /* |
| 4062 ** If the numeric type of argument pVal is "integer", then return it |
| 4063 ** converted to a 64-bit signed integer. Otherwise, return a copy of |
| 4064 ** the second parameter, iDefault. |
| 4065 */ |
| 4066 static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ |
| 4067 if( pVal ){ |
| 4068 int eType = sqlite3_value_numeric_type(pVal); |
| 4069 if( eType==SQLITE_INTEGER ){ |
| 4070 return sqlite3_value_int64(pVal); |
| 4071 } |
| 4072 } |
| 4073 return iDefault; |
| 4074 } |
| 4075 |
| 4076 /* |
| 4077 ** This is the xFilter interface for the virtual table. See |
| 4078 ** the virtual table xFilter method documentation for additional |
| 4079 ** information. |
| 4080 ** |
| 4081 ** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against |
| 4082 ** the %_content table. |
| 4083 ** |
| 4084 ** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry |
| 4085 ** in the %_content table. |
| 4086 ** |
| 4087 ** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The |
| 4088 ** column on the left-hand side of the MATCH operator is column |
| 4089 ** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand |
| 4090 ** side of the MATCH operator. |
| 4091 */ |
| 4092 static int fts3FilterMethod( |
| 4093 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 4094 int idxNum, /* Strategy index */ |
| 4095 const char *idxStr, /* Unused */ |
| 4096 int nVal, /* Number of elements in apVal */ |
| 4097 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 4098 ){ |
| 4099 int rc = SQLITE_OK; |
| 4100 char *zSql; /* SQL statement used to access %_content */ |
| 4101 int eSearch; |
| 4102 Fts3Table *p = (Fts3Table *)pCursor->pVtab; |
| 4103 Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; |
| 4104 |
| 4105 sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ |
| 4106 sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ |
| 4107 sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ |
| 4108 sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ |
| 4109 int iIdx; |
| 4110 |
| 4111 UNUSED_PARAMETER(idxStr); |
| 4112 UNUSED_PARAMETER(nVal); |
| 4113 |
| 4114 eSearch = (idxNum & 0x0000FFFF); |
| 4115 assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); |
| 4116 assert( p->pSegments==0 ); |
| 4117 |
| 4118 /* Collect arguments into local variables */ |
| 4119 iIdx = 0; |
| 4120 if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; |
| 4121 if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; |
| 4122 if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; |
| 4123 if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; |
| 4124 assert( iIdx==nVal ); |
| 4125 |
| 4126 /* In case the cursor has been used before, clear it now. */ |
| 4127 fts3CursorFinalizeStmt(pCsr); |
| 4128 sqlite3_free(pCsr->aDoclist); |
| 4129 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 4130 sqlite3Fts3ExprFree(pCsr->pExpr); |
| 4131 memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); |
| 4132 |
| 4133 /* Set the lower and upper bounds on docids to return */ |
| 4134 pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); |
| 4135 pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); |
| 4136 |
| 4137 if( idxStr ){ |
| 4138 pCsr->bDesc = (idxStr[0]=='D'); |
| 4139 }else{ |
| 4140 pCsr->bDesc = p->bDescIdx; |
| 4141 } |
| 4142 pCsr->eSearch = (i16)eSearch; |
| 4143 |
| 4144 if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ |
| 4145 int iCol = eSearch-FTS3_FULLTEXT_SEARCH; |
| 4146 const char *zQuery = (const char *)sqlite3_value_text(pCons); |
| 4147 |
| 4148 if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ |
| 4149 return SQLITE_NOMEM; |
| 4150 } |
| 4151 |
| 4152 pCsr->iLangid = 0; |
| 4153 if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); |
| 4154 |
| 4155 assert( p->base.zErrMsg==0 ); |
| 4156 rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, |
| 4157 p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, |
| 4158 &p->base.zErrMsg |
| 4159 ); |
| 4160 if( rc!=SQLITE_OK ){ |
| 4161 return rc; |
| 4162 } |
| 4163 |
| 4164 rc = fts3EvalStart(pCsr); |
| 4165 sqlite3Fts3SegmentsClose(p); |
| 4166 if( rc!=SQLITE_OK ) return rc; |
| 4167 pCsr->pNextId = pCsr->aDoclist; |
| 4168 pCsr->iPrevId = 0; |
| 4169 } |
| 4170 |
| 4171 /* Compile a SELECT statement for this cursor. For a full-table-scan, the |
| 4172 ** statement loops through all rows of the %_content table. For a |
| 4173 ** full-text query or docid lookup, the statement retrieves a single |
| 4174 ** row by docid. |
| 4175 */ |
| 4176 if( eSearch==FTS3_FULLSCAN_SEARCH ){ |
| 4177 if( pDocidGe || pDocidLe ){ |
| 4178 zSql = sqlite3_mprintf( |
| 4179 "SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s", |
| 4180 p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid, |
| 4181 (pCsr->bDesc ? "DESC" : "ASC") |
| 4182 ); |
| 4183 }else{ |
| 4184 zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s", |
| 4185 p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") |
| 4186 ); |
| 4187 } |
| 4188 if( zSql ){ |
| 4189 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); |
| 4190 sqlite3_free(zSql); |
| 4191 }else{ |
| 4192 rc = SQLITE_NOMEM; |
| 4193 } |
| 4194 }else if( eSearch==FTS3_DOCID_SEARCH ){ |
| 4195 rc = fts3CursorSeekStmt(pCsr); |
| 4196 if( rc==SQLITE_OK ){ |
| 4197 rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); |
| 4198 } |
| 4199 } |
| 4200 if( rc!=SQLITE_OK ) return rc; |
| 4201 |
| 4202 return fts3NextMethod(pCursor); |
| 4203 } |
| 4204 |
| 4205 /* |
| 4206 ** This is the xEof method of the virtual table. SQLite calls this |
| 4207 ** routine to find out if it has reached the end of a result set. |
| 4208 */ |
| 4209 static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ |
| 4210 return ((Fts3Cursor *)pCursor)->isEof; |
| 4211 } |
| 4212 |
| 4213 /* |
| 4214 ** This is the xRowid method. The SQLite core calls this routine to |
| 4215 ** retrieve the rowid for the current row of the result set. fts3 |
| 4216 ** exposes %_content.docid as the rowid for the virtual table. The |
| 4217 ** rowid should be written to *pRowid. |
| 4218 */ |
| 4219 static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |
| 4220 Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; |
| 4221 *pRowid = pCsr->iPrevId; |
| 4222 return SQLITE_OK; |
| 4223 } |
| 4224 |
| 4225 /* |
| 4226 ** This is the xColumn method, called by SQLite to request a value from |
| 4227 ** the row that the supplied cursor currently points to. |
| 4228 ** |
| 4229 ** If: |
| 4230 ** |
| 4231 ** (iCol < p->nColumn) -> The value of the iCol'th user column. |
| 4232 ** (iCol == p->nColumn) -> Magic column with the same name as the table. |
| 4233 ** (iCol == p->nColumn+1) -> Docid column |
| 4234 ** (iCol == p->nColumn+2) -> Langid column |
| 4235 */ |
| 4236 static int fts3ColumnMethod( |
| 4237 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 4238 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 4239 int iCol /* Index of column to read value from */ |
| 4240 ){ |
| 4241 int rc = SQLITE_OK; /* Return Code */ |
| 4242 Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; |
| 4243 Fts3Table *p = (Fts3Table *)pCursor->pVtab; |
| 4244 |
| 4245 /* The column value supplied by SQLite must be in range. */ |
| 4246 assert( iCol>=0 && iCol<=p->nColumn+2 ); |
| 4247 |
| 4248 if( iCol==p->nColumn+1 ){ |
| 4249 /* This call is a request for the "docid" column. Since "docid" is an |
| 4250 ** alias for "rowid", use the xRowid() method to obtain the value. |
| 4251 */ |
| 4252 sqlite3_result_int64(pCtx, pCsr->iPrevId); |
| 4253 }else if( iCol==p->nColumn ){ |
| 4254 /* The extra column whose name is the same as the table. |
| 4255 ** Return a blob which is a pointer to the cursor. */ |
| 4256 sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); |
| 4257 }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ |
| 4258 sqlite3_result_int64(pCtx, pCsr->iLangid); |
| 4259 }else{ |
| 4260 /* The requested column is either a user column (one that contains |
| 4261 ** indexed data), or the language-id column. */ |
| 4262 rc = fts3CursorSeek(0, pCsr); |
| 4263 |
| 4264 if( rc==SQLITE_OK ){ |
| 4265 if( iCol==p->nColumn+2 ){ |
| 4266 int iLangid = 0; |
| 4267 if( p->zLanguageid ){ |
| 4268 iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); |
| 4269 } |
| 4270 sqlite3_result_int(pCtx, iLangid); |
| 4271 }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ |
| 4272 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); |
| 4273 } |
| 4274 } |
| 4275 } |
| 4276 |
| 4277 assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); |
| 4278 return rc; |
| 4279 } |
| 4280 |
| 4281 /* |
| 4282 ** This function is the implementation of the xUpdate callback used by |
| 4283 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be |
| 4284 ** inserted, updated or deleted. |
| 4285 */ |
| 4286 static int fts3UpdateMethod( |
| 4287 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4288 int nArg, /* Size of argument array */ |
| 4289 sqlite3_value **apVal, /* Array of arguments */ |
| 4290 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ |
| 4291 ){ |
| 4292 return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); |
| 4293 } |
| 4294 |
| 4295 /* |
| 4296 ** Implementation of xSync() method. Flush the contents of the pending-terms |
| 4297 ** hash-table to the database. |
| 4298 */ |
| 4299 static int fts3SyncMethod(sqlite3_vtab *pVtab){ |
| 4300 |
| 4301 /* Following an incremental-merge operation, assuming that the input |
| 4302 ** segments are not completely consumed (the usual case), they are updated |
| 4303 ** in place to remove the entries that have already been merged. This |
| 4304 ** involves updating the leaf block that contains the smallest unmerged |
| 4305 ** entry and each block (if any) between the leaf and the root node. So |
| 4306 ** if the height of the input segment b-trees is N, and input segments |
| 4307 ** are merged eight at a time, updating the input segments at the end |
| 4308 ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually |
| 4309 ** small - often between 0 and 2. So the overhead of the incremental |
| 4310 ** merge is somewhere between 8 and 24 blocks. To avoid this overhead |
| 4311 ** dwarfing the actual productive work accomplished, the incremental merge |
| 4312 ** is only attempted if it will write at least 64 leaf blocks. Hence |
| 4313 ** nMinMerge. |
| 4314 ** |
| 4315 ** Of course, updating the input segments also involves deleting a bunch |
| 4316 ** of blocks from the segments table. But this is not considered overhead |
| 4317 ** as it would also be required by a crisis-merge that used the same input |
| 4318 ** segments. |
| 4319 */ |
| 4320 const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ |
| 4321 |
| 4322 Fts3Table *p = (Fts3Table*)pVtab; |
| 4323 int rc = sqlite3Fts3PendingTermsFlush(p); |
| 4324 |
| 4325 if( rc==SQLITE_OK |
| 4326 && p->nLeafAdd>(nMinMerge/16) |
| 4327 && p->nAutoincrmerge && p->nAutoincrmerge!=0xff |
| 4328 ){ |
| 4329 int mxLevel = 0; /* Maximum relative level value in db */ |
| 4330 int A; /* Incr-merge parameter A */ |
| 4331 |
| 4332 rc = sqlite3Fts3MaxLevel(p, &mxLevel); |
| 4333 assert( rc==SQLITE_OK || mxLevel==0 ); |
| 4334 A = p->nLeafAdd * mxLevel; |
| 4335 A += (A/2); |
| 4336 if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); |
| 4337 } |
| 4338 sqlite3Fts3SegmentsClose(p); |
| 4339 return rc; |
| 4340 } |
| 4341 |
| 4342 /* |
| 4343 ** If it is currently unknown whether or not the FTS table has an %_stat |
| 4344 ** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat |
| 4345 ** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code |
| 4346 ** if an error occurs. |
| 4347 */ |
| 4348 static int fts3SetHasStat(Fts3Table *p){ |
| 4349 int rc = SQLITE_OK; |
| 4350 if( p->bHasStat==2 ){ |
| 4351 const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; |
| 4352 char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); |
| 4353 if( zSql ){ |
| 4354 sqlite3_stmt *pStmt = 0; |
| 4355 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 4356 if( rc==SQLITE_OK ){ |
| 4357 int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); |
| 4358 rc = sqlite3_finalize(pStmt); |
| 4359 if( rc==SQLITE_OK ) p->bHasStat = (u8)bHasStat; |
| 4360 } |
| 4361 sqlite3_free(zSql); |
| 4362 }else{ |
| 4363 rc = SQLITE_NOMEM; |
| 4364 } |
| 4365 } |
| 4366 return rc; |
| 4367 } |
| 4368 |
| 4369 /* |
| 4370 ** Implementation of xBegin() method. |
| 4371 */ |
| 4372 static int fts3BeginMethod(sqlite3_vtab *pVtab){ |
| 4373 Fts3Table *p = (Fts3Table*)pVtab; |
| 4374 UNUSED_PARAMETER(pVtab); |
| 4375 assert( p->pSegments==0 ); |
| 4376 assert( p->nPendingData==0 ); |
| 4377 assert( p->inTransaction!=1 ); |
| 4378 TESTONLY( p->inTransaction = 1 ); |
| 4379 TESTONLY( p->mxSavepoint = -1; ); |
| 4380 p->nLeafAdd = 0; |
| 4381 return fts3SetHasStat(p); |
| 4382 } |
| 4383 |
| 4384 /* |
| 4385 ** Implementation of xCommit() method. This is a no-op. The contents of |
| 4386 ** the pending-terms hash-table have already been flushed into the database |
| 4387 ** by fts3SyncMethod(). |
| 4388 */ |
| 4389 static int fts3CommitMethod(sqlite3_vtab *pVtab){ |
| 4390 TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); |
| 4391 UNUSED_PARAMETER(pVtab); |
| 4392 assert( p->nPendingData==0 ); |
| 4393 assert( p->inTransaction!=0 ); |
| 4394 assert( p->pSegments==0 ); |
| 4395 TESTONLY( p->inTransaction = 0 ); |
| 4396 TESTONLY( p->mxSavepoint = -1; ); |
| 4397 return SQLITE_OK; |
| 4398 } |
| 4399 |
| 4400 /* |
| 4401 ** Implementation of xRollback(). Discard the contents of the pending-terms |
| 4402 ** hash-table. Any changes made to the database are reverted by SQLite. |
| 4403 */ |
| 4404 static int fts3RollbackMethod(sqlite3_vtab *pVtab){ |
| 4405 Fts3Table *p = (Fts3Table*)pVtab; |
| 4406 sqlite3Fts3PendingTermsClear(p); |
| 4407 assert( p->inTransaction!=0 ); |
| 4408 TESTONLY( p->inTransaction = 0 ); |
| 4409 TESTONLY( p->mxSavepoint = -1; ); |
| 4410 return SQLITE_OK; |
| 4411 } |
| 4412 |
/*
** When called, *ppPoslist must point to the byte immediately following the
** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
** moves *ppPoslist so that it instead points to the first byte of the
** same position list. pStart is the start of the buffer; the backwards
** scan never moves before it.
*/
static void fts3ReversePoslist(char *pStart, char **ppPoslist){
  char *pEnd = *ppPoslist;        /* Byte following the position-list */
  char *p = &pEnd[-2];            /* Backwards scan cursor */
  char c = 0;                     /* Byte most recently stepped over */

  /* Step backwards past any trailing 0x00 bytes added by NearTrim() */
  while( p>pStart ){
    c = *p--;
    if( c!=0 ) break;
  }

  /* Keep going backwards until a varint with value zero is found (the end
  ** of the previous poslist). That is a 0x00 byte preceded by a byte that
  ** does not have the 0x80 bit set. */
  for(;;){
    if( p<=pStart ) break;
    if( ((*p & 0x80) | c)==0 ) break;
    c = *p--;
  }
  assert( p==pStart || c==0 );

  /* Normally p now points at the byte preceding the 0x00 terminator of
  ** the previous poslist, so the poslist being sought starts 2 bytes
  ** further on, after one more varint.
  **
  ** The exception is when the scan stopped at pStart because the poslist
  ** is the first one in the doclist. Then only the leading varint is
  ** skipped. The (c==0 && pEnd>&p[2]) test covers a doclist whose first
  ** entry is a single-byte docid followed by an empty poslist, e.g. for
  ** docid 10:
  **
  **     0x0A 0x00 <next docid delta varint>
  **
  ** in which case the 2 byte skip is still required.
  */
  if( p>pStart || (c==0 && pEnd>&p[2]) ){
    p += 2;
  }
  while( *p & 0x80 ) p++;         /* Skip the continuation bytes ... */
  p++;                            /* ... and the final byte of the varint */
  *ppPoslist = p;
}
| 4451 |
| 4452 /* |
| 4453 ** Helper function used by the implementation of the overloaded snippet(), |
| 4454 ** offsets() and optimize() SQL functions. |
| 4455 ** |
| 4456 ** If the value passed as the third argument is a blob of size |
| 4457 ** sizeof(Fts3Cursor*), then the blob contents are copied to the |
| 4458 ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error |
| 4459 ** message is written to context pContext and SQLITE_ERROR returned. The |
| 4460 ** string passed via zFunc is used as part of the error message. |
| 4461 */ |
| 4462 static int fts3FunctionArg( |
| 4463 sqlite3_context *pContext, /* SQL function call context */ |
| 4464 const char *zFunc, /* Function name */ |
| 4465 sqlite3_value *pVal, /* argv[0] passed to function */ |
| 4466 Fts3Cursor **ppCsr /* OUT: Store cursor handle here */ |
| 4467 ){ |
| 4468 Fts3Cursor *pRet; |
| 4469 if( sqlite3_value_type(pVal)!=SQLITE_BLOB |
| 4470 || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *) |
| 4471 ){ |
| 4472 char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); |
| 4473 sqlite3_result_error(pContext, zErr, -1); |
| 4474 sqlite3_free(zErr); |
| 4475 return SQLITE_ERROR; |
| 4476 } |
| 4477 memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *)); |
| 4478 *ppCsr = pRet; |
| 4479 return SQLITE_OK; |
| 4480 } |
| 4481 |
| 4482 /* |
| 4483 ** Implementation of the snippet() function for FTS3 |
| 4484 */ |
| 4485 static void fts3SnippetFunc( |
| 4486 sqlite3_context *pContext, /* SQLite function call context */ |
| 4487 int nVal, /* Size of apVal[] array */ |
| 4488 sqlite3_value **apVal /* Array of arguments */ |
| 4489 ){ |
| 4490 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4491 const char *zStart = "<b>"; |
| 4492 const char *zEnd = "</b>"; |
| 4493 const char *zEllipsis = "<b>...</b>"; |
| 4494 int iCol = -1; |
| 4495 int nToken = 15; /* Default number of tokens in snippet */ |
| 4496 |
| 4497 /* There must be at least one argument passed to this function (otherwise |
| 4498 ** the non-overloaded version would have been called instead of this one). |
| 4499 */ |
| 4500 assert( nVal>=1 ); |
| 4501 |
| 4502 if( nVal>6 ){ |
| 4503 sqlite3_result_error(pContext, |
| 4504 "wrong number of arguments to function snippet()", -1); |
| 4505 return; |
| 4506 } |
| 4507 if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; |
| 4508 |
| 4509 switch( nVal ){ |
| 4510 case 6: nToken = sqlite3_value_int(apVal[5]); |
| 4511 case 5: iCol = sqlite3_value_int(apVal[4]); |
| 4512 case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); |
| 4513 case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); |
| 4514 case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); |
| 4515 } |
| 4516 if( !zEllipsis || !zEnd || !zStart ){ |
| 4517 sqlite3_result_error_nomem(pContext); |
| 4518 }else if( nToken==0 ){ |
| 4519 sqlite3_result_text(pContext, "", -1, SQLITE_STATIC); |
| 4520 }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ |
| 4521 sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); |
| 4522 } |
| 4523 } |
| 4524 |
| 4525 /* |
| 4526 ** Implementation of the offsets() function for FTS3 |
| 4527 */ |
| 4528 static void fts3OffsetsFunc( |
| 4529 sqlite3_context *pContext, /* SQLite function call context */ |
| 4530 int nVal, /* Size of argument array */ |
| 4531 sqlite3_value **apVal /* Array of arguments */ |
| 4532 ){ |
| 4533 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4534 |
| 4535 UNUSED_PARAMETER(nVal); |
| 4536 |
| 4537 assert( nVal==1 ); |
| 4538 if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; |
| 4539 assert( pCsr ); |
| 4540 if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ |
| 4541 sqlite3Fts3Offsets(pContext, pCsr); |
| 4542 } |
| 4543 } |
| 4544 |
| 4545 /* |
| 4546 ** Implementation of the special optimize() function for FTS3. This |
| 4547 ** function merges all segments in the database to a single segment. |
| 4548 ** Example usage is: |
| 4549 ** |
| 4550 ** SELECT optimize(t) FROM t LIMIT 1; |
| 4551 ** |
| 4552 ** where 't' is the name of an FTS3 table. |
| 4553 */ |
| 4554 static void fts3OptimizeFunc( |
| 4555 sqlite3_context *pContext, /* SQLite function call context */ |
| 4556 int nVal, /* Size of argument array */ |
| 4557 sqlite3_value **apVal /* Array of arguments */ |
| 4558 ){ |
| 4559 int rc; /* Return code */ |
| 4560 Fts3Table *p; /* Virtual table handle */ |
| 4561 Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */ |
| 4562 |
| 4563 UNUSED_PARAMETER(nVal); |
| 4564 |
| 4565 assert( nVal==1 ); |
| 4566 if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; |
| 4567 p = (Fts3Table *)pCursor->base.pVtab; |
| 4568 assert( p ); |
| 4569 |
| 4570 rc = sqlite3Fts3Optimize(p); |
| 4571 |
| 4572 switch( rc ){ |
| 4573 case SQLITE_OK: |
| 4574 sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); |
| 4575 break; |
| 4576 case SQLITE_DONE: |
| 4577 sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); |
| 4578 break; |
| 4579 default: |
| 4580 sqlite3_result_error_code(pContext, rc); |
| 4581 break; |
| 4582 } |
| 4583 } |
| 4584 |
| 4585 /* |
| 4586 ** Implementation of the matchinfo() function for FTS3 |
| 4587 */ |
| 4588 static void fts3MatchinfoFunc( |
| 4589 sqlite3_context *pContext, /* SQLite function call context */ |
| 4590 int nVal, /* Size of argument array */ |
| 4591 sqlite3_value **apVal /* Array of arguments */ |
| 4592 ){ |
| 4593 Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ |
| 4594 assert( nVal==1 || nVal==2 ); |
| 4595 if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ |
| 4596 const char *zArg = 0; |
| 4597 if( nVal>1 ){ |
| 4598 zArg = (const char *)sqlite3_value_text(apVal[1]); |
| 4599 } |
| 4600 sqlite3Fts3Matchinfo(pContext, pCsr, zArg); |
| 4601 } |
| 4602 } |
| 4603 |
| 4604 /* |
| 4605 ** This routine implements the xFindFunction method for the FTS3 |
| 4606 ** virtual table. |
| 4607 */ |
| 4608 static int fts3FindFunctionMethod( |
| 4609 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4610 int nArg, /* Number of SQL function arguments */ |
| 4611 const char *zName, /* Name of SQL function */ |
| 4612 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ |
| 4613 void **ppArg /* Unused */ |
| 4614 ){ |
| 4615 struct Overloaded { |
| 4616 const char *zName; |
| 4617 void (*xFunc)(sqlite3_context*,int,sqlite3_value**); |
| 4618 } aOverload[] = { |
| 4619 { "snippet", fts3SnippetFunc }, |
| 4620 { "offsets", fts3OffsetsFunc }, |
| 4621 { "optimize", fts3OptimizeFunc }, |
| 4622 { "matchinfo", fts3MatchinfoFunc }, |
| 4623 }; |
| 4624 int i; /* Iterator variable */ |
| 4625 |
| 4626 UNUSED_PARAMETER(pVtab); |
| 4627 UNUSED_PARAMETER(nArg); |
| 4628 UNUSED_PARAMETER(ppArg); |
| 4629 |
| 4630 for(i=0; i<SizeofArray(aOverload); i++){ |
| 4631 if( strcmp(zName, aOverload[i].zName)==0 ){ |
| 4632 *pxFunc = aOverload[i].xFunc; |
| 4633 return 1; |
| 4634 } |
| 4635 } |
| 4636 |
| 4637 /* No function of the specified name was found. Return 0. */ |
| 4638 return 0; |
| 4639 } |
| 4640 |
| 4641 /* |
| 4642 ** Implementation of FTS3 xRename method. Rename an fts3 table. |
| 4643 */ |
| 4644 static int fts3RenameMethod( |
| 4645 sqlite3_vtab *pVtab, /* Virtual table handle */ |
| 4646 const char *zName /* New name of table */ |
| 4647 ){ |
| 4648 Fts3Table *p = (Fts3Table *)pVtab; |
| 4649 sqlite3 *db = p->db; /* Database connection */ |
| 4650 int rc; /* Return Code */ |
| 4651 |
| 4652 /* At this point it must be known if the %_stat table exists or not. |
| 4653 ** So bHasStat may not be 2. */ |
| 4654 rc = fts3SetHasStat(p); |
| 4655 |
| 4656 /* As it happens, the pending terms table is always empty here. This is |
| 4657 ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction |
| 4658 ** always opens a savepoint transaction. And the xSavepoint() method |
| 4659 ** flushes the pending terms table. But leave the (no-op) call to |
| 4660 ** PendingTermsFlush() in in case that changes. |
| 4661 */ |
| 4662 assert( p->nPendingData==0 ); |
| 4663 if( rc==SQLITE_OK ){ |
| 4664 rc = sqlite3Fts3PendingTermsFlush(p); |
| 4665 } |
| 4666 |
| 4667 if( p->zContentTbl==0 ){ |
| 4668 fts3DbExec(&rc, db, |
| 4669 "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", |
| 4670 p->zDb, p->zName, zName |
| 4671 ); |
| 4672 } |
| 4673 |
| 4674 if( p->bHasDocsize ){ |
| 4675 fts3DbExec(&rc, db, |
| 4676 "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", |
| 4677 p->zDb, p->zName, zName |
| 4678 ); |
| 4679 } |
| 4680 if( p->bHasStat ){ |
| 4681 fts3DbExec(&rc, db, |
| 4682 "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';", |
| 4683 p->zDb, p->zName, zName |
| 4684 ); |
| 4685 } |
| 4686 fts3DbExec(&rc, db, |
| 4687 "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';", |
| 4688 p->zDb, p->zName, zName |
| 4689 ); |
| 4690 fts3DbExec(&rc, db, |
| 4691 "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", |
| 4692 p->zDb, p->zName, zName |
| 4693 ); |
| 4694 return rc; |
| 4695 } |
| 4696 |
| 4697 /* |
| 4698 ** The xSavepoint() method. |
| 4699 ** |
| 4700 ** Flush the contents of the pending-terms table to disk. |
| 4701 */ |
| 4702 static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4703 int rc = SQLITE_OK; |
| 4704 UNUSED_PARAMETER(iSavepoint); |
| 4705 assert( ((Fts3Table *)pVtab)->inTransaction ); |
| 4706 assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); |
| 4707 TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); |
| 4708 if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ |
| 4709 rc = fts3SyncMethod(pVtab); |
| 4710 } |
| 4711 return rc; |
| 4712 } |
| 4713 |
| 4714 /* |
| 4715 ** The xRelease() method. |
| 4716 ** |
| 4717 ** This is a no-op. |
| 4718 */ |
| 4719 static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4720 TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); |
| 4721 UNUSED_PARAMETER(iSavepoint); |
| 4722 UNUSED_PARAMETER(pVtab); |
| 4723 assert( p->inTransaction ); |
| 4724 assert( p->mxSavepoint >= iSavepoint ); |
| 4725 TESTONLY( p->mxSavepoint = iSavepoint-1 ); |
| 4726 return SQLITE_OK; |
| 4727 } |
| 4728 |
| 4729 /* |
| 4730 ** The xRollbackTo() method. |
| 4731 ** |
| 4732 ** Discard the contents of the pending terms table. |
| 4733 */ |
| 4734 static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
| 4735 Fts3Table *p = (Fts3Table*)pVtab; |
| 4736 UNUSED_PARAMETER(iSavepoint); |
| 4737 assert( p->inTransaction ); |
| 4738 assert( p->mxSavepoint >= iSavepoint ); |
| 4739 TESTONLY( p->mxSavepoint = iSavepoint ); |
| 4740 sqlite3Fts3PendingTermsClear(p); |
| 4741 return SQLITE_OK; |
| 4742 } |
| 4743 |
/*
** Virtual table method table for FTS3. sqlite3Fts3Init() registers this
** same object under both the "fts3" and "fts4" module names.
**
** The initializer is positional, so the order of entries must match the
** field order of struct sqlite3_module. iVersion is 2 because the table
** supplies the xSavepoint, xRelease and xRollbackTo methods, which SQLite
** only invokes for modules that declare version 2 or greater.
*/
static const sqlite3_module fts3Module = {
  /* iVersion      */ 2,
  /* xCreate       */ fts3CreateMethod,
  /* xConnect      */ fts3ConnectMethod,
  /* xBestIndex    */ fts3BestIndexMethod,
  /* xDisconnect   */ fts3DisconnectMethod,
  /* xDestroy      */ fts3DestroyMethod,
  /* xOpen         */ fts3OpenMethod,
  /* xClose        */ fts3CloseMethod,
  /* xFilter       */ fts3FilterMethod,
  /* xNext         */ fts3NextMethod,
  /* xEof          */ fts3EofMethod,
  /* xColumn       */ fts3ColumnMethod,
  /* xRowid        */ fts3RowidMethod,
  /* xUpdate       */ fts3UpdateMethod,
  /* xBegin        */ fts3BeginMethod,
  /* xSync         */ fts3SyncMethod,
  /* xCommit       */ fts3CommitMethod,
  /* xRollback     */ fts3RollbackMethod,
  /* xFindFunction */ fts3FindFunctionMethod,
  /* xRename       */ fts3RenameMethod,
  /* xSavepoint    */ fts3SavepointMethod,
  /* xRelease      */ fts3ReleaseMethod,
  /* xRollbackTo   */ fts3RollbackToMethod,
};
| 4769 |
| 4770 /* |
| 4771 ** This function is registered as the module destructor (called when an |
| 4772 ** FTS3 enabled database connection is closed). It frees the memory |
| 4773 ** allocated for the tokenizer hash table. |
| 4774 */ |
| 4775 static void hashDestroy(void *p){ |
| 4776 Fts3Hash *pHash = (Fts3Hash *)p; |
| 4777 sqlite3Fts3HashClear(pHash); |
| 4778 sqlite3_free(pHash); |
| 4779 } |
| 4780 |
/*
** Forward declarations for the accessor functions of the built-in
** tokenizers: "simple", "porter", "unicode61" (omitted when
** SQLITE_DISABLE_FTS3_UNICODE is defined) and "icu" (only when
** SQLITE_ENABLE_ICU is defined). According to the original note the
** simple, porter and icu tokenizers are implemented in fts3_tokenizer1.c,
** fts3_porter.c and fts3_icu.c respectively.
**
** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
** to by the argument to point to the "simple" tokenizer implementation.
** And so on for the others.
*/
SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifndef SQLITE_DISABLE_FTS3_UNICODE
SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif
| 4799 |
/*
** Initialize the fts3 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts3 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
**
** The function:
**
**   1. Initializes the auxiliary modules (sqlite3Fts3InitAux(), plus
**      sqlite3Fts3InitTerm() in SQLITE_TEST builds).
**   2. Allocates a hash table and loads the built-in tokenizers into it.
**   3. Registers the fts3_tokenizer() interface (unless disabled by
**      CHROMIUM_FTS3_CHANGES) and overloads the snippet(), offsets(),
**      matchinfo() and optimize() SQL functions.
**   4. Registers fts3Module as "fts3" and - unless disabled by
**      CHROMIUM_FTS3_CHANGES - as "fts4", then initializes the tokenizer
**      virtual table via sqlite3Fts3InitTok().
**
** Returns SQLITE_OK on success or an SQLite error code otherwise.
*/
SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
  int rc = SQLITE_OK;
  Fts3Hash *pHash = 0;
  const sqlite3_tokenizer_module *pSimple = 0;
  const sqlite3_tokenizer_module *pPorter = 0;
#ifndef SQLITE_DISABLE_FTS3_UNICODE
  const sqlite3_tokenizer_module *pUnicode = 0;
#endif

#ifdef SQLITE_ENABLE_ICU
  const sqlite3_tokenizer_module *pIcu = 0;
  sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif

#ifndef SQLITE_DISABLE_FTS3_UNICODE
  sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif

  /* The early returns below are safe: nothing has been allocated yet. */
#ifdef SQLITE_TEST
  rc = sqlite3Fts3InitTerm(db);
  if( rc!=SQLITE_OK ) return rc;
#endif

  rc = sqlite3Fts3InitAux(db);
  if( rc!=SQLITE_OK ) return rc;

  sqlite3Fts3SimpleTokenizerModule(&pSimple);
  sqlite3Fts3PorterTokenizerModule(&pPorter);

  /* Allocate and initialize the hash-table used to store tokenizers. */
  pHash = sqlite3_malloc(sizeof(Fts3Hash));
  if( !pHash ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
  }

  /* Load the built-in tokenizers into the hash table. The key lengths
  ** passed here (7, 7, 10, 4) are one more than the visible name length,
  ** i.e. they include the nul-terminator. Any non-zero return from
  ** sqlite3Fts3HashInsert() is reported as SQLITE_NOMEM. */
  if( rc==SQLITE_OK ){
    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)

#ifndef SQLITE_DISABLE_FTS3_UNICODE
     || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
#endif
#ifdef SQLITE_ENABLE_ICU
     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif
    ){
      rc = SQLITE_NOMEM;
    }
  }

#ifdef SQLITE_TEST
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3ExprInitTestInterface(db);
  }
#endif

  /* Create the virtual table wrapper around the hash-table and overload
  ** the scalar functions snippet(), offsets(), matchinfo() (with one and
  ** with two arguments) and optimize(). If this is successful, register
  ** the module with sqlite.
  */
  if( SQLITE_OK==rc
#if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST
      /* fts3_tokenizer() disabled for security reasons. */
#else
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
#endif
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
  ){
    /* From this point on pHash is owned by the "fts3" module registration:
    ** hashDestroy() is passed as its destructor. The SQLite documentation
    ** states that the destructor is also invoked if
    ** sqlite3_create_module_v2() itself fails, which is why this branch
    ** returns rc directly instead of falling through to the cleanup code
    ** at the end of the function. */
    rc = sqlite3_create_module_v2(
        db, "fts3", &fts3Module, (void *)pHash, hashDestroy
    );
#if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST
    /* Disable fts4 and tokenizer vtab pending review. */
#else
    if( rc==SQLITE_OK ){
      /* "fts4" shares the same hash table but registers no destructor, so
      ** the table is only ever freed through the "fts3" registration. */
      rc = sqlite3_create_module_v2(
          db, "fts4", &fts3Module, (void *)pHash, 0
      );
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3InitTok(db, (void *)pHash);
    }
#endif
    return rc;
  }


  /* An error has occurred. Delete the hash table and return the error code. */
  assert( rc!=SQLITE_OK );
  if( pHash ){
    sqlite3Fts3HashClear(pHash);
    sqlite3_free(pHash);
  }
  return rc;
}
| 4908 |
| 4909 /* |
| 4910 ** Allocate an Fts3MultiSegReader for each token in the expression headed |
| 4911 ** by pExpr. |
| 4912 ** |
| 4913 ** An Fts3SegReader object is a cursor that can seek or scan a range of |
| 4914 ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple |
| 4915 ** Fts3SegReader objects internally to provide an interface to seek or scan |
| 4916 ** within the union of all segments of a b-tree. Hence the name. |
| 4917 ** |
| 4918 ** If the allocated Fts3MultiSegReader just seeks to a single entry in a |
| 4919 ** segment b-tree (if the term is not a prefix or it is a prefix for which |
| 4920 ** there exists prefix b-tree of the right length) then it may be traversed |
| 4921 ** and merged incrementally. Otherwise, it has to be merged into an in-memory |
| 4922 ** doclist and then traversed. |
| 4923 */ |
| 4924 static void fts3EvalAllocateReaders( |
| 4925 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 4926 Fts3Expr *pExpr, /* Allocate readers for this expression */ |
| 4927 int *pnToken, /* OUT: Total number of tokens in phrase. */ |
| 4928 int *pnOr, /* OUT: Total number of OR nodes in expr. */ |
| 4929 int *pRc /* IN/OUT: Error code */ |
| 4930 ){ |
| 4931 if( pExpr && SQLITE_OK==*pRc ){ |
| 4932 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 4933 int i; |
| 4934 int nToken = pExpr->pPhrase->nToken; |
| 4935 *pnToken += nToken; |
| 4936 for(i=0; i<nToken; i++){ |
| 4937 Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; |
| 4938 int rc = fts3TermSegReaderCursor(pCsr, |
| 4939 pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr |
| 4940 ); |
| 4941 if( rc!=SQLITE_OK ){ |
| 4942 *pRc = rc; |
| 4943 return; |
| 4944 } |
| 4945 } |
| 4946 assert( pExpr->pPhrase->iDoclistToken==0 ); |
| 4947 pExpr->pPhrase->iDoclistToken = -1; |
| 4948 }else{ |
| 4949 *pnOr += (pExpr->eType==FTSQUERY_OR); |
| 4950 fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); |
| 4951 fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); |
| 4952 } |
| 4953 } |
| 4954 } |
| 4955 |
| 4956 /* |
| 4957 ** Arguments pList/nList contain the doclist for token iToken of phrase p. |
| 4958 ** It is merged into the main doclist stored in p->doclist.aAll/nAll. |
| 4959 ** |
| 4960 ** This function assumes that pList points to a buffer allocated using |
| 4961 ** sqlite3_malloc(). This function takes responsibility for eventually |
| 4962 ** freeing the buffer. |
| 4963 ** |
| 4964 ** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs. |
| 4965 */ |
| 4966 static int fts3EvalPhraseMergeToken( |
| 4967 Fts3Table *pTab, /* FTS Table pointer */ |
| 4968 Fts3Phrase *p, /* Phrase to merge pList/nList into */ |
| 4969 int iToken, /* Token pList/nList corresponds to */ |
| 4970 char *pList, /* Pointer to doclist */ |
| 4971 int nList /* Number of bytes in pList */ |
| 4972 ){ |
| 4973 int rc = SQLITE_OK; |
| 4974 assert( iToken!=p->iDoclistToken ); |
| 4975 |
| 4976 if( pList==0 ){ |
| 4977 sqlite3_free(p->doclist.aAll); |
| 4978 p->doclist.aAll = 0; |
| 4979 p->doclist.nAll = 0; |
| 4980 } |
| 4981 |
| 4982 else if( p->iDoclistToken<0 ){ |
| 4983 p->doclist.aAll = pList; |
| 4984 p->doclist.nAll = nList; |
| 4985 } |
| 4986 |
| 4987 else if( p->doclist.aAll==0 ){ |
| 4988 sqlite3_free(pList); |
| 4989 } |
| 4990 |
| 4991 else { |
| 4992 char *pLeft; |
| 4993 char *pRight; |
| 4994 int nLeft; |
| 4995 int nRight; |
| 4996 int nDiff; |
| 4997 |
| 4998 if( p->iDoclistToken<iToken ){ |
| 4999 pLeft = p->doclist.aAll; |
| 5000 nLeft = p->doclist.nAll; |
| 5001 pRight = pList; |
| 5002 nRight = nList; |
| 5003 nDiff = iToken - p->iDoclistToken; |
| 5004 }else{ |
| 5005 pRight = p->doclist.aAll; |
| 5006 nRight = p->doclist.nAll; |
| 5007 pLeft = pList; |
| 5008 nLeft = nList; |
| 5009 nDiff = p->iDoclistToken - iToken; |
| 5010 } |
| 5011 |
| 5012 rc = fts3DoclistPhraseMerge( |
| 5013 pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight |
| 5014 ); |
| 5015 sqlite3_free(pLeft); |
| 5016 p->doclist.aAll = pRight; |
| 5017 p->doclist.nAll = nRight; |
| 5018 } |
| 5019 |
| 5020 if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; |
| 5021 return rc; |
| 5022 } |
| 5023 |
| 5024 /* |
| 5025 ** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist |
| 5026 ** does not take deferred tokens into account. |
| 5027 ** |
| 5028 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 5029 */ |
| 5030 static int fts3EvalPhraseLoad( |
| 5031 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5032 Fts3Phrase *p /* Phrase object */ |
| 5033 ){ |
| 5034 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5035 int iToken; |
| 5036 int rc = SQLITE_OK; |
| 5037 |
| 5038 for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){ |
| 5039 Fts3PhraseToken *pToken = &p->aToken[iToken]; |
| 5040 assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); |
| 5041 |
| 5042 if( pToken->pSegcsr ){ |
| 5043 int nThis = 0; |
| 5044 char *pThis = 0; |
| 5045 rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); |
| 5046 if( rc==SQLITE_OK ){ |
| 5047 rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); |
| 5048 } |
| 5049 } |
| 5050 assert( pToken->pSegcsr==0 ); |
| 5051 } |
| 5052 |
| 5053 return rc; |
| 5054 } |
| 5055 |
| 5056 /* |
| 5057 ** This function is called on each phrase after the position lists for |
| 5058 ** any deferred tokens have been loaded into memory. It updates the phrases |
| 5059 ** current position list to include only those positions that are really |
| 5060 ** instances of the phrase (after considering deferred tokens). If this |
| 5061 ** means that the phrase does not appear in the current row, doclist.pList |
| 5062 ** and doclist.nList are both zeroed. |
| 5063 ** |
| 5064 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 5065 */ |
| 5066 static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ |
| 5067 int iToken; /* Used to iterate through phrase tokens */ |
| 5068 char *aPoslist = 0; /* Position list for deferred tokens */ |
| 5069 int nPoslist = 0; /* Number of bytes in aPoslist */ |
| 5070 int iPrev = -1; /* Token number of previous deferred token */ |
| 5071 |
| 5072 assert( pPhrase->doclist.bFreeList==0 ); |
| 5073 |
| 5074 for(iToken=0; iToken<pPhrase->nToken; iToken++){ |
| 5075 Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; |
| 5076 Fts3DeferredToken *pDeferred = pToken->pDeferred; |
| 5077 |
| 5078 if( pDeferred ){ |
| 5079 char *pList = 0; |
| 5080 int nList = 0; |
| 5081 int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); |
| 5082 if( rc!=SQLITE_OK ) return rc; |
| 5083 |
| 5084 if( pList==0 ){ |
| 5085 sqlite3_free(aPoslist); |
| 5086 pPhrase->doclist.pList = 0; |
| 5087 pPhrase->doclist.nList = 0; |
| 5088 return SQLITE_OK; |
| 5089 |
| 5090 }else if( aPoslist==0 ){ |
| 5091 aPoslist = pList; |
| 5092 nPoslist = nList; |
| 5093 |
| 5094 }else{ |
| 5095 char *aOut = pList; |
| 5096 char *p1 = aPoslist; |
| 5097 char *p2 = aOut; |
| 5098 |
| 5099 assert( iPrev>=0 ); |
| 5100 fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); |
| 5101 sqlite3_free(aPoslist); |
| 5102 aPoslist = pList; |
| 5103 nPoslist = (int)(aOut - aPoslist); |
| 5104 if( nPoslist==0 ){ |
| 5105 sqlite3_free(aPoslist); |
| 5106 pPhrase->doclist.pList = 0; |
| 5107 pPhrase->doclist.nList = 0; |
| 5108 return SQLITE_OK; |
| 5109 } |
| 5110 } |
| 5111 iPrev = iToken; |
| 5112 } |
| 5113 } |
| 5114 |
| 5115 if( iPrev>=0 ){ |
| 5116 int nMaxUndeferred = pPhrase->iDoclistToken; |
| 5117 if( nMaxUndeferred<0 ){ |
| 5118 pPhrase->doclist.pList = aPoslist; |
| 5119 pPhrase->doclist.nList = nPoslist; |
| 5120 pPhrase->doclist.iDocid = pCsr->iPrevId; |
| 5121 pPhrase->doclist.bFreeList = 1; |
| 5122 }else{ |
| 5123 int nDistance; |
| 5124 char *p1; |
| 5125 char *p2; |
| 5126 char *aOut; |
| 5127 |
| 5128 if( nMaxUndeferred>iPrev ){ |
| 5129 p1 = aPoslist; |
| 5130 p2 = pPhrase->doclist.pList; |
| 5131 nDistance = nMaxUndeferred - iPrev; |
| 5132 }else{ |
| 5133 p1 = pPhrase->doclist.pList; |
| 5134 p2 = aPoslist; |
| 5135 nDistance = iPrev - nMaxUndeferred; |
| 5136 } |
| 5137 |
| 5138 aOut = (char *)sqlite3_malloc(nPoslist+8); |
| 5139 if( !aOut ){ |
| 5140 sqlite3_free(aPoslist); |
| 5141 return SQLITE_NOMEM; |
| 5142 } |
| 5143 |
| 5144 pPhrase->doclist.pList = aOut; |
| 5145 if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ |
| 5146 pPhrase->doclist.bFreeList = 1; |
| 5147 pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); |
| 5148 }else{ |
| 5149 sqlite3_free(aOut); |
| 5150 pPhrase->doclist.pList = 0; |
| 5151 pPhrase->doclist.nList = 0; |
| 5152 } |
| 5153 sqlite3_free(aPoslist); |
| 5154 } |
| 5155 } |
| 5156 |
| 5157 return SQLITE_OK; |
| 5158 } |
| 5159 |
/*
** Maximum number of tokens a phrase may have to be considered for the
** incremental doclists strategy. fts3EvalPhraseStart() falls back to
** loading the whole doclist for phrases with more tokens than this, and
** fts3EvalIncrPhraseNext() sizes its on-stack TokenDoclist array with it.
*/
#define MAX_INCR_PHRASE_TOKENS 4
| 5165 |
| 5166 /* |
| 5167 ** This function is called for each Fts3Phrase in a full-text query |
| 5168 ** expression to initialize the mechanism for returning rows. Once this |
| 5169 ** function has been called successfully on an Fts3Phrase, it may be |
| 5170 ** used with fts3EvalPhraseNext() to iterate through the matching docids. |
| 5171 ** |
| 5172 ** If parameter bOptOk is true, then the phrase may (or may not) use the |
| 5173 ** incremental loading strategy. Otherwise, the entire doclist is loaded into |
| 5174 ** memory within this call. |
| 5175 ** |
| 5176 ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. |
| 5177 */ |
| 5178 static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ |
| 5179 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5180 int rc = SQLITE_OK; /* Error code */ |
| 5181 int i; |
| 5182 |
| 5183 /* Determine if doclists may be loaded from disk incrementally. This is |
| 5184 ** possible if the bOptOk argument is true, the FTS doclists will be |
| 5185 ** scanned in forward order, and the phrase consists of |
| 5186 ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" |
| 5187 ** tokens or prefix tokens that cannot use a prefix-index. */ |
| 5188 int bHaveIncr = 0; |
| 5189 int bIncrOk = (bOptOk |
| 5190 && pCsr->bDesc==pTab->bDescIdx |
| 5191 && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 |
| 5192 #ifdef SQLITE_TEST |
| 5193 && pTab->bNoIncrDoclist==0 |
| 5194 #endif |
| 5195 ); |
| 5196 for(i=0; bIncrOk==1 && i<p->nToken; i++){ |
| 5197 Fts3PhraseToken *pToken = &p->aToken[i]; |
| 5198 if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ |
| 5199 bIncrOk = 0; |
| 5200 } |
| 5201 if( pToken->pSegcsr ) bHaveIncr = 1; |
| 5202 } |
| 5203 |
| 5204 if( bIncrOk && bHaveIncr ){ |
| 5205 /* Use the incremental approach. */ |
| 5206 int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); |
| 5207 for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ |
| 5208 Fts3PhraseToken *pToken = &p->aToken[i]; |
| 5209 Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; |
| 5210 if( pSegcsr ){ |
| 5211 rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); |
| 5212 } |
| 5213 } |
| 5214 p->bIncr = 1; |
| 5215 }else{ |
| 5216 /* Load the full doclist for the phrase into memory. */ |
| 5217 rc = fts3EvalPhraseLoad(pCsr, p); |
| 5218 p->bIncr = 0; |
| 5219 } |
| 5220 |
| 5221 assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); |
| 5222 return rc; |
| 5223 } |
| 5224 |
| 5225 /* |
| 5226 ** This function is used to iterate backwards (from the end to start) |
| 5227 ** through doclists. It is used by this module to iterate through phrase |
| 5228 ** doclists in reverse and by the fts3_write.c module to iterate through |
| 5229 ** pending-terms lists when writing to databases with "order=desc". |
| 5230 ** |
| 5231 ** The doclist may be sorted in ascending (parameter bDescIdx==0) or |
| 5232 ** descending (parameter bDescIdx==1) order of docid. Regardless, this |
| 5233 ** function iterates from the end of the doclist to the beginning. |
| 5234 */ |
| 5235 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev( |
| 5236 int bDescIdx, /* True if the doclist is desc */ |
| 5237 char *aDoclist, /* Pointer to entire doclist */ |
| 5238 int nDoclist, /* Length of aDoclist in bytes */ |
| 5239 char **ppIter, /* IN/OUT: Iterator pointer */ |
| 5240 sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ |
| 5241 int *pnList, /* OUT: List length pointer */ |
| 5242 u8 *pbEof /* OUT: End-of-file flag */ |
| 5243 ){ |
| 5244 char *p = *ppIter; |
| 5245 |
| 5246 assert( nDoclist>0 ); |
| 5247 assert( *pbEof==0 ); |
| 5248 assert( p || *piDocid==0 ); |
| 5249 assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); |
| 5250 |
| 5251 if( p==0 ){ |
| 5252 sqlite3_int64 iDocid = 0; |
| 5253 char *pNext = 0; |
| 5254 char *pDocid = aDoclist; |
| 5255 char *pEnd = &aDoclist[nDoclist]; |
| 5256 int iMul = 1; |
| 5257 |
| 5258 while( pDocid<pEnd ){ |
| 5259 sqlite3_int64 iDelta; |
| 5260 pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta); |
| 5261 iDocid += (iMul * iDelta); |
| 5262 pNext = pDocid; |
| 5263 fts3PoslistCopy(0, &pDocid); |
| 5264 while( pDocid<pEnd && *pDocid==0 ) pDocid++; |
| 5265 iMul = (bDescIdx ? -1 : 1); |
| 5266 } |
| 5267 |
| 5268 *pnList = (int)(pEnd - pNext); |
| 5269 *ppIter = pNext; |
| 5270 *piDocid = iDocid; |
| 5271 }else{ |
| 5272 int iMul = (bDescIdx ? -1 : 1); |
| 5273 sqlite3_int64 iDelta; |
| 5274 fts3GetReverseVarint(&p, aDoclist, &iDelta); |
| 5275 *piDocid -= (iMul * iDelta); |
| 5276 |
| 5277 if( p==aDoclist ){ |
| 5278 *pbEof = 1; |
| 5279 }else{ |
| 5280 char *pSave = p; |
| 5281 fts3ReversePoslist(aDoclist, &p); |
| 5282 *pnList = (int)(pSave - p); |
| 5283 } |
| 5284 *ppIter = p; |
| 5285 } |
| 5286 } |
| 5287 |
| 5288 /* |
| 5289 ** Iterate forwards through a doclist. |
| 5290 */ |
| 5291 SQLITE_PRIVATE void sqlite3Fts3DoclistNext( |
| 5292 int bDescIdx, /* True if the doclist is desc */ |
| 5293 char *aDoclist, /* Pointer to entire doclist */ |
| 5294 int nDoclist, /* Length of aDoclist in bytes */ |
| 5295 char **ppIter, /* IN/OUT: Iterator pointer */ |
| 5296 sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ |
| 5297 u8 *pbEof /* OUT: End-of-file flag */ |
| 5298 ){ |
| 5299 char *p = *ppIter; |
| 5300 |
| 5301 assert( nDoclist>0 ); |
| 5302 assert( *pbEof==0 ); |
| 5303 assert( p || *piDocid==0 ); |
| 5304 assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); |
| 5305 |
| 5306 if( p==0 ){ |
| 5307 p = aDoclist; |
| 5308 p += sqlite3Fts3GetVarint(p, piDocid); |
| 5309 }else{ |
| 5310 fts3PoslistCopy(0, &p); |
| 5311 while( p<&aDoclist[nDoclist] && *p==0 ) p++; |
| 5312 if( p>=&aDoclist[nDoclist] ){ |
| 5313 *pbEof = 1; |
| 5314 }else{ |
| 5315 sqlite3_int64 iVar; |
| 5316 p += sqlite3Fts3GetVarint(p, &iVar); |
| 5317 *piDocid += ((bDescIdx ? -1 : 1) * iVar); |
| 5318 } |
| 5319 } |
| 5320 |
| 5321 *ppIter = p; |
| 5322 } |
| 5323 |
| 5324 /* |
| 5325 ** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof |
| 5326 ** to true if EOF is reached. |
| 5327 */ |
| 5328 static void fts3EvalDlPhraseNext( |
| 5329 Fts3Table *pTab, |
| 5330 Fts3Doclist *pDL, |
| 5331 u8 *pbEof |
| 5332 ){ |
| 5333 char *pIter; /* Used to iterate through aAll */ |
| 5334 char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ |
| 5335 |
| 5336 if( pDL->pNextDocid ){ |
| 5337 pIter = pDL->pNextDocid; |
| 5338 }else{ |
| 5339 pIter = pDL->aAll; |
| 5340 } |
| 5341 |
| 5342 if( pIter>=pEnd ){ |
| 5343 /* We have already reached the end of this doclist. EOF. */ |
| 5344 *pbEof = 1; |
| 5345 }else{ |
| 5346 sqlite3_int64 iDelta; |
| 5347 pIter += sqlite3Fts3GetVarint(pIter, &iDelta); |
| 5348 if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ |
| 5349 pDL->iDocid += iDelta; |
| 5350 }else{ |
| 5351 pDL->iDocid -= iDelta; |
| 5352 } |
| 5353 pDL->pList = pIter; |
| 5354 fts3PoslistCopy(0, &pIter); |
| 5355 pDL->nList = (int)(pIter - pDL->pList); |
| 5356 |
| 5357 /* pIter now points just past the 0x00 that terminates the position- |
| 5358 ** list for document pDL->iDocid. However, if this position-list was |
| 5359 ** edited in place by fts3EvalNearTrim(), then pIter may not actually |
| 5360 ** point to the start of the next docid value. The following line deals |
| 5361 ** with this case by advancing pIter past the zero-padding added by |
| 5362 ** fts3EvalNearTrim(). */ |
| 5363 while( pIter<pEnd && *pIter==0 ) pIter++; |
| 5364 |
| 5365 pDL->pNextDocid = pIter; |
| 5366 assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); |
| 5367 *pbEof = 0; |
| 5368 } |
| 5369 } |
| 5370 |
/*
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
** One instance describes the current doclist entry of a single token of
** a multi-token phrase.
*/
typedef struct TokenDoclist TokenDoclist;
struct TokenDoclist {
  int bIgnore;                    /* Set by incrPhraseTokenNext() for a token
                                  ** with no iterator of its own; such
                                  ** entries are skipped by the caller */
  sqlite3_int64 iDocid;           /* Docid of the current entry */
  char *pList;                    /* Position list of the current entry */
  int nList;                      /* Size of pList in bytes */
};
| 5381 |
| 5382 /* |
| 5383 ** Token pToken is an incrementally loaded token that is part of a |
| 5384 ** multi-token phrase. Advance it to the next matching document in the |
| 5385 ** database and populate output variable *p with the details of the new |
| 5386 ** entry. Or, if the iterator has reached EOF, set *pbEof to true. |
| 5387 ** |
| 5388 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5389 ** SQLITE_OK. |
| 5390 */ |
| 5391 static int incrPhraseTokenNext( |
| 5392 Fts3Table *pTab, /* Virtual table handle */ |
| 5393 Fts3Phrase *pPhrase, /* Phrase to advance token of */ |
| 5394 int iToken, /* Specific token to advance */ |
| 5395 TokenDoclist *p, /* OUT: Docid and doclist for new entry */ |
| 5396 u8 *pbEof /* OUT: True if iterator is at EOF */ |
| 5397 ){ |
| 5398 int rc = SQLITE_OK; |
| 5399 |
| 5400 if( pPhrase->iDoclistToken==iToken ){ |
| 5401 assert( p->bIgnore==0 ); |
| 5402 assert( pPhrase->aToken[iToken].pSegcsr==0 ); |
| 5403 fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); |
| 5404 p->pList = pPhrase->doclist.pList; |
| 5405 p->nList = pPhrase->doclist.nList; |
| 5406 p->iDocid = pPhrase->doclist.iDocid; |
| 5407 }else{ |
| 5408 Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; |
| 5409 assert( pToken->pDeferred==0 ); |
| 5410 assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); |
| 5411 if( pToken->pSegcsr ){ |
| 5412 assert( p->bIgnore==0 ); |
| 5413 rc = sqlite3Fts3MsrIncrNext( |
| 5414 pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList |
| 5415 ); |
| 5416 if( p->pList==0 ) *pbEof = 1; |
| 5417 }else{ |
| 5418 p->bIgnore = 1; |
| 5419 } |
| 5420 } |
| 5421 |
| 5422 return rc; |
| 5423 } |
| 5424 |
| 5425 |
/*
** The phrase iterator passed as the second argument:
**
**   * features at least one token that uses an incremental doclist, and
**
**   * does not contain any deferred tokens.
**
** Advance it to the next matching document in the database and populate
** the Fts3Doclist.pList and nList fields.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK. Note that if the allocation of the scratch position list
** fails, SQLITE_NOMEM is returned immediately and *pbEof is not written.
*/
static int fts3EvalIncrPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  int rc = SQLITE_OK;
  Fts3Doclist *pDL = &p->doclist;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  u8 bEof = 0;

  /* This is only called if it is guaranteed that the phrase has at least
  ** one incremental token. In which case the bIncr flag is set. */
  assert( p->bIncr==1 );

  if( p->nToken==1 && p->bIncr ){
    /* Single-token phrase: the token's incremental reader supplies the
    ** docid and position list directly. */
    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
        &pDL->iDocid, &pDL->pList, &pDL->nList
    );
    if( pDL->pList==0 ) bEof = 1;
  }else{
    /* NOTE(review): bDescDoclist is not referenced by name below; it is
    ** presumably consumed by the DOCID_CMP() macro - confirm before
    ** renaming or removing it. */
    int bDescDoclist = pCsr->bDesc;
    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];

    memset(a, 0, sizeof(a));
    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
    assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );

    /* Each pass of this loop finds the next docid common to all tokens
    ** and then tests whether the tokens really form the phrase there. */
    while( bEof==0 ){
      int bMaxSet = 0;
      sqlite3_int64 iMax = 0;     /* Largest docid for all iterators */
      int i;                      /* Used to iterate through tokens */

      /* Advance the iterator for each token in the phrase once. */
      for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
        rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
        if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
          iMax = a[i].iDocid;
          bMaxSet = 1;
        }
      }
      assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) );
      assert( rc!=SQLITE_OK || bMaxSet );

      /* Keep advancing iterators until they all point to the same document */
      for(i=0; i<p->nToken; i++){
        while( rc==SQLITE_OK && bEof==0
            && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0
        ){
          rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
          if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
            /* This token overshot the target, so its docid becomes the
            ** new target. Resetting i makes the enclosing while condition
            ** re-test token 0 next, restarting the catch-up scan. */
            iMax = a[i].iDocid;
            i = 0;
          }
        }
      }

      /* Check if the current entries really are a phrase match */
      if( bEof==0 ){
        int nList = 0;
        int nByte = a[p->nToken-1].nList;
        /* Work on a private copy of the last token's position list. One
        ** byte more than nList is copied (presumably the list terminator -
        ** confirm). */
        char *aDoclist = sqlite3_malloc(nByte+1);
        if( !aDoclist ) return SQLITE_NOMEM;
        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);

        /* Merge each earlier token into the copy, in place. A zero result
        ** from the merge means no match, which ends the loop early with
        ** i<(nToken-1). */
        for(i=0; i<(p->nToken-1); i++){
          if( a[i].bIgnore==0 ){
            char *pL = a[i].pList;
            char *pR = aDoclist;
            char *pOut = aDoclist;
            int nDist = p->nToken-1-i;
            int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
            if( res==0 ) break;
            nList = (int)(pOut - aDoclist);
          }
        }
        if( i==(p->nToken-1) ){
          /* Every token merged successfully: hand the buffer to the
          ** doclist (bFreeList marks it as owned) and stop searching. */
          pDL->iDocid = iMax;
          pDL->pList = aDoclist;
          pDL->nList = nList;
          pDL->bFreeList = 1;
          break;
        }
        sqlite3_free(aDoclist);
      }
    }
  }

  *pbEof = bEof;
  return rc;
}
| 5533 |
| 5534 /* |
| 5535 ** Attempt to move the phrase iterator to point to the next matching docid. |
| 5536 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 5537 ** SQLITE_OK. |
| 5538 ** |
| 5539 ** If there is no "next" entry and no error occurs, then *pbEof is set to |
| 5540 ** 1 before returning. Otherwise, if no error occurs and the iterator is |
| 5541 ** successfully advanced, *pbEof is set to 0. |
| 5542 */ |
| 5543 static int fts3EvalPhraseNext( |
| 5544 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5545 Fts3Phrase *p, /* Phrase object to advance to next docid */ |
| 5546 u8 *pbEof /* OUT: Set to 1 if EOF */ |
| 5547 ){ |
| 5548 int rc = SQLITE_OK; |
| 5549 Fts3Doclist *pDL = &p->doclist; |
| 5550 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5551 |
| 5552 if( p->bIncr ){ |
| 5553 rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); |
| 5554 }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ |
| 5555 sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, |
| 5556 &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof |
| 5557 ); |
| 5558 pDL->pList = pDL->pNextDocid; |
| 5559 }else{ |
| 5560 fts3EvalDlPhraseNext(pTab, pDL, pbEof); |
| 5561 } |
| 5562 |
| 5563 return rc; |
| 5564 } |
| 5565 |
| 5566 /* |
| 5567 ** |
| 5568 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 5569 ** Otherwise, fts3EvalPhraseStart() is called on all phrases within the |
| 5570 ** expression. Also the Fts3Expr.bDeferred variable is set to true for any |
| 5571 ** expressions for which all descendent tokens are deferred. |
| 5572 ** |
| 5573 ** If parameter bOptOk is zero, then it is guaranteed that the |
| 5574 ** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for |
| 5575 ** each phrase in the expression (subject to deferred token processing). |
| 5576 ** Or, if bOptOk is non-zero, then one or more tokens within the expression |
| 5577 ** may be loaded incrementally, meaning doclist.aAll/nAll is not available. |
| 5578 ** |
| 5579 ** If an error occurs within this function, *pRc is set to an SQLite error |
| 5580 ** code before returning. |
| 5581 */ |
| 5582 static void fts3EvalStartReaders( |
| 5583 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5584 Fts3Expr *pExpr, /* Expression to initialize phrases in */ |
| 5585 int *pRc /* IN/OUT: Error code */ |
| 5586 ){ |
| 5587 if( pExpr && SQLITE_OK==*pRc ){ |
| 5588 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 5589 int nToken = pExpr->pPhrase->nToken; |
| 5590 if( nToken ){ |
| 5591 int i; |
| 5592 for(i=0; i<nToken; i++){ |
| 5593 if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; |
| 5594 } |
| 5595 pExpr->bDeferred = (i==nToken); |
| 5596 } |
| 5597 *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); |
| 5598 }else{ |
| 5599 fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); |
| 5600 fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); |
| 5601 pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); |
| 5602 } |
| 5603 } |
| 5604 } |
| 5605 |
/*
** An array of the following structures is assembled as part of the process
** of selecting tokens to defer before the query starts executing (as part
** of the xFilter() method). There is one element in the array for each
** token in the FTS expression.
**
** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
** to phrases that are connected only by AND and NEAR operators (not OR or
** NOT). When determining tokens to defer, each AND/NEAR cluster is
** considered separately. The root of a token's AND/NEAR cluster is stored
** in Fts3TokenAndCost.pRoot.
*/
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
  Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
  int iToken;                     /* Position of token in phrase */
  Fts3PhraseToken *pToken;        /* The token itself */
  Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
  int nOvfl;                      /* Number of overflow pages to load doclist */
  int iCol;                       /* The column the token must match */
};
| 5627 |
| 5628 /* |
| 5629 ** This function is used to populate an allocated Fts3TokenAndCost array. |
| 5630 ** |
| 5631 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 5632 ** Otherwise, if an error occurs during execution, *pRc is set to an |
| 5633 ** SQLite error code. |
| 5634 */ |
| 5635 static void fts3EvalTokenCosts( |
| 5636 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5637 Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ |
| 5638 Fts3Expr *pExpr, /* Expression to consider */ |
| 5639 Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ |
| 5640 Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ |
| 5641 int *pRc /* IN/OUT: Error code */ |
| 5642 ){ |
| 5643 if( *pRc==SQLITE_OK ){ |
| 5644 if( pExpr->eType==FTSQUERY_PHRASE ){ |
| 5645 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 5646 int i; |
| 5647 for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ |
| 5648 Fts3TokenAndCost *pTC = (*ppTC)++; |
| 5649 pTC->pPhrase = pPhrase; |
| 5650 pTC->iToken = i; |
| 5651 pTC->pRoot = pRoot; |
| 5652 pTC->pToken = &pPhrase->aToken[i]; |
| 5653 pTC->iCol = pPhrase->iColumn; |
| 5654 *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); |
| 5655 } |
| 5656 }else if( pExpr->eType!=FTSQUERY_NOT ){ |
| 5657 assert( pExpr->eType==FTSQUERY_OR |
| 5658 || pExpr->eType==FTSQUERY_AND |
| 5659 || pExpr->eType==FTSQUERY_NEAR |
| 5660 ); |
| 5661 assert( pExpr->pLeft && pExpr->pRight ); |
| 5662 if( pExpr->eType==FTSQUERY_OR ){ |
| 5663 pRoot = pExpr->pLeft; |
| 5664 **ppOr = pRoot; |
| 5665 (*ppOr)++; |
| 5666 } |
| 5667 fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); |
| 5668 if( pExpr->eType==FTSQUERY_OR ){ |
| 5669 pRoot = pExpr->pRight; |
| 5670 **ppOr = pRoot; |
| 5671 (*ppOr)++; |
| 5672 } |
| 5673 fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); |
| 5674 } |
| 5675 } |
| 5676 } |
| 5677 |
| 5678 /* |
| 5679 ** Determine the average document (row) size in pages. If successful, |
| 5680 ** write this value to *pnPage and return SQLITE_OK. Otherwise, return |
| 5681 ** an SQLite error code. |
| 5682 ** |
| 5683 ** The average document size in pages is calculated by first calculating |
| 5684 ** determining the average size in bytes, B. If B is less than the amount |
| 5685 ** of data that will fit on a single leaf page of an intkey table in |
| 5686 ** this database, then the average docsize is 1. Otherwise, it is 1 plus |
| 5687 ** the number of overflow pages consumed by a record B bytes in size. |
| 5688 */ |
| 5689 static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ |
| 5690 if( pCsr->nRowAvg==0 ){ |
| 5691 /* The average document size, which is required to calculate the cost |
| 5692 ** of each doclist, has not yet been determined. Read the required |
| 5693 ** data from the %_stat table to calculate it. |
| 5694 ** |
| 5695 ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 |
| 5696 ** varints, where nCol is the number of columns in the FTS3 table. |
| 5697 ** The first varint is the number of documents currently stored in |
| 5698 ** the table. The following nCol varints contain the total amount of |
| 5699 ** data stored in all rows of each column of the table, from left |
| 5700 ** to right. |
| 5701 */ |
| 5702 int rc; |
| 5703 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; |
| 5704 sqlite3_stmt *pStmt; |
| 5705 sqlite3_int64 nDoc = 0; |
| 5706 sqlite3_int64 nByte = 0; |
| 5707 const char *pEnd; |
| 5708 const char *a; |
| 5709 |
| 5710 rc = sqlite3Fts3SelectDoctotal(p, &pStmt); |
| 5711 if( rc!=SQLITE_OK ) return rc; |
| 5712 a = sqlite3_column_blob(pStmt, 0); |
| 5713 assert( a ); |
| 5714 |
| 5715 pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; |
| 5716 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 5717 while( a<pEnd ){ |
| 5718 a += sqlite3Fts3GetVarint(a, &nByte); |
| 5719 } |
| 5720 if( nDoc==0 || nByte==0 ){ |
| 5721 sqlite3_reset(pStmt); |
| 5722 return FTS_CORRUPT_VTAB; |
| 5723 } |
| 5724 |
| 5725 pCsr->nDoc = nDoc; |
| 5726 pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); |
| 5727 assert( pCsr->nRowAvg>0 ); |
| 5728 rc = sqlite3_reset(pStmt); |
| 5729 if( rc!=SQLITE_OK ) return rc; |
| 5730 } |
| 5731 |
| 5732 *pnPage = pCsr->nRowAvg; |
| 5733 return SQLITE_OK; |
| 5734 } |
| 5735 |
| 5736 /* |
| 5737 ** This function is called to select the tokens (if any) that will be |
| 5738 ** deferred. The array aTC[] has already been populated when this is |
| 5739 ** called. |
| 5740 ** |
| 5741 ** This function is called once for each AND/NEAR cluster in the |
| 5742 ** expression. Each invocation determines which tokens to defer within |
| 5743 ** the cluster with root node pRoot. See comments above the definition |
| 5744 ** of struct Fts3TokenAndCost for more details. |
| 5745 ** |
| 5746 ** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() |
| 5747 ** called on each token to defer. Otherwise, an SQLite error code is |
| 5748 ** returned. |
| 5749 */ |
| 5750 static int fts3EvalSelectDeferred( |
| 5751 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 5752 Fts3Expr *pRoot, /* Consider tokens with this root node */ |
| 5753 Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ |
| 5754 int nTC /* Number of entries in aTC[] */ |
| 5755 ){ |
| 5756 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5757 int nDocSize = 0; /* Number of pages per doc loaded */ |
| 5758 int rc = SQLITE_OK; /* Return code */ |
| 5759 int ii; /* Iterator variable for various purposes */ |
| 5760 int nOvfl = 0; /* Total overflow pages used by doclists */ |
| 5761 int nToken = 0; /* Total number of tokens in cluster */ |
| 5762 |
| 5763 int nMinEst = 0; /* The minimum count for any phrase so far. */ |
| 5764 int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ |
| 5765 |
| 5766 /* Tokens are never deferred for FTS tables created using the content=xxx |
| 5767 ** option. The reason being that it is not guaranteed that the content |
| 5768 ** table actually contains the same data as the index. To prevent this from |
| 5769 ** causing any problems, the deferred token optimization is completely |
| 5770 ** disabled for content=xxx tables. */ |
| 5771 if( pTab->zContentTbl ){ |
| 5772 return SQLITE_OK; |
| 5773 } |
| 5774 |
| 5775 /* Count the tokens in this AND/NEAR cluster. If none of the doclists |
| 5776 ** associated with the tokens spill onto overflow pages, or if there is |
| 5777 ** only 1 token, exit early. No tokens to defer in this case. */ |
| 5778 for(ii=0; ii<nTC; ii++){ |
| 5779 if( aTC[ii].pRoot==pRoot ){ |
| 5780 nOvfl += aTC[ii].nOvfl; |
| 5781 nToken++; |
| 5782 } |
| 5783 } |
| 5784 if( nOvfl==0 || nToken<2 ) return SQLITE_OK; |
| 5785 |
| 5786 /* Obtain the average docsize (in pages). */ |
| 5787 rc = fts3EvalAverageDocsize(pCsr, &nDocSize); |
| 5788 assert( rc!=SQLITE_OK || nDocSize>0 ); |
| 5789 |
| 5790 |
| 5791 /* Iterate through all tokens in this AND/NEAR cluster, in ascending order |
| 5792 ** of the number of overflow pages that will be loaded by the pager layer |
| 5793 ** to retrieve the entire doclist for the token from the full-text index. |
| 5794 ** Load the doclists for tokens that are either: |
| 5795 ** |
| 5796 ** a. The cheapest token in the entire query (i.e. the one visited by the |
| 5797 ** first iteration of this loop), or |
| 5798 ** |
| 5799 ** b. Part of a multi-token phrase. |
| 5800 ** |
| 5801 ** After each token doclist is loaded, merge it with the others from the |
| 5802 ** same phrase and count the number of documents that the merged doclist |
| 5803 ** contains. Set variable "nMinEst" to the smallest number of documents in |
| 5804 ** any phrase doclist for which 1 or more token doclists have been loaded. |
| 5805 ** Let nOther be the number of other phrases for which it is certain that |
| 5806 ** one or more tokens will not be deferred. |
| 5807 ** |
| 5808 ** Then, for each token, defer it if loading the doclist would result in |
| 5809 ** loading N or more overflow pages into memory, where N is computed as: |
| 5810 ** |
| 5811 ** (nMinEst + 4^nOther - 1) / (4^nOther) |
| 5812 */ |
| 5813 for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){ |
| 5814 int iTC; /* Used to iterate through aTC[] array. */ |
| 5815 Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */ |
| 5816 |
| 5817 /* Set pTC to point to the cheapest remaining token. */ |
| 5818 for(iTC=0; iTC<nTC; iTC++){ |
| 5819 if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot |
| 5820 && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) |
| 5821 ){ |
| 5822 pTC = &aTC[iTC]; |
| 5823 } |
| 5824 } |
| 5825 assert( pTC ); |
| 5826 |
| 5827 if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ |
| 5828 /* The number of overflow pages to load for this (and therefore all |
| 5829 ** subsequent) tokens is greater than the estimated number of pages |
| 5830 ** that will be loaded if all subsequent tokens are deferred. |
| 5831 */ |
| 5832 Fts3PhraseToken *pToken = pTC->pToken; |
| 5833 rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); |
| 5834 fts3SegReaderCursorFree(pToken->pSegcsr); |
| 5835 pToken->pSegcsr = 0; |
| 5836 }else{ |
| 5837 /* Set nLoad4 to the value of (4^nOther) for the next iteration of the |
| 5838 ** for-loop. Except, limit the value to 2^24 to prevent it from |
| 5839 ** overflowing the 32-bit integer it is stored in. */ |
| 5840 if( ii<12 ) nLoad4 = nLoad4*4; |
| 5841 |
| 5842 if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ |
| 5843 /* Either this is the cheapest token in the entire query, or it is |
| 5844 ** part of a multi-token phrase. Either way, the entire doclist will |
| 5845 ** (eventually) be loaded into memory. It may as well be now. */ |
| 5846 Fts3PhraseToken *pToken = pTC->pToken; |
| 5847 int nList = 0; |
| 5848 char *pList = 0; |
| 5849 rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); |
| 5850 assert( rc==SQLITE_OK || pList==0 ); |
| 5851 if( rc==SQLITE_OK ){ |
| 5852 rc = fts3EvalPhraseMergeToken( |
| 5853 pTab, pTC->pPhrase, pTC->iToken,pList,nList |
| 5854 ); |
| 5855 } |
| 5856 if( rc==SQLITE_OK ){ |
| 5857 int nCount; |
| 5858 nCount = fts3DoclistCountDocids( |
| 5859 pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll |
| 5860 ); |
| 5861 if( ii==0 || nCount<nMinEst ) nMinEst = nCount; |
| 5862 } |
| 5863 } |
| 5864 } |
| 5865 pTC->pToken = 0; |
| 5866 } |
| 5867 |
| 5868 return rc; |
| 5869 } |
| 5870 |
| 5871 /* |
| 5872 ** This function is called from within the xFilter method. It initializes |
| 5873 ** the full-text query currently stored in pCsr->pExpr. To iterate through |
| 5874 ** the results of a query, the caller does: |
| 5875 ** |
| 5876 ** fts3EvalStart(pCsr); |
| 5877 ** while( 1 ){ |
| 5878 ** fts3EvalNext(pCsr); |
| 5879 ** if( pCsr->bEof ) break; |
| 5880 ** ... return row pCsr->iPrevId to the caller ... |
| 5881 ** } |
| 5882 */ |
| 5883 static int fts3EvalStart(Fts3Cursor *pCsr){ |
| 5884 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 5885 int rc = SQLITE_OK; |
| 5886 int nToken = 0; |
| 5887 int nOr = 0; |
| 5888 |
| 5889 /* Allocate a MultiSegReader for each token in the expression. */ |
| 5890 fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); |
| 5891 |
| 5892 /* Determine which, if any, tokens in the expression should be deferred. */ |
| 5893 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 5894 if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ |
| 5895 Fts3TokenAndCost *aTC; |
| 5896 Fts3Expr **apOr; |
| 5897 aTC = (Fts3TokenAndCost *)sqlite3_malloc( |
| 5898 sizeof(Fts3TokenAndCost) * nToken |
| 5899 + sizeof(Fts3Expr *) * nOr * 2 |
| 5900 ); |
| 5901 apOr = (Fts3Expr **)&aTC[nToken]; |
| 5902 |
| 5903 if( !aTC ){ |
| 5904 rc = SQLITE_NOMEM; |
| 5905 }else{ |
| 5906 int ii; |
| 5907 Fts3TokenAndCost *pTC = aTC; |
| 5908 Fts3Expr **ppOr = apOr; |
| 5909 |
| 5910 fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); |
| 5911 nToken = (int)(pTC-aTC); |
| 5912 nOr = (int)(ppOr-apOr); |
| 5913 |
| 5914 if( rc==SQLITE_OK ){ |
| 5915 rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); |
| 5916 for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){ |
| 5917 rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken); |
| 5918 } |
| 5919 } |
| 5920 |
| 5921 sqlite3_free(aTC); |
| 5922 } |
| 5923 } |
| 5924 #endif |
| 5925 |
| 5926 fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); |
| 5927 return rc; |
| 5928 } |
| 5929 |
| 5930 /* |
| 5931 ** Invalidate the current position list for phrase pPhrase. |
| 5932 */ |
| 5933 static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ |
| 5934 if( pPhrase->doclist.bFreeList ){ |
| 5935 sqlite3_free(pPhrase->doclist.pList); |
| 5936 } |
| 5937 pPhrase->doclist.pList = 0; |
| 5938 pPhrase->doclist.nList = 0; |
| 5939 pPhrase->doclist.bFreeList = 0; |
| 5940 } |
| 5941 |
| 5942 /* |
| 5943 ** This function is called to edit the position list associated with |
| 5944 ** the phrase object passed as the fifth argument according to a NEAR |
| 5945 ** condition. For example: |
| 5946 ** |
| 5947 ** abc NEAR/5 "def ghi" |
| 5948 ** |
| 5949 ** Parameter nNear is passed the NEAR distance of the expression (5 in |
| 5950 ** the example above). When this function is called, *paPoslist points to |
| 5951 ** the position list, and *pnToken is the number of phrase tokens in, the |
| 5952 ** phrase on the other side of the NEAR operator to pPhrase. For example, |
| 5953 ** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to |
| 5954 ** the position list associated with phrase "abc". |
| 5955 ** |
| 5956 ** All positions in the pPhrase position list that are not sufficiently |
| 5957 ** close to a position in the *paPoslist position list are removed. If this |
| 5958 ** leaves 0 positions, zero is returned. Otherwise, non-zero. |
| 5959 ** |
| 5960 ** Before returning, *paPoslist is set to point to the position lsit |
| 5961 ** associated with pPhrase. And *pnToken is set to the number of tokens in |
| 5962 ** pPhrase. |
| 5963 */ |
| 5964 static int fts3EvalNearTrim( |
| 5965 int nNear, /* NEAR distance. As in "NEAR/nNear". */ |
| 5966 char *aTmp, /* Temporary space to use */ |
| 5967 char **paPoslist, /* IN/OUT: Position list */ |
| 5968 int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ |
| 5969 Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ |
| 5970 ){ |
| 5971 int nParam1 = nNear + pPhrase->nToken; |
| 5972 int nParam2 = nNear + *pnToken; |
| 5973 int nNew; |
| 5974 char *p2; |
| 5975 char *pOut; |
| 5976 int res; |
| 5977 |
| 5978 assert( pPhrase->doclist.pList ); |
| 5979 |
| 5980 p2 = pOut = pPhrase->doclist.pList; |
| 5981 res = fts3PoslistNearMerge( |
| 5982 &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 |
| 5983 ); |
| 5984 if( res ){ |
| 5985 nNew = (int)(pOut - pPhrase->doclist.pList) - 1; |
| 5986 assert( pPhrase->doclist.pList[nNew]=='\0' ); |
| 5987 assert( nNew<=pPhrase->doclist.nList && nNew>0 ); |
| 5988 memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); |
| 5989 pPhrase->doclist.nList = nNew; |
| 5990 *paPoslist = pPhrase->doclist.pList; |
| 5991 *pnToken = pPhrase->nToken; |
| 5992 } |
| 5993 |
| 5994 return res; |
| 5995 } |
| 5996 |
| 5997 /* |
| 5998 ** This function is a no-op if *pRc is other than SQLITE_OK when it is called. |
| 5999 ** Otherwise, it advances the expression passed as the second argument to |
| 6000 ** point to the next matching row in the database. Expressions iterate through |
| 6001 ** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, |
| 6002 ** or descending if it is non-zero. |
| 6003 ** |
| 6004 ** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if |
| 6005 ** successful, the following variables in pExpr are set: |
| 6006 ** |
| 6007 ** Fts3Expr.bEof (non-zero if EOF - there is no next row) |
| 6008 ** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) |
| 6009 ** |
| 6010 ** If the expression is of type FTSQUERY_PHRASE, and the expression is not |
| 6011 ** at EOF, then the following variables are populated with the position list |
| 6012 ** for the phrase for the visited row: |
| 6013 ** |
| 6014 ** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) |
| 6015 ** FTs3Expr.pPhrase->doclist.pList (pointer to position list) |
| 6016 ** |
| 6017 ** It says above that this function advances the expression to the next |
| 6018 ** matching row. This is usually true, but there are the following exceptions: |
| 6019 ** |
| 6020 ** 1. Deferred tokens are not taken into account. If a phrase consists |
| 6021 ** entirely of deferred tokens, it is assumed to match every row in |
| 6022 ** the db. In this case the position-list is not populated at all. |
| 6023 ** |
| 6024 ** Or, if a phrase contains one or more deferred tokens and one or |
| 6025 ** more non-deferred tokens, then the expression is advanced to the |
| 6026 ** next possible match, considering only non-deferred tokens. In other |
| 6027 ** words, if the phrase is "A B C", and "B" is deferred, the expression |
| 6028 ** is advanced to the next row that contains an instance of "A * C", |
| 6029 ** where "*" may match any single token. The position list in this case |
| 6030 ** is populated as for "A * C" before returning. |
| 6031 ** |
| 6032 ** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is |
| 6033 ** advanced to point to the next row that matches "x AND y". |
| 6034 ** |
| 6035 ** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is |
| 6036 ** really a match, taking into account deferred tokens and NEAR operators. |
| 6037 */ |
| 6038 static void fts3EvalNextRow( |
| 6039 Fts3Cursor *pCsr, /* FTS Cursor handle */ |
| 6040 Fts3Expr *pExpr, /* Expr. to advance to next matching row */ |
| 6041 int *pRc /* IN/OUT: Error code */ |
| 6042 ){ |
| 6043 if( *pRc==SQLITE_OK ){ |
| 6044 int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ |
| 6045 assert( pExpr->bEof==0 ); |
| 6046 pExpr->bStart = 1; |
| 6047 |
| 6048 switch( pExpr->eType ){ |
| 6049 case FTSQUERY_NEAR: |
| 6050 case FTSQUERY_AND: { |
| 6051 Fts3Expr *pLeft = pExpr->pLeft; |
| 6052 Fts3Expr *pRight = pExpr->pRight; |
| 6053 assert( !pLeft->bDeferred || !pRight->bDeferred ); |
| 6054 |
| 6055 if( pLeft->bDeferred ){ |
| 6056 /* LHS is entirely deferred. So we assume it matches every row. |
| 6057 ** Advance the RHS iterator to find the next row visited. */ |
| 6058 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6059 pExpr->iDocid = pRight->iDocid; |
| 6060 pExpr->bEof = pRight->bEof; |
| 6061 }else if( pRight->bDeferred ){ |
| 6062 /* RHS is entirely deferred. So we assume it matches every row. |
| 6063 ** Advance the LHS iterator to find the next row visited. */ |
| 6064 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6065 pExpr->iDocid = pLeft->iDocid; |
| 6066 pExpr->bEof = pLeft->bEof; |
| 6067 }else{ |
| 6068 /* Neither the RHS or LHS are deferred. */ |
| 6069 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6070 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6071 while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ |
| 6072 sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6073 if( iDiff==0 ) break; |
| 6074 if( iDiff<0 ){ |
| 6075 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6076 }else{ |
| 6077 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6078 } |
| 6079 } |
| 6080 pExpr->iDocid = pLeft->iDocid; |
| 6081 pExpr->bEof = (pLeft->bEof || pRight->bEof); |
| 6082 if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){ |
| 6083 if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){ |
| 6084 Fts3Doclist *pDl = &pRight->pPhrase->doclist; |
| 6085 while( *pRc==SQLITE_OK && pRight->bEof==0 ){ |
| 6086 memset(pDl->pList, 0, pDl->nList); |
| 6087 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6088 } |
| 6089 } |
| 6090 if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){ |
| 6091 Fts3Doclist *pDl = &pLeft->pPhrase->doclist; |
| 6092 while( *pRc==SQLITE_OK && pLeft->bEof==0 ){ |
| 6093 memset(pDl->pList, 0, pDl->nList); |
| 6094 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6095 } |
| 6096 } |
| 6097 } |
| 6098 } |
| 6099 break; |
| 6100 } |
| 6101 |
| 6102 case FTSQUERY_OR: { |
| 6103 Fts3Expr *pLeft = pExpr->pLeft; |
| 6104 Fts3Expr *pRight = pExpr->pRight; |
| 6105 sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6106 |
| 6107 assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); |
| 6108 assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); |
| 6109 |
| 6110 if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ |
| 6111 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6112 }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ |
| 6113 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6114 }else{ |
| 6115 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6116 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6117 } |
| 6118 |
| 6119 pExpr->bEof = (pLeft->bEof && pRight->bEof); |
| 6120 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); |
| 6121 if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ |
| 6122 pExpr->iDocid = pLeft->iDocid; |
| 6123 }else{ |
| 6124 pExpr->iDocid = pRight->iDocid; |
| 6125 } |
| 6126 |
| 6127 break; |
| 6128 } |
| 6129 |
| 6130 case FTSQUERY_NOT: { |
| 6131 Fts3Expr *pLeft = pExpr->pLeft; |
| 6132 Fts3Expr *pRight = pExpr->pRight; |
| 6133 |
| 6134 if( pRight->bStart==0 ){ |
| 6135 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6136 assert( *pRc!=SQLITE_OK || pRight->bStart ); |
| 6137 } |
| 6138 |
| 6139 fts3EvalNextRow(pCsr, pLeft, pRc); |
| 6140 if( pLeft->bEof==0 ){ |
| 6141 while( !*pRc |
| 6142 && !pRight->bEof |
| 6143 && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 |
| 6144 ){ |
| 6145 fts3EvalNextRow(pCsr, pRight, pRc); |
| 6146 } |
| 6147 } |
| 6148 pExpr->iDocid = pLeft->iDocid; |
| 6149 pExpr->bEof = pLeft->bEof; |
| 6150 break; |
| 6151 } |
| 6152 |
| 6153 default: { |
| 6154 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6155 fts3EvalInvalidatePoslist(pPhrase); |
| 6156 *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); |
| 6157 pExpr->iDocid = pPhrase->doclist.iDocid; |
| 6158 break; |
| 6159 } |
| 6160 } |
| 6161 } |
| 6162 } |
| 6163 |
| 6164 /* |
| 6165 ** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR |
| 6166 ** cluster, then this function returns 1 immediately. |
| 6167 ** |
| 6168 ** Otherwise, it checks if the current row really does match the NEAR |
| 6169 ** expression, using the data currently stored in the position lists |
| 6170 ** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. |
| 6171 ** |
| 6172 ** If the current row is a match, the position list associated with each |
| 6173 ** phrase in the NEAR expression is edited in place to contain only those |
| 6174 ** phrase instances sufficiently close to their peers to satisfy all NEAR |
| 6175 ** constraints. In this case it returns 1. If the NEAR expression does not |
| 6176 ** match the current row, 0 is returned. The position lists may or may not |
| 6177 ** be edited if 0 is returned. |
| 6178 */ |
| 6179 static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ |
| 6180 int res = 1; |
| 6181 |
| 6182 /* The following block runs if pExpr is the root of a NEAR query. |
| 6183 ** For example, the query: |
| 6184 ** |
| 6185 ** "w" NEAR "x" NEAR "y" NEAR "z" |
| 6186 ** |
| 6187 ** which is represented in tree form as: |
| 6188 ** |
| 6189 ** | |
| 6190 ** +--NEAR--+ <-- root of NEAR query |
| 6191 ** | | |
| 6192 ** +--NEAR--+ "z" |
| 6193 ** | | |
| 6194 ** +--NEAR--+ "y" |
| 6195 ** | | |
| 6196 ** "w" "x" |
| 6197 ** |
| 6198 ** The right-hand child of a NEAR node is always a phrase. The |
| 6199 ** left-hand child may be either a phrase or a NEAR node. There are |
| 6200 ** no exceptions to this - it's the way the parser in fts3_expr.c works. |
| 6201 */ |
| 6202 if( *pRc==SQLITE_OK |
| 6203 && pExpr->eType==FTSQUERY_NEAR |
| 6204 && pExpr->bEof==0 |
| 6205 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) |
| 6206 ){ |
| 6207 Fts3Expr *p; |
| 6208 int nTmp = 0; /* Bytes of temp space */ |
| 6209 char *aTmp; /* Temp space for PoslistNearMerge() */ |
| 6210 |
| 6211 /* Allocate temporary working space. */ |
| 6212 for(p=pExpr; p->pLeft; p=p->pLeft){ |
| 6213 nTmp += p->pRight->pPhrase->doclist.nList; |
| 6214 } |
| 6215 nTmp += p->pPhrase->doclist.nList; |
| 6216 if( nTmp==0 ){ |
| 6217 res = 0; |
| 6218 }else{ |
| 6219 aTmp = sqlite3_malloc(nTmp*2); |
| 6220 if( !aTmp ){ |
| 6221 *pRc = SQLITE_NOMEM; |
| 6222 res = 0; |
| 6223 }else{ |
| 6224 char *aPoslist = p->pPhrase->doclist.pList; |
| 6225 int nToken = p->pPhrase->nToken; |
| 6226 |
| 6227 for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ |
| 6228 Fts3Phrase *pPhrase = p->pRight->pPhrase; |
| 6229 int nNear = p->nNear; |
| 6230 res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); |
| 6231 } |
| 6232 |
| 6233 aPoslist = pExpr->pRight->pPhrase->doclist.pList; |
| 6234 nToken = pExpr->pRight->pPhrase->nToken; |
| 6235 for(p=pExpr->pLeft; p && res; p=p->pLeft){ |
| 6236 int nNear; |
| 6237 Fts3Phrase *pPhrase; |
| 6238 assert( p->pParent && p->pParent->pLeft==p ); |
| 6239 nNear = p->pParent->nNear; |
| 6240 pPhrase = ( |
| 6241 p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase |
| 6242 ); |
| 6243 res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); |
| 6244 } |
| 6245 } |
| 6246 |
| 6247 sqlite3_free(aTmp); |
| 6248 } |
| 6249 } |
| 6250 |
| 6251 return res; |
| 6252 } |
| 6253 |
| 6254 /* |
| 6255 ** This function is a helper function for sqlite3Fts3EvalTestDeferred(). |
| 6256 ** Assuming no error occurs or has occurred, It returns non-zero if the |
| 6257 ** expression passed as the second argument matches the row that pCsr |
| 6258 ** currently points to, or zero if it does not. |
| 6259 ** |
| 6260 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 6261 ** If an error occurs during execution of this function, *pRc is set to |
| 6262 ** the appropriate SQLite error code. In this case the returned value is |
| 6263 ** undefined. |
| 6264 */ |
| 6265 static int fts3EvalTestExpr( |
| 6266 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 6267 Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ |
| 6268 int *pRc /* IN/OUT: Error code */ |
| 6269 ){ |
| 6270 int bHit = 1; /* Return value */ |
| 6271 if( *pRc==SQLITE_OK ){ |
| 6272 switch( pExpr->eType ){ |
| 6273 case FTSQUERY_NEAR: |
| 6274 case FTSQUERY_AND: |
| 6275 bHit = ( |
| 6276 fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) |
| 6277 && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) |
| 6278 && fts3EvalNearTest(pExpr, pRc) |
| 6279 ); |
| 6280 |
| 6281 /* If the NEAR expression does not match any rows, zero the doclist for |
| 6282 ** all phrases involved in the NEAR. This is because the snippet(), |
| 6283 ** offsets() and matchinfo() functions are not supposed to recognize |
| 6284 ** any instances of phrases that are part of unmatched NEAR queries. |
| 6285 ** For example if this expression: |
| 6286 ** |
| 6287 ** ... MATCH 'a OR (b NEAR c)' |
| 6288 ** |
| 6289 ** is matched against a row containing: |
| 6290 ** |
| 6291 ** 'a b d e' |
| 6292 ** |
| 6293 ** then any snippet() should ony highlight the "a" term, not the "b" |
| 6294 ** (as "b" is part of a non-matching NEAR clause). |
| 6295 */ |
| 6296 if( bHit==0 |
| 6297 && pExpr->eType==FTSQUERY_NEAR |
| 6298 && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) |
| 6299 ){ |
| 6300 Fts3Expr *p; |
| 6301 for(p=pExpr; p->pPhrase==0; p=p->pLeft){ |
| 6302 if( p->pRight->iDocid==pCsr->iPrevId ){ |
| 6303 fts3EvalInvalidatePoslist(p->pRight->pPhrase); |
| 6304 } |
| 6305 } |
| 6306 if( p->iDocid==pCsr->iPrevId ){ |
| 6307 fts3EvalInvalidatePoslist(p->pPhrase); |
| 6308 } |
| 6309 } |
| 6310 |
| 6311 break; |
| 6312 |
| 6313 case FTSQUERY_OR: { |
| 6314 int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); |
| 6315 int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); |
| 6316 bHit = bHit1 || bHit2; |
| 6317 break; |
| 6318 } |
| 6319 |
| 6320 case FTSQUERY_NOT: |
| 6321 bHit = ( |
| 6322 fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) |
| 6323 && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) |
| 6324 ); |
| 6325 break; |
| 6326 |
| 6327 default: { |
| 6328 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 6329 if( pCsr->pDeferred |
| 6330 && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) |
| 6331 ){ |
| 6332 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6333 assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); |
| 6334 if( pExpr->bDeferred ){ |
| 6335 fts3EvalInvalidatePoslist(pPhrase); |
| 6336 } |
| 6337 *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); |
| 6338 bHit = (pPhrase->doclist.pList!=0); |
| 6339 pExpr->iDocid = pCsr->iPrevId; |
| 6340 }else |
| 6341 #endif |
| 6342 { |
| 6343 bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); |
| 6344 } |
| 6345 break; |
| 6346 } |
| 6347 } |
| 6348 } |
| 6349 return bHit; |
| 6350 } |
| 6351 |
| 6352 /* |
| 6353 ** This function is called as the second part of each xNext operation when |
| 6354 ** iterating through the results of a full-text query. At this point the |
| 6355 ** cursor points to a row that matches the query expression, with the |
| 6356 ** following caveats: |
| 6357 ** |
| 6358 ** * Up until this point, "NEAR" operators in the expression have been |
| 6359 ** treated as "AND". |
| 6360 ** |
| 6361 ** * Deferred tokens have not yet been considered. |
| 6362 ** |
| 6363 ** If *pRc is not SQLITE_OK when this function is called, it immediately |
| 6364 ** returns 0. Otherwise, it tests whether or not after considering NEAR |
| 6365 ** operators and deferred tokens the current row is still a match for the |
| 6366 ** expression. It returns 1 if both of the following are true: |
| 6367 ** |
| 6368 ** 1. *pRc is SQLITE_OK when this function returns, and |
| 6369 ** |
| 6370 ** 2. After scanning the current FTS table row for the deferred tokens, |
| 6371 ** it is determined that the row does *not* match the query. |
| 6372 ** |
| 6373 ** Or, if no error occurs and it seems the current row does match the FTS |
| 6374 ** query, return 0. |
| 6375 */ |
| 6376 SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){ |
| 6377 int rc = *pRc; |
| 6378 int bMiss = 0; |
| 6379 if( rc==SQLITE_OK ){ |
| 6380 |
| 6381 /* If there are one or more deferred tokens, load the current row into |
| 6382 ** memory and scan it to determine the position list for each deferred |
| 6383 ** token. Then, see if this row is really a match, considering deferred |
| 6384 ** tokens and NEAR operators (neither of which were taken into account |
| 6385 ** earlier, by fts3EvalNextRow()). |
| 6386 */ |
| 6387 if( pCsr->pDeferred ){ |
| 6388 rc = fts3CursorSeek(0, pCsr); |
| 6389 if( rc==SQLITE_OK ){ |
| 6390 rc = sqlite3Fts3CacheDeferredDoclists(pCsr); |
| 6391 } |
| 6392 } |
| 6393 bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); |
| 6394 |
| 6395 /* Free the position-lists accumulated for each deferred token above. */ |
| 6396 sqlite3Fts3FreeDeferredDoclists(pCsr); |
| 6397 *pRc = rc; |
| 6398 } |
| 6399 return (rc==SQLITE_OK && bMiss); |
| 6400 } |
| 6401 |
| 6402 /* |
| 6403 ** Advance to the next document that matches the FTS expression in |
| 6404 ** Fts3Cursor.pExpr. |
| 6405 */ |
| 6406 static int fts3EvalNext(Fts3Cursor *pCsr){ |
| 6407 int rc = SQLITE_OK; /* Return Code */ |
| 6408 Fts3Expr *pExpr = pCsr->pExpr; |
| 6409 assert( pCsr->isEof==0 ); |
| 6410 if( pExpr==0 ){ |
| 6411 pCsr->isEof = 1; |
| 6412 }else{ |
| 6413 do { |
| 6414 if( pCsr->isRequireSeek==0 ){ |
| 6415 sqlite3_reset(pCsr->pStmt); |
| 6416 } |
| 6417 assert( sqlite3_data_count(pCsr->pStmt)==0 ); |
| 6418 fts3EvalNextRow(pCsr, pExpr, &rc); |
| 6419 pCsr->isEof = pExpr->bEof; |
| 6420 pCsr->isRequireSeek = 1; |
| 6421 pCsr->isMatchinfoNeeded = 1; |
| 6422 pCsr->iPrevId = pExpr->iDocid; |
| 6423 }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); |
| 6424 } |
| 6425 |
| 6426 /* Check if the cursor is past the end of the docid range specified |
| 6427 ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ |
| 6428 if( rc==SQLITE_OK && ( |
| 6429 (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) |
| 6430 || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid) |
| 6431 )){ |
| 6432 pCsr->isEof = 1; |
| 6433 } |
| 6434 |
| 6435 return rc; |
| 6436 } |
| 6437 |
| 6438 /* |
| 6439 ** Restart interation for expression pExpr so that the next call to |
| 6440 ** fts3EvalNext() visits the first row. Do not allow incremental |
| 6441 ** loading or merging of phrase doclists for this iteration. |
| 6442 ** |
| 6443 ** If *pRc is other than SQLITE_OK when this function is called, it is |
| 6444 ** a no-op. If an error occurs within this function, *pRc is set to an |
| 6445 ** SQLite error code before returning. |
| 6446 */ |
| 6447 static void fts3EvalRestart( |
| 6448 Fts3Cursor *pCsr, |
| 6449 Fts3Expr *pExpr, |
| 6450 int *pRc |
| 6451 ){ |
| 6452 if( pExpr && *pRc==SQLITE_OK ){ |
| 6453 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6454 |
| 6455 if( pPhrase ){ |
| 6456 fts3EvalInvalidatePoslist(pPhrase); |
| 6457 if( pPhrase->bIncr ){ |
| 6458 int i; |
| 6459 for(i=0; i<pPhrase->nToken; i++){ |
| 6460 Fts3PhraseToken *pToken = &pPhrase->aToken[i]; |
| 6461 assert( pToken->pDeferred==0 ); |
| 6462 if( pToken->pSegcsr ){ |
| 6463 sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); |
| 6464 } |
| 6465 } |
| 6466 *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); |
| 6467 } |
| 6468 pPhrase->doclist.pNextDocid = 0; |
| 6469 pPhrase->doclist.iDocid = 0; |
| 6470 pPhrase->pOrPoslist = 0; |
| 6471 } |
| 6472 |
| 6473 pExpr->iDocid = 0; |
| 6474 pExpr->bEof = 0; |
| 6475 pExpr->bStart = 0; |
| 6476 |
| 6477 fts3EvalRestart(pCsr, pExpr->pLeft, pRc); |
| 6478 fts3EvalRestart(pCsr, pExpr->pRight, pRc); |
| 6479 } |
| 6480 } |
| 6481 |
| 6482 /* |
| 6483 ** After allocating the Fts3Expr.aMI[] array for each phrase in the |
| 6484 ** expression rooted at pExpr, the cursor iterates through all rows matched |
| 6485 ** by pExpr, calling this function for each row. This function increments |
| 6486 ** the values in Fts3Expr.aMI[] according to the position-list currently |
| 6487 ** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase |
| 6488 ** expression nodes. |
| 6489 */ |
| 6490 static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ |
| 6491 if( pExpr ){ |
| 6492 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6493 if( pPhrase && pPhrase->doclist.pList ){ |
| 6494 int iCol = 0; |
| 6495 char *p = pPhrase->doclist.pList; |
| 6496 |
| 6497 assert( *p ); |
| 6498 while( 1 ){ |
| 6499 u8 c = 0; |
| 6500 int iCnt = 0; |
| 6501 while( 0xFE & (*p | c) ){ |
| 6502 if( (c&0x80)==0 ) iCnt++; |
| 6503 c = *p++ & 0x80; |
| 6504 } |
| 6505 |
| 6506 /* aMI[iCol*3 + 1] = Number of occurrences |
| 6507 ** aMI[iCol*3 + 2] = Number of rows containing at least one instance |
| 6508 */ |
| 6509 pExpr->aMI[iCol*3 + 1] += iCnt; |
| 6510 pExpr->aMI[iCol*3 + 2] += (iCnt>0); |
| 6511 if( *p==0x00 ) break; |
| 6512 p++; |
| 6513 p += fts3GetVarint32(p, &iCol); |
| 6514 } |
| 6515 } |
| 6516 |
| 6517 fts3EvalUpdateCounts(pExpr->pLeft); |
| 6518 fts3EvalUpdateCounts(pExpr->pRight); |
| 6519 } |
| 6520 } |
| 6521 |
| 6522 /* |
| 6523 ** Expression pExpr must be of type FTSQUERY_PHRASE. |
| 6524 ** |
| 6525 ** If it is not already allocated and populated, this function allocates and |
| 6526 ** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part |
| 6527 ** of a NEAR expression, then it also allocates and populates the same array |
| 6528 ** for all other phrases that are part of the NEAR expression. |
| 6529 ** |
| 6530 ** SQLITE_OK is returned if the aMI[] array is successfully allocated and |
| 6531 ** populated. Otherwise, if an error occurs, an SQLite error code is returned. |
| 6532 */ |
| 6533 static int fts3EvalGatherStats( |
| 6534 Fts3Cursor *pCsr, /* Cursor object */ |
| 6535 Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ |
| 6536 ){ |
| 6537 int rc = SQLITE_OK; /* Return code */ |
| 6538 |
| 6539 assert( pExpr->eType==FTSQUERY_PHRASE ); |
| 6540 if( pExpr->aMI==0 ){ |
| 6541 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6542 Fts3Expr *pRoot; /* Root of NEAR expression */ |
| 6543 Fts3Expr *p; /* Iterator used for several purposes */ |
| 6544 |
| 6545 sqlite3_int64 iPrevId = pCsr->iPrevId; |
| 6546 sqlite3_int64 iDocid; |
| 6547 u8 bEof; |
| 6548 |
| 6549 /* Find the root of the NEAR expression */ |
| 6550 pRoot = pExpr; |
| 6551 while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ |
| 6552 pRoot = pRoot->pParent; |
| 6553 } |
| 6554 iDocid = pRoot->iDocid; |
| 6555 bEof = pRoot->bEof; |
| 6556 assert( pRoot->bStart ); |
| 6557 |
| 6558 /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ |
| 6559 for(p=pRoot; p; p=p->pLeft){ |
| 6560 Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); |
| 6561 assert( pE->aMI==0 ); |
| 6562 pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); |
| 6563 if( !pE->aMI ) return SQLITE_NOMEM; |
| 6564 memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); |
| 6565 } |
| 6566 |
| 6567 fts3EvalRestart(pCsr, pRoot, &rc); |
| 6568 |
| 6569 while( pCsr->isEof==0 && rc==SQLITE_OK ){ |
| 6570 |
| 6571 do { |
| 6572 /* Ensure the %_content statement is reset. */ |
| 6573 if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); |
| 6574 assert( sqlite3_data_count(pCsr->pStmt)==0 ); |
| 6575 |
| 6576 /* Advance to the next document */ |
| 6577 fts3EvalNextRow(pCsr, pRoot, &rc); |
| 6578 pCsr->isEof = pRoot->bEof; |
| 6579 pCsr->isRequireSeek = 1; |
| 6580 pCsr->isMatchinfoNeeded = 1; |
| 6581 pCsr->iPrevId = pRoot->iDocid; |
| 6582 }while( pCsr->isEof==0 |
| 6583 && pRoot->eType==FTSQUERY_NEAR |
| 6584 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) |
| 6585 ); |
| 6586 |
| 6587 if( rc==SQLITE_OK && pCsr->isEof==0 ){ |
| 6588 fts3EvalUpdateCounts(pRoot); |
| 6589 } |
| 6590 } |
| 6591 |
| 6592 pCsr->isEof = 0; |
| 6593 pCsr->iPrevId = iPrevId; |
| 6594 |
| 6595 if( bEof ){ |
| 6596 pRoot->bEof = bEof; |
| 6597 }else{ |
| 6598 /* Caution: pRoot may iterate through docids in ascending or descending |
| 6599 ** order. For this reason, even though it seems more defensive, the |
| 6600 ** do loop can not be written: |
| 6601 ** |
| 6602 ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK ); |
| 6603 */ |
| 6604 fts3EvalRestart(pCsr, pRoot, &rc); |
| 6605 do { |
| 6606 fts3EvalNextRow(pCsr, pRoot, &rc); |
| 6607 assert( pRoot->bEof==0 ); |
| 6608 }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); |
| 6609 } |
| 6610 } |
| 6611 return rc; |
| 6612 } |
| 6613 |
| 6614 /* |
| 6615 ** This function is used by the matchinfo() module to query a phrase |
| 6616 ** expression node for the following information: |
| 6617 ** |
| 6618 ** 1. The total number of occurrences of the phrase in each column of |
| 6619 ** the FTS table (considering all rows), and |
| 6620 ** |
| 6621 ** 2. For each column, the number of rows in the table for which the |
| 6622 ** column contains at least one instance of the phrase. |
| 6623 ** |
| 6624 ** If no error occurs, SQLITE_OK is returned and the values for each column |
| 6625 ** written into the array aiOut as follows: |
| 6626 ** |
| 6627 ** aiOut[iCol*3 + 1] = Number of occurrences |
| 6628 ** aiOut[iCol*3 + 2] = Number of rows containing at least one instance |
| 6629 ** |
| 6630 ** Caveats: |
| 6631 ** |
| 6632 ** * If a phrase consists entirely of deferred tokens, then all output |
| 6633 ** values are set to the number of documents in the table. In other |
| 6634 ** words we assume that very common tokens occur exactly once in each |
| 6635 ** column of each row of the table. |
| 6636 ** |
| 6637 ** * If a phrase contains some deferred tokens (and some non-deferred |
| 6638 ** tokens), count the potential occurrence identified by considering |
| 6639 ** the non-deferred tokens instead of actual phrase occurrences. |
| 6640 ** |
| 6641 ** * If the phrase is part of a NEAR expression, then only phrase instances |
| 6642 ** that meet the NEAR constraint are included in the counts. |
| 6643 */ |
| 6644 SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats( |
| 6645 Fts3Cursor *pCsr, /* FTS cursor handle */ |
| 6646 Fts3Expr *pExpr, /* Phrase expression */ |
| 6647 u32 *aiOut /* Array to write results into (see above) */ |
| 6648 ){ |
| 6649 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6650 int rc = SQLITE_OK; |
| 6651 int iCol; |
| 6652 |
| 6653 if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ |
| 6654 assert( pCsr->nDoc>0 ); |
| 6655 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 6656 aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; |
| 6657 aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; |
| 6658 } |
| 6659 }else{ |
| 6660 rc = fts3EvalGatherStats(pCsr, pExpr); |
| 6661 if( rc==SQLITE_OK ){ |
| 6662 assert( pExpr->aMI ); |
| 6663 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 6664 aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; |
| 6665 aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; |
| 6666 } |
| 6667 } |
| 6668 } |
| 6669 |
| 6670 return rc; |
| 6671 } |
| 6672 |
| 6673 /* |
| 6674 ** The expression pExpr passed as the second argument to this function |
| 6675 ** must be of type FTSQUERY_PHRASE. |
| 6676 ** |
| 6677 ** The returned value is either NULL or a pointer to a buffer containing |
| 6678 ** a position-list indicating the occurrences of the phrase in column iCol |
| 6679 ** of the current row. |
| 6680 ** |
| 6681 ** More specifically, the returned buffer contains 1 varint for each |
| 6682 ** occurrence of the phrase in the column, stored using the normal (delta+2) |
| 6683 ** compression and is terminated by either an 0x01 or 0x00 byte. For example, |
| 6684 ** if the requested column contains "a b X c d X X" and the position-list |
| 6685 ** for 'X' is requested, the buffer returned may contain: |
| 6686 ** |
| 6687 ** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 |
| 6688 ** |
| 6689 ** This function works regardless of whether or not the phrase is deferred, |
| 6690 ** incremental, or neither. |
| 6691 */ |
| 6692 SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( |
| 6693 Fts3Cursor *pCsr, /* FTS3 cursor object */ |
| 6694 Fts3Expr *pExpr, /* Phrase to return doclist for */ |
| 6695 int iCol, /* Column to return position list for */ |
| 6696 char **ppOut /* OUT: Pointer to position list */ |
| 6697 ){ |
| 6698 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 6699 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 6700 char *pIter; |
| 6701 int iThis; |
| 6702 sqlite3_int64 iDocid; |
| 6703 |
| 6704 /* If this phrase is applies specifically to some column other than |
| 6705 ** column iCol, return a NULL pointer. */ |
| 6706 *ppOut = 0; |
| 6707 assert( iCol>=0 && iCol<pTab->nColumn ); |
| 6708 if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){ |
| 6709 return SQLITE_OK; |
| 6710 } |
| 6711 |
| 6712 iDocid = pExpr->iDocid; |
| 6713 pIter = pPhrase->doclist.pList; |
| 6714 if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ |
| 6715 int rc = SQLITE_OK; |
| 6716 int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ |
| 6717 int bOr = 0; |
| 6718 u8 bTreeEof = 0; |
| 6719 Fts3Expr *p; /* Used to iterate from pExpr to root */ |
| 6720 Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ |
| 6721 int bMatch; |
| 6722 |
| 6723 /* Check if this phrase descends from an OR expression node. If not, |
| 6724 ** return NULL. Otherwise, the entry that corresponds to docid |
| 6725 ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the |
| 6726 ** tree that the node is part of has been marked as EOF, but the node |
| 6727 ** itself is not EOF, then it may point to an earlier entry. */ |
| 6728 pNear = pExpr; |
| 6729 for(p=pExpr->pParent; p; p=p->pParent){ |
| 6730 if( p->eType==FTSQUERY_OR ) bOr = 1; |
| 6731 if( p->eType==FTSQUERY_NEAR ) pNear = p; |
| 6732 if( p->bEof ) bTreeEof = 1; |
| 6733 } |
| 6734 if( bOr==0 ) return SQLITE_OK; |
| 6735 |
| 6736 /* This is the descendent of an OR node. In this case we cannot use |
| 6737 ** an incremental phrase. Load the entire doclist for the phrase |
| 6738 ** into memory in this case. */ |
| 6739 if( pPhrase->bIncr ){ |
| 6740 int bEofSave = pNear->bEof; |
| 6741 fts3EvalRestart(pCsr, pNear, &rc); |
| 6742 while( rc==SQLITE_OK && !pNear->bEof ){ |
| 6743 fts3EvalNextRow(pCsr, pNear, &rc); |
| 6744 if( bEofSave==0 && pNear->iDocid==iDocid ) break; |
| 6745 } |
| 6746 assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); |
| 6747 } |
| 6748 if( bTreeEof ){ |
| 6749 while( rc==SQLITE_OK && !pNear->bEof ){ |
| 6750 fts3EvalNextRow(pCsr, pNear, &rc); |
| 6751 } |
| 6752 } |
| 6753 if( rc!=SQLITE_OK ) return rc; |
| 6754 |
| 6755 bMatch = 1; |
| 6756 for(p=pNear; p; p=p->pLeft){ |
| 6757 u8 bEof = 0; |
| 6758 Fts3Expr *pTest = p; |
| 6759 Fts3Phrase *pPh; |
| 6760 assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE ); |
| 6761 if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight; |
| 6762 assert( pTest->eType==FTSQUERY_PHRASE ); |
| 6763 pPh = pTest->pPhrase; |
| 6764 |
| 6765 pIter = pPh->pOrPoslist; |
| 6766 iDocid = pPh->iOrDocid; |
| 6767 if( pCsr->bDesc==bDescDoclist ){ |
| 6768 bEof = !pPh->doclist.nAll || |
| 6769 (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll)); |
| 6770 while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ |
| 6771 sqlite3Fts3DoclistNext( |
| 6772 bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, |
| 6773 &pIter, &iDocid, &bEof |
| 6774 ); |
| 6775 } |
| 6776 }else{ |
| 6777 bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll); |
| 6778 while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ |
| 6779 int dummy; |
| 6780 sqlite3Fts3DoclistPrev( |
| 6781 bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, |
| 6782 &pIter, &iDocid, &dummy, &bEof |
| 6783 ); |
| 6784 } |
| 6785 } |
| 6786 pPh->pOrPoslist = pIter; |
| 6787 pPh->iOrDocid = iDocid; |
| 6788 if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0; |
| 6789 } |
| 6790 |
| 6791 if( bMatch ){ |
| 6792 pIter = pPhrase->pOrPoslist; |
| 6793 }else{ |
| 6794 pIter = 0; |
| 6795 } |
| 6796 } |
| 6797 if( pIter==0 ) return SQLITE_OK; |
| 6798 |
| 6799 if( *pIter==0x01 ){ |
| 6800 pIter++; |
| 6801 pIter += fts3GetVarint32(pIter, &iThis); |
| 6802 }else{ |
| 6803 iThis = 0; |
| 6804 } |
| 6805 while( iThis<iCol ){ |
| 6806 fts3ColumnlistCopy(0, &pIter); |
| 6807 if( *pIter==0x00 ) return SQLITE_OK; |
| 6808 pIter++; |
| 6809 pIter += fts3GetVarint32(pIter, &iThis); |
| 6810 } |
| 6811 if( *pIter==0x00 ){ |
| 6812 pIter = 0; |
| 6813 } |
| 6814 |
| 6815 *ppOut = ((iCol==iThis)?pIter:0); |
| 6816 return SQLITE_OK; |
| 6817 } |
| 6818 |
| 6819 /* |
| 6820 ** Free all components of the Fts3Phrase structure that were allocated by |
| 6821 ** the eval module. Specifically, this means to free: |
| 6822 ** |
| 6823 ** * the contents of pPhrase->doclist, and |
| 6824 ** * any Fts3MultiSegReader objects held by phrase tokens. |
| 6825 */ |
| 6826 SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ |
| 6827 if( pPhrase ){ |
| 6828 int i; |
| 6829 sqlite3_free(pPhrase->doclist.aAll); |
| 6830 fts3EvalInvalidatePoslist(pPhrase); |
| 6831 memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); |
| 6832 for(i=0; i<pPhrase->nToken; i++){ |
| 6833 fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); |
| 6834 pPhrase->aToken[i].pSegcsr = 0; |
| 6835 } |
| 6836 } |
| 6837 } |
| 6838 |
| 6839 |
/*
** Return SQLITE_CORRUPT_VTAB.
**
** Only compiled when SQLITE_DEBUG is defined. The parameter list is spelled
** (void) so that the definition is a proper prototype rather than an
** old-style declaration with unspecified parameters.
*/
#ifdef SQLITE_DEBUG
SQLITE_PRIVATE int sqlite3Fts3Corrupt(void){
  return SQLITE_CORRUPT_VTAB;
}
#endif
| 6848 |
| 6849 #if !SQLITE_CORE |
| 6850 /* |
| 6851 ** Initialize API pointer table, if required. |
| 6852 */ |
| 6853 #ifdef _WIN32 |
| 6854 __declspec(dllexport) |
| 6855 #endif |
| 6856 SQLITE_API int sqlite3_fts3_init( |
| 6857 sqlite3 *db, |
| 6858 char **pzErrMsg, |
| 6859 const sqlite3_api_routines *pApi |
| 6860 ){ |
| 6861 SQLITE_EXTENSION_INIT2(pApi) |
| 6862 return sqlite3Fts3Init(db); |
| 6863 } |
| 6864 #endif |
| 6865 |
| 6866 #endif |
| 6867 |
| 6868 /************** End of fts3.c ************************************************/ |
| 6869 /************** Begin file fts3_aux.c ****************************************/ |
| 6870 /* |
| 6871 ** 2011 Jan 27 |
| 6872 ** |
| 6873 ** The author disclaims copyright to this source code. In place of |
| 6874 ** a legal notice, here is a blessing: |
| 6875 ** |
| 6876 ** May you do good and not evil. |
| 6877 ** May you find forgiveness for yourself and forgive others. |
| 6878 ** May you share freely, never taking more than you give. |
| 6879 ** |
| 6880 ****************************************************************************** |
| 6881 ** |
| 6882 */ |
| 6883 /* #include "fts3Int.h" */ |
| 6884 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 6885 |
| 6886 /* #include <string.h> */ |
| 6887 /* #include <assert.h> */ |
| 6888 |
| 6889 typedef struct Fts3auxTable Fts3auxTable; |
| 6890 typedef struct Fts3auxCursor Fts3auxCursor; |
| 6891 |
| 6892 struct Fts3auxTable { |
| 6893 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 6894 Fts3Table *pFts3Tab; |
| 6895 }; |
| 6896 |
| 6897 struct Fts3auxCursor { |
| 6898 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 6899 Fts3MultiSegReader csr; /* Must be right after "base" */ |
| 6900 Fts3SegFilter filter; |
| 6901 char *zStop; |
| 6902 int nStop; /* Byte-length of string zStop */ |
| 6903 int iLangid; /* Language id to query */ |
| 6904 int isEof; /* True if cursor is at EOF */ |
| 6905 sqlite3_int64 iRowid; /* Current rowid */ |
| 6906 |
| 6907 int iCol; /* Current value of 'col' column */ |
| 6908 int nStat; /* Size of aStat[] array */ |
| 6909 struct Fts3auxColstats { |
| 6910 sqlite3_int64 nDoc; /* 'documents' values for current csr row */ |
| 6911 sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */ |
| 6912 } *aStat; |
| 6913 }; |
| 6914 |
/*
** Schema declared for the terms table via sqlite3_declare_vtab(). Column
** indexes, in order: term=0, col=1, documents=2, occurrences=3 and the
** hidden languageid=4.
*/
#define FTS3_AUX_SCHEMA \
  "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
| 6920 |
| 6921 /* |
| 6922 ** This function does all the work for both the xConnect and xCreate methods. |
| 6923 ** These tables have no persistent representation of their own, so xConnect |
| 6924 ** and xCreate are identical operations. |
| 6925 */ |
| 6926 static int fts3auxConnectMethod( |
| 6927 sqlite3 *db, /* Database connection */ |
| 6928 void *pUnused, /* Unused */ |
| 6929 int argc, /* Number of elements in argv array */ |
| 6930 const char * const *argv, /* xCreate/xConnect argument array */ |
| 6931 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 6932 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 6933 ){ |
| 6934 char const *zDb; /* Name of database (e.g. "main") */ |
| 6935 char const *zFts3; /* Name of fts3 table */ |
| 6936 int nDb; /* Result of strlen(zDb) */ |
| 6937 int nFts3; /* Result of strlen(zFts3) */ |
| 6938 int nByte; /* Bytes of space to allocate here */ |
| 6939 int rc; /* value returned by declare_vtab() */ |
| 6940 Fts3auxTable *p; /* Virtual table object to return */ |
| 6941 |
| 6942 UNUSED_PARAMETER(pUnused); |
| 6943 |
| 6944 /* The user should invoke this in one of two forms: |
| 6945 ** |
| 6946 ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); |
| 6947 ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); |
| 6948 */ |
| 6949 if( argc!=4 && argc!=5 ) goto bad_args; |
| 6950 |
| 6951 zDb = argv[1]; |
| 6952 nDb = (int)strlen(zDb); |
| 6953 if( argc==5 ){ |
| 6954 if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ |
| 6955 zDb = argv[3]; |
| 6956 nDb = (int)strlen(zDb); |
| 6957 zFts3 = argv[4]; |
| 6958 }else{ |
| 6959 goto bad_args; |
| 6960 } |
| 6961 }else{ |
| 6962 zFts3 = argv[3]; |
| 6963 } |
| 6964 nFts3 = (int)strlen(zFts3); |
| 6965 |
| 6966 rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); |
| 6967 if( rc!=SQLITE_OK ) return rc; |
| 6968 |
| 6969 nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; |
| 6970 p = (Fts3auxTable *)sqlite3_malloc(nByte); |
| 6971 if( !p ) return SQLITE_NOMEM; |
| 6972 memset(p, 0, nByte); |
| 6973 |
| 6974 p->pFts3Tab = (Fts3Table *)&p[1]; |
| 6975 p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1]; |
| 6976 p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1]; |
| 6977 p->pFts3Tab->db = db; |
| 6978 p->pFts3Tab->nIndex = 1; |
| 6979 |
| 6980 memcpy((char *)p->pFts3Tab->zDb, zDb, nDb); |
| 6981 memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3); |
| 6982 sqlite3Fts3Dequote((char *)p->pFts3Tab->zName); |
| 6983 |
| 6984 *ppVtab = (sqlite3_vtab *)p; |
| 6985 return SQLITE_OK; |
| 6986 |
| 6987 bad_args: |
| 6988 sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor"); |
| 6989 return SQLITE_ERROR; |
| 6990 } |
| 6991 |
| 6992 /* |
| 6993 ** This function does the work for both the xDisconnect and xDestroy methods. |
| 6994 ** These tables have no persistent representation of their own, so xDisconnect |
| 6995 ** and xDestroy are identical operations. |
| 6996 */ |
| 6997 static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){ |
| 6998 Fts3auxTable *p = (Fts3auxTable *)pVtab; |
| 6999 Fts3Table *pFts3 = p->pFts3Tab; |
| 7000 int i; |
| 7001 |
| 7002 /* Free any prepared statements held */ |
| 7003 for(i=0; i<SizeofArray(pFts3->aStmt); i++){ |
| 7004 sqlite3_finalize(pFts3->aStmt[i]); |
| 7005 } |
| 7006 sqlite3_free(pFts3->zSegmentsTbl); |
| 7007 sqlite3_free(p); |
| 7008 return SQLITE_OK; |
| 7009 } |
| 7010 |
/* Bit values stored in idxNum by xBestIndex and decoded by xFilter. */
#define FTS4AUX_EQ_CONSTRAINT 0x01
#define FTS4AUX_GE_CONSTRAINT 0x02
#define FTS4AUX_LE_CONSTRAINT 0x04
| 7014 |
| 7015 /* |
| 7016 ** xBestIndex - Analyze a WHERE and ORDER BY clause. |
| 7017 */ |
| 7018 static int fts3auxBestIndexMethod( |
| 7019 sqlite3_vtab *pVTab, |
| 7020 sqlite3_index_info *pInfo |
| 7021 ){ |
| 7022 int i; |
| 7023 int iEq = -1; |
| 7024 int iGe = -1; |
| 7025 int iLe = -1; |
| 7026 int iLangid = -1; |
| 7027 int iNext = 1; /* Next free argvIndex value */ |
| 7028 |
| 7029 UNUSED_PARAMETER(pVTab); |
| 7030 |
| 7031 /* This vtab delivers always results in "ORDER BY term ASC" order. */ |
| 7032 if( pInfo->nOrderBy==1 |
| 7033 && pInfo->aOrderBy[0].iColumn==0 |
| 7034 && pInfo->aOrderBy[0].desc==0 |
| 7035 ){ |
| 7036 pInfo->orderByConsumed = 1; |
| 7037 } |
| 7038 |
| 7039 /* Search for equality and range constraints on the "term" column. |
| 7040 ** And equality constraints on the hidden "languageid" column. */ |
| 7041 for(i=0; i<pInfo->nConstraint; i++){ |
| 7042 if( pInfo->aConstraint[i].usable ){ |
| 7043 int op = pInfo->aConstraint[i].op; |
| 7044 int iCol = pInfo->aConstraint[i].iColumn; |
| 7045 |
| 7046 if( iCol==0 ){ |
| 7047 if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; |
| 7048 if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; |
| 7049 if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; |
| 7050 if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; |
| 7051 if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; |
| 7052 } |
| 7053 if( iCol==4 ){ |
| 7054 if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; |
| 7055 } |
| 7056 } |
| 7057 } |
| 7058 |
| 7059 if( iEq>=0 ){ |
| 7060 pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; |
| 7061 pInfo->aConstraintUsage[iEq].argvIndex = iNext++; |
| 7062 pInfo->estimatedCost = 5; |
| 7063 }else{ |
| 7064 pInfo->idxNum = 0; |
| 7065 pInfo->estimatedCost = 20000; |
| 7066 if( iGe>=0 ){ |
| 7067 pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; |
| 7068 pInfo->aConstraintUsage[iGe].argvIndex = iNext++; |
| 7069 pInfo->estimatedCost /= 2; |
| 7070 } |
| 7071 if( iLe>=0 ){ |
| 7072 pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; |
| 7073 pInfo->aConstraintUsage[iLe].argvIndex = iNext++; |
| 7074 pInfo->estimatedCost /= 2; |
| 7075 } |
| 7076 } |
| 7077 if( iLangid>=0 ){ |
| 7078 pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; |
| 7079 pInfo->estimatedCost--; |
| 7080 } |
| 7081 |
| 7082 return SQLITE_OK; |
| 7083 } |
| 7084 |
| 7085 /* |
| 7086 ** xOpen - Open a cursor. |
| 7087 */ |
| 7088 static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 7089 Fts3auxCursor *pCsr; /* Pointer to cursor object to return */ |
| 7090 |
| 7091 UNUSED_PARAMETER(pVTab); |
| 7092 |
| 7093 pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor)); |
| 7094 if( !pCsr ) return SQLITE_NOMEM; |
| 7095 memset(pCsr, 0, sizeof(Fts3auxCursor)); |
| 7096 |
| 7097 *ppCsr = (sqlite3_vtab_cursor *)pCsr; |
| 7098 return SQLITE_OK; |
| 7099 } |
| 7100 |
| 7101 /* |
| 7102 ** xClose - Close a cursor. |
| 7103 */ |
| 7104 static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 7105 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7106 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7107 |
| 7108 sqlite3Fts3SegmentsClose(pFts3); |
| 7109 sqlite3Fts3SegReaderFinish(&pCsr->csr); |
| 7110 sqlite3_free((void *)pCsr->filter.zTerm); |
| 7111 sqlite3_free(pCsr->zStop); |
| 7112 sqlite3_free(pCsr->aStat); |
| 7113 sqlite3_free(pCsr); |
| 7114 return SQLITE_OK; |
| 7115 } |
| 7116 |
| 7117 static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){ |
| 7118 if( nSize>pCsr->nStat ){ |
| 7119 struct Fts3auxColstats *aNew; |
| 7120 aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, |
| 7121 sizeof(struct Fts3auxColstats) * nSize |
| 7122 ); |
| 7123 if( aNew==0 ) return SQLITE_NOMEM; |
| 7124 memset(&aNew[pCsr->nStat], 0, |
| 7125 sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat) |
| 7126 ); |
| 7127 pCsr->aStat = aNew; |
| 7128 pCsr->nStat = nSize; |
| 7129 } |
| 7130 return SQLITE_OK; |
| 7131 } |
| 7132 |
| 7133 /* |
| 7134 ** xNext - Advance the cursor to the next row, if any. |
| 7135 */ |
| 7136 static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){ |
| 7137 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7138 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7139 int rc; |
| 7140 |
| 7141 /* Increment our pretend rowid value. */ |
| 7142 pCsr->iRowid++; |
| 7143 |
| 7144 for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){ |
| 7145 if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK; |
| 7146 } |
| 7147 |
| 7148 rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr); |
| 7149 if( rc==SQLITE_ROW ){ |
| 7150 int i = 0; |
| 7151 int nDoclist = pCsr->csr.nDoclist; |
| 7152 char *aDoclist = pCsr->csr.aDoclist; |
| 7153 int iCol; |
| 7154 |
| 7155 int eState = 0; |
| 7156 |
| 7157 if( pCsr->zStop ){ |
| 7158 int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm; |
| 7159 int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n); |
| 7160 if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){ |
| 7161 pCsr->isEof = 1; |
| 7162 return SQLITE_OK; |
| 7163 } |
| 7164 } |
| 7165 |
| 7166 if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM; |
| 7167 memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat); |
| 7168 iCol = 0; |
| 7169 |
| 7170 while( i<nDoclist ){ |
| 7171 sqlite3_int64 v = 0; |
| 7172 |
| 7173 i += sqlite3Fts3GetVarint(&aDoclist[i], &v); |
| 7174 switch( eState ){ |
| 7175 /* State 0. In this state the integer just read was a docid. */ |
| 7176 case 0: |
| 7177 pCsr->aStat[0].nDoc++; |
| 7178 eState = 1; |
| 7179 iCol = 0; |
| 7180 break; |
| 7181 |
| 7182 /* State 1. In this state we are expecting either a 1, indicating |
| 7183 ** that the following integer will be a column number, or the |
| 7184 ** start of a position list for column 0. |
| 7185 ** |
| 7186 ** The only difference between state 1 and state 2 is that if the |
| 7187 ** integer encountered in state 1 is not 0 or 1, then we need to |
| 7188 ** increment the column 0 "nDoc" count for this term. |
| 7189 */ |
| 7190 case 1: |
| 7191 assert( iCol==0 ); |
| 7192 if( v>1 ){ |
| 7193 pCsr->aStat[1].nDoc++; |
| 7194 } |
| 7195 eState = 2; |
| 7196 /* fall through */ |
| 7197 |
| 7198 case 2: |
| 7199 if( v==0 ){ /* 0x00. Next integer will be a docid. */ |
| 7200 eState = 0; |
| 7201 }else if( v==1 ){ /* 0x01. Next integer will be a column number. */ |
| 7202 eState = 3; |
| 7203 }else{ /* 2 or greater. A position. */ |
| 7204 pCsr->aStat[iCol+1].nOcc++; |
| 7205 pCsr->aStat[0].nOcc++; |
| 7206 } |
| 7207 break; |
| 7208 |
| 7209 /* State 3. The integer just read is a column number. */ |
| 7210 default: assert( eState==3 ); |
| 7211 iCol = (int)v; |
| 7212 if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM; |
| 7213 pCsr->aStat[iCol+1].nDoc++; |
| 7214 eState = 2; |
| 7215 break; |
| 7216 } |
| 7217 } |
| 7218 |
| 7219 pCsr->iCol = 0; |
| 7220 rc = SQLITE_OK; |
| 7221 }else{ |
| 7222 pCsr->isEof = 1; |
| 7223 } |
| 7224 return rc; |
| 7225 } |
| 7226 |
| 7227 /* |
| 7228 ** xFilter - Initialize a cursor to point at the start of its data. |
| 7229 */ |
| 7230 static int fts3auxFilterMethod( |
| 7231 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 7232 int idxNum, /* Strategy index */ |
| 7233 const char *idxStr, /* Unused */ |
| 7234 int nVal, /* Number of elements in apVal */ |
| 7235 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 7236 ){ |
| 7237 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7238 Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; |
| 7239 int rc; |
| 7240 int isScan = 0; |
| 7241 int iLangVal = 0; /* Language id to query */ |
| 7242 |
| 7243 int iEq = -1; /* Index of term=? value in apVal */ |
| 7244 int iGe = -1; /* Index of term>=? value in apVal */ |
| 7245 int iLe = -1; /* Index of term<=? value in apVal */ |
| 7246 int iLangid = -1; /* Index of languageid=? value in apVal */ |
| 7247 int iNext = 0; |
| 7248 |
| 7249 UNUSED_PARAMETER(nVal); |
| 7250 UNUSED_PARAMETER(idxStr); |
| 7251 |
| 7252 assert( idxStr==0 ); |
| 7253 assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0 |
| 7254 || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT |
| 7255 || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) |
| 7256 ); |
| 7257 |
| 7258 if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ |
| 7259 iEq = iNext++; |
| 7260 }else{ |
| 7261 isScan = 1; |
| 7262 if( idxNum & FTS4AUX_GE_CONSTRAINT ){ |
| 7263 iGe = iNext++; |
| 7264 } |
| 7265 if( idxNum & FTS4AUX_LE_CONSTRAINT ){ |
| 7266 iLe = iNext++; |
| 7267 } |
| 7268 } |
| 7269 if( iNext<nVal ){ |
| 7270 iLangid = iNext++; |
| 7271 } |
| 7272 |
| 7273 /* In case this cursor is being reused, close and zero it. */ |
| 7274 testcase(pCsr->filter.zTerm); |
| 7275 sqlite3Fts3SegReaderFinish(&pCsr->csr); |
| 7276 sqlite3_free((void *)pCsr->filter.zTerm); |
| 7277 sqlite3_free(pCsr->aStat); |
| 7278 memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr); |
| 7279 |
| 7280 pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; |
| 7281 if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; |
| 7282 |
| 7283 if( iEq>=0 || iGe>=0 ){ |
| 7284 const unsigned char *zStr = sqlite3_value_text(apVal[0]); |
| 7285 assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); |
| 7286 if( zStr ){ |
| 7287 pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); |
| 7288 pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); |
| 7289 if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; |
| 7290 } |
| 7291 } |
| 7292 |
| 7293 if( iLe>=0 ){ |
| 7294 pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); |
| 7295 pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); |
| 7296 if( pCsr->zStop==0 ) return SQLITE_NOMEM; |
| 7297 } |
| 7298 |
| 7299 if( iLangid>=0 ){ |
| 7300 iLangVal = sqlite3_value_int(apVal[iLangid]); |
| 7301 |
| 7302 /* If the user specified a negative value for the languageid, use zero |
| 7303 ** instead. This works, as the "languageid=?" constraint will also |
| 7304 ** be tested by the VDBE layer. The test will always be false (since |
| 7305 ** this module will not return a row with a negative languageid), and |
| 7306 ** so the overall query will return zero rows. */ |
| 7307 if( iLangVal<0 ) iLangVal = 0; |
| 7308 } |
| 7309 pCsr->iLangid = iLangVal; |
| 7310 |
| 7311 rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, |
| 7312 pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr |
| 7313 ); |
| 7314 if( rc==SQLITE_OK ){ |
| 7315 rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter); |
| 7316 } |
| 7317 |
| 7318 if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor); |
| 7319 return rc; |
| 7320 } |
| 7321 |
| 7322 /* |
| 7323 ** xEof - Return true if the cursor is at EOF, or false otherwise. |
| 7324 */ |
| 7325 static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ |
| 7326 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7327 return pCsr->isEof; |
| 7328 } |
| 7329 |
| 7330 /* |
| 7331 ** xColumn - Return a column value. |
| 7332 */ |
| 7333 static int fts3auxColumnMethod( |
| 7334 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 7335 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 7336 int iCol /* Index of column to read value from */ |
| 7337 ){ |
| 7338 Fts3auxCursor *p = (Fts3auxCursor *)pCursor; |
| 7339 |
| 7340 assert( p->isEof==0 ); |
| 7341 switch( iCol ){ |
| 7342 case 0: /* term */ |
| 7343 sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); |
| 7344 break; |
| 7345 |
| 7346 case 1: /* col */ |
| 7347 if( p->iCol ){ |
| 7348 sqlite3_result_int(pCtx, p->iCol-1); |
| 7349 }else{ |
| 7350 sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); |
| 7351 } |
| 7352 break; |
| 7353 |
| 7354 case 2: /* documents */ |
| 7355 sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); |
| 7356 break; |
| 7357 |
| 7358 case 3: /* occurrences */ |
| 7359 sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); |
| 7360 break; |
| 7361 |
| 7362 default: /* languageid */ |
| 7363 assert( iCol==4 ); |
| 7364 sqlite3_result_int(pCtx, p->iLangid); |
| 7365 break; |
| 7366 } |
| 7367 |
| 7368 return SQLITE_OK; |
| 7369 } |
| 7370 |
| 7371 /* |
| 7372 ** xRowid - Return the current rowid for the cursor. |
| 7373 */ |
| 7374 static int fts3auxRowidMethod( |
| 7375 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 7376 sqlite_int64 *pRowid /* OUT: Rowid value */ |
| 7377 ){ |
| 7378 Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; |
| 7379 *pRowid = pCsr->iRowid; |
| 7380 return SQLITE_OK; |
| 7381 } |
| 7382 |
| 7383 /* |
| 7384 ** Register the fts3aux module with database connection db. Return SQLITE_OK |
| 7385 ** if successful or an error code if sqlite3_create_module() fails. |
| 7386 */ |
| 7387 SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ |
| 7388 static const sqlite3_module fts3aux_module = { |
| 7389 0, /* iVersion */ |
| 7390 fts3auxConnectMethod, /* xCreate */ |
| 7391 fts3auxConnectMethod, /* xConnect */ |
| 7392 fts3auxBestIndexMethod, /* xBestIndex */ |
| 7393 fts3auxDisconnectMethod, /* xDisconnect */ |
| 7394 fts3auxDisconnectMethod, /* xDestroy */ |
| 7395 fts3auxOpenMethod, /* xOpen */ |
| 7396 fts3auxCloseMethod, /* xClose */ |
| 7397 fts3auxFilterMethod, /* xFilter */ |
| 7398 fts3auxNextMethod, /* xNext */ |
| 7399 fts3auxEofMethod, /* xEof */ |
| 7400 fts3auxColumnMethod, /* xColumn */ |
| 7401 fts3auxRowidMethod, /* xRowid */ |
| 7402 0, /* xUpdate */ |
| 7403 0, /* xBegin */ |
| 7404 0, /* xSync */ |
| 7405 0, /* xCommit */ |
| 7406 0, /* xRollback */ |
| 7407 0, /* xFindFunction */ |
| 7408 0, /* xRename */ |
| 7409 0, /* xSavepoint */ |
| 7410 0, /* xRelease */ |
| 7411 0 /* xRollbackTo */ |
| 7412 }; |
| 7413 int rc; /* Return code */ |
| 7414 |
| 7415 rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0); |
| 7416 return rc; |
| 7417 } |
| 7418 |
| 7419 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 7420 |
| 7421 /************** End of fts3_aux.c ********************************************/ |
| 7422 /************** Begin file fts3_expr.c ***************************************/ |
| 7423 /* |
| 7424 ** 2008 Nov 28 |
| 7425 ** |
| 7426 ** The author disclaims copyright to this source code. In place of |
| 7427 ** a legal notice, here is a blessing: |
| 7428 ** |
| 7429 ** May you do good and not evil. |
| 7430 ** May you find forgiveness for yourself and forgive others. |
| 7431 ** May you share freely, never taking more than you give. |
| 7432 ** |
| 7433 ****************************************************************************** |
| 7434 ** |
| 7435 ** This module contains code that implements a parser for fts3 query strings |
| 7436 ** (the right-hand argument to the MATCH operator). Because the supported |
| 7437 ** syntax is relatively simple, the whole tokenizer/parser system is |
| 7438 ** hand-coded. |
| 7439 */ |
| 7440 /* #include "fts3Int.h" */ |
| 7441 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 7442 |
| 7443 /* |
| 7444 ** By default, this module parses the legacy syntax that has been |
| 7445 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
| 7446 ** is defined, then it uses the new syntax. The differences between |
| 7447 ** the new and the old syntaxes are: |
| 7448 ** |
| 7449 ** a) The new syntax supports parenthesis. The old does not. |
| 7450 ** |
| 7451 ** b) The new syntax supports the AND and NOT operators. The old does not. |
| 7452 ** |
| 7453 ** c) The old syntax supports the "-" token qualifier. This is not |
| 7454 ** supported by the new syntax (it is replaced by the NOT operator). |
| 7455 ** |
| 7456 ** d) When using the old syntax, the OR operator has a greater precedence |
| 7457 ** than an implicit AND. When using the new, both implicit and explicit |
| 7458 ** AND operators have a higher precedence than OR. |
| 7459 ** |
| 7460 ** If compiled with SQLITE_TEST defined, then this module exports the |
| 7461 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable |
| 7462 ** to zero causes the module to use the old syntax. If it is set to |
| 7463 ** non-zero the new syntax is activated. This is so both syntaxes can |
| 7464 ** be tested using a single build of testfixture. |
| 7465 ** |
| 7466 ** The following describes the syntax supported by the fts3 MATCH |
| 7467 ** operator in a similar format to that used by the lemon parser |
| 7468 ** generator. This module does not actually use lemon; it uses a |
| 7469 ** custom parser. |
| 7470 ** |
| 7471 ** query ::= andexpr (OR andexpr)*. |
| 7472 ** |
| 7473 ** andexpr ::= notexpr (AND? notexpr)*. |
| 7474 ** |
| 7475 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. |
| 7476 ** notexpr ::= LP query RP. |
| 7477 ** |
| 7478 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. |
| 7479 ** |
| 7480 ** distance_opt ::= . |
| 7481 ** distance_opt ::= / INTEGER. |
| 7482 ** |
| 7483 ** phrase ::= TOKEN. |
| 7484 ** phrase ::= COLUMN:TOKEN. |
| 7485 ** phrase ::= "TOKEN TOKEN TOKEN...". |
| 7486 */ |
| 7487 |
/*
** Select the query syntax.
**
** In SQLITE_TEST builds sqlite3_fts3_enable_parentheses is a real global
** variable, initially 0. Otherwise it is a compile-time constant: 1 if
** SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 if it is not.
*/
#if defined(SQLITE_TEST)
SQLITE_API int sqlite3_fts3_enable_parentheses = 0;
#elif defined(SQLITE_ENABLE_FTS3_PARENTHESIS)
# define sqlite3_fts3_enable_parentheses 1
#else
# define sqlite3_fts3_enable_parentheses 0
#endif

/*
** Span used by a NEAR operator that carries no explicit "/N" distance.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
| 7502 |
| 7503 /* #include <string.h> */ |
| 7504 /* #include <assert.h> */ |
| 7505 |
| 7506 /* |
| ** A ParseContext carries the state shared by the query parsing routines in |
| ** this file: fts3ExprParse(), getNextNode(), getNextToken() and |
| ** getNextString() all receive a pointer to one. |
| ** |
| 7507 ** isNot: |
| 7508 ** This variable is used by function getNextNode(). When getNextNode() is |
| 7509 ** called, it sets ParseContext.isNot to true if the 'next node' is a |
| 7510 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the |
| 7511 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to |
| 7512 ** zero. |
| ** |
| ** nNest: |
| ** Incremented by getNextNode() each time it consumes an opening |
| ** parenthesis and decremented each time it consumes a closing one. |
| 7513 */ |
| 7514 typedef struct ParseContext ParseContext; |
| 7515 struct ParseContext { |
| 7516 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
| 7517 int iLangid; /* Language id passed to sqlite3Fts3OpenTokenizer() */ |
| 7518 const char **azCol; /* Array of column names for fts3 table */ |
| 7519 int bFts4; /* True to allow FTS4-only syntax (the "^" prefix in getNextToken()) */ |
| 7520 int nCol; /* Number of entries in azCol[] */ |
| 7521 int iDefaultCol; /* Column to query when no "COLUMN:" prefix is given */ |
| 7522 int isNot; /* True if getNextNode() sees a unary - */ |
| 7523 sqlite3_context *pCtx; /* Write error message here */ |
| 7524 int nNest; /* Number of nested brackets */ |
| 7525 }; |
| 7526 |
/*
** Whitespace test with the same result set as the standard isspace() in
** the "C" locale: returns 1 for space, tab, newline, carriage return,
** vertical tab and form feed, and 0 for everything else.
**
** Unlike isspace(), the argument is a plain char, so values outside the
** range of unsigned char (i.e. negative chars) are handled safely and
** simply yield 0 rather than invoking undefined behavior.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
| 7541 |
/*
** Obtain nByte bytes from sqlite3_malloc() and clear them to zero.
** Returns the new buffer, or NULL if the allocation failed.
*/
static void *fts3MallocZero(int nByte){
  void *pNew = sqlite3_malloc(nByte);
  if( pNew==0 ){
    return 0;
  }
  memset(pNew, 0, nByte);
  return pNew;
}
| 7552 |
| 7553 SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer( |
| 7554 sqlite3_tokenizer *pTokenizer, |
| 7555 int iLangid, |
| 7556 const char *z, |
| 7557 int n, |
| 7558 sqlite3_tokenizer_cursor **ppCsr |
| 7559 ){ |
| 7560 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7561 sqlite3_tokenizer_cursor *pCsr = 0; |
| 7562 int rc; |
| 7563 |
| 7564 rc = pModule->xOpen(pTokenizer, z, n, &pCsr); |
| 7565 assert( rc==SQLITE_OK || pCsr==0 ); |
| 7566 if( rc==SQLITE_OK ){ |
| 7567 pCsr->pTokenizer = pTokenizer; |
| 7568 if( pModule->iVersion>=1 ){ |
| 7569 rc = pModule->xLanguageid(pCsr, iLangid); |
| 7570 if( rc!=SQLITE_OK ){ |
| 7571 pModule->xClose(pCsr); |
| 7572 pCsr = 0; |
| 7573 } |
| 7574 } |
| 7575 } |
| 7576 *ppCsr = pCsr; |
| 7577 return rc; |
| 7578 } |
| 7579 |
| 7580 /* |
| 7581 ** Function getNextNode(), which is called by fts3ExprParse(), may itself |
| 7582 ** call fts3ExprParse(). So this forward declaration is required. |
| 7583 */ |
| 7584 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
| 7585 |
| 7586 /* |
| 7587 ** Extract the next token from buffer z (length n) using the tokenizer |
| 7588 ** and other information (column names etc.) in pParse. Create an Fts3Expr |
| 7589 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
| 7590 ** single token and set *ppExpr to point to it. If the end of the buffer is |
| 7591 ** reached before a token is found, set *ppExpr to zero. It is the |
| 7592 ** responsibility of the caller to eventually deallocate the allocated |
| 7593 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
| 7594 ** |
| 7595 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
| 7596 ** fails. |
| 7597 */ |
| 7598 static int getNextToken( |
| 7599 ParseContext *pParse, /* fts3 query parse context */ |
| 7600 int iCol, /* Value for Fts3Phrase.iColumn */ |
| 7601 const char *z, int n, /* Input string */ |
| 7602 Fts3Expr **ppExpr, /* OUT: expression */ |
| 7603 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 7604 ){ |
| 7605 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 7606 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7607 int rc; |
| 7608 sqlite3_tokenizer_cursor *pCursor; |
| 7609 Fts3Expr *pRet = 0; |
| 7610 int i = 0; |
| 7611 |
| 7612 /* Set variable i to the maximum number of bytes of input to tokenize. */ |
| 7613 for(i=0; i<n; i++){ |
| 7614 if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; |
| 7615 if( z[i]=='"' ) break; |
| 7616 } |
| 7617 |
| 7618 *pnConsumed = i; |
| 7619 rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); |
| 7620 if( rc==SQLITE_OK ){ |
| 7621 const char *zToken; |
| 7622 int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; |
| 7623 int nByte; /* total space to allocate */ |
| 7624 |
| 7625 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
| 7626 if( rc==SQLITE_OK ){ |
| 7627 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
| 7628 pRet = (Fts3Expr *)fts3MallocZero(nByte); |
| 7629 if( !pRet ){ |
| 7630 rc = SQLITE_NOMEM; |
| 7631 }else{ |
| 7632 pRet->eType = FTSQUERY_PHRASE; |
| 7633 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
| 7634 pRet->pPhrase->nToken = 1; |
| 7635 pRet->pPhrase->iColumn = iCol; |
| 7636 pRet->pPhrase->aToken[0].n = nToken; |
| 7637 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
| 7638 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
| 7639 |
| 7640 if( iEnd<n && z[iEnd]=='*' ){ |
| 7641 pRet->pPhrase->aToken[0].isPrefix = 1; |
| 7642 iEnd++; |
| 7643 } |
| 7644 |
| 7645 while( 1 ){ |
| 7646 if( !sqlite3_fts3_enable_parentheses |
| 7647 && iStart>0 && z[iStart-1]=='-' |
| 7648 ){ |
| 7649 pParse->isNot = 1; |
| 7650 iStart--; |
| 7651 }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ |
| 7652 pRet->pPhrase->aToken[0].bFirst = 1; |
| 7653 iStart--; |
| 7654 }else{ |
| 7655 break; |
| 7656 } |
| 7657 } |
| 7658 |
| 7659 } |
| 7660 *pnConsumed = iEnd; |
| 7661 }else if( i && rc==SQLITE_DONE ){ |
| 7662 rc = SQLITE_OK; |
| 7663 } |
| 7664 |
| 7665 pModule->xClose(pCursor); |
| 7666 } |
| 7667 |
| 7668 *ppExpr = pRet; |
| 7669 return rc; |
| 7670 } |
| 7671 |
| 7672 |
/*
** Resize allocation pOrig to nNew bytes using sqlite3_realloc().
**
** On success the (possibly moved) allocation is returned. If the
** allocation cannot be enlarged, pOrig is freed and NULL is returned, so
** the caller never has to release the old buffer itself.
**
** sqlite3_realloc(X,N) with N<=0 is documented to behave exactly like
** sqlite3_free(X) and to return NULL. In that case pOrig has already been
** released and must not be passed to sqlite3_free() a second time.
*/
static void *fts3ReallocOrFree(void *pOrig, int nNew){
  void *pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet && nNew>0 ){
    /* Genuine allocation failure: pOrig is still valid and owned by us. */
    sqlite3_free(pOrig);
  }
  return pRet;
}
| 7684 |
| 7685 /* |
| 7686 ** Buffer zInput, length nInput, contains the contents of a quoted string |
| 7687 ** that appeared as part of an fts3 query expression. Neither quote character |
| 7688 ** is included in the buffer. This function attempts to tokenize the entire |
| 7689 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE |
| 7690 ** containing the results. |
| 7691 ** |
| 7692 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the |
| 7693 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory |
| 7694 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set |
| 7695 ** to 0. |
| 7696 */ |
| 7697 static int getNextString( |
| 7698 ParseContext *pParse, /* fts3 query parse context */ |
| 7699 const char *zInput, int nInput, /* Input string */ |
| 7700 Fts3Expr **ppExpr /* OUT: expression */ |
| 7701 ){ |
| 7702 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 7703 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 7704 int rc; |
| 7705 Fts3Expr *p = 0; |
| 7706 sqlite3_tokenizer_cursor *pCursor = 0; |
| 7707 char *zTemp = 0; |
| 7708 int nTemp = 0; |
| 7709 |
| 7710 const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| 7711 int nToken = 0; |
| 7712 |
| 7713 /* The final Fts3Expr data structure, including the Fts3Phrase, |
| 7714 ** Fts3PhraseToken structures token buffers are all stored as a single |
| 7715 ** allocation so that the expression can be freed with a single call to |
| 7716 ** sqlite3_free(). Setting this up requires a two pass approach. |
| 7717 ** |
| 7718 ** The first pass, in the block below, uses a tokenizer cursor to iterate |
| 7719 ** through the tokens in the expression. This pass uses fts3ReallocOrFree() |
| 7720 ** to assemble data in two dynamic buffers: |
| 7721 ** |
| 7722 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase |
| 7723 ** structure, followed by the array of Fts3PhraseToken |
| 7724 ** structures. This pass only populates the Fts3PhraseToken array. |
| 7725 ** |
| 7726 ** Buffer zTemp: Contains copies of all tokens. |
| 7727 ** |
| 7728 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, |
| 7729 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase |
| 7730 ** structures. |
| 7731 */ |
| 7732 rc = sqlite3Fts3OpenTokenizer( |
| 7733 pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); |
| 7734 if( rc==SQLITE_OK ){ |
| 7735 int ii; |
| 7736 for(ii=0; rc==SQLITE_OK; ii++){ |
| 7737 const char *zByte; |
| 7738 int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; |
| 7739 rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); |
| 7740 if( rc==SQLITE_OK ){ |
| 7741 Fts3PhraseToken *pToken; |
| 7742 |
| 7743 p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); |
| 7744 if( !p ) goto no_mem; |
| 7745 |
| 7746 zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); |
| 7747 if( !zTemp ) goto no_mem; |
| 7748 |
| 7749 assert( nToken==ii ); |
| 7750 pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; |
| 7751 memset(pToken, 0, sizeof(Fts3PhraseToken)); |
| 7752 |
| 7753 memcpy(&zTemp[nTemp], zByte, nByte); |
| 7754 nTemp += nByte; |
| 7755 |
| 7756 pToken->n = nByte; |
| 7757 pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); |
| 7758 pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); |
| 7759 nToken = ii+1; |
| 7760 } |
| 7761 } |
| 7762 |
| 7763 pModule->xClose(pCursor); |
| 7764 pCursor = 0; |
| 7765 } |
| 7766 |
| 7767 if( rc==SQLITE_DONE ){ |
| 7768 int jj; |
| 7769 char *zBuf = 0; |
| 7770 |
| 7771 p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); |
| 7772 if( !p ) goto no_mem; |
| 7773 memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); |
| 7774 p->eType = FTSQUERY_PHRASE; |
| 7775 p->pPhrase = (Fts3Phrase *)&p[1]; |
| 7776 p->pPhrase->iColumn = pParse->iDefaultCol; |
| 7777 p->pPhrase->nToken = nToken; |
| 7778 |
| 7779 zBuf = (char *)&p->pPhrase->aToken[nToken]; |
| 7780 if( zTemp ){ |
| 7781 memcpy(zBuf, zTemp, nTemp); |
| 7782 sqlite3_free(zTemp); |
| 7783 }else{ |
| 7784 assert( nTemp==0 ); |
| 7785 } |
| 7786 |
| 7787 for(jj=0; jj<p->pPhrase->nToken; jj++){ |
| 7788 p->pPhrase->aToken[jj].z = zBuf; |
| 7789 zBuf += p->pPhrase->aToken[jj].n; |
| 7790 } |
| 7791 rc = SQLITE_OK; |
| 7792 } |
| 7793 |
| 7794 *ppExpr = p; |
| 7795 return rc; |
| 7796 no_mem: |
| 7797 |
| 7798 if( pCursor ){ |
| 7799 pModule->xClose(pCursor); |
| 7800 } |
| 7801 sqlite3_free(zTemp); |
| 7802 sqlite3_free(p); |
| 7803 *ppExpr = 0; |
| 7804 return SQLITE_NOMEM; |
| 7805 } |
| 7806 |
| 7807 /* |
| 7808 ** The output variable *ppExpr is populated with an allocated Fts3Expr |
| 7809 ** structure, or set to 0 if the end of the input buffer is reached. |
| 7810 ** |
| 7811 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM |
| 7812 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
| 7813 ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
| 7814 */ |
| 7815 static int getNextNode( |
| 7816 ParseContext *pParse, /* fts3 query parse context */ |
| 7817 const char *z, int n, /* Input string */ |
| 7818 Fts3Expr **ppExpr, /* OUT: expression */ |
| 7819 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 7820 ){ |
| 7821 static const struct Fts3Keyword { |
| 7822 char *z; /* Keyword text */ |
| 7823 unsigned char n; /* Length of the keyword */ |
| 7824 unsigned char parenOnly; /* Only valid in paren mode */ |
| 7825 unsigned char eType; /* Keyword code */ |
| 7826 } aKeyword[] = { |
| 7827 { "OR" , 2, 0, FTSQUERY_OR }, |
| 7828 { "AND", 3, 1, FTSQUERY_AND }, |
| 7829 { "NOT", 3, 1, FTSQUERY_NOT }, |
| 7830 { "NEAR", 4, 0, FTSQUERY_NEAR } |
| 7831 }; |
| 7832 int ii; |
| 7833 int iCol; |
| 7834 int iColLen; |
| 7835 int rc; |
| 7836 Fts3Expr *pRet = 0; |
| 7837 |
| 7838 const char *zInput = z; |
| 7839 int nInput = n; |
| 7840 |
| 7841 pParse->isNot = 0; |
| 7842 |
| 7843 /* Skip over any whitespace before checking for a keyword, an open or |
| 7844 ** close bracket, or a quoted string. |
| 7845 */ |
| 7846 while( nInput>0 && fts3isspace(*zInput) ){ |
| 7847 nInput--; |
| 7848 zInput++; |
| 7849 } |
| 7850 if( nInput==0 ){ |
| 7851 return SQLITE_DONE; |
| 7852 } |
| 7853 |
| 7854 /* See if we are dealing with a keyword. */ |
| 7855 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ |
| 7856 const struct Fts3Keyword *pKey = &aKeyword[ii]; |
| 7857 |
| 7858 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ |
| 7859 continue; |
| 7860 } |
| 7861 |
| 7862 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ |
| 7863 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; |
| 7864 int nKey = pKey->n; |
| 7865 char cNext; |
| 7866 |
| 7867 /* If this is a "NEAR" keyword, check for an explicit nearness. */ |
| 7868 if( pKey->eType==FTSQUERY_NEAR ){ |
| 7869 assert( nKey==4 ); |
| 7870 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ |
| 7871 nNear = 0; |
| 7872 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ |
| 7873 nNear = nNear * 10 + (zInput[nKey] - '0'); |
| 7874 } |
| 7875 } |
| 7876 } |
| 7877 |
| 7878 /* At this point this is probably a keyword. But for that to be true, |
| 7879 ** the next byte must contain either whitespace, an open or close |
| 7880 ** parenthesis, a quote character, or EOF. |
| 7881 */ |
| 7882 cNext = zInput[nKey]; |
| 7883 if( fts3isspace(cNext) |
| 7884 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 |
| 7885 ){ |
| 7886 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); |
| 7887 if( !pRet ){ |
| 7888 return SQLITE_NOMEM; |
| 7889 } |
| 7890 pRet->eType = pKey->eType; |
| 7891 pRet->nNear = nNear; |
| 7892 *ppExpr = pRet; |
| 7893 *pnConsumed = (int)((zInput - z) + nKey); |
| 7894 return SQLITE_OK; |
| 7895 } |
| 7896 |
| 7897 /* Turns out that wasn't a keyword after all. This happens if the |
| 7898 ** user has supplied a token such as "ORacle". Continue. |
| 7899 */ |
| 7900 } |
| 7901 } |
| 7902 |
| 7903 /* See if we are dealing with a quoted phrase. If this is the case, then |
| 7904 ** search for the closing quote and pass the whole string to getNextString() |
| 7905 ** for processing. This is easy to do, as fts3 has no syntax for escaping |
| 7906 ** a quote character embedded in a string. |
| 7907 */ |
| 7908 if( *zInput=='"' ){ |
| 7909 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
| 7910 *pnConsumed = (int)((zInput - z) + ii + 1); |
| 7911 if( ii==nInput ){ |
| 7912 return SQLITE_ERROR; |
| 7913 } |
| 7914 return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
| 7915 } |
| 7916 |
| 7917 if( sqlite3_fts3_enable_parentheses ){ |
| 7918 if( *zInput=='(' ){ |
| 7919 int nConsumed = 0; |
| 7920 pParse->nNest++; |
| 7921 rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); |
| 7922 if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } |
| 7923 *pnConsumed = (int)(zInput - z) + 1 + nConsumed; |
| 7924 return rc; |
| 7925 }else if( *zInput==')' ){ |
| 7926 pParse->nNest--; |
| 7927 *pnConsumed = (int)((zInput - z) + 1); |
| 7928 *ppExpr = 0; |
| 7929 return SQLITE_DONE; |
| 7930 } |
| 7931 } |
| 7932 |
| 7933 /* If control flows to this point, this must be a regular token, or |
| 7934 ** the end of the input. Read a regular token using the sqlite3_tokenizer |
| 7935 ** interface. Before doing so, figure out if there is an explicit |
| 7936 ** column specifier for the token. |
| 7937 ** |
| 7938 ** TODO: Strangely, it is not possible to associate a column specifier |
| 7939 ** with a quoted phrase, only with a single token. Not sure if this was |
| 7940 ** an implementation artifact or an intentional decision when fts3 was |
| 7941 ** first implemented. Whichever it was, this module duplicates the |
| 7942 ** limitation. |
| 7943 */ |
| 7944 iCol = pParse->iDefaultCol; |
| 7945 iColLen = 0; |
| 7946 for(ii=0; ii<pParse->nCol; ii++){ |
| 7947 const char *zStr = pParse->azCol[ii]; |
| 7948 int nStr = (int)strlen(zStr); |
| 7949 if( nInput>nStr && zInput[nStr]==':' |
| 7950 && sqlite3_strnicmp(zStr, zInput, nStr)==0 |
| 7951 ){ |
| 7952 iCol = ii; |
| 7953 iColLen = (int)((zInput - z) + nStr + 1); |
| 7954 break; |
| 7955 } |
| 7956 } |
| 7957 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); |
| 7958 *pnConsumed += iColLen; |
| 7959 return rc; |
| 7960 } |
| 7961 |
| 7962 /* |
| 7963 ** The argument is an Fts3Expr structure for a binary operator (any type |
| 7964 ** except an FTSQUERY_PHRASE). Return an integer value representing the |
| 7965 ** precedence of the operator. Lower values have a higher precedence (i.e. |
| 7966 ** group more tightly). For example, in the C language, the == operator |
| 7967 ** groups more tightly than ||, and would therefore have a higher precedence. |
| 7968 ** |
| 7969 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS |
| 7970 ** is defined), the order of the operators in precedence from highest to |
| 7971 ** lowest is: |
| 7972 ** |
| 7973 ** NEAR |
| 7974 ** NOT |
| 7975 ** AND (including implicit ANDs) |
| 7976 ** OR |
| 7977 ** |
| 7978 ** Note that when using the old query syntax, the OR operator has a higher |
| 7979 ** precedence than the AND operator. |
| 7980 */ |
| 7981 static int opPrecedence(Fts3Expr *p){ |
| 7982 assert( p->eType!=FTSQUERY_PHRASE ); |
| 7983 if( sqlite3_fts3_enable_parentheses ){ |
| 7984 return p->eType; |
| 7985 }else if( p->eType==FTSQUERY_NEAR ){ |
| 7986 return 1; |
| 7987 }else if( p->eType==FTSQUERY_OR ){ |
| 7988 return 2; |
| 7989 } |
| 7990 assert( p->eType==FTSQUERY_AND ); |
| 7991 return 3; |
| 7992 } |
| 7993 |
| 7994 /* |
| 7995 ** Argument ppHead contains a pointer to the current head of a query |
| 7996 ** expression tree being parsed. pPrev is the expression node most recently |
| 7997 ** inserted into the tree. This function adds pNew, which is always a binary |
| 7998 ** operator node, into the expression tree based on the relative precedence |
| 7999 ** of pNew and the existing nodes of the tree. This may result in the head |
| 8000 ** of the tree changing, in which case *ppHead is set to the new root node. |
| 8001 */ |
| 8002 static void insertBinaryOperator( |
| 8003 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ |
| 8004 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ |
| 8005 Fts3Expr *pNew /* New binary node to insert into expression tree */ |
| 8006 ){ |
| 8007 Fts3Expr *pSplit = pPrev; |
| 8008 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ |
| 8009 pSplit = pSplit->pParent; |
| 8010 } |
| 8011 |
| 8012 if( pSplit->pParent ){ |
| 8013 assert( pSplit->pParent->pRight==pSplit ); |
| 8014 pSplit->pParent->pRight = pNew; |
| 8015 pNew->pParent = pSplit->pParent; |
| 8016 }else{ |
| 8017 *ppHead = pNew; |
| 8018 } |
| 8019 pNew->pLeft = pSplit; |
| 8020 pSplit->pParent = pNew; |
| 8021 } |
| 8022 |
| 8023 /* |
| 8024 ** Parse the fts3 query expression found in buffer z, length n. This function |
| 8025 ** returns either when the end of the buffer is reached or an unmatched |
| 8026 ** closing bracket - ')' - is encountered. |
| 8027 ** |
| 8028 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the |
| 8029 ** parsed form of the expression and *pnConsumed is set to the number of |
| 8030 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM |
| 8031 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. |
| ** |
| ** Nodes are read one at a time with getNextNode() and linked into a tree: |
| ** |
| **   * Two phrases (or bracketed expressions) in succession are joined by |
| **     an implicit FTSQUERY_AND node. |
| **   * A binary operator where a phrase is required, or an expression that |
| **     ends with a binary operator, is a parse error. |
| **   * In legacy mode, phrases flagged by pParse->isNot are collected in a |
| **     separate chain of FTSQUERY_NOT nodes (pNotBranch). Once the input is |
| **     exhausted the main tree becomes the left-most operand of that chain. |
| **     A query made up solely of such phrases is a parse error. |
| 8032 */ |
| 8033 static int fts3ExprParse( |
| 8034 ParseContext *pParse, /* fts3 query parse context */ |
| 8035 const char *z, int n, /* Text of MATCH query */ |
| 8036 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 8037 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 8038 ){ |
| 8039 Fts3Expr *pRet = 0; /* Root of the tree built so far */ |
| 8040 Fts3Expr *pPrev = 0; /* Node most recently added to the tree */ |
| 8041 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ |
| 8042 int nIn = n; /* Bytes of input not yet consumed */ |
| 8043 const char *zIn = z; /* Next byte of input to parse */ |
| 8044 int rc = SQLITE_OK; |
| 8045 int isRequirePhrase = 1; /* True if the next node must be a phrase */ |
| 8046 |
| 8047 while( rc==SQLITE_OK ){ |
| 8048 Fts3Expr *p = 0; |
| 8049 int nByte = 0; |
| 8050 |
| 8051 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
| 8052 assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); |
| 8053 if( rc==SQLITE_OK ){ |
| 8054 if( p ){ |
| 8055 int isPhrase; |
| 8056 |
| 8057 if( !sqlite3_fts3_enable_parentheses |
| 8058 && p->eType==FTSQUERY_PHRASE && pParse->isNot |
| 8059 ){ |
| 8060 /* Create an implicit NOT operator. */ |
| 8061 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
| 8062 if( !pNot ){ |
| 8063 sqlite3Fts3ExprFree(p); |
| 8064 rc = SQLITE_NOMEM; |
| 8065 goto exprparse_out; |
| 8066 } |
| 8067 pNot->eType = FTSQUERY_NOT; |
| 8068 pNot->pRight = p; |
| 8069 p->pParent = pNot; |
| 8070 if( pNotBranch ){ |
| 8071 pNot->pLeft = pNotBranch; |
| 8072 pNotBranch->pParent = pNot; |
| 8073 } |
| 8074 pNotBranch = pNot; |
| 8075 p = pPrev; /* So that "pPrev = p" below leaves pPrev unchanged */ |
| 8076 }else{ |
| 8077 int eType = p->eType; |
| 8078 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
| 8079 |
| 8080 /* The isRequirePhrase variable is set to true if a phrase or |
| 8081 ** an expression contained in parenthesis is required. If a |
| 8082 ** binary operator (AND, OR, NOT or NEAR) is encountered when |
| 8083 ** isRequirePhrase is set, this is a syntax error. |
| 8084 */ |
| 8085 if( !isPhrase && isRequirePhrase ){ |
| 8086 sqlite3Fts3ExprFree(p); |
| 8087 rc = SQLITE_ERROR; |
| 8088 goto exprparse_out; |
| 8089 } |
| 8090 |
| 8091 if( isPhrase && !isRequirePhrase ){ |
| 8092 /* Insert an implicit AND operator. */ |
| 8093 Fts3Expr *pAnd; |
| 8094 assert( pRet && pPrev ); |
| 8095 pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
| 8096 if( !pAnd ){ |
| 8097 sqlite3Fts3ExprFree(p); |
| 8098 rc = SQLITE_NOMEM; |
| 8099 goto exprparse_out; |
| 8100 } |
| 8101 pAnd->eType = FTSQUERY_AND; |
| 8102 insertBinaryOperator(&pRet, pPrev, pAnd); |
| 8103 pPrev = pAnd; |
| 8104 } |
| 8105 |
| 8106 /* This test catches attempts to make either operand of a NEAR |
| 8107 ** operator something other than a phrase. For example, either of |
| 8108 ** the following: |
| 8109 ** |
| 8110 ** (bracketed expression) NEAR phrase |
| 8111 ** phrase NEAR (bracketed expression) |
| 8112 ** |
| 8113 ** Return an error in either case. |
| 8114 */ |
| 8115 if( pPrev && ( |
| 8116 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
| 8117 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
| 8118 )){ |
| 8119 sqlite3Fts3ExprFree(p); |
| 8120 rc = SQLITE_ERROR; |
| 8121 goto exprparse_out; |
| 8122 } |
| 8123 |
| 8124 if( isPhrase ){ |
| 8125 if( pRet ){ |
| 8126 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
| 8127 pPrev->pRight = p; |
| 8128 p->pParent = pPrev; |
| 8129 }else{ |
| 8130 pRet = p; |
| 8131 } |
| 8132 }else{ |
| 8133 insertBinaryOperator(&pRet, pPrev, p); |
| 8134 } |
| 8135 isRequirePhrase = !isPhrase; |
| 8136 } |
| 8137 pPrev = p; |
| 8138 } |
| 8139 assert( nByte>0 ); |
| 8140 } |
| 8141 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
| 8142 nIn -= nByte; |
| 8143 zIn += nByte; |
| 8144 } |
| 8145 |
| 8146 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ |
| 8147 rc = SQLITE_ERROR; /* Expression ends with a binary operator */ |
| 8148 } |
| 8149 |
| 8150 if( rc==SQLITE_DONE ){ |
| 8151 rc = SQLITE_OK; |
| 8152 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ |
| 8153 if( !pRet ){ |
| 8154 rc = SQLITE_ERROR; /* Nothing for the NOT chain to apply to */ |
| 8155 }else{ |
| 8156 Fts3Expr *pIter = pNotBranch; |
| 8157 while( pIter->pLeft ){ |
| 8158 pIter = pIter->pLeft; |
| 8159 } |
| 8160 pIter->pLeft = pRet; |
| 8161 pRet->pParent = pIter; |
| 8162 pRet = pNotBranch; |
| 8163 } |
| 8164 } |
| 8165 } |
| 8166 *pnConsumed = n - nIn; |
| 8167 |
| 8168 exprparse_out: |
| 8169 if( rc!=SQLITE_OK ){ |
| 8170 sqlite3Fts3ExprFree(pRet); |
| 8171 sqlite3Fts3ExprFree(pNotBranch); |
| 8172 pRet = 0; |
| 8173 } |
| 8174 *ppExpr = pRet; |
| 8175 return rc; |
| 8176 } |
| 8177 |
| 8178 /* |
| 8179 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed |
| 8180 ** as the only argument is more than nMaxDepth. |
| 8181 */ |
| 8182 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ |
| 8183 int rc = SQLITE_OK; |
| 8184 if( p ){ |
| 8185 if( nMaxDepth<0 ){ |
| 8186 rc = SQLITE_TOOBIG; |
| 8187 }else{ |
| 8188 rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); |
| 8189 if( rc==SQLITE_OK ){ |
| 8190 rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); |
| 8191 } |
| 8192 } |
| 8193 } |
| 8194 return rc; |
| 8195 } |
| 8196 |
| 8197 /* |
| 8198 ** This function attempts to transform the expression tree at (*pp) to |
| 8199 ** an equivalent but more balanced form. The tree is modified in place. |
| 8200 ** If successful, SQLITE_OK is returned and (*pp) set to point to the |
| 8201 ** new root expression node. |
| 8202 ** |
| 8203 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. |
| 8204 ** |
| 8205 ** Otherwise, if an error occurs, an SQLite error code is returned and |
| 8206 ** expression (*pp) freed. |
| 8207 */ |
| 8208 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ |
| 8209 int rc = SQLITE_OK; /* Return code */ |
| 8210 Fts3Expr *pRoot = *pp; /* Initial root node */ |
| 8211 Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ |
| 8212 int eType = pRoot->eType; /* Type of node in this tree */ |
| 8213 |
| 8214 if( nMaxDepth==0 ){ |
| 8215 rc = SQLITE_ERROR; |
| 8216 } |
| 8217 |
| 8218 if( rc==SQLITE_OK ){ |
| 8219 if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ |
| 8220 Fts3Expr **apLeaf; |
| 8221 apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); |
| 8222 if( 0==apLeaf ){ |
| 8223 rc = SQLITE_NOMEM; |
| 8224 }else{ |
| 8225 memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); |
| 8226 } |
| 8227 |
| 8228 if( rc==SQLITE_OK ){ |
| 8229 int i; |
| 8230 Fts3Expr *p; |
| 8231 |
| 8232 /* Set $p to point to the left-most leaf in the tree of eType nodes. */ |
| 8233 for(p=pRoot; p->eType==eType; p=p->pLeft){ |
| 8234 assert( p->pParent==0 || p->pParent->pLeft==p ); |
| 8235 assert( p->pLeft && p->pRight ); |
| 8236 } |
| 8237 |
| 8238 /* This loop runs once for each leaf in the tree of eType nodes. */ |
| 8239 while( 1 ){ |
| 8240 int iLvl; |
| 8241 Fts3Expr *pParent = p->pParent; /* Current parent of p */ |
| 8242 |
| 8243 assert( pParent==0 || pParent->pLeft==p ); |
| 8244 p->pParent = 0; |
| 8245 if( pParent ){ |
| 8246 pParent->pLeft = 0; |
| 8247 }else{ |
| 8248 pRoot = 0; |
| 8249 } |
| 8250 rc = fts3ExprBalance(&p, nMaxDepth-1); |
| 8251 if( rc!=SQLITE_OK ) break; |
| 8252 |
| 8253 for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ |
| 8254 if( apLeaf[iLvl]==0 ){ |
| 8255 apLeaf[iLvl] = p; |
| 8256 p = 0; |
| 8257 }else{ |
| 8258 assert( pFree ); |
| 8259 pFree->pLeft = apLeaf[iLvl]; |
| 8260 pFree->pRight = p; |
| 8261 pFree->pLeft->pParent = pFree; |
| 8262 pFree->pRight->pParent = pFree; |
| 8263 |
| 8264 p = pFree; |
| 8265 pFree = pFree->pParent; |
| 8266 p->pParent = 0; |
| 8267 apLeaf[iLvl] = 0; |
| 8268 } |
| 8269 } |
| 8270 if( p ){ |
| 8271 sqlite3Fts3ExprFree(p); |
| 8272 rc = SQLITE_TOOBIG; |
| 8273 break; |
| 8274 } |
| 8275 |
| 8276 /* If that was the last leaf node, break out of the loop */ |
| 8277 if( pParent==0 ) break; |
| 8278 |
| 8279 /* Set $p to point to the next leaf in the tree of eType nodes */ |
| 8280 for(p=pParent->pRight; p->eType==eType; p=p->pLeft); |
| 8281 |
| 8282 /* Remove pParent from the original tree. */ |
| 8283 assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); |
| 8284 pParent->pRight->pParent = pParent->pParent; |
| 8285 if( pParent->pParent ){ |
| 8286 pParent->pParent->pLeft = pParent->pRight; |
| 8287 }else{ |
| 8288 assert( pParent==pRoot ); |
| 8289 pRoot = pParent->pRight; |
| 8290 } |
| 8291 |
| 8292 /* Link pParent into the free node list. It will be used as an |
| 8293 ** internal node of the new tree. */ |
| 8294 pParent->pParent = pFree; |
| 8295 pFree = pParent; |
| 8296 } |
| 8297 |
| 8298 if( rc==SQLITE_OK ){ |
| 8299 p = 0; |
| 8300 for(i=0; i<nMaxDepth; i++){ |
| 8301 if( apLeaf[i] ){ |
| 8302 if( p==0 ){ |
| 8303 p = apLeaf[i]; |
| 8304 p->pParent = 0; |
| 8305 }else{ |
| 8306 assert( pFree!=0 ); |
| 8307 pFree->pRight = p; |
| 8308 pFree->pLeft = apLeaf[i]; |
| 8309 pFree->pLeft->pParent = pFree; |
| 8310 pFree->pRight->pParent = pFree; |
| 8311 |
| 8312 p = pFree; |
| 8313 pFree = pFree->pParent; |
| 8314 p->pParent = 0; |
| 8315 } |
| 8316 } |
| 8317 } |
| 8318 pRoot = p; |
| 8319 }else{ |
| 8320 /* An error occurred. Delete the contents of the apLeaf[] array |
| 8321 ** and pFree list. Everything else is cleaned up by the call to |
| 8322 ** sqlite3Fts3ExprFree(pRoot) below. */ |
| 8323 Fts3Expr *pDel; |
| 8324 for(i=0; i<nMaxDepth; i++){ |
| 8325 sqlite3Fts3ExprFree(apLeaf[i]); |
| 8326 } |
| 8327 while( (pDel=pFree)!=0 ){ |
| 8328 pFree = pDel->pParent; |
| 8329 sqlite3_free(pDel); |
| 8330 } |
| 8331 } |
| 8332 |
| 8333 assert( pFree==0 ); |
| 8334 sqlite3_free( apLeaf ); |
| 8335 } |
| 8336 }else if( eType==FTSQUERY_NOT ){ |
| 8337 Fts3Expr *pLeft = pRoot->pLeft; |
| 8338 Fts3Expr *pRight = pRoot->pRight; |
| 8339 |
| 8340 pRoot->pLeft = 0; |
| 8341 pRoot->pRight = 0; |
| 8342 pLeft->pParent = 0; |
| 8343 pRight->pParent = 0; |
| 8344 |
| 8345 rc = fts3ExprBalance(&pLeft, nMaxDepth-1); |
| 8346 if( rc==SQLITE_OK ){ |
| 8347 rc = fts3ExprBalance(&pRight, nMaxDepth-1); |
| 8348 } |
| 8349 |
| 8350 if( rc!=SQLITE_OK ){ |
| 8351 sqlite3Fts3ExprFree(pRight); |
| 8352 sqlite3Fts3ExprFree(pLeft); |
| 8353 }else{ |
| 8354 assert( pLeft && pRight ); |
| 8355 pRoot->pLeft = pLeft; |
| 8356 pLeft->pParent = pRoot; |
| 8357 pRoot->pRight = pRight; |
| 8358 pRight->pParent = pRoot; |
| 8359 } |
| 8360 } |
| 8361 } |
| 8362 |
| 8363 if( rc!=SQLITE_OK ){ |
| 8364 sqlite3Fts3ExprFree(pRoot); |
| 8365 pRoot = 0; |
| 8366 } |
| 8367 *pp = pRoot; |
| 8368 return rc; |
| 8369 } |
| 8370 |
| 8371 /* |
| 8372 ** This function is similar to sqlite3Fts3ExprParse(), with the following |
| 8373 ** differences: |
| 8374 ** |
| 8375 ** 1. It does not do expression rebalancing. |
| 8376 ** 2. It does not check that the expression does not exceed the |
| 8377 ** maximum allowable depth. |
| 8378 ** 3. Even if it fails, *ppExpr may still be set to point to an |
| 8379 ** expression tree. It should be deleted using sqlite3Fts3ExprFree() |
| 8380 ** in this case. |
| 8381 */ |
| 8382 static int fts3ExprParseUnbalanced( |
| 8383 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 8384 int iLangid, /* Language id for tokenizer */ |
| 8385 char **azCol, /* Array of column names for fts3 table */ |
| 8386 int bFts4, /* True to allow FTS4-only syntax */ |
| 8387 int nCol, /* Number of entries in azCol[] */ |
| 8388 int iDefaultCol, /* Default column to query */ |
| 8389 const char *z, int n, /* Text of MATCH query */ |
| 8390 Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
| 8391 ){ |
| 8392 int nParsed; |
| 8393 int rc; |
| 8394 ParseContext sParse; |
| 8395 |
| 8396 memset(&sParse, 0, sizeof(ParseContext)); |
| 8397 sParse.pTokenizer = pTokenizer; |
| 8398 sParse.iLangid = iLangid; |
| 8399 sParse.azCol = (const char **)azCol; |
| 8400 sParse.nCol = nCol; |
| 8401 sParse.iDefaultCol = iDefaultCol; |
| 8402 sParse.bFts4 = bFts4; |
| 8403 if( z==0 ){ |
| 8404 *ppExpr = 0; |
| 8405 return SQLITE_OK; |
| 8406 } |
| 8407 if( n<0 ){ |
| 8408 n = (int)strlen(z); |
| 8409 } |
| 8410 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
| 8411 assert( rc==SQLITE_OK || *ppExpr==0 ); |
| 8412 |
| 8413 /* Check for mismatched parenthesis */ |
| 8414 if( rc==SQLITE_OK && sParse.nNest ){ |
| 8415 rc = SQLITE_ERROR; |
| 8416 } |
| 8417 |
| 8418 return rc; |
| 8419 } |
| 8420 |
| 8421 /* |
| 8422 ** Parameters z and n contain a pointer to and length of a buffer containing |
| 8423 ** an fts3 query expression, respectively. This function attempts to parse the |
| 8424 ** query expression and create a tree of Fts3Expr structures representing the |
| 8425 ** parsed expression. If successful, *ppExpr is set to point to the head |
| 8426 ** of the parsed expression tree and SQLITE_OK is returned. If an error |
| 8427 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse |
| 8428 ** error) is returned and *ppExpr is set to 0. |
| 8429 ** |
| 8430 ** If parameter n is a negative number, then z is assumed to point to a |
| 8431 ** nul-terminated string and the length is determined using strlen(). |
| 8432 ** |
| 8433 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to |
| 8434 ** use to normalize query tokens while parsing the expression. The azCol[] |
| 8435 ** array, which is assumed to contain nCol entries, should contain the names |
| 8436 ** of each column in the target fts3 table, in order from left to right. |
| 8437 ** Column names must be nul-terminated strings. |
| 8438 ** |
| 8439 ** The iDefaultCol parameter should be passed the index of the table column |
| 8440 ** that appears on the left-hand-side of the MATCH operator (the default |
| 8441 ** column to match against for tokens for which a column name is not explicitly |
| 8442 ** specified as part of the query string), or -1 if tokens may by default |
| 8443 ** match any table column. |
| 8444 */ |
| 8445 SQLITE_PRIVATE int sqlite3Fts3ExprParse( |
| 8446 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 8447 int iLangid, /* Language id for tokenizer */ |
| 8448 char **azCol, /* Array of column names for fts3 table */ |
| 8449 int bFts4, /* True to allow FTS4-only syntax */ |
| 8450 int nCol, /* Number of entries in azCol[] */ |
| 8451 int iDefaultCol, /* Default column to query */ |
| 8452 const char *z, int n, /* Text of MATCH query */ |
| 8453 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 8454 char **pzErr /* OUT: Error message (sqlite3_malloc) */ |
| 8455 ){ |
| 8456 int rc = fts3ExprParseUnbalanced( |
| 8457 pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr |
| 8458 ); |
| 8459 |
| 8460 /* Rebalance the expression. And check that its depth does not exceed |
| 8461 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ |
| 8462 if( rc==SQLITE_OK && *ppExpr ){ |
| 8463 rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 8464 if( rc==SQLITE_OK ){ |
| 8465 rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 8466 } |
| 8467 } |
| 8468 |
| 8469 if( rc!=SQLITE_OK ){ |
| 8470 sqlite3Fts3ExprFree(*ppExpr); |
| 8471 *ppExpr = 0; |
| 8472 if( rc==SQLITE_TOOBIG ){ |
| 8473 sqlite3Fts3ErrMsg(pzErr, |
| 8474 "FTS expression tree is too large (maximum depth %d)", |
| 8475 SQLITE_FTS3_MAX_EXPR_DEPTH |
| 8476 ); |
| 8477 rc = SQLITE_ERROR; |
| 8478 }else if( rc==SQLITE_ERROR ){ |
| 8479 sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); |
| 8480 } |
| 8481 } |
| 8482 |
| 8483 return rc; |
| 8484 } |
| 8485 |
| 8486 /* |
| 8487 ** Free a single node of an expression tree. |
| 8488 */ |
| 8489 static void fts3FreeExprNode(Fts3Expr *p){ |
| 8490 assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); |
| 8491 sqlite3Fts3EvalPhraseCleanup(p->pPhrase); |
| 8492 sqlite3_free(p->aMI); |
| 8493 sqlite3_free(p); |
| 8494 } |
| 8495 |
| 8496 /* |
| 8497 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
| 8498 ** |
| 8499 ** This function would be simpler if it recursively called itself. But |
| 8500 ** that would mean passing a sufficiently large expression to ExprParse() |
| 8501 ** could cause a stack overflow. |
| 8502 */ |
| 8503 SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ |
| 8504 Fts3Expr *p; |
| 8505 assert( pDel==0 || pDel->pParent==0 ); |
| 8506 for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ |
| 8507 assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 8508 } |
| 8509 while( p ){ |
| 8510 Fts3Expr *pParent = p->pParent; |
| 8511 fts3FreeExprNode(p); |
| 8512 if( pParent && p==pParent->pLeft && pParent->pRight ){ |
| 8513 p = pParent->pRight; |
| 8514 while( p && (p->pLeft || p->pRight) ){ |
| 8515 assert( p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 8516 p = (p->pLeft ? p->pLeft : p->pRight); |
| 8517 } |
| 8518 }else{ |
| 8519 p = pParent; |
| 8520 } |
| 8521 } |
| 8522 } |
| 8523 |
| 8524 /**************************************************************************** |
| 8525 ***************************************************************************** |
| 8526 ** Everything after this point is just test code. |
| 8527 */ |
| 8528 |
| 8529 #ifdef SQLITE_TEST |
| 8530 |
| 8531 /* #include <stdio.h> */ |
| 8532 |
| 8533 /* |
| 8534 ** Function to query the hash-table of tokenizers (see README.tokenizers). |
| 8535 */ |
| 8536 static int queryTestTokenizer( |
| 8537 sqlite3 *db, |
| 8538 const char *zName, |
| 8539 const sqlite3_tokenizer_module **pp |
| 8540 ){ |
| 8541 int rc; |
| 8542 sqlite3_stmt *pStmt; |
| 8543 const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| 8544 |
| 8545 *pp = 0; |
| 8546 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 8547 if( rc!=SQLITE_OK ){ |
| 8548 return rc; |
| 8549 } |
| 8550 |
| 8551 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 8552 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 8553 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| 8554 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| 8555 } |
| 8556 } |
| 8557 |
| 8558 return sqlite3_finalize(pStmt); |
| 8559 } |
| 8560 |
| 8561 /* |
| 8562 ** Return a pointer to a buffer containing a text representation of the |
| 8563 ** expression passed as the first argument. The buffer is obtained from |
| 8564 ** sqlite3_malloc(). It is the responsibility of the caller to use |
| 8565 ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
| 8566 ** NULL is returned. |
| 8567 ** |
| 8568 ** If the second argument is not NULL, then its contents are prepended to |
| 8569 ** the returned expression text and then freed using sqlite3_free(). |
| 8570 */ |
| 8571 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
| 8572 if( pExpr==0 ){ |
| 8573 return sqlite3_mprintf(""); |
| 8574 } |
| 8575 switch( pExpr->eType ){ |
| 8576 case FTSQUERY_PHRASE: { |
| 8577 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 8578 int i; |
| 8579 zBuf = sqlite3_mprintf( |
| 8580 "%zPHRASE %d 0", zBuf, pPhrase->iColumn); |
| 8581 for(i=0; zBuf && i<pPhrase->nToken; i++){ |
| 8582 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
| 8583 pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
| 8584 (pPhrase->aToken[i].isPrefix?"+":"") |
| 8585 ); |
| 8586 } |
| 8587 return zBuf; |
| 8588 } |
| 8589 |
| 8590 case FTSQUERY_NEAR: |
| 8591 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); |
| 8592 break; |
| 8593 case FTSQUERY_NOT: |
| 8594 zBuf = sqlite3_mprintf("%zNOT ", zBuf); |
| 8595 break; |
| 8596 case FTSQUERY_AND: |
| 8597 zBuf = sqlite3_mprintf("%zAND ", zBuf); |
| 8598 break; |
| 8599 case FTSQUERY_OR: |
| 8600 zBuf = sqlite3_mprintf("%zOR ", zBuf); |
| 8601 break; |
| 8602 } |
| 8603 |
| 8604 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); |
| 8605 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); |
| 8606 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); |
| 8607 |
| 8608 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); |
| 8609 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); |
| 8610 |
| 8611 return zBuf; |
| 8612 } |
| 8613 |
| 8614 /* |
| 8615 ** This is the implementation of a scalar SQL function used to test the |
| 8616 ** expression parser. It should be called as follows: |
| 8617 ** |
| 8618 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); |
| 8619 ** |
| 8620 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used |
| 8621 ** to parse the query expression (see README.tokenizers). The second argument |
| 8622 ** is the query expression to parse. Each subsequent argument is the name |
| 8623 ** of a column of the fts3 table that the query expression may refer to. |
| 8624 ** For example: |
| 8625 ** |
| 8626 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); |
| 8627 */ |
| 8628 static void fts3ExprTest( |
| 8629 sqlite3_context *context, |
| 8630 int argc, |
| 8631 sqlite3_value **argv |
| 8632 ){ |
| 8633 sqlite3_tokenizer_module const *pModule = 0; |
| 8634 sqlite3_tokenizer *pTokenizer = 0; |
| 8635 int rc; |
| 8636 char **azCol = 0; |
| 8637 const char *zExpr; |
| 8638 int nExpr; |
| 8639 int nCol; |
| 8640 int ii; |
| 8641 Fts3Expr *pExpr; |
| 8642 char *zBuf = 0; |
| 8643 sqlite3 *db = sqlite3_context_db_handle(context); |
| 8644 |
| 8645 if( argc<3 ){ |
| 8646 sqlite3_result_error(context, |
| 8647 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 |
| 8648 ); |
| 8649 return; |
| 8650 } |
| 8651 |
| 8652 rc = queryTestTokenizer(db, |
| 8653 (const char *)sqlite3_value_text(argv[0]), &pModule); |
| 8654 if( rc==SQLITE_NOMEM ){ |
| 8655 sqlite3_result_error_nomem(context); |
| 8656 goto exprtest_out; |
| 8657 }else if( !pModule ){ |
| 8658 sqlite3_result_error(context, "No such tokenizer module", -1); |
| 8659 goto exprtest_out; |
| 8660 } |
| 8661 |
| 8662 rc = pModule->xCreate(0, 0, &pTokenizer); |
| 8663 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); |
| 8664 if( rc==SQLITE_NOMEM ){ |
| 8665 sqlite3_result_error_nomem(context); |
| 8666 goto exprtest_out; |
| 8667 } |
| 8668 pTokenizer->pModule = pModule; |
| 8669 |
| 8670 zExpr = (const char *)sqlite3_value_text(argv[1]); |
| 8671 nExpr = sqlite3_value_bytes(argv[1]); |
| 8672 nCol = argc-2; |
| 8673 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); |
| 8674 if( !azCol ){ |
| 8675 sqlite3_result_error_nomem(context); |
| 8676 goto exprtest_out; |
| 8677 } |
| 8678 for(ii=0; ii<nCol; ii++){ |
| 8679 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
| 8680 } |
| 8681 |
| 8682 if( sqlite3_user_data(context) ){ |
| 8683 char *zDummy = 0; |
| 8684 rc = sqlite3Fts3ExprParse( |
| 8685 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy |
| 8686 ); |
| 8687 assert( rc==SQLITE_OK || pExpr==0 ); |
| 8688 sqlite3_free(zDummy); |
| 8689 }else{ |
| 8690 rc = fts3ExprParseUnbalanced( |
| 8691 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr |
| 8692 ); |
| 8693 } |
| 8694 |
| 8695 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| 8696 sqlite3Fts3ExprFree(pExpr); |
| 8697 sqlite3_result_error(context, "Error parsing expression", -1); |
| 8698 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
| 8699 sqlite3_result_error_nomem(context); |
| 8700 }else{ |
| 8701 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
| 8702 sqlite3_free(zBuf); |
| 8703 } |
| 8704 |
| 8705 sqlite3Fts3ExprFree(pExpr); |
| 8706 |
| 8707 exprtest_out: |
| 8708 if( pModule && pTokenizer ){ |
| 8709 rc = pModule->xDestroy(pTokenizer); |
| 8710 } |
| 8711 sqlite3_free(azCol); |
| 8712 } |
| 8713 |
| 8714 /* |
| 8715 ** Register the query expression parser test function fts3_exprtest() |
| 8716 ** with database connection db. |
| 8717 */ |
| 8718 SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
| 8719 int rc = sqlite3_create_function( |
| 8720 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
| 8721 ); |
| 8722 if( rc==SQLITE_OK ){ |
| 8723 rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", |
| 8724 -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 |
| 8725 ); |
| 8726 } |
| 8727 return rc; |
| 8728 } |
| 8729 |
| 8730 #endif |
| 8731 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 8732 |
| 8733 /************** End of fts3_expr.c *******************************************/ |
| 8734 /************** Begin file fts3_hash.c ***************************************/ |
| 8735 /* |
| 8736 ** 2001 September 22 |
| 8737 ** |
| 8738 ** The author disclaims copyright to this source code. In place of |
| 8739 ** a legal notice, here is a blessing: |
| 8740 ** |
| 8741 ** May you do good and not evil. |
| 8742 ** May you find forgiveness for yourself and forgive others. |
| 8743 ** May you share freely, never taking more than you give. |
| 8744 ** |
| 8745 ************************************************************************* |
| 8746 ** This is the implementation of generic hash-tables used in SQLite. |
| 8747 ** We've modified it slightly to serve as a standalone hash table |
| 8748 ** implementation for the full-text indexing module. |
| 8749 */ |
| 8750 |
| 8751 /* |
| 8752 ** The code in this file is only compiled if: |
| 8753 ** |
| 8754 ** * The FTS3 module is being built as an extension |
| 8755 ** (in which case SQLITE_CORE is not defined), or |
| 8756 ** |
| 8757 ** * The FTS3 module is being built into the core of |
| 8758 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 8759 */ |
| 8760 /* #include "fts3Int.h" */ |
| 8761 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 8762 |
| 8763 /* #include <assert.h> */ |
| 8764 /* #include <stdlib.h> */ |
| 8765 /* #include <string.h> */ |
| 8766 |
| 8767 /* #include "fts3_hash.h" */ |
| 8768 |
/*
** Allocate n bytes with sqlite3_malloc() and fill them with zeros.
** Returns NULL if the allocation fails.
*/
static void *fts3HashMalloc(int n){
  void *pNew = sqlite3_malloc(n);
  if( pNew==0 ) return 0;
  memset(pNew, 0, n);
  return pNew;
}
/* Release memory via sqlite3_free(). */
static void fts3HashFree(void *p){
  sqlite3_free(p);
}
| 8782 |
| 8783 /* Turn bulk memory into a hash table object by initializing the |
| 8784 ** fields of the Hash structure. |
| 8785 ** |
| 8786 ** "pNew" is a pointer to the hash table that is to be initialized. |
| 8787 ** keyClass is one of the constants |
| 8788 ** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass |
| 8789 ** determines what kind of key the hash table will use. "copyKey" is |
| 8790 ** true if the hash table should make its own private copy of keys and |
| 8791 ** false if it should just use the supplied pointer. |
| 8792 */ |
| 8793 SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copy
Key){ |
| 8794 assert( pNew!=0 ); |
| 8795 assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); |
| 8796 pNew->keyClass = keyClass; |
| 8797 pNew->copyKey = copyKey; |
| 8798 pNew->first = 0; |
| 8799 pNew->count = 0; |
| 8800 pNew->htsize = 0; |
| 8801 pNew->ht = 0; |
| 8802 } |
| 8803 |
| 8804 /* Remove all entries from a hash table. Reclaim all memory. |
| 8805 ** Call this routine to delete a hash table or to reset a hash table |
| 8806 ** to the empty state. |
| 8807 */ |
| 8808 SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ |
| 8809 Fts3HashElem *elem; /* For looping over all elements of the table */ |
| 8810 |
| 8811 assert( pH!=0 ); |
| 8812 elem = pH->first; |
| 8813 pH->first = 0; |
| 8814 fts3HashFree(pH->ht); |
| 8815 pH->ht = 0; |
| 8816 pH->htsize = 0; |
| 8817 while( elem ){ |
| 8818 Fts3HashElem *next_elem = elem->next; |
| 8819 if( pH->copyKey && elem->pKey ){ |
| 8820 fts3HashFree(elem->pKey); |
| 8821 } |
| 8822 fts3HashFree(elem); |
| 8823 elem = next_elem; |
| 8824 } |
| 8825 pH->count = 0; |
| 8826 } |
| 8827 |
/*
** Hash function used when the key class is FTS3_HASH_STRING.
**
** Hashes the first nKey bytes of pKey.  If nKey is zero or negative the
** key is treated as a nul-terminated string and strlen() supplies the
** length.  The result is always in the range 0..0x7fffffff.
*/
static int fts3StrHash(const void *pKey, int nKey){
  const char *zKey = (const char *)pKey;
  unsigned int uHash = 0;
  int i;

  if( nKey<=0 ) nKey = (int)strlen(zKey);
  for(i=0; i<nKey; i++){
    uHash = (uHash<<3) ^ uHash ^ zKey[i];
  }
  return (int)(uHash & 0x7fffffff);
}
/*
** Comparison function used when the key class is FTS3_HASH_STRING.
** Keys of different lengths never match (1 is returned); otherwise the
** result of strncmp() over n1 bytes is returned, which is 0 on a match.
*/
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  return (n1==n2) ? strncmp((const char*)pKey1, (const char*)pKey2, n1) : 1;
}
| 8845 |
/*
** Hash function used when the key class is FTS3_HASH_BINARY.
**
** Hashes exactly nKey bytes of pKey (nothing is read if nKey<=0).  The
** result is always in the range 0..0x7fffffff.
**
** The accumulator is unsigned: with a signed int, the left shift below
** would be undefined behavior as soon as the value overflowed or went
** negative, which happens for keys of more than a few bytes.
*/
static int fts3BinHash(const void *pKey, int nKey){
  unsigned int h = 0;
  const char *z = (const char *)pKey;
  while( nKey-- > 0 ){
    h = (h<<3) ^ h ^ *(z++);
  }
  return (int)(h & 0x7fffffff);
}
/*
** Comparison function used when the key class is FTS3_HASH_BINARY.
** Keys of different lengths never match (1 is returned); otherwise the
** result of memcmp() over n1 bytes is returned, which is 0 on a match.
*/
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  return (n1==n2) ? memcmp(pKey1, pKey2, n1) : 1;
}
| 8861 |
| 8862 /* |
| 8863 ** Return a pointer to the appropriate hash function given the key class. |
| 8864 ** |
| 8865 ** The C syntax in this function definition may be unfamilar to some |
| 8866 ** programmers, so we provide the following additional explanation: |
| 8867 ** |
| 8868 ** The name of the function is "ftsHashFunction". The function takes a |
| 8869 ** single parameter "keyClass". The return value of ftsHashFunction() |
| 8870 ** is a pointer to another function. Specifically, the return value |
| 8871 ** of ftsHashFunction() is a pointer to a function that takes two parameters |
| 8872 ** with types "const void*" and "int" and returns an "int". |
| 8873 */ |
| 8874 static int (*ftsHashFunction(int keyClass))(const void*,int){ |
| 8875 if( keyClass==FTS3_HASH_STRING ){ |
| 8876 return &fts3StrHash; |
| 8877 }else{ |
| 8878 assert( keyClass==FTS3_HASH_BINARY ); |
| 8879 return &fts3BinHash; |
| 8880 } |
| 8881 } |
| 8882 |
| 8883 /* |
| 8884 ** Return a pointer to the appropriate hash function given the key class. |
| 8885 ** |
| 8886 ** For help in interpreted the obscure C code in the function definition, |
| 8887 ** see the header comment on the previous function. |
| 8888 */ |
| 8889 static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ |
| 8890 if( keyClass==FTS3_HASH_STRING ){ |
| 8891 return &fts3StrCompare; |
| 8892 }else{ |
| 8893 assert( keyClass==FTS3_HASH_BINARY ); |
| 8894 return &fts3BinCompare; |
| 8895 } |
| 8896 } |
| 8897 |
| 8898 /* Link an element into the hash table |
| 8899 */ |
| 8900 static void fts3HashInsertElement( |
| 8901 Fts3Hash *pH, /* The complete hash table */ |
| 8902 struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ |
| 8903 Fts3HashElem *pNew /* The element to be inserted */ |
| 8904 ){ |
| 8905 Fts3HashElem *pHead; /* First element already in pEntry */ |
| 8906 pHead = pEntry->chain; |
| 8907 if( pHead ){ |
| 8908 pNew->next = pHead; |
| 8909 pNew->prev = pHead->prev; |
| 8910 if( pHead->prev ){ pHead->prev->next = pNew; } |
| 8911 else { pH->first = pNew; } |
| 8912 pHead->prev = pNew; |
| 8913 }else{ |
| 8914 pNew->next = pH->first; |
| 8915 if( pH->first ){ pH->first->prev = pNew; } |
| 8916 pNew->prev = 0; |
| 8917 pH->first = pNew; |
| 8918 } |
| 8919 pEntry->count++; |
| 8920 pEntry->chain = pNew; |
| 8921 } |
| 8922 |
| 8923 |
| 8924 /* Resize the hash table so that it cantains "new_size" buckets. |
| 8925 ** "new_size" must be a power of 2. The hash table might fail |
| 8926 ** to resize if sqliteMalloc() fails. |
| 8927 ** |
| 8928 ** Return non-zero if a memory allocation error occurs. |
| 8929 */ |
| 8930 static int fts3Rehash(Fts3Hash *pH, int new_size){ |
| 8931 struct _fts3ht *new_ht; /* The new hash table */ |
| 8932 Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ |
| 8933 int (*xHash)(const void*,int); /* The hash function */ |
| 8934 |
| 8935 assert( (new_size & (new_size-1))==0 ); |
| 8936 new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); |
| 8937 if( new_ht==0 ) return 1; |
| 8938 fts3HashFree(pH->ht); |
| 8939 pH->ht = new_ht; |
| 8940 pH->htsize = new_size; |
| 8941 xHash = ftsHashFunction(pH->keyClass); |
| 8942 for(elem=pH->first, pH->first=0; elem; elem = next_elem){ |
| 8943 int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); |
| 8944 next_elem = elem->next; |
| 8945 fts3HashInsertElement(pH, &new_ht[h], elem); |
| 8946 } |
| 8947 return 0; |
| 8948 } |
| 8949 |
| 8950 /* This function (for internal use only) locates an element in an |
| 8951 ** hash table that matches the given key. The hash for this key has |
| 8952 ** already been computed and is passed as the 4th parameter. |
| 8953 */ |
| 8954 static Fts3HashElem *fts3FindElementByHash( |
| 8955 const Fts3Hash *pH, /* The pH to be searched */ |
| 8956 const void *pKey, /* The key we are searching for */ |
| 8957 int nKey, |
| 8958 int h /* The hash for this key. */ |
| 8959 ){ |
| 8960 Fts3HashElem *elem; /* Used to loop thru the element list */ |
| 8961 int count; /* Number of elements left to test */ |
| 8962 int (*xCompare)(const void*,int,const void*,int); /* comparison function */ |
| 8963 |
| 8964 if( pH->ht ){ |
| 8965 struct _fts3ht *pEntry = &pH->ht[h]; |
| 8966 elem = pEntry->chain; |
| 8967 count = pEntry->count; |
| 8968 xCompare = ftsCompareFunction(pH->keyClass); |
| 8969 while( count-- && elem ){ |
| 8970 if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ |
| 8971 return elem; |
| 8972 } |
| 8973 elem = elem->next; |
| 8974 } |
| 8975 } |
| 8976 return 0; |
| 8977 } |
| 8978 |
| 8979 /* Remove a single entry from the hash table given a pointer to that |
| 8980 ** element and a hash on the element's key. |
| 8981 */ |
| 8982 static void fts3RemoveElementByHash( |
| 8983 Fts3Hash *pH, /* The pH containing "elem" */ |
| 8984 Fts3HashElem* elem, /* The element to be removed from the pH */ |
| 8985 int h /* Hash value for the element */ |
| 8986 ){ |
| 8987 struct _fts3ht *pEntry; |
| 8988 if( elem->prev ){ |
| 8989 elem->prev->next = elem->next; |
| 8990 }else{ |
| 8991 pH->first = elem->next; |
| 8992 } |
| 8993 if( elem->next ){ |
| 8994 elem->next->prev = elem->prev; |
| 8995 } |
| 8996 pEntry = &pH->ht[h]; |
| 8997 if( pEntry->chain==elem ){ |
| 8998 pEntry->chain = elem->next; |
| 8999 } |
| 9000 pEntry->count--; |
| 9001 if( pEntry->count<=0 ){ |
| 9002 pEntry->chain = 0; |
| 9003 } |
| 9004 if( pH->copyKey && elem->pKey ){ |
| 9005 fts3HashFree(elem->pKey); |
| 9006 } |
| 9007 fts3HashFree( elem ); |
| 9008 pH->count--; |
| 9009 if( pH->count<=0 ){ |
| 9010 assert( pH->first==0 ); |
| 9011 assert( pH->count==0 ); |
| 9012 fts3HashClear(pH); |
| 9013 } |
| 9014 } |
| 9015 |
| 9016 SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( |
| 9017 const Fts3Hash *pH, |
| 9018 const void *pKey, |
| 9019 int nKey |
| 9020 ){ |
| 9021 int h; /* A hash on key */ |
| 9022 int (*xHash)(const void*,int); /* The hash function */ |
| 9023 |
| 9024 if( pH==0 || pH->ht==0 ) return 0; |
| 9025 xHash = ftsHashFunction(pH->keyClass); |
| 9026 assert( xHash!=0 ); |
| 9027 h = (*xHash)(pKey,nKey); |
| 9028 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 9029 return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); |
| 9030 } |
| 9031 |
| 9032 /* |
| 9033 ** Attempt to locate an element of the hash table pH with a key |
| 9034 ** that matches pKey,nKey. Return the data for this element if it is |
| 9035 ** found, or NULL if there is no match. |
| 9036 */ |
| 9037 SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, i
nt nKey){ |
| 9038 Fts3HashElem *pElem; /* The element that matches key (if any) */ |
| 9039 |
| 9040 pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); |
| 9041 return pElem ? pElem->data : 0; |
| 9042 } |
| 9043 |
| 9044 /* Insert an element into the hash table pH. The key is pKey,nKey |
| 9045 ** and the data is "data". |
| 9046 ** |
| 9047 ** If no element exists with a matching key, then a new |
| 9048 ** element is created. A copy of the key is made if the copyKey |
| 9049 ** flag is set. NULL is returned. |
| 9050 ** |
| 9051 ** If another element already exists with the same key, then the |
| 9052 ** new data replaces the old data and the old data is returned. |
| 9053 ** The key is not copied in this instance. If a malloc fails, then |
| 9054 ** the new data is returned and the hash table is unchanged. |
| 9055 ** |
| 9056 ** If the "data" parameter to this function is NULL, then the |
| 9057 ** element corresponding to "key" is removed from the hash table. |
| 9058 */ |
| 9059 SQLITE_PRIVATE void *sqlite3Fts3HashInsert( |
| 9060 Fts3Hash *pH, /* The hash table to insert into */ |
| 9061 const void *pKey, /* The key */ |
| 9062 int nKey, /* Number of bytes in the key */ |
| 9063 void *data /* The data */ |
| 9064 ){ |
| 9065 int hraw; /* Raw hash value of the key */ |
| 9066 int h; /* the hash of the key modulo hash table size */ |
| 9067 Fts3HashElem *elem; /* Used to loop thru the element list */ |
| 9068 Fts3HashElem *new_elem; /* New element added to the pH */ |
| 9069 int (*xHash)(const void*,int); /* The hash function */ |
| 9070 |
| 9071 assert( pH!=0 ); |
| 9072 xHash = ftsHashFunction(pH->keyClass); |
| 9073 assert( xHash!=0 ); |
| 9074 hraw = (*xHash)(pKey, nKey); |
| 9075 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 9076 h = hraw & (pH->htsize-1); |
| 9077 elem = fts3FindElementByHash(pH,pKey,nKey,h); |
| 9078 if( elem ){ |
| 9079 void *old_data = elem->data; |
| 9080 if( data==0 ){ |
| 9081 fts3RemoveElementByHash(pH,elem,h); |
| 9082 }else{ |
| 9083 elem->data = data; |
| 9084 } |
| 9085 return old_data; |
| 9086 } |
| 9087 if( data==0 ) return 0; |
| 9088 if( (pH->htsize==0 && fts3Rehash(pH,8)) |
| 9089 || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) |
| 9090 ){ |
| 9091 pH->count = 0; |
| 9092 return data; |
| 9093 } |
| 9094 assert( pH->htsize>0 ); |
| 9095 new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); |
| 9096 if( new_elem==0 ) return data; |
| 9097 if( pH->copyKey && pKey!=0 ){ |
| 9098 new_elem->pKey = fts3HashMalloc( nKey ); |
| 9099 if( new_elem->pKey==0 ){ |
| 9100 fts3HashFree(new_elem); |
| 9101 return data; |
| 9102 } |
| 9103 memcpy((void*)new_elem->pKey, pKey, nKey); |
| 9104 }else{ |
| 9105 new_elem->pKey = (void*)pKey; |
| 9106 } |
| 9107 new_elem->nKey = nKey; |
| 9108 pH->count++; |
| 9109 assert( pH->htsize>0 ); |
| 9110 assert( (pH->htsize & (pH->htsize-1))==0 ); |
| 9111 h = hraw & (pH->htsize-1); |
| 9112 fts3HashInsertElement(pH, &pH->ht[h], new_elem); |
| 9113 new_elem->data = data; |
| 9114 return 0; |
| 9115 } |
| 9116 |
| 9117 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 9118 |
| 9119 /************** End of fts3_hash.c *******************************************/ |
| 9120 /************** Begin file fts3_porter.c *************************************/ |
| 9121 /* |
| 9122 ** 2006 September 30 |
| 9123 ** |
| 9124 ** The author disclaims copyright to this source code. In place of |
| 9125 ** a legal notice, here is a blessing: |
| 9126 ** |
| 9127 ** May you do good and not evil. |
| 9128 ** May you find forgiveness for yourself and forgive others. |
| 9129 ** May you share freely, never taking more than you give. |
| 9130 ** |
| 9131 ************************************************************************* |
| 9132 ** Implementation of the full-text-search tokenizer that implements |
| 9133 ** a Porter stemmer. |
| 9134 */ |
| 9135 |
| 9136 /* |
| 9137 ** The code in this file is only compiled if: |
| 9138 ** |
| 9139 ** * The FTS3 module is being built as an extension |
| 9140 ** (in which case SQLITE_CORE is not defined), or |
| 9141 ** |
| 9142 ** * The FTS3 module is being built into the core of |
| 9143 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 9144 */ |
| 9145 /* #include "fts3Int.h" */ |
| 9146 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 9147 |
| 9148 /* #include <assert.h> */ |
| 9149 /* #include <stdlib.h> */ |
| 9150 /* #include <stdio.h> */ |
| 9151 /* #include <string.h> */ |
| 9152 |
| 9153 /* #include "fts3_tokenizer.h" */ |
| 9154 |
| 9155 /* |
| 9156 ** Class derived from sqlite3_tokenizer |
| 9157 */ |
| 9158 typedef struct porter_tokenizer { |
| 9159 sqlite3_tokenizer base; /* Base class */ |
| 9160 } porter_tokenizer; |
| 9161 |
| 9162 /* |
| 9163 ** Class derived from sqlite3_tokenizer_cursor |
| 9164 */ |
| 9165 typedef struct porter_tokenizer_cursor { |
| 9166 sqlite3_tokenizer_cursor base; |
| 9167 const char *zInput; /* input we are tokenizing */ |
| 9168 int nInput; /* size of the input */ |
| 9169 int iOffset; /* current position in zInput */ |
| 9170 int iToken; /* index of next token to be returned */ |
| 9171 char *zToken; /* storage for current token */ |
| 9172 int nAllocated; /* space allocated to zToken buffer */ |
| 9173 } porter_tokenizer_cursor; |
| 9174 |
| 9175 |
| 9176 /* |
| 9177 ** Create a new tokenizer instance. |
| 9178 */ |
| 9179 static int porterCreate( |
| 9180 int argc, const char * const *argv, |
| 9181 sqlite3_tokenizer **ppTokenizer |
| 9182 ){ |
| 9183 porter_tokenizer *t; |
| 9184 |
| 9185 UNUSED_PARAMETER(argc); |
| 9186 UNUSED_PARAMETER(argv); |
| 9187 |
| 9188 t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); |
| 9189 if( t==NULL ) return SQLITE_NOMEM; |
| 9190 memset(t, 0, sizeof(*t)); |
| 9191 *ppTokenizer = &t->base; |
| 9192 return SQLITE_OK; |
| 9193 } |
| 9194 |
| 9195 /* |
| 9196 ** Destroy a tokenizer |
| 9197 */ |
| 9198 static int porterDestroy(sqlite3_tokenizer *pTokenizer){ |
| 9199 sqlite3_free(pTokenizer); |
| 9200 return SQLITE_OK; |
| 9201 } |
| 9202 |
| 9203 /* |
| 9204 ** Prepare to begin tokenizing a particular string. The input |
| 9205 ** string to be tokenized is zInput[0..nInput-1]. A cursor |
| 9206 ** used to incrementally tokenize this string is returned in |
| 9207 ** *ppCursor. |
| 9208 */ |
| 9209 static int porterOpen( |
| 9210 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 9211 const char *zInput, int nInput, /* String to be tokenized */ |
| 9212 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 9213 ){ |
| 9214 porter_tokenizer_cursor *c; |
| 9215 |
| 9216 UNUSED_PARAMETER(pTokenizer); |
| 9217 |
| 9218 c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); |
| 9219 if( c==NULL ) return SQLITE_NOMEM; |
| 9220 |
| 9221 c->zInput = zInput; |
| 9222 if( zInput==0 ){ |
| 9223 c->nInput = 0; |
| 9224 }else if( nInput<0 ){ |
| 9225 c->nInput = (int)strlen(zInput); |
| 9226 }else{ |
| 9227 c->nInput = nInput; |
| 9228 } |
| 9229 c->iOffset = 0; /* start tokenizing at the beginning */ |
| 9230 c->iToken = 0; |
| 9231 c->zToken = NULL; /* no space allocated, yet. */ |
| 9232 c->nAllocated = 0; |
| 9233 |
| 9234 *ppCursor = &c->base; |
| 9235 return SQLITE_OK; |
| 9236 } |
| 9237 |
| 9238 /* |
| 9239 ** Close a tokenization cursor previously opened by a call to |
| 9240 ** porterOpen() above. |
| 9241 */ |
| 9242 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ |
| 9243 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
| 9244 sqlite3_free(c->zToken); |
| 9245 sqlite3_free(c); |
| 9246 return SQLITE_OK; |
| 9247 } |
/*
** Letter classes for 'a'..'z', indexed by (letter - 'a'):
**
**     0   always a vowel          (a, e, i, o, u)
**     1   always a consonant
**     2   depends on the context  (y)
*/
static const char cType[] = {
  /* a  b  c  d  e  f  g  h  i  j  k  l  m */
     0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
  /* n  o  p  q  r  s  t  u  v  w  x  y  z */
     1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1
};

/*
** isConsonant() and isVowel() report whether the first character of the
** string z is a consonant or a vowel under the Porter rules.  Both
** return 0 for an empty string.  The string must consist only of the
** lower-case letters 'a'..'z'.
**
** Every letter other than 'a', 'e', 'i', 'o' and 'u' is a consonant,
** except that 'y' is a vowel when it comes directly after a consonant.
**
** These routines operate on words stored in reverse order, so the rule
** for 'y' is applied by looking at the character that FOLLOWS it in z:
** 'y' is a consonant if it is the last character of z or is followed by
** a vowel, and a vowel if it is followed by a consonant.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
  char ch = z[0];
  int kind;
  if( ch=='\0' ) return 0;
  assert( ch>='a' && ch<='z' );
  kind = cType[ch-'a'];
  if( kind==2 ){
    /* 'y': decided by the next character of the reversed word */
    return z[1]=='\0' || isVowel(&z[1]);
  }
  return kind;
}
static int isVowel(const char *z){
  char ch = z[0];
  int kind;
  if( ch=='\0' ) return 0;
  assert( ch>='a' && ch<='z' );
  kind = cType[ch-'a'];
  if( kind==2 ){
    /* 'y': a vowel exactly when the next character is a consonant */
    return isConsonant(&z[1]);
  }
  return 1-kind;
}
| 9288 |
/*
** Write V for any run of one or more vowels and C for any run of one or
** more consonants.  Every word then has the form:
**
**           [C] (VC){m} [V]
**
** That is: an optional leading consonant run, then m vowel-consonant
** pairs, then an optional trailing vowel run.  "m" is the measure of
** the word.
**
** Return true if the measure of z is 1 or more, i.e. if z contains at
** least one vowel that is followed by a consonant.
**
** z[] holds the word in reverse order, so the scan actually looks for a
** consonant that is followed by a vowel.
*/
static int m_gt_0(const char *z){
  int i = 0;
  for(; isVowel(&z[i]); i++){}       /* skip the optional trailing [V] */
  if( z[i]=='\0' ) return 0;
  for(; isConsonant(&z[i]); i++){}   /* the C of a VC pair */
  return z[i]!='\0';                 /* anything left begins with its V */
}
| 9314 |
/* Like m_gt_0() above, except that this routine returns true only when
** the measure m of the (reversed) word z is exactly 1.
*/
static int m_eq_1(const char *z){
  int i = 0;
  for(; isVowel(&z[i]); i++){}       /* optional trailing [V] */
  if( z[i]=='\0' ) return 0;         /* vowels only: m==0 */
  for(; isConsonant(&z[i]); i++){}   /* C of the one VC pair */
  if( z[i]=='\0' ) return 0;         /* no vowel before it: m==0 */
  for(; isVowel(&z[i]); i++){}       /* V of the one VC pair */
  if( z[i]=='\0' ) return 1;         /* word is V C [V]: m==1 */
  for(; isConsonant(&z[i]); i++){}   /* optional leading [C] */
  return z[i]=='\0';                 /* any remainder means m>1 */
}
| 9328 |
/* Like m_gt_0() above, except that this routine tests for a measure
** m>1 instead of m>0.
*/
static int m_gt_1(const char *z){
  int i = 0;
  for(; isVowel(&z[i]); i++){}       /* optional trailing [V] */
  if( z[i]=='\0' ) return 0;
  for(; isConsonant(&z[i]); i++){}   /* C of the last VC pair */
  if( z[i]=='\0' ) return 0;
  for(; isVowel(&z[i]); i++){}       /* V of the last VC pair */
  if( z[i]=='\0' ) return 0;
  for(; isConsonant(&z[i]); i++){}   /* C of the pair before it */
  return z[i]!='\0';                 /* a vowel remains, so m>=2 */
}
| 9342 |
/*
** Return TRUE if there is a vowel anywhere within the nul-terminated
** (reversed) string z.
*/
static int hasVowel(const char *z){
  int i = 0;
  for(; isConsonant(&z[i]); i++){}   /* skip the leading consonants */
  return z[i]!='\0';                 /* stopped on a vowel, or the end? */
}
| 9350 |
/*
** Return TRUE if the word ends in a double consonant, that is, the same
** consonant twice in a row.
**
** The text is stored in reverse, so the test is applied to the first
** two characters of z[].
*/
static int doubleConsonant(const char *z){
  if( !isConsonant(z) ){
    return 0;
  }
  return z[0]==z[1];
}
| 9360 |
/*
** Return TRUE if the word ends with the three-letter pattern
** consonant-vowel-consonant, where the final consonant is not one of
** 'w', 'x' or 'y'.
**
** The word is stored in reverse, so the pattern is matched against the
** first three letters of z[] and it is z[0] that may not be in [wxy].
*/
static int star_oh(const char *z){
  if( !isConsonant(z) ) return 0;
  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
  if( !isVowel(z+1) ) return 0;
  return isConsonant(z+2);
}
| 9376 |
/*
** Conditional suffix replacement.  If the word ends with zFrom and
** xCond() is true for the stem that precedes that ending (or xCond is
** NULL), replace the ending with zTo.
**
** The word *pz and the suffix zFrom are both stored in reverse order.
** zTo is in normal order.  The replacement is written into the bytes
** immediately before the stem and *pz is moved to the new start of the
** reversed word.
**
** Return TRUE if zFrom matches and FALSE if it does not.  Note that
** TRUE is returned even when xCond() fails and no substitution is made;
** in that case *pz and the word are left untouched.
*/
static int stem(
  char **pz,             /* The word being stemmed (Reversed) */
  const char *zFrom,     /* If the ending matches this... (Reversed) */
  const char *zTo,       /* ... change the ending to this (not reversed) */
  int (*xCond)(const char*)   /* Condition that must be true */
){
  char *zWord = *pz;
  const char *zRepl;

  /* Match the reversed suffix against the front of the reversed word. */
  for(; *zFrom!=0; zFrom++, zWord++){
    if( *zFrom!=*zWord ) return 0;
  }

  /* zWord now points at the stem.  Bail out if the condition fails. */
  if( xCond!=0 && !xCond(zWord) ) return 1;

  /* Prepend the new ending.  Writing it backwards keeps the word in
  ** reversed form. */
  for(zRepl=zTo; *zRepl!=0; zRepl++){
    zWord--;
    *zWord = *zRepl;
  }
  *pz = zWord;
  return 1;
}
| 9405 |
/*
** Fallback stemmer used when the porter stemmer is not appropriate.
** The input word zIn[0..nIn-1] is copied into zOut with US-ASCII
** upper-case letters folded to lower case.
**
** An over-long word is shortened by keeping only bytes from its two
** ends.  A word that contains a digit and is longer than 6 bytes keeps
** its first 3 and last 3 bytes.  A word without digits that is longer
** than 20 bytes keeps its first 10 and last 10 bytes.
**
** zOut receives a nul terminator and *pnOut is set to the length of the
** result, not counting the terminator.  Up to nIn+1 bytes of zOut are
** written.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int sawDigit = 0;      /* True once a digit has been seen */
  int nKeep;             /* Bytes retained from each end of a long word */
  int nOut;              /* Length of the result */
  int k;

  for(k=0; k<nIn; k++){
    char ch = zIn[k];
    if( ch>='0' && ch<='9' ){
      sawDigit = 1;
    }else if( ch>='A' && ch<='Z' ){
      ch = ch - 'A' + 'a';
    }
    zOut[k] = ch;
  }
  nOut = k;

  nKeep = sawDigit ? 3 : 10;
  if( nIn>nKeep*2 ){
    /* Slide the tail down so that it directly follows the head. */
    memmove(&zOut[nKeep], &zOut[nIn-nKeep], (size_t)nKeep);
    nOut = nKeep*2;
  }
  zOut[nOut] = 0;
  *pnOut = nOut;
}
| 9436 |
| 9437 |
/*
** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
** zOut must be big enough to hold nIn bytes plus a nul terminator.
** Write the actual size of the output word (exclusive of the '\0'
** terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case.  Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word.  If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end.  For long words without digits, 10 bytes
** are taken from each end.  US-ASCII case folding still applies.
**
** If the input word contains any character that is not in [a-zA-Z]
** (a digit, for example), or if it is shorter than 3 bytes, then no
** stemming is attempted and this routine just copies the input into
** the output with US-ASCII case folding (see copy_stemmer()).
**
** Stemming never increases the length of the word.  So there is
** no chance of overflowing the zOut buffer.
**
** Implementation note: the word is copied into a local buffer in
** REVERSE order so that suffix tests become prefix tests.  z points at
** the start of the reversed word (that is, at the LAST letter of the
** word) and is advanced to drop letters from the end of the word or
** moved backwards when stem() prepends a replacement ending.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int i, j;
  char zReverse[28];   /* The word in reverse order, plus padding */
  char *z, *z2;
  if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
    /* The word is too big or too small for the porter stemmer.
    ** Fallback to the copy stemmer.  Only words of 3 to 20 bytes get
    ** past this test. */
    copy_stemmer(zIn, nIn, zOut, pnOut);
    return;
  }
  /* Fold to lower case and store the word backwards, with its first
  ** letter at zReverse[sizeof(zReverse)-6]. */
  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
    char c = zIn[i];
    if( c>='A' && c<='Z' ){
      zReverse[j] = c + 'a' - 'A';
    }else if( c>='a' && c<='z' ){
      zReverse[j] = c;
    }else{
      /* The use of a character not in [a-zA-Z] means that we fallback
      ** to the copy stemmer */
      copy_stemmer(zIn, nIn, zOut, pnOut);
      return;
    }
  }
  /* Zero the five bytes that follow the reversed word.  They act as
  ** the terminator; presumably the extra bytes are there so that the
  ** fixed-offset reads below (z[1] .. z[3]) see zeros near the end of
  ** a short word. */
  memset(&zReverse[sizeof(zReverse)-5], 0, 5);
  z = &zReverse[j+1];


  /* Step 1a:  -sses => -ss,  -ies => -i,  -ss => -ss,  otherwise drop
  ** a final -s. */
  if( z[0]=='s' ){
    if(
     !stem(&z, "sess", "ss", 0) &&
     !stem(&z, "sei", "i", 0)  &&
     !stem(&z, "ss", "ss", 0)
    ){
      z++;
    }
  }

  /* Step 1b:  -eed => -ee when m>0.  Otherwise remove -ing or -ed when
  ** the remaining stem has a vowel, then tidy up the new ending.  z2
  ** records the position before the removal so that "z!=z2" detects
  ** whether anything was actually removed. */
  z2 = z;
  if( stem(&z, "dee", "ee", m_gt_0) ){
    /* Do nothing.  The work was all in the test */
  }else if(
     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
      && z!=z2
  ){
     if( stem(&z, "ta", "ate", 0) ||
         stem(&z, "lb", "ble", 0) ||
         stem(&z, "zi", "ize", 0) ){
       /* Do nothing.  The work was all in the test */
     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
       /* Collapse a doubled final consonant other than l, s or z */
       z++;
     }else if( m_eq_1(z) && star_oh(z) ){
       /* Short stem ending consonant-vowel-consonant: append an 'e' */
       *(--z) = 'e';
     }
  }

  /* Step 1c:  a final -y becomes -i when the stem has a vowel */
  if( z[0]=='y' && hasVowel(z+1) ){
    z[0] = 'i';
  }

  /* Step 2:  map double suffixes to single ones when m>0.  The switch
  ** is on z[1], the next-to-last letter of the word. */
  switch( z[1] ){
   case 'a':
     if( !stem(&z, "lanoita", "ate", m_gt_0) ){
       stem(&z, "lanoit", "tion", m_gt_0);
     }
     break;
   case 'c':
     if( !stem(&z, "icne", "ence", m_gt_0) ){
       stem(&z, "icna", "ance", m_gt_0);
     }
     break;
   case 'e':
     stem(&z, "rezi", "ize", m_gt_0);
     break;
   case 'g':
     stem(&z, "igol", "log", m_gt_0);
     break;
   case 'l':
     if( !stem(&z, "ilb", "ble", m_gt_0)
      && !stem(&z, "illa", "al", m_gt_0)
      && !stem(&z, "iltne", "ent", m_gt_0)
      && !stem(&z, "ile", "e", m_gt_0)
     ){
       stem(&z, "ilsuo", "ous", m_gt_0);
     }
     break;
   case 'o':
     if( !stem(&z, "noitazi", "ize", m_gt_0)
      && !stem(&z, "noita", "ate", m_gt_0)
     ){
       stem(&z, "rota", "ate", m_gt_0);
     }
     break;
   case 's':
     if( !stem(&z, "msila", "al", m_gt_0)
      && !stem(&z, "ssenevi", "ive", m_gt_0)
      && !stem(&z, "ssenluf", "ful", m_gt_0)
     ){
       stem(&z, "ssensuo", "ous", m_gt_0);
     }
     break;
   case 't':
     if( !stem(&z, "itila", "al", m_gt_0)
      && !stem(&z, "itivi", "ive", m_gt_0)
     ){
       stem(&z, "itilib", "ble", m_gt_0);
     }
     break;
  }

  /* Step 3:  shorten or remove further suffixes when m>0.  The switch
  ** is on z[0], the last letter of the word. */
  switch( z[0] ){
   case 'e':
     if( !stem(&z, "etaci", "ic", m_gt_0)
      && !stem(&z, "evita", "", m_gt_0)
     ){
       stem(&z, "ezila", "al", m_gt_0);
     }
     break;
   case 'i':
     stem(&z, "itici", "ic", m_gt_0);
     break;
   case 'l':
     if( !stem(&z, "laci", "ic", m_gt_0) ){
       stem(&z, "luf", "", m_gt_0);
     }
     break;
   case 's':
     stem(&z, "ssen", "", m_gt_0);
     break;
  }

  /* Step 4:  remove a remaining suffix when the stem in front of it
  ** has m>1.  The switch is again on the next-to-last letter.  Where
  ** the suffix is matched letter by letter, "z += N" drops its N
  ** letters from the end of the word. */
  switch( z[1] ){
   case 'a':
     /* -al */
     if( z[0]=='l' && m_gt_1(z+2) ){
       z += 2;
     }
     break;
   case 'c':
     /* -ance, -ence */
     if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e')  && m_gt_1(z+4)  ){
       z += 4;
     }
     break;
   case 'e':
     /* -er */
     if( z[0]=='r' && m_gt_1(z+2) ){
       z += 2;
     }
     break;
   case 'i':
     /* -ic */
     if( z[0]=='c' && m_gt_1(z+2) ){
       z += 2;
     }
     break;
   case 'l':
     /* -able, -ible */
     if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
       z += 4;
     }
     break;
   case 'n':
     /* -ant, -ement, -ment, -ent */
     if( z[0]=='t' ){
       if( z[2]=='a' ){
         if( m_gt_1(z+3) ){
           z += 3;
         }
       }else if( z[2]=='e' ){
         if( !stem(&z, "tneme", "", m_gt_1)
          && !stem(&z, "tnem", "", m_gt_1)
         ){
           stem(&z, "tne", "", m_gt_1);
         }
       }
     }
     break;
   case 'o':
     /* -ou, or -ion when it is preceded by 's' or 't' */
     if( z[0]=='u' ){
       if( m_gt_1(z+2) ){
         z += 2;
       }
     }else if( z[3]=='s' || z[3]=='t' ){
       stem(&z, "noi", "", m_gt_1);
     }
     break;
   case 's':
     /* -ism */
     if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
       z += 3;
     }
     break;
   case 't':
     /* -ate, -iti */
     if( !stem(&z, "eta", "", m_gt_1) ){
       stem(&z, "iti", "", m_gt_1);
     }
     break;
   case 'u':
     /* -ous */
     if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
       z += 3;
     }
     break;
   case 'v':
   case 'z':
     /* -ive, -ize */
     if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
       z += 3;
     }
     break;
  }

  /* Step 5a:  drop a final -e when m>1, or when m==1 and the stem does
  ** not end consonant-vowel-consonant. */
  if( z[0]=='e' ){
    if( m_gt_1(z+1) ){
      z++;
    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
      z++;
    }
  }

  /* Step 5b:  a final -ll becomes -l when m>1 */
  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
    z++;
  }

  /* z[] is now the stemmed word in reverse order.  Flip it back
  ** around into forward order and return.
  */
  *pnOut = i = (int)strlen(z);
  zOut[i] = 0;
  while( *z ){
    zOut[--i] = *(z++);
  }
}
| 9693 |
/*
** Characters that can be part of a token.  Any byte with the high bit
** set (0x80 and above, i.e. part of a multi-byte UTF character) is
** always treated as part of a token, so every delimiter has a value of
** 0x7f or lower.
**
** porterIdChar[] covers the range 0x30..0x7f and is indexed by
** (character - 0x30).  A non-zero entry marks a token character: the
** digits, the ASCII letters and '_'.  Everything below 0x30 is a
** delimiter.
*/
static const char porterIdChar[] = {
  /* 0x30..0x3f:  '0'-'9' then : ; < = > ? */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40..0x4f:  '@' then 'A'-'O' */
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50..0x5f:  'P'-'Z' then [ \ ] ^ and '_' */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60..0x6f:  '`' then 'a'-'o' */
  0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70..0x7f:  'p'-'z' then { | } ~ DEL */
  1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
};

/*
** True if character C is a token delimiter.  The macro assigns C to a
** variable named "ch", which must be declared as an int in the scope
** where the macro is used.
*/
#define isDelim(C) \
    (((ch=(C))&0x80)==0 && (ch<0x30 || porterIdChar[ch-0x30]==0))
| 9709 |
| 9710 /* |
| 9711 ** Extract the next token from a tokenization cursor. The cursor must |
| 9712 ** have been opened by a prior call to porterOpen(). |
| 9713 */ |
| 9714 static int porterNext( |
| 9715 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ |
| 9716 const char **pzToken, /* OUT: *pzToken is the token text */ |
| 9717 int *pnBytes, /* OUT: Number of bytes in token */ |
| 9718 int *piStartOffset, /* OUT: Starting offset of token */ |
| 9719 int *piEndOffset, /* OUT: Ending offset of token */ |
| 9720 int *piPosition /* OUT: Position integer of token */ |
| 9721 ){ |
| 9722 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
| 9723 const char *z = c->zInput; |
| 9724 |
| 9725 while( c->iOffset<c->nInput ){ |
| 9726 int iStartOffset, ch; |
| 9727 |
| 9728 /* Scan past delimiter characters */ |
| 9729 while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){ |
| 9730 c->iOffset++; |
| 9731 } |
| 9732 |
| 9733 /* Count non-delimiter characters. */ |
| 9734 iStartOffset = c->iOffset; |
| 9735 while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){ |
| 9736 c->iOffset++; |
| 9737 } |
| 9738 |
| 9739 if( c->iOffset>iStartOffset ){ |
| 9740 int n = c->iOffset-iStartOffset; |
| 9741 if( n>c->nAllocated ){ |
| 9742 char *pNew; |
| 9743 c->nAllocated = n+20; |
| 9744 pNew = sqlite3_realloc(c->zToken, c->nAllocated); |
| 9745 if( !pNew ) return SQLITE_NOMEM; |
| 9746 c->zToken = pNew; |
| 9747 } |
| 9748 porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); |
| 9749 *pzToken = c->zToken; |
| 9750 *piStartOffset = iStartOffset; |
| 9751 *piEndOffset = c->iOffset; |
| 9752 *piPosition = c->iToken++; |
| 9753 return SQLITE_OK; |
| 9754 } |
| 9755 } |
| 9756 return SQLITE_DONE; |
| 9757 } |
| 9758 |
| 9759 /* |
| 9760 ** The set of routines that implement the porter-stemmer tokenizer |
| 9761 */ |
| 9762 static const sqlite3_tokenizer_module porterTokenizerModule = { |
| 9763 0, |
| 9764 porterCreate, |
| 9765 porterDestroy, |
| 9766 porterOpen, |
| 9767 porterClose, |
| 9768 porterNext, |
| 9769 0 |
| 9770 }; |
| 9771 |
| 9772 /* |
| 9773 ** Allocate a new porter tokenizer. Return a pointer to the new |
| 9774 ** tokenizer in *ppModule |
| 9775 */ |
| 9776 SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( |
| 9777 sqlite3_tokenizer_module const**ppModule |
| 9778 ){ |
| 9779 *ppModule = &porterTokenizerModule; |
| 9780 } |
| 9781 |
| 9782 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 9783 |
| 9784 /************** End of fts3_porter.c *****************************************/ |
| 9785 /************** Begin file fts3_tokenizer.c **********************************/ |
| 9786 /* |
| 9787 ** 2007 June 22 |
| 9788 ** |
| 9789 ** The author disclaims copyright to this source code. In place of |
| 9790 ** a legal notice, here is a blessing: |
| 9791 ** |
| 9792 ** May you do good and not evil. |
| 9793 ** May you find forgiveness for yourself and forgive others. |
| 9794 ** May you share freely, never taking more than you give. |
| 9795 ** |
| 9796 ****************************************************************************** |
| 9797 ** |
| 9798 ** This is part of an SQLite module implementing full-text search. |
| 9799 ** This particular file implements the generic tokenizer interface. |
| 9800 */ |
| 9801 |
| 9802 /* |
| 9803 ** The code in this file is only compiled if: |
| 9804 ** |
| 9805 ** * The FTS3 module is being built as an extension |
| 9806 ** (in which case SQLITE_CORE is not defined), or |
| 9807 ** |
| 9808 ** * The FTS3 module is being built into the core of |
| 9809 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 9810 */ |
| 9811 /* #include "fts3Int.h" */ |
| 9812 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 9813 |
| 9814 /* #include <assert.h> */ |
| 9815 /* #include <string.h> */ |
| 9816 |
| 9817 /* |
| 9818 ** Return true if the two-argument version of fts3_tokenizer() |
| 9819 ** has been activated via a prior call to sqlite3_db_config(db, |
| 9820 ** SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); |
| 9821 */ |
| 9822 static int fts3TokenizerEnabled(sqlite3_context *context){ |
| 9823 sqlite3 *db = sqlite3_context_db_handle(context); |
| 9824 int isEnabled = 0; |
| 9825 sqlite3_db_config(db,SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER,-1,&isEnabled); |
| 9826 return isEnabled; |
| 9827 } |
| 9828 |
| 9829 /* |
| 9830 ** Implementation of the SQL scalar function for accessing the underlying |
| 9831 ** hash table. This function may be called as follows: |
| 9832 ** |
| 9833 ** SELECT <function-name>(<key-name>); |
| 9834 ** SELECT <function-name>(<key-name>, <pointer>); |
| 9835 ** |
| 9836 ** where <function-name> is the name passed as the second argument |
| 9837 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). |
| 9838 ** |
| 9839 ** If the <pointer> argument is specified, it must be a blob value |
| 9840 ** containing a pointer to be stored as the hash data corresponding |
| 9841 ** to the string <key-name>. If <pointer> is not specified, then |
| 9842 ** the string <key-name> must already exist in the has table. Otherwise, |
| 9843 ** an error is returned. |
| 9844 ** |
| 9845 ** Whether or not the <pointer> argument is specified, the value returned |
| 9846 ** is a blob containing the pointer stored as the hash data corresponding |
| 9847 ** to string <key-name> (after the hash-table is updated, if applicable). |
| 9848 */ |
| 9849 static void fts3TokenizerFunc( |
| 9850 sqlite3_context *context, |
| 9851 int argc, |
| 9852 sqlite3_value **argv |
| 9853 ){ |
| 9854 Fts3Hash *pHash; |
| 9855 void *pPtr = 0; |
| 9856 const unsigned char *zName; |
| 9857 int nName; |
| 9858 |
| 9859 assert( argc==1 || argc==2 ); |
| 9860 |
| 9861 pHash = (Fts3Hash *)sqlite3_user_data(context); |
| 9862 |
| 9863 zName = sqlite3_value_text(argv[0]); |
| 9864 nName = sqlite3_value_bytes(argv[0])+1; |
| 9865 |
| 9866 if( argc==2 ){ |
| 9867 if( fts3TokenizerEnabled(context) ){ |
| 9868 void *pOld; |
| 9869 int n = sqlite3_value_bytes(argv[1]); |
| 9870 if( zName==0 || n!=sizeof(pPtr) ){ |
| 9871 sqlite3_result_error(context, "argument type mismatch", -1); |
| 9872 return; |
| 9873 } |
| 9874 pPtr = *(void **)sqlite3_value_blob(argv[1]); |
| 9875 pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); |
| 9876 if( pOld==pPtr ){ |
| 9877 sqlite3_result_error(context, "out of memory", -1); |
| 9878 } |
| 9879 }else{ |
| 9880 sqlite3_result_error(context, "fts3tokenize disabled", -1); |
| 9881 return; |
| 9882 } |
| 9883 }else{ |
| 9884 if( zName ){ |
| 9885 pPtr = sqlite3Fts3HashFind(pHash, zName, nName); |
| 9886 } |
| 9887 if( !pPtr ){ |
| 9888 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); |
| 9889 sqlite3_result_error(context, zErr, -1); |
| 9890 sqlite3_free(zErr); |
| 9891 return; |
| 9892 } |
| 9893 } |
| 9894 sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); |
| 9895 } |
| 9896 |
| 9897 SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ |
| 9898 static const char isFtsIdChar[] = { |
| 9899 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ |
| 9900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ |
| 9901 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ |
| 9902 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ |
| 9903 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ |
| 9904 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ |
| 9905 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ |
| 9906 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ |
| 9907 }; |
| 9908 return (c&0x80 || isFtsIdChar[(int)(c)]); |
| 9909 } |
| 9910 |
| 9911 SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ |
| 9912 const char *z1; |
| 9913 const char *z2 = 0; |
| 9914 |
| 9915 /* Find the start of the next token. */ |
| 9916 z1 = zStr; |
| 9917 while( z2==0 ){ |
| 9918 char c = *z1; |
| 9919 switch( c ){ |
| 9920 case '\0': return 0; /* No more tokens here */ |
| 9921 case '\'': |
| 9922 case '"': |
| 9923 case '`': { |
| 9924 z2 = z1; |
| 9925 while( *++z2 && (*z2!=c || *++z2==c) ); |
| 9926 break; |
| 9927 } |
| 9928 case '[': |
| 9929 z2 = &z1[1]; |
| 9930 while( *z2 && z2[0]!=']' ) z2++; |
| 9931 if( *z2 ) z2++; |
| 9932 break; |
| 9933 |
| 9934 default: |
| 9935 if( sqlite3Fts3IsIdChar(*z1) ){ |
| 9936 z2 = &z1[1]; |
| 9937 while( sqlite3Fts3IsIdChar(*z2) ) z2++; |
| 9938 }else{ |
| 9939 z1++; |
| 9940 } |
| 9941 } |
| 9942 } |
| 9943 |
| 9944 *pn = (int)(z2-z1); |
| 9945 return z1; |
| 9946 } |
| 9947 |
| 9948 SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( |
| 9949 Fts3Hash *pHash, /* Tokenizer hash table */ |
| 9950 const char *zArg, /* Tokenizer name */ |
| 9951 sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ |
| 9952 char **pzErr /* OUT: Set to malloced error message */ |
| 9953 ){ |
| 9954 int rc; |
| 9955 char *z = (char *)zArg; |
| 9956 int n = 0; |
| 9957 char *zCopy; |
| 9958 char *zEnd; /* Pointer to nul-term of zCopy */ |
| 9959 sqlite3_tokenizer_module *m; |
| 9960 |
| 9961 zCopy = sqlite3_mprintf("%s", zArg); |
| 9962 if( !zCopy ) return SQLITE_NOMEM; |
| 9963 zEnd = &zCopy[strlen(zCopy)]; |
| 9964 |
| 9965 z = (char *)sqlite3Fts3NextToken(zCopy, &n); |
| 9966 if( z==0 ){ |
| 9967 assert( n==0 ); |
| 9968 z = zCopy; |
| 9969 } |
| 9970 z[n] = '\0'; |
| 9971 sqlite3Fts3Dequote(z); |
| 9972 |
| 9973 m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); |
| 9974 if( !m ){ |
| 9975 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); |
| 9976 rc = SQLITE_ERROR; |
| 9977 }else{ |
| 9978 char const **aArg = 0; |
| 9979 int iArg = 0; |
| 9980 z = &z[n+1]; |
| 9981 while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){ |
| 9982 int nNew = sizeof(char *)*(iArg+1); |
| 9983 char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew); |
| 9984 if( !aNew ){ |
| 9985 sqlite3_free(zCopy); |
| 9986 sqlite3_free((void *)aArg); |
| 9987 return SQLITE_NOMEM; |
| 9988 } |
| 9989 aArg = aNew; |
| 9990 aArg[iArg++] = z; |
| 9991 z[n] = '\0'; |
| 9992 sqlite3Fts3Dequote(z); |
| 9993 z = &z[n+1]; |
| 9994 } |
| 9995 rc = m->xCreate(iArg, aArg, ppTok); |
| 9996 assert( rc!=SQLITE_OK || *ppTok ); |
| 9997 if( rc!=SQLITE_OK ){ |
| 9998 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); |
| 9999 }else{ |
| 10000 (*ppTok)->pModule = m; |
| 10001 } |
| 10002 sqlite3_free((void *)aArg); |
| 10003 } |
| 10004 |
| 10005 sqlite3_free(zCopy); |
| 10006 return rc; |
| 10007 } |
| 10008 |
| 10009 |
| 10010 #ifdef SQLITE_TEST |
| 10011 |
| 10012 #if defined(INCLUDE_SQLITE_TCL_H) |
| 10013 # include "sqlite_tcl.h" |
| 10014 #else |
| 10015 # include "tcl.h" |
| 10016 #endif |
| 10017 /* #include <string.h> */ |
| 10018 |
| 10019 /* |
| 10020 ** Implementation of a special SQL scalar function for testing tokenizers |
| 10021 ** designed to be used in concert with the Tcl testing framework. This |
| 10022 ** function must be called with two or more arguments: |
| 10023 ** |
| 10024 ** SELECT <function-name>(<key-name>, ..., <input-string>); |
| 10025 ** |
| 10026 ** where <function-name> is the name passed as the second argument |
| 10027 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') |
| 10028 ** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). |
| 10029 ** |
| 10030 ** The return value is a string that may be interpreted as a Tcl |
| 10031 ** list. For each token in the <input-string>, three elements are |
| 10032 ** added to the returned list. The first is the token position, the |
| 10033 ** second is the token text (folded, stemmed, etc.) and the third is the |
| 10034 ** substring of <input-string> associated with the token. For example, |
| 10035 ** using the built-in "simple" tokenizer: |
| 10036 ** |
| 10037 ** SELECT fts_tokenizer_test('simple', 'I don't see how'); |
| 10038 ** |
| 10039 ** will return the string: |
| 10040 ** |
| 10041 ** "{0 i I 1 dont don't 2 see see 3 how how}" |
| 10042 ** |
| 10043 */ |
| 10044 static void testFunc( |
| 10045 sqlite3_context *context, |
| 10046 int argc, |
| 10047 sqlite3_value **argv |
| 10048 ){ |
| 10049 Fts3Hash *pHash; |
| 10050 sqlite3_tokenizer_module *p; |
| 10051 sqlite3_tokenizer *pTokenizer = 0; |
| 10052 sqlite3_tokenizer_cursor *pCsr = 0; |
| 10053 |
| 10054 const char *zErr = 0; |
| 10055 |
| 10056 const char *zName; |
| 10057 int nName; |
| 10058 const char *zInput; |
| 10059 int nInput; |
| 10060 |
| 10061 const char *azArg[64]; |
| 10062 |
| 10063 const char *zToken; |
| 10064 int nToken = 0; |
| 10065 int iStart = 0; |
| 10066 int iEnd = 0; |
| 10067 int iPos = 0; |
| 10068 int i; |
| 10069 |
| 10070 Tcl_Obj *pRet; |
| 10071 |
| 10072 if( argc<2 ){ |
| 10073 sqlite3_result_error(context, "insufficient arguments", -1); |
| 10074 return; |
| 10075 } |
| 10076 |
| 10077 nName = sqlite3_value_bytes(argv[0]); |
| 10078 zName = (const char *)sqlite3_value_text(argv[0]); |
| 10079 nInput = sqlite3_value_bytes(argv[argc-1]); |
| 10080 zInput = (const char *)sqlite3_value_text(argv[argc-1]); |
| 10081 |
| 10082 pHash = (Fts3Hash *)sqlite3_user_data(context); |
| 10083 p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); |
| 10084 |
| 10085 if( !p ){ |
| 10086 char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); |
| 10087 sqlite3_result_error(context, zErr2, -1); |
| 10088 sqlite3_free(zErr2); |
| 10089 return; |
| 10090 } |
| 10091 |
| 10092 pRet = Tcl_NewObj(); |
| 10093 Tcl_IncrRefCount(pRet); |
| 10094 |
| 10095 for(i=1; i<argc-1; i++){ |
| 10096 azArg[i-1] = (const char *)sqlite3_value_text(argv[i]); |
| 10097 } |
| 10098 |
| 10099 if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){ |
| 10100 zErr = "error in xCreate()"; |
| 10101 goto finish; |
| 10102 } |
| 10103 pTokenizer->pModule = p; |
| 10104 if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ |
| 10105 zErr = "error in xOpen()"; |
| 10106 goto finish; |
| 10107 } |
| 10108 |
| 10109 while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ |
| 10110 Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); |
| 10111 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |
| 10112 zToken = &zInput[iStart]; |
| 10113 nToken = iEnd-iStart; |
| 10114 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |
| 10115 } |
| 10116 |
| 10117 if( SQLITE_OK!=p->xClose(pCsr) ){ |
| 10118 zErr = "error in xClose()"; |
| 10119 goto finish; |
| 10120 } |
| 10121 if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ |
| 10122 zErr = "error in xDestroy()"; |
| 10123 goto finish; |
| 10124 } |
| 10125 |
| 10126 finish: |
| 10127 if( zErr ){ |
| 10128 sqlite3_result_error(context, zErr, -1); |
| 10129 }else{ |
| 10130 sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); |
| 10131 } |
| 10132 Tcl_DecrRefCount(pRet); |
| 10133 } |
| 10134 |
| 10135 static |
| 10136 int registerTokenizer( |
| 10137 sqlite3 *db, |
| 10138 char *zName, |
| 10139 const sqlite3_tokenizer_module *p |
| 10140 ){ |
| 10141 int rc; |
| 10142 sqlite3_stmt *pStmt; |
| 10143 const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; |
| 10144 |
| 10145 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 10146 if( rc!=SQLITE_OK ){ |
| 10147 return rc; |
| 10148 } |
| 10149 |
| 10150 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 10151 sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); |
| 10152 sqlite3_step(pStmt); |
| 10153 |
| 10154 return sqlite3_finalize(pStmt); |
| 10155 } |
| 10156 |
| 10157 |
| 10158 static |
| 10159 int queryTokenizer( |
| 10160 sqlite3 *db, |
| 10161 char *zName, |
| 10162 const sqlite3_tokenizer_module **pp |
| 10163 ){ |
| 10164 int rc; |
| 10165 sqlite3_stmt *pStmt; |
| 10166 const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| 10167 |
| 10168 *pp = 0; |
| 10169 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 10170 if( rc!=SQLITE_OK ){ |
| 10171 return rc; |
| 10172 } |
| 10173 |
| 10174 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 10175 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 10176 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| 10177 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| 10178 } |
| 10179 } |
| 10180 |
| 10181 return sqlite3_finalize(pStmt); |
| 10182 } |
| 10183 |
| 10184 SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module co
nst**ppModule); |
| 10185 |
| 10186 /* |
| 10187 ** Implementation of the scalar function fts3_tokenizer_internal_test(). |
| 10188 ** This function is used for testing only, it is not included in the |
| 10189 ** build unless SQLITE_TEST is defined. |
| 10190 ** |
| 10191 ** The purpose of this is to test that the fts3_tokenizer() function |
| 10192 ** can be used as designed by the C-code in the queryTokenizer and |
| 10193 ** registerTokenizer() functions above. These two functions are repeated |
| 10194 ** in the README.tokenizer file as an example, so it is important to |
| 10195 ** test them. |
| 10196 ** |
| 10197 ** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar |
| 10198 ** function with no arguments. An assert() will fail if a problem is |
| 10199 ** detected. i.e.: |
| 10200 ** |
| 10201 ** SELECT fts3_tokenizer_internal_test(); |
| 10202 ** |
| 10203 */ |
| 10204 static void intTestFunc( |
| 10205 sqlite3_context *context, |
| 10206 int argc, |
| 10207 sqlite3_value **argv |
| 10208 ){ |
| 10209 int rc; |
| 10210 const sqlite3_tokenizer_module *p1; |
| 10211 const sqlite3_tokenizer_module *p2; |
| 10212 sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); |
| 10213 |
| 10214 UNUSED_PARAMETER(argc); |
| 10215 UNUSED_PARAMETER(argv); |
| 10216 |
| 10217 /* Test the query function */ |
| 10218 sqlite3Fts3SimpleTokenizerModule(&p1); |
| 10219 rc = queryTokenizer(db, "simple", &p2); |
| 10220 assert( rc==SQLITE_OK ); |
| 10221 assert( p1==p2 ); |
| 10222 rc = queryTokenizer(db, "nosuchtokenizer", &p2); |
| 10223 assert( rc==SQLITE_ERROR ); |
| 10224 assert( p2==0 ); |
| 10225 assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); |
| 10226 |
| 10227 /* Test the storage function */ |
| 10228 if( fts3TokenizerEnabled(context) ){ |
| 10229 rc = registerTokenizer(db, "nosuchtokenizer", p1); |
| 10230 assert( rc==SQLITE_OK ); |
| 10231 rc = queryTokenizer(db, "nosuchtokenizer", &p2); |
| 10232 assert( rc==SQLITE_OK ); |
| 10233 assert( p2==p1 ); |
| 10234 } |
| 10235 |
| 10236 sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); |
| 10237 } |
| 10238 |
| 10239 #endif |
| 10240 |
| 10241 /* |
| 10242 ** Set up SQL objects in database db used to access the contents of |
| 10243 ** the hash table pointed to by argument pHash. The hash table must |
| 10244 ** been initialized to use string keys, and to take a private copy |
| 10245 ** of the key when a value is inserted. i.e. by a call similar to: |
| 10246 ** |
| 10247 ** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); |
| 10248 ** |
| 10249 ** This function adds a scalar function (see header comment above |
| 10250 ** fts3TokenizerFunc() in this file for details) and, if ENABLE_TABLE is |
| 10251 ** defined at compilation time, a temporary virtual table (see header |
| 10252 ** comment above struct HashTableVtab) to the database schema. Both |
| 10253 ** provide read/write access to the contents of *pHash. |
| 10254 ** |
| 10255 ** The third argument to this function, zName, is used as the name |
| 10256 ** of both the scalar and, if created, the virtual table. |
| 10257 */ |
| 10258 SQLITE_PRIVATE int sqlite3Fts3InitHashTable( |
| 10259 sqlite3 *db, |
| 10260 Fts3Hash *pHash, |
| 10261 const char *zName |
| 10262 ){ |
| 10263 int rc = SQLITE_OK; |
| 10264 void *p = (void *)pHash; |
| 10265 const int any = SQLITE_ANY; |
| 10266 |
| 10267 #ifdef SQLITE_TEST |
| 10268 char *zTest = 0; |
| 10269 char *zTest2 = 0; |
| 10270 void *pdb = (void *)db; |
| 10271 zTest = sqlite3_mprintf("%s_test", zName); |
| 10272 zTest2 = sqlite3_mprintf("%s_internal_test", zName); |
| 10273 if( !zTest || !zTest2 ){ |
| 10274 rc = SQLITE_NOMEM; |
| 10275 } |
| 10276 #endif |
| 10277 |
| 10278 if( SQLITE_OK==rc ){ |
| 10279 rc = sqlite3_create_function(db, zName, 1, any, p, fts3TokenizerFunc, 0, 0); |
| 10280 } |
| 10281 if( SQLITE_OK==rc ){ |
| 10282 rc = sqlite3_create_function(db, zName, 2, any, p, fts3TokenizerFunc, 0, 0); |
| 10283 } |
| 10284 #ifdef SQLITE_TEST |
| 10285 if( SQLITE_OK==rc ){ |
| 10286 rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); |
| 10287 } |
| 10288 if( SQLITE_OK==rc ){ |
| 10289 rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); |
| 10290 } |
| 10291 #endif |
| 10292 |
| 10293 #ifdef SQLITE_TEST |
| 10294 sqlite3_free(zTest); |
| 10295 sqlite3_free(zTest2); |
| 10296 #endif |
| 10297 |
| 10298 return rc; |
| 10299 } |
| 10300 |
| 10301 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10302 |
| 10303 /************** End of fts3_tokenizer.c **************************************/ |
| 10304 /************** Begin file fts3_tokenizer1.c *********************************/ |
| 10305 /* |
| 10306 ** 2006 Oct 10 |
| 10307 ** |
| 10308 ** The author disclaims copyright to this source code. In place of |
| 10309 ** a legal notice, here is a blessing: |
| 10310 ** |
| 10311 ** May you do good and not evil. |
| 10312 ** May you find forgiveness for yourself and forgive others. |
| 10313 ** May you share freely, never taking more than you give. |
| 10314 ** |
| 10315 ****************************************************************************** |
| 10316 ** |
| 10317 ** Implementation of the "simple" full-text-search tokenizer. |
| 10318 */ |
| 10319 |
| 10320 /* |
| 10321 ** The code in this file is only compiled if: |
| 10322 ** |
| 10323 ** * The FTS3 module is being built as an extension |
| 10324 ** (in which case SQLITE_CORE is not defined), or |
| 10325 ** |
| 10326 ** * The FTS3 module is being built into the core of |
| 10327 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |
| 10328 */ |
| 10329 /* #include "fts3Int.h" */ |
| 10330 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 10331 |
| 10332 /* #include <assert.h> */ |
| 10333 /* #include <stdlib.h> */ |
| 10334 /* #include <stdio.h> */ |
| 10335 /* #include <string.h> */ |
| 10336 |
| 10337 /* #include "fts3_tokenizer.h" */ |
| 10338 |
/*
** A "simple" tokenizer instance: the generic tokenizer base object
** followed by a lookup table, indexed by byte value 0..127, in which a
** non-zero entry marks that byte as a token delimiter.
*/
typedef struct simple_tokenizer {
  sqlite3_tokenizer base;
  char delim[128];             /* flag ASCII delimiters */
} simple_tokenizer;
| 10343 |
/*
** Cursor used to step through the tokens of one input string with the
** "simple" tokenizer. pInput is not copied; the cursor only keeps the
** pointer it was given.
*/
typedef struct simple_tokenizer_cursor {
  sqlite3_tokenizer_cursor base;
  const char *pInput;          /* input we are tokenizing */
  int nBytes;                  /* size of the input */
  int iOffset;                 /* current position in pInput */
  int iToken;                  /* index of next token to be returned */
  char *pToken;                /* storage for current token */
  int nTokenAllocated;         /* space allocated to pToken buffer */
} simple_tokenizer_cursor;
| 10353 |
| 10354 |
| 10355 static int simpleDelim(simple_tokenizer *t, unsigned char c){ |
| 10356 return c<0x80 && t->delim[c]; |
| 10357 } |
/*
** Return 1 if x is an ASCII digit or an ASCII letter of either case,
** otherwise 0. The result does not depend on the locale.
*/
static int fts3_isalnum(int x){
  if( x>='0' && x<='9' ) return 1;
  if( x>='A' && x<='Z' ) return 1;
  return x>='a' && x<='z';
}
| 10361 |
| 10362 /* |
| 10363 ** Create a new tokenizer instance. |
| 10364 */ |
| 10365 static int simpleCreate( |
| 10366 int argc, const char * const *argv, |
| 10367 sqlite3_tokenizer **ppTokenizer |
| 10368 ){ |
| 10369 simple_tokenizer *t; |
| 10370 |
| 10371 t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); |
| 10372 if( t==NULL ) return SQLITE_NOMEM; |
| 10373 memset(t, 0, sizeof(*t)); |
| 10374 |
| 10375 /* TODO(shess) Delimiters need to remain the same from run to run, |
| 10376 ** else we need to reindex. One solution would be a meta-table to |
| 10377 ** track such information in the database, then we'd only want this |
| 10378 ** information on the initial create. |
| 10379 */ |
| 10380 if( argc>1 ){ |
| 10381 int i, n = (int)strlen(argv[1]); |
| 10382 for(i=0; i<n; i++){ |
| 10383 unsigned char ch = argv[1][i]; |
| 10384 /* We explicitly don't support UTF-8 delimiters for now. */ |
| 10385 if( ch>=0x80 ){ |
| 10386 sqlite3_free(t); |
| 10387 return SQLITE_ERROR; |
| 10388 } |
| 10389 t->delim[ch] = 1; |
| 10390 } |
| 10391 } else { |
| 10392 /* Mark non-alphanumeric ASCII characters as delimiters */ |
| 10393 int i; |
| 10394 for(i=1; i<0x80; i++){ |
| 10395 t->delim[i] = !fts3_isalnum(i) ? -1 : 0; |
| 10396 } |
| 10397 } |
| 10398 |
| 10399 *ppTokenizer = &t->base; |
| 10400 return SQLITE_OK; |
| 10401 } |
| 10402 |
| 10403 /* |
| 10404 ** Destroy a tokenizer |
| 10405 */ |
| 10406 static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ |
| 10407 sqlite3_free(pTokenizer); |
| 10408 return SQLITE_OK; |
| 10409 } |
| 10410 |
| 10411 /* |
| 10412 ** Prepare to begin tokenizing a particular string. The input |
| 10413 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 10414 ** used to incrementally tokenize this string is returned in |
| 10415 ** *ppCursor. |
| 10416 */ |
| 10417 static int simpleOpen( |
| 10418 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 10419 const char *pInput, int nBytes, /* String to be tokenized */ |
| 10420 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 10421 ){ |
| 10422 simple_tokenizer_cursor *c; |
| 10423 |
| 10424 UNUSED_PARAMETER(pTokenizer); |
| 10425 |
| 10426 c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); |
| 10427 if( c==NULL ) return SQLITE_NOMEM; |
| 10428 |
| 10429 c->pInput = pInput; |
| 10430 if( pInput==0 ){ |
| 10431 c->nBytes = 0; |
| 10432 }else if( nBytes<0 ){ |
| 10433 c->nBytes = (int)strlen(pInput); |
| 10434 }else{ |
| 10435 c->nBytes = nBytes; |
| 10436 } |
| 10437 c->iOffset = 0; /* start tokenizing at the beginning */ |
| 10438 c->iToken = 0; |
| 10439 c->pToken = NULL; /* no space allocated, yet. */ |
| 10440 c->nTokenAllocated = 0; |
| 10441 |
| 10442 *ppCursor = &c->base; |
| 10443 return SQLITE_OK; |
| 10444 } |
| 10445 |
| 10446 /* |
| 10447 ** Close a tokenization cursor previously opened by a call to |
| 10448 ** simpleOpen() above. |
| 10449 */ |
| 10450 static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ |
| 10451 simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
| 10452 sqlite3_free(c->pToken); |
| 10453 sqlite3_free(c); |
| 10454 return SQLITE_OK; |
| 10455 } |
| 10456 |
| 10457 /* |
| 10458 ** Extract the next token from a tokenization cursor. The cursor must |
| 10459 ** have been opened by a prior call to simpleOpen(). |
| 10460 */ |
| 10461 static int simpleNext( |
| 10462 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
| 10463 const char **ppToken, /* OUT: *ppToken is the token text */ |
| 10464 int *pnBytes, /* OUT: Number of bytes in token */ |
| 10465 int *piStartOffset, /* OUT: Starting offset of token */ |
| 10466 int *piEndOffset, /* OUT: Ending offset of token */ |
| 10467 int *piPosition /* OUT: Position integer of token */ |
| 10468 ){ |
| 10469 simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; |
| 10470 simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; |
| 10471 unsigned char *p = (unsigned char *)c->pInput; |
| 10472 |
| 10473 while( c->iOffset<c->nBytes ){ |
| 10474 int iStartOffset; |
| 10475 |
| 10476 /* Scan past delimiter characters */ |
| 10477 while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ |
| 10478 c->iOffset++; |
| 10479 } |
| 10480 |
| 10481 /* Count non-delimiter characters. */ |
| 10482 iStartOffset = c->iOffset; |
| 10483 while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ |
| 10484 c->iOffset++; |
| 10485 } |
| 10486 |
| 10487 if( c->iOffset>iStartOffset ){ |
| 10488 int i, n = c->iOffset-iStartOffset; |
| 10489 if( n>c->nTokenAllocated ){ |
| 10490 char *pNew; |
| 10491 c->nTokenAllocated = n+20; |
| 10492 pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); |
| 10493 if( !pNew ) return SQLITE_NOMEM; |
| 10494 c->pToken = pNew; |
| 10495 } |
| 10496 for(i=0; i<n; i++){ |
| 10497 /* TODO(shess) This needs expansion to handle UTF-8 |
| 10498 ** case-insensitivity. |
| 10499 */ |
| 10500 unsigned char ch = p[iStartOffset+i]; |
| 10501 c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); |
| 10502 } |
| 10503 *ppToken = c->pToken; |
| 10504 *pnBytes = n; |
| 10505 *piStartOffset = iStartOffset; |
| 10506 *piEndOffset = c->iOffset; |
| 10507 *piPosition = c->iToken++; |
| 10508 |
| 10509 return SQLITE_OK; |
| 10510 } |
| 10511 } |
| 10512 return SQLITE_DONE; |
| 10513 } |
| 10514 |
/*
** The set of routines that implement the simple tokenizer.
**
** The five function pointers are the create, destroy, open, close and
** next methods defined earlier in this file. The first and last
** initializers are left as 0.
** NOTE(review): the layout of sqlite3_tokenizer_module is declared
** elsewhere; presumably the first member is a version number and the
** last an optional method - confirm against its declaration.
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
  0,
  simpleCreate,
  simpleDestroy,
  simpleOpen,
  simpleClose,
  simpleNext,
  0,
};
| 10527 |
/*
** Return, via *ppModule, a pointer to the module structure of the
** "simple" tokenizer. Nothing is allocated: the pointer refers to the
** static const simpleTokenizerModule object, so it must not be freed.
*/
SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &simpleTokenizerModule;
}
| 10537 |
| 10538 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10539 |
| 10540 /************** End of fts3_tokenizer1.c *************************************/ |
| 10541 /************** Begin file fts3_tokenize_vtab.c ******************************/ |
| 10542 /* |
| 10543 ** 2013 Apr 22 |
| 10544 ** |
| 10545 ** The author disclaims copyright to this source code. In place of |
| 10546 ** a legal notice, here is a blessing: |
| 10547 ** |
| 10548 ** May you do good and not evil. |
| 10549 ** May you find forgiveness for yourself and forgive others. |
| 10550 ** May you share freely, never taking more than you give. |
| 10551 ** |
| 10552 ****************************************************************************** |
| 10553 ** |
| 10554 ** This file contains code for the "fts3tokenize" virtual table module. |
| 10555 ** An fts3tokenize virtual table is created as follows: |
| 10556 ** |
| 10557 ** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize( |
| 10558 ** <tokenizer-name>, <arg-1>, ... |
| 10559 ** ); |
| 10560 ** |
| 10561 ** The table created has the following schema: |
| 10562 ** |
| 10563 ** CREATE TABLE <tbl>(input, token, start, end, position) |
| 10564 ** |
| 10565 ** When queried, the query must include a WHERE clause of type: |
| 10566 ** |
| 10567 ** input = <string> |
| 10568 ** |
| 10569 ** The virtual table module tokenizes this <string>, using the FTS3 |
| 10570 ** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE |
| 10571 ** statement and returns one row for each token in the result. With |
| 10572 ** fields set as follows: |
| 10573 ** |
| 10574 ** input: Always set to a copy of <string> |
| 10575 ** token: A token from the input. |
| 10576 ** start: Byte offset of the token within the input <string>. |
| 10577 ** end: Byte offset of the byte immediately following the end of the |
| 10578 ** token within the input string. |
| 10579 ** pos: Token offset of token within input. |
| 10580 ** |
| 10581 */ |
| 10582 /* #include "fts3Int.h" */ |
| 10583 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 10584 |
| 10585 /* #include <string.h> */ |
| 10586 /* #include <assert.h> */ |
| 10587 |
/* Short names for the virtual table and cursor structures defined below */
typedef struct Fts3tokTable Fts3tokTable;
typedef struct Fts3tokCursor Fts3tokCursor;
| 10590 |
/*
** Virtual table structure. One of these is allocated by
** fts3tokConnectMethod() for each fts3tokenize table. It owns the
** tokenizer instance pTok, which fts3tokDisconnectMethod() destroys
** through pMod->xDestroy().
*/
struct Fts3tokTable {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  const sqlite3_tokenizer_module *pMod;   /* Tokenizer module methods */
  sqlite3_tokenizer *pTok;                /* Tokenizer instance made by pMod */
};
| 10599 |
/*
** Virtual table cursor structure. zInput is a private copy of the string
** being tokenized, allocated by fts3tokFilterMethod() and released by
** fts3tokResetCursor(). The remaining fields describe the current row as
** reported by the most recent call to the tokenizer's xNext() method.
** zToken==0 is how fts3tokEofMethod() detects the end of the scan.
*/
struct Fts3tokCursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  char *zInput;                   /* Input string */
  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
  int iRowid;                     /* Current 'rowid' value */
  const char *zToken;             /* Current 'token' value */
  int nToken;                     /* Size of zToken in bytes */
  int iStart;                     /* Current 'start' value */
  int iEnd;                       /* Current 'end' value */
  int iPos;                       /* Current 'pos' value */
};
| 10614 |
| 10615 /* |
| 10616 ** Query FTS for the tokenizer implementation named zName. |
| 10617 */ |
| 10618 static int fts3tokQueryTokenizer( |
| 10619 Fts3Hash *pHash, |
| 10620 const char *zName, |
| 10621 const sqlite3_tokenizer_module **pp, |
| 10622 char **pzErr |
| 10623 ){ |
| 10624 sqlite3_tokenizer_module *p; |
| 10625 int nName = (int)strlen(zName); |
| 10626 |
| 10627 p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); |
| 10628 if( !p ){ |
| 10629 sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); |
| 10630 return SQLITE_ERROR; |
| 10631 } |
| 10632 |
| 10633 *pp = p; |
| 10634 return SQLITE_OK; |
| 10635 } |
| 10636 |
| 10637 /* |
| 10638 ** The second argument, argv[], is an array of pointers to nul-terminated |
| 10639 ** strings. This function makes a copy of the array and strings into a |
| 10640 ** single block of memory. It then dequotes any of the strings that appear |
| 10641 ** to be quoted. |
| 10642 ** |
| 10643 ** If successful, output parameter *pazDequote is set to point at the |
| 10644 ** array of dequoted strings and SQLITE_OK is returned. The caller is |
| 10645 ** responsible for eventually calling sqlite3_free() to free the array |
| 10646 ** in this case. Or, if an error occurs, an SQLite error code is returned. |
| 10647 ** The final value of *pazDequote is undefined in this case. |
| 10648 */ |
| 10649 static int fts3tokDequoteArray( |
| 10650 int argc, /* Number of elements in argv[] */ |
| 10651 const char * const *argv, /* Input array */ |
| 10652 char ***pazDequote /* Output array */ |
| 10653 ){ |
| 10654 int rc = SQLITE_OK; /* Return code */ |
| 10655 if( argc==0 ){ |
| 10656 *pazDequote = 0; |
| 10657 }else{ |
| 10658 int i; |
| 10659 int nByte = 0; |
| 10660 char **azDequote; |
| 10661 |
| 10662 for(i=0; i<argc; i++){ |
| 10663 nByte += (int)(strlen(argv[i]) + 1); |
| 10664 } |
| 10665 |
| 10666 *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); |
| 10667 if( azDequote==0 ){ |
| 10668 rc = SQLITE_NOMEM; |
| 10669 }else{ |
| 10670 char *pSpace = (char *)&azDequote[argc]; |
| 10671 for(i=0; i<argc; i++){ |
| 10672 int n = (int)strlen(argv[i]); |
| 10673 azDequote[i] = pSpace; |
| 10674 memcpy(pSpace, argv[i], n+1); |
| 10675 sqlite3Fts3Dequote(pSpace); |
| 10676 pSpace += (n+1); |
| 10677 } |
| 10678 } |
| 10679 } |
| 10680 |
| 10681 return rc; |
| 10682 } |
| 10683 |
/*
** Schema of the tokenizer table, as passed to sqlite3_declare_vtab().
** The column order here (input=0, token=1, start=2, end=3, position=4)
** is what fts3tokColumnMethod() and fts3tokBestIndexMethod() rely on.
*/
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
| 10688 |
| 10689 /* |
| 10690 ** This function does all the work for both the xConnect and xCreate methods. |
| 10691 ** These tables have no persistent representation of their own, so xConnect |
| 10692 ** and xCreate are identical operations. |
| 10693 ** |
| 10694 ** argv[0]: module name |
| 10695 ** argv[1]: database name |
| 10696 ** argv[2]: table name |
| 10697 ** argv[3]: first argument (tokenizer name) |
| 10698 */ |
| 10699 static int fts3tokConnectMethod( |
| 10700 sqlite3 *db, /* Database connection */ |
| 10701 void *pHash, /* Hash table of tokenizers */ |
| 10702 int argc, /* Number of elements in argv array */ |
| 10703 const char * const *argv, /* xCreate/xConnect argument array */ |
| 10704 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
| 10705 char **pzErr /* OUT: sqlite3_malloc'd error message */ |
| 10706 ){ |
| 10707 Fts3tokTable *pTab = 0; |
| 10708 const sqlite3_tokenizer_module *pMod = 0; |
| 10709 sqlite3_tokenizer *pTok = 0; |
| 10710 int rc; |
| 10711 char **azDequote = 0; |
| 10712 int nDequote; |
| 10713 |
| 10714 rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); |
| 10715 if( rc!=SQLITE_OK ) return rc; |
| 10716 |
| 10717 nDequote = argc-3; |
| 10718 rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); |
| 10719 |
| 10720 if( rc==SQLITE_OK ){ |
| 10721 const char *zModule; |
| 10722 if( nDequote<1 ){ |
| 10723 zModule = "simple"; |
| 10724 }else{ |
| 10725 zModule = azDequote[0]; |
| 10726 } |
| 10727 rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); |
| 10728 } |
| 10729 |
| 10730 assert( (rc==SQLITE_OK)==(pMod!=0) ); |
| 10731 if( rc==SQLITE_OK ){ |
| 10732 const char * const *azArg = (const char * const *)&azDequote[1]; |
| 10733 rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); |
| 10734 } |
| 10735 |
| 10736 if( rc==SQLITE_OK ){ |
| 10737 pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); |
| 10738 if( pTab==0 ){ |
| 10739 rc = SQLITE_NOMEM; |
| 10740 } |
| 10741 } |
| 10742 |
| 10743 if( rc==SQLITE_OK ){ |
| 10744 memset(pTab, 0, sizeof(Fts3tokTable)); |
| 10745 pTab->pMod = pMod; |
| 10746 pTab->pTok = pTok; |
| 10747 *ppVtab = &pTab->base; |
| 10748 }else{ |
| 10749 if( pTok ){ |
| 10750 pMod->xDestroy(pTok); |
| 10751 } |
| 10752 } |
| 10753 |
| 10754 sqlite3_free(azDequote); |
| 10755 return rc; |
| 10756 } |
| 10757 |
| 10758 /* |
| 10759 ** This function does the work for both the xDisconnect and xDestroy methods. |
| 10760 ** These tables have no persistent representation of their own, so xDisconnect |
| 10761 ** and xDestroy are identical operations. |
| 10762 */ |
| 10763 static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ |
| 10764 Fts3tokTable *pTab = (Fts3tokTable *)pVtab; |
| 10765 |
| 10766 pTab->pMod->xDestroy(pTab->pTok); |
| 10767 sqlite3_free(pTab); |
| 10768 return SQLITE_OK; |
| 10769 } |
| 10770 |
| 10771 /* |
| 10772 ** xBestIndex - Analyze a WHERE and ORDER BY clause. |
| 10773 */ |
| 10774 static int fts3tokBestIndexMethod( |
| 10775 sqlite3_vtab *pVTab, |
| 10776 sqlite3_index_info *pInfo |
| 10777 ){ |
| 10778 int i; |
| 10779 UNUSED_PARAMETER(pVTab); |
| 10780 |
| 10781 for(i=0; i<pInfo->nConstraint; i++){ |
| 10782 if( pInfo->aConstraint[i].usable |
| 10783 && pInfo->aConstraint[i].iColumn==0 |
| 10784 && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ |
| 10785 ){ |
| 10786 pInfo->idxNum = 1; |
| 10787 pInfo->aConstraintUsage[i].argvIndex = 1; |
| 10788 pInfo->aConstraintUsage[i].omit = 1; |
| 10789 pInfo->estimatedCost = 1; |
| 10790 return SQLITE_OK; |
| 10791 } |
| 10792 } |
| 10793 |
| 10794 pInfo->idxNum = 0; |
| 10795 assert( pInfo->estimatedCost>1000000.0 ); |
| 10796 |
| 10797 return SQLITE_OK; |
| 10798 } |
| 10799 |
| 10800 /* |
| 10801 ** xOpen - Open a cursor. |
| 10802 */ |
| 10803 static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
| 10804 Fts3tokCursor *pCsr; |
| 10805 UNUSED_PARAMETER(pVTab); |
| 10806 |
| 10807 pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); |
| 10808 if( pCsr==0 ){ |
| 10809 return SQLITE_NOMEM; |
| 10810 } |
| 10811 memset(pCsr, 0, sizeof(Fts3tokCursor)); |
| 10812 |
| 10813 *ppCsr = (sqlite3_vtab_cursor *)pCsr; |
| 10814 return SQLITE_OK; |
| 10815 } |
| 10816 |
| 10817 /* |
| 10818 ** Reset the tokenizer cursor passed as the only argument. As if it had |
| 10819 ** just been returned by fts3tokOpenMethod(). |
| 10820 */ |
| 10821 static void fts3tokResetCursor(Fts3tokCursor *pCsr){ |
| 10822 if( pCsr->pCsr ){ |
| 10823 Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); |
| 10824 pTab->pMod->xClose(pCsr->pCsr); |
| 10825 pCsr->pCsr = 0; |
| 10826 } |
| 10827 sqlite3_free(pCsr->zInput); |
| 10828 pCsr->zInput = 0; |
| 10829 pCsr->zToken = 0; |
| 10830 pCsr->nToken = 0; |
| 10831 pCsr->iStart = 0; |
| 10832 pCsr->iEnd = 0; |
| 10833 pCsr->iPos = 0; |
| 10834 pCsr->iRowid = 0; |
| 10835 } |
| 10836 |
| 10837 /* |
| 10838 ** xClose - Close a cursor. |
| 10839 */ |
| 10840 static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ |
| 10841 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10842 |
| 10843 fts3tokResetCursor(pCsr); |
| 10844 sqlite3_free(pCsr); |
| 10845 return SQLITE_OK; |
| 10846 } |
| 10847 |
| 10848 /* |
| 10849 ** xNext - Advance the cursor to the next row, if any. |
| 10850 */ |
| 10851 static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ |
| 10852 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10853 Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); |
| 10854 int rc; /* Return code */ |
| 10855 |
| 10856 pCsr->iRowid++; |
| 10857 rc = pTab->pMod->xNext(pCsr->pCsr, |
| 10858 &pCsr->zToken, &pCsr->nToken, |
| 10859 &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos |
| 10860 ); |
| 10861 |
| 10862 if( rc!=SQLITE_OK ){ |
| 10863 fts3tokResetCursor(pCsr); |
| 10864 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 10865 } |
| 10866 |
| 10867 return rc; |
| 10868 } |
| 10869 |
| 10870 /* |
| 10871 ** xFilter - Initialize a cursor to point at the start of its data. |
| 10872 */ |
| 10873 static int fts3tokFilterMethod( |
| 10874 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
| 10875 int idxNum, /* Strategy index */ |
| 10876 const char *idxStr, /* Unused */ |
| 10877 int nVal, /* Number of elements in apVal */ |
| 10878 sqlite3_value **apVal /* Arguments for the indexing scheme */ |
| 10879 ){ |
| 10880 int rc = SQLITE_ERROR; |
| 10881 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10882 Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); |
| 10883 UNUSED_PARAMETER(idxStr); |
| 10884 UNUSED_PARAMETER(nVal); |
| 10885 |
| 10886 fts3tokResetCursor(pCsr); |
| 10887 if( idxNum==1 ){ |
| 10888 const char *zByte = (const char *)sqlite3_value_text(apVal[0]); |
| 10889 int nByte = sqlite3_value_bytes(apVal[0]); |
| 10890 pCsr->zInput = sqlite3_malloc(nByte+1); |
| 10891 if( pCsr->zInput==0 ){ |
| 10892 rc = SQLITE_NOMEM; |
| 10893 }else{ |
| 10894 memcpy(pCsr->zInput, zByte, nByte); |
| 10895 pCsr->zInput[nByte] = 0; |
| 10896 rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); |
| 10897 if( rc==SQLITE_OK ){ |
| 10898 pCsr->pCsr->pTokenizer = pTab->pTok; |
| 10899 } |
| 10900 } |
| 10901 } |
| 10902 |
| 10903 if( rc!=SQLITE_OK ) return rc; |
| 10904 return fts3tokNextMethod(pCursor); |
| 10905 } |
| 10906 |
| 10907 /* |
| 10908 ** xEof - Return true if the cursor is at EOF, or false otherwise. |
| 10909 */ |
| 10910 static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ |
| 10911 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10912 return (pCsr->zToken==0); |
| 10913 } |
| 10914 |
| 10915 /* |
| 10916 ** xColumn - Return a column value. |
| 10917 */ |
| 10918 static int fts3tokColumnMethod( |
| 10919 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 10920 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
| 10921 int iCol /* Index of column to read value from */ |
| 10922 ){ |
| 10923 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10924 |
| 10925 /* CREATE TABLE x(input, token, start, end, position) */ |
| 10926 switch( iCol ){ |
| 10927 case 0: |
| 10928 sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); |
| 10929 break; |
| 10930 case 1: |
| 10931 sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); |
| 10932 break; |
| 10933 case 2: |
| 10934 sqlite3_result_int(pCtx, pCsr->iStart); |
| 10935 break; |
| 10936 case 3: |
| 10937 sqlite3_result_int(pCtx, pCsr->iEnd); |
| 10938 break; |
| 10939 default: |
| 10940 assert( iCol==4 ); |
| 10941 sqlite3_result_int(pCtx, pCsr->iPos); |
| 10942 break; |
| 10943 } |
| 10944 return SQLITE_OK; |
| 10945 } |
| 10946 |
| 10947 /* |
| 10948 ** xRowid - Return the current rowid for the cursor. |
| 10949 */ |
| 10950 static int fts3tokRowidMethod( |
| 10951 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
| 10952 sqlite_int64 *pRowid /* OUT: Rowid value */ |
| 10953 ){ |
| 10954 Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; |
| 10955 *pRowid = (sqlite3_int64)pCsr->iRowid; |
| 10956 return SQLITE_OK; |
| 10957 } |
| 10958 |
| 10959 /* |
| 10960 ** Register the fts3tok module with database connection db. Return SQLITE_OK |
| 10961 ** if successful or an error code if sqlite3_create_module() fails. |
| 10962 */ |
| 10963 SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ |
| 10964 static const sqlite3_module fts3tok_module = { |
| 10965 0, /* iVersion */ |
| 10966 fts3tokConnectMethod, /* xCreate */ |
| 10967 fts3tokConnectMethod, /* xConnect */ |
| 10968 fts3tokBestIndexMethod, /* xBestIndex */ |
| 10969 fts3tokDisconnectMethod, /* xDisconnect */ |
| 10970 fts3tokDisconnectMethod, /* xDestroy */ |
| 10971 fts3tokOpenMethod, /* xOpen */ |
| 10972 fts3tokCloseMethod, /* xClose */ |
| 10973 fts3tokFilterMethod, /* xFilter */ |
| 10974 fts3tokNextMethod, /* xNext */ |
| 10975 fts3tokEofMethod, /* xEof */ |
| 10976 fts3tokColumnMethod, /* xColumn */ |
| 10977 fts3tokRowidMethod, /* xRowid */ |
| 10978 0, /* xUpdate */ |
| 10979 0, /* xBegin */ |
| 10980 0, /* xSync */ |
| 10981 0, /* xCommit */ |
| 10982 0, /* xRollback */ |
| 10983 0, /* xFindFunction */ |
| 10984 0, /* xRename */ |
| 10985 0, /* xSavepoint */ |
| 10986 0, /* xRelease */ |
| 10987 0 /* xRollbackTo */ |
| 10988 }; |
| 10989 int rc; /* Return code */ |
| 10990 |
| 10991 rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); |
| 10992 return rc; |
| 10993 } |
| 10994 |
| 10995 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 10996 |
| 10997 /************** End of fts3_tokenize_vtab.c **********************************/ |
| 10998 /************** Begin file fts3_write.c **************************************/ |
| 10999 /* |
| 11000 ** 2009 Oct 23 |
| 11001 ** |
| 11002 ** The author disclaims copyright to this source code. In place of |
| 11003 ** a legal notice, here is a blessing: |
| 11004 ** |
| 11005 ** May you do good and not evil. |
| 11006 ** May you find forgiveness for yourself and forgive others. |
| 11007 ** May you share freely, never taking more than you give. |
| 11008 ** |
| 11009 ****************************************************************************** |
| 11010 ** |
| 11011 ** This file is part of the SQLite FTS3 extension module. Specifically, |
| 11012 ** this file contains code to insert, update and delete rows from FTS3 |
| 11013 ** tables. It also contains code to merge FTS3 b-tree segments. Some |
| 11014 ** of the sub-routines used to merge segments are also used by the query |
| 11015 ** code in fts3.c. |
| 11016 */ |
| 11017 |
| 11018 /* #include "fts3Int.h" */ |
| 11019 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 11020 |
| 11021 /* #include <string.h> */ |
| 11022 /* #include <assert.h> */ |
| 11023 /* #include <stdlib.h> */ |
| 11024 |
| 11025 |
/*
** NOTE(review): presumably the greatest height of a segment b-tree that
** may still be appended to. The code that uses this constant lies outside
** this part of the file - confirm before relying on this description.
*/
#define FTS_MAX_APPENDABLE_HEIGHT 16

/*
** When full-text index nodes are loaded from disk, the buffer that they
** are loaded into has the following number of bytes of padding at the end
** of it (room for two maximum-length varints). i.e. if a full-text index
** node is 900 bytes in size, then a buffer of 920 bytes is allocated for it.
**
** This means that if we have a pointer into a buffer containing node data,
** it is always safe to read up to two varints from it without risking an
** overread, even if the node data is corrupted.
*/
#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
| 11039 |
/*
** Under certain circumstances, b-tree nodes (doclists) can be loaded into
** memory incrementally instead of all at once. This can be a big performance
** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext()
** method before retrieving all query results (as may happen, for example,
** if a query has a LIMIT clause).
**
** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD
** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes.
** By default the chunk size is 4KiB and the threshold is four chunks
** (16KiB). The code is written so that the hard lower-limit for each of
** these values is 1. Clearly such small values would be inefficient, but
** can be useful for testing purposes.
**
** If this module is built with SQLITE_TEST defined, the two macros expand
** to the global variables declared below so that the values may be
** overridden at runtime for testing purposes. File fts3_test.c contains
** a Tcl interface to read and write the values.
*/
#ifdef SQLITE_TEST
int test_fts3_node_chunksize = (4*1024);
int test_fts3_node_chunk_threshold = (4*1024)*4;
# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize
# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
#else
# define FTS3_NODE_CHUNKSIZE (4*1024)
# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
#endif
| 11066 |
/*
** The values that may be meaningfully bound to the :1 parameter in
** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
*/
#define FTS_STAT_DOCTOTAL 0
#define FTS_STAT_INCRMERGEHINT 1
#define FTS_STAT_AUTOINCRMERGE 2
| 11074 |
/*
** If FTS3_LOG_MERGES is defined, call sqlite3_log() to report each automatic
** and incremental merge operation that takes place. This is used for
** debugging FTS only, it should not usually be turned on in production
** systems. When FTS3_LOG_MERGES is not defined, fts3LogMerge() expands to
** nothing.
**
** The absolute level is a 64-bit value, so it is logged with %lld rather
** than being truncated to an int first.
*/
#ifdef FTS3_LOG_MERGES
static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
  sqlite3_log(SQLITE_OK, "%d-way merge from level %lld", nMerge, iAbsLevel);
}
#else
#define fts3LogMerge(x, y)
#endif
| 11088 |
| 11089 |
typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;

/*
** An instance of the following data structure is used to build doclists
** incrementally. See function fts3PendingListAppend() for details.
**
** The structure and its data buffer are a single allocation: aData points
** at the bytes immediately following the structure itself (see
** fts3PendingListAppendVarint()).
*/
struct PendingList {
  int nData;                      /* Bytes of aData[] currently in use */
  char *aData;                    /* Serialized varints */
  int nSpace;                     /* Allocated size of aData[] in bytes */
  sqlite3_int64 iLastDocid;       /* Presumably last docid appended - confirm */
  sqlite3_int64 iLastCol;         /* Presumably last column appended - confirm */
  sqlite3_int64 iLastPos;         /* Presumably last position appended - confirm */
};
| 11106 |
| 11107 |
/*
** Each cursor has a (possibly empty) linked list of the following objects,
** chained together through the pNext field.
*/
struct Fts3DeferredToken {
  Fts3PhraseToken *pToken;        /* Pointer to corresponding expr token */
  int iCol;                       /* Column token must occur in */
  Fts3DeferredToken *pNext;       /* Next in list of deferred tokens */
  PendingList *pList;             /* Doclist is assembled here */
};
| 11117 |
/*
** An instance of this structure is used to iterate through the terms on
** a contiguous set of segment b-tree leaf nodes. Although the details of
** this structure are only manipulated by code in this file, opaque handles
** of type Fts3SegReader* are also used by code in fts3.c to iterate through
** terms when querying the full-text index. See functions:
**
**   sqlite3Fts3SegReaderNew()
**   sqlite3Fts3SegReaderFree()
**   sqlite3Fts3SegReaderIterate()
**
** Methods used to manipulate Fts3SegReader structures:
**
**   fts3SegReaderNext()
**   fts3SegReaderFirstDocid()
**   fts3SegReaderNextDocid()
*/
struct Fts3SegReader {
  int iIdx;                       /* Index within level, or 0x7FFFFFFF for PT */
  u8 bLookup;                     /* True for a lookup only */
  u8 rootOnly;                    /* True for a root-only reader */

  sqlite3_int64 iStartBlock;      /* Rowid of first leaf block to traverse */
  sqlite3_int64 iLeafEndBlock;    /* Rowid of final leaf block to traverse */
  sqlite3_int64 iEndBlock;        /* Rowid of final block in segment (or 0) */
  sqlite3_int64 iCurrentBlock;    /* Current leaf block (or 0) */

  char *aNode;                    /* Pointer to node data (or NULL) */
  int nNode;                      /* Size of buffer at aNode (or 0) */
  int nPopulate;                  /* If >0, bytes of buffer aNode[] loaded */
  sqlite3_blob *pBlob;            /* If not NULL, blob handle to read node */

  /* Non-NULL if and only if this is a pending-terms reader; this is the
  ** condition tested by fts3SegReaderIsPending() below. */
  Fts3HashElem **ppNextElem;

  /* Variables set by fts3SegReaderNext(). These may be read directly
  ** by the caller. They are valid from the time SegmentReaderNew() returns
  ** until SegmentReaderNext() returns something other than SQLITE_OK
  ** (i.e. SQLITE_DONE).
  */
  int nTerm;                      /* Number of bytes in current term */
  char *zTerm;                    /* Pointer to current term */
  int nTermAlloc;                 /* Allocated size of zTerm buffer */
  char *aDoclist;                 /* Pointer to doclist of current entry */
  int nDoclist;                   /* Size of doclist in current entry */

  /* The following variables are used by fts3SegReaderNextDocid() to iterate
  ** through the current doclist (aDoclist/nDoclist).
  */
  char *pOffsetList;              /* Presumably current offset-list - confirm */
  int nOffsetList;                /* For descending pending seg-readers only */
  sqlite3_int64 iDocid;           /* Presumably current docid - confirm */
};

/* True if reader p iterates the pending-terms data (ppNextElem is set). */
#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
/* True if reader p was flagged as a root-only reader. */
#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0)
| 11173 |
/*
** An instance of this structure is used to create a segment b-tree in the
** database. The internal details of this type are only accessed by the
** following functions:
**
**   fts3SegWriterAdd()
**   fts3SegWriterFlush()
**   fts3SegWriterFree()
**
** zTerm/nTerm describe the previous term written; zMalloc/nMalloc describe
** a heap buffer that zTerm may (but need not) point into.
*/
struct SegmentWriter {
  SegmentNode *pTree;             /* Pointer to interior tree structure */
  sqlite3_int64 iFirst;           /* First slot in %_segments written */
  sqlite3_int64 iFree;            /* Next free slot in %_segments */
  char *zTerm;                    /* Pointer to previous term buffer */
  int nTerm;                      /* Number of bytes in zTerm */
  int nMalloc;                    /* Size of malloc'd buffer at zMalloc */
  char *zMalloc;                  /* Malloc'd space (possibly) used for zTerm */
  int nSize;                      /* Size of allocation at aData */
  int nData;                      /* Bytes of data in aData */
  char *aData;                    /* Pointer to block from malloc() */
  i64 nLeafData;                  /* Number of bytes of leaf data written */
};
| 11196 |
/*
** Type SegmentNode is used by the following three functions to create
** the interior part of the segment b+-tree structures (everything except
** the leaf nodes). These functions and type are only ever used by code
** within the fts3SegWriterXXX() family of functions described above.
**
**   fts3NodeAddTerm()
**   fts3NodeWrite()
**   fts3NodeFree()
**
** When a b+tree is written to the database (either as a result of a merge
** or the pending-terms table being flushed), leaves are written into the
** database file as soon as they are completely populated. The interior of
** the tree is assembled in memory and written out only once all leaves have
** been populated and stored. This is Ok, as the b+-tree fanout is usually
** very large, meaning that the interior of the tree consumes relatively
** little memory.
**
** Nodes are linked upwards through pParent and sideways through pRight;
** pLeftmost identifies the first node at the same depth.
*/
struct SegmentNode {
  SegmentNode *pParent;           /* Parent node (or NULL for root node) */
  SegmentNode *pRight;            /* Pointer to right-sibling */
  SegmentNode *pLeftmost;         /* Pointer to left-most node of this depth */
  int nEntry;                     /* Number of terms written to node so far */
  char *zTerm;                    /* Pointer to previous term buffer */
  int nTerm;                      /* Number of bytes in zTerm */
  int nMalloc;                    /* Size of malloc'd buffer at zMalloc */
  char *zMalloc;                  /* Malloc'd space (possibly) used for zTerm */
  int nData;                      /* Bytes of valid data so far */
  char *aData;                    /* Node data */
};
| 11227 |
/*
** Valid values for the second argument to fts3SqlStmt().
**
** Each value is an index into the azSql[] array of SQL text inside
** fts3SqlStmt() and into the array of cached statement handles
** Fts3Table.aStmt[], so the numbering here must stay in step with the
** order of the entries in azSql[]. Entries 24 and 25
** (SQL_SELECT_ALL_PREFIX_LEVEL and SQL_DELETE_ALL_TERMS_SEGDIR) have
** empty SQL text in azSql[].
*/
#define SQL_DELETE_CONTENT 0
#define SQL_IS_EMPTY 1
#define SQL_DELETE_ALL_CONTENT 2
#define SQL_DELETE_ALL_SEGMENTS 3
#define SQL_DELETE_ALL_SEGDIR 4
#define SQL_DELETE_ALL_DOCSIZE 5
#define SQL_DELETE_ALL_STAT 6
#define SQL_SELECT_CONTENT_BY_ROWID 7
#define SQL_NEXT_SEGMENT_INDEX 8
#define SQL_INSERT_SEGMENTS 9
#define SQL_NEXT_SEGMENTS_ID 10
#define SQL_INSERT_SEGDIR 11
#define SQL_SELECT_LEVEL 12
#define SQL_SELECT_LEVEL_RANGE 13
#define SQL_SELECT_LEVEL_COUNT 14
#define SQL_SELECT_SEGDIR_MAX_LEVEL 15
#define SQL_DELETE_SEGDIR_LEVEL 16
#define SQL_DELETE_SEGMENTS_RANGE 17
#define SQL_CONTENT_INSERT 18
#define SQL_DELETE_DOCSIZE 19
#define SQL_REPLACE_DOCSIZE 20
#define SQL_SELECT_DOCSIZE 21
#define SQL_SELECT_STAT 22
#define SQL_REPLACE_STAT 23

#define SQL_SELECT_ALL_PREFIX_LEVEL 24
#define SQL_DELETE_ALL_TERMS_SEGDIR 25
#define SQL_DELETE_SEGDIR_RANGE 26
#define SQL_SELECT_ALL_LANGID 27
#define SQL_FIND_MERGE_LEVEL 28
#define SQL_MAX_LEAF_NODE_ESTIMATE 29
#define SQL_DELETE_SEGDIR_ENTRY 30
#define SQL_SHIFT_SEGDIR_ENTRY 31
#define SQL_SELECT_SEGDIR 32
#define SQL_CHOMP_SEGDIR 33
#define SQL_SEGMENT_IS_APPENDABLE 34
#define SQL_SELECT_INDEXES 35
#define SQL_SELECT_MXLEVEL 36

#define SQL_SELECT_LEVEL_RANGE2 37
#define SQL_UPDATE_LEVEL_IDX 38
#define SQL_UPDATE_LEVEL 39
| 11273 |
| 11274 /* |
| 11275 ** This function is used to obtain an SQLite prepared statement handle |
| 11276 ** for the statement identified by the second argument. If successful, |
| 11277 ** *pp is set to the requested statement handle and SQLITE_OK returned. |
| 11278 ** Otherwise, an SQLite error code is returned and *pp is set to 0. |
| 11279 ** |
| 11280 ** If argument apVal is not NULL, then it must point to an array with |
| 11281 ** at least as many entries as the requested statement has bound |
| 11282 ** parameters. The values are bound to the statements parameters before |
| 11283 ** returning. |
| 11284 */ |
| 11285 static int fts3SqlStmt( |
| 11286 Fts3Table *p, /* Virtual table handle */ |
| 11287 int eStmt, /* One of the SQL_XXX constants above */ |
| 11288 sqlite3_stmt **pp, /* OUT: Statement handle */ |
| 11289 sqlite3_value **apVal /* Values to bind to statement */ |
| 11290 ){ |
| 11291 const char *azSql[] = { |
| 11292 /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", |
| 11293 /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", |
| 11294 /* 2 */ "DELETE FROM %Q.'%q_content'", |
| 11295 /* 3 */ "DELETE FROM %Q.'%q_segments'", |
| 11296 /* 4 */ "DELETE FROM %Q.'%q_segdir'", |
| 11297 /* 5 */ "DELETE FROM %Q.'%q_docsize'", |
| 11298 /* 6 */ "DELETE FROM %Q.'%q_stat'", |
| 11299 /* 7 */ "SELECT %s WHERE rowid=?", |
| 11300 /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", |
| 11301 /* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", |
| 11302 /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", |
| 11303 /* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", |
| 11304 |
| 11305 /* Return segments in order from oldest to newest.*/ |
| 11306 /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11307 "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", |
| 11308 /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11309 "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" |
| 11310 "ORDER BY level DESC, idx ASC", |
| 11311 |
| 11312 /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", |
| 11313 /* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", |
| 11314 |
| 11315 /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", |
| 11316 /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", |
| 11317 /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", |
| 11318 /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", |
| 11319 /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", |
| 11320 /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", |
| 11321 /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", |
| 11322 /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", |
| 11323 /* 24 */ "", |
| 11324 /* 25 */ "", |
| 11325 |
| 11326 /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", |
| 11327 /* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", |
| 11328 |
| 11329 /* This statement is used to determine which level to read the input from |
| 11330 ** when performing an incremental merge. It returns the absolute level number |
| 11331 ** of the oldest level in the db that contains at least ? segments. Or, |
| 11332 ** if no level in the FTS index contains more than ? segments, the statement |
| 11333 ** returns zero rows. */ |
| 11334 /* 28 */ "SELECT level, count(*) AS cnt FROM %Q.'%q_segdir' " |
| 11335 " GROUP BY level HAVING cnt>=?" |
| 11336 " ORDER BY (level %% 1024) ASC LIMIT 1", |
| 11337 |
| 11338 /* Estimate the upper limit on the number of leaf nodes in a new segment |
| 11339 ** created by merging the oldest :2 segments from absolute level :1. See |
| 11340 ** function sqlite3Fts3Incrmerge() for details. */ |
| 11341 /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " |
| 11342 " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", |
| 11343 |
| 11344 /* SQL_DELETE_SEGDIR_ENTRY |
| 11345 ** Delete the %_segdir entry on absolute level :1 with index :2. */ |
| 11346 /* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", |
| 11347 |
| 11348 /* SQL_SHIFT_SEGDIR_ENTRY |
| 11349 ** Modify the idx value for the segment with idx=:3 on absolute level :2 |
| 11350 ** to :1. */ |
| 11351 /* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", |
| 11352 |
| 11353 /* SQL_SELECT_SEGDIR |
| 11354 ** Read a single entry from the %_segdir table. The entry from absolute |
| 11355 ** level :1 with index value :2. */ |
| 11356 /* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " |
| 11357 "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", |
| 11358 |
| 11359 /* SQL_CHOMP_SEGDIR |
| 11360 ** Update the start_block (:1) and root (:2) fields of the %_segdir |
| 11361 ** entry located on absolute level :3 with index :4. */ |
| 11362 /* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" |
| 11363 "WHERE level = ? AND idx = ?", |
| 11364 |
| 11365 /* SQL_SEGMENT_IS_APPENDABLE |
| 11366 ** Return a single row if the segment with end_block=? is appendable. Or |
| 11367 ** no rows otherwise. */ |
| 11368 /* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", |
| 11369 |
| 11370 /* SQL_SELECT_INDEXES |
| 11371 ** Return the list of valid segment indexes for absolute level ? */ |
| 11372 /* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", |
| 11373 |
| 11374 /* SQL_SELECT_MXLEVEL |
| 11375 ** Return the largest relative level in the FTS index or indexes. */ |
| 11376 /* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", |
| 11377 |
| 11378 /* Return segments in order from oldest to newest.*/ |
| 11379 /* 37 */ "SELECT level, idx, end_block " |
| 11380 "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " |
| 11381 "ORDER BY level DESC, idx ASC", |
| 11382 |
| 11383 /* Update statements used while promoting segments */ |
| 11384 /* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " |
| 11385 "WHERE level=? AND idx=?", |
| 11386 /* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" |
| 11387 |
| 11388 }; |
| 11389 int rc = SQLITE_OK; |
| 11390 sqlite3_stmt *pStmt; |
| 11391 |
| 11392 assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); |
| 11393 assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); |
| 11394 |
| 11395 pStmt = p->aStmt[eStmt]; |
| 11396 if( !pStmt ){ |
| 11397 char *zSql; |
| 11398 if( eStmt==SQL_CONTENT_INSERT ){ |
| 11399 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); |
| 11400 }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ |
| 11401 zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); |
| 11402 }else{ |
| 11403 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); |
| 11404 } |
| 11405 if( !zSql ){ |
| 11406 rc = SQLITE_NOMEM; |
| 11407 }else{ |
| 11408 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); |
| 11409 sqlite3_free(zSql); |
| 11410 assert( rc==SQLITE_OK || pStmt==0 ); |
| 11411 p->aStmt[eStmt] = pStmt; |
| 11412 } |
| 11413 } |
| 11414 if( apVal ){ |
| 11415 int i; |
| 11416 int nParam = sqlite3_bind_parameter_count(pStmt); |
| 11417 for(i=0; rc==SQLITE_OK && i<nParam; i++){ |
| 11418 rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); |
| 11419 } |
| 11420 } |
| 11421 *pp = pStmt; |
| 11422 return rc; |
| 11423 } |
| 11424 |
| 11425 |
| 11426 static int fts3SelectDocsize( |
| 11427 Fts3Table *pTab, /* FTS3 table handle */ |
| 11428 sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ |
| 11429 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11430 ){ |
| 11431 sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ |
| 11432 int rc; /* Return code */ |
| 11433 |
| 11434 rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); |
| 11435 if( rc==SQLITE_OK ){ |
| 11436 sqlite3_bind_int64(pStmt, 1, iDocid); |
| 11437 rc = sqlite3_step(pStmt); |
| 11438 if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ |
| 11439 rc = sqlite3_reset(pStmt); |
| 11440 if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; |
| 11441 pStmt = 0; |
| 11442 }else{ |
| 11443 rc = SQLITE_OK; |
| 11444 } |
| 11445 } |
| 11446 |
| 11447 *ppStmt = pStmt; |
| 11448 return rc; |
| 11449 } |
| 11450 |
| 11451 SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( |
| 11452 Fts3Table *pTab, /* Fts3 table handle */ |
| 11453 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11454 ){ |
| 11455 sqlite3_stmt *pStmt = 0; |
| 11456 int rc; |
| 11457 rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); |
| 11458 if( rc==SQLITE_OK ){ |
| 11459 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 11460 if( sqlite3_step(pStmt)!=SQLITE_ROW |
| 11461 || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB |
| 11462 ){ |
| 11463 rc = sqlite3_reset(pStmt); |
| 11464 if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; |
| 11465 pStmt = 0; |
| 11466 } |
| 11467 } |
| 11468 *ppStmt = pStmt; |
| 11469 return rc; |
| 11470 } |
| 11471 |
| 11472 SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( |
| 11473 Fts3Table *pTab, /* Fts3 table handle */ |
| 11474 sqlite3_int64 iDocid, /* Docid to read size data for */ |
| 11475 sqlite3_stmt **ppStmt /* OUT: Statement handle */ |
| 11476 ){ |
| 11477 return fts3SelectDocsize(pTab, iDocid, ppStmt); |
| 11478 } |
| 11479 |
| 11480 /* |
| 11481 ** Similar to fts3SqlStmt(). Except, after binding the parameters in |
| 11482 ** array apVal[] to the SQL statement identified by eStmt, the statement |
| 11483 ** is executed. |
| 11484 ** |
| 11485 ** Returns SQLITE_OK if the statement is successfully executed, or an |
| 11486 ** SQLite error code otherwise. |
| 11487 */ |
| 11488 static void fts3SqlExec( |
| 11489 int *pRC, /* Result code */ |
| 11490 Fts3Table *p, /* The FTS3 table */ |
| 11491 int eStmt, /* Index of statement to evaluate */ |
| 11492 sqlite3_value **apVal /* Parameters to bind */ |
| 11493 ){ |
| 11494 sqlite3_stmt *pStmt; |
| 11495 int rc; |
| 11496 if( *pRC ) return; |
| 11497 rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); |
| 11498 if( rc==SQLITE_OK ){ |
| 11499 sqlite3_step(pStmt); |
| 11500 rc = sqlite3_reset(pStmt); |
| 11501 } |
| 11502 *pRC = rc; |
| 11503 } |
| 11504 |
| 11505 |
| 11506 /* |
| 11507 ** This function ensures that the caller has obtained an exclusive |
| 11508 ** shared-cache table-lock on the %_segdir table. This is required before |
| 11509 ** writing data to the fts3 table. If this lock is not acquired first, then |
| 11510 ** the caller may end up attempting to take this lock as part of committing |
| 11511 ** a transaction, causing SQLite to return SQLITE_LOCKED or |
| 11512 ** LOCKED_SHAREDCACHEto a COMMIT command. |
| 11513 ** |
| 11514 ** It is best to avoid this because if FTS3 returns any error when |
| 11515 ** committing a transaction, the whole transaction will be rolled back. |
| 11516 ** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. |
| 11517 ** It can still happen if the user locks the underlying tables directly |
| 11518 ** instead of accessing them via FTS. |
| 11519 */ |
| 11520 static int fts3Writelock(Fts3Table *p){ |
| 11521 int rc = SQLITE_OK; |
| 11522 |
| 11523 if( p->nPendingData==0 ){ |
| 11524 sqlite3_stmt *pStmt; |
| 11525 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); |
| 11526 if( rc==SQLITE_OK ){ |
| 11527 sqlite3_bind_null(pStmt, 1); |
| 11528 sqlite3_step(pStmt); |
| 11529 rc = sqlite3_reset(pStmt); |
| 11530 } |
| 11531 } |
| 11532 |
| 11533 return rc; |
| 11534 } |
| 11535 |
| 11536 /* |
| 11537 ** FTS maintains a separate indexes for each language-id (a 32-bit integer). |
| 11538 ** Within each language id, a separate index is maintained to store the |
| 11539 ** document terms, and each configured prefix size (configured the FTS |
| 11540 ** "prefix=" option). And each index consists of multiple levels ("relative |
| 11541 ** levels"). |
| 11542 ** |
| 11543 ** All three of these values (the language id, the specific index and the |
| 11544 ** level within the index) are encoded in 64-bit integer values stored |
| 11545 ** in the %_segdir table on disk. This function is used to convert three |
| 11546 ** separate component values into the single 64-bit integer value that |
| 11547 ** can be used to query the %_segdir table. |
| 11548 ** |
| 11549 ** Specifically, each language-id/index combination is allocated 1024 |
| 11550 ** 64-bit integer level values ("absolute levels"). The main terms index |
| 11551 ** for language-id 0 is allocate values 0-1023. The first prefix index |
| 11552 ** (if any) for language-id 0 is allocated values 1024-2047. And so on. |
| 11553 ** Language 1 indexes are allocated immediately following language 0. |
| 11554 ** |
| 11555 ** So, for a system with nPrefix prefix indexes configured, the block of |
| 11556 ** absolute levels that corresponds to language-id iLangid and index |
| 11557 ** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). |
| 11558 */ |
| 11559 static sqlite3_int64 getAbsoluteLevel( |
| 11560 Fts3Table *p, /* FTS3 table handle */ |
| 11561 int iLangid, /* Language id */ |
| 11562 int iIndex, /* Index in p->aIndex[] */ |
| 11563 int iLevel /* Level of segments */ |
| 11564 ){ |
| 11565 sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ |
| 11566 assert( iLangid>=0 ); |
| 11567 assert( p->nIndex>0 ); |
| 11568 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 11569 |
| 11570 iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; |
| 11571 return iBase + iLevel; |
| 11572 } |
| 11573 |
| 11574 /* |
| 11575 ** Set *ppStmt to a statement handle that may be used to iterate through |
| 11576 ** all rows in the %_segdir table, from oldest to newest. If successful, |
| 11577 ** return SQLITE_OK. If an error occurs while preparing the statement, |
| 11578 ** return an SQLite error code. |
| 11579 ** |
| 11580 ** There is only ever one instance of this SQL statement compiled for |
| 11581 ** each FTS3 table. |
| 11582 ** |
| 11583 ** The statement returns the following columns from the %_segdir table: |
| 11584 ** |
| 11585 ** 0: idx |
| 11586 ** 1: start_block |
| 11587 ** 2: leaves_end_block |
| 11588 ** 3: end_block |
| 11589 ** 4: root |
| 11590 */ |
| 11591 SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( |
| 11592 Fts3Table *p, /* FTS3 table */ |
| 11593 int iLangid, /* Language being queried */ |
| 11594 int iIndex, /* Index for p->aIndex[] */ |
| 11595 int iLevel, /* Level to select (relative level) */ |
| 11596 sqlite3_stmt **ppStmt /* OUT: Compiled statement */ |
| 11597 ){ |
| 11598 int rc; |
| 11599 sqlite3_stmt *pStmt = 0; |
| 11600 |
| 11601 assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); |
| 11602 assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
| 11603 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 11604 |
| 11605 if( iLevel<0 ){ |
| 11606 /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ |
| 11607 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); |
| 11608 if( rc==SQLITE_OK ){ |
| 11609 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 11610 sqlite3_bind_int64(pStmt, 2, |
| 11611 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 11612 ); |
| 11613 } |
| 11614 }else{ |
| 11615 /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ |
| 11616 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); |
| 11617 if( rc==SQLITE_OK ){ |
| 11618 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); |
| 11619 } |
| 11620 } |
| 11621 *ppStmt = pStmt; |
| 11622 return rc; |
| 11623 } |
| 11624 |
| 11625 |
| 11626 /* |
| 11627 ** Append a single varint to a PendingList buffer. SQLITE_OK is returned |
| 11628 ** if successful, or an SQLite error code otherwise. |
| 11629 ** |
| 11630 ** This function also serves to allocate the PendingList structure itself. |
| 11631 ** For example, to create a new PendingList structure containing two |
| 11632 ** varints: |
| 11633 ** |
| 11634 ** PendingList *p = 0; |
| 11635 ** fts3PendingListAppendVarint(&p, 1); |
| 11636 ** fts3PendingListAppendVarint(&p, 2); |
| 11637 */ |
| 11638 static int fts3PendingListAppendVarint( |
| 11639 PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ |
| 11640 sqlite3_int64 i /* Value to append to data */ |
| 11641 ){ |
| 11642 PendingList *p = *pp; |
| 11643 |
| 11644 /* Allocate or grow the PendingList as required. */ |
| 11645 if( !p ){ |
| 11646 p = sqlite3_malloc(sizeof(*p) + 100); |
| 11647 if( !p ){ |
| 11648 return SQLITE_NOMEM; |
| 11649 } |
| 11650 p->nSpace = 100; |
| 11651 p->aData = (char *)&p[1]; |
| 11652 p->nData = 0; |
| 11653 } |
| 11654 else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ |
| 11655 int nNew = p->nSpace * 2; |
| 11656 p = sqlite3_realloc(p, sizeof(*p) + nNew); |
| 11657 if( !p ){ |
| 11658 sqlite3_free(*pp); |
| 11659 *pp = 0; |
| 11660 return SQLITE_NOMEM; |
| 11661 } |
| 11662 p->nSpace = nNew; |
| 11663 p->aData = (char *)&p[1]; |
| 11664 } |
| 11665 |
| 11666 /* Append the new serialized varint to the end of the list. */ |
| 11667 p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); |
| 11668 p->aData[p->nData] = '\0'; |
| 11669 *pp = p; |
| 11670 return SQLITE_OK; |
| 11671 } |
| 11672 |
| 11673 /* |
| 11674 ** Add a docid/column/position entry to a PendingList structure. Non-zero |
| 11675 ** is returned if the structure is sqlite3_realloced as part of adding |
| 11676 ** the entry. Otherwise, zero. |
| 11677 ** |
| 11678 ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. |
| 11679 ** Zero is always returned in this case. Otherwise, if no OOM error occurs, |
| 11680 ** it is set to SQLITE_OK. |
| 11681 */ |
| 11682 static int fts3PendingListAppend( |
| 11683 PendingList **pp, /* IN/OUT: PendingList structure */ |
| 11684 sqlite3_int64 iDocid, /* Docid for entry to add */ |
| 11685 sqlite3_int64 iCol, /* Column for entry to add */ |
| 11686 sqlite3_int64 iPos, /* Position of term for entry to add */ |
| 11687 int *pRc /* OUT: Return code */ |
| 11688 ){ |
| 11689 PendingList *p = *pp; |
| 11690 int rc = SQLITE_OK; |
| 11691 |
| 11692 assert( !p || p->iLastDocid<=iDocid ); |
| 11693 |
| 11694 if( !p || p->iLastDocid!=iDocid ){ |
| 11695 sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); |
| 11696 if( p ){ |
| 11697 assert( p->nData<p->nSpace ); |
| 11698 assert( p->aData[p->nData]==0 ); |
| 11699 p->nData++; |
| 11700 } |
| 11701 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ |
| 11702 goto pendinglistappend_out; |
| 11703 } |
| 11704 p->iLastCol = -1; |
| 11705 p->iLastPos = 0; |
| 11706 p->iLastDocid = iDocid; |
| 11707 } |
| 11708 if( iCol>0 && p->iLastCol!=iCol ){ |
| 11709 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) |
| 11710 || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) |
| 11711 ){ |
| 11712 goto pendinglistappend_out; |
| 11713 } |
| 11714 p->iLastCol = iCol; |
| 11715 p->iLastPos = 0; |
| 11716 } |
| 11717 if( iCol>=0 ){ |
| 11718 assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); |
| 11719 rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); |
| 11720 if( rc==SQLITE_OK ){ |
| 11721 p->iLastPos = iPos; |
| 11722 } |
| 11723 } |
| 11724 |
| 11725 pendinglistappend_out: |
| 11726 *pRc = rc; |
| 11727 if( p!=*pp ){ |
| 11728 *pp = p; |
| 11729 return 1; |
| 11730 } |
| 11731 return 0; |
| 11732 } |
| 11733 |
| 11734 /* |
| 11735 ** Free a PendingList object allocated by fts3PendingListAppend(). |
| 11736 */ |
| 11737 static void fts3PendingListDelete(PendingList *pList){ |
| 11738 sqlite3_free(pList); |
| 11739 } |
| 11740 |
| 11741 /* |
| 11742 ** Add an entry to one of the pending-terms hash tables. |
| 11743 */ |
| 11744 static int fts3PendingTermsAddOne( |
| 11745 Fts3Table *p, |
| 11746 int iCol, |
| 11747 int iPos, |
| 11748 Fts3Hash *pHash, /* Pending terms hash table to add entry to */ |
| 11749 const char *zToken, |
| 11750 int nToken |
| 11751 ){ |
| 11752 PendingList *pList; |
| 11753 int rc = SQLITE_OK; |
| 11754 |
| 11755 pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); |
| 11756 if( pList ){ |
| 11757 p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); |
| 11758 } |
| 11759 if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ |
| 11760 if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ |
| 11761 /* Malloc failed while inserting the new entry. This can only |
| 11762 ** happen if there was no previous entry for this token. |
| 11763 */ |
| 11764 assert( 0==fts3HashFind(pHash, zToken, nToken) ); |
| 11765 sqlite3_free(pList); |
| 11766 rc = SQLITE_NOMEM; |
| 11767 } |
| 11768 } |
| 11769 if( rc==SQLITE_OK ){ |
| 11770 p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); |
| 11771 } |
| 11772 return rc; |
| 11773 } |
| 11774 |
| 11775 /* |
| 11776 ** Tokenize the nul-terminated string zText and add all tokens to the |
| 11777 ** pending-terms hash-table. The docid used is that currently stored in |
| 11778 ** p->iPrevDocid, and the column is specified by argument iCol. |
| 11779 ** |
| 11780 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. |
| 11781 */ |
| 11782 static int fts3PendingTermsAdd( |
| 11783 Fts3Table *p, /* Table into which text will be inserted */ |
| 11784 int iLangid, /* Language id to use */ |
| 11785 const char *zText, /* Text of document to be inserted */ |
| 11786 int iCol, /* Column into which text is being inserted */ |
| 11787 u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ |
| 11788 ){ |
| 11789 int rc; |
| 11790 int iStart = 0; |
| 11791 int iEnd = 0; |
| 11792 int iPos = 0; |
| 11793 int nWord = 0; |
| 11794 |
| 11795 char const *zToken; |
| 11796 int nToken = 0; |
| 11797 |
| 11798 sqlite3_tokenizer *pTokenizer = p->pTokenizer; |
| 11799 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 11800 sqlite3_tokenizer_cursor *pCsr; |
| 11801 int (*xNext)(sqlite3_tokenizer_cursor *pCursor, |
| 11802 const char**,int*,int*,int*,int*); |
| 11803 |
| 11804 assert( pTokenizer && pModule ); |
| 11805 |
| 11806 /* If the user has inserted a NULL value, this function may be called with |
| 11807 ** zText==0. In this case, add zero token entries to the hash table and |
| 11808 ** return early. */ |
| 11809 if( zText==0 ){ |
| 11810 *pnWord = 0; |
| 11811 return SQLITE_OK; |
| 11812 } |
| 11813 |
| 11814 rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); |
| 11815 if( rc!=SQLITE_OK ){ |
| 11816 return rc; |
| 11817 } |
| 11818 |
| 11819 xNext = pModule->xNext; |
| 11820 while( SQLITE_OK==rc |
| 11821 && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) |
| 11822 ){ |
| 11823 int i; |
| 11824 if( iPos>=nWord ) nWord = iPos+1; |
| 11825 |
| 11826 /* Positions cannot be negative; we use -1 as a terminator internally. |
| 11827 ** Tokens must have a non-zero length. |
| 11828 */ |
| 11829 if( iPos<0 || !zToken || nToken<=0 ){ |
| 11830 rc = SQLITE_ERROR; |
| 11831 break; |
| 11832 } |
| 11833 |
| 11834 /* Add the term to the terms index */ |
| 11835 rc = fts3PendingTermsAddOne( |
| 11836 p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken |
| 11837 ); |
| 11838 |
| 11839 /* Add the term to each of the prefix indexes that it is not too |
| 11840 ** short for. */ |
| 11841 for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 11842 struct Fts3Index *pIndex = &p->aIndex[i]; |
| 11843 if( nToken<pIndex->nPrefix ) continue; |
| 11844 rc = fts3PendingTermsAddOne( |
| 11845 p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix |
| 11846 ); |
| 11847 } |
| 11848 } |
| 11849 |
| 11850 pModule->xClose(pCsr); |
| 11851 *pnWord += nWord; |
| 11852 return (rc==SQLITE_DONE ? SQLITE_OK : rc); |
| 11853 } |
| 11854 |
| 11855 /* |
| 11856 ** Calling this function indicates that subsequent calls to |
| 11857 ** fts3PendingTermsAdd() are to add term/position-list pairs for the |
| 11858 ** contents of the document with docid iDocid. |
| 11859 */ |
| 11860 static int fts3PendingTermsDocid( |
| 11861 Fts3Table *p, /* Full-text table handle */ |
| 11862 int bDelete, /* True if this op is a delete */ |
| 11863 int iLangid, /* Language id of row being written */ |
| 11864 sqlite_int64 iDocid /* Docid of row being written */ |
| 11865 ){ |
| 11866 assert( iLangid>=0 ); |
| 11867 assert( bDelete==1 || bDelete==0 ); |
| 11868 |
| 11869 /* TODO(shess) Explore whether partially flushing the buffer on |
| 11870 ** forced-flush would provide better performance. I suspect that if |
| 11871 ** we ordered the doclists by size and flushed the largest until the |
| 11872 ** buffer was half empty, that would let the less frequent terms |
| 11873 ** generate longer doclists. |
| 11874 */ |
| 11875 if( iDocid<p->iPrevDocid |
| 11876 || (iDocid==p->iPrevDocid && p->bPrevDelete==0) |
| 11877 || p->iPrevLangid!=iLangid |
| 11878 || p->nPendingData>p->nMaxPendingData |
| 11879 ){ |
| 11880 int rc = sqlite3Fts3PendingTermsFlush(p); |
| 11881 if( rc!=SQLITE_OK ) return rc; |
| 11882 } |
| 11883 p->iPrevDocid = iDocid; |
| 11884 p->iPrevLangid = iLangid; |
| 11885 p->bPrevDelete = bDelete; |
| 11886 return SQLITE_OK; |
| 11887 } |
| 11888 |
| 11889 /* |
| 11890 ** Discard the contents of the pending-terms hash tables. |
| 11891 */ |
| 11892 SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ |
| 11893 int i; |
| 11894 for(i=0; i<p->nIndex; i++){ |
| 11895 Fts3HashElem *pElem; |
| 11896 Fts3Hash *pHash = &p->aIndex[i].hPending; |
| 11897 for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ |
| 11898 PendingList *pList = (PendingList *)fts3HashData(pElem); |
| 11899 fts3PendingListDelete(pList); |
| 11900 } |
| 11901 fts3HashClear(pHash); |
| 11902 } |
| 11903 p->nPendingData = 0; |
| 11904 } |
| 11905 |
| 11906 /* |
| 11907 ** This function is called by the xUpdate() method as part of an INSERT |
| 11908 ** operation. It adds entries for each term in the new record to the |
| 11909 ** pendingTerms hash table. |
| 11910 ** |
| 11911 ** Argument apVal is the same as the similarly named argument passed to |
| 11912 ** fts3InsertData(). Parameter iDocid is the docid of the new row. |
| 11913 */ |
| 11914 static int fts3InsertTerms( |
| 11915 Fts3Table *p, |
| 11916 int iLangid, |
| 11917 sqlite3_value **apVal, |
| 11918 u32 *aSz |
| 11919 ){ |
| 11920 int i; /* Iterator variable */ |
| 11921 for(i=2; i<p->nColumn+2; i++){ |
| 11922 int iCol = i-2; |
| 11923 if( p->abNotindexed[iCol]==0 ){ |
| 11924 const char *zText = (const char *)sqlite3_value_text(apVal[i]); |
| 11925 int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); |
| 11926 if( rc!=SQLITE_OK ){ |
| 11927 return rc; |
| 11928 } |
| 11929 aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); |
| 11930 } |
| 11931 } |
| 11932 return SQLITE_OK; |
| 11933 } |
| 11934 |
| 11935 /* |
| 11936 ** This function is called by the xUpdate() method for an INSERT operation. |
| 11937 ** The apVal parameter is passed a copy of the apVal argument passed by |
| 11938 ** SQLite to the xUpdate() method. i.e: |
| 11939 ** |
| 11940 ** apVal[0] Not used for INSERT. |
| 11941 ** apVal[1] rowid |
| 11942 ** apVal[2] Left-most user-defined column |
| 11943 ** ... |
| 11944 ** apVal[p->nColumn+1] Right-most user-defined column |
| 11945 ** apVal[p->nColumn+2] Hidden column with same name as table |
| 11946 ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) |
| 11947 ** apVal[p->nColumn+4] Hidden languageid column |
| 11948 */ |
| 11949 static int fts3InsertData( |
| 11950 Fts3Table *p, /* Full-text table */ |
| 11951 sqlite3_value **apVal, /* Array of values to insert */ |
| 11952 sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ |
| 11953 ){ |
| 11954 int rc; /* Return code */ |
| 11955 sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ |
| 11956 |
| 11957 if( p->zContentTbl ){ |
| 11958 sqlite3_value *pRowid = apVal[p->nColumn+3]; |
| 11959 if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ |
| 11960 pRowid = apVal[1]; |
| 11961 } |
| 11962 if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ |
| 11963 return SQLITE_CONSTRAINT; |
| 11964 } |
| 11965 *piDocid = sqlite3_value_int64(pRowid); |
| 11966 return SQLITE_OK; |
| 11967 } |
| 11968 |
| 11969 /* Locate the statement handle used to insert data into the %_content |
| 11970 ** table. The SQL for this statement is: |
| 11971 ** |
| 11972 ** INSERT INTO %_content VALUES(?, ?, ?, ...) |
| 11973 ** |
| 11974 ** The statement features N '?' variables, where N is the number of user |
| 11975 ** defined columns in the FTS3 table, plus one for the docid field. |
| 11976 */ |
| 11977 rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); |
| 11978 if( rc==SQLITE_OK && p->zLanguageid ){ |
| 11979 rc = sqlite3_bind_int( |
| 11980 pContentInsert, p->nColumn+2, |
| 11981 sqlite3_value_int(apVal[p->nColumn+4]) |
| 11982 ); |
| 11983 } |
| 11984 if( rc!=SQLITE_OK ) return rc; |
| 11985 |
| 11986 /* There is a quirk here. The users INSERT statement may have specified |
| 11987 ** a value for the "rowid" field, for the "docid" field, or for both. |
| 11988 ** Which is a problem, since "rowid" and "docid" are aliases for the |
| 11989 ** same value. For example: |
| 11990 ** |
| 11991 ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); |
| 11992 ** |
| 11993 ** In FTS3, this is an error. It is an error to specify non-NULL values |
| 11994 ** for both docid and some other rowid alias. |
| 11995 */ |
| 11996 if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ |
| 11997 if( SQLITE_NULL==sqlite3_value_type(apVal[0]) |
| 11998 && SQLITE_NULL!=sqlite3_value_type(apVal[1]) |
| 11999 ){ |
| 12000 /* A rowid/docid conflict. */ |
| 12001 return SQLITE_ERROR; |
| 12002 } |
| 12003 rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); |
| 12004 if( rc!=SQLITE_OK ) return rc; |
| 12005 } |
| 12006 |
| 12007 /* Execute the statement to insert the record. Set *piDocid to the |
| 12008 ** new docid value. |
| 12009 */ |
| 12010 sqlite3_step(pContentInsert); |
| 12011 rc = sqlite3_reset(pContentInsert); |
| 12012 |
| 12013 *piDocid = sqlite3_last_insert_rowid(p->db); |
| 12014 return rc; |
| 12015 } |
| 12016 |
| 12017 |
| 12018 |
| 12019 /* |
| 12020 ** Remove all data from the FTS3 table. Clear the hash table containing |
| 12021 ** pending terms. |
| 12022 */ |
| 12023 static int fts3DeleteAll(Fts3Table *p, int bContent){ |
| 12024 int rc = SQLITE_OK; /* Return code */ |
| 12025 |
| 12026 /* Discard the contents of the pending-terms hash table. */ |
| 12027 sqlite3Fts3PendingTermsClear(p); |
| 12028 |
| 12029 /* Delete everything from the shadow tables. Except, leave %_content as |
| 12030 ** is if bContent is false. */ |
| 12031 assert( p->zContentTbl==0 || bContent==0 ); |
| 12032 if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); |
| 12033 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); |
| 12034 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); |
| 12035 if( p->bHasDocsize ){ |
| 12036 fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); |
| 12037 } |
| 12038 if( p->bHasStat ){ |
| 12039 fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); |
| 12040 } |
| 12041 return rc; |
| 12042 } |
| 12043 |
| 12044 /* |
| 12045 ** |
| 12046 */ |
| 12047 static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ |
| 12048 int iLangid = 0; |
| 12049 if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); |
| 12050 return iLangid; |
| 12051 } |
| 12052 |
| 12053 /* |
| 12054 ** The first element in the apVal[] array is assumed to contain the docid |
| 12055 ** (an integer) of a row about to be deleted. Remove all terms from the |
| 12056 ** full-text index. |
| 12057 */ |
| 12058 static void fts3DeleteTerms( |
| 12059 int *pRC, /* Result code */ |
| 12060 Fts3Table *p, /* The FTS table to delete from */ |
| 12061 sqlite3_value *pRowid, /* The docid to be deleted */ |
| 12062 u32 *aSz, /* Sizes of deleted document written here */ |
| 12063 int *pbFound /* OUT: Set to true if row really does exist */ |
| 12064 ){ |
| 12065 int rc; |
| 12066 sqlite3_stmt *pSelect; |
| 12067 |
| 12068 assert( *pbFound==0 ); |
| 12069 if( *pRC ) return; |
| 12070 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); |
| 12071 if( rc==SQLITE_OK ){ |
| 12072 if( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 12073 int i; |
| 12074 int iLangid = langidFromSelect(p, pSelect); |
| 12075 i64 iDocid = sqlite3_column_int64(pSelect, 0); |
| 12076 rc = fts3PendingTermsDocid(p, 1, iLangid, iDocid); |
| 12077 for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ |
| 12078 int iCol = i-1; |
| 12079 if( p->abNotindexed[iCol]==0 ){ |
| 12080 const char *zText = (const char *)sqlite3_column_text(pSelect, i); |
| 12081 rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); |
| 12082 aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); |
| 12083 } |
| 12084 } |
| 12085 if( rc!=SQLITE_OK ){ |
| 12086 sqlite3_reset(pSelect); |
| 12087 *pRC = rc; |
| 12088 return; |
| 12089 } |
| 12090 *pbFound = 1; |
| 12091 } |
| 12092 rc = sqlite3_reset(pSelect); |
| 12093 }else{ |
| 12094 sqlite3_reset(pSelect); |
| 12095 } |
| 12096 *pRC = rc; |
| 12097 } |
| 12098 |
| 12099 /* |
| 12100 ** Forward declaration to account for the circular dependency between |
| 12101 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). |
| 12102 */ |
| 12103 static int fts3SegmentMerge(Fts3Table *, int, int, int); |
| 12104 |
| 12105 /* |
| 12106 ** This function allocates a new level iLevel index in the segdir table. |
| 12107 ** Usually, indexes are allocated within a level sequentially starting |
| 12108 ** with 0, so the allocated index is one greater than the value returned |
| 12109 ** by: |
| 12110 ** |
| 12111 ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel |
| 12112 ** |
| 12113 ** However, if there are already FTS3_MERGE_COUNT indexes at the requested |
| 12114 ** level, they are merged into a single level (iLevel+1) segment and the |
| 12115 ** allocated index is 0. |
| 12116 ** |
| 12117 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK |
| 12118 ** returned. Otherwise, an SQLite error code is returned. |
| 12119 */ |
| 12120 static int fts3AllocateSegdirIdx( |
| 12121 Fts3Table *p, |
| 12122 int iLangid, /* Language id */ |
| 12123 int iIndex, /* Index for p->aIndex */ |
| 12124 int iLevel, |
| 12125 int *piIdx |
| 12126 ){ |
| 12127 int rc; /* Return Code */ |
| 12128 sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ |
| 12129 int iNext = 0; /* Result of query pNextIdx */ |
| 12130 |
| 12131 assert( iLangid>=0 ); |
| 12132 assert( p->nIndex>=1 ); |
| 12133 |
| 12134 /* Set variable iNext to the next available segdir index at level iLevel. */ |
| 12135 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); |
| 12136 if( rc==SQLITE_OK ){ |
| 12137 sqlite3_bind_int64( |
| 12138 pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) |
| 12139 ); |
| 12140 if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ |
| 12141 iNext = sqlite3_column_int(pNextIdx, 0); |
| 12142 } |
| 12143 rc = sqlite3_reset(pNextIdx); |
| 12144 } |
| 12145 |
| 12146 if( rc==SQLITE_OK ){ |
| 12147 /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already |
| 12148 ** full, merge all segments in level iLevel into a single iLevel+1 |
| 12149 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, |
| 12150 ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. |
| 12151 */ |
| 12152 if( iNext>=FTS3_MERGE_COUNT ){ |
| 12153 fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); |
| 12154 rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); |
| 12155 *piIdx = 0; |
| 12156 }else{ |
| 12157 *piIdx = iNext; |
| 12158 } |
| 12159 } |
| 12160 |
| 12161 return rc; |
| 12162 } |
| 12163 |
| 12164 /* |
| 12165 ** The %_segments table is declared as follows: |
| 12166 ** |
| 12167 ** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) |
| 12168 ** |
| 12169 ** This function reads data from a single row of the %_segments table. The |
| 12170 ** specific row is identified by the iBlockid parameter. If paBlob is not |
| 12171 ** NULL, then a buffer is allocated using sqlite3_malloc() and populated |
| 12172 ** with the contents of the blob stored in the "block" column of the |
| 12173 ** identified table row is. Whether or not paBlob is NULL, *pnBlob is set |
| 12174 ** to the size of the blob in bytes before returning. |
| 12175 ** |
| 12176 ** If an error occurs, or the table does not contain the specified row, |
| 12177 ** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If |
| 12178 ** paBlob is non-NULL, then it is the responsibility of the caller to |
| 12179 ** eventually free the returned buffer. |
| 12180 ** |
| 12181 ** This function may leave an open sqlite3_blob* handle in the |
| 12182 ** Fts3Table.pSegments variable. This handle is reused by subsequent calls |
| 12183 ** to this function. The handle may be closed by calling the |
| 12184 ** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy |
| 12185 ** performance improvement, but the blob handle should always be closed |
| 12186 ** before control is returned to the user (to prevent a lock being held |
| 12187 ** on the database file for longer than necessary). Thus, any virtual table |
| 12188 ** method (xFilter etc.) that may directly or indirectly call this function |
| 12189 ** must call sqlite3Fts3SegmentsClose() before returning. |
| 12190 */ |
| 12191 SQLITE_PRIVATE int sqlite3Fts3ReadBlock( |
| 12192 Fts3Table *p, /* FTS3 table handle */ |
| 12193 sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ |
| 12194 char **paBlob, /* OUT: Blob data in malloc'd buffer */ |
| 12195 int *pnBlob, /* OUT: Size of blob data */ |
| 12196 int *pnLoad /* OUT: Bytes actually loaded */ |
| 12197 ){ |
| 12198 int rc; /* Return code */ |
| 12199 |
| 12200 /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ |
| 12201 assert( pnBlob ); |
| 12202 |
| 12203 if( p->pSegments ){ |
| 12204 rc = sqlite3_blob_reopen(p->pSegments, iBlockid); |
| 12205 }else{ |
| 12206 if( 0==p->zSegmentsTbl ){ |
| 12207 p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); |
| 12208 if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; |
| 12209 } |
| 12210 rc = sqlite3_blob_open( |
| 12211 p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments |
| 12212 ); |
| 12213 } |
| 12214 |
| 12215 if( rc==SQLITE_OK ){ |
| 12216 int nByte = sqlite3_blob_bytes(p->pSegments); |
| 12217 *pnBlob = nByte; |
| 12218 if( paBlob ){ |
| 12219 char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); |
| 12220 if( !aByte ){ |
| 12221 rc = SQLITE_NOMEM; |
| 12222 }else{ |
| 12223 if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ |
| 12224 nByte = FTS3_NODE_CHUNKSIZE; |
| 12225 *pnLoad = nByte; |
| 12226 } |
| 12227 rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); |
| 12228 memset(&aByte[nByte], 0, FTS3_NODE_PADDING); |
| 12229 if( rc!=SQLITE_OK ){ |
| 12230 sqlite3_free(aByte); |
| 12231 aByte = 0; |
| 12232 } |
| 12233 } |
| 12234 *paBlob = aByte; |
| 12235 } |
| 12236 } |
| 12237 |
| 12238 return rc; |
| 12239 } |
| 12240 |
| 12241 /* |
| 12242 ** Close the blob handle at p->pSegments, if it is open. See comments above |
| 12243 ** the sqlite3Fts3ReadBlock() function for details. |
| 12244 */ |
| 12245 SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ |
| 12246 sqlite3_blob_close(p->pSegments); |
| 12247 p->pSegments = 0; |
| 12248 } |
| 12249 |
| 12250 static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ |
| 12251 int nRead; /* Number of bytes to read */ |
| 12252 int rc; /* Return code */ |
| 12253 |
| 12254 nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); |
| 12255 rc = sqlite3_blob_read( |
| 12256 pReader->pBlob, |
| 12257 &pReader->aNode[pReader->nPopulate], |
| 12258 nRead, |
| 12259 pReader->nPopulate |
| 12260 ); |
| 12261 |
| 12262 if( rc==SQLITE_OK ){ |
| 12263 pReader->nPopulate += nRead; |
| 12264 memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); |
| 12265 if( pReader->nPopulate==pReader->nNode ){ |
| 12266 sqlite3_blob_close(pReader->pBlob); |
| 12267 pReader->pBlob = 0; |
| 12268 pReader->nPopulate = 0; |
| 12269 } |
| 12270 } |
| 12271 return rc; |
| 12272 } |
| 12273 |
| 12274 static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ |
| 12275 int rc = SQLITE_OK; |
| 12276 assert( !pReader->pBlob |
| 12277 || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) |
| 12278 ); |
| 12279 while( pReader->pBlob && rc==SQLITE_OK |
| 12280 && (pFrom - pReader->aNode + nByte)>pReader->nPopulate |
| 12281 ){ |
| 12282 rc = fts3SegReaderIncrRead(pReader); |
| 12283 } |
| 12284 return rc; |
| 12285 } |
| 12286 |
| 12287 /* |
| 12288 ** Set an Fts3SegReader cursor to point at EOF. |
| 12289 */ |
| 12290 static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ |
| 12291 if( !fts3SegReaderIsRootOnly(pSeg) ){ |
| 12292 sqlite3_free(pSeg->aNode); |
| 12293 sqlite3_blob_close(pSeg->pBlob); |
| 12294 pSeg->pBlob = 0; |
| 12295 } |
| 12296 pSeg->aNode = 0; |
| 12297 } |
| 12298 |
| 12299 /* |
| 12300 ** Move the iterator passed as the first argument to the next term in the |
| 12301 ** segment. If successful, SQLITE_OK is returned. If there is no next term, |
| 12302 ** SQLITE_DONE. Otherwise, an SQLite error code. |
| 12303 */ |
| 12304 static int fts3SegReaderNext( |
| 12305 Fts3Table *p, |
| 12306 Fts3SegReader *pReader, |
| 12307 int bIncr |
| 12308 ){ |
| 12309 int rc; /* Return code of various sub-routines */ |
| 12310 char *pNext; /* Cursor variable */ |
| 12311 int nPrefix; /* Number of bytes in term prefix */ |
| 12312 int nSuffix; /* Number of bytes in term suffix */ |
| 12313 |
| 12314 if( !pReader->aDoclist ){ |
| 12315 pNext = pReader->aNode; |
| 12316 }else{ |
| 12317 pNext = &pReader->aDoclist[pReader->nDoclist]; |
| 12318 } |
| 12319 |
| 12320 if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ |
| 12321 |
| 12322 if( fts3SegReaderIsPending(pReader) ){ |
| 12323 Fts3HashElem *pElem = *(pReader->ppNextElem); |
| 12324 sqlite3_free(pReader->aNode); |
| 12325 pReader->aNode = 0; |
| 12326 if( pElem ){ |
| 12327 char *aCopy; |
| 12328 PendingList *pList = (PendingList *)fts3HashData(pElem); |
| 12329 int nCopy = pList->nData+1; |
| 12330 pReader->zTerm = (char *)fts3HashKey(pElem); |
| 12331 pReader->nTerm = fts3HashKeysize(pElem); |
| 12332 aCopy = (char*)sqlite3_malloc(nCopy); |
| 12333 if( !aCopy ) return SQLITE_NOMEM; |
| 12334 memcpy(aCopy, pList->aData, nCopy); |
| 12335 pReader->nNode = pReader->nDoclist = nCopy; |
| 12336 pReader->aNode = pReader->aDoclist = aCopy; |
| 12337 pReader->ppNextElem++; |
| 12338 assert( pReader->aNode ); |
| 12339 } |
| 12340 return SQLITE_OK; |
| 12341 } |
| 12342 |
| 12343 fts3SegReaderSetEof(pReader); |
| 12344 |
| 12345 /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf |
| 12346 ** blocks have already been traversed. */ |
| 12347 assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); |
| 12348 if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ |
| 12349 return SQLITE_OK; |
| 12350 } |
| 12351 |
| 12352 rc = sqlite3Fts3ReadBlock( |
| 12353 p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, |
| 12354 (bIncr ? &pReader->nPopulate : 0) |
| 12355 ); |
| 12356 if( rc!=SQLITE_OK ) return rc; |
| 12357 assert( pReader->pBlob==0 ); |
| 12358 if( bIncr && pReader->nPopulate<pReader->nNode ){ |
| 12359 pReader->pBlob = p->pSegments; |
| 12360 p->pSegments = 0; |
| 12361 } |
| 12362 pNext = pReader->aNode; |
| 12363 } |
| 12364 |
| 12365 assert( !fts3SegReaderIsPending(pReader) ); |
| 12366 |
| 12367 rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); |
| 12368 if( rc!=SQLITE_OK ) return rc; |
| 12369 |
| 12370 /* Because of the FTS3_NODE_PADDING bytes of padding, the following is |
| 12371 ** safe (no risk of overread) even if the node data is corrupted. */ |
| 12372 pNext += fts3GetVarint32(pNext, &nPrefix); |
| 12373 pNext += fts3GetVarint32(pNext, &nSuffix); |
| 12374 if( nPrefix<0 || nSuffix<=0 |
| 12375 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] |
| 12376 ){ |
| 12377 return FTS_CORRUPT_VTAB; |
| 12378 } |
| 12379 |
| 12380 if( nPrefix+nSuffix>pReader->nTermAlloc ){ |
| 12381 int nNew = (nPrefix+nSuffix)*2; |
| 12382 char *zNew = sqlite3_realloc(pReader->zTerm, nNew); |
| 12383 if( !zNew ){ |
| 12384 return SQLITE_NOMEM; |
| 12385 } |
| 12386 pReader->zTerm = zNew; |
| 12387 pReader->nTermAlloc = nNew; |
| 12388 } |
| 12389 |
| 12390 rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); |
| 12391 if( rc!=SQLITE_OK ) return rc; |
| 12392 |
| 12393 memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); |
| 12394 pReader->nTerm = nPrefix+nSuffix; |
| 12395 pNext += nSuffix; |
| 12396 pNext += fts3GetVarint32(pNext, &pReader->nDoclist); |
| 12397 pReader->aDoclist = pNext; |
| 12398 pReader->pOffsetList = 0; |
| 12399 |
| 12400 /* Check that the doclist does not appear to extend past the end of the |
| 12401 ** b-tree node. And that the final byte of the doclist is 0x00. If either |
| 12402 ** of these statements is untrue, then the data structure is corrupt. |
| 12403 */ |
| 12404 if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] |
| 12405 || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) |
| 12406 ){ |
| 12407 return FTS_CORRUPT_VTAB; |
| 12408 } |
| 12409 return SQLITE_OK; |
| 12410 } |
| 12411 |
| 12412 /* |
| 12413 ** Set the SegReader to point to the first docid in the doclist associated |
| 12414 ** with the current term. |
| 12415 */ |
| 12416 static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ |
| 12417 int rc = SQLITE_OK; |
| 12418 assert( pReader->aDoclist ); |
| 12419 assert( !pReader->pOffsetList ); |
| 12420 if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ |
| 12421 u8 bEof = 0; |
| 12422 pReader->iDocid = 0; |
| 12423 pReader->nOffsetList = 0; |
| 12424 sqlite3Fts3DoclistPrev(0, |
| 12425 pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, |
| 12426 &pReader->iDocid, &pReader->nOffsetList, &bEof |
| 12427 ); |
| 12428 }else{ |
| 12429 rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); |
| 12430 if( rc==SQLITE_OK ){ |
| 12431 int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); |
| 12432 pReader->pOffsetList = &pReader->aDoclist[n]; |
| 12433 } |
| 12434 } |
| 12435 return rc; |
| 12436 } |
| 12437 |
| 12438 /* |
| 12439 ** Advance the SegReader to point to the next docid in the doclist |
| 12440 ** associated with the current term. |
| 12441 ** |
| 12442 ** If arguments ppOffsetList and pnOffsetList are not NULL, then |
| 12443 ** *ppOffsetList is set to point to the first column-offset list |
| 12444 ** in the doclist entry (i.e. immediately past the docid varint). |
| 12445 ** *pnOffsetList is set to the length of the set of column-offset |
| 12446 ** lists, not including the nul-terminator byte. For example: |
| 12447 */ |
| 12448 static int fts3SegReaderNextDocid( |
| 12449 Fts3Table *pTab, |
| 12450 Fts3SegReader *pReader, /* Reader to advance to next docid */ |
| 12451 char **ppOffsetList, /* OUT: Pointer to current position-list */ |
| 12452 int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ |
| 12453 ){ |
| 12454 int rc = SQLITE_OK; |
| 12455 char *p = pReader->pOffsetList; |
| 12456 char c = 0; |
| 12457 |
| 12458 assert( p ); |
| 12459 |
| 12460 if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ |
| 12461 /* A pending-terms seg-reader for an FTS4 table that uses order=desc. |
| 12462 ** Pending-terms doclists are always built up in ascending order, so |
| 12463 ** we have to iterate through them backwards here. */ |
| 12464 u8 bEof = 0; |
| 12465 if( ppOffsetList ){ |
| 12466 *ppOffsetList = pReader->pOffsetList; |
| 12467 *pnOffsetList = pReader->nOffsetList - 1; |
| 12468 } |
| 12469 sqlite3Fts3DoclistPrev(0, |
| 12470 pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, |
| 12471 &pReader->nOffsetList, &bEof |
| 12472 ); |
| 12473 if( bEof ){ |
| 12474 pReader->pOffsetList = 0; |
| 12475 }else{ |
| 12476 pReader->pOffsetList = p; |
| 12477 } |
| 12478 }else{ |
| 12479 char *pEnd = &pReader->aDoclist[pReader->nDoclist]; |
| 12480 |
| 12481 /* Pointer p currently points at the first byte of an offset list. The |
| 12482 ** following block advances it to point one byte past the end of |
| 12483 ** the same offset list. */ |
| 12484 while( 1 ){ |
| 12485 |
| 12486 /* The following line of code (and the "p++" below the while() loop) is |
| 12487 ** normally all that is required to move pointer p to the desired |
| 12488 ** position. The exception is if this node is being loaded from disk |
| 12489 ** incrementally and pointer "p" now points to the first byte past |
| 12490 ** the populated part of pReader->aNode[]. |
| 12491 */ |
| 12492 while( *p | c ) c = *p++ & 0x80; |
| 12493 assert( *p==0 ); |
| 12494 |
| 12495 if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; |
| 12496 rc = fts3SegReaderIncrRead(pReader); |
| 12497 if( rc!=SQLITE_OK ) return rc; |
| 12498 } |
| 12499 p++; |
| 12500 |
| 12501 /* If required, populate the output variables with a pointer to and the |
| 12502 ** size of the previous offset-list. |
| 12503 */ |
| 12504 if( ppOffsetList ){ |
| 12505 *ppOffsetList = pReader->pOffsetList; |
| 12506 *pnOffsetList = (int)(p - pReader->pOffsetList - 1); |
| 12507 } |
| 12508 |
| 12509 /* List may have been edited in place by fts3EvalNearTrim() */ |
| 12510 while( p<pEnd && *p==0 ) p++; |
| 12511 |
| 12512 /* If there are no more entries in the doclist, set pOffsetList to |
| 12513 ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and |
| 12514 ** Fts3SegReader.pOffsetList to point to the next offset list before |
| 12515 ** returning. |
| 12516 */ |
| 12517 if( p>=pEnd ){ |
| 12518 pReader->pOffsetList = 0; |
| 12519 }else{ |
| 12520 rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); |
| 12521 if( rc==SQLITE_OK ){ |
| 12522 sqlite3_int64 iDelta; |
| 12523 pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); |
| 12524 if( pTab->bDescIdx ){ |
| 12525 pReader->iDocid -= iDelta; |
| 12526 }else{ |
| 12527 pReader->iDocid += iDelta; |
| 12528 } |
| 12529 } |
| 12530 } |
| 12531 } |
| 12532 |
| 12533 return SQLITE_OK; |
| 12534 } |
| 12535 |
| 12536 |
| 12537 SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( |
| 12538 Fts3Cursor *pCsr, |
| 12539 Fts3MultiSegReader *pMsr, |
| 12540 int *pnOvfl |
| 12541 ){ |
| 12542 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; |
| 12543 int nOvfl = 0; |
| 12544 int ii; |
| 12545 int rc = SQLITE_OK; |
| 12546 int pgsz = p->nPgsz; |
| 12547 |
| 12548 assert( p->bFts4 ); |
| 12549 assert( pgsz>0 ); |
| 12550 |
| 12551 for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){ |
| 12552 Fts3SegReader *pReader = pMsr->apSegment[ii]; |
| 12553 if( !fts3SegReaderIsPending(pReader) |
| 12554 && !fts3SegReaderIsRootOnly(pReader) |
| 12555 ){ |
| 12556 sqlite3_int64 jj; |
| 12557 for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ |
| 12558 int nBlob; |
| 12559 rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); |
| 12560 if( rc!=SQLITE_OK ) break; |
| 12561 if( (nBlob+35)>pgsz ){ |
| 12562 nOvfl += (nBlob + 34)/pgsz; |
| 12563 } |
| 12564 } |
| 12565 } |
| 12566 } |
| 12567 *pnOvfl = nOvfl; |
| 12568 return rc; |
| 12569 } |
| 12570 |
| 12571 /* |
| 12572 ** Free all allocations associated with the iterator passed as the |
| 12573 ** second argument. |
| 12574 */ |
| 12575 SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ |
| 12576 if( pReader ){ |
| 12577 if( !fts3SegReaderIsPending(pReader) ){ |
| 12578 sqlite3_free(pReader->zTerm); |
| 12579 } |
| 12580 if( !fts3SegReaderIsRootOnly(pReader) ){ |
| 12581 sqlite3_free(pReader->aNode); |
| 12582 } |
| 12583 sqlite3_blob_close(pReader->pBlob); |
| 12584 } |
| 12585 sqlite3_free(pReader); |
| 12586 } |
| 12587 |
| 12588 /* |
| 12589 ** Allocate a new SegReader object. |
| 12590 */ |
| 12591 SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( |
| 12592 int iAge, /* Segment "age". */ |
| 12593 int bLookup, /* True for a lookup only */ |
| 12594 sqlite3_int64 iStartLeaf, /* First leaf to traverse */ |
| 12595 sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ |
| 12596 sqlite3_int64 iEndBlock, /* Final block of segment */ |
| 12597 const char *zRoot, /* Buffer containing root node */ |
| 12598 int nRoot, /* Size of buffer containing root node */ |
| 12599 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ |
| 12600 ){ |
| 12601 Fts3SegReader *pReader; /* Newly allocated SegReader object */ |
| 12602 int nExtra = 0; /* Bytes to allocate segment root node */ |
| 12603 |
| 12604 assert( iStartLeaf<=iEndLeaf ); |
| 12605 if( iStartLeaf==0 ){ |
| 12606 nExtra = nRoot + FTS3_NODE_PADDING; |
| 12607 } |
| 12608 |
| 12609 pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); |
| 12610 if( !pReader ){ |
| 12611 return SQLITE_NOMEM; |
| 12612 } |
| 12613 memset(pReader, 0, sizeof(Fts3SegReader)); |
| 12614 pReader->iIdx = iAge; |
| 12615 pReader->bLookup = bLookup!=0; |
| 12616 pReader->iStartBlock = iStartLeaf; |
| 12617 pReader->iLeafEndBlock = iEndLeaf; |
| 12618 pReader->iEndBlock = iEndBlock; |
| 12619 |
| 12620 if( nExtra ){ |
| 12621 /* The entire segment is stored in the root node. */ |
| 12622 pReader->aNode = (char *)&pReader[1]; |
| 12623 pReader->rootOnly = 1; |
| 12624 pReader->nNode = nRoot; |
| 12625 memcpy(pReader->aNode, zRoot, nRoot); |
| 12626 memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); |
| 12627 }else{ |
| 12628 pReader->iCurrentBlock = iStartLeaf-1; |
| 12629 } |
| 12630 *ppReader = pReader; |
| 12631 return SQLITE_OK; |
| 12632 } |
| 12633 |
| 12634 /* |
| 12635 ** This is a comparison function used as a qsort() callback when sorting |
| 12636 ** an array of pending terms by term. This occurs as part of flushing |
| 12637 ** the contents of the pending-terms hash table to the database. |
| 12638 */ |
| 12639 static int SQLITE_CDECL fts3CompareElemByTerm( |
| 12640 const void *lhs, |
| 12641 const void *rhs |
| 12642 ){ |
| 12643 char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); |
| 12644 char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); |
| 12645 int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); |
| 12646 int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); |
| 12647 |
| 12648 int n = (n1<n2 ? n1 : n2); |
| 12649 int c = memcmp(z1, z2, n); |
| 12650 if( c==0 ){ |
| 12651 c = n1 - n2; |
| 12652 } |
| 12653 return c; |
| 12654 } |
| 12655 |
| 12656 /* |
| 12657 ** This function is used to allocate an Fts3SegReader that iterates through |
| 12658 ** a subset of the terms stored in the Fts3Table.pendingTerms array. |
| 12659 ** |
| 12660 ** If the isPrefixIter parameter is zero, then the returned SegReader iterates |
| 12661 ** through each term in the pending-terms table. Or, if isPrefixIter is |
| 12662 ** non-zero, it iterates through each term and its prefixes. For example, if |
| 12663 ** the pending terms hash table contains the terms "sqlite", "mysql" and |
| 12664 ** "firebird", then the iterator visits the following 'terms' (in the order |
| 12665 ** shown): |
| 12666 ** |
| 12667 ** f fi fir fire fireb firebi firebir firebird |
| 12668 ** m my mys mysq mysql |
| 12669 ** s sq sql sqli sqlit sqlite |
| 12670 ** |
| 12671 ** Whereas if isPrefixIter is zero, the terms visited are: |
| 12672 ** |
| 12673 ** firebird mysql sqlite |
| 12674 */ |
| 12675 SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( |
| 12676 Fts3Table *p, /* Virtual table handle */ |
| 12677 int iIndex, /* Index for p->aIndex */ |
| 12678 const char *zTerm, /* Term to search for */ |
| 12679 int nTerm, /* Size of buffer zTerm */ |
| 12680 int bPrefix, /* True for a prefix iterator */ |
| 12681 Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ |
| 12682 ){ |
| 12683 Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ |
| 12684 Fts3HashElem *pE; /* Iterator variable */ |
| 12685 Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ |
| 12686 int nElem = 0; /* Size of array at aElem */ |
| 12687 int rc = SQLITE_OK; /* Return Code */ |
| 12688 Fts3Hash *pHash; |
| 12689 |
| 12690 pHash = &p->aIndex[iIndex].hPending; |
| 12691 if( bPrefix ){ |
| 12692 int nAlloc = 0; /* Size of allocated array at aElem */ |
| 12693 |
| 12694 for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ |
| 12695 char *zKey = (char *)fts3HashKey(pE); |
| 12696 int nKey = fts3HashKeysize(pE); |
| 12697 if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ |
| 12698 if( nElem==nAlloc ){ |
| 12699 Fts3HashElem **aElem2; |
| 12700 nAlloc += 16; |
| 12701 aElem2 = (Fts3HashElem **)sqlite3_realloc( |
| 12702 aElem, nAlloc*sizeof(Fts3HashElem *) |
| 12703 ); |
| 12704 if( !aElem2 ){ |
| 12705 rc = SQLITE_NOMEM; |
| 12706 nElem = 0; |
| 12707 break; |
| 12708 } |
| 12709 aElem = aElem2; |
| 12710 } |
| 12711 |
| 12712 aElem[nElem++] = pE; |
| 12713 } |
| 12714 } |
| 12715 |
| 12716 /* If more than one term matches the prefix, sort the Fts3HashElem |
| 12717 ** objects in term order using qsort(). This uses the same comparison |
| 12718 ** callback as is used when flushing terms to disk. |
| 12719 */ |
| 12720 if( nElem>1 ){ |
| 12721 qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); |
| 12722 } |
| 12723 |
| 12724 }else{ |
| 12725 /* The query is a simple term lookup that matches at most one term in |
| 12726 ** the index. All that is required is a straight hash-lookup. |
| 12727 ** |
| 12728 ** Because the stack address of pE may be accessed via the aElem pointer |
| 12729 ** below, the "Fts3HashElem *pE" must be declared so that it is valid |
| 12730 ** within this entire function, not just this "else{...}" block. |
| 12731 */ |
| 12732 pE = fts3HashFindElem(pHash, zTerm, nTerm); |
| 12733 if( pE ){ |
| 12734 aElem = &pE; |
| 12735 nElem = 1; |
| 12736 } |
| 12737 } |
| 12738 |
| 12739 if( nElem>0 ){ |
| 12740 int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); |
| 12741 pReader = (Fts3SegReader *)sqlite3_malloc(nByte); |
| 12742 if( !pReader ){ |
| 12743 rc = SQLITE_NOMEM; |
| 12744 }else{ |
| 12745 memset(pReader, 0, nByte); |
| 12746 pReader->iIdx = 0x7FFFFFFF; |
| 12747 pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; |
| 12748 memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); |
| 12749 } |
| 12750 } |
| 12751 |
| 12752 if( bPrefix ){ |
| 12753 sqlite3_free(aElem); |
| 12754 } |
| 12755 *ppReader = pReader; |
| 12756 return rc; |
| 12757 } |
| 12758 |
| 12759 /* |
| 12760 ** Compare the entries pointed to by two Fts3SegReader structures. |
| 12761 ** Comparison is as follows: |
| 12762 ** |
| 12763 ** 1) EOF is greater than not EOF. |
| 12764 ** |
| 12765 ** 2) The current terms (if any) are compared using memcmp(). If one |
| 12766 ** term is a prefix of another, the longer term is considered the |
| 12767 ** larger. |
| 12768 ** |
| 12769 ** 3) By segment age. An older segment is considered larger. |
| 12770 */ |
| 12771 static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12772 int rc; |
| 12773 if( pLhs->aNode && pRhs->aNode ){ |
| 12774 int rc2 = pLhs->nTerm - pRhs->nTerm; |
| 12775 if( rc2<0 ){ |
| 12776 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); |
| 12777 }else{ |
| 12778 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); |
| 12779 } |
| 12780 if( rc==0 ){ |
| 12781 rc = rc2; |
| 12782 } |
| 12783 }else{ |
| 12784 rc = (pLhs->aNode==0) - (pRhs->aNode==0); |
| 12785 } |
| 12786 if( rc==0 ){ |
| 12787 rc = pRhs->iIdx - pLhs->iIdx; |
| 12788 } |
| 12789 assert( rc!=0 ); |
| 12790 return rc; |
| 12791 } |
| 12792 |
| 12793 /* |
| 12794 ** A different comparison function for SegReader structures. In this |
| 12795 ** version, it is assumed that each SegReader points to an entry in |
| 12796 ** a doclist for identical terms. Comparison is made as follows: |
| 12797 ** |
| 12798 ** 1) EOF (end of doclist in this case) is greater than not EOF. |
| 12799 ** |
| 12800 ** 2) By current docid. |
| 12801 ** |
| 12802 ** 3) By segment age. An older segment is considered larger. |
| 12803 */ |
| 12804 static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12805 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); |
| 12806 if( rc==0 ){ |
| 12807 if( pLhs->iDocid==pRhs->iDocid ){ |
| 12808 rc = pRhs->iIdx - pLhs->iIdx; |
| 12809 }else{ |
| 12810 rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; |
| 12811 } |
| 12812 } |
| 12813 assert( pLhs->aNode && pRhs->aNode ); |
| 12814 return rc; |
| 12815 } |
| 12816 static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ |
| 12817 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); |
| 12818 if( rc==0 ){ |
| 12819 if( pLhs->iDocid==pRhs->iDocid ){ |
| 12820 rc = pRhs->iIdx - pLhs->iIdx; |
| 12821 }else{ |
| 12822 rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; |
| 12823 } |
| 12824 } |
| 12825 assert( pLhs->aNode && pRhs->aNode ); |
| 12826 return rc; |
| 12827 } |
| 12828 |
| 12829 /* |
| 12830 ** Compare the term that the Fts3SegReader object passed as the first argument |
| 12831 ** points to with the term specified by arguments zTerm and nTerm. |
| 12832 ** |
| 12833 ** If the pSeg iterator is already at EOF, return 0. Otherwise, return |
| 12834 ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are |
| 12835 ** equal, or +ve if the pSeg term is greater than zTerm/nTerm. |
| 12836 */ |
| 12837 static int fts3SegReaderTermCmp( |
| 12838 Fts3SegReader *pSeg, /* Segment reader object */ |
| 12839 const char *zTerm, /* Term to compare to */ |
| 12840 int nTerm /* Size of term zTerm in bytes */ |
| 12841 ){ |
| 12842 int res = 0; |
| 12843 if( pSeg->aNode ){ |
| 12844 if( pSeg->nTerm>nTerm ){ |
| 12845 res = memcmp(pSeg->zTerm, zTerm, nTerm); |
| 12846 }else{ |
| 12847 res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); |
| 12848 } |
| 12849 if( res==0 ){ |
| 12850 res = pSeg->nTerm-nTerm; |
| 12851 } |
| 12852 } |
| 12853 return res; |
| 12854 } |
| 12855 |
| 12856 /* |
| 12857 ** Argument apSegment is an array of nSegment elements. It is known that |
| 12858 ** the final (nSegment-nSuspect) members are already in sorted order |
| 12859 ** (according to the comparison function provided). This function shuffles |
| 12860 ** the array around until all entries are in sorted order. |
| 12861 */ |
| 12862 static void fts3SegReaderSort( |
| 12863 Fts3SegReader **apSegment, /* Array to sort entries of */ |
| 12864 int nSegment, /* Size of apSegment array */ |
| 12865 int nSuspect, /* Unsorted entry count */ |
| 12866 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ |
| 12867 ){ |
| 12868 int i; /* Iterator variable */ |
| 12869 |
| 12870 assert( nSuspect<=nSegment ); |
| 12871 |
| 12872 if( nSuspect==nSegment ) nSuspect--; |
| 12873 for(i=nSuspect-1; i>=0; i--){ |
| 12874 int j; |
| 12875 for(j=i; j<(nSegment-1); j++){ |
| 12876 Fts3SegReader *pTmp; |
| 12877 if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; |
| 12878 pTmp = apSegment[j+1]; |
| 12879 apSegment[j+1] = apSegment[j]; |
| 12880 apSegment[j] = pTmp; |
| 12881 } |
| 12882 } |
| 12883 |
| 12884 #ifndef NDEBUG |
| 12885 /* Check that the list really is sorted now. */ |
| 12886 for(i=0; i<(nSuspect-1); i++){ |
| 12887 assert( xCmp(apSegment[i], apSegment[i+1])<0 ); |
| 12888 } |
| 12889 #endif |
| 12890 } |
| 12891 |
| 12892 /* |
| 12893 ** Insert a record into the %_segments table. |
| 12894 */ |
| 12895 static int fts3WriteSegment( |
| 12896 Fts3Table *p, /* Virtual table handle */ |
| 12897 sqlite3_int64 iBlock, /* Block id for new block */ |
| 12898 char *z, /* Pointer to buffer containing block data */ |
| 12899 int n /* Size of buffer z in bytes */ |
| 12900 ){ |
| 12901 sqlite3_stmt *pStmt; |
| 12902 int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); |
| 12903 if( rc==SQLITE_OK ){ |
| 12904 sqlite3_bind_int64(pStmt, 1, iBlock); |
| 12905 sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); |
| 12906 sqlite3_step(pStmt); |
| 12907 rc = sqlite3_reset(pStmt); |
| 12908 } |
| 12909 return rc; |
| 12910 } |
| 12911 |
| 12912 /* |
| 12913 ** Find the largest relative level number in the table. If successful, set |
| 12914 ** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, |
| 12915 ** set *pnMax to zero and return an SQLite error code. |
| 12916 */ |
| 12917 SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ |
| 12918 int rc; |
| 12919 int mxLevel = 0; |
| 12920 sqlite3_stmt *pStmt = 0; |
| 12921 |
| 12922 rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); |
| 12923 if( rc==SQLITE_OK ){ |
| 12924 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 12925 mxLevel = sqlite3_column_int(pStmt, 0); |
| 12926 } |
| 12927 rc = sqlite3_reset(pStmt); |
| 12928 } |
| 12929 *pnMax = mxLevel; |
| 12930 return rc; |
| 12931 } |
| 12932 |
| 12933 /* |
| 12934 ** Insert a record into the %_segdir table. |
| 12935 */ |
| 12936 static int fts3WriteSegdir( |
| 12937 Fts3Table *p, /* Virtual table handle */ |
| 12938 sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ |
| 12939 int iIdx, /* Value for "idx" field */ |
| 12940 sqlite3_int64 iStartBlock, /* Value for "start_block" field */ |
| 12941 sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ |
| 12942 sqlite3_int64 iEndBlock, /* Value for "end_block" field */ |
| 12943 sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ |
| 12944 char *zRoot, /* Blob value for "root" field */ |
| 12945 int nRoot /* Number of bytes in buffer zRoot */ |
| 12946 ){ |
| 12947 sqlite3_stmt *pStmt; |
| 12948 int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); |
| 12949 if( rc==SQLITE_OK ){ |
| 12950 sqlite3_bind_int64(pStmt, 1, iLevel); |
| 12951 sqlite3_bind_int(pStmt, 2, iIdx); |
| 12952 sqlite3_bind_int64(pStmt, 3, iStartBlock); |
| 12953 sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); |
| 12954 if( nLeafData==0 ){ |
| 12955 sqlite3_bind_int64(pStmt, 5, iEndBlock); |
| 12956 }else{ |
| 12957 char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); |
| 12958 if( !zEnd ) return SQLITE_NOMEM; |
| 12959 sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); |
| 12960 } |
| 12961 sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); |
| 12962 sqlite3_step(pStmt); |
| 12963 rc = sqlite3_reset(pStmt); |
| 12964 } |
| 12965 return rc; |
| 12966 } |
| 12967 |
/*
** Return the size of the common prefix (if any) shared by zPrev and
** zNext, in bytes. For example,
**
**   fts3PrefixCompress("abc", 3, "abcdef", 6)   // returns 3
**   fts3PrefixCompress("abX", 3, "abcdef", 6)   // returns 2
**   fts3PrefixCompress("abX", 3, "Xbcdef", 6)   // returns 0
**
** The scan is bounded by both nPrev and nNext, so neither buffer is ever
** read beyond its stated size. The return value is therefore never larger
** than the smaller of the two sizes. If either size is zero or negative,
** zero is returned and neither buffer is dereferenced.
*/
static int fts3PrefixCompress(
  const char *zPrev,              /* Buffer containing previous term */
  int nPrev,                      /* Size of buffer zPrev in bytes */
  const char *zNext,              /* Buffer containing next term */
  int nNext                       /* Size of buffer zNext in bytes */
){
  int nLimit = (nPrev<nNext) ? nPrev : nNext;   /* Bytes safe to compare */
  int n = 0;
  while( n<nLimit && zPrev[n]==zNext[n] ){
    n++;
  }
  return n;
}
| 12987 |
/*
** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
** (according to memcmp) than the previous term.
**
** On entry *ppTree is the node that terms are currently being appended to
** at this level of the tree, or NULL if no node exists at this level yet.
** If the term fits in that node (or the node does not contain any term
** yet) it is appended there and *ppTree is left unchanged. Otherwise a new
** node is allocated and *ppTree is set to point to it. In that case, if
** a node already existed, the term is added to its parent by a recursive
** call rather than to the new node.
**
** If isCopyTerm is true the term is copied into a buffer owned by the node
** (SegmentNode.zMalloc). Otherwise the node stores the zTerm pointer
** itself.
**
** Returns SQLITE_OK if successful, or SQLITE_NOMEM if an allocation fails.
*/
static int fts3NodeAddTerm(
  Fts3Table *p,                   /* Virtual table handle */
  SegmentNode **ppTree,           /* IN/OUT: SegmentNode handle */
  int isCopyTerm,                 /* True if zTerm/nTerm is transient */
  const char *zTerm,              /* Pointer to buffer containing term */
  int nTerm                       /* Size of term in bytes */
){
  SegmentNode *pTree = *ppTree;
  int rc;
  SegmentNode *pNew;

  /* First try to append the term to the current node. Return early if
  ** this is possible.
  */
  if( pTree ){
    int nData = pTree->nData;     /* Current size of node in bytes */
    int nReq = nData;             /* Required space after adding zTerm */
    int nPrefix;                  /* Number of bytes of prefix compression */
    int nSuffix;                  /* Suffix length */

    nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
    nSuffix = nTerm-nPrefix;

    nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
    if( nReq<=p->nNodeSize || !pTree->zTerm ){

      if( nReq>p->nNodeSize ){
        /* An unusual case: this is the first term to be added to the node
        ** and the static node buffer (p->nNodeSize bytes) is not large
        ** enough. Use a separately malloced buffer instead. This wastes
        ** p->nNodeSize bytes, but since this scenario only comes about when
        ** the database contains two terms that share a prefix of almost 2KB,
        ** this is not expected to be a serious problem.
        */
        assert( pTree->aData==(char *)&pTree[1] );
        pTree->aData = (char *)sqlite3_malloc(nReq);
        if( !pTree->aData ){
          return SQLITE_NOMEM;
        }
      }

      if( pTree->zTerm ){
        /* There is no prefix-length field for first term in a node */
        nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
      }

      /* Append the suffix length followed by the suffix bytes themselves. */
      nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
      memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
      pTree->nData = nData + nSuffix;
      pTree->nEntry++;

      /* Remember the term just added so that the next term can be
      ** prefix-compressed against it. */
      if( isCopyTerm ){
        if( pTree->nMalloc<nTerm ){
          /* Grow the copy buffer to twice the size of this term. */
          char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2);
          if( !zNew ){
            return SQLITE_NOMEM;
          }
          pTree->nMalloc = nTerm*2;
          pTree->zMalloc = zNew;
        }
        pTree->zTerm = pTree->zMalloc;
        memcpy(pTree->zTerm, zTerm, nTerm);
        pTree->nTerm = nTerm;
      }else{
        pTree->zTerm = (char *)zTerm;
        pTree->nTerm = nTerm;
      }
      return SQLITE_OK;
    }
  }

  /* If control flows to here, it was not possible to append zTerm to the
  ** current node. Create a new node (a right-sibling of the current node).
  ** If this is the first node in the tree, the term is added to it.
  **
  ** Otherwise, the term is not added to the new node, it is left empty for
  ** now. Instead, the term is inserted into the parent of pTree. If pTree
  ** has no parent, one is created here.
  */
  pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
  if( !pNew ){
    return SQLITE_NOMEM;
  }
  memset(pNew, 0, sizeof(SegmentNode));
  /* Node data starts after a reserved area of (1 + FTS3_VARINT_MAX) bytes.
  ** fts3TreeFinishNode() later writes the height byte and the left-child
  ** varint into the tail of that area. */
  pNew->nData = 1 + FTS3_VARINT_MAX;
  pNew->aData = (char *)&pNew[1];

  if( pTree ){
    SegmentNode *pParent = pTree->pParent;
    rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
    if( pTree->pParent==0 ){
      pTree->pParent = pParent;
    }
    /* Link the new node in as the right-hand sibling of pTree. This is
    ** done even if the recursive call above failed. */
    pTree->pRight = pNew;
    pNew->pLeftmost = pTree->pLeftmost;
    pNew->pParent = pParent;
    /* Hand the term-copy buffer over to the new node. */
    pNew->zMalloc = pTree->zMalloc;
    pNew->nMalloc = pTree->nMalloc;
    pTree->zMalloc = 0;
  }else{
    pNew->pLeftmost = pNew;
    rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm);
  }

  *ppTree = pNew;
  return rc;
}
| 13099 |
| 13100 /* |
| 13101 ** Helper function for fts3NodeWrite(). |
| 13102 */ |
| 13103 static int fts3TreeFinishNode( |
| 13104 SegmentNode *pTree, |
| 13105 int iHeight, |
| 13106 sqlite3_int64 iLeftChild |
| 13107 ){ |
| 13108 int nStart; |
| 13109 assert( iHeight>=1 && iHeight<128 ); |
| 13110 nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); |
| 13111 pTree->aData[nStart] = (char)iHeight; |
| 13112 sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); |
| 13113 return nStart; |
| 13114 } |
| 13115 |
| 13116 /* |
| 13117 ** Write the buffer for the segment node pTree and all of its peers to the |
| 13118 ** database. Then call this function recursively to write the parent of |
| 13119 ** pTree and its peers to the database. |
| 13120 ** |
| 13121 ** Except, if pTree is a root node, do not write it to the database. Instead, |
| 13122 ** set output variables *paRoot and *pnRoot to contain the root node. |
| 13123 ** |
| 13124 ** If successful, SQLITE_OK is returned and output variable *piLast is |
| 13125 ** set to the largest blockid written to the database (or zero if no |
| 13126 ** blocks were written to the db). Otherwise, an SQLite error code is |
| 13127 ** returned. |
| 13128 */ |
| 13129 static int fts3NodeWrite( |
| 13130 Fts3Table *p, /* Virtual table handle */ |
| 13131 SegmentNode *pTree, /* SegmentNode handle */ |
| 13132 int iHeight, /* Height of this node in tree */ |
| 13133 sqlite3_int64 iLeaf, /* Block id of first leaf node */ |
| 13134 sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ |
| 13135 sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ |
| 13136 char **paRoot, /* OUT: Data for root node */ |
| 13137 int *pnRoot /* OUT: Size of root node in bytes */ |
| 13138 ){ |
| 13139 int rc = SQLITE_OK; |
| 13140 |
| 13141 if( !pTree->pParent ){ |
| 13142 /* Root node of the tree. */ |
| 13143 int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); |
| 13144 *piLast = iFree-1; |
| 13145 *pnRoot = pTree->nData - nStart; |
| 13146 *paRoot = &pTree->aData[nStart]; |
| 13147 }else{ |
| 13148 SegmentNode *pIter; |
| 13149 sqlite3_int64 iNextFree = iFree; |
| 13150 sqlite3_int64 iNextLeaf = iLeaf; |
| 13151 for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ |
| 13152 int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); |
| 13153 int nWrite = pIter->nData - nStart; |
| 13154 |
| 13155 rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); |
| 13156 iNextFree++; |
| 13157 iNextLeaf += (pIter->nEntry+1); |
| 13158 } |
| 13159 if( rc==SQLITE_OK ){ |
| 13160 assert( iNextLeaf==iFree ); |
| 13161 rc = fts3NodeWrite( |
| 13162 p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot |
| 13163 ); |
| 13164 } |
| 13165 } |
| 13166 |
| 13167 return rc; |
| 13168 } |
| 13169 |
| 13170 /* |
| 13171 ** Free all memory allocations associated with the tree pTree. |
| 13172 */ |
| 13173 static void fts3NodeFree(SegmentNode *pTree){ |
| 13174 if( pTree ){ |
| 13175 SegmentNode *p = pTree->pLeftmost; |
| 13176 fts3NodeFree(p->pParent); |
| 13177 while( p ){ |
| 13178 SegmentNode *pRight = p->pRight; |
| 13179 if( p->aData!=(char *)&p[1] ){ |
| 13180 sqlite3_free(p->aData); |
| 13181 } |
| 13182 assert( pRight==0 || p->zMalloc==0 ); |
| 13183 sqlite3_free(p->zMalloc); |
| 13184 sqlite3_free(p); |
| 13185 p = pRight; |
| 13186 } |
| 13187 } |
| 13188 } |
| 13189 |
/*
** Add a term to the segment being constructed by the SegmentWriter object
** *ppWriter. When adding the first term to a segment, *ppWriter should
** be passed NULL. This function will allocate a new SegmentWriter object
** and return it via the input/output variable *ppWriter in this case.
**
** The term and its doclist are appended to the leaf-node buffer held by
** the writer. If the buffer already contains data and the new entry would
** take it past p->nNodeSize bytes, the buffer is first written to the
** %_segments table and a separator term is added to the interior-node
** tree (pWriter->pTree).
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
** A newly allocated SegmentWriter is stored in *ppWriter before any of
** the later steps that may fail, so *ppWriter may be non-NULL even when
** an error is returned.
*/
static int fts3SegWriterAdd(
  Fts3Table *p,                   /* Virtual table handle */
  SegmentWriter **ppWriter,       /* IN/OUT: SegmentWriter handle */
  int isCopyTerm,                 /* True if buffer zTerm must be copied */
  const char *zTerm,              /* Pointer to buffer containing term */
  int nTerm,                      /* Size of term in bytes */
  const char *aDoclist,           /* Pointer to buffer containing doclist */
  int nDoclist                    /* Size of doclist in bytes */
){
  int nPrefix;                    /* Size of term prefix in bytes */
  int nSuffix;                    /* Size of term suffix in bytes */
  int nReq;                       /* Number of bytes required on leaf page */
  int nData;                      /* Bytes of data currently in leaf buffer */
  SegmentWriter *pWriter = *ppWriter;

  if( !pWriter ){
    int rc;
    sqlite3_stmt *pStmt;

    /* Allocate the SegmentWriter structure */
    pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
    if( !pWriter ) return SQLITE_NOMEM;
    memset(pWriter, 0, sizeof(SegmentWriter));
    *ppWriter = pWriter;

    /* Allocate a buffer in which to accumulate data */
    pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
    if( !pWriter->aData ) return SQLITE_NOMEM;
    pWriter->nSize = p->nNodeSize;

    /* Find the next free blockid in the %_segments table */
    rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
    if( rc!=SQLITE_OK ) return rc;
    if( SQLITE_ROW==sqlite3_step(pStmt) ){
      pWriter->iFree = sqlite3_column_int64(pStmt, 0);
      pWriter->iFirst = pWriter->iFree;
    }
    rc = sqlite3_reset(pStmt);
    if( rc!=SQLITE_OK ) return rc;
  }
  nData = pWriter->nData;

  /* Work out how much of the new term is shared with the previous one. */
  nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
  nSuffix = nTerm-nPrefix;

  /* Figure out how many bytes are required by this new entry */
  nReq = sqlite3Fts3VarintLen(nPrefix) +    /* varint containing prefix size */
    sqlite3Fts3VarintLen(nSuffix) +         /* varint containing suffix size */
    nSuffix +                               /* Term suffix */
    sqlite3Fts3VarintLen(nDoclist) +        /* Size of doclist */
    nDoclist;                               /* Doclist data */

  if( nData>0 && nData+nReq>p->nNodeSize ){
    int rc;

    /* The current leaf node is full. Write it out to the database. */
    rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
    if( rc!=SQLITE_OK ) return rc;
    p->nLeafAdd++;

    /* Add the current term to the interior node tree. The term added to
    ** the interior tree must:
    **
    **   a) be greater than the largest term on the leaf node just written
    **      to the database (still available in pWriter->zTerm), and
    **
    **   b) be less than or equal to the term about to be added to the new
    **      leaf node (zTerm/nTerm).
    **
    ** In other words, it must be the prefix of zTerm 1 byte longer than
    ** the common prefix (if any) of zTerm and pWriter->zTerm.
    */
    assert( nPrefix<nTerm );
    rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
    if( rc!=SQLITE_OK ) return rc;

    /* Start a new, empty leaf. Its first term is stored in full, so the
    ** prefix length is zero and the space requirement is recalculated. */
    nData = 0;
    pWriter->nTerm = 0;

    nPrefix = 0;
    nSuffix = nTerm;
    nReq = 1 +                              /* varint containing prefix size */
      sqlite3Fts3VarintLen(nTerm) +         /* varint containing suffix size */
      nTerm +                               /* Term suffix */
      sqlite3Fts3VarintLen(nDoclist) +      /* Size of doclist */
      nDoclist;                             /* Doclist data */
  }

  /* Increase the total number of bytes written to account for the new entry. */
  pWriter->nLeafData += nReq;

  /* If the buffer currently allocated is too small for this entry, realloc
  ** the buffer to make it large enough.
  */
  if( nReq>pWriter->nSize ){
    char *aNew = sqlite3_realloc(pWriter->aData, nReq);
    if( !aNew ) return SQLITE_NOMEM;
    pWriter->aData = aNew;
    pWriter->nSize = nReq;
  }
  assert( nData+nReq<=pWriter->nSize );

  /* Append the prefix-compressed term and doclist to the buffer. */
  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
  memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
  nData += nSuffix;
  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
  memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
  pWriter->nData = nData + nDoclist;

  /* Save the current term so that it can be used to prefix-compress the next.
  ** If the isCopyTerm parameter is true, then the buffer pointed to by
  ** zTerm is transient, so take a copy of the term data. Otherwise, just
  ** store a copy of the pointer.
  */
  if( isCopyTerm ){
    if( nTerm>pWriter->nMalloc ){
      /* Grow the copy buffer to twice the size of this term. */
      char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pWriter->nMalloc = nTerm*2;
      pWriter->zMalloc = zNew;
      pWriter->zTerm = zNew;
    }
    assert( pWriter->zTerm==pWriter->zMalloc );
    memcpy(pWriter->zTerm, zTerm, nTerm);
  }else{
    pWriter->zTerm = (char *)zTerm;
  }
  pWriter->nTerm = nTerm;

  return SQLITE_OK;
}
| 13333 |
| 13334 /* |
| 13335 ** Flush all data associated with the SegmentWriter object pWriter to the |
| 13336 ** database. This function must be called after all terms have been added |
| 13337 ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is |
| 13338 ** returned. Otherwise, an SQLite error code. |
| 13339 */ |
| 13340 static int fts3SegWriterFlush( |
| 13341 Fts3Table *p, /* Virtual table handle */ |
| 13342 SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ |
| 13343 sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ |
| 13344 int iIdx /* Value for 'idx' column of %_segdir */ |
| 13345 ){ |
| 13346 int rc; /* Return code */ |
| 13347 if( pWriter->pTree ){ |
| 13348 sqlite3_int64 iLast = 0; /* Largest block id written to database */ |
| 13349 sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ |
| 13350 char *zRoot = NULL; /* Pointer to buffer containing root node */ |
| 13351 int nRoot = 0; /* Size of buffer zRoot */ |
| 13352 |
| 13353 iLastLeaf = pWriter->iFree; |
| 13354 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); |
| 13355 if( rc==SQLITE_OK ){ |
| 13356 rc = fts3NodeWrite(p, pWriter->pTree, 1, |
| 13357 pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); |
| 13358 } |
| 13359 if( rc==SQLITE_OK ){ |
| 13360 rc = fts3WriteSegdir(p, iLevel, iIdx, |
| 13361 pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); |
| 13362 } |
| 13363 }else{ |
| 13364 /* The entire tree fits on the root node. Write it to the segdir table. */ |
| 13365 rc = fts3WriteSegdir(p, iLevel, iIdx, |
| 13366 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); |
| 13367 } |
| 13368 p->nLeafAdd++; |
| 13369 return rc; |
| 13370 } |
| 13371 |
| 13372 /* |
| 13373 ** Release all memory held by the SegmentWriter object passed as the |
| 13374 ** first argument. |
| 13375 */ |
| 13376 static void fts3SegWriterFree(SegmentWriter *pWriter){ |
| 13377 if( pWriter ){ |
| 13378 sqlite3_free(pWriter->aData); |
| 13379 sqlite3_free(pWriter->zMalloc); |
| 13380 fts3NodeFree(pWriter->pTree); |
| 13381 sqlite3_free(pWriter); |
| 13382 } |
| 13383 } |
| 13384 |
| 13385 /* |
| 13386 ** The first value in the apVal[] array is assumed to contain an integer. |
| 13387 ** This function tests if there exist any documents with docid values that |
| 13388 ** are different from that integer. i.e. if deleting the document with docid |
| 13389 ** pRowid would mean the FTS3 table were empty. |
| 13390 ** |
| 13391 ** If successful, *pisEmpty is set to true if the table is empty except for |
| 13392 ** document pRowid, or false otherwise, and SQLITE_OK is returned. If an |
| 13393 ** error occurs, an SQLite error code is returned. |
| 13394 */ |
| 13395 static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ |
| 13396 sqlite3_stmt *pStmt; |
| 13397 int rc; |
| 13398 if( p->zContentTbl ){ |
| 13399 /* If using the content=xxx option, assume the table is never empty */ |
| 13400 *pisEmpty = 0; |
| 13401 rc = SQLITE_OK; |
| 13402 }else{ |
| 13403 rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); |
| 13404 if( rc==SQLITE_OK ){ |
| 13405 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13406 *pisEmpty = sqlite3_column_int(pStmt, 0); |
| 13407 } |
| 13408 rc = sqlite3_reset(pStmt); |
| 13409 } |
| 13410 } |
| 13411 return rc; |
| 13412 } |
| 13413 |
| 13414 /* |
| 13415 ** Set *pnMax to the largest segment level in the database for the index |
| 13416 ** iIndex. |
| 13417 ** |
| 13418 ** Segment levels are stored in the 'level' column of the %_segdir table. |
| 13419 ** |
| 13420 ** Return SQLITE_OK if successful, or an SQLite error code if not. |
| 13421 */ |
| 13422 static int fts3SegmentMaxLevel( |
| 13423 Fts3Table *p, |
| 13424 int iLangid, |
| 13425 int iIndex, |
| 13426 sqlite3_int64 *pnMax |
| 13427 ){ |
| 13428 sqlite3_stmt *pStmt; |
| 13429 int rc; |
| 13430 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 13431 |
| 13432 /* Set pStmt to the compiled version of: |
| 13433 ** |
| 13434 ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? |
| 13435 ** |
| 13436 ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). |
| 13437 */ |
| 13438 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); |
| 13439 if( rc!=SQLITE_OK ) return rc; |
| 13440 sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 13441 sqlite3_bind_int64(pStmt, 2, |
| 13442 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 13443 ); |
| 13444 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13445 *pnMax = sqlite3_column_int64(pStmt, 0); |
| 13446 } |
| 13447 return sqlite3_reset(pStmt); |
| 13448 } |
| 13449 |
| 13450 /* |
| 13451 ** iAbsLevel is an absolute level that may be assumed to exist within |
| 13452 ** the database. This function checks if it is the largest level number |
| 13453 ** within its index. Assuming no error occurs, *pbMax is set to 1 if |
| 13454 ** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK |
| 13455 ** is returned. If an error occurs, an error code is returned and the |
| 13456 ** final value of *pbMax is undefined. |
| 13457 */ |
| 13458 static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ |
| 13459 |
| 13460 /* Set pStmt to the compiled version of: |
| 13461 ** |
| 13462 ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? |
| 13463 ** |
| 13464 ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). |
| 13465 */ |
| 13466 sqlite3_stmt *pStmt; |
| 13467 int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); |
| 13468 if( rc!=SQLITE_OK ) return rc; |
| 13469 sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); |
| 13470 sqlite3_bind_int64(pStmt, 2, |
| 13471 ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL |
| 13472 ); |
| 13473 |
| 13474 *pbMax = 0; |
| 13475 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 13476 *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; |
| 13477 } |
| 13478 return sqlite3_reset(pStmt); |
| 13479 } |
| 13480 |
| 13481 /* |
| 13482 ** Delete all entries in the %_segments table associated with the segment |
| 13483 ** opened with seg-reader pSeg. This function does not affect the contents |
| 13484 ** of the %_segdir table. |
| 13485 */ |
| 13486 static int fts3DeleteSegment( |
| 13487 Fts3Table *p, /* FTS table handle */ |
| 13488 Fts3SegReader *pSeg /* Segment to delete */ |
| 13489 ){ |
| 13490 int rc = SQLITE_OK; /* Return code */ |
| 13491 if( pSeg->iStartBlock ){ |
| 13492 sqlite3_stmt *pDelete; /* SQL statement to delete rows */ |
| 13493 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); |
| 13494 if( rc==SQLITE_OK ){ |
| 13495 sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); |
| 13496 sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); |
| 13497 sqlite3_step(pDelete); |
| 13498 rc = sqlite3_reset(pDelete); |
| 13499 } |
| 13500 } |
| 13501 return rc; |
| 13502 } |
| 13503 |
| 13504 /* |
| 13505 ** This function is used after merging multiple segments into a single large |
| 13506 ** segment to delete the old, now redundant, segment b-trees. Specifically, |
| 13507 ** it: |
| 13508 ** |
| 13509 ** 1) Deletes all %_segments entries for the segments associated with |
| 13510 ** each of the SegReader objects in the array passed as the third |
| 13511 ** argument, and |
| 13512 ** |
| 13513 ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir |
| 13514 ** entries regardless of level if (iLevel<0). |
| 13515 ** |
| 13516 ** SQLITE_OK is returned if successful, otherwise an SQLite error code. |
| 13517 */ |
| 13518 static int fts3DeleteSegdir( |
| 13519 Fts3Table *p, /* Virtual table handle */ |
| 13520 int iLangid, /* Language id */ |
| 13521 int iIndex, /* Index for p->aIndex */ |
| 13522 int iLevel, /* Level of %_segdir entries to delete */ |
| 13523 Fts3SegReader **apSegment, /* Array of SegReader objects */ |
| 13524 int nReader /* Size of array apSegment */ |
| 13525 ){ |
| 13526 int rc = SQLITE_OK; /* Return Code */ |
| 13527 int i; /* Iterator variable */ |
| 13528 sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ |
| 13529 |
| 13530 for(i=0; rc==SQLITE_OK && i<nReader; i++){ |
| 13531 rc = fts3DeleteSegment(p, apSegment[i]); |
| 13532 } |
| 13533 if( rc!=SQLITE_OK ){ |
| 13534 return rc; |
| 13535 } |
| 13536 |
| 13537 assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); |
| 13538 if( iLevel==FTS3_SEGCURSOR_ALL ){ |
| 13539 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); |
| 13540 if( rc==SQLITE_OK ){ |
| 13541 sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); |
| 13542 sqlite3_bind_int64(pDelete, 2, |
| 13543 getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) |
| 13544 ); |
| 13545 } |
| 13546 }else{ |
| 13547 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); |
| 13548 if( rc==SQLITE_OK ){ |
| 13549 sqlite3_bind_int64( |
| 13550 pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) |
| 13551 ); |
| 13552 } |
| 13553 } |
| 13554 |
| 13555 if( rc==SQLITE_OK ){ |
| 13556 sqlite3_step(pDelete); |
| 13557 rc = sqlite3_reset(pDelete); |
| 13558 } |
| 13559 |
| 13560 return rc; |
| 13561 } |
| 13562 |
/*
** When this function is called, buffer *ppList (size *pnList bytes) contains
** a position list that may (or may not) feature multiple columns. This
** function adjusts the pointer *ppList and the length *pnList so that they
** identify the subset of the position list that corresponds to column iCol.
**
** If there are no entries in the input position list for column iCol, then
** *pnList is set to zero before returning.
**
** If parameter bZero is non-zero, then any part of the input list following
** the end of the output list is zeroed before returning.
**
** The list is never read or written at or beyond its original end. If a
** malformed list causes the scan to run past that end, the function stops
** and reports an empty result rather than continuing.
*/
static void fts3ColumnFilter(
  int iCol,                       /* Column to filter on */
  int bZero,                      /* Zero out anything following *ppList */
  char **ppList,                  /* IN/OUT: Pointer to position list */
  int *pnList                     /* IN/OUT: Size of buffer *ppList in bytes */
){
  char *pList = *ppList;          /* Start of the column currently examined */
  int nList = *pnList;            /* Bytes from pList to the end of input */
  char *pEnd = &pList[nList];     /* One past the last byte of the input */
  int iCurrent = 0;               /* Column number of the data at pList */
  char *p = pList;

  assert( iCol>=0 );
  while( 1 ){
    /* Skip forward to the end of the current column's data: the first byte
    ** with value 0x00 or 0x01 that is not the continuation of a varint
    ** (c is 0x80 while the previous byte had its high bit set). */
    char c = 0;
    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;

    if( iCol==iCurrent ){
      nList = (int)(p - pList);
      break;
    }

    nList -= (int)(p - pList);
    pList = p;
    if( nList<=0 ){
      /* End of input reached without finding iCol. nList can only be
      ** negative if a malformed column-number varint carried p beyond pEnd;
      ** report an empty list in that case too. */
      nList = 0;
      break;
    }

    /* Step over the one-byte column marker and read the column number that
    ** follows it. */
    p = &pList[1];
    p += fts3GetVarint32(p, &iCurrent);
  }

  /* Only zero the tail if it has a strictly positive length. A signed
  ** comparison is used so that a pList beyond pEnd can never turn into a
  ** huge memset() size. */
  if( bZero && (pEnd - &pList[nList])>0 ){
    memset(&pList[nList], 0, pEnd - &pList[nList]);
  }
  *ppList = pList;
  *pnList = nList;
}
| 13612 |
| 13613 /* |
| 13614 ** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any |
| 13615 ** existing data). Grow the buffer if required. |
| 13616 ** |
| 13617 ** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered |
| 13618 ** trying to resize the buffer, return SQLITE_NOMEM. |
| 13619 */ |
| 13620 static int fts3MsrBufferData( |
| 13621 Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ |
| 13622 char *pList, |
| 13623 int nList |
| 13624 ){ |
| 13625 if( nList>pMsr->nBuffer ){ |
| 13626 char *pNew; |
| 13627 pMsr->nBuffer = nList*2; |
| 13628 pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); |
| 13629 if( !pNew ) return SQLITE_NOMEM; |
| 13630 pMsr->aBuffer = pNew; |
| 13631 } |
| 13632 |
| 13633 memcpy(pMsr->aBuffer, pList, nList); |
| 13634 return SQLITE_OK; |
| 13635 } |
| 13636 |
| 13637 SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( |
| 13638 Fts3Table *p, /* Virtual table handle */ |
| 13639 Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ |
| 13640 sqlite3_int64 *piDocid, /* OUT: Docid value */ |
| 13641 char **paPoslist, /* OUT: Pointer to position list */ |
| 13642 int *pnPoslist /* OUT: Size of position list in bytes */ |
| 13643 ){ |
| 13644 int nMerge = pMsr->nAdvance; |
| 13645 Fts3SegReader **apSegment = pMsr->apSegment; |
| 13646 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13647 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13648 ); |
| 13649 |
| 13650 if( nMerge==0 ){ |
| 13651 *paPoslist = 0; |
| 13652 return SQLITE_OK; |
| 13653 } |
| 13654 |
| 13655 while( 1 ){ |
| 13656 Fts3SegReader *pSeg; |
| 13657 pSeg = pMsr->apSegment[0]; |
| 13658 |
| 13659 if( pSeg->pOffsetList==0 ){ |
| 13660 *paPoslist = 0; |
| 13661 break; |
| 13662 }else{ |
| 13663 int rc; |
| 13664 char *pList; |
| 13665 int nList; |
| 13666 int j; |
| 13667 sqlite3_int64 iDocid = apSegment[0]->iDocid; |
| 13668 |
| 13669 rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); |
| 13670 j = 1; |
| 13671 while( rc==SQLITE_OK |
| 13672 && j<nMerge |
| 13673 && apSegment[j]->pOffsetList |
| 13674 && apSegment[j]->iDocid==iDocid |
| 13675 ){ |
| 13676 rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); |
| 13677 j++; |
| 13678 } |
| 13679 if( rc!=SQLITE_OK ) return rc; |
| 13680 fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); |
| 13681 |
| 13682 if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ |
| 13683 rc = fts3MsrBufferData(pMsr, pList, nList+1); |
| 13684 if( rc!=SQLITE_OK ) return rc; |
| 13685 assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); |
| 13686 pList = pMsr->aBuffer; |
| 13687 } |
| 13688 |
| 13689 if( pMsr->iColFilter>=0 ){ |
| 13690 fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); |
| 13691 } |
| 13692 |
| 13693 if( nList>0 ){ |
| 13694 *paPoslist = pList; |
| 13695 *piDocid = iDocid; |
| 13696 *pnPoslist = nList; |
| 13697 break; |
| 13698 } |
| 13699 } |
| 13700 } |
| 13701 |
| 13702 return SQLITE_OK; |
| 13703 } |
| 13704 |
| 13705 static int fts3SegReaderStart( |
| 13706 Fts3Table *p, /* Virtual table handle */ |
| 13707 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13708 const char *zTerm, /* Term searched for (or NULL) */ |
| 13709 int nTerm /* Length of zTerm in bytes */ |
| 13710 ){ |
| 13711 int i; |
| 13712 int nSeg = pCsr->nSegment; |
| 13713 |
| 13714 /* If the Fts3SegFilter defines a specific term (or term prefix) to search |
| 13715 ** for, then advance each segment iterator until it points to a term of |
| 13716 ** equal or greater value than the specified term. This prevents many |
| 13717 ** unnecessary merge/sort operations for the case where single segment |
| 13718 ** b-tree leaf nodes contain more than one term. |
| 13719 */ |
| 13720 for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){ |
| 13721 int res = 0; |
| 13722 Fts3SegReader *pSeg = pCsr->apSegment[i]; |
| 13723 do { |
| 13724 int rc = fts3SegReaderNext(p, pSeg, 0); |
| 13725 if( rc!=SQLITE_OK ) return rc; |
| 13726 }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); |
| 13727 |
| 13728 if( pSeg->bLookup && res!=0 ){ |
| 13729 fts3SegReaderSetEof(pSeg); |
| 13730 } |
| 13731 } |
| 13732 fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); |
| 13733 |
| 13734 return SQLITE_OK; |
| 13735 } |
| 13736 |
| 13737 SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( |
| 13738 Fts3Table *p, /* Virtual table handle */ |
| 13739 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13740 Fts3SegFilter *pFilter /* Restrictions on range of iteration */ |
| 13741 ){ |
| 13742 pCsr->pFilter = pFilter; |
| 13743 return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); |
| 13744 } |
| 13745 |
| 13746 SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( |
| 13747 Fts3Table *p, /* Virtual table handle */ |
| 13748 Fts3MultiSegReader *pCsr, /* Cursor object */ |
| 13749 int iCol, /* Column to match on. */ |
| 13750 const char *zTerm, /* Term to iterate through a doclist for */ |
| 13751 int nTerm /* Number of bytes in zTerm */ |
| 13752 ){ |
| 13753 int i; |
| 13754 int rc; |
| 13755 int nSegment = pCsr->nSegment; |
| 13756 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13757 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13758 ); |
| 13759 |
| 13760 assert( pCsr->pFilter==0 ); |
| 13761 assert( zTerm && nTerm>0 ); |
| 13762 |
| 13763 /* Advance each segment iterator until it points to the term zTerm/nTerm. */ |
| 13764 rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); |
| 13765 if( rc!=SQLITE_OK ) return rc; |
| 13766 |
| 13767 /* Determine how many of the segments actually point to zTerm/nTerm. */ |
| 13768 for(i=0; i<nSegment; i++){ |
| 13769 Fts3SegReader *pSeg = pCsr->apSegment[i]; |
| 13770 if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ |
| 13771 break; |
| 13772 } |
| 13773 } |
| 13774 pCsr->nAdvance = i; |
| 13775 |
| 13776 /* Advance each of the segments to point to the first docid. */ |
| 13777 for(i=0; i<pCsr->nAdvance; i++){ |
| 13778 rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); |
| 13779 if( rc!=SQLITE_OK ) return rc; |
| 13780 } |
| 13781 fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); |
| 13782 |
| 13783 assert( iCol<0 || iCol<p->nColumn ); |
| 13784 pCsr->iColFilter = iCol; |
| 13785 |
| 13786 return SQLITE_OK; |
| 13787 } |
| 13788 |
| 13789 /* |
| 13790 ** This function is called on a MultiSegReader that has been started using |
| 13791 ** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also |
| 13792 ** have been made. Calling this function puts the MultiSegReader in such |
| 13793 ** a state that if the next two calls are: |
| 13794 ** |
| 13795 ** sqlite3Fts3SegReaderStart() |
| 13796 ** sqlite3Fts3SegReaderStep() |
| 13797 ** |
| 13798 ** then the entire doclist for the term is available in |
| 13799 ** MultiSegReader.aDoclist/nDoclist. |
| 13800 */ |
| 13801 SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ |
| 13802 int i; /* Used to iterate through segment-readers */ |
| 13803 |
| 13804 assert( pCsr->zTerm==0 ); |
| 13805 assert( pCsr->nTerm==0 ); |
| 13806 assert( pCsr->aDoclist==0 ); |
| 13807 assert( pCsr->nDoclist==0 ); |
| 13808 |
| 13809 pCsr->nAdvance = 0; |
| 13810 pCsr->bRestart = 1; |
| 13811 for(i=0; i<pCsr->nSegment; i++){ |
| 13812 pCsr->apSegment[i]->pOffsetList = 0; |
| 13813 pCsr->apSegment[i]->nOffsetList = 0; |
| 13814 pCsr->apSegment[i]->iDocid = 0; |
| 13815 } |
| 13816 |
| 13817 return SQLITE_OK; |
| 13818 } |
| 13819 |
| 13820 |
| 13821 SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( |
| 13822 Fts3Table *p, /* Virtual table handle */ |
| 13823 Fts3MultiSegReader *pCsr /* Cursor object */ |
| 13824 ){ |
| 13825 int rc = SQLITE_OK; |
| 13826 |
| 13827 int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); |
| 13828 int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); |
| 13829 int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); |
| 13830 int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); |
| 13831 int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); |
| 13832 int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); |
| 13833 |
| 13834 Fts3SegReader **apSegment = pCsr->apSegment; |
| 13835 int nSegment = pCsr->nSegment; |
| 13836 Fts3SegFilter *pFilter = pCsr->pFilter; |
| 13837 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( |
| 13838 p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp |
| 13839 ); |
| 13840 |
| 13841 if( pCsr->nSegment==0 ) return SQLITE_OK; |
| 13842 |
| 13843 do { |
| 13844 int nMerge; |
| 13845 int i; |
| 13846 |
| 13847 /* Advance the first pCsr->nAdvance entries in the apSegment[] array |
| 13848 ** forward. Then sort the list in order of current term again. |
| 13849 */ |
| 13850 for(i=0; i<pCsr->nAdvance; i++){ |
| 13851 Fts3SegReader *pSeg = apSegment[i]; |
| 13852 if( pSeg->bLookup ){ |
| 13853 fts3SegReaderSetEof(pSeg); |
| 13854 }else{ |
| 13855 rc = fts3SegReaderNext(p, pSeg, 0); |
| 13856 } |
| 13857 if( rc!=SQLITE_OK ) return rc; |
| 13858 } |
| 13859 fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); |
| 13860 pCsr->nAdvance = 0; |
| 13861 |
| 13862 /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ |
| 13863 assert( rc==SQLITE_OK ); |
| 13864 if( apSegment[0]->aNode==0 ) break; |
| 13865 |
| 13866 pCsr->nTerm = apSegment[0]->nTerm; |
| 13867 pCsr->zTerm = apSegment[0]->zTerm; |
| 13868 |
| 13869 /* If this is a prefix-search, and if the term that apSegment[0] points |
| 13870 ** to does not share a suffix with pFilter->zTerm/nTerm, then all |
| 13871 ** required callbacks have been made. In this case exit early. |
| 13872 ** |
| 13873 ** Similarly, if this is a search for an exact match, and the first term |
| 13874 ** of segment apSegment[0] is not a match, exit early. |
| 13875 */ |
| 13876 if( pFilter->zTerm && !isScan ){ |
| 13877 if( pCsr->nTerm<pFilter->nTerm |
| 13878 || (!isPrefix && pCsr->nTerm>pFilter->nTerm) |
| 13879 || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) |
| 13880 ){ |
| 13881 break; |
| 13882 } |
| 13883 } |
| 13884 |
| 13885 nMerge = 1; |
| 13886 while( nMerge<nSegment |
| 13887 && apSegment[nMerge]->aNode |
| 13888 && apSegment[nMerge]->nTerm==pCsr->nTerm |
| 13889 && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) |
| 13890 ){ |
| 13891 nMerge++; |
| 13892 } |
| 13893 |
| 13894 assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); |
| 13895 if( nMerge==1 |
| 13896 && !isIgnoreEmpty |
| 13897 && !isFirst |
| 13898 && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) |
| 13899 ){ |
| 13900 pCsr->nDoclist = apSegment[0]->nDoclist; |
| 13901 if( fts3SegReaderIsPending(apSegment[0]) ){ |
| 13902 rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); |
| 13903 pCsr->aDoclist = pCsr->aBuffer; |
| 13904 }else{ |
| 13905 pCsr->aDoclist = apSegment[0]->aDoclist; |
| 13906 } |
| 13907 if( rc==SQLITE_OK ) rc = SQLITE_ROW; |
| 13908 }else{ |
| 13909 int nDoclist = 0; /* Size of doclist */ |
| 13910 sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ |
| 13911 |
| 13912 /* The current term of the first nMerge entries in the array |
| 13913 ** of Fts3SegReader objects is the same. The doclists must be merged |
| 13914 ** and a single term returned with the merged doclist. |
| 13915 */ |
| 13916 for(i=0; i<nMerge; i++){ |
| 13917 fts3SegReaderFirstDocid(p, apSegment[i]); |
| 13918 } |
| 13919 fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp); |
| 13920 while( apSegment[0]->pOffsetList ){ |
| 13921 int j; /* Number of segments that share a docid */ |
| 13922 char *pList = 0; |
| 13923 int nList = 0; |
| 13924 int nByte; |
| 13925 sqlite3_int64 iDocid = apSegment[0]->iDocid; |
| 13926 fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); |
| 13927 j = 1; |
| 13928 while( j<nMerge |
| 13929 && apSegment[j]->pOffsetList |
| 13930 && apSegment[j]->iDocid==iDocid |
| 13931 ){ |
| 13932 fts3SegReaderNextDocid(p, apSegment[j], 0, 0); |
| 13933 j++; |
| 13934 } |
| 13935 |
| 13936 if( isColFilter ){ |
| 13937 fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); |
| 13938 } |
| 13939 |
| 13940 if( !isIgnoreEmpty || nList>0 ){ |
| 13941 |
| 13942 /* Calculate the 'docid' delta value to write into the merged |
| 13943 ** doclist. */ |
| 13944 sqlite3_int64 iDelta; |
| 13945 if( p->bDescIdx && nDoclist>0 ){ |
| 13946 iDelta = iPrev - iDocid; |
| 13947 }else{ |
| 13948 iDelta = iDocid - iPrev; |
| 13949 } |
| 13950 assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); |
| 13951 assert( nDoclist>0 || iDelta==iDocid ); |
| 13952 |
| 13953 nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); |
| 13954 if( nDoclist+nByte>pCsr->nBuffer ){ |
| 13955 char *aNew; |
| 13956 pCsr->nBuffer = (nDoclist+nByte)*2; |
| 13957 aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); |
| 13958 if( !aNew ){ |
| 13959 return SQLITE_NOMEM; |
| 13960 } |
| 13961 pCsr->aBuffer = aNew; |
| 13962 } |
| 13963 |
| 13964 if( isFirst ){ |
| 13965 char *a = &pCsr->aBuffer[nDoclist]; |
| 13966 int nWrite; |
| 13967 |
| 13968 nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); |
| 13969 if( nWrite ){ |
| 13970 iPrev = iDocid; |
| 13971 nDoclist += nWrite; |
| 13972 } |
| 13973 }else{ |
| 13974 nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); |
| 13975 iPrev = iDocid; |
| 13976 if( isRequirePos ){ |
| 13977 memcpy(&pCsr->aBuffer[nDoclist], pList, nList); |
| 13978 nDoclist += nList; |
| 13979 pCsr->aBuffer[nDoclist++] = '\0'; |
| 13980 } |
| 13981 } |
| 13982 } |
| 13983 |
| 13984 fts3SegReaderSort(apSegment, nMerge, j, xCmp); |
| 13985 } |
| 13986 if( nDoclist>0 ){ |
| 13987 pCsr->aDoclist = pCsr->aBuffer; |
| 13988 pCsr->nDoclist = nDoclist; |
| 13989 rc = SQLITE_ROW; |
| 13990 } |
| 13991 } |
| 13992 pCsr->nAdvance = nMerge; |
| 13993 }while( rc==SQLITE_OK ); |
| 13994 |
| 13995 return rc; |
| 13996 } |
| 13997 |
| 13998 |
| 13999 SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( |
| 14000 Fts3MultiSegReader *pCsr /* Cursor object */ |
| 14001 ){ |
| 14002 if( pCsr ){ |
| 14003 int i; |
| 14004 for(i=0; i<pCsr->nSegment; i++){ |
| 14005 sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); |
| 14006 } |
| 14007 sqlite3_free(pCsr->apSegment); |
| 14008 sqlite3_free(pCsr->aBuffer); |
| 14009 |
| 14010 pCsr->nSegment = 0; |
| 14011 pCsr->apSegment = 0; |
| 14012 pCsr->aBuffer = 0; |
| 14013 } |
| 14014 } |
| 14015 |
| 14016 /* |
| 14017 ** Decode the "end_block" field, selected by column iCol of the SELECT |
| 14018 ** statement passed as the first argument. |
| 14019 ** |
| 14020 ** The "end_block" field may contain either an integer, or a text field |
| 14021 ** containing the text representation of two non-negative integers separated |
| 14022 ** by one or more space (0x20) characters. In the first case, set *piEndBlock |
| 14023 ** to the integer value and *pnByte to zero before returning. In the second, |
| 14024 ** set *piEndBlock to the first value and *pnByte to the second. |
| 14025 */ |
| 14026 static void fts3ReadEndBlockField( |
| 14027 sqlite3_stmt *pStmt, |
| 14028 int iCol, |
| 14029 i64 *piEndBlock, |
| 14030 i64 *pnByte |
| 14031 ){ |
| 14032 const unsigned char *zText = sqlite3_column_text(pStmt, iCol); |
| 14033 if( zText ){ |
| 14034 int i; |
| 14035 int iMul = 1; |
| 14036 i64 iVal = 0; |
| 14037 for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ |
| 14038 iVal = iVal*10 + (zText[i] - '0'); |
| 14039 } |
| 14040 *piEndBlock = iVal; |
| 14041 while( zText[i]==' ' ) i++; |
| 14042 iVal = 0; |
| 14043 if( zText[i]=='-' ){ |
| 14044 i++; |
| 14045 iMul = -1; |
| 14046 } |
| 14047 for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ |
| 14048 iVal = iVal*10 + (zText[i] - '0'); |
| 14049 } |
| 14050 *pnByte = (iVal * (i64)iMul); |
| 14051 } |
| 14052 } |
| 14053 |
| 14054 |
| 14055 /* |
| 14056 ** A segment of size nByte bytes has just been written to absolute level |
| 14057 ** iAbsLevel. Promote any segments that should be promoted as a result. |
| 14058 */ |
| 14059 static int fts3PromoteSegments( |
| 14060 Fts3Table *p, /* FTS table handle */ |
| 14061 sqlite3_int64 iAbsLevel, /* Absolute level just updated */ |
| 14062 sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ |
| 14063 ){ |
| 14064 int rc = SQLITE_OK; |
| 14065 sqlite3_stmt *pRange; |
| 14066 |
| 14067 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); |
| 14068 |
| 14069 if( rc==SQLITE_OK ){ |
| 14070 int bOk = 0; |
| 14071 i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; |
| 14072 i64 nLimit = (nByte*3)/2; |
| 14073 |
| 14074 /* Loop through all entries in the %_segdir table corresponding to |
| 14075 ** segments in this index on levels greater than iAbsLevel. If there is |
| 14076 ** at least one such segment, and it is possible to determine that all |
| 14077 ** such segments are smaller than nLimit bytes in size, they will be |
| 14078 ** promoted to level iAbsLevel. */ |
| 14079 sqlite3_bind_int64(pRange, 1, iAbsLevel+1); |
| 14080 sqlite3_bind_int64(pRange, 2, iLast); |
| 14081 while( SQLITE_ROW==sqlite3_step(pRange) ){ |
| 14082 i64 nSize = 0, dummy; |
| 14083 fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); |
| 14084 if( nSize<=0 || nSize>nLimit ){ |
| 14085 /* If nSize==0, then the %_segdir.end_block field does not not |
| 14086 ** contain a size value. This happens if it was written by an |
| 14087 ** old version of FTS. In this case it is not possible to determine |
| 14088 ** the size of the segment, and so segment promotion does not |
| 14089 ** take place. */ |
| 14090 bOk = 0; |
| 14091 break; |
| 14092 } |
| 14093 bOk = 1; |
| 14094 } |
| 14095 rc = sqlite3_reset(pRange); |
| 14096 |
| 14097 if( bOk ){ |
| 14098 int iIdx = 0; |
| 14099 sqlite3_stmt *pUpdate1 = 0; |
| 14100 sqlite3_stmt *pUpdate2 = 0; |
| 14101 |
| 14102 if( rc==SQLITE_OK ){ |
| 14103 rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); |
| 14104 } |
| 14105 if( rc==SQLITE_OK ){ |
| 14106 rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); |
| 14107 } |
| 14108 |
| 14109 if( rc==SQLITE_OK ){ |
| 14110 |
| 14111 /* Loop through all %_segdir entries for segments in this index with |
| 14112 ** levels equal to or greater than iAbsLevel. As each entry is visited, |
| 14113 ** updated it to set (level = -1) and (idx = N), where N is 0 for the |
| 14114 ** oldest segment in the range, 1 for the next oldest, and so on. |
| 14115 ** |
| 14116 ** In other words, move all segments being promoted to level -1, |
| 14117 ** setting the "idx" fields as appropriate to keep them in the same |
| 14118 ** order. The contents of level -1 (which is never used, except |
| 14119 ** transiently here), will be moved back to level iAbsLevel below. */ |
| 14120 sqlite3_bind_int64(pRange, 1, iAbsLevel); |
| 14121 while( SQLITE_ROW==sqlite3_step(pRange) ){ |
| 14122 sqlite3_bind_int(pUpdate1, 1, iIdx++); |
| 14123 sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); |
| 14124 sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); |
| 14125 sqlite3_step(pUpdate1); |
| 14126 rc = sqlite3_reset(pUpdate1); |
| 14127 if( rc!=SQLITE_OK ){ |
| 14128 sqlite3_reset(pRange); |
| 14129 break; |
| 14130 } |
| 14131 } |
| 14132 } |
| 14133 if( rc==SQLITE_OK ){ |
| 14134 rc = sqlite3_reset(pRange); |
| 14135 } |
| 14136 |
| 14137 /* Move level -1 to level iAbsLevel */ |
| 14138 if( rc==SQLITE_OK ){ |
| 14139 sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); |
| 14140 sqlite3_step(pUpdate2); |
| 14141 rc = sqlite3_reset(pUpdate2); |
| 14142 } |
| 14143 } |
| 14144 } |
| 14145 |
| 14146 |
| 14147 return rc; |
| 14148 } |
| 14149 |
| 14150 /* |
| 14151 ** Merge all level iLevel segments in the database into a single |
| 14152 ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a |
| 14153 ** single segment with a level equal to the numerically largest level |
| 14154 ** currently present in the database. |
| 14155 ** |
| 14156 ** If this function is called with iLevel<0, but there is only one |
| 14157 ** segment in the database, SQLITE_DONE is returned immediately. |
| 14158 ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, |
| 14159 ** an SQLite error code is returned. |
| 14160 */ |
| 14161 static int fts3SegmentMerge( |
| 14162 Fts3Table *p, |
| 14163 int iLangid, /* Language id to merge */ |
| 14164 int iIndex, /* Index in p->aIndex[] to merge */ |
| 14165 int iLevel /* Level to merge */ |
| 14166 ){ |
| 14167 int rc; /* Return code */ |
| 14168 int iIdx = 0; /* Index of new segment */ |
| 14169 sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ |
| 14170 SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ |
| 14171 Fts3SegFilter filter; /* Segment term filter condition */ |
| 14172 Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ |
| 14173 int bIgnoreEmpty = 0; /* True to ignore empty segments */ |
| 14174 i64 iMaxLevel = 0; /* Max level number for this index/langid */ |
| 14175 |
| 14176 assert( iLevel==FTS3_SEGCURSOR_ALL |
| 14177 || iLevel==FTS3_SEGCURSOR_PENDING |
| 14178 || iLevel>=0 |
| 14179 ); |
| 14180 assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); |
| 14181 assert( iIndex>=0 && iIndex<p->nIndex ); |
| 14182 |
| 14183 rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); |
| 14184 if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; |
| 14185 |
| 14186 if( iLevel!=FTS3_SEGCURSOR_PENDING ){ |
| 14187 rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); |
| 14188 if( rc!=SQLITE_OK ) goto finished; |
| 14189 } |
| 14190 |
| 14191 if( iLevel==FTS3_SEGCURSOR_ALL ){ |
| 14192 /* This call is to merge all segments in the database to a single |
| 14193 ** segment. The level of the new segment is equal to the numerically |
| 14194 ** greatest segment level currently present in the database for this |
| 14195 ** index. The idx of the new segment is always 0. */ |
| 14196 if( csr.nSegment==1 && 0==fts3SegReaderIsPending(csr.apSegment[0]) ){ |
| 14197 rc = SQLITE_DONE; |
| 14198 goto finished; |
| 14199 } |
| 14200 iNewLevel = iMaxLevel; |
| 14201 bIgnoreEmpty = 1; |
| 14202 |
| 14203 }else{ |
| 14204 /* This call is to merge all segments at level iLevel. find the next |
| 14205 ** available segment index at level iLevel+1. The call to |
| 14206 ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to |
| 14207 ** a single iLevel+2 segment if necessary. */ |
| 14208 assert( FTS3_SEGCURSOR_PENDING==-1 ); |
| 14209 iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); |
| 14210 rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); |
| 14211 bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); |
| 14212 } |
| 14213 if( rc!=SQLITE_OK ) goto finished; |
| 14214 |
| 14215 assert( csr.nSegment>0 ); |
| 14216 assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); |
| 14217 assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) ); |
| 14218 |
| 14219 memset(&filter, 0, sizeof(Fts3SegFilter)); |
| 14220 filter.flags = FTS3_SEGMENT_REQUIRE_POS; |
| 14221 filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); |
| 14222 |
| 14223 rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); |
| 14224 while( SQLITE_OK==rc ){ |
| 14225 rc = sqlite3Fts3SegReaderStep(p, &csr); |
| 14226 if( rc!=SQLITE_ROW ) break; |
| 14227 rc = fts3SegWriterAdd(p, &pWriter, 1, |
| 14228 csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); |
| 14229 } |
| 14230 if( rc!=SQLITE_OK ) goto finished; |
| 14231 assert( pWriter || bIgnoreEmpty ); |
| 14232 |
| 14233 if( iLevel!=FTS3_SEGCURSOR_PENDING ){ |
| 14234 rc = fts3DeleteSegdir( |
| 14235 p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment |
| 14236 ); |
| 14237 if( rc!=SQLITE_OK ) goto finished; |
| 14238 } |
| 14239 if( pWriter ){ |
| 14240 rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); |
| 14241 if( rc==SQLITE_OK ){ |
| 14242 if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){ |
| 14243 rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); |
| 14244 } |
| 14245 } |
| 14246 } |
| 14247 |
| 14248 finished: |
| 14249 fts3SegWriterFree(pWriter); |
| 14250 sqlite3Fts3SegReaderFinish(&csr); |
| 14251 return rc; |
| 14252 } |
| 14253 |
| 14254 |
| 14255 /* |
| 14256 ** Flush the contents of pendingTerms to level 0 segments. |
| 14257 */ |
| 14258 SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ |
| 14259 int rc = SQLITE_OK; |
| 14260 int i; |
| 14261 |
| 14262 for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 14263 rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); |
| 14264 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 14265 } |
| 14266 sqlite3Fts3PendingTermsClear(p); |
| 14267 |
| 14268 /* Determine the auto-incr-merge setting if unknown. If enabled, |
| 14269 ** estimate the number of leaf blocks of content to be written |
| 14270 */ |
| 14271 if( rc==SQLITE_OK && p->bHasStat |
| 14272 && p->nAutoincrmerge==0xff && p->nLeafAdd>0 |
| 14273 ){ |
| 14274 sqlite3_stmt *pStmt = 0; |
| 14275 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); |
| 14276 if( rc==SQLITE_OK ){ |
| 14277 sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); |
| 14278 rc = sqlite3_step(pStmt); |
| 14279 if( rc==SQLITE_ROW ){ |
| 14280 p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); |
| 14281 if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; |
| 14282 }else if( rc==SQLITE_DONE ){ |
| 14283 p->nAutoincrmerge = 0; |
| 14284 } |
| 14285 rc = sqlite3_reset(pStmt); |
| 14286 } |
| 14287 } |
| 14288 return rc; |
| 14289 } |
| 14290 |
| 14291 /* |
| 14292 ** Encode N integers as varints into a blob. |
| 14293 */ |
| 14294 static void fts3EncodeIntArray( |
| 14295 int N, /* The number of integers to encode */ |
| 14296 u32 *a, /* The integer values */ |
| 14297 char *zBuf, /* Write the BLOB here */ |
| 14298 int *pNBuf /* Write number of bytes if zBuf[] used here */ |
| 14299 ){ |
| 14300 int i, j; |
| 14301 for(i=j=0; i<N; i++){ |
| 14302 j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]); |
| 14303 } |
| 14304 *pNBuf = j; |
| 14305 } |
| 14306 |
| 14307 /* |
| 14308 ** Decode a blob of varints into N integers |
| 14309 */ |
| 14310 static void fts3DecodeIntArray( |
| 14311 int N, /* The number of integers to decode */ |
| 14312 u32 *a, /* Write the integer values */ |
| 14313 const char *zBuf, /* The BLOB containing the varints */ |
| 14314 int nBuf /* size of the BLOB */ |
| 14315 ){ |
| 14316 int i, j; |
| 14317 UNUSED_PARAMETER(nBuf); |
| 14318 for(i=j=0; i<N; i++){ |
| 14319 sqlite3_int64 x; |
| 14320 j += sqlite3Fts3GetVarint(&zBuf[j], &x); |
| 14321 assert(j<=nBuf); |
| 14322 a[i] = (u32)(x & 0xffffffff); |
| 14323 } |
| 14324 } |
| 14325 |
| 14326 /* |
| 14327 ** Insert the sizes (in tokens) for each column of the document |
| 14328 ** with docid equal to p->iPrevDocid. The sizes are encoded as |
| 14329 ** a blob of varints. |
| 14330 */ |
| 14331 static void fts3InsertDocsize( |
| 14332 int *pRC, /* Result code */ |
| 14333 Fts3Table *p, /* Table into which to insert */ |
| 14334 u32 *aSz /* Sizes of each column, in tokens */ |
| 14335 ){ |
| 14336 char *pBlob; /* The BLOB encoding of the document size */ |
| 14337 int nBlob; /* Number of bytes in the BLOB */ |
| 14338 sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ |
| 14339 int rc; /* Result code from subfunctions */ |
| 14340 |
| 14341 if( *pRC ) return; |
| 14342 pBlob = sqlite3_malloc( 10*p->nColumn ); |
| 14343 if( pBlob==0 ){ |
| 14344 *pRC = SQLITE_NOMEM; |
| 14345 return; |
| 14346 } |
| 14347 fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); |
| 14348 rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); |
| 14349 if( rc ){ |
| 14350 sqlite3_free(pBlob); |
| 14351 *pRC = rc; |
| 14352 return; |
| 14353 } |
| 14354 sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); |
| 14355 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); |
| 14356 sqlite3_step(pStmt); |
| 14357 *pRC = sqlite3_reset(pStmt); |
| 14358 } |
| 14359 |
| 14360 /* |
| 14361 ** Record 0 of the %_stat table contains a blob consisting of N varints, |
| 14362 ** where N is the number of user defined columns in the fts3 table plus |
| 14363 ** two. If nCol is the number of user defined columns, then values of the |
| 14364 ** varints are set as follows: |
| 14365 ** |
| 14366 ** Varint 0: Total number of rows in the table. |
| 14367 ** |
| 14368 ** Varint 1..nCol: For each column, the total number of tokens stored in |
| 14369 ** the column for all rows of the table. |
| 14370 ** |
| 14371 ** Varint 1+nCol: The total size, in bytes, of all text values in all |
| 14372 ** columns of all rows of the table. |
| 14373 ** |
| 14374 */ |
| 14375 static void fts3UpdateDocTotals( |
| 14376 int *pRC, /* The result code */ |
| 14377 Fts3Table *p, /* Table being updated */ |
| 14378 u32 *aSzIns, /* Size increases */ |
| 14379 u32 *aSzDel, /* Size decreases */ |
| 14380 int nChng /* Change in the number of documents */ |
| 14381 ){ |
| 14382 char *pBlob; /* Storage for BLOB written into %_stat */ |
| 14383 int nBlob; /* Size of BLOB written into %_stat */ |
| 14384 u32 *a; /* Array of integers that becomes the BLOB */ |
| 14385 sqlite3_stmt *pStmt; /* Statement for reading and writing */ |
| 14386 int i; /* Loop counter */ |
| 14387 int rc; /* Result code from subfunctions */ |
| 14388 |
| 14389 const int nStat = p->nColumn+2; |
| 14390 |
| 14391 if( *pRC ) return; |
| 14392 a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); |
| 14393 if( a==0 ){ |
| 14394 *pRC = SQLITE_NOMEM; |
| 14395 return; |
| 14396 } |
| 14397 pBlob = (char*)&a[nStat]; |
| 14398 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); |
| 14399 if( rc ){ |
| 14400 sqlite3_free(a); |
| 14401 *pRC = rc; |
| 14402 return; |
| 14403 } |
| 14404 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 14405 if( sqlite3_step(pStmt)==SQLITE_ROW ){ |
| 14406 fts3DecodeIntArray(nStat, a, |
| 14407 sqlite3_column_blob(pStmt, 0), |
| 14408 sqlite3_column_bytes(pStmt, 0)); |
| 14409 }else{ |
| 14410 memset(a, 0, sizeof(u32)*(nStat) ); |
| 14411 } |
| 14412 rc = sqlite3_reset(pStmt); |
| 14413 if( rc!=SQLITE_OK ){ |
| 14414 sqlite3_free(a); |
| 14415 *pRC = rc; |
| 14416 return; |
| 14417 } |
| 14418 if( nChng<0 && a[0]<(u32)(-nChng) ){ |
| 14419 a[0] = 0; |
| 14420 }else{ |
| 14421 a[0] += nChng; |
| 14422 } |
| 14423 for(i=0; i<p->nColumn+1; i++){ |
| 14424 u32 x = a[i+1]; |
| 14425 if( x+aSzIns[i] < aSzDel[i] ){ |
| 14426 x = 0; |
| 14427 }else{ |
| 14428 x = x + aSzIns[i] - aSzDel[i]; |
| 14429 } |
| 14430 a[i+1] = x; |
| 14431 } |
| 14432 fts3EncodeIntArray(nStat, a, pBlob, &nBlob); |
| 14433 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); |
| 14434 if( rc ){ |
| 14435 sqlite3_free(a); |
| 14436 *pRC = rc; |
| 14437 return; |
| 14438 } |
| 14439 sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); |
| 14440 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); |
| 14441 sqlite3_step(pStmt); |
| 14442 *pRC = sqlite3_reset(pStmt); |
| 14443 sqlite3_free(a); |
| 14444 } |
| 14445 |
| 14446 /* |
| 14447 ** Merge the entire database so that there is one segment for each |
| 14448 ** iIndex/iLangid combination. |
| 14449 */ |
| 14450 static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ |
| 14451 int bSeenDone = 0; |
| 14452 int rc; |
| 14453 sqlite3_stmt *pAllLangid = 0; |
| 14454 |
| 14455 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); |
| 14456 if( rc==SQLITE_OK ){ |
| 14457 int rc2; |
| 14458 sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); |
| 14459 sqlite3_bind_int(pAllLangid, 2, p->nIndex); |
| 14460 while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ |
| 14461 int i; |
| 14462 int iLangid = sqlite3_column_int(pAllLangid, 0); |
| 14463 for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ |
| 14464 rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); |
| 14465 if( rc==SQLITE_DONE ){ |
| 14466 bSeenDone = 1; |
| 14467 rc = SQLITE_OK; |
| 14468 } |
| 14469 } |
| 14470 } |
| 14471 rc2 = sqlite3_reset(pAllLangid); |
| 14472 if( rc==SQLITE_OK ) rc = rc2; |
| 14473 } |
| 14474 |
| 14475 sqlite3Fts3SegmentsClose(p); |
| 14476 sqlite3Fts3PendingTermsClear(p); |
| 14477 |
| 14478 return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; |
| 14479 } |
| 14480 |
| 14481 /* |
| 14482 ** This function is called when the user executes the following statement: |
| 14483 ** |
| 14484 ** INSERT INTO <tbl>(<tbl>) VALUES('rebuild'); |
| 14485 ** |
| 14486 ** The entire FTS index is discarded and rebuilt. If the table is one |
| 14487 ** created using the content=xxx option, then the new index is based on |
| 14488 ** the current contents of the xxx table. Otherwise, it is rebuilt based |
| 14489 ** on the contents of the %_content table. |
| 14490 */ |
| 14491 static int fts3DoRebuild(Fts3Table *p){ |
| 14492 int rc; /* Return Code */ |
| 14493 |
| 14494 rc = fts3DeleteAll(p, 0); |
| 14495 if( rc==SQLITE_OK ){ |
| 14496 u32 *aSz = 0; |
| 14497 u32 *aSzIns = 0; |
| 14498 u32 *aSzDel = 0; |
| 14499 sqlite3_stmt *pStmt = 0; |
| 14500 int nEntry = 0; |
| 14501 |
| 14502 /* Compose and prepare an SQL statement to loop through the content table */ |
| 14503 char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); |
| 14504 if( !zSql ){ |
| 14505 rc = SQLITE_NOMEM; |
| 14506 }else{ |
| 14507 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 14508 sqlite3_free(zSql); |
| 14509 } |
| 14510 |
| 14511 if( rc==SQLITE_OK ){ |
| 14512 int nByte = sizeof(u32) * (p->nColumn+1)*3; |
| 14513 aSz = (u32 *)sqlite3_malloc(nByte); |
| 14514 if( aSz==0 ){ |
| 14515 rc = SQLITE_NOMEM; |
| 14516 }else{ |
| 14517 memset(aSz, 0, nByte); |
| 14518 aSzIns = &aSz[p->nColumn+1]; |
| 14519 aSzDel = &aSzIns[p->nColumn+1]; |
| 14520 } |
| 14521 } |
| 14522 |
| 14523 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 14524 int iCol; |
| 14525 int iLangid = langidFromSelect(p, pStmt); |
| 14526 rc = fts3PendingTermsDocid(p, 0, iLangid, sqlite3_column_int64(pStmt, 0)); |
| 14527 memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); |
| 14528 for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ |
| 14529 if( p->abNotindexed[iCol]==0 ){ |
| 14530 const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); |
| 14531 rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); |
| 14532 aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); |
| 14533 } |
| 14534 } |
| 14535 if( p->bHasDocsize ){ |
| 14536 fts3InsertDocsize(&rc, p, aSz); |
| 14537 } |
| 14538 if( rc!=SQLITE_OK ){ |
| 14539 sqlite3_finalize(pStmt); |
| 14540 pStmt = 0; |
| 14541 }else{ |
| 14542 nEntry++; |
| 14543 for(iCol=0; iCol<=p->nColumn; iCol++){ |
| 14544 aSzIns[iCol] += aSz[iCol]; |
| 14545 } |
| 14546 } |
| 14547 } |
| 14548 if( p->bFts4 ){ |
| 14549 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); |
| 14550 } |
| 14551 sqlite3_free(aSz); |
| 14552 |
| 14553 if( pStmt ){ |
| 14554 int rc2 = sqlite3_finalize(pStmt); |
| 14555 if( rc==SQLITE_OK ){ |
| 14556 rc = rc2; |
| 14557 } |
| 14558 } |
| 14559 } |
| 14560 |
| 14561 return rc; |
| 14562 } |
| 14563 |
| 14564 |
| 14565 /* |
| 14566 ** This function opens a cursor used to read the input data for an |
| 14567 ** incremental merge operation. Specifically, it opens a cursor to scan |
| 14568 ** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute |
| 14569 ** level iAbsLevel. |
| 14570 */ |
| 14571 static int fts3IncrmergeCsr( |
| 14572 Fts3Table *p, /* FTS3 table handle */ |
| 14573 sqlite3_int64 iAbsLevel, /* Absolute level to open */ |
| 14574 int nSeg, /* Number of segments to merge */ |
| 14575 Fts3MultiSegReader *pCsr /* Cursor object to populate */ |
| 14576 ){ |
| 14577 int rc; /* Return Code */ |
| 14578 sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ |
| 14579 int nByte; /* Bytes allocated at pCsr->apSegment[] */ |
| 14580 |
| 14581 /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ |
| 14582 memset(pCsr, 0, sizeof(*pCsr)); |
| 14583 nByte = sizeof(Fts3SegReader *) * nSeg; |
| 14584 pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); |
| 14585 |
| 14586 if( pCsr->apSegment==0 ){ |
| 14587 rc = SQLITE_NOMEM; |
| 14588 }else{ |
| 14589 memset(pCsr->apSegment, 0, nByte); |
| 14590 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); |
| 14591 } |
| 14592 if( rc==SQLITE_OK ){ |
| 14593 int i; |
| 14594 int rc2; |
| 14595 sqlite3_bind_int64(pStmt, 1, iAbsLevel); |
| 14596 assert( pCsr->nSegment==0 ); |
| 14597 for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){ |
| 14598 rc = sqlite3Fts3SegReaderNew(i, 0, |
| 14599 sqlite3_column_int64(pStmt, 1), /* segdir.start_block */ |
| 14600 sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */ |
| 14601 sqlite3_column_int64(pStmt, 3), /* segdir.end_block */ |
| 14602 sqlite3_column_blob(pStmt, 4), /* segdir.root */ |
| 14603 sqlite3_column_bytes(pStmt, 4), /* segdir.root */ |
| 14604 &pCsr->apSegment[i] |
| 14605 ); |
| 14606 pCsr->nSegment++; |
| 14607 } |
| 14608 rc2 = sqlite3_reset(pStmt); |
| 14609 if( rc==SQLITE_OK ) rc = rc2; |
| 14610 } |
| 14611 |
| 14612 return rc; |
| 14613 } |
| 14614 |
/* Short type names for the structures used to read and write segment
** b-tree nodes. */
typedef struct IncrmergeWriter IncrmergeWriter;
typedef struct NodeWriter NodeWriter;
typedef struct Blob Blob;
typedef struct NodeReader NodeReader;
| 14619 |
/*
** An instance of the following structure is used as a dynamic buffer
** to build up nodes or other blobs of data in.
**
** The function blobGrowBuffer() is used to extend the allocation. The
** buffer is reallocated with sqlite3_realloc() and so must eventually be
** released using sqlite3_free().
*/
struct Blob {
  char *a;                        /* Pointer to allocation */
  int n;                          /* Number of valid bytes of data in a[] */
  int nAlloc;                     /* Allocated size of a[] (nAlloc>=n) */
};
| 14631 |
/*
** This structure is used to build up buffers containing segment b-tree
** nodes (blocks). One NodeWriter is used for each layer of the tree being
** written. An empty key (key.n==0) means no key has yet been written to
** the current block.
*/
struct NodeWriter {
  sqlite3_int64 iBlock;           /* Current block id */
  Blob key;                       /* Last key written to the current block */
  Blob block;                     /* Current block image */
};
| 14641 |
/*
** An object of this type contains the state required to create or append
** to an appendable b-tree segment.
**
** Entry aNodeWriter[0] is used to write leaf nodes. Entries 1 and greater
** are used for successive layers of internal nodes.
*/
struct IncrmergeWriter {
  int nLeafEst;                   /* Space allocated for leaf blocks */
  int nWork;                      /* Number of leaf pages flushed */
  sqlite3_int64 iAbsLevel;        /* Absolute level of input segments */
  int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */
  sqlite3_int64 iStart;           /* Block number of first allocated block */
  sqlite3_int64 iEnd;             /* Block number of last allocated block */
  sqlite3_int64 nLeafData;        /* Bytes of leaf page data so far */
  u8 bNoLeafData;                 /* If true, store 0 for segment size */
  NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
};
| 14657 |
/*
** An object of the following type is used to read data from a single
** FTS segment node. See the following functions:
**
**     nodeReaderInit()
**     nodeReaderNext()
**     nodeReaderRelease()
**
** The term buffer is owned by the reader and is freed by
** nodeReaderRelease(). The aNode and aDoclist pointers refer to the
** caller's node buffer, which is not copied.
*/
struct NodeReader {
  const char *aNode;              /* Node being read. Set to NULL at EOF */
  int nNode;                      /* Size of aNode[] in bytes */
  int iOff;                       /* Current offset within aNode[] */

  /* Output variables. Containing the current node entry. */
  sqlite3_int64 iChild;           /* Pointer to child node */
  Blob term;                      /* Current term */
  const char *aDoclist;           /* Pointer to doclist */
  int nDoclist;                   /* Size of doclist in bytes */
};
| 14677 |
| 14678 /* |
| 14679 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 14680 ** Otherwise, if the allocation at pBlob->a is not already at least nMin |
| 14681 ** bytes in size, extend (realloc) it to be so. |
| 14682 ** |
| 14683 ** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a |
| 14684 ** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc |
| 14685 ** to reflect the new size of the pBlob->a[] buffer. |
| 14686 */ |
| 14687 static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ |
| 14688 if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ |
| 14689 int nAlloc = nMin; |
| 14690 char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); |
| 14691 if( a ){ |
| 14692 pBlob->nAlloc = nAlloc; |
| 14693 pBlob->a = a; |
| 14694 }else{ |
| 14695 *pRc = SQLITE_NOMEM; |
| 14696 } |
| 14697 } |
| 14698 } |
| 14699 |
| 14700 /* |
| 14701 ** Attempt to advance the node-reader object passed as the first argument to |
| 14702 ** the next entry on the node. |
| 14703 ** |
| 14704 ** Return an error code if an error occurs (SQLITE_NOMEM is possible). |
| 14705 ** Otherwise return SQLITE_OK. If there is no next entry on the node |
| 14706 ** (e.g. because the current entry is the last) set NodeReader->aNode to |
| 14707 ** NULL to indicate EOF. Otherwise, populate the NodeReader structure output |
| 14708 ** variables for the new entry. |
| 14709 */ |
| 14710 static int nodeReaderNext(NodeReader *p){ |
| 14711 int bFirst = (p->term.n==0); /* True for first term on the node */ |
| 14712 int nPrefix = 0; /* Bytes to copy from previous term */ |
| 14713 int nSuffix = 0; /* Bytes to append to the prefix */ |
| 14714 int rc = SQLITE_OK; /* Return code */ |
| 14715 |
| 14716 assert( p->aNode ); |
| 14717 if( p->iChild && bFirst==0 ) p->iChild++; |
| 14718 if( p->iOff>=p->nNode ){ |
| 14719 /* EOF */ |
| 14720 p->aNode = 0; |
| 14721 }else{ |
| 14722 if( bFirst==0 ){ |
| 14723 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); |
| 14724 } |
| 14725 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); |
| 14726 |
| 14727 blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); |
| 14728 if( rc==SQLITE_OK ){ |
| 14729 memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); |
| 14730 p->term.n = nPrefix+nSuffix; |
| 14731 p->iOff += nSuffix; |
| 14732 if( p->iChild==0 ){ |
| 14733 p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); |
| 14734 p->aDoclist = &p->aNode[p->iOff]; |
| 14735 p->iOff += p->nDoclist; |
| 14736 } |
| 14737 } |
| 14738 } |
| 14739 |
| 14740 assert( p->iOff<=p->nNode ); |
| 14741 |
| 14742 return rc; |
| 14743 } |
| 14744 |
| 14745 /* |
| 14746 ** Release all dynamic resources held by node-reader object *p. |
| 14747 */ |
| 14748 static void nodeReaderRelease(NodeReader *p){ |
| 14749 sqlite3_free(p->term.a); |
| 14750 } |
| 14751 |
| 14752 /* |
| 14753 ** Initialize a node-reader object to read the node in buffer aNode/nNode. |
| 14754 ** |
| 14755 ** If successful, SQLITE_OK is returned and the NodeReader object set to |
| 14756 ** point to the first entry on the node (if any). Otherwise, an SQLite |
| 14757 ** error code is returned. |
| 14758 */ |
| 14759 static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ |
| 14760 memset(p, 0, sizeof(NodeReader)); |
| 14761 p->aNode = aNode; |
| 14762 p->nNode = nNode; |
| 14763 |
| 14764 /* Figure out if this is a leaf or an internal node. */ |
| 14765 if( p->aNode[0] ){ |
| 14766 /* An internal node. */ |
| 14767 p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); |
| 14768 }else{ |
| 14769 p->iOff = 1; |
| 14770 } |
| 14771 |
| 14772 return nodeReaderNext(p); |
| 14773 } |
| 14774 |
| 14775 /* |
| 14776 ** This function is called while writing an FTS segment each time a leaf o |
| 14777 ** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed |
| 14778 ** to be greater than the largest key on the node just written, but smaller |
| 14779 ** than or equal to the first key that will be written to the next leaf |
| 14780 ** node. |
| 14781 ** |
| 14782 ** The block id of the leaf node just written to disk may be found in |
| 14783 ** (pWriter->aNodeWriter[0].iBlock) when this function is called. |
| 14784 */ |
| 14785 static int fts3IncrmergePush( |
| 14786 Fts3Table *p, /* Fts3 table handle */ |
| 14787 IncrmergeWriter *pWriter, /* Writer object */ |
| 14788 const char *zTerm, /* Term to write to internal node */ |
| 14789 int nTerm /* Bytes at zTerm */ |
| 14790 ){ |
| 14791 sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; |
| 14792 int iLayer; |
| 14793 |
| 14794 assert( nTerm>0 ); |
| 14795 for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){ |
| 14796 sqlite3_int64 iNextPtr = 0; |
| 14797 NodeWriter *pNode = &pWriter->aNodeWriter[iLayer]; |
| 14798 int rc = SQLITE_OK; |
| 14799 int nPrefix; |
| 14800 int nSuffix; |
| 14801 int nSpace; |
| 14802 |
| 14803 /* Figure out how much space the key will consume if it is written to |
| 14804 ** the current node of layer iLayer. Due to the prefix compression, |
| 14805 ** the space required changes depending on which node the key is to |
| 14806 ** be added to. */ |
| 14807 nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); |
| 14808 nSuffix = nTerm - nPrefix; |
| 14809 nSpace = sqlite3Fts3VarintLen(nPrefix); |
| 14810 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14811 |
| 14812 if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ |
| 14813 /* If the current node of layer iLayer contains zero keys, or if adding |
| 14814 ** the key to it will not cause it to grow to larger than nNodeSize |
| 14815 ** bytes in size, write the key here. */ |
| 14816 |
| 14817 Blob *pBlk = &pNode->block; |
| 14818 if( pBlk->n==0 ){ |
| 14819 blobGrowBuffer(pBlk, p->nNodeSize, &rc); |
| 14820 if( rc==SQLITE_OK ){ |
| 14821 pBlk->a[0] = (char)iLayer; |
| 14822 pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); |
| 14823 } |
| 14824 } |
| 14825 blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); |
| 14826 blobGrowBuffer(&pNode->key, nTerm, &rc); |
| 14827 |
| 14828 if( rc==SQLITE_OK ){ |
| 14829 if( pNode->key.n ){ |
| 14830 pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); |
| 14831 } |
| 14832 pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); |
| 14833 memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); |
| 14834 pBlk->n += nSuffix; |
| 14835 |
| 14836 memcpy(pNode->key.a, zTerm, nTerm); |
| 14837 pNode->key.n = nTerm; |
| 14838 } |
| 14839 }else{ |
| 14840 /* Otherwise, flush the current node of layer iLayer to disk. |
| 14841 ** Then allocate a new, empty sibling node. The key will be written |
| 14842 ** into the parent of this node. */ |
| 14843 rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); |
| 14844 |
| 14845 assert( pNode->block.nAlloc>=p->nNodeSize ); |
| 14846 pNode->block.a[0] = (char)iLayer; |
| 14847 pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); |
| 14848 |
| 14849 iNextPtr = pNode->iBlock; |
| 14850 pNode->iBlock++; |
| 14851 pNode->key.n = 0; |
| 14852 } |
| 14853 |
| 14854 if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; |
| 14855 iPtr = iNextPtr; |
| 14856 } |
| 14857 |
| 14858 assert( 0 ); |
| 14859 return 0; |
| 14860 } |
| 14861 |
| 14862 /* |
| 14863 ** Append a term and (optionally) doclist to the FTS segment node currently |
| 14864 ** stored in blob *pNode. The node need not contain any terms, but the |
| 14865 ** header must be written before this function is called. |
| 14866 ** |
| 14867 ** A node header is a single 0x00 byte for a leaf node, or a height varint |
| 14868 ** followed by the left-hand-child varint for an internal node. |
| 14869 ** |
| 14870 ** The term to be appended is passed via arguments zTerm/nTerm. For a |
| 14871 ** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal |
| 14872 ** node, both aDoclist and nDoclist must be passed 0. |
| 14873 ** |
| 14874 ** If the size of the value in blob pPrev is zero, then this is the first |
| 14875 ** term written to the node. Otherwise, pPrev contains a copy of the |
| 14876 ** previous term. Before this function returns, it is updated to contain a |
| 14877 ** copy of zTerm/nTerm. |
| 14878 ** |
| 14879 ** It is assumed that the buffer associated with pNode is already large |
| 14880 ** enough to accommodate the new entry. The buffer associated with pPrev |
| 14881 ** is extended by this function if requrired. |
| 14882 ** |
| 14883 ** If an error (i.e. OOM condition) occurs, an SQLite error code is |
| 14884 ** returned. Otherwise, SQLITE_OK. |
| 14885 */ |
| 14886 static int fts3AppendToNode( |
| 14887 Blob *pNode, /* Current node image to append to */ |
| 14888 Blob *pPrev, /* Buffer containing previous term written */ |
| 14889 const char *zTerm, /* New term to write */ |
| 14890 int nTerm, /* Size of zTerm in bytes */ |
| 14891 const char *aDoclist, /* Doclist (or NULL) to write */ |
| 14892 int nDoclist /* Size of aDoclist in bytes */ |
| 14893 ){ |
| 14894 int rc = SQLITE_OK; /* Return code */ |
| 14895 int bFirst = (pPrev->n==0); /* True if this is the first term written */ |
| 14896 int nPrefix; /* Size of term prefix in bytes */ |
| 14897 int nSuffix; /* Size of term suffix in bytes */ |
| 14898 |
| 14899 /* Node must have already been started. There must be a doclist for a |
| 14900 ** leaf node, and there must not be a doclist for an internal node. */ |
| 14901 assert( pNode->n>0 ); |
| 14902 assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); |
| 14903 |
| 14904 blobGrowBuffer(pPrev, nTerm, &rc); |
| 14905 if( rc!=SQLITE_OK ) return rc; |
| 14906 |
| 14907 nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); |
| 14908 nSuffix = nTerm - nPrefix; |
| 14909 memcpy(pPrev->a, zTerm, nTerm); |
| 14910 pPrev->n = nTerm; |
| 14911 |
| 14912 if( bFirst==0 ){ |
| 14913 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); |
| 14914 } |
| 14915 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); |
| 14916 memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); |
| 14917 pNode->n += nSuffix; |
| 14918 |
| 14919 if( aDoclist ){ |
| 14920 pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); |
| 14921 memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); |
| 14922 pNode->n += nDoclist; |
| 14923 } |
| 14924 |
| 14925 assert( pNode->n<=pNode->nAlloc ); |
| 14926 |
| 14927 return SQLITE_OK; |
| 14928 } |
| 14929 |
| 14930 /* |
| 14931 ** Append the current term and doclist pointed to by cursor pCsr to the |
| 14932 ** appendable b-tree segment opened for writing by pWriter. |
| 14933 ** |
| 14934 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. |
| 14935 */ |
| 14936 static int fts3IncrmergeAppend( |
| 14937 Fts3Table *p, /* Fts3 table handle */ |
| 14938 IncrmergeWriter *pWriter, /* Writer object */ |
| 14939 Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ |
| 14940 ){ |
| 14941 const char *zTerm = pCsr->zTerm; |
| 14942 int nTerm = pCsr->nTerm; |
| 14943 const char *aDoclist = pCsr->aDoclist; |
| 14944 int nDoclist = pCsr->nDoclist; |
| 14945 int rc = SQLITE_OK; /* Return code */ |
| 14946 int nSpace; /* Total space in bytes required on leaf */ |
| 14947 int nPrefix; /* Size of prefix shared with previous term */ |
| 14948 int nSuffix; /* Size of suffix (nTerm - nPrefix) */ |
| 14949 NodeWriter *pLeaf; /* Object used to write leaf nodes */ |
| 14950 |
| 14951 pLeaf = &pWriter->aNodeWriter[0]; |
| 14952 nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); |
| 14953 nSuffix = nTerm - nPrefix; |
| 14954 |
| 14955 nSpace = sqlite3Fts3VarintLen(nPrefix); |
| 14956 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14957 nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; |
| 14958 |
| 14959 /* If the current block is not empty, and if adding this term/doclist |
| 14960 ** to the current block would make it larger than Fts3Table.nNodeSize |
| 14961 ** bytes, write this block out to the database. */ |
| 14962 if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ |
| 14963 rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); |
| 14964 pWriter->nWork++; |
| 14965 |
| 14966 /* Add the current term to the parent node. The term added to the |
| 14967 ** parent must: |
| 14968 ** |
| 14969 ** a) be greater than the largest term on the leaf node just written |
| 14970 ** to the database (still available in pLeaf->key), and |
| 14971 ** |
| 14972 ** b) be less than or equal to the term about to be added to the new |
| 14973 ** leaf node (zTerm/nTerm). |
| 14974 ** |
| 14975 ** In other words, it must be the prefix of zTerm 1 byte longer than |
| 14976 ** the common prefix (if any) of zTerm and pWriter->zTerm. |
| 14977 */ |
| 14978 if( rc==SQLITE_OK ){ |
| 14979 rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); |
| 14980 } |
| 14981 |
| 14982 /* Advance to the next output block */ |
| 14983 pLeaf->iBlock++; |
| 14984 pLeaf->key.n = 0; |
| 14985 pLeaf->block.n = 0; |
| 14986 |
| 14987 nSuffix = nTerm; |
| 14988 nSpace = 1; |
| 14989 nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; |
| 14990 nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; |
| 14991 } |
| 14992 |
| 14993 pWriter->nLeafData += nSpace; |
| 14994 blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); |
| 14995 if( rc==SQLITE_OK ){ |
| 14996 if( pLeaf->block.n==0 ){ |
| 14997 pLeaf->block.n = 1; |
| 14998 pLeaf->block.a[0] = '\0'; |
| 14999 } |
| 15000 rc = fts3AppendToNode( |
| 15001 &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist |
| 15002 ); |
| 15003 } |
| 15004 |
| 15005 return rc; |
| 15006 } |
| 15007 |
| 15008 /* |
| 15009 ** This function is called to release all dynamic resources held by the |
| 15010 ** merge-writer object pWriter, and if no error has occurred, to flush |
| 15011 ** all outstanding node buffers held by pWriter to disk. |
| 15012 ** |
| 15013 ** If *pRc is not SQLITE_OK when this function is called, then no attempt |
| 15014 ** is made to write any data to disk. Instead, this function serves only |
| 15015 ** to release outstanding resources. |
| 15016 ** |
| 15017 ** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while |
| 15018 ** flushing buffers to disk, *pRc is set to an SQLite error code before |
| 15019 ** returning. |
| 15020 */ |
| 15021 static void fts3IncrmergeRelease( |
| 15022 Fts3Table *p, /* FTS3 table handle */ |
| 15023 IncrmergeWriter *pWriter, /* Merge-writer object */ |
| 15024 int *pRc /* IN/OUT: Error code */ |
| 15025 ){ |
| 15026 int i; /* Used to iterate through non-root layers */ |
| 15027 int iRoot; /* Index of root in pWriter->aNodeWriter */ |
| 15028 NodeWriter *pRoot; /* NodeWriter for root node */ |
| 15029 int rc = *pRc; /* Error code */ |
| 15030 |
| 15031 /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment |
| 15032 ** root node. If the segment fits entirely on a single leaf node, iRoot |
| 15033 ** will be set to 0. If the root node is the parent of the leaves, iRoot |
| 15034 ** will be 1. And so on. */ |
| 15035 for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ |
| 15036 NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; |
| 15037 if( pNode->block.n>0 ) break; |
| 15038 assert( *pRc || pNode->block.nAlloc==0 ); |
| 15039 assert( *pRc || pNode->key.nAlloc==0 ); |
| 15040 sqlite3_free(pNode->block.a); |
| 15041 sqlite3_free(pNode->key.a); |
| 15042 } |
| 15043 |
| 15044 /* Empty output segment. This is a no-op. */ |
| 15045 if( iRoot<0 ) return; |
| 15046 |
| 15047 /* The entire output segment fits on a single node. Normally, this means |
| 15048 ** the node would be stored as a blob in the "root" column of the %_segdir |
| 15049 ** table. However, this is not permitted in this case. The problem is that |
| 15050 ** space has already been reserved in the %_segments table, and so the |
| 15051 ** start_block and end_block fields of the %_segdir table must be populated. |
| 15052 ** And, by design or by accident, released versions of FTS cannot handle |
| 15053 ** segments that fit entirely on the root node with start_block!=0. |
| 15054 ** |
| 15055 ** Instead, create a synthetic root node that contains nothing but a |
| 15056 ** pointer to the single content node. So that the segment consists of a |
| 15057 ** single leaf and a single interior (root) node. |
| 15058 ** |
| 15059 ** Todo: Better might be to defer allocating space in the %_segments |
| 15060 ** table until we are sure it is needed. |
| 15061 */ |
| 15062 if( iRoot==0 ){ |
| 15063 Blob *pBlock = &pWriter->aNodeWriter[1].block; |
| 15064 blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); |
| 15065 if( rc==SQLITE_OK ){ |
| 15066 pBlock->a[0] = 0x01; |
| 15067 pBlock->n = 1 + sqlite3Fts3PutVarint( |
| 15068 &pBlock->a[1], pWriter->aNodeWriter[0].iBlock |
| 15069 ); |
| 15070 } |
| 15071 iRoot = 1; |
| 15072 } |
| 15073 pRoot = &pWriter->aNodeWriter[iRoot]; |
| 15074 |
| 15075 /* Flush all currently outstanding nodes to disk. */ |
| 15076 for(i=0; i<iRoot; i++){ |
| 15077 NodeWriter *pNode = &pWriter->aNodeWriter[i]; |
| 15078 if( pNode->block.n>0 && rc==SQLITE_OK ){ |
| 15079 rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); |
| 15080 } |
| 15081 sqlite3_free(pNode->block.a); |
| 15082 sqlite3_free(pNode->key.a); |
| 15083 } |
| 15084 |
| 15085 /* Write the %_segdir record. */ |
| 15086 if( rc==SQLITE_OK ){ |
| 15087 rc = fts3WriteSegdir(p, |
| 15088 pWriter->iAbsLevel+1, /* level */ |
| 15089 pWriter->iIdx, /* idx */ |
| 15090 pWriter->iStart, /* start_block */ |
| 15091 pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ |
| 15092 pWriter->iEnd, /* end_block */ |
| 15093 (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ |
| 15094 pRoot->block.a, pRoot->block.n /* root */ |
| 15095 ); |
| 15096 } |
| 15097 sqlite3_free(pRoot->block.a); |
| 15098 sqlite3_free(pRoot->key.a); |
| 15099 |
| 15100 *pRc = rc; |
| 15101 } |
| 15102 |
/*
** Compare the term in buffer zLhs (size in bytes nLhs) with that in
** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
** the other, it is considered to be smaller than the other.
**
** Either buffer may be NULL if its size is zero. In that case no bytes
** are compared and the result depends on the lengths alone.
**
** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
** if it is greater.
*/
static int fts3TermCmp(
  const char *zLhs, int nLhs,     /* LHS of comparison */
  const char *zRhs, int nRhs      /* RHS of comparison */
){
  int nCmp = (nLhs<nRhs) ? nLhs : nRhs;   /* Bytes common to both terms */
  int res = 0;

  /* Passing a NULL pointer to memcmp() is undefined behaviour even when
  ** the length is zero, so only call it when there is something to
  ** compare and both buffers exist. */
  if( nCmp>0 && zLhs!=0 && zRhs!=0 ){
    res = memcmp(zLhs, zRhs, (size_t)nCmp);
  }
  if( res==0 ) res = nLhs - nRhs;

  return res;
}
| 15123 |
| 15124 |
| 15125 /* |
| 15126 ** Query to see if the entry in the %_segments table with blockid iEnd is |
| 15127 ** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before |
| 15128 ** returning. Otherwise, set *pbRes to 0. |
| 15129 ** |
| 15130 ** Or, if an error occurs while querying the database, return an SQLite |
| 15131 ** error code. The final value of *pbRes is undefined in this case. |
| 15132 ** |
| 15133 ** This is used to test if a segment is an "appendable" segment. If it |
| 15134 ** is, then a NULL entry has been inserted into the %_segments table |
| 15135 ** with blockid %_segdir.end_block. |
| 15136 */ |
| 15137 static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ |
| 15138 int bRes = 0; /* Result to set *pbRes to */ |
| 15139 sqlite3_stmt *pCheck = 0; /* Statement to query database with */ |
| 15140 int rc; /* Return code */ |
| 15141 |
| 15142 rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); |
| 15143 if( rc==SQLITE_OK ){ |
| 15144 sqlite3_bind_int64(pCheck, 1, iEnd); |
| 15145 if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; |
| 15146 rc = sqlite3_reset(pCheck); |
| 15147 } |
| 15148 |
| 15149 *pbRes = bRes; |
| 15150 return rc; |
| 15151 } |
| 15152 |
| 15153 /* |
| 15154 ** This function is called when initializing an incremental-merge operation. |
| 15155 ** It checks if the existing segment with index value iIdx at absolute level |
| 15156 ** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the |
| 15157 ** merge-writer object *pWriter is initialized to write to it. |
| 15158 ** |
| 15159 ** An existing segment can be appended to by an incremental merge if: |
| 15160 ** |
| 15161 ** * It was initially created as an appendable segment (with all required |
| 15162 ** space pre-allocated), and |
| 15163 ** |
| 15164 ** * The first key read from the input (arguments zKey and nKey) is |
| 15165 ** greater than the largest key currently stored in the potential |
| 15166 ** output segment. |
| 15167 */ |
| 15168 static int fts3IncrmergeLoad( |
| 15169 Fts3Table *p, /* Fts3 table handle */ |
| 15170 sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ |
| 15171 int iIdx, /* Index of candidate output segment */ |
| 15172 const char *zKey, /* First key to write */ |
| 15173 int nKey, /* Number of bytes in nKey */ |
| 15174 IncrmergeWriter *pWriter /* Populate this object */ |
| 15175 ){ |
| 15176 int rc; /* Return code */ |
| 15177 sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ |
| 15178 |
| 15179 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); |
| 15180 if( rc==SQLITE_OK ){ |
| 15181 sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ |
| 15182 sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ |
| 15183 sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ |
| 15184 const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ |
| 15185 int nRoot = 0; /* Size of aRoot[] in bytes */ |
| 15186 int rc2; /* Return code from sqlite3_reset() */ |
| 15187 int bAppendable = 0; /* Set to true if segment is appendable */ |
| 15188 |
| 15189 /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ |
| 15190 sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); |
| 15191 sqlite3_bind_int(pSelect, 2, iIdx); |
| 15192 if( sqlite3_step(pSelect)==SQLITE_ROW ){ |
| 15193 iStart = sqlite3_column_int64(pSelect, 1); |
| 15194 iLeafEnd = sqlite3_column_int64(pSelect, 2); |
| 15195 fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); |
| 15196 if( pWriter->nLeafData<0 ){ |
| 15197 pWriter->nLeafData = pWriter->nLeafData * -1; |
| 15198 } |
| 15199 pWriter->bNoLeafData = (pWriter->nLeafData==0); |
| 15200 nRoot = sqlite3_column_bytes(pSelect, 4); |
| 15201 aRoot = sqlite3_column_blob(pSelect, 4); |
| 15202 }else{ |
| 15203 return sqlite3_reset(pSelect); |
| 15204 } |
| 15205 |
| 15206 /* Check for the zero-length marker in the %_segments table */ |
| 15207 rc = fts3IsAppendable(p, iEnd, &bAppendable); |
| 15208 |
| 15209 /* Check that zKey/nKey is larger than the largest key the candidate */ |
| 15210 if( rc==SQLITE_OK && bAppendable ){ |
| 15211 char *aLeaf = 0; |
| 15212 int nLeaf = 0; |
| 15213 |
| 15214 rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); |
| 15215 if( rc==SQLITE_OK ){ |
| 15216 NodeReader reader; |
| 15217 for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); |
| 15218 rc==SQLITE_OK && reader.aNode; |
| 15219 rc = nodeReaderNext(&reader) |
| 15220 ){ |
| 15221 assert( reader.aNode ); |
| 15222 } |
| 15223 if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ |
| 15224 bAppendable = 0; |
| 15225 } |
| 15226 nodeReaderRelease(&reader); |
| 15227 } |
| 15228 sqlite3_free(aLeaf); |
| 15229 } |
| 15230 |
| 15231 if( rc==SQLITE_OK && bAppendable ){ |
| 15232 /* It is possible to append to this segment. Set up the IncrmergeWriter |
| 15233 ** object to do so. */ |
| 15234 int i; |
| 15235 int nHeight = (int)aRoot[0]; |
| 15236 NodeWriter *pNode; |
| 15237 |
| 15238 pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; |
| 15239 pWriter->iStart = iStart; |
| 15240 pWriter->iEnd = iEnd; |
| 15241 pWriter->iAbsLevel = iAbsLevel; |
| 15242 pWriter->iIdx = iIdx; |
| 15243 |
| 15244 for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ |
| 15245 pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; |
| 15246 } |
| 15247 |
| 15248 pNode = &pWriter->aNodeWriter[nHeight]; |
| 15249 pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; |
| 15250 blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); |
| 15251 if( rc==SQLITE_OK ){ |
| 15252 memcpy(pNode->block.a, aRoot, nRoot); |
| 15253 pNode->block.n = nRoot; |
| 15254 } |
| 15255 |
| 15256 for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ |
| 15257 NodeReader reader; |
| 15258 pNode = &pWriter->aNodeWriter[i]; |
| 15259 |
| 15260 rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); |
| 15261 while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); |
| 15262 blobGrowBuffer(&pNode->key, reader.term.n, &rc); |
| 15263 if( rc==SQLITE_OK ){ |
| 15264 memcpy(pNode->key.a, reader.term.a, reader.term.n); |
| 15265 pNode->key.n = reader.term.n; |
| 15266 if( i>0 ){ |
| 15267 char *aBlock = 0; |
| 15268 int nBlock = 0; |
| 15269 pNode = &pWriter->aNodeWriter[i-1]; |
| 15270 pNode->iBlock = reader.iChild; |
| 15271 rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); |
| 15272 blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); |
| 15273 if( rc==SQLITE_OK ){ |
| 15274 memcpy(pNode->block.a, aBlock, nBlock); |
| 15275 pNode->block.n = nBlock; |
| 15276 } |
| 15277 sqlite3_free(aBlock); |
| 15278 } |
| 15279 } |
| 15280 nodeReaderRelease(&reader); |
| 15281 } |
| 15282 } |
| 15283 |
| 15284 rc2 = sqlite3_reset(pSelect); |
| 15285 if( rc==SQLITE_OK ) rc = rc2; |
| 15286 } |
| 15287 |
| 15288 return rc; |
| 15289 } |
| 15290 |
| 15291 /* |
| 15292 ** Determine the largest segment index value that exists within absolute |
| 15293 ** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus |
| 15294 ** one before returning SQLITE_OK. Or, if there are no segments at all |
| 15295 ** within level iAbsLevel, set *piIdx to zero. |
| 15296 ** |
| 15297 ** If an error occurs, return an SQLite error code. The final value of |
| 15298 ** *piIdx is undefined in this case. |
| 15299 */ |
| 15300 static int fts3IncrmergeOutputIdx( |
| 15301 Fts3Table *p, /* FTS Table handle */ |
| 15302 sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ |
| 15303 int *piIdx /* OUT: Next free index at iAbsLevel+1 */ |
| 15304 ){ |
| 15305 int rc; |
| 15306 sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ |
| 15307 |
| 15308 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); |
| 15309 if( rc==SQLITE_OK ){ |
| 15310 sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); |
| 15311 sqlite3_step(pOutputIdx); |
| 15312 *piIdx = sqlite3_column_int(pOutputIdx, 0); |
| 15313 rc = sqlite3_reset(pOutputIdx); |
| 15314 } |
| 15315 |
| 15316 return rc; |
| 15317 } |
| 15318 |
| 15319 /* |
| 15320 ** Allocate an appendable output segment on absolute level iAbsLevel+1 |
| 15321 ** with idx value iIdx. |
| 15322 ** |
| 15323 ** In the %_segdir table, a segment is defined by the values in three |
| 15324 ** columns: |
| 15325 ** |
| 15326 ** start_block |
| 15327 ** leaves_end_block |
| 15328 ** end_block |
| 15329 ** |
| 15330 ** When an appendable segment is allocated, it is estimated that the |
| 15331 ** maximum number of leaf blocks that may be required is the sum of the |
| 15332 ** number of leaf blocks consumed by the input segments, plus the number |
| 15333 ** of input segments, multiplied by two. This value is stored in stack |
| 15334 ** variable nLeafEst. |
| 15335 ** |
| 15336 ** A total of 16*nLeafEst blocks are allocated when an appendable segment |
| 15337 ** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous |
| 15338 ** array of leaf nodes starts at the first block allocated. The array |
| 15339 ** of interior nodes that are parents of the leaf nodes start at block |
| 15340 ** (start_block + (1 + end_block - start_block) / 16). And so on. |
| 15341 ** |
| 15342 ** In the actual code below, the value "16" is replaced with the |
| 15343 ** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. |
| 15344 */ |
| 15345 static int fts3IncrmergeWriter( |
| 15346 Fts3Table *p, /* Fts3 table handle */ |
| 15347 sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ |
| 15348 int iIdx, /* Index of new output segment */ |
| 15349 Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ |
| 15350 IncrmergeWriter *pWriter /* Populate this object */ |
| 15351 ){ |
| 15352 int rc; /* Return Code */ |
| 15353 int i; /* Iterator variable */ |
| 15354 int nLeafEst = 0; /* Blocks allocated for leaf nodes */ |
| 15355 sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ |
| 15356 sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ |
| 15357 |
| 15358 /* Calculate nLeafEst. */ |
| 15359 rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); |
| 15360 if( rc==SQLITE_OK ){ |
| 15361 sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); |
| 15362 sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); |
| 15363 if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ |
| 15364 nLeafEst = sqlite3_column_int(pLeafEst, 0); |
| 15365 } |
| 15366 rc = sqlite3_reset(pLeafEst); |
| 15367 } |
| 15368 if( rc!=SQLITE_OK ) return rc; |
| 15369 |
| 15370 /* Calculate the first block to use in the output segment */ |
| 15371 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); |
| 15372 if( rc==SQLITE_OK ){ |
| 15373 if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ |
| 15374 pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); |
| 15375 pWriter->iEnd = pWriter->iStart - 1; |
| 15376 pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; |
| 15377 } |
| 15378 rc = sqlite3_reset(pFirstBlock); |
| 15379 } |
| 15380 if( rc!=SQLITE_OK ) return rc; |
| 15381 |
| 15382 /* Insert the marker in the %_segments table to make sure nobody tries |
| 15383 ** to steal the space just allocated. This is also used to identify |
| 15384 ** appendable segments. */ |
| 15385 rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); |
| 15386 if( rc!=SQLITE_OK ) return rc; |
| 15387 |
| 15388 pWriter->iAbsLevel = iAbsLevel; |
| 15389 pWriter->nLeafEst = nLeafEst; |
| 15390 pWriter->iIdx = iIdx; |
| 15391 |
| 15392 /* Set up the array of NodeWriter objects */ |
| 15393 for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ |
| 15394 pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; |
| 15395 } |
| 15396 return SQLITE_OK; |
| 15397 } |
| 15398 |
| 15399 /* |
| 15400 ** Remove an entry from the %_segdir table. This involves running the |
| 15401 ** following two statements: |
| 15402 ** |
| 15403 ** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx |
| 15404 ** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx |
| 15405 ** |
| 15406 ** The DELETE statement removes the specific %_segdir level. The UPDATE |
| 15407 ** statement ensures that the remaining segments have contiguously allocated |
| 15408 ** idx values. |
| 15409 */ |
| 15410 static int fts3RemoveSegdirEntry( |
| 15411 Fts3Table *p, /* FTS3 table handle */ |
| 15412 sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ |
| 15413 int iIdx /* Index of %_segdir entry to delete */ |
| 15414 ){ |
| 15415 int rc; /* Return code */ |
| 15416 sqlite3_stmt *pDelete = 0; /* DELETE statement */ |
| 15417 |
| 15418 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); |
| 15419 if( rc==SQLITE_OK ){ |
| 15420 sqlite3_bind_int64(pDelete, 1, iAbsLevel); |
| 15421 sqlite3_bind_int(pDelete, 2, iIdx); |
| 15422 sqlite3_step(pDelete); |
| 15423 rc = sqlite3_reset(pDelete); |
| 15424 } |
| 15425 |
| 15426 return rc; |
| 15427 } |
| 15428 |
| 15429 /* |
| 15430 ** One or more segments have just been removed from absolute level iAbsLevel. |
| 15431 ** Update the 'idx' values of the remaining segments in the level so that |
| 15432 ** the idx values are a contiguous sequence starting from 0. |
| 15433 */ |
| 15434 static int fts3RepackSegdirLevel( |
| 15435 Fts3Table *p, /* FTS3 table handle */ |
| 15436 sqlite3_int64 iAbsLevel /* Absolute level to repack */ |
| 15437 ){ |
| 15438 int rc; /* Return code */ |
| 15439 int *aIdx = 0; /* Array of remaining idx values */ |
| 15440 int nIdx = 0; /* Valid entries in aIdx[] */ |
| 15441 int nAlloc = 0; /* Allocated size of aIdx[] */ |
| 15442 int i; /* Iterator variable */ |
| 15443 sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ |
| 15444 sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ |
| 15445 |
| 15446 rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); |
| 15447 if( rc==SQLITE_OK ){ |
| 15448 int rc2; |
| 15449 sqlite3_bind_int64(pSelect, 1, iAbsLevel); |
| 15450 while( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 15451 if( nIdx>=nAlloc ){ |
| 15452 int *aNew; |
| 15453 nAlloc += 16; |
| 15454 aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); |
| 15455 if( !aNew ){ |
| 15456 rc = SQLITE_NOMEM; |
| 15457 break; |
| 15458 } |
| 15459 aIdx = aNew; |
| 15460 } |
| 15461 aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); |
| 15462 } |
| 15463 rc2 = sqlite3_reset(pSelect); |
| 15464 if( rc==SQLITE_OK ) rc = rc2; |
| 15465 } |
| 15466 |
| 15467 if( rc==SQLITE_OK ){ |
| 15468 rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); |
| 15469 } |
| 15470 if( rc==SQLITE_OK ){ |
| 15471 sqlite3_bind_int64(pUpdate, 2, iAbsLevel); |
| 15472 } |
| 15473 |
| 15474 assert( p->bIgnoreSavepoint==0 ); |
| 15475 p->bIgnoreSavepoint = 1; |
| 15476 for(i=0; rc==SQLITE_OK && i<nIdx; i++){ |
| 15477 if( aIdx[i]!=i ){ |
| 15478 sqlite3_bind_int(pUpdate, 3, aIdx[i]); |
| 15479 sqlite3_bind_int(pUpdate, 1, i); |
| 15480 sqlite3_step(pUpdate); |
| 15481 rc = sqlite3_reset(pUpdate); |
| 15482 } |
| 15483 } |
| 15484 p->bIgnoreSavepoint = 0; |
| 15485 |
| 15486 sqlite3_free(aIdx); |
| 15487 return rc; |
| 15488 } |
| 15489 |
| 15490 static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ |
| 15491 pNode->a[0] = (char)iHeight; |
| 15492 if( iChild ){ |
| 15493 assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); |
| 15494 pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); |
| 15495 }else{ |
| 15496 assert( pNode->nAlloc>=1 ); |
| 15497 pNode->n = 1; |
| 15498 } |
| 15499 } |
| 15500 |
| 15501 /* |
| 15502 ** The first two arguments are a pointer to and the size of a segment b-tree |
| 15503 ** node. The node may be a leaf or an internal node. |
| 15504 ** |
| 15505 ** This function creates a new node image in blob object *pNew by copying |
| 15506 ** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) |
| 15507 ** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. |
| 15508 */ |
| 15509 static int fts3TruncateNode( |
| 15510 const char *aNode, /* Current node image */ |
| 15511 int nNode, /* Size of aNode in bytes */ |
| 15512 Blob *pNew, /* OUT: Write new node image here */ |
| 15513 const char *zTerm, /* Omit all terms smaller than this */ |
| 15514 int nTerm, /* Size of zTerm in bytes */ |
| 15515 sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ |
| 15516 ){ |
| 15517 NodeReader reader; /* Reader object */ |
| 15518 Blob prev = {0, 0, 0}; /* Previous term written to new node */ |
| 15519 int rc = SQLITE_OK; /* Return code */ |
| 15520 int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ |
| 15521 |
| 15522 /* Allocate required output space */ |
| 15523 blobGrowBuffer(pNew, nNode, &rc); |
| 15524 if( rc!=SQLITE_OK ) return rc; |
| 15525 pNew->n = 0; |
| 15526 |
| 15527 /* Populate new node buffer */ |
| 15528 for(rc = nodeReaderInit(&reader, aNode, nNode); |
| 15529 rc==SQLITE_OK && reader.aNode; |
| 15530 rc = nodeReaderNext(&reader) |
| 15531 ){ |
| 15532 if( pNew->n==0 ){ |
| 15533 int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); |
| 15534 if( res<0 || (bLeaf==0 && res==0) ) continue; |
| 15535 fts3StartNode(pNew, (int)aNode[0], reader.iChild); |
| 15536 *piBlock = reader.iChild; |
| 15537 } |
| 15538 rc = fts3AppendToNode( |
| 15539 pNew, &prev, reader.term.a, reader.term.n, |
| 15540 reader.aDoclist, reader.nDoclist |
| 15541 ); |
| 15542 if( rc!=SQLITE_OK ) break; |
| 15543 } |
| 15544 if( pNew->n==0 ){ |
| 15545 fts3StartNode(pNew, (int)aNode[0], reader.iChild); |
| 15546 *piBlock = reader.iChild; |
| 15547 } |
| 15548 assert( pNew->n<=pNew->nAlloc ); |
| 15549 |
| 15550 nodeReaderRelease(&reader); |
| 15551 sqlite3_free(prev.a); |
| 15552 return rc; |
| 15553 } |
| 15554 |
| 15555 /* |
| 15556 ** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute |
| 15557 ** level iAbsLevel. This may involve deleting entries from the %_segments |
| 15558 ** table, and modifying existing entries in both the %_segments and %_segdir |
| 15559 ** tables. |
| 15560 ** |
| 15561 ** SQLITE_OK is returned if the segment is updated successfully. Or an |
| 15562 ** SQLite error code otherwise. |
| 15563 */ |
| 15564 static int fts3TruncateSegment( |
| 15565 Fts3Table *p, /* FTS3 table handle */ |
| 15566 sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ |
| 15567 int iIdx, /* Index within level of segment to modify */ |
| 15568 const char *zTerm, /* Remove terms smaller than this */ |
| 15569 int nTerm /* Number of bytes in buffer zTerm */ |
| 15570 ){ |
| 15571 int rc = SQLITE_OK; /* Return code */ |
| 15572 Blob root = {0,0,0}; /* New root page image */ |
| 15573 Blob block = {0,0,0}; /* Buffer used for any other block */ |
| 15574 sqlite3_int64 iBlock = 0; /* Block id */ |
| 15575 sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ |
| 15576 sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ |
| 15577 sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ |
| 15578 |
| 15579 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); |
| 15580 if( rc==SQLITE_OK ){ |
| 15581 int rc2; /* sqlite3_reset() return code */ |
| 15582 sqlite3_bind_int64(pFetch, 1, iAbsLevel); |
| 15583 sqlite3_bind_int(pFetch, 2, iIdx); |
| 15584 if( SQLITE_ROW==sqlite3_step(pFetch) ){ |
| 15585 const char *aRoot = sqlite3_column_blob(pFetch, 4); |
| 15586 int nRoot = sqlite3_column_bytes(pFetch, 4); |
| 15587 iOldStart = sqlite3_column_int64(pFetch, 1); |
| 15588 rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); |
| 15589 } |
| 15590 rc2 = sqlite3_reset(pFetch); |
| 15591 if( rc==SQLITE_OK ) rc = rc2; |
| 15592 } |
| 15593 |
| 15594 while( rc==SQLITE_OK && iBlock ){ |
| 15595 char *aBlock = 0; |
| 15596 int nBlock = 0; |
| 15597 iNewStart = iBlock; |
| 15598 |
| 15599 rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); |
| 15600 if( rc==SQLITE_OK ){ |
| 15601 rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); |
| 15602 } |
| 15603 if( rc==SQLITE_OK ){ |
| 15604 rc = fts3WriteSegment(p, iNewStart, block.a, block.n); |
| 15605 } |
| 15606 sqlite3_free(aBlock); |
| 15607 } |
| 15608 |
| 15609 /* Variable iNewStart now contains the first valid leaf node. */ |
| 15610 if( rc==SQLITE_OK && iNewStart ){ |
| 15611 sqlite3_stmt *pDel = 0; |
| 15612 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); |
| 15613 if( rc==SQLITE_OK ){ |
| 15614 sqlite3_bind_int64(pDel, 1, iOldStart); |
| 15615 sqlite3_bind_int64(pDel, 2, iNewStart-1); |
| 15616 sqlite3_step(pDel); |
| 15617 rc = sqlite3_reset(pDel); |
| 15618 } |
| 15619 } |
| 15620 |
| 15621 if( rc==SQLITE_OK ){ |
| 15622 sqlite3_stmt *pChomp = 0; |
| 15623 rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); |
| 15624 if( rc==SQLITE_OK ){ |
| 15625 sqlite3_bind_int64(pChomp, 1, iNewStart); |
| 15626 sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); |
| 15627 sqlite3_bind_int64(pChomp, 3, iAbsLevel); |
| 15628 sqlite3_bind_int(pChomp, 4, iIdx); |
| 15629 sqlite3_step(pChomp); |
| 15630 rc = sqlite3_reset(pChomp); |
| 15631 } |
| 15632 } |
| 15633 |
| 15634 sqlite3_free(root.a); |
| 15635 sqlite3_free(block.a); |
| 15636 return rc; |
| 15637 } |
| 15638 |
| 15639 /* |
| 15640 ** This function is called after an incrmental-merge operation has run to |
| 15641 ** merge (or partially merge) two or more segments from absolute level |
| 15642 ** iAbsLevel. |
| 15643 ** |
| 15644 ** Each input segment is either removed from the db completely (if all of |
| 15645 ** its data was copied to the output segment by the incrmerge operation) |
| 15646 ** or modified in place so that it no longer contains those entries that |
| 15647 ** have been duplicated in the output segment. |
| 15648 */ |
| 15649 static int fts3IncrmergeChomp( |
| 15650 Fts3Table *p, /* FTS table handle */ |
| 15651 sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ |
| 15652 Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ |
| 15653 int *pnRem /* Number of segments not deleted */ |
| 15654 ){ |
| 15655 int i; |
| 15656 int nRem = 0; |
| 15657 int rc = SQLITE_OK; |
| 15658 |
| 15659 for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ |
| 15660 Fts3SegReader *pSeg = 0; |
| 15661 int j; |
| 15662 |
| 15663 /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding |
| 15664 ** somewhere in the pCsr->apSegment[] array. */ |
| 15665 for(j=0; ALWAYS(j<pCsr->nSegment); j++){ |
| 15666 pSeg = pCsr->apSegment[j]; |
| 15667 if( pSeg->iIdx==i ) break; |
| 15668 } |
| 15669 assert( j<pCsr->nSegment && pSeg->iIdx==i ); |
| 15670 |
| 15671 if( pSeg->aNode==0 ){ |
| 15672 /* Seg-reader is at EOF. Remove the entire input segment. */ |
| 15673 rc = fts3DeleteSegment(p, pSeg); |
| 15674 if( rc==SQLITE_OK ){ |
| 15675 rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); |
| 15676 } |
| 15677 *pnRem = 0; |
| 15678 }else{ |
| 15679 /* The incremental merge did not copy all the data from this |
| 15680 ** segment to the upper level. The segment is modified in place |
| 15681 ** so that it contains no keys smaller than zTerm/nTerm. */ |
| 15682 const char *zTerm = pSeg->zTerm; |
| 15683 int nTerm = pSeg->nTerm; |
| 15684 rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); |
| 15685 nRem++; |
| 15686 } |
| 15687 } |
| 15688 |
| 15689 if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ |
| 15690 rc = fts3RepackSegdirLevel(p, iAbsLevel); |
| 15691 } |
| 15692 |
| 15693 *pnRem = nRem; |
| 15694 return rc; |
| 15695 } |
| 15696 |
| 15697 /* |
| 15698 ** Store an incr-merge hint in the database. |
| 15699 */ |
| 15700 static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ |
| 15701 sqlite3_stmt *pReplace = 0; |
| 15702 int rc; /* Return code */ |
| 15703 |
| 15704 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); |
| 15705 if( rc==SQLITE_OK ){ |
| 15706 sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); |
| 15707 sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); |
| 15708 sqlite3_step(pReplace); |
| 15709 rc = sqlite3_reset(pReplace); |
| 15710 } |
| 15711 |
| 15712 return rc; |
| 15713 } |
| 15714 |
| 15715 /* |
| 15716 ** Load an incr-merge hint from the database. The incr-merge hint, if one |
| 15717 ** exists, is stored in the rowid==1 row of the %_stat table. |
| 15718 ** |
| 15719 ** If successful, populate blob *pHint with the value read from the %_stat |
| 15720 ** table and return SQLITE_OK. Otherwise, if an error occurs, return an |
| 15721 ** SQLite error code. |
| 15722 */ |
| 15723 static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ |
| 15724 sqlite3_stmt *pSelect = 0; |
| 15725 int rc; |
| 15726 |
| 15727 pHint->n = 0; |
| 15728 rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); |
| 15729 if( rc==SQLITE_OK ){ |
| 15730 int rc2; |
| 15731 sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); |
| 15732 if( SQLITE_ROW==sqlite3_step(pSelect) ){ |
| 15733 const char *aHint = sqlite3_column_blob(pSelect, 0); |
| 15734 int nHint = sqlite3_column_bytes(pSelect, 0); |
| 15735 if( aHint ){ |
| 15736 blobGrowBuffer(pHint, nHint, &rc); |
| 15737 if( rc==SQLITE_OK ){ |
| 15738 memcpy(pHint->a, aHint, nHint); |
| 15739 pHint->n = nHint; |
| 15740 } |
| 15741 } |
| 15742 } |
| 15743 rc2 = sqlite3_reset(pSelect); |
| 15744 if( rc==SQLITE_OK ) rc = rc2; |
| 15745 } |
| 15746 |
| 15747 return rc; |
| 15748 } |
| 15749 |
| 15750 /* |
| 15751 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op. |
| 15752 ** Otherwise, append an entry to the hint stored in blob *pHint. Each entry |
| 15753 ** consists of two varints, the absolute level number of the input segments |
| 15754 ** and the number of input segments. |
| 15755 ** |
| 15756 ** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, |
| 15757 ** set *pRc to an SQLite error code before returning. |
| 15758 */ |
| 15759 static void fts3IncrmergeHintPush( |
| 15760 Blob *pHint, /* Hint blob to append to */ |
| 15761 i64 iAbsLevel, /* First varint to store in hint */ |
| 15762 int nInput, /* Second varint to store in hint */ |
| 15763 int *pRc /* IN/OUT: Error code */ |
| 15764 ){ |
| 15765 blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); |
| 15766 if( *pRc==SQLITE_OK ){ |
| 15767 pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); |
| 15768 pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); |
| 15769 } |
| 15770 } |
| 15771 |
| 15772 /* |
| 15773 ** Read the last entry (most recently pushed) from the hint blob *pHint |
| 15774 ** and then remove the entry. Write the two values read to *piAbsLevel and |
| 15775 ** *pnInput before returning. |
| 15776 ** |
| 15777 ** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does |
| 15778 ** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. |
| 15779 */ |
| 15780 static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ |
| 15781 const int nHint = pHint->n; |
| 15782 int i; |
| 15783 |
| 15784 i = pHint->n-2; |
| 15785 while( i>0 && (pHint->a[i-1] & 0x80) ) i--; |
| 15786 while( i>0 && (pHint->a[i-1] & 0x80) ) i--; |
| 15787 |
| 15788 pHint->n = i; |
| 15789 i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); |
| 15790 i += fts3GetVarint32(&pHint->a[i], pnInput); |
| 15791 if( i!=nHint ) return FTS_CORRUPT_VTAB; |
| 15792 |
| 15793 return SQLITE_OK; |
| 15794 } |
| 15795 |
| 15796 |
| 15797 /* |
| 15798 ** Attempt an incremental merge that writes nMerge leaf blocks. |
| 15799 ** |
| 15800 ** Incremental merges happen nMin segments at a time. The segments |
| 15801 ** to be merged are the nMin oldest segments (the ones with the smallest |
| 15802 ** values for the _segdir.idx field) in the highest level that contains |
| 15803 ** at least nMin segments. Multiple merges might occur in an attempt to |
| 15804 ** write the quota of nMerge leaf blocks. |
| 15805 */ |
| 15806 SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ |
| 15807 int rc; /* Return code */ |
| 15808 int nRem = nMerge; /* Number of leaf pages yet to be written */ |
| 15809 Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ |
| 15810 Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ |
| 15811 IncrmergeWriter *pWriter; /* Writer object */ |
| 15812 int nSeg = 0; /* Number of input segments */ |
| 15813 sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ |
| 15814 Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ |
| 15815 int bDirtyHint = 0; /* True if blob 'hint' has been modified */ |
| 15816 |
| 15817 /* Allocate space for the cursor, filter and writer objects */ |
| 15818 const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); |
| 15819 pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); |
| 15820 if( !pWriter ) return SQLITE_NOMEM; |
| 15821 pFilter = (Fts3SegFilter *)&pWriter[1]; |
| 15822 pCsr = (Fts3MultiSegReader *)&pFilter[1]; |
| 15823 |
| 15824 rc = fts3IncrmergeHintLoad(p, &hint); |
| 15825 while( rc==SQLITE_OK && nRem>0 ){ |
| 15826 const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; |
| 15827 sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ |
| 15828 int bUseHint = 0; /* True if attempting to append */ |
| 15829 int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ |
| 15830 |
| 15831 /* Search the %_segdir table for the absolute level with the smallest |
| 15832 ** relative level number that contains at least nMin segments, if any. |
| 15833 ** If one is found, set iAbsLevel to the absolute level number and |
| 15834 ** nSeg to nMin. If no level with at least nMin segments can be found, |
| 15835 ** set nSeg to -1. |
| 15836 */ |
| 15837 rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); |
| 15838 sqlite3_bind_int(pFindLevel, 1, MAX(2, nMin)); |
| 15839 if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ |
| 15840 iAbsLevel = sqlite3_column_int64(pFindLevel, 0); |
| 15841 nSeg = sqlite3_column_int(pFindLevel, 1); |
| 15842 assert( nSeg>=2 ); |
| 15843 }else{ |
| 15844 nSeg = -1; |
| 15845 } |
| 15846 rc = sqlite3_reset(pFindLevel); |
| 15847 |
| 15848 /* If the hint read from the %_stat table is not empty, check if the |
| 15849 ** last entry in it specifies a relative level smaller than or equal |
| 15850 ** to the level identified by the block above (if any). If so, this |
| 15851 ** iteration of the loop will work on merging at the hinted level. |
| 15852 */ |
| 15853 if( rc==SQLITE_OK && hint.n ){ |
| 15854 int nHint = hint.n; |
| 15855 sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ |
| 15856 int nHintSeg = 0; /* Hint number of segments */ |
| 15857 |
| 15858 rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); |
| 15859 if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ |
| 15860 iAbsLevel = iHintAbsLevel; |
| 15861 nSeg = nHintSeg; |
| 15862 bUseHint = 1; |
| 15863 bDirtyHint = 1; |
| 15864 }else{ |
| 15865 /* This undoes the effect of the HintPop() above - so that no entry |
| 15866 ** is removed from the hint blob. */ |
| 15867 hint.n = nHint; |
| 15868 } |
| 15869 } |
| 15870 |
| 15871 /* If nSeg is less that zero, then there is no level with at least |
| 15872 ** nMin segments and no hint in the %_stat table. No work to do. |
| 15873 ** Exit early in this case. */ |
| 15874 if( nSeg<0 ) break; |
| 15875 |
| 15876 /* Open a cursor to iterate through the contents of the oldest nSeg |
| 15877 ** indexes of absolute level iAbsLevel. If this cursor is opened using |
| 15878 ** the 'hint' parameters, it is possible that there are less than nSeg |
| 15879 ** segments available in level iAbsLevel. In this case, no work is |
| 15880 ** done on iAbsLevel - fall through to the next iteration of the loop |
| 15881 ** to start work on some other level. */ |
| 15882 memset(pWriter, 0, nAlloc); |
| 15883 pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; |
| 15884 |
| 15885 if( rc==SQLITE_OK ){ |
| 15886 rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); |
| 15887 assert( bUseHint==1 || bUseHint==0 ); |
| 15888 if( iIdx==0 || (bUseHint && iIdx==1) ){ |
| 15889 int bIgnore = 0; |
| 15890 rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); |
| 15891 if( bIgnore ){ |
| 15892 pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; |
| 15893 } |
| 15894 } |
| 15895 } |
| 15896 |
| 15897 if( rc==SQLITE_OK ){ |
| 15898 rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); |
| 15899 } |
| 15900 if( SQLITE_OK==rc && pCsr->nSegment==nSeg |
| 15901 && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) |
| 15902 && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) |
| 15903 ){ |
| 15904 if( bUseHint && iIdx>0 ){ |
| 15905 const char *zKey = pCsr->zTerm; |
| 15906 int nKey = pCsr->nTerm; |
| 15907 rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); |
| 15908 }else{ |
| 15909 rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); |
| 15910 } |
| 15911 |
| 15912 if( rc==SQLITE_OK && pWriter->nLeafEst ){ |
| 15913 fts3LogMerge(nSeg, iAbsLevel); |
| 15914 do { |
| 15915 rc = fts3IncrmergeAppend(p, pWriter, pCsr); |
| 15916 if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); |
| 15917 if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; |
| 15918 }while( rc==SQLITE_ROW ); |
| 15919 |
| 15920 /* Update or delete the input segments */ |
| 15921 if( rc==SQLITE_OK ){ |
| 15922 nRem -= (1 + pWriter->nWork); |
| 15923 rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); |
| 15924 if( nSeg!=0 ){ |
| 15925 bDirtyHint = 1; |
| 15926 fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); |
| 15927 } |
| 15928 } |
| 15929 } |
| 15930 |
| 15931 if( nSeg!=0 ){ |
| 15932 pWriter->nLeafData = pWriter->nLeafData * -1; |
| 15933 } |
| 15934 fts3IncrmergeRelease(p, pWriter, &rc); |
| 15935 if( nSeg==0 && pWriter->bNoLeafData==0 ){ |
| 15936 fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); |
| 15937 } |
| 15938 } |
| 15939 |
| 15940 sqlite3Fts3SegReaderFinish(pCsr); |
| 15941 } |
| 15942 |
| 15943 /* Write the hint values into the %_stat table for the next incr-merger */ |
| 15944 if( bDirtyHint && rc==SQLITE_OK ){ |
| 15945 rc = fts3IncrmergeHintStore(p, &hint); |
| 15946 } |
| 15947 |
| 15948 sqlite3_free(pWriter); |
| 15949 sqlite3_free(hint.a); |
| 15950 return rc; |
| 15951 } |
| 15952 |
/*
** Convert the decimal digits beginning at *pz into a non-negative integer
** and return its value. Advance *pz to point to the first character that
** was not consumed.
**
** If *pz does not start with a digit, return 0 and leave *pz unchanged.
**
** To avoid signed integer overflow, digits are only consumed while the
** accumulated value is less than 214748363, so that 10*value+9 always
** fits in a 32-bit int. For over-long digit strings this means *pz is
** left pointing at an unconsumed digit, which callers that require the
** whole string to be consumed will see as trailing garbage.
*/
static int fts3Getint(const char **pz){
  const char *z = *pz;
  int i = 0;
  while( (*z)>='0' && (*z)<='9' && i<214748363 ){
    /* Subtract '0' before adding so the intermediate sum cannot overflow */
    i = 10*i + (*z - '0');
    z++;
  }
  *pz = z;
  return i;
}
| 15965 |
| 15966 /* |
| 15967 ** Process statements of the form: |
| 15968 ** |
| 15969 ** INSERT INTO table(table) VALUES('merge=A,B'); |
| 15970 ** |
| 15971 ** A and B are integers that decode to be the number of leaf pages |
| 15972 ** written for the merge, and the minimum number of segments on a level |
| 15973 ** before it will be selected for a merge, respectively. |
| 15974 */ |
| 15975 static int fts3DoIncrmerge( |
| 15976 Fts3Table *p, /* FTS3 table handle */ |
| 15977 const char *zParam /* Nul-terminated string containing "A,B" */ |
| 15978 ){ |
| 15979 int rc; |
| 15980 int nMin = (FTS3_MERGE_COUNT / 2); |
| 15981 int nMerge = 0; |
| 15982 const char *z = zParam; |
| 15983 |
| 15984 /* Read the first integer value */ |
| 15985 nMerge = fts3Getint(&z); |
| 15986 |
| 15987 /* If the first integer value is followed by a ',', read the second |
| 15988 ** integer value. */ |
| 15989 if( z[0]==',' && z[1]!='\0' ){ |
| 15990 z++; |
| 15991 nMin = fts3Getint(&z); |
| 15992 } |
| 15993 |
| 15994 if( z[0]!='\0' || nMin<2 ){ |
| 15995 rc = SQLITE_ERROR; |
| 15996 }else{ |
| 15997 rc = SQLITE_OK; |
| 15998 if( !p->bHasStat ){ |
| 15999 assert( p->bFts4==0 ); |
| 16000 sqlite3Fts3CreateStatTable(&rc, p); |
| 16001 } |
| 16002 if( rc==SQLITE_OK ){ |
| 16003 rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); |
| 16004 } |
| 16005 sqlite3Fts3SegmentsClose(p); |
| 16006 } |
| 16007 return rc; |
| 16008 } |
| 16009 |
| 16010 /* |
| 16011 ** Process statements of the form: |
| 16012 ** |
| 16013 ** INSERT INTO table(table) VALUES('automerge=X'); |
| 16014 ** |
| 16015 ** where X is an integer. X==0 means to turn automerge off. X!=0 means |
| 16016 ** turn it on. The setting is persistent. |
| 16017 */ |
| 16018 static int fts3DoAutoincrmerge( |
| 16019 Fts3Table *p, /* FTS3 table handle */ |
| 16020 const char *zParam /* Nul-terminated string containing boolean */ |
| 16021 ){ |
| 16022 int rc = SQLITE_OK; |
| 16023 sqlite3_stmt *pStmt = 0; |
| 16024 p->nAutoincrmerge = fts3Getint(&zParam); |
| 16025 if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ |
| 16026 p->nAutoincrmerge = 8; |
| 16027 } |
| 16028 if( !p->bHasStat ){ |
| 16029 assert( p->bFts4==0 ); |
| 16030 sqlite3Fts3CreateStatTable(&rc, p); |
| 16031 if( rc ) return rc; |
| 16032 } |
| 16033 rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); |
| 16034 if( rc ) return rc; |
| 16035 sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); |
| 16036 sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); |
| 16037 sqlite3_step(pStmt); |
| 16038 rc = sqlite3_reset(pStmt); |
| 16039 return rc; |
| 16040 } |
| 16041 |
| 16042 /* |
| 16043 ** Return a 64-bit checksum for the FTS index entry specified by the |
| 16044 ** arguments to this function. |
| 16045 */ |
| 16046 static u64 fts3ChecksumEntry( |
| 16047 const char *zTerm, /* Pointer to buffer containing term */ |
| 16048 int nTerm, /* Size of zTerm in bytes */ |
| 16049 int iLangid, /* Language id for current row */ |
| 16050 int iIndex, /* Index (0..Fts3Table.nIndex-1) */ |
| 16051 i64 iDocid, /* Docid for current row. */ |
| 16052 int iCol, /* Column number */ |
| 16053 int iPos /* Position */ |
| 16054 ){ |
| 16055 int i; |
| 16056 u64 ret = (u64)iDocid; |
| 16057 |
| 16058 ret += (ret<<3) + iLangid; |
| 16059 ret += (ret<<3) + iIndex; |
| 16060 ret += (ret<<3) + iCol; |
| 16061 ret += (ret<<3) + iPos; |
| 16062 for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i]; |
| 16063 |
| 16064 return ret; |
| 16065 } |
| 16066 |
| 16067 /* |
| 16068 ** Return a checksum of all entries in the FTS index that correspond to |
| 16069 ** language id iLangid. The checksum is calculated by XORing the checksums |
| 16070 ** of each individual entry (see fts3ChecksumEntry()) together. |
| 16071 ** |
| 16072 ** If successful, the checksum value is returned and *pRc set to SQLITE_OK. |
| 16073 ** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The |
| 16074 ** return value is undefined in this case. |
| 16075 */ |
| 16076 static u64 fts3ChecksumIndex( |
| 16077 Fts3Table *p, /* FTS3 table handle */ |
| 16078 int iLangid, /* Language id to return cksum for */ |
| 16079 int iIndex, /* Index to cksum (0..p->nIndex-1) */ |
| 16080 int *pRc /* OUT: Return code */ |
| 16081 ){ |
| 16082 Fts3SegFilter filter; |
| 16083 Fts3MultiSegReader csr; |
| 16084 int rc; |
| 16085 u64 cksum = 0; |
| 16086 |
| 16087 assert( *pRc==SQLITE_OK ); |
| 16088 |
| 16089 memset(&filter, 0, sizeof(filter)); |
| 16090 memset(&csr, 0, sizeof(csr)); |
| 16091 filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; |
| 16092 filter.flags |= FTS3_SEGMENT_SCAN; |
| 16093 |
| 16094 rc = sqlite3Fts3SegReaderCursor( |
| 16095 p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr |
| 16096 ); |
| 16097 if( rc==SQLITE_OK ){ |
| 16098 rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); |
| 16099 } |
| 16100 |
| 16101 if( rc==SQLITE_OK ){ |
| 16102 while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ |
| 16103 char *pCsr = csr.aDoclist; |
| 16104 char *pEnd = &pCsr[csr.nDoclist]; |
| 16105 |
| 16106 i64 iDocid = 0; |
| 16107 i64 iCol = 0; |
| 16108 i64 iPos = 0; |
| 16109 |
| 16110 pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); |
| 16111 while( pCsr<pEnd ){ |
| 16112 i64 iVal = 0; |
| 16113 pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); |
| 16114 if( pCsr<pEnd ){ |
| 16115 if( iVal==0 || iVal==1 ){ |
| 16116 iCol = 0; |
| 16117 iPos = 0; |
| 16118 if( iVal ){ |
| 16119 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); |
| 16120 }else{ |
| 16121 pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); |
| 16122 iDocid += iVal; |
| 16123 } |
| 16124 }else{ |
| 16125 iPos += (iVal - 2); |
| 16126 cksum = cksum ^ fts3ChecksumEntry( |
| 16127 csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, |
| 16128 (int)iCol, (int)iPos |
| 16129 ); |
| 16130 } |
| 16131 } |
| 16132 } |
| 16133 } |
| 16134 } |
| 16135 sqlite3Fts3SegReaderFinish(&csr); |
| 16136 |
| 16137 *pRc = rc; |
| 16138 return cksum; |
| 16139 } |
| 16140 |
| 16141 /* |
| 16142 ** Check if the contents of the FTS index match the current contents of the |
| 16143 ** content table. If no error occurs and the contents do match, set *pbOk |
| 16144 ** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk |
| 16145 ** to false before returning. |
| 16146 ** |
| 16147 ** If an error occurs (e.g. an OOM or IO error), return an SQLite error |
| 16148 ** code. The final value of *pbOk is undefined in this case. |
| 16149 */ |
| 16150 static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ |
| 16151 int rc = SQLITE_OK; /* Return code */ |
| 16152 u64 cksum1 = 0; /* Checksum based on FTS index contents */ |
| 16153 u64 cksum2 = 0; /* Checksum based on %_content contents */ |
| 16154 sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ |
| 16155 |
| 16156 /* This block calculates the checksum according to the FTS index. */ |
| 16157 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); |
| 16158 if( rc==SQLITE_OK ){ |
| 16159 int rc2; |
| 16160 sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); |
| 16161 sqlite3_bind_int(pAllLangid, 2, p->nIndex); |
| 16162 while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ |
| 16163 int iLangid = sqlite3_column_int(pAllLangid, 0); |
| 16164 int i; |
| 16165 for(i=0; i<p->nIndex; i++){ |
| 16166 cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); |
| 16167 } |
| 16168 } |
| 16169 rc2 = sqlite3_reset(pAllLangid); |
| 16170 if( rc==SQLITE_OK ) rc = rc2; |
| 16171 } |
| 16172 |
| 16173 /* This block calculates the checksum according to the %_content table */ |
| 16174 if( rc==SQLITE_OK ){ |
| 16175 sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; |
| 16176 sqlite3_stmt *pStmt = 0; |
| 16177 char *zSql; |
| 16178 |
| 16179 zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); |
| 16180 if( !zSql ){ |
| 16181 rc = SQLITE_NOMEM; |
| 16182 }else{ |
| 16183 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); |
| 16184 sqlite3_free(zSql); |
| 16185 } |
| 16186 |
| 16187 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 16188 i64 iDocid = sqlite3_column_int64(pStmt, 0); |
| 16189 int iLang = langidFromSelect(p, pStmt); |
| 16190 int iCol; |
| 16191 |
| 16192 for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ |
| 16193 if( p->abNotindexed[iCol]==0 ){ |
| 16194 const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); |
| 16195 int nText = sqlite3_column_bytes(pStmt, iCol+1); |
| 16196 sqlite3_tokenizer_cursor *pT = 0; |
| 16197 |
| 16198 rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); |
| 16199 while( rc==SQLITE_OK ){ |
| 16200 char const *zToken; /* Buffer containing token */ |
| 16201 int nToken = 0; /* Number of bytes in token */ |
| 16202 int iDum1 = 0, iDum2 = 0; /* Dummy variables */ |
| 16203 int iPos = 0; /* Position of token in zText */ |
| 16204 |
| 16205 rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); |
| 16206 if( rc==SQLITE_OK ){ |
| 16207 int i; |
| 16208 cksum2 = cksum2 ^ fts3ChecksumEntry( |
| 16209 zToken, nToken, iLang, 0, iDocid, iCol, iPos |
| 16210 ); |
| 16211 for(i=1; i<p->nIndex; i++){ |
| 16212 if( p->aIndex[i].nPrefix<=nToken ){ |
| 16213 cksum2 = cksum2 ^ fts3ChecksumEntry( |
| 16214 zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos |
| 16215 ); |
| 16216 } |
| 16217 } |
| 16218 } |
| 16219 } |
| 16220 if( pT ) pModule->xClose(pT); |
| 16221 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 16222 } |
| 16223 } |
| 16224 } |
| 16225 |
| 16226 sqlite3_finalize(pStmt); |
| 16227 } |
| 16228 |
| 16229 *pbOk = (cksum1==cksum2); |
| 16230 return rc; |
| 16231 } |
| 16232 |
| 16233 /* |
| 16234 ** Run the integrity-check. If no error occurs and the current contents of |
| 16235 ** the FTS index are correct, return SQLITE_OK. Or, if the contents of the |
| 16236 ** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. |
| 16237 ** |
| 16238 ** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite |
| 16239 ** error code. |
| 16240 ** |
| 16241 ** The integrity-check works as follows. For each token and indexed token |
| 16242 ** prefix in the document set, a 64-bit checksum is calculated (by code |
| 16243 ** in fts3ChecksumEntry()) based on the following: |
| 16244 ** |
| 16245 ** + The index number (0 for the main index, 1 for the first prefix |
| 16246 ** index etc.), |
| 16247 ** + The token (or token prefix) text itself, |
| 16248 ** + The language-id of the row it appears in, |
| 16249 ** + The docid of the row it appears in, |
| 16250 ** + The column it appears in, and |
| 16251 ** + The tokens position within that column. |
| 16252 ** |
| 16253 ** The checksums for all entries in the index are XORed together to create |
| 16254 ** a single checksum for the entire index. |
| 16255 ** |
| 16256 ** The integrity-check code calculates the same checksum in two ways: |
| 16257 ** |
| 16258 ** 1. By scanning the contents of the FTS index, and |
| 16259 ** 2. By scanning and tokenizing the content table. |
| 16260 ** |
| 16261 ** If the two checksums are identical, the integrity-check is deemed to have |
| 16262 ** passed. |
| 16263 */ |
| 16264 static int fts3DoIntegrityCheck( |
| 16265 Fts3Table *p /* FTS3 table handle */ |
| 16266 ){ |
| 16267 int rc; |
| 16268 int bOk = 0; |
| 16269 rc = fts3IntegrityCheck(p, &bOk); |
| 16270 if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; |
| 16271 return rc; |
| 16272 } |
| 16273 |
| 16274 /* |
| 16275 ** Handle a 'special' INSERT of the form: |
| 16276 ** |
| 16277 ** "INSERT INTO tbl(tbl) VALUES(<expr>)" |
| 16278 ** |
| 16279 ** Argument pVal contains the result of <expr>. Currently the only |
| 16280 ** meaningful value to insert is the text 'optimize'. |
| 16281 */ |
| 16282 static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ |
| 16283 int rc; /* Return Code */ |
| 16284 const char *zVal = (const char *)sqlite3_value_text(pVal); |
| 16285 int nVal = sqlite3_value_bytes(pVal); |
| 16286 |
| 16287 if( !zVal ){ |
| 16288 return SQLITE_NOMEM; |
| 16289 }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ |
| 16290 rc = fts3DoOptimize(p, 0); |
| 16291 }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ |
| 16292 rc = fts3DoRebuild(p); |
| 16293 }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ |
| 16294 rc = fts3DoIntegrityCheck(p); |
| 16295 }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ |
| 16296 rc = fts3DoIncrmerge(p, &zVal[6]); |
| 16297 }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ |
| 16298 rc = fts3DoAutoincrmerge(p, &zVal[10]); |
| 16299 #ifdef SQLITE_TEST |
| 16300 }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ |
| 16301 p->nNodeSize = atoi(&zVal[9]); |
| 16302 rc = SQLITE_OK; |
| 16303 }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ |
| 16304 p->nMaxPendingData = atoi(&zVal[11]); |
| 16305 rc = SQLITE_OK; |
| 16306 }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ |
| 16307 p->bNoIncrDoclist = atoi(&zVal[21]); |
| 16308 rc = SQLITE_OK; |
| 16309 #endif |
| 16310 }else{ |
| 16311 rc = SQLITE_ERROR; |
| 16312 } |
| 16313 |
| 16314 return rc; |
| 16315 } |
| 16316 |
| 16317 #ifndef SQLITE_DISABLE_FTS4_DEFERRED |
| 16318 /* |
| 16319 ** Delete all cached deferred doclists. Deferred doclists are cached |
| 16320 ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. |
| 16321 */ |
| 16322 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ |
| 16323 Fts3DeferredToken *pDef; |
| 16324 for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ |
| 16325 fts3PendingListDelete(pDef->pList); |
| 16326 pDef->pList = 0; |
| 16327 } |
| 16328 } |
| 16329 |
| 16330 /* |
| 16331 ** Free all entries in the pCsr->pDeffered list. Entries are added to |
| 16332 ** this list using sqlite3Fts3DeferToken(). |
| 16333 */ |
| 16334 SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ |
| 16335 Fts3DeferredToken *pDef; |
| 16336 Fts3DeferredToken *pNext; |
| 16337 for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ |
| 16338 pNext = pDef->pNext; |
| 16339 fts3PendingListDelete(pDef->pList); |
| 16340 sqlite3_free(pDef); |
| 16341 } |
| 16342 pCsr->pDeferred = 0; |
| 16343 } |
| 16344 |
| 16345 /* |
| 16346 ** Generate deferred-doclists for all tokens in the pCsr->pDeferred list |
| 16347 ** based on the row that pCsr currently points to. |
| 16348 ** |
| 16349 ** A deferred-doclist is like any other doclist with position information |
| 16350 ** included, except that it only contains entries for a single row of the |
| 16351 ** table, not for all rows. |
| 16352 */ |
| 16353 SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ |
| 16354 int rc = SQLITE_OK; /* Return code */ |
| 16355 if( pCsr->pDeferred ){ |
| 16356 int i; /* Used to iterate through table columns */ |
| 16357 sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ |
| 16358 Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ |
| 16359 |
| 16360 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; |
| 16361 sqlite3_tokenizer *pT = p->pTokenizer; |
| 16362 sqlite3_tokenizer_module const *pModule = pT->pModule; |
| 16363 |
| 16364 assert( pCsr->isRequireSeek==0 ); |
| 16365 iDocid = sqlite3_column_int64(pCsr->pStmt, 0); |
| 16366 |
| 16367 for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ |
| 16368 if( p->abNotindexed[i]==0 ){ |
| 16369 const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); |
| 16370 sqlite3_tokenizer_cursor *pTC = 0; |
| 16371 |
| 16372 rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); |
| 16373 while( rc==SQLITE_OK ){ |
| 16374 char const *zToken; /* Buffer containing token */ |
| 16375 int nToken = 0; /* Number of bytes in token */ |
| 16376 int iDum1 = 0, iDum2 = 0; /* Dummy variables */ |
| 16377 int iPos = 0; /* Position of token in zText */ |
| 16378 |
| 16379 rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); |
| 16380 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ |
| 16381 Fts3PhraseToken *pPT = pDef->pToken; |
| 16382 if( (pDef->iCol>=p->nColumn || pDef->iCol==i) |
| 16383 && (pPT->bFirst==0 || iPos==0) |
| 16384 && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) |
| 16385 && (0==memcmp(zToken, pPT->z, pPT->n)) |
| 16386 ){ |
| 16387 fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); |
| 16388 } |
| 16389 } |
| 16390 } |
| 16391 if( pTC ) pModule->xClose(pTC); |
| 16392 if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
| 16393 } |
| 16394 } |
| 16395 |
| 16396 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ |
| 16397 if( pDef->pList ){ |
| 16398 rc = fts3PendingListAppendVarint(&pDef->pList, 0); |
| 16399 } |
| 16400 } |
| 16401 } |
| 16402 |
| 16403 return rc; |
| 16404 } |
| 16405 |
| 16406 SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( |
| 16407 Fts3DeferredToken *p, |
| 16408 char **ppData, |
| 16409 int *pnData |
| 16410 ){ |
| 16411 char *pRet; |
| 16412 int nSkip; |
| 16413 sqlite3_int64 dummy; |
| 16414 |
| 16415 *ppData = 0; |
| 16416 *pnData = 0; |
| 16417 |
| 16418 if( p->pList==0 ){ |
| 16419 return SQLITE_OK; |
| 16420 } |
| 16421 |
| 16422 pRet = (char *)sqlite3_malloc(p->pList->nData); |
| 16423 if( !pRet ) return SQLITE_NOMEM; |
| 16424 |
| 16425 nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); |
| 16426 *pnData = p->pList->nData - nSkip; |
| 16427 *ppData = pRet; |
| 16428 |
| 16429 memcpy(pRet, &p->pList->aData[nSkip], *pnData); |
| 16430 return SQLITE_OK; |
| 16431 } |
| 16432 |
| 16433 /* |
| 16434 ** Add an entry for token pToken to the pCsr->pDeferred list. |
| 16435 */ |
| 16436 SQLITE_PRIVATE int sqlite3Fts3DeferToken( |
| 16437 Fts3Cursor *pCsr, /* Fts3 table cursor */ |
| 16438 Fts3PhraseToken *pToken, /* Token to defer */ |
| 16439 int iCol /* Column that token must appear in (or -1) */ |
| 16440 ){ |
| 16441 Fts3DeferredToken *pDeferred; |
| 16442 pDeferred = sqlite3_malloc(sizeof(*pDeferred)); |
| 16443 if( !pDeferred ){ |
| 16444 return SQLITE_NOMEM; |
| 16445 } |
| 16446 memset(pDeferred, 0, sizeof(*pDeferred)); |
| 16447 pDeferred->pToken = pToken; |
| 16448 pDeferred->pNext = pCsr->pDeferred; |
| 16449 pDeferred->iCol = iCol; |
| 16450 pCsr->pDeferred = pDeferred; |
| 16451 |
| 16452 assert( pToken->pDeferred==0 ); |
| 16453 pToken->pDeferred = pDeferred; |
| 16454 |
| 16455 return SQLITE_OK; |
| 16456 } |
| 16457 #endif |
| 16458 |
| 16459 /* |
| 16460 ** SQLite value pRowid contains the rowid of a row that may or may not be |
| 16461 ** present in the FTS3 table. If it is, delete it and adjust the contents |
| 16462 ** of subsiduary data structures accordingly. |
| 16463 */ |
| 16464 static int fts3DeleteByRowid( |
| 16465 Fts3Table *p, |
| 16466 sqlite3_value *pRowid, |
| 16467 int *pnChng, /* IN/OUT: Decrement if row is deleted */ |
| 16468 u32 *aSzDel |
| 16469 ){ |
| 16470 int rc = SQLITE_OK; /* Return code */ |
| 16471 int bFound = 0; /* True if *pRowid really is in the table */ |
| 16472 |
| 16473 fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); |
| 16474 if( bFound && rc==SQLITE_OK ){ |
| 16475 int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ |
| 16476 rc = fts3IsEmpty(p, pRowid, &isEmpty); |
| 16477 if( rc==SQLITE_OK ){ |
| 16478 if( isEmpty ){ |
| 16479 /* Deleting this row means the whole table is empty. In this case |
| 16480 ** delete the contents of all three tables and throw away any |
| 16481 ** data in the pendingTerms hash table. */ |
| 16482 rc = fts3DeleteAll(p, 1); |
| 16483 *pnChng = 0; |
| 16484 memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); |
| 16485 }else{ |
| 16486 *pnChng = *pnChng - 1; |
| 16487 if( p->zContentTbl==0 ){ |
| 16488 fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); |
| 16489 } |
| 16490 if( p->bHasDocsize ){ |
| 16491 fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); |
| 16492 } |
| 16493 } |
| 16494 } |
| 16495 } |
| 16496 |
| 16497 return rc; |
| 16498 } |
| 16499 |
| 16500 /* |
| 16501 ** This function does the work for the xUpdate method of FTS3 virtual |
| 16502 ** tables. The schema of the virtual table being: |
| 16503 ** |
| 16504 ** CREATE TABLE <table name>( |
| 16505 ** <user columns>, |
| 16506 ** <table name> HIDDEN, |
| 16507 ** docid HIDDEN, |
| 16508 ** <langid> HIDDEN |
| 16509 ** ); |
| 16510 ** |
| 16511 ** |
| 16512 */ |
| 16513 SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( |
| 16514 sqlite3_vtab *pVtab, /* FTS3 vtab object */ |
| 16515 int nArg, /* Size of argument array */ |
| 16516 sqlite3_value **apVal, /* Array of arguments */ |
| 16517 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ |
| 16518 ){ |
| 16519 Fts3Table *p = (Fts3Table *)pVtab; |
| 16520 int rc = SQLITE_OK; /* Return Code */ |
| 16521 int isRemove = 0; /* True for an UPDATE or DELETE */ |
| 16522 u32 *aSzIns = 0; /* Sizes of inserted documents */ |
| 16523 u32 *aSzDel = 0; /* Sizes of deleted documents */ |
| 16524 int nChng = 0; /* Net change in number of documents */ |
| 16525 int bInsertDone = 0; |
| 16526 |
| 16527 /* At this point it must be known if the %_stat table exists or not. |
| 16528 ** So bHasStat may not be 2. */ |
| 16529 assert( p->bHasStat==0 || p->bHasStat==1 ); |
| 16530 |
| 16531 assert( p->pSegments==0 ); |
| 16532 assert( |
| 16533 nArg==1 /* DELETE operations */ |
| 16534 || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ |
| 16535 ); |
| 16536 |
| 16537 /* Check for a "special" INSERT operation. One of the form: |
| 16538 ** |
| 16539 ** INSERT INTO xyz(xyz) VALUES('command'); |
| 16540 */ |
| 16541 if( nArg>1 |
| 16542 && sqlite3_value_type(apVal[0])==SQLITE_NULL |
| 16543 && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL |
| 16544 ){ |
| 16545 rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); |
| 16546 goto update_out; |
| 16547 } |
| 16548 |
| 16549 if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ |
| 16550 rc = SQLITE_CONSTRAINT; |
| 16551 goto update_out; |
| 16552 } |
| 16553 |
| 16554 /* Allocate space to hold the change in document sizes */ |
| 16555 aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); |
| 16556 if( aSzDel==0 ){ |
| 16557 rc = SQLITE_NOMEM; |
| 16558 goto update_out; |
| 16559 } |
| 16560 aSzIns = &aSzDel[p->nColumn+1]; |
| 16561 memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); |
| 16562 |
| 16563 rc = fts3Writelock(p); |
| 16564 if( rc!=SQLITE_OK ) goto update_out; |
| 16565 |
| 16566 /* If this is an INSERT operation, or an UPDATE that modifies the rowid |
| 16567 ** value, then this operation requires constraint handling. |
| 16568 ** |
| 16569 ** If the on-conflict mode is REPLACE, this means that the existing row |
| 16570 ** should be deleted from the database before inserting the new row. Or, |
| 16571 ** if the on-conflict mode is other than REPLACE, then this method must |
| 16572 ** detect the conflict and return SQLITE_CONSTRAINT before beginning to |
| 16573 ** modify the database file. |
| 16574 */ |
| 16575 if( nArg>1 && p->zContentTbl==0 ){ |
| 16576 /* Find the value object that holds the new rowid value. */ |
| 16577 sqlite3_value *pNewRowid = apVal[3+p->nColumn]; |
| 16578 if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ |
| 16579 pNewRowid = apVal[1]; |
| 16580 } |
| 16581 |
| 16582 if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( |
| 16583 sqlite3_value_type(apVal[0])==SQLITE_NULL |
| 16584 || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) |
| 16585 )){ |
| 16586 /* The new rowid is not NULL (in this case the rowid will be |
| 16587 ** automatically assigned and there is no chance of a conflict), and |
| 16588 ** the statement is either an INSERT or an UPDATE that modifies the |
| 16589 ** rowid column. So if the conflict mode is REPLACE, then delete any |
| 16590 ** existing row with rowid=pNewRowid. |
| 16591 ** |
| 16592 ** Or, if the conflict mode is not REPLACE, insert the new record into |
| 16593 ** the %_content table. If we hit the duplicate rowid constraint (or any |
| 16594 ** other error) while doing so, return immediately. |
| 16595 ** |
| 16596 ** This branch may also run if pNewRowid contains a value that cannot |
| 16597 ** be losslessly converted to an integer. In this case, the eventual |
| 16598 ** call to fts3InsertData() (either just below or further on in this |
| 16599 ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is |
| 16600 ** invoked, it will delete zero rows (since no row will have |
| 16601 ** docid=$pNewRowid if $pNewRowid is not an integer value). |
| 16602 */ |
| 16603 if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ |
| 16604 rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); |
| 16605 }else{ |
| 16606 rc = fts3InsertData(p, apVal, pRowid); |
| 16607 bInsertDone = 1; |
| 16608 } |
| 16609 } |
| 16610 } |
| 16611 if( rc!=SQLITE_OK ){ |
| 16612 goto update_out; |
| 16613 } |
| 16614 |
| 16615 /* If this is a DELETE or UPDATE operation, remove the old record. */ |
| 16616 if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ |
| 16617 assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); |
| 16618 rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); |
| 16619 isRemove = 1; |
| 16620 } |
| 16621 |
| 16622 /* If this is an INSERT or UPDATE operation, insert the new record. */ |
| 16623 if( nArg>1 && rc==SQLITE_OK ){ |
| 16624 int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); |
| 16625 if( bInsertDone==0 ){ |
| 16626 rc = fts3InsertData(p, apVal, pRowid); |
| 16627 if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ |
| 16628 rc = FTS_CORRUPT_VTAB; |
| 16629 } |
| 16630 } |
| 16631 if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ |
| 16632 rc = fts3PendingTermsDocid(p, 0, iLangid, *pRowid); |
| 16633 } |
| 16634 if( rc==SQLITE_OK ){ |
| 16635 assert( p->iPrevDocid==*pRowid ); |
| 16636 rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); |
| 16637 } |
| 16638 if( p->bHasDocsize ){ |
| 16639 fts3InsertDocsize(&rc, p, aSzIns); |
| 16640 } |
| 16641 nChng++; |
| 16642 } |
| 16643 |
| 16644 if( p->bFts4 ){ |
| 16645 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); |
| 16646 } |
| 16647 |
| 16648 update_out: |
| 16649 sqlite3_free(aSzDel); |
| 16650 sqlite3Fts3SegmentsClose(p); |
| 16651 return rc; |
| 16652 } |
| 16653 |
| 16654 /* |
| 16655 ** Flush any data in the pending-terms hash table to disk. If successful, |
| 16656 ** merge all segments in the database (including the new segment, if |
| 16657 ** there was any data to flush) into a single segment. |
| 16658 */ |
| 16659 SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ |
| 16660 int rc; |
| 16661 rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); |
| 16662 if( rc==SQLITE_OK ){ |
| 16663 rc = fts3DoOptimize(p, 1); |
| 16664 if( rc==SQLITE_OK || rc==SQLITE_DONE ){ |
| 16665 int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); |
| 16666 if( rc2!=SQLITE_OK ) rc = rc2; |
| 16667 }else{ |
| 16668 sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); |
| 16669 sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); |
| 16670 } |
| 16671 } |
| 16672 sqlite3Fts3SegmentsClose(p); |
| 16673 return rc; |
| 16674 } |
| 16675 |
| 16676 #endif |
| 16677 |
| 16678 /************** End of fts3_write.c ******************************************/ |
| 16679 /************** Begin file fts3_snippet.c ************************************/ |
| 16680 /* |
| 16681 ** 2009 Oct 23 |
| 16682 ** |
| 16683 ** The author disclaims copyright to this source code. In place of |
| 16684 ** a legal notice, here is a blessing: |
| 16685 ** |
| 16686 ** May you do good and not evil. |
| 16687 ** May you find forgiveness for yourself and forgive others. |
| 16688 ** May you share freely, never taking more than you give. |
| 16689 ** |
| 16690 ****************************************************************************** |
| 16691 */ |
| 16692 |
| 16693 /* #include "fts3Int.h" */ |
| 16694 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 16695 |
| 16696 /* #include <string.h> */ |
| 16697 /* #include <assert.h> */ |
| 16698 |
/*
** Format characters that may appear in the second argument to matchinfo().
** The trailing comment on each line is the number of values associated
** with that character.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */

/*
** Default value of the second argument to matchinfo().
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
| 16716 |
| 16717 |
| 16718 /* |
| 16719 ** Used as an fts3ExprIterate() context when loading phrase doclists to |
| 16720 ** Fts3Expr.aDoclist[]/nDoclist. |
| 16721 */ |
| 16722 typedef struct LoadDoclistCtx LoadDoclistCtx; |
| 16723 struct LoadDoclistCtx { |
| 16724 Fts3Cursor *pCsr; /* FTS3 Cursor */ |
| 16725 int nPhrase; /* Number of phrases seen so far */ |
| 16726 int nToken; /* Number of tokens seen so far */ |
| 16727 }; |
| 16728 |
| 16729 /* |
| 16730 ** The following types are used as part of the implementation of the |
| 16731 ** fts3BestSnippet() routine. |
| 16732 */ |
| 16733 typedef struct SnippetIter SnippetIter; |
| 16734 typedef struct SnippetPhrase SnippetPhrase; |
| 16735 typedef struct SnippetFragment SnippetFragment; |
| 16736 |
| 16737 struct SnippetIter { |
| 16738 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ |
| 16739 int iCol; /* Extract snippet from this column */ |
| 16740 int nSnippet; /* Requested snippet length (in tokens) */ |
| 16741 int nPhrase; /* Number of phrases in query */ |
| 16742 SnippetPhrase *aPhrase; /* Array of size nPhrase */ |
| 16743 int iCurrent; /* First token of current snippet */ |
| 16744 }; |
| 16745 |
| 16746 struct SnippetPhrase { |
| 16747 int nToken; /* Number of tokens in phrase */ |
| 16748 char *pList; /* Pointer to start of phrase position list */ |
| 16749 int iHead; /* Next value in position list */ |
| 16750 char *pHead; /* Position list data following iHead */ |
| 16751 int iTail; /* Next value in trailing position list */ |
| 16752 char *pTail; /* Position list data following iTail */ |
| 16753 }; |
| 16754 |
| 16755 struct SnippetFragment { |
| 16756 int iCol; /* Column snippet is extracted from */ |
| 16757 int iPos; /* Index of first token in snippet */ |
| 16758 u64 covered; /* Mask of query phrases covered */ |
| 16759 u64 hlmask; /* Mask of snippet terms to highlight */ |
| 16760 }; |
| 16761 |
| 16762 /* |
| 16763 ** This type is used as an fts3ExprIterate() context object while |
| 16764 ** accumulating the data returned by the matchinfo() function. |
| 16765 */ |
| 16766 typedef struct MatchInfo MatchInfo; |
| 16767 struct MatchInfo { |
| 16768 Fts3Cursor *pCursor; /* FTS3 Cursor */ |
| 16769 int nCol; /* Number of columns in table */ |
| 16770 int nPhrase; /* Number of matchable phrases in query */ |
| 16771 sqlite3_int64 nDoc; /* Number of docs in database */ |
| 16772 char flag; |
| 16773 u32 *aMatchinfo; /* Pre-allocated buffer */ |
| 16774 }; |
| 16775 |
| 16776 /* |
| 16777 ** An instance of this structure is used to manage a pair of buffers, each |
| 16778 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below |
| 16779 ** for details. |
| 16780 */ |
| 16781 struct MatchinfoBuffer { |
| 16782 u8 aRef[3]; |
| 16783 int nElem; |
| 16784 int bGlobal; /* Set if global data is loaded */ |
| 16785 char *zMatchinfo; |
| 16786 u32 aMatchinfo[1]; |
| 16787 }; |
| 16788 |
| 16789 |
/*
** Growable string accumulator. snippet() and offsets() both return text;
** they build it up in one of these using fts3StringAppend().
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Buffer holding the string so far */
  int n;                          /* Bytes used in z, excluding nul-term */
  int nAlloc;                     /* Bytes allocated for z */
};
| 16801 |
| 16802 |
| 16803 /************************************************************************* |
| 16804 ** Start of MatchinfoBuffer code. |
| 16805 */ |
| 16806 |
| 16807 /* |
| 16808 ** Allocate a two-slot MatchinfoBuffer object. |
| 16809 */ |
| 16810 static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ |
| 16811 MatchinfoBuffer *pRet; |
| 16812 int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); |
| 16813 int nStr = (int)strlen(zMatchinfo); |
| 16814 |
| 16815 pRet = sqlite3_malloc(nByte + nStr+1); |
| 16816 if( pRet ){ |
| 16817 memset(pRet, 0, nByte); |
| 16818 pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; |
| 16819 pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); |
| 16820 pRet->nElem = nElem; |
| 16821 pRet->zMatchinfo = ((char*)pRet) + nByte; |
| 16822 memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); |
| 16823 pRet->aRef[0] = 1; |
| 16824 } |
| 16825 |
| 16826 return pRet; |
| 16827 } |
| 16828 |
| 16829 static void fts3MIBufferFree(void *p){ |
| 16830 MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); |
| 16831 |
| 16832 assert( (u32*)p==&pBuf->aMatchinfo[1] |
| 16833 || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] |
| 16834 ); |
| 16835 if( (u32*)p==&pBuf->aMatchinfo[1] ){ |
| 16836 pBuf->aRef[1] = 0; |
| 16837 }else{ |
| 16838 pBuf->aRef[2] = 0; |
| 16839 } |
| 16840 |
| 16841 if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ |
| 16842 sqlite3_free(pBuf); |
| 16843 } |
| 16844 } |
| 16845 |
| 16846 static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ |
| 16847 void (*xRet)(void*) = 0; |
| 16848 u32 *aOut = 0; |
| 16849 |
| 16850 if( p->aRef[1]==0 ){ |
| 16851 p->aRef[1] = 1; |
| 16852 aOut = &p->aMatchinfo[1]; |
| 16853 xRet = fts3MIBufferFree; |
| 16854 } |
| 16855 else if( p->aRef[2]==0 ){ |
| 16856 p->aRef[2] = 1; |
| 16857 aOut = &p->aMatchinfo[p->nElem+2]; |
| 16858 xRet = fts3MIBufferFree; |
| 16859 }else{ |
| 16860 aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); |
| 16861 if( aOut ){ |
| 16862 xRet = sqlite3_free; |
| 16863 if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); |
| 16864 } |
| 16865 } |
| 16866 |
| 16867 *paOut = aOut; |
| 16868 return xRet; |
| 16869 } |
| 16870 |
| 16871 static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ |
| 16872 p->bGlobal = 1; |
| 16873 memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); |
| 16874 } |
| 16875 |
| 16876 /* |
| 16877 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() |
| 16878 */ |
| 16879 SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ |
| 16880 if( p ){ |
| 16881 assert( p->aRef[0]==1 ); |
| 16882 p->aRef[0] = 0; |
| 16883 if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ |
| 16884 sqlite3_free(p); |
| 16885 } |
| 16886 } |
| 16887 } |
| 16888 |
| 16889 /* |
| 16890 ** End of MatchinfoBuffer code. |
| 16891 *************************************************************************/ |
| 16892 |
| 16893 |
/*
** Helper for stepping through a position-list. A position list is a list
** of unique integers sorted from smallest to largest. Each element is
** stored as an FTS3 varint whose value is the difference between the
** element and its predecessor, plus two. For example the position-list:
**
**     4 9 113
**
** is stored as the three varints:
**
**     6 7 106
**
** On entry *pp points at the varint of an element and *piPos holds the
** value of the previous element. On return *piPos holds the value of the
** element just read and *pp points at the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *pVarint = *pp;
  int iDelta;
  int nRead = fts3GetVarint32(pVarint, &iDelta);

  *pp = pVarint + nRead;
  *piPos += (iDelta-2);
}
| 16919 |
| 16920 /* |
| 16921 ** Helper function for fts3ExprIterate() (see below). |
| 16922 */ |
| 16923 static int fts3ExprIterate2( |
| 16924 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 16925 int *piPhrase, /* Pointer to phrase counter */ |
| 16926 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 16927 void *pCtx /* Second argument to pass to callback */ |
| 16928 ){ |
| 16929 int rc; /* Return code */ |
| 16930 int eType = pExpr->eType; /* Type of expression node pExpr */ |
| 16931 |
| 16932 if( eType!=FTSQUERY_PHRASE ){ |
| 16933 assert( pExpr->pLeft && pExpr->pRight ); |
| 16934 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); |
| 16935 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ |
| 16936 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); |
| 16937 } |
| 16938 }else{ |
| 16939 rc = x(pExpr, *piPhrase, pCtx); |
| 16940 (*piPhrase)++; |
| 16941 } |
| 16942 return rc; |
| 16943 } |
| 16944 |
| 16945 /* |
| 16946 ** Iterate through all phrase nodes in an FTS3 query, except those that |
| 16947 ** are part of a sub-tree that is the right-hand-side of a NOT operator. |
| 16948 ** For each phrase node found, the supplied callback function is invoked. |
| 16949 ** |
| 16950 ** If the callback function returns anything other than SQLITE_OK, |
| 16951 ** the iteration is abandoned and the error code returned immediately. |
| 16952 ** Otherwise, SQLITE_OK is returned after a callback has been made for |
| 16953 ** all eligible phrase nodes. |
| 16954 */ |
| 16955 static int fts3ExprIterate( |
| 16956 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 16957 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 16958 void *pCtx /* Second argument to pass to callback */ |
| 16959 ){ |
| 16960 int iPhrase = 0; /* Variable used as the phrase counter */ |
| 16961 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); |
| 16962 } |
| 16963 |
| 16964 |
| 16965 /* |
| 16966 ** This is an fts3ExprIterate() callback used while loading the doclists |
| 16967 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also |
| 16968 ** fts3ExprLoadDoclists(). |
| 16969 */ |
| 16970 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 16971 int rc = SQLITE_OK; |
| 16972 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 16973 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; |
| 16974 |
| 16975 UNUSED_PARAMETER(iPhrase); |
| 16976 |
| 16977 p->nPhrase++; |
| 16978 p->nToken += pPhrase->nToken; |
| 16979 |
| 16980 return rc; |
| 16981 } |
| 16982 |
| 16983 /* |
| 16984 ** Load the doclists for each phrase in the query associated with FTS3 cursor |
| 16985 ** pCsr. |
| 16986 ** |
| 16987 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable |
| 16988 ** phrases in the expression (all phrases except those directly or |
| 16989 ** indirectly descended from the right-hand-side of a NOT operator). If |
| 16990 ** pnToken is not NULL, then it is set to the number of tokens in all |
| 16991 ** matchable phrases of the expression. |
| 16992 */ |
| 16993 static int fts3ExprLoadDoclists( |
| 16994 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ |
| 16995 int *pnPhrase, /* OUT: Number of phrases in query */ |
| 16996 int *pnToken /* OUT: Number of tokens in query */ |
| 16997 ){ |
| 16998 int rc; /* Return Code */ |
| 16999 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ |
| 17000 sCtx.pCsr = pCsr; |
| 17001 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); |
| 17002 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; |
| 17003 if( pnToken ) *pnToken = sCtx.nToken; |
| 17004 return rc; |
| 17005 } |
| 17006 |
| 17007 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 17008 (*(int *)ctx)++; |
| 17009 pExpr->iPhrase = iPhrase; |
| 17010 return SQLITE_OK; |
| 17011 } |
| 17012 static int fts3ExprPhraseCount(Fts3Expr *pExpr){ |
| 17013 int nPhrase = 0; |
| 17014 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); |
| 17015 return nPhrase; |
| 17016 } |
| 17017 |
/*
** Advance the position-list iterator (*ppIter, *piIter) until it points at
** the first element whose value is greater than or equal to iNext.
**
** If *ppIter is NULL on entry nothing happens. If the list ends before
** such an element is found (the next byte has no bits set other than the
** least significant one), *ppIter is set to NULL and *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pList = *ppIter;
  int iPos;

  if( pList==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*pList & 0xFE)==0 ){
      /* Ran off the end of the list: mark the iterator as exhausted. */
      pList = 0;
      iPos = -1;
      break;
    }
    fts3GetDeltaPosition(&pList, &iPos);
  }

  *piIter = iPos;
  *ppIter = pList;
}
| 17041 |
| 17042 /* |
| 17043 ** Advance the snippet iterator to the next candidate snippet. |
| 17044 */ |
| 17045 static int fts3SnippetNextCandidate(SnippetIter *pIter){ |
| 17046 int i; /* Loop counter */ |
| 17047 |
| 17048 if( pIter->iCurrent<0 ){ |
| 17049 /* The SnippetIter object has just been initialized. The first snippet |
| 17050 ** candidate always starts at offset 0 (even if this candidate has a |
| 17051 ** score of 0.0). |
| 17052 */ |
| 17053 pIter->iCurrent = 0; |
| 17054 |
| 17055 /* Advance the 'head' iterator of each phrase to the first offset that |
| 17056 ** is greater than or equal to (iNext+nSnippet). |
| 17057 */ |
| 17058 for(i=0; i<pIter->nPhrase; i++){ |
| 17059 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17060 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); |
| 17061 } |
| 17062 }else{ |
| 17063 int iStart; |
| 17064 int iEnd = 0x7FFFFFFF; |
| 17065 |
| 17066 for(i=0; i<pIter->nPhrase; i++){ |
| 17067 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17068 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ |
| 17069 iEnd = pPhrase->iHead; |
| 17070 } |
| 17071 } |
| 17072 if( iEnd==0x7FFFFFFF ){ |
| 17073 return 1; |
| 17074 } |
| 17075 |
| 17076 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; |
| 17077 for(i=0; i<pIter->nPhrase; i++){ |
| 17078 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17079 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); |
| 17080 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); |
| 17081 } |
| 17082 } |
| 17083 |
| 17084 return 0; |
| 17085 } |
| 17086 |
| 17087 /* |
| 17088 ** Retrieve information about the current candidate snippet of snippet |
| 17089 ** iterator pIter. |
| 17090 */ |
| 17091 static void fts3SnippetDetails( |
| 17092 SnippetIter *pIter, /* Snippet iterator */ |
| 17093 u64 mCovered, /* Bitmask of phrases already covered */ |
| 17094 int *piToken, /* OUT: First token of proposed snippet */ |
| 17095 int *piScore, /* OUT: "Score" for this snippet */ |
| 17096 u64 *pmCover, /* OUT: Bitmask of phrases covered */ |
| 17097 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ |
| 17098 ){ |
| 17099 int iStart = pIter->iCurrent; /* First token of snippet */ |
| 17100 int iScore = 0; /* Score of this snippet */ |
| 17101 int i; /* Loop counter */ |
| 17102 u64 mCover = 0; /* Mask of phrases covered by this snippet */ |
| 17103 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ |
| 17104 |
| 17105 for(i=0; i<pIter->nPhrase; i++){ |
| 17106 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 17107 if( pPhrase->pTail ){ |
| 17108 char *pCsr = pPhrase->pTail; |
| 17109 int iCsr = pPhrase->iTail; |
| 17110 |
| 17111 while( iCsr<(iStart+pIter->nSnippet) ){ |
| 17112 int j; |
| 17113 u64 mPhrase = (u64)1 << i; |
| 17114 u64 mPos = (u64)1 << (iCsr - iStart); |
| 17115 assert( iCsr>=iStart ); |
| 17116 if( (mCover|mCovered)&mPhrase ){ |
| 17117 iScore++; |
| 17118 }else{ |
| 17119 iScore += 1000; |
| 17120 } |
| 17121 mCover |= mPhrase; |
| 17122 |
| 17123 for(j=0; j<pPhrase->nToken; j++){ |
| 17124 mHighlight |= (mPos>>j); |
| 17125 } |
| 17126 |
| 17127 if( 0==(*pCsr & 0x0FE) ) break; |
| 17128 fts3GetDeltaPosition(&pCsr, &iCsr); |
| 17129 } |
| 17130 } |
| 17131 } |
| 17132 |
| 17133 /* Set the output variables before returning. */ |
| 17134 *piToken = iStart; |
| 17135 *piScore = iScore; |
| 17136 *pmCover = mCover; |
| 17137 *pmHighlight = mHighlight; |
| 17138 } |
| 17139 |
| 17140 /* |
| 17141 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). |
| 17142 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. |
| 17143 */ |
| 17144 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 17145 SnippetIter *p = (SnippetIter *)ctx; |
| 17146 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; |
| 17147 char *pCsr; |
| 17148 int rc; |
| 17149 |
| 17150 pPhrase->nToken = pExpr->pPhrase->nToken; |
| 17151 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); |
| 17152 assert( rc==SQLITE_OK || pCsr==0 ); |
| 17153 if( pCsr ){ |
| 17154 int iFirst = 0; |
| 17155 pPhrase->pList = pCsr; |
| 17156 fts3GetDeltaPosition(&pCsr, &iFirst); |
| 17157 assert( iFirst>=0 ); |
| 17158 pPhrase->pHead = pCsr; |
| 17159 pPhrase->pTail = pCsr; |
| 17160 pPhrase->iHead = iFirst; |
| 17161 pPhrase->iTail = iFirst; |
| 17162 }else{ |
| 17163 assert( rc!=SQLITE_OK || ( |
| 17164 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 |
| 17165 )); |
| 17166 } |
| 17167 |
| 17168 return rc; |
| 17169 } |
| 17170 |
| 17171 /* |
| 17172 ** Select the fragment of text consisting of nFragment contiguous tokens |
| 17173 ** from column iCol that represent the "best" snippet. The best snippet |
| 17174 ** is the snippet with the highest score, where scores are calculated |
| 17175 ** by adding: |
| 17176 ** |
| 17177 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. |
| 17178 ** |
| 17179 ** (b) +1000 points for the first occurrence of each matchable phrase in |
| 17180 ** the snippet for which the corresponding mCovered bit is not set. |
| 17181 ** |
| 17182 ** The selected snippet parameters are stored in structure *pFragment before |
| 17183 ** returning. The score of the selected snippet is stored in *piScore |
| 17184 ** before returning. |
| 17185 */ |
| 17186 static int fts3BestSnippet( |
| 17187 int nSnippet, /* Desired snippet length */ |
| 17188 Fts3Cursor *pCsr, /* Cursor to create snippet for */ |
| 17189 int iCol, /* Index of column to create snippet from */ |
| 17190 u64 mCovered, /* Mask of phrases already covered */ |
| 17191 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ |
| 17192 SnippetFragment *pFragment, /* OUT: Best snippet found */ |
| 17193 int *piScore /* OUT: Score of snippet pFragment */ |
| 17194 ){ |
| 17195 int rc; /* Return Code */ |
| 17196 int nList; /* Number of phrases in expression */ |
| 17197 SnippetIter sIter; /* Iterates through snippet candidates */ |
| 17198 int nByte; /* Number of bytes of space to allocate */ |
| 17199 int iBestScore = -1; /* Best snippet score found so far */ |
| 17200 int i; /* Loop counter */ |
| 17201 |
| 17202 memset(&sIter, 0, sizeof(sIter)); |
| 17203 |
| 17204 /* Iterate through the phrases in the expression to count them. The same |
| 17205 ** callback makes sure the doclists are loaded for each phrase. |
| 17206 */ |
| 17207 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); |
| 17208 if( rc!=SQLITE_OK ){ |
| 17209 return rc; |
| 17210 } |
| 17211 |
| 17212 /* Now that it is known how many phrases there are, allocate and zero |
| 17213 ** the required space using malloc(). |
| 17214 */ |
| 17215 nByte = sizeof(SnippetPhrase) * nList; |
| 17216 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); |
| 17217 if( !sIter.aPhrase ){ |
| 17218 return SQLITE_NOMEM; |
| 17219 } |
| 17220 memset(sIter.aPhrase, 0, nByte); |
| 17221 |
| 17222 /* Initialize the contents of the SnippetIter object. Then iterate through |
| 17223 ** the set of phrases in the expression to populate the aPhrase[] array. |
| 17224 */ |
| 17225 sIter.pCsr = pCsr; |
| 17226 sIter.iCol = iCol; |
| 17227 sIter.nSnippet = nSnippet; |
| 17228 sIter.nPhrase = nList; |
| 17229 sIter.iCurrent = -1; |
| 17230 rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); |
| 17231 if( rc==SQLITE_OK ){ |
| 17232 |
| 17233 /* Set the *pmSeen output variable. */ |
| 17234 for(i=0; i<nList; i++){ |
| 17235 if( sIter.aPhrase[i].pHead ){ |
| 17236 *pmSeen |= (u64)1 << i; |
| 17237 } |
| 17238 } |
| 17239 |
| 17240 /* Loop through all candidate snippets. Store the best snippet in |
| 17241 ** *pFragment. Store its associated 'score' in iBestScore. |
| 17242 */ |
| 17243 pFragment->iCol = iCol; |
| 17244 while( !fts3SnippetNextCandidate(&sIter) ){ |
| 17245 int iPos; |
| 17246 int iScore; |
| 17247 u64 mCover; |
| 17248 u64 mHighlite; |
| 17249 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); |
| 17250 assert( iScore>=0 ); |
| 17251 if( iScore>iBestScore ){ |
| 17252 pFragment->iPos = iPos; |
| 17253 pFragment->hlmask = mHighlite; |
| 17254 pFragment->covered = mCover; |
| 17255 iBestScore = iScore; |
| 17256 } |
| 17257 } |
| 17258 |
| 17259 *piScore = iBestScore; |
| 17260 } |
| 17261 sqlite3_free(sIter.aPhrase); |
| 17262 return rc; |
| 17263 } |
| 17264 |
| 17265 |
| 17266 /* |
| 17267 ** Append a string to the string-buffer passed as the first argument. |
| 17268 ** |
| 17269 ** If nAppend is negative, then the length of the string zAppend is |
| 17270 ** determined using strlen(). |
| 17271 */ |
| 17272 static int fts3StringAppend( |
| 17273 StrBuffer *pStr, /* Buffer to append to */ |
| 17274 const char *zAppend, /* Pointer to data to append to buffer */ |
| 17275 int nAppend /* Size of zAppend in bytes (or -1) */ |
| 17276 ){ |
| 17277 if( nAppend<0 ){ |
| 17278 nAppend = (int)strlen(zAppend); |
| 17279 } |
| 17280 |
| 17281 /* If there is insufficient space allocated at StrBuffer.z, use realloc() |
| 17282 ** to grow the buffer until so that it is big enough to accomadate the |
| 17283 ** appended data. |
| 17284 */ |
| 17285 if( pStr->n+nAppend+1>=pStr->nAlloc ){ |
| 17286 int nAlloc = pStr->nAlloc+nAppend+100; |
| 17287 char *zNew = sqlite3_realloc(pStr->z, nAlloc); |
| 17288 if( !zNew ){ |
| 17289 return SQLITE_NOMEM; |
| 17290 } |
| 17291 pStr->z = zNew; |
| 17292 pStr->nAlloc = nAlloc; |
| 17293 } |
| 17294 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); |
| 17295 |
| 17296 /* Append the data to the string buffer. */ |
| 17297 memcpy(&pStr->z[pStr->n], zAppend, nAppend); |
| 17298 pStr->n += nAppend; |
| 17299 pStr->z[pStr->n] = '\0'; |
| 17300 |
| 17301 return SQLITE_OK; |
| 17302 } |
| 17303 |
| 17304 /* |
| 17305 ** The fts3BestSnippet() function often selects snippets that end with a |
| 17306 ** query term. That is, the final term of the snippet is always a term |
| 17307 ** that requires highlighting. For example, if 'X' is a highlighted term |
| 17308 ** and '.' is a non-highlighted term, BestSnippet() may select: |
| 17309 ** |
| 17310 ** ........X.....X |
| 17311 ** |
| 17312 ** This function "shifts" the beginning of the snippet forward in the |
| 17313 ** document so that there are approximately the same number of |
| 17314 ** non-highlighted terms to the right of the final highlighted term as there |
| 17315 ** are to the left of the first highlighted term. For example, to this: |
| 17316 ** |
| 17317 ** ....X.....X.... |
| 17318 ** |
| 17319 ** This is done as part of extracting the snippet text, not when selecting |
| 17320 ** the snippet. Snippet selection is done based on doclists only, so there |
| 17321 ** is no way for fts3BestSnippet() to know whether or not the document |
| 17322 ** actually contains terms that follow the final highlighted term. |
| 17323 */ |
| 17324 static int fts3SnippetShift( |
| 17325 Fts3Table *pTab, /* FTS3 table snippet comes from */ |
| 17326 int iLangid, /* Language id to use in tokenizing */ |
| 17327 int nSnippet, /* Number of tokens desired for snippet */ |
| 17328 const char *zDoc, /* Document text to extract snippet from */ |
| 17329 int nDoc, /* Size of buffer zDoc in bytes */ |
| 17330 int *piPos, /* IN/OUT: First token of snippet */ |
| 17331 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ |
| 17332 ){ |
| 17333 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ |
| 17334 |
| 17335 if( hlmask ){ |
| 17336 int nLeft; /* Tokens to the left of first highlight */ |
| 17337 int nRight; /* Tokens to the right of last highlight */ |
| 17338 int nDesired; /* Ideal number of tokens to shift forward */ |
| 17339 |
| 17340 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); |
| 17341 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); |
| 17342 nDesired = (nLeft-nRight)/2; |
| 17343 |
| 17344 /* Ideally, the start of the snippet should be pushed forward in the |
| 17345 ** document nDesired tokens. This block checks if there are actually |
| 17346 ** nDesired tokens to the right of the snippet. If so, *piPos and |
| 17347 ** *pHlMask are updated to shift the snippet nDesired tokens to the |
| 17348 ** right. Otherwise, the snippet is shifted by the number of tokens |
| 17349 ** available. |
| 17350 */ |
| 17351 if( nDesired>0 ){ |
| 17352 int nShift; /* Number of tokens to shift snippet by */ |
| 17353 int iCurrent = 0; /* Token counter */ |
| 17354 int rc; /* Return Code */ |
| 17355 sqlite3_tokenizer_module *pMod; |
| 17356 sqlite3_tokenizer_cursor *pC; |
| 17357 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 17358 |
| 17359 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) |
| 17360 ** or more tokens in zDoc/nDoc. |
| 17361 */ |
| 17362 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); |
| 17363 if( rc!=SQLITE_OK ){ |
| 17364 return rc; |
| 17365 } |
| 17366 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ |
| 17367 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; |
| 17368 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); |
| 17369 } |
| 17370 pMod->xClose(pC); |
| 17371 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } |
| 17372 |
| 17373 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; |
| 17374 assert( nShift<=nDesired ); |
| 17375 if( nShift>0 ){ |
| 17376 *piPos += nShift; |
| 17377 *pHlmask = hlmask >> nShift; |
| 17378 } |
| 17379 } |
| 17380 } |
| 17381 return SQLITE_OK; |
| 17382 } |
| 17383 |
| 17384 /* |
| 17385 ** Extract the snippet text for fragment pFragment from cursor pCsr and |
| 17386 ** append it to string buffer pOut. |
| 17387 */ |
| 17388 static int fts3SnippetText( |
| 17389 Fts3Cursor *pCsr, /* FTS3 Cursor */ |
| 17390 SnippetFragment *pFragment, /* Snippet to extract */ |
| 17391 int iFragment, /* Fragment number */ |
| 17392 int isLast, /* True for final fragment in snippet */ |
| 17393 int nSnippet, /* Number of tokens in extracted snippet */ |
| 17394 const char *zOpen, /* String inserted before highlighted term */ |
| 17395 const char *zClose, /* String inserted after highlighted term */ |
| 17396 const char *zEllipsis, /* String inserted between snippets */ |
| 17397 StrBuffer *pOut /* Write output here */ |
| 17398 ){ |
| 17399 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17400 int rc; /* Return code */ |
| 17401 const char *zDoc; /* Document text to extract snippet from */ |
| 17402 int nDoc; /* Size of zDoc in bytes */ |
| 17403 int iCurrent = 0; /* Current token number of document */ |
| 17404 int iEnd = 0; /* Byte offset of end of current token */ |
| 17405 int isShiftDone = 0; /* True after snippet is shifted */ |
| 17406 int iPos = pFragment->iPos; /* First token of snippet */ |
| 17407 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ |
| 17408 int iCol = pFragment->iCol+1; /* Query column to extract text from */ |
| 17409 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ |
| 17410 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ |
| 17411 |
| 17412 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); |
| 17413 if( zDoc==0 ){ |
| 17414 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ |
| 17415 return SQLITE_NOMEM; |
| 17416 } |
| 17417 return SQLITE_OK; |
| 17418 } |
| 17419 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); |
| 17420 |
| 17421 /* Open a token cursor on the document. */ |
| 17422 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 17423 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); |
| 17424 if( rc!=SQLITE_OK ){ |
| 17425 return rc; |
| 17426 } |
| 17427 |
| 17428 while( rc==SQLITE_OK ){ |
| 17429 const char *ZDUMMY; /* Dummy argument used with tokenizer */ |
| 17430 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ |
| 17431 int iBegin = 0; /* Offset in zDoc of start of token */ |
| 17432 int iFin = 0; /* Offset in zDoc of end of token */ |
| 17433 int isHighlight = 0; /* True for highlighted terms */ |
| 17434 |
| 17435 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere |
| 17436 ** in the FTS code the variable that the third argument to xNext points to |
| 17437 ** is initialized to zero before the first (*but not necessarily |
| 17438 ** subsequent*) call to xNext(). This is done for a particular application |
| 17439 ** that needs to know whether or not the tokenizer is being used for |
| 17440 ** snippet generation or for some other purpose. |
| 17441 ** |
| 17442 ** Extreme care is required when writing code to depend on this |
| 17443 ** initialization. It is not a documented part of the tokenizer interface. |
| 17444 ** If a tokenizer is used directly by any code outside of FTS, this |
| 17445 ** convention might not be respected. */ |
| 17446 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); |
| 17447 if( rc!=SQLITE_OK ){ |
| 17448 if( rc==SQLITE_DONE ){ |
| 17449 /* Special case - the last token of the snippet is also the last token |
| 17450 ** of the column. Append any punctuation that occurred between the end |
| 17451 ** of the previous token and the end of the document to the output. |
| 17452 ** Then break out of the loop. */ |
| 17453 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); |
| 17454 } |
| 17455 break; |
| 17456 } |
| 17457 if( iCurrent<iPos ){ continue; } |
| 17458 |
| 17459 if( !isShiftDone ){ |
| 17460 int n = nDoc - iBegin; |
| 17461 rc = fts3SnippetShift( |
| 17462 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask |
| 17463 ); |
| 17464 isShiftDone = 1; |
| 17465 |
| 17466 /* Now that the shift has been done, check if the initial "..." are |
| 17467 ** required. They are required if (a) this is not the first fragment, |
| 17468 ** or (b) this fragment does not begin at position 0 of its column. |
| 17469 */ |
| 17470 if( rc==SQLITE_OK ){ |
| 17471 if( iPos>0 || iFragment>0 ){ |
| 17472 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 17473 }else if( iBegin ){ |
| 17474 rc = fts3StringAppend(pOut, zDoc, iBegin); |
| 17475 } |
| 17476 } |
| 17477 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; |
| 17478 } |
| 17479 |
| 17480 if( iCurrent>=(iPos+nSnippet) ){ |
| 17481 if( isLast ){ |
| 17482 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 17483 } |
| 17484 break; |
| 17485 } |
| 17486 |
| 17487 /* Set isHighlight to true if this term should be highlighted. */ |
| 17488 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; |
| 17489 |
| 17490 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); |
| 17491 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); |
| 17492 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); |
| 17493 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); |
| 17494 |
| 17495 iEnd = iFin; |
| 17496 } |
| 17497 |
| 17498 pMod->xClose(pC); |
| 17499 return rc; |
| 17500 } |
| 17501 |
| 17502 |
/*
** Count the entries in a column-list, i.e. the delta-encoded list of term
** offsets for one column of one row.
**
** On entry *ppCollist must point at the first byte of the first varint of
** the column-list. On return it points at the byte that terminates the
** list: either the 0x00 that ends the whole position-list or the 0x01
** that introduces the next column number.
**
** The return value is the number of varints (entries) in the column-list.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;           /* Read cursor */
  char bCont = 0;                 /* 0x80 bit of the previous byte */
  int nVarint = 0;                /* Number of complete varints seen */

  for(;;){
    /* A 0x00 or 0x01 byte terminates the list, but only when it is not the
    ** final byte of a multi-byte varint (previous byte had bit 0x80 set). */
    if( (0xFE & (*p | bCont))==0 ) break;
    bCont = *p & 0x80;
    p++;
    if( bCont==0 ){
      nVarint++;                  /* Byte without 0x80 completes a varint */
    }
  }

  *ppCollist = p;
  return nVarint;
}
| 17530 |
| 17531 /* |
| 17532 ** This function gathers 'y' or 'b' data for a single phrase. |
| 17533 */ |
| 17534 static void fts3ExprLHits( |
| 17535 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17536 MatchInfo *p /* Matchinfo context */ |
| 17537 ){ |
| 17538 Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; |
| 17539 int iStart; |
| 17540 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 17541 char *pIter = pPhrase->doclist.pList; |
| 17542 int iCol = 0; |
| 17543 |
| 17544 assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); |
| 17545 if( p->flag==FTS3_MATCHINFO_LHITS ){ |
| 17546 iStart = pExpr->iPhrase * p->nCol; |
| 17547 }else{ |
| 17548 iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); |
| 17549 } |
| 17550 |
| 17551 while( 1 ){ |
| 17552 int nHit = fts3ColumnlistCount(&pIter); |
| 17553 if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ |
| 17554 if( p->flag==FTS3_MATCHINFO_LHITS ){ |
| 17555 p->aMatchinfo[iStart + iCol] = (u32)nHit; |
| 17556 }else if( nHit ){ |
| 17557 p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); |
| 17558 } |
| 17559 } |
| 17560 assert( *pIter==0x00 || *pIter==0x01 ); |
| 17561 if( *pIter!=0x01 ) break; |
| 17562 pIter++; |
| 17563 pIter += fts3GetVarint32(pIter, &iCol); |
| 17564 } |
| 17565 } |
| 17566 |
| 17567 /* |
| 17568 ** Gather the results for matchinfo directives 'y' and 'b'. |
| 17569 */ |
| 17570 static void fts3ExprLHitGather( |
| 17571 Fts3Expr *pExpr, |
| 17572 MatchInfo *p |
| 17573 ){ |
| 17574 assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); |
| 17575 if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ |
| 17576 if( pExpr->pLeft ){ |
| 17577 fts3ExprLHitGather(pExpr->pLeft, p); |
| 17578 fts3ExprLHitGather(pExpr->pRight, p); |
| 17579 }else{ |
| 17580 fts3ExprLHits(pExpr, p); |
| 17581 } |
| 17582 } |
| 17583 } |
| 17584 |
| 17585 /* |
| 17586 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats |
| 17587 ** for a single query. |
| 17588 ** |
| 17589 ** fts3ExprIterate() callback to load the 'global' elements of a |
| 17590 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements |
| 17591 ** of the matchinfo array that are constant for all rows returned by the |
| 17592 ** current query. |
| 17593 ** |
| 17594 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This |
| 17595 ** function populates Matchinfo.aMatchinfo[] as follows: |
| 17596 ** |
| 17597 ** for(iCol=0; iCol<nCol; iCol++){ |
| 17598 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; |
| 17599 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; |
| 17600 ** } |
| 17601 ** |
| 17602 ** where X is the number of matches for phrase iPhrase is column iCol of all |
| 17603 ** rows of the table. Y is the number of rows for which column iCol contains |
| 17604 ** at least one instance of phrase iPhrase. |
| 17605 ** |
| 17606 ** If the phrase pExpr consists entirely of deferred tokens, then all X and |
| 17607 ** Y values are set to nDoc, where nDoc is the number of documents in the |
| 17608 ** file system. This is done because the full-text index doclist is required |
| 17609 ** to calculate these values properly, and the full-text index doclist is |
| 17610 ** not available for deferred tokens. |
| 17611 */ |
| 17612 static int fts3ExprGlobalHitsCb( |
| 17613 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17614 int iPhrase, /* Phrase number (numbered from zero) */ |
| 17615 void *pCtx /* Pointer to MatchInfo structure */ |
| 17616 ){ |
| 17617 MatchInfo *p = (MatchInfo *)pCtx; |
| 17618 return sqlite3Fts3EvalPhraseStats( |
| 17619 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] |
| 17620 ); |
| 17621 } |
| 17622 |
| 17623 /* |
| 17624 ** fts3ExprIterate() callback used to collect the "local" part of the |
| 17625 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the |
| 17626 ** array that are different for each row returned by the query. |
| 17627 */ |
| 17628 static int fts3ExprLocalHitsCb( |
| 17629 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17630 int iPhrase, /* Phrase number */ |
| 17631 void *pCtx /* Pointer to MatchInfo structure */ |
| 17632 ){ |
| 17633 int rc = SQLITE_OK; |
| 17634 MatchInfo *p = (MatchInfo *)pCtx; |
| 17635 int iStart = iPhrase * p->nCol * 3; |
| 17636 int i; |
| 17637 |
| 17638 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ |
| 17639 char *pCsr; |
| 17640 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); |
| 17641 if( pCsr ){ |
| 17642 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); |
| 17643 }else{ |
| 17644 p->aMatchinfo[iStart+i*3] = 0; |
| 17645 } |
| 17646 } |
| 17647 |
| 17648 return rc; |
| 17649 } |
| 17650 |
| 17651 static int fts3MatchinfoCheck( |
| 17652 Fts3Table *pTab, |
| 17653 char cArg, |
| 17654 char **pzErr |
| 17655 ){ |
| 17656 if( (cArg==FTS3_MATCHINFO_NPHRASE) |
| 17657 || (cArg==FTS3_MATCHINFO_NCOL) |
| 17658 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) |
| 17659 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) |
| 17660 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) |
| 17661 || (cArg==FTS3_MATCHINFO_LCS) |
| 17662 || (cArg==FTS3_MATCHINFO_HITS) |
| 17663 || (cArg==FTS3_MATCHINFO_LHITS) |
| 17664 || (cArg==FTS3_MATCHINFO_LHITS_BM) |
| 17665 ){ |
| 17666 return SQLITE_OK; |
| 17667 } |
| 17668 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); |
| 17669 return SQLITE_ERROR; |
| 17670 } |
| 17671 |
| 17672 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ |
| 17673 int nVal; /* Number of integers output by cArg */ |
| 17674 |
| 17675 switch( cArg ){ |
| 17676 case FTS3_MATCHINFO_NDOC: |
| 17677 case FTS3_MATCHINFO_NPHRASE: |
| 17678 case FTS3_MATCHINFO_NCOL: |
| 17679 nVal = 1; |
| 17680 break; |
| 17681 |
| 17682 case FTS3_MATCHINFO_AVGLENGTH: |
| 17683 case FTS3_MATCHINFO_LENGTH: |
| 17684 case FTS3_MATCHINFO_LCS: |
| 17685 nVal = pInfo->nCol; |
| 17686 break; |
| 17687 |
| 17688 case FTS3_MATCHINFO_LHITS: |
| 17689 nVal = pInfo->nCol * pInfo->nPhrase; |
| 17690 break; |
| 17691 |
| 17692 case FTS3_MATCHINFO_LHITS_BM: |
| 17693 nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); |
| 17694 break; |
| 17695 |
| 17696 default: |
| 17697 assert( cArg==FTS3_MATCHINFO_HITS ); |
| 17698 nVal = pInfo->nCol * pInfo->nPhrase * 3; |
| 17699 break; |
| 17700 } |
| 17701 |
| 17702 return nVal; |
| 17703 } |
| 17704 |
| 17705 static int fts3MatchinfoSelectDoctotal( |
| 17706 Fts3Table *pTab, |
| 17707 sqlite3_stmt **ppStmt, |
| 17708 sqlite3_int64 *pnDoc, |
| 17709 const char **paLen |
| 17710 ){ |
| 17711 sqlite3_stmt *pStmt; |
| 17712 const char *a; |
| 17713 sqlite3_int64 nDoc; |
| 17714 |
| 17715 if( !*ppStmt ){ |
| 17716 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); |
| 17717 if( rc!=SQLITE_OK ) return rc; |
| 17718 } |
| 17719 pStmt = *ppStmt; |
| 17720 assert( sqlite3_data_count(pStmt)==1 ); |
| 17721 |
| 17722 a = sqlite3_column_blob(pStmt, 0); |
| 17723 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 17724 if( nDoc==0 ) return FTS_CORRUPT_VTAB; |
| 17725 *pnDoc = (u32)nDoc; |
| 17726 |
| 17727 if( paLen ) *paLen = a; |
| 17728 return SQLITE_OK; |
| 17729 } |
| 17730 |
/*
** An instance of the following structure is used to store state while
** iterating through a multi-column position-list corresponding to the
** hits for a single phrase on a single row in order to calculate the
** values for a matchinfo() FTS3_MATCHINFO_LCS request.
**
** fts3MatchinfoLcs() allocates one of these per phrase in the query.
*/
typedef struct LcsIterator LcsIterator;
struct LcsIterator {
  Fts3Expr *pExpr;                /* Pointer to phrase expression */
  int iPosOffset;                 /* Minus the token count of this and all
                                  ** earlier phrases; initial value of iPos */
  char *pRead;                    /* Read cursor into the position list for
                                  ** the current column; NULL once at EOF */
  int iPos;                       /* Current position (offset by iPosOffset) */
};
| 17744 |
/*
** Sentinel value (the largest positive 32-bit signed integer) intended to
** mark an LcsIterator that has finished iterating through all offsets for
** all columns.
**
** NOTE(review): the LcsIterator structure declared above has no iCol
** member and no use of this macro is visible nearby - confirm whether it
** is still needed. The definition deliberately has no trailing semicolon
** so that it can be used inside expressions.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
| 17750 |
| 17751 static int fts3MatchinfoLcsCb( |
| 17752 Fts3Expr *pExpr, /* Phrase expression node */ |
| 17753 int iPhrase, /* Phrase number (numbered from zero) */ |
| 17754 void *pCtx /* Pointer to MatchInfo structure */ |
| 17755 ){ |
| 17756 LcsIterator *aIter = (LcsIterator *)pCtx; |
| 17757 aIter[iPhrase].pExpr = pExpr; |
| 17758 return SQLITE_OK; |
| 17759 } |
| 17760 |
| 17761 /* |
| 17762 ** Advance the iterator passed as an argument to the next position. Return |
| 17763 ** 1 if the iterator is at EOF or if it now points to the start of the |
| 17764 ** position list for the next column. |
| 17765 */ |
| 17766 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ |
| 17767 char *pRead = pIter->pRead; |
| 17768 sqlite3_int64 iRead; |
| 17769 int rc = 0; |
| 17770 |
| 17771 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 17772 if( iRead==0 || iRead==1 ){ |
| 17773 pRead = 0; |
| 17774 rc = 1; |
| 17775 }else{ |
| 17776 pIter->iPos += (int)(iRead-2); |
| 17777 } |
| 17778 |
| 17779 pIter->pRead = pRead; |
| 17780 return rc; |
| 17781 } |
| 17782 |
| 17783 /* |
| 17784 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. |
| 17785 ** |
| 17786 ** If the call is successful, the longest-common-substring lengths for each |
| 17787 ** column are written into the first nCol elements of the pInfo->aMatchinfo[] |
| 17788 ** array before returning. SQLITE_OK is returned in this case. |
| 17789 ** |
| 17790 ** Otherwise, if an error occurs, an SQLite error code is returned and the |
| 17791 ** data written to the first nCol elements of pInfo->aMatchinfo[] is |
| 17792 ** undefined. |
| 17793 */ |
| 17794 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ |
| 17795 LcsIterator *aIter; |
| 17796 int i; |
| 17797 int iCol; |
| 17798 int nToken = 0; |
| 17799 |
| 17800 /* Allocate and populate the array of LcsIterator objects. The array |
| 17801 ** contains one element for each matchable phrase in the query. |
| 17802 **/ |
| 17803 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); |
| 17804 if( !aIter ) return SQLITE_NOMEM; |
| 17805 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); |
| 17806 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); |
| 17807 |
| 17808 for(i=0; i<pInfo->nPhrase; i++){ |
| 17809 LcsIterator *pIter = &aIter[i]; |
| 17810 nToken -= pIter->pExpr->pPhrase->nToken; |
| 17811 pIter->iPosOffset = nToken; |
| 17812 } |
| 17813 |
| 17814 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17815 int nLcs = 0; /* LCS value for this column */ |
| 17816 int nLive = 0; /* Number of iterators in aIter not at EOF */ |
| 17817 |
| 17818 for(i=0; i<pInfo->nPhrase; i++){ |
| 17819 int rc; |
| 17820 LcsIterator *pIt = &aIter[i]; |
| 17821 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); |
| 17822 if( rc!=SQLITE_OK ) return rc; |
| 17823 if( pIt->pRead ){ |
| 17824 pIt->iPos = pIt->iPosOffset; |
| 17825 fts3LcsIteratorAdvance(&aIter[i]); |
| 17826 nLive++; |
| 17827 } |
| 17828 } |
| 17829 |
| 17830 while( nLive>0 ){ |
| 17831 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ |
| 17832 int nThisLcs = 0; /* LCS for the current iterator positions */ |
| 17833 |
| 17834 for(i=0; i<pInfo->nPhrase; i++){ |
| 17835 LcsIterator *pIter = &aIter[i]; |
| 17836 if( pIter->pRead==0 ){ |
| 17837 /* This iterator is already at EOF for this column. */ |
| 17838 nThisLcs = 0; |
| 17839 }else{ |
| 17840 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ |
| 17841 pAdv = pIter; |
| 17842 } |
| 17843 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
| 17844 nThisLcs++; |
| 17845 }else{ |
| 17846 nThisLcs = 1; |
| 17847 } |
| 17848 if( nThisLcs>nLcs ) nLcs = nThisLcs; |
| 17849 } |
| 17850 } |
| 17851 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; |
| 17852 } |
| 17853 |
| 17854 pInfo->aMatchinfo[iCol] = nLcs; |
| 17855 } |
| 17856 |
| 17857 sqlite3_free(aIter); |
| 17858 return SQLITE_OK; |
| 17859 } |
| 17860 |
| 17861 /* |
| 17862 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to |
| 17863 ** be returned by the matchinfo() function. Argument zArg contains the |
| 17864 ** format string passed as the second argument to matchinfo (or the |
| 17865 ** default value "pcx" if no second argument was specified). The format |
| 17866 ** string has already been validated and the pInfo->aMatchinfo[] array |
| 17867 ** is guaranteed to be large enough for the output. |
| 17868 ** |
| 17869 ** If bGlobal is true, then populate all fields of the matchinfo() output. |
| 17870 ** If it is false, then assume that those fields that do not change between |
| 17871 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) |
| 17872 ** have already been populated. |
| 17873 ** |
| 17874 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 17875 ** occurs. If a value other than SQLITE_OK is returned, the state the |
| 17876 ** pInfo->aMatchinfo[] buffer is left in is undefined. |
| 17877 */ |
| 17878 static int fts3MatchinfoValues( |
| 17879 Fts3Cursor *pCsr, /* FTS3 cursor object */ |
| 17880 int bGlobal, /* True to grab the global stats */ |
| 17881 MatchInfo *pInfo, /* Matchinfo context object */ |
| 17882 const char *zArg /* Matchinfo format string */ |
| 17883 ){ |
| 17884 int rc = SQLITE_OK; |
| 17885 int i; |
| 17886 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17887 sqlite3_stmt *pSelect = 0; |
| 17888 |
| 17889 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ |
| 17890 pInfo->flag = zArg[i]; |
| 17891 switch( zArg[i] ){ |
| 17892 case FTS3_MATCHINFO_NPHRASE: |
| 17893 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; |
| 17894 break; |
| 17895 |
| 17896 case FTS3_MATCHINFO_NCOL: |
| 17897 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
| 17898 break; |
| 17899 |
| 17900 case FTS3_MATCHINFO_NDOC: |
| 17901 if( bGlobal ){ |
| 17902 sqlite3_int64 nDoc = 0; |
| 17903 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); |
| 17904 pInfo->aMatchinfo[0] = (u32)nDoc; |
| 17905 } |
| 17906 break; |
| 17907 |
| 17908 case FTS3_MATCHINFO_AVGLENGTH: |
| 17909 if( bGlobal ){ |
| 17910 sqlite3_int64 nDoc; /* Number of rows in table */ |
| 17911 const char *a; /* Aggregate column length array */ |
| 17912 |
| 17913 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); |
| 17914 if( rc==SQLITE_OK ){ |
| 17915 int iCol; |
| 17916 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17917 u32 iVal; |
| 17918 sqlite3_int64 nToken; |
| 17919 a += sqlite3Fts3GetVarint(a, &nToken); |
| 17920 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); |
| 17921 pInfo->aMatchinfo[iCol] = iVal; |
| 17922 } |
| 17923 } |
| 17924 } |
| 17925 break; |
| 17926 |
| 17927 case FTS3_MATCHINFO_LENGTH: { |
| 17928 sqlite3_stmt *pSelectDocsize = 0; |
| 17929 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); |
| 17930 if( rc==SQLITE_OK ){ |
| 17931 int iCol; |
| 17932 const char *a = sqlite3_column_blob(pSelectDocsize, 0); |
| 17933 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 17934 sqlite3_int64 nToken; |
| 17935 a += sqlite3Fts3GetVarint(a, &nToken); |
| 17936 pInfo->aMatchinfo[iCol] = (u32)nToken; |
| 17937 } |
| 17938 } |
| 17939 sqlite3_reset(pSelectDocsize); |
| 17940 break; |
| 17941 } |
| 17942 |
| 17943 case FTS3_MATCHINFO_LCS: |
| 17944 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 17945 if( rc==SQLITE_OK ){ |
| 17946 rc = fts3MatchinfoLcs(pCsr, pInfo); |
| 17947 } |
| 17948 break; |
| 17949 |
| 17950 case FTS3_MATCHINFO_LHITS_BM: |
| 17951 case FTS3_MATCHINFO_LHITS: { |
| 17952 int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); |
| 17953 memset(pInfo->aMatchinfo, 0, nZero); |
| 17954 fts3ExprLHitGather(pCsr->pExpr, pInfo); |
| 17955 break; |
| 17956 } |
| 17957 |
| 17958 default: { |
| 17959 Fts3Expr *pExpr; |
| 17960 assert( zArg[i]==FTS3_MATCHINFO_HITS ); |
| 17961 pExpr = pCsr->pExpr; |
| 17962 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 17963 if( rc!=SQLITE_OK ) break; |
| 17964 if( bGlobal ){ |
| 17965 if( pCsr->pDeferred ){ |
| 17966 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); |
| 17967 if( rc!=SQLITE_OK ) break; |
| 17968 } |
| 17969 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); |
| 17970 sqlite3Fts3EvalTestDeferred(pCsr, &rc); |
| 17971 if( rc!=SQLITE_OK ) break; |
| 17972 } |
| 17973 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); |
| 17974 break; |
| 17975 } |
| 17976 } |
| 17977 |
| 17978 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); |
| 17979 } |
| 17980 |
| 17981 sqlite3_reset(pSelect); |
| 17982 return rc; |
| 17983 } |
| 17984 |
| 17985 |
| 17986 /* |
| 17987 ** Populate pCsr->aMatchinfo[] with data for the current row. The |
| 17988 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). |
| 17989 */ |
| 17990 static void fts3GetMatchinfo( |
| 17991 sqlite3_context *pCtx, /* Return results here */ |
| 17992 Fts3Cursor *pCsr, /* FTS3 Cursor object */ |
| 17993 const char *zArg /* Second argument to matchinfo() function */ |
| 17994 ){ |
| 17995 MatchInfo sInfo; |
| 17996 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 17997 int rc = SQLITE_OK; |
| 17998 int bGlobal = 0; /* Collect 'global' stats as well as local */ |
| 17999 |
| 18000 u32 *aOut = 0; |
| 18001 void (*xDestroyOut)(void*) = 0; |
| 18002 |
| 18003 memset(&sInfo, 0, sizeof(MatchInfo)); |
| 18004 sInfo.pCursor = pCsr; |
| 18005 sInfo.nCol = pTab->nColumn; |
| 18006 |
| 18007 /* If there is cached matchinfo() data, but the format string for the |
| 18008 ** cache does not match the format string for this request, discard |
| 18009 ** the cached data. */ |
| 18010 if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ |
| 18011 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); |
| 18012 pCsr->pMIBuffer = 0; |
| 18013 } |
| 18014 |
| 18015 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the |
| 18016 ** matchinfo function has been called for this query. In this case |
| 18017 ** allocate the array used to accumulate the matchinfo data and |
| 18018 ** initialize those elements that are constant for every row. |
| 18019 */ |
| 18020 if( pCsr->pMIBuffer==0 ){ |
| 18021 int nMatchinfo = 0; /* Number of u32 elements in match-info */ |
| 18022 int i; /* Used to iterate through zArg */ |
| 18023 |
| 18024 /* Determine the number of phrases in the query */ |
| 18025 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); |
| 18026 sInfo.nPhrase = pCsr->nPhrase; |
| 18027 |
| 18028 /* Determine the number of integers in the buffer returned by this call. */ |
| 18029 for(i=0; zArg[i]; i++){ |
| 18030 char *zErr = 0; |
| 18031 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ |
| 18032 sqlite3_result_error(pCtx, zErr, -1); |
| 18033 sqlite3_free(zErr); |
| 18034 return; |
| 18035 } |
| 18036 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); |
| 18037 } |
| 18038 |
| 18039 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ |
| 18040 pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); |
| 18041 if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; |
| 18042 |
| 18043 pCsr->isMatchinfoNeeded = 1; |
| 18044 bGlobal = 1; |
| 18045 } |
| 18046 |
| 18047 if( rc==SQLITE_OK ){ |
| 18048 xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); |
| 18049 if( xDestroyOut==0 ){ |
| 18050 rc = SQLITE_NOMEM; |
| 18051 } |
| 18052 } |
| 18053 |
| 18054 if( rc==SQLITE_OK ){ |
| 18055 sInfo.aMatchinfo = aOut; |
| 18056 sInfo.nPhrase = pCsr->nPhrase; |
| 18057 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); |
| 18058 if( bGlobal ){ |
| 18059 fts3MIBufferSetGlobal(pCsr->pMIBuffer); |
| 18060 } |
| 18061 } |
| 18062 |
| 18063 if( rc!=SQLITE_OK ){ |
| 18064 sqlite3_result_error_code(pCtx, rc); |
| 18065 if( xDestroyOut ) xDestroyOut(aOut); |
| 18066 }else{ |
| 18067 int n = pCsr->pMIBuffer->nElem * sizeof(u32); |
| 18068 sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); |
| 18069 } |
| 18070 } |
| 18071 |
| 18072 /* |
| 18073 ** Implementation of snippet() function. |
| 18074 */ |
| 18075 SQLITE_PRIVATE void sqlite3Fts3Snippet( |
| 18076 sqlite3_context *pCtx, /* SQLite function call context */ |
| 18077 Fts3Cursor *pCsr, /* Cursor object */ |
| 18078 const char *zStart, /* Snippet start text - "<b>" */ |
| 18079 const char *zEnd, /* Snippet end text - "</b>" */ |
| 18080 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ |
| 18081 int iCol, /* Extract snippet from this column */ |
| 18082 int nToken /* Approximate number of tokens in snippet */ |
| 18083 ){ |
| 18084 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18085 int rc = SQLITE_OK; |
| 18086 int i; |
| 18087 StrBuffer res = {0, 0, 0}; |
| 18088 |
| 18089 /* The returned text includes up to four fragments of text extracted from |
| 18090 ** the data in the current row. The first iteration of the for(...) loop |
| 18091 ** below attempts to locate a single fragment of text nToken tokens in |
| 18092 ** size that contains at least one instance of all phrases in the query |
| 18093 ** expression that appear in the current row. If such a fragment of text |
| 18094 ** cannot be found, the second iteration of the loop attempts to locate |
| 18095 ** a pair of fragments, and so on. |
| 18096 */ |
| 18097 int nSnippet = 0; /* Number of fragments in this snippet */ |
| 18098 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ |
| 18099 int nFToken = -1; /* Number of tokens in each fragment */ |
| 18100 |
| 18101 if( !pCsr->pExpr ){ |
| 18102 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 18103 return; |
| 18104 } |
| 18105 |
| 18106 for(nSnippet=1; 1; nSnippet++){ |
| 18107 |
| 18108 int iSnip; /* Loop counter 0..nSnippet-1 */ |
| 18109 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ |
| 18110 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ |
| 18111 |
| 18112 if( nToken>=0 ){ |
| 18113 nFToken = (nToken+nSnippet-1) / nSnippet; |
| 18114 }else{ |
| 18115 nFToken = -1 * nToken; |
| 18116 } |
| 18117 |
| 18118 for(iSnip=0; iSnip<nSnippet; iSnip++){ |
| 18119 int iBestScore = -1; /* Best score of columns checked so far */ |
| 18120 int iRead; /* Used to iterate through columns */ |
| 18121 SnippetFragment *pFragment = &aSnippet[iSnip]; |
| 18122 |
| 18123 memset(pFragment, 0, sizeof(*pFragment)); |
| 18124 |
| 18125 /* Loop through all columns of the table being considered for snippets. |
| 18126 ** If the iCol argument to this function was negative, this means all |
| 18127 ** columns of the FTS3 table. Otherwise, only column iCol is considered. |
| 18128 */ |
| 18129 for(iRead=0; iRead<pTab->nColumn; iRead++){ |
| 18130 SnippetFragment sF = {0, 0, 0, 0}; |
| 18131 int iS = 0; |
| 18132 if( iCol>=0 && iRead!=iCol ) continue; |
| 18133 |
| 18134 /* Find the best snippet of nFToken tokens in column iRead. */ |
| 18135 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); |
| 18136 if( rc!=SQLITE_OK ){ |
| 18137 goto snippet_out; |
| 18138 } |
| 18139 if( iS>iBestScore ){ |
| 18140 *pFragment = sF; |
| 18141 iBestScore = iS; |
| 18142 } |
| 18143 } |
| 18144 |
| 18145 mCovered |= pFragment->covered; |
| 18146 } |
| 18147 |
| 18148 /* If all query phrases seen by fts3BestSnippet() are present in at least |
| 18149 ** one of the nSnippet snippet fragments, break out of the loop. |
| 18150 */ |
| 18151 assert( (mCovered&mSeen)==mCovered ); |
| 18152 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; |
| 18153 } |
| 18154 |
| 18155 assert( nFToken>0 ); |
| 18156 |
| 18157 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ |
| 18158 rc = fts3SnippetText(pCsr, &aSnippet[i], |
| 18159 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res |
| 18160 ); |
| 18161 } |
| 18162 |
| 18163 snippet_out: |
| 18164 sqlite3Fts3SegmentsClose(pTab); |
| 18165 if( rc!=SQLITE_OK ){ |
| 18166 sqlite3_result_error_code(pCtx, rc); |
| 18167 sqlite3_free(res.z); |
| 18168 }else{ |
| 18169 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); |
| 18170 } |
| 18171 } |
| 18172 |
| 18173 |
/*
** State used by the offsets() implementation. There is one TermOffset for
** every token of every phrase in the query; TermOffsetCtx is the context
** passed to the fts3ExprTermOffsetInit() callback that fills them in.
*/
typedef struct TermOffset TermOffset;
typedef struct TermOffsetCtx TermOffsetCtx;

struct TermOffset {
  char *pList;                    /* Position-list */
  int iPos;                       /* Position just read from pList */
  int iOff;                       /* Offset of this term from read positions
                                  ** (nTerm-1 for the first token of a phrase
                                  ** down to 0 for the last) */
};

struct TermOffsetCtx {
  Fts3Cursor *pCsr;               /* Cursor the offsets are computed for */
  int iCol;                       /* Column of table to populate aTerm for */
  int iTerm;                      /* Index of next aTerm[] entry to fill */
  sqlite3_int64 iDocid;           /* Docid of the row being processed */
  TermOffset *aTerm;              /* Array of per-token iterators */
};
| 18190 |
| 18191 /* |
| 18192 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). |
| 18193 */ |
| 18194 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 18195 TermOffsetCtx *p = (TermOffsetCtx *)ctx; |
| 18196 int nTerm; /* Number of tokens in phrase */ |
| 18197 int iTerm; /* For looping through nTerm phrase terms */ |
| 18198 char *pList; /* Pointer to position list for phrase */ |
| 18199 int iPos = 0; /* First position in position-list */ |
| 18200 int rc; |
| 18201 |
| 18202 UNUSED_PARAMETER(iPhrase); |
| 18203 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); |
| 18204 nTerm = pExpr->pPhrase->nToken; |
| 18205 if( pList ){ |
| 18206 fts3GetDeltaPosition(&pList, &iPos); |
| 18207 assert( iPos>=0 ); |
| 18208 } |
| 18209 |
| 18210 for(iTerm=0; iTerm<nTerm; iTerm++){ |
| 18211 TermOffset *pT = &p->aTerm[p->iTerm++]; |
| 18212 pT->iOff = nTerm-iTerm-1; |
| 18213 pT->pList = pList; |
| 18214 pT->iPos = iPos; |
| 18215 } |
| 18216 |
| 18217 return rc; |
| 18218 } |
| 18219 |
| 18220 /* |
| 18221 ** Implementation of offsets() function. |
| 18222 */ |
| 18223 SQLITE_PRIVATE void sqlite3Fts3Offsets( |
| 18224 sqlite3_context *pCtx, /* SQLite function call context */ |
| 18225 Fts3Cursor *pCsr /* Cursor object */ |
| 18226 ){ |
| 18227 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18228 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; |
| 18229 int rc; /* Return Code */ |
| 18230 int nToken; /* Number of tokens in query */ |
| 18231 int iCol; /* Column currently being processed */ |
| 18232 StrBuffer res = {0, 0, 0}; /* Result string */ |
| 18233 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ |
| 18234 |
| 18235 if( !pCsr->pExpr ){ |
| 18236 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 18237 return; |
| 18238 } |
| 18239 |
| 18240 memset(&sCtx, 0, sizeof(sCtx)); |
| 18241 assert( pCsr->isRequireSeek==0 ); |
| 18242 |
| 18243 /* Count the number of terms in the query */ |
| 18244 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); |
| 18245 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18246 |
| 18247 /* Allocate the array of TermOffset iterators. */ |
| 18248 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); |
| 18249 if( 0==sCtx.aTerm ){ |
| 18250 rc = SQLITE_NOMEM; |
| 18251 goto offsets_out; |
| 18252 } |
| 18253 sCtx.iDocid = pCsr->iPrevId; |
| 18254 sCtx.pCsr = pCsr; |
| 18255 |
| 18256 /* Loop through the table columns, appending offset information to |
| 18257 ** string-buffer res for each column. |
| 18258 */ |
| 18259 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 18260 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ |
| 18261 const char *ZDUMMY; /* Dummy argument used with xNext() */ |
| 18262 int NDUMMY = 0; /* Dummy argument used with xNext() */ |
| 18263 int iStart = 0; |
| 18264 int iEnd = 0; |
| 18265 int iCurrent = 0; |
| 18266 const char *zDoc; |
| 18267 int nDoc; |
| 18268 |
| 18269 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is |
| 18270 ** no way that this operation can fail, so the return code from |
| 18271 ** fts3ExprIterate() can be discarded. |
| 18272 */ |
| 18273 sCtx.iCol = iCol; |
| 18274 sCtx.iTerm = 0; |
| 18275 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); |
| 18276 |
| 18277 /* Retreive the text stored in column iCol. If an SQL NULL is stored |
| 18278 ** in column iCol, jump immediately to the next iteration of the loop. |
| 18279 ** If an OOM occurs while retrieving the data (this can happen if SQLite |
| 18280 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM |
| 18281 ** to the caller. |
| 18282 */ |
| 18283 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); |
| 18284 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
| 18285 if( zDoc==0 ){ |
| 18286 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ |
| 18287 continue; |
| 18288 } |
| 18289 rc = SQLITE_NOMEM; |
| 18290 goto offsets_out; |
| 18291 } |
| 18292 |
| 18293 /* Initialize a tokenizer iterator to iterate through column iCol. */ |
| 18294 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, |
| 18295 zDoc, nDoc, &pC |
| 18296 ); |
| 18297 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18298 |
| 18299 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 18300 while( rc==SQLITE_OK ){ |
| 18301 int i; /* Used to loop through terms */ |
| 18302 int iMinPos = 0x7FFFFFFF; /* Position of next token */ |
| 18303 TermOffset *pTerm = 0; /* TermOffset associated with next token */ |
| 18304 |
| 18305 for(i=0; i<nToken; i++){ |
| 18306 TermOffset *pT = &sCtx.aTerm[i]; |
| 18307 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ |
| 18308 iMinPos = pT->iPos-pT->iOff; |
| 18309 pTerm = pT; |
| 18310 } |
| 18311 } |
| 18312 |
| 18313 if( !pTerm ){ |
| 18314 /* All offsets for this column have been gathered. */ |
| 18315 rc = SQLITE_DONE; |
| 18316 }else{ |
| 18317 assert( iCurrent<=iMinPos ); |
| 18318 if( 0==(0xFE&*pTerm->pList) ){ |
| 18319 pTerm->pList = 0; |
| 18320 }else{ |
| 18321 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); |
| 18322 } |
| 18323 while( rc==SQLITE_OK && iCurrent<iMinPos ){ |
| 18324 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 18325 } |
| 18326 if( rc==SQLITE_OK ){ |
| 18327 char aBuffer[64]; |
| 18328 sqlite3_snprintf(sizeof(aBuffer), aBuffer, |
| 18329 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart |
| 18330 ); |
| 18331 rc = fts3StringAppend(&res, aBuffer, -1); |
| 18332 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ |
| 18333 rc = FTS_CORRUPT_VTAB; |
| 18334 } |
| 18335 } |
| 18336 } |
| 18337 if( rc==SQLITE_DONE ){ |
| 18338 rc = SQLITE_OK; |
| 18339 } |
| 18340 |
| 18341 pMod->xClose(pC); |
| 18342 if( rc!=SQLITE_OK ) goto offsets_out; |
| 18343 } |
| 18344 |
| 18345 offsets_out: |
| 18346 sqlite3_free(sCtx.aTerm); |
| 18347 assert( rc!=SQLITE_DONE ); |
| 18348 sqlite3Fts3SegmentsClose(pTab); |
| 18349 if( rc!=SQLITE_OK ){ |
| 18350 sqlite3_result_error_code(pCtx, rc); |
| 18351 sqlite3_free(res.z); |
| 18352 }else{ |
| 18353 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); |
| 18354 } |
| 18355 return; |
| 18356 } |
| 18357 |
| 18358 /* |
| 18359 ** Implementation of matchinfo() function. |
| 18360 */ |
| 18361 SQLITE_PRIVATE void sqlite3Fts3Matchinfo( |
| 18362 sqlite3_context *pContext, /* Function call context */ |
| 18363 Fts3Cursor *pCsr, /* FTS3 table cursor */ |
| 18364 const char *zArg /* Second arg to matchinfo() function */ |
| 18365 ){ |
| 18366 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 18367 const char *zFormat; |
| 18368 |
| 18369 if( zArg ){ |
| 18370 zFormat = zArg; |
| 18371 }else{ |
| 18372 zFormat = FTS3_MATCHINFO_DEFAULT; |
| 18373 } |
| 18374 |
| 18375 if( !pCsr->pExpr ){ |
| 18376 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); |
| 18377 return; |
| 18378 }else{ |
| 18379 /* Retrieve matchinfo() data. */ |
| 18380 fts3GetMatchinfo(pContext, pCsr, zFormat); |
| 18381 sqlite3Fts3SegmentsClose(pTab); |
| 18382 } |
| 18383 } |
| 18384 |
| 18385 #endif |
| 18386 |
| 18387 /************** End of fts3_snippet.c ****************************************/ |
| 18388 /************** Begin file fts3_unicode.c ************************************/ |
| 18389 /* |
| 18390 ** 2012 May 24 |
| 18391 ** |
| 18392 ** The author disclaims copyright to this source code. In place of |
| 18393 ** a legal notice, here is a blessing: |
| 18394 ** |
| 18395 ** May you do good and not evil. |
| 18396 ** May you find forgiveness for yourself and forgive others. |
| 18397 ** May you share freely, never taking more than you give. |
| 18398 ** |
| 18399 ****************************************************************************** |
| 18400 ** |
| 18401 ** Implementation of the "unicode" full-text-search tokenizer. |
| 18402 */ |
| 18403 |
| 18404 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 18405 |
| 18406 /* #include "fts3Int.h" */ |
| 18407 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 18408 |
| 18409 /* #include <assert.h> */ |
| 18410 /* #include <stdlib.h> */ |
| 18411 /* #include <stdio.h> */ |
| 18412 /* #include <string.h> */ |
| 18413 |
| 18414 /* #include "fts3_tokenizer.h" */ |
| 18415 |
| 18416 /* |
| 18417 ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied |
| 18418 ** from the sqlite3 source file utf.c. If this file is compiled as part |
| 18419 ** of the amalgamation, they are not required. |
| 18420 */ |
| 18421 #ifndef SQLITE_AMALGAMATION |
| 18422 |
/*
** Lookup table used by READ_UTF8. It has 64 entries and is indexed by
** (first byte - 0xc0) for any first byte >= 0xc0. Each entry is the
** starting value that the continuation bytes are then shifted into.
*/
| 18423 static const unsigned char sqlite3Utf8Trans1[] = { |
| 18424 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 18425 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| 18426 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, |
| 18427 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, |
| 18428 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 18429 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| 18430 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 18431 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, |
| 18432 }; |
| 18433 |
/*
** READ_UTF8(zIn, zTerm, c): read one character from zIn into c and advance
** zIn past it.
**
** A first byte below 0xc0 is returned as is. Otherwise the first byte is
** mapped through sqlite3Utf8Trans1[] and every following byte of the form
** 10xxxxxx contributes six more bits, stopping when zIn reaches zTerm.
** The result is replaced by 0xFFFD when it is below 0x80, lies in the
** range 0xD800..0xDFFF, or is 0xFFFE or 0xFFFF.
**
** The macro expands to several statements (it is not wrapped in a block)
** and evaluates zIn and c more than once, so the arguments must be plain
** lvalues without side effects.
*/
| 18434 #define READ_UTF8(zIn, zTerm, c) \ |
| 18435 c = *(zIn++); \ |
| 18436 if( c>=0xc0 ){ \ |
| 18437 c = sqlite3Utf8Trans1[c-0xc0]; \ |
| 18438 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ |
| 18439 c = (c<<6) + (0x3f & *(zIn++)); \ |
| 18440 } \ |
| 18441 if( c<0x80 \ |
| 18442 || (c&0xFFFFF800)==0xD800 \ |
| 18443 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ |
| 18444 } |
| 18445 |
/*
** WRITE_UTF8(zOut, c): store character c at zOut using one byte when
** c<0x80, two bytes when c<0x800, three bytes when c<0x10000 and four
** bytes otherwise, advancing zOut past the bytes written. Both arguments
** are evaluated several times.
*/
| 18446 #define WRITE_UTF8(zOut, c) { \ |
| 18447 if( c<0x00080 ){ \ |
| 18448 *zOut++ = (u8)(c&0xFF); \ |
| 18449 } \ |
| 18450 else if( c<0x00800 ){ \ |
| 18451 *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ |
| 18452 *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
| 18453 } \ |
| 18454 else if( c<0x10000 ){ \ |
| 18455 *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ |
| 18456 *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ |
| 18457 *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
| 18458 }else{ \ |
| 18459 *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ |
| 18460 *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ |
| 18461 *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ |
| 18462 *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
| 18463 } \ |
| 18464 } |
| 18465 |
| 18466 #endif /* ifndef SQLITE_AMALGAMATION */ |
| 18467 |
| 18468 typedef struct unicode_tokenizer unicode_tokenizer; |
| 18469 typedef struct unicode_cursor unicode_cursor; |
| 18470 |
| 18471 struct unicode_tokenizer { |
| 18472 sqlite3_tokenizer base; |
| 18473 int bRemoveDiacritic; |
| 18474 int nException; |
| 18475 int *aiException; |
| 18476 }; |
| 18477 |
| 18478 struct unicode_cursor { |
| 18479 sqlite3_tokenizer_cursor base; |
| 18480 const unsigned char *aInput; /* Input text being tokenized */ |
| 18481 int nInput; /* Size of aInput[] in bytes */ |
| 18482 int iOff; /* Current offset within aInput[] */ |
| 18483 int iToken; /* Index of next token to be returned */ |
| 18484 char *zToken; /* storage for current token */ |
| 18485 int nAlloc; /* space allocated at zToken */ |
| 18486 }; |
| 18487 |
| 18488 |
| 18489 /* |
| 18490 ** Destroy a tokenizer allocated by unicodeCreate(). |
| 18491 */ |
| 18492 static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ |
| 18493 if( pTokenizer ){ |
| 18494 unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; |
| 18495 sqlite3_free(p->aiException); |
| 18496 sqlite3_free(p); |
| 18497 } |
| 18498 return SQLITE_OK; |
| 18499 } |
| 18500 |
| 18501 /* |
| 18502 ** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE |
| 18503 ** statement has specified that the tokenizer for this table shall consider |
| 18504 ** all characters in string zIn/nIn to be separators (if bAlnum==0) or |
| 18505 ** token characters (if bAlnum==1). |
| 18506 ** |
| 18507 ** For each codepoint in the zIn/nIn string, this function checks if the |
| 18508 ** sqlite3FtsUnicodeIsalnum() function already returns the desired result. |
| 18509 ** If so, no action is taken. Otherwise, the codepoint is added to the |
| 18510 ** unicode_tokenizer.aiException[] array. For the purposes of tokenization, |
| 18511 ** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all |
| 18512 ** codepoints in the aiException[] array. |
| 18513 ** |
| 18514 ** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() |
| 18515 ** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. |
| 18516 ** It is not possible to change the behavior of the tokenizer with respect |
| 18517 ** to these codepoints. |
| 18518 */ |
| 18519 static int unicodeAddExceptions( |
| 18520 unicode_tokenizer *p, /* Tokenizer to add exceptions to */ |
| 18521 int bAlnum, /* Replace Isalnum() return value with this */ |
| 18522 const char *zIn, /* Array of characters to make exceptions */ |
| 18523 int nIn /* Length of z in bytes */ |
| 18524 ){ |
| 18525 const unsigned char *z = (const unsigned char *)zIn; |
| 18526 const unsigned char *zTerm = &z[nIn]; |
| 18527 int iCode; |
| 18528 int nEntry = 0; |
| 18529 |
| 18530 assert( bAlnum==0 || bAlnum==1 ); |
| 18531 |
| 18532 while( z<zTerm ){ |
| 18533 READ_UTF8(z, zTerm, iCode); |
| 18534 assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); |
| 18535 if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum |
| 18536 && sqlite3FtsUnicodeIsdiacritic(iCode)==0 |
| 18537 ){ |
| 18538 nEntry++; |
| 18539 } |
| 18540 } |
| 18541 |
| 18542 if( nEntry ){ |
| 18543 int *aNew; /* New aiException[] array */ |
| 18544 int nNew; /* Number of valid entries in array aNew[] */ |
| 18545 |
| 18546 aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int)); |
| 18547 if( aNew==0 ) return SQLITE_NOMEM; |
| 18548 nNew = p->nException; |
| 18549 |
| 18550 z = (const unsigned char *)zIn; |
| 18551 while( z<zTerm ){ |
| 18552 READ_UTF8(z, zTerm, iCode); |
| 18553 if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum |
| 18554 && sqlite3FtsUnicodeIsdiacritic(iCode)==0 |
| 18555 ){ |
| 18556 int i, j; |
| 18557 for(i=0; i<nNew && aNew[i]<iCode; i++); |
| 18558 for(j=nNew; j>i; j--) aNew[j] = aNew[j-1]; |
| 18559 aNew[i] = iCode; |
| 18560 nNew++; |
| 18561 } |
| 18562 } |
| 18563 p->aiException = aNew; |
| 18564 p->nException = nNew; |
| 18565 } |
| 18566 |
| 18567 return SQLITE_OK; |
| 18568 } |
| 18569 |
| 18570 /* |
| 18571 ** Return true if the p->aiException[] array contains the value iCode. |
| 18572 */ |
| 18573 static int unicodeIsException(unicode_tokenizer *p, int iCode){ |
| 18574 if( p->nException>0 ){ |
| 18575 int *a = p->aiException; |
| 18576 int iLo = 0; |
| 18577 int iHi = p->nException-1; |
| 18578 |
| 18579 while( iHi>=iLo ){ |
| 18580 int iTest = (iHi + iLo) / 2; |
| 18581 if( iCode==a[iTest] ){ |
| 18582 return 1; |
| 18583 }else if( iCode>a[iTest] ){ |
| 18584 iLo = iTest+1; |
| 18585 }else{ |
| 18586 iHi = iTest-1; |
| 18587 } |
| 18588 } |
| 18589 } |
| 18590 |
| 18591 return 0; |
| 18592 } |
| 18593 |
| 18594 /* |
| 18595 ** Return true if, for the purposes of tokenization, codepoint iCode is |
| 18596 ** considered a token character (not a separator). |
| 18597 */ |
| 18598 static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ |
| 18599 assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); |
| 18600 return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); |
| 18601 } |
| 18602 |
| 18603 /* |
| 18604 ** Create a new tokenizer instance. |
| 18605 */ |
| 18606 static int unicodeCreate( |
| 18607 int nArg, /* Size of array argv[] */ |
| 18608 const char * const *azArg, /* Tokenizer creation arguments */ |
| 18609 sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ |
| 18610 ){ |
| 18611 unicode_tokenizer *pNew; /* New tokenizer object */ |
| 18612 int i; |
| 18613 int rc = SQLITE_OK; |
| 18614 |
| 18615 pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); |
| 18616 if( pNew==NULL ) return SQLITE_NOMEM; |
| 18617 memset(pNew, 0, sizeof(unicode_tokenizer)); |
| 18618 pNew->bRemoveDiacritic = 1; |
| 18619 |
| 18620 for(i=0; rc==SQLITE_OK && i<nArg; i++){ |
| 18621 const char *z = azArg[i]; |
| 18622 int n = (int)strlen(z); |
| 18623 |
| 18624 if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){ |
| 18625 pNew->bRemoveDiacritic = 1; |
| 18626 } |
| 18627 else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ |
| 18628 pNew->bRemoveDiacritic = 0; |
| 18629 } |
| 18630 else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ |
| 18631 rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); |
| 18632 } |
| 18633 else if( n>=11 && memcmp("separators=", z, 11)==0 ){ |
| 18634 rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); |
| 18635 } |
| 18636 else{ |
| 18637 /* Unrecognized argument */ |
| 18638 rc = SQLITE_ERROR; |
| 18639 } |
| 18640 } |
| 18641 |
| 18642 if( rc!=SQLITE_OK ){ |
| 18643 unicodeDestroy((sqlite3_tokenizer *)pNew); |
| 18644 pNew = 0; |
| 18645 } |
| 18646 *pp = (sqlite3_tokenizer *)pNew; |
| 18647 return rc; |
| 18648 } |
| 18649 |
| 18650 /* |
| 18651 ** Prepare to begin tokenizing a particular string. The input |
| 18652 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 18653 ** used to incrementally tokenize this string is returned in |
| 18654 ** *ppCursor. |
| 18655 */ |
| 18656 static int unicodeOpen( |
| 18657 sqlite3_tokenizer *p, /* The tokenizer */ |
| 18658 const char *aInput, /* Input string */ |
| 18659 int nInput, /* Size of string aInput in bytes */ |
| 18660 sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ |
| 18661 ){ |
| 18662 unicode_cursor *pCsr; |
| 18663 |
| 18664 pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); |
| 18665 if( pCsr==0 ){ |
| 18666 return SQLITE_NOMEM; |
| 18667 } |
| 18668 memset(pCsr, 0, sizeof(unicode_cursor)); |
| 18669 |
| 18670 pCsr->aInput = (const unsigned char *)aInput; |
| 18671 if( aInput==0 ){ |
| 18672 pCsr->nInput = 0; |
| 18673 }else if( nInput<0 ){ |
| 18674 pCsr->nInput = (int)strlen(aInput); |
| 18675 }else{ |
| 18676 pCsr->nInput = nInput; |
| 18677 } |
| 18678 |
| 18679 *pp = &pCsr->base; |
| 18680 UNUSED_PARAMETER(p); |
| 18681 return SQLITE_OK; |
| 18682 } |
| 18683 |
| 18684 /* |
| 18685 ** Close a tokenization cursor previously opened by a call to |
| 18686 ** simpleOpen() above. |
| 18687 */ |
| 18688 static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ |
| 18689 unicode_cursor *pCsr = (unicode_cursor *) pCursor; |
| 18690 sqlite3_free(pCsr->zToken); |
| 18691 sqlite3_free(pCsr); |
| 18692 return SQLITE_OK; |
| 18693 } |
| 18694 |
| 18695 /* |
| 18696 ** Extract the next token from a tokenization cursor. The cursor must |
| 18697 ** have been opened by a prior call to simpleOpen(). |
| 18698 */ |
| 18699 static int unicodeNext( |
| 18700 sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ |
| 18701 const char **paToken, /* OUT: Token text */ |
| 18702 int *pnToken, /* OUT: Number of bytes at *paToken */ |
| 18703 int *piStart, /* OUT: Starting offset of token */ |
| 18704 int *piEnd, /* OUT: Ending offset of token */ |
| 18705 int *piPos /* OUT: Position integer of token */ |
| 18706 ){ |
| 18707 unicode_cursor *pCsr = (unicode_cursor *)pC; |
| 18708 unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); |
| 18709 int iCode = 0; |
| 18710 char *zOut; |
| 18711 const unsigned char *z = &pCsr->aInput[pCsr->iOff]; |
| 18712 const unsigned char *zStart = z; |
| 18713 const unsigned char *zEnd; |
| 18714 const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; |
| 18715 |
| 18716 /* Scan past any delimiter characters before the start of the next token. |
| 18717 ** Return SQLITE_DONE early if this takes us all the way to the end of |
| 18718 ** the input. */ |
| 18719 while( z<zTerm ){ |
| 18720 READ_UTF8(z, zTerm, iCode); |
| 18721 if( unicodeIsAlnum(p, iCode) ) break; |
| 18722 zStart = z; |
| 18723 } |
| 18724 if( zStart>=zTerm ) return SQLITE_DONE; |
| 18725 |
| 18726 zOut = pCsr->zToken; |
| 18727 do { |
| 18728 int iOut; |
| 18729 |
| 18730 /* Grow the output buffer if required. */ |
| 18731 if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ |
| 18732 char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); |
| 18733 if( !zNew ) return SQLITE_NOMEM; |
| 18734 zOut = &zNew[zOut - pCsr->zToken]; |
| 18735 pCsr->zToken = zNew; |
| 18736 pCsr->nAlloc += 64; |
| 18737 } |
| 18738 |
| 18739 /* Write the folded case of the last character read to the output */ |
| 18740 zEnd = z; |
| 18741 iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); |
| 18742 if( iOut ){ |
| 18743 WRITE_UTF8(zOut, iOut); |
| 18744 } |
| 18745 |
| 18746 /* If the cursor is not at EOF, read the next character */ |
| 18747 if( z>=zTerm ) break; |
| 18748 READ_UTF8(z, zTerm, iCode); |
| 18749 }while( unicodeIsAlnum(p, iCode) |
| 18750 || sqlite3FtsUnicodeIsdiacritic(iCode) |
| 18751 ); |
| 18752 |
| 18753 /* Set the output variables and return. */ |
| 18754 pCsr->iOff = (int)(z - pCsr->aInput); |
| 18755 *paToken = pCsr->zToken; |
| 18756 *pnToken = (int)(zOut - pCsr->zToken); |
| 18757 *piStart = (int)(zStart - pCsr->aInput); |
| 18758 *piEnd = (int)(zEnd - pCsr->aInput); |
| 18759 *piPos = pCsr->iToken++; |
| 18760 return SQLITE_OK; |
| 18761 } |
| 18762 |
| 18763 /* |
| 18764 ** Set *ppModule to a pointer to the sqlite3_tokenizer_module |
| 18765 ** structure for the unicode tokenizer. |
| 18766 */ |
| 18767 SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const *
*ppModule){ |
| 18768 static const sqlite3_tokenizer_module module = { |
| 18769 0, |
| 18770 unicodeCreate, |
| 18771 unicodeDestroy, |
| 18772 unicodeOpen, |
| 18773 unicodeClose, |
| 18774 unicodeNext, |
| 18775 0, |
| 18776 }; |
| 18777 *ppModule = &module; |
| 18778 } |
| 18779 |
| 18780 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 18781 #endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ |
| 18782 |
| 18783 /************** End of fts3_unicode.c ****************************************/ |
| 18784 /************** Begin file fts3_unicode2.c ***********************************/ |
| 18785 /* |
| 18786 ** 2012 May 25 |
| 18787 ** |
| 18788 ** The author disclaims copyright to this source code. In place of |
| 18789 ** a legal notice, here is a blessing: |
| 18790 ** |
| 18791 ** May you do good and not evil. |
| 18792 ** May you find forgiveness for yourself and forgive others. |
| 18793 ** May you share freely, never taking more than you give. |
| 18794 ** |
| 18795 ****************************************************************************** |
| 18796 */ |
| 18797 |
| 18798 /* |
| 18799 ** DO NOT EDIT THIS MACHINE GENERATED FILE. |
| 18800 */ |
| 18801 |
| 18802 #ifndef SQLITE_DISABLE_FTS3_UNICODE |
| 18803 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) |
| 18804 |
| 18805 /* #include <assert.h> */ |
| 18806 |
| 18807 /* |
| 18808 ** Return true if the argument corresponds to a unicode codepoint |
| 18809 ** classified as either a letter or a number. Otherwise false. |
| 18810 ** |
| 18811 ** The results are undefined if the value passed to this function |
| 18812 ** is less than zero. |
| 18813 */ |
| 18814 SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){ |
| 18815 /* Each unsigned integer in the following array corresponds to a contiguous |
| 18816 ** range of unicode codepoints that are not either letters or numbers (i.e. |
| 18817 ** codepoints for which this function should return 0). |
| 18818 ** |
| 18819 ** The most significant 22 bits in each 32-bit value contain the first |
| 18820 ** codepoint in the range. The least significant 10 bits are used to store |
| 18821 ** the size of the range (always at least 1). In other words, the value |
| 18822 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint |
| 18823 ** C. It is not possible to represent a range larger than 1023 codepoints |
| 18824 ** using this format. |
| 18825 */ |
| 18826 static const unsigned int aEntry[] = { |
| 18827 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, |
| 18828 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, |
| 18829 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, |
| 18830 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, |
| 18831 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, |
| 18832 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, |
| 18833 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, |
| 18834 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, |
| 18835 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, |
| 18836 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, |
| 18837 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, |
| 18838 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, |
| 18839 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, |
| 18840 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, |
| 18841 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, |
| 18842 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, |
| 18843 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, |
| 18844 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, |
| 18845 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, |
| 18846 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, |
| 18847 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, |
| 18848 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, |
| 18849 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, |
| 18850 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, |
| 18851 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, |
| 18852 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, |
| 18853 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, |
| 18854 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, |
| 18855 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, |
| 18856 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, |
| 18857 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, |
| 18858 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, |
| 18859 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, |
| 18860 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, |
| 18861 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, |
| 18862 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, |
| 18863 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, |
| 18864 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, |
| 18865 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, |
| 18866 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, |
| 18867 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, |
| 18868 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, |
| 18869 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, |
| 18870 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, |
| 18871 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, |
| 18872 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, |
| 18873 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, |
| 18874 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, |
| 18875 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, |
| 18876 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, |
| 18877 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, |
| 18878 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, |
| 18879 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, |
| 18880 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, |
| 18881 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, |
| 18882 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, |
| 18883 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, |
| 18884 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, |
| 18885 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, |
| 18886 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, |
| 18887 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, |
| 18888 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, |
| 18889 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, |
| 18890 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, |
| 18891 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, |
| 18892 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, |
| 18893 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, |
| 18894 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, |
| 18895 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, |
| 18896 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, |
| 18897 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, |
| 18898 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, |
| 18899 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, |
| 18900 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, |
| 18901 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, |
| 18902 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, |
| 18903 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, |
| 18904 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, |
| 18905 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, |
| 18906 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, |
| 18907 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, |
| 18908 0x380400F0, |
| 18909 }; |
| 18910 static const unsigned int aAscii[4] = { |
| 18911 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, |
| 18912 }; |
| 18913 |
| 18914 if( c<128 ){ |
| 18915 return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); |
| 18916 }else if( c<(1<<22) ){ |
| 18917 unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; |
| 18918 int iRes = 0; |
| 18919 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; |
| 18920 int iLo = 0; |
| 18921 while( iHi>=iLo ){ |
| 18922 int iTest = (iHi + iLo) / 2; |
| 18923 if( key >= aEntry[iTest] ){ |
| 18924 iRes = iTest; |
| 18925 iLo = iTest+1; |
| 18926 }else{ |
| 18927 iHi = iTest-1; |
| 18928 } |
| 18929 } |
| 18930 assert( aEntry[0]<key ); |
| 18931 assert( key>=aEntry[iRes] ); |
| 18932 return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); |
| 18933 } |
| 18934 return 1; |
| 18935 } |
| 18936 |
| 18937 |
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Any codepoint that falls outside every range in the table below is
** returned unchanged.
*/
static int remove_diacritic(int c){
  /* Each aDia[] entry encodes a range of codepoints as
  ** (first codepoint << 3) | (range size - 1). The aChar[] entry with the
  ** same index is the replacement character for that range. Both tables
  ** are read-only, so they are static const and are not rebuilt on each
  ** call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Setting the low three bits of the key makes the search find the last
  ** entry whose first codepoint is less than or equal to c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  /* Past the end of the range found: c carries no diacritic known here. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
| 18990 |
| 18991 |
/*
** Return non-zero if the argument, interpreted as a unicode codepoint,
** is a diacritical modifier character, or zero otherwise. Callers should
** test the result against zero; it is not normalized to 1.
**
** Only codepoints 768..817 are considered. Bit (c-768) of mask0 covers
** 768..799 and bit (c-800) of mask1 covers 800..817.
*/
SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  /* Shift an unsigned 1: for c==799 the shift count is 31, and shifting
  ** a signed int into its sign bit is undefined behavior. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
| 19004 |
| 19005 |
| 19006 /* |
| 19007 ** Interpret the argument as a unicode codepoint. If the codepoint |
| 19008 ** is an upper case character that has a lower case equivalent, |
| 19009 ** return the codepoint corresponding to the lower case version. |
| 19010 ** Otherwise, return a copy of the argument. |
| 19011 ** |
| 19012 ** The results are undefined if the value passed to this function |
| 19013 ** is less than zero. |
| 19014 */ |
| 19015 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ |
| 19016 /* Each entry in the following array defines a rule for folding a range |
| 19017 ** of codepoints to lower case. The rule applies to a range of nRange |
| 19018 ** codepoints starting at codepoint iCode. |
| 19019 ** |
| 19020 ** If the least significant bit in flags is clear, then the rule applies |
| 19021 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and |
| 19022 ** need to be folded). Or, if it is set, then the rule only applies to |
| 19023 ** every second codepoint in the range, starting with codepoint C. |
| 19024 ** |
| 19025 ** The 7 most significant bits in flags are an index into the aiOff[] |
| 19026 ** array. If a specific codepoint C does require folding, then its lower |
| 19027 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). |
| 19028 ** |
| 19029 ** The contents of this array are generated by parsing the CaseFolding.txt |
| 19030 ** file distributed as part of the "Unicode Character Database". See |
| 19031 ** http://www.unicode.org for details. |
| 19032 */ |
| 19033 static const struct TableEntry { |
| 19034 unsigned short iCode; |
| 19035 unsigned char flags; |
| 19036 unsigned char nRange; |
| 19037 } aEntry[] = { |
| 19038 {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, |
| 19039 {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, |
| 19040 {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, |
| 19041 {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, |
| 19042 {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, |
| 19043 {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, |
| 19044 {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, |
| 19045 {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, |
| 19046 {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, |
| 19047 {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, |
| 19048 {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, |
| 19049 {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, |
| 19050 {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, |
| 19051 {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, |
| 19052 {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, |
| 19053 {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, |
| 19054 {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, |
| 19055 {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, |
| 19056 {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, |
| 19057 {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, |
| 19058 {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, |
| 19059 {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, |
| 19060 {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, |
| 19061 {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, |
| 19062 {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, |
| 19063 {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, |
| 19064 {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, |
| 19065 {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, |
| 19066 {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, |
| 19067 {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, |
| 19068 {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, |
| 19069 {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, |
| 19070 {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, |
| 19071 {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, |
| 19072 {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, |
| 19073 {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, |
| 19074 {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, |
| 19075 {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, |
| 19076 {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, |
| 19077 {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, |
| 19078 {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, |
| 19079 {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, |
| 19080 {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, |
| 19081 {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, |
| 19082 {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, |
| 19083 {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, |
| 19084 {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, |
| 19085 {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, |
| 19086 {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, |
| 19087 {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, |
| 19088 {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, |
| 19089 {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, |
| 19090 {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, |
| 19091 {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, |
| 19092 {65313, 14, 26}, |
| 19093 }; |
| 19094 static const unsigned short aiOff[] = { |
| 19095 1, 2, 8, 15, 16, 26, 28, 32, |
| 19096 37, 38, 40, 48, 63, 64, 69, 71, |
| 19097 79, 80, 116, 202, 203, 205, 206, 207, |
| 19098 209, 210, 211, 213, 214, 217, 218, 219, |
| 19099 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, |
| 19100 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, |
| 19101 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, |
| 19102 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, |
| 19103 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, |
| 19104 65514, 65521, 65527, 65528, 65529, |
| 19105 }; |
| 19106 |
| 19107 int ret = c; |
| 19108 |
| 19109 assert( c>=0 ); |
| 19110 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); |
| 19111 |
| 19112 if( c<128 ){ |
| 19113 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); |
| 19114 }else if( c<65536 ){ |
| 19115 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; |
| 19116 int iLo = 0; |
| 19117 int iRes = -1; |
| 19118 |
| 19119 while( iHi>=iLo ){ |
| 19120 int iTest = (iHi + iLo) / 2; |
| 19121 int cmp = (c - aEntry[iTest].iCode); |
| 19122 if( cmp>=0 ){ |
| 19123 iRes = iTest; |
| 19124 iLo = iTest+1; |
| 19125 }else{ |
| 19126 iHi = iTest-1; |
| 19127 } |
| 19128 } |
| 19129 assert( iRes<0 || c>=aEntry[iRes].iCode ); |
| 19130 |
| 19131 if( iRes>=0 ){ |
| 19132 const struct TableEntry *p = &aEntry[iRes]; |
| 19133 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ |
| 19134 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; |
| 19135 assert( ret>0 ); |
| 19136 } |
| 19137 } |
| 19138 |
| 19139 if( bRemoveDiacritic ) ret = remove_diacritic(ret); |
| 19140 } |
| 19141 |
| 19142 else if( c>=66560 && c<66600 ){ |
| 19143 ret = c + 40; |
| 19144 } |
| 19145 |
| 19146 return ret; |
| 19147 } |
| 19148 #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ |
| 19149 #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ |
| 19150 |
| 19151 /************** End of fts3_unicode2.c ***************************************/ |
| 19152 /************** Begin file rtree.c *******************************************/ |
| 19153 /* |
| 19154 ** 2001 September 15 |
| 19155 ** |
| 19156 ** The author disclaims copyright to this source code. In place of |
| 19157 ** a legal notice, here is a blessing: |
| 19158 ** |
| 19159 ** May you do good and not evil. |
| 19160 ** May you find forgiveness for yourself and forgive others. |
| 19161 ** May you share freely, never taking more than you give. |
| 19162 ** |
| 19163 ************************************************************************* |
| 19164 ** This file contains code for implementations of the r-tree and r*-tree |
| 19165 ** algorithms packaged as an SQLite virtual table module. |
| 19166 */ |
| 19167 |
| 19168 /* |
| 19169 ** Database Format of R-Tree Tables |
| 19170 ** -------------------------------- |
| 19171 ** |
| 19172 ** The data structure for a single virtual r-tree table is stored in three |
| 19173 ** native SQLite tables declared as follows. In each case, the '%' character |
| 19174 ** in the table name is replaced with the user-supplied name of the r-tree |
| 19175 ** table. |
| 19176 ** |
| 19177 ** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) |
| 19178 ** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) |
| 19179 ** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) |
| 19180 ** |
| 19181 ** The data for each node of the r-tree structure is stored in the %_node |
| 19182 ** table. For each node that is not the root node of the r-tree, there is |
| 19183 ** an entry in the %_parent table associating the node with its parent. |
| 19184 ** And for each row of data in the table, there is an entry in the %_rowid |
| 19185 ** table that maps from the entry's rowid to the id of the node that it
| 19186 ** is stored on. |
| 19187 ** |
| 19188 ** The root node of an r-tree always exists, even if the r-tree table is |
| 19189 ** empty. The nodeno of the root node is always 1. All other nodes in the |
| 19190 ** table must be the same size as the root node. The content of each node |
| 19191 ** is formatted as follows: |
| 19192 ** |
| 19193 ** 1. If the node is the root node (node 1), then the first 2 bytes |
| 19194 ** of the node contain the tree depth as a big-endian integer. |
| 19195 ** For non-root nodes, the first 2 bytes are left unused. |
| 19196 ** |
| 19197 ** 2. The next 2 bytes contain the number of entries currently |
| 19198 ** stored in the node. |
| 19199 ** |
| 19200 ** 3. The remainder of the node contains the node entries. Each entry |
| 19201 ** consists of a single 8-byte integer followed by an even number |
| 19202 ** of 4-byte coordinates. For leaf nodes the integer is the rowid |
| 19203 ** of a record. For internal nodes it is the node number of a |
| 19204 ** child page. |
| 19205 */ |
| 19206 |
| 19207 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) |
| 19208 |
| 19209 #ifndef SQLITE_CORE |
| 19210 /* #include "sqlite3ext.h" */ |
| 19211 SQLITE_EXTENSION_INIT1 |
| 19212 #else |
| 19213 /* #include "sqlite3.h" */ |
| 19214 #endif |
| 19215 |
| 19216 /* #include <string.h> */ |
| 19217 /* #include <assert.h> */ |
| 19218 /* #include <stdio.h> */ |
| 19219 |
| 19220 #ifndef SQLITE_AMALGAMATION |
| 19221 #include "sqlite3rtree.h" |
| 19222 typedef sqlite3_int64 i64; |
| 19223 typedef sqlite3_uint64 u64; |
| 19224 typedef unsigned char u8; |
| 19225 typedef unsigned short u16; |
| 19226 typedef unsigned int u32; |
| 19227 #endif |
| 19228 |
/* Evaluate a parameter as a void expression so that the compiler does
** not warn about it being unused.
*/
#ifndef UNUSED_PARAMETER
# define UNUSED_PARAMETER(x) (void)(x)
#endif

/* Short type names for the objects declared further down in this file. */
typedef struct Rtree Rtree;
typedef struct RtreeCursor RtreeCursor;
typedef struct RtreeNode RtreeNode;
typedef struct RtreeCell RtreeCell;
typedef struct RtreeConstraint RtreeConstraint;
typedef struct RtreeMatchArg RtreeMatchArg;
typedef struct RtreeGeomCallback RtreeGeomCallback;
typedef union RtreeCoord RtreeCoord;
typedef struct RtreeSearchPoint RtreeSearchPoint;

/* An rtree has at least 1 and at most RTREE_MAX_DIMENSIONS dimensions. */
#define RTREE_MAX_DIMENSIONS 5

/* Number of buckets in the Rtree.aHash hash table. The table is not
** expected to ever hold very many entries, so the bucket count is fixed.
*/
#define HASHSIZE 97

/* xBestIndex needs an estimate of the number of rows in the virtual
** table in order to cost the available strategies. When possible the
** estimate is read from the sqlite_stat1 table, with RTREE_MIN_ROWEST as
** a hard-coded lower bound. If there is no sqlite_stat1 entry,
** RTREE_DEFAULT_ROWEST is used instead.
*/
#define RTREE_DEFAULT_ROWEST 1048576
#define RTREE_MIN_ROWEST 100
| 19263 |
| 19264 /* |
| 19265 ** An rtree virtual-table object. |
| 19266 */ |
| 19267 struct Rtree { |
| 19268 sqlite3_vtab base; /* Base class. Must be first */ |
| 19269 sqlite3 *db; /* Host database connection */ |
| 19270 int iNodeSize; /* Size in bytes of each node in the node table */ |
| 19271 u8 nDim; /* Number of dimensions */ |
| 19272 u8 nDim2; /* Twice the number of dimensions */ |
| 19273 u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ |
| 19274 u8 nBytesPerCell; /* Bytes consumed per cell */ |
| 19275 u8 inWrTrans; /* True if inside write transaction */ |
| 19276 int iDepth; /* Current depth of the r-tree structure */ |
| 19277 char *zDb; /* Name of database containing r-tree table */ |
| 19278 char *zName; /* Name of r-tree table */ |
| 19279 u32 nBusy; /* Current number of users of this structure */ |
| 19280 i64 nRowEst; /* Estimated number of rows in this table */ |
| 19281 u32 nCursor; /* Number of open cursors */ |
| 19282 |
| 19283 /* List of nodes removed during a CondenseTree operation. List is |
| 19284 ** linked together via the pointer normally used for hash chains - |
| 19285 ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree |
| 19286 ** headed by the node (leaf nodes have RtreeNode.iNode==0). |
| 19287 */ |
| 19288 RtreeNode *pDeleted; |
| 19289 int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ |
| 19290 |
| 19291 /* Blob I/O on xxx_node */ |
| 19292 sqlite3_blob *pNodeBlob; |
| 19293 |
| 19294 /* Statements to read/write/delete a record from xxx_node */ |
| 19295 sqlite3_stmt *pWriteNode; |
| 19296 sqlite3_stmt *pDeleteNode; |
| 19297 |
| 19298 /* Statements to read/write/delete a record from xxx_rowid */ |
| 19299 sqlite3_stmt *pReadRowid; |
| 19300 sqlite3_stmt *pWriteRowid; |
| 19301 sqlite3_stmt *pDeleteRowid; |
| 19302 |
| 19303 /* Statements to read/write/delete a record from xxx_parent */ |
| 19304 sqlite3_stmt *pReadParent; |
| 19305 sqlite3_stmt *pWriteParent; |
| 19306 sqlite3_stmt *pDeleteParent; |
| 19307 |
| 19308 RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ |
| 19309 }; |
| 19310 |
/* Legal values for Rtree.eCoordType: */
#define RTREE_COORD_REAL32 0
#define RTREE_COORD_INT32  1

/*
** When SQLITE_RTREE_INT_ONLY is defined the virtual table handles integer
** coordinates only and performs no floating point operations. Otherwise
** the wide coordinate type is double and the narrow one is float.
*/
#ifdef SQLITE_RTREE_INT_ONLY
  typedef sqlite3_int64 RtreeDValue;       /* High accuracy coordinate */
  typedef int RtreeValue;                  /* Low accuracy coordinate */
# define RTREE_ZERO 0
#else
  typedef double RtreeDValue;              /* High accuracy coordinate */
  typedef float RtreeValue;                /* Low accuracy coordinate */
# define RTREE_ZERO 0.0
#endif
| 19329 |
| 19330 /* |
| 19331 ** When doing a search of an r-tree, instances of the following structure |
| 19332 ** record intermediate results from the tree walk. |
| 19333 ** |
| 19334 ** The id is always a node-id. For iLevel>=1 the id is the node-id of |
| 19335 ** the node that the RtreeSearchPoint represents. When iLevel==0, however, |
| 19336 ** the id is of the parent node and the cell that RtreeSearchPoint |
| 19337 ** represents is the iCell-th entry in the parent node. |
| 19338 */ |
| 19339 struct RtreeSearchPoint { |
| 19340 RtreeDValue rScore; /* The score for this node. Smallest goes first. */ |
| 19341 sqlite3_int64 id; /* Node ID */ |
| 19342 u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ |
| 19343 u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ |
| 19344 u8 iCell; /* Cell index within the node */ |
| 19345 }; |
| 19346 |
/*
** A node must hold at least one third of the maximum number of cells.
** In Gutman's notation:
**
**     m = M/3
**
** Here M is computed as the number of cells that fit in a node after its
** 4-byte header. When an R*-tree "Reinsert" operation is needed, the same
** number of cells is taken out of the overfull node and reinserted into
** the tree.
*/
#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3)
#define RTREE_REINSERT(p) RTREE_MINCELLS(p)
#define RTREE_MAXCELLS 51
| 19359 |
/*
** The smallest possible node-size is (512-64)==448 bytes. And the largest
** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates).
** Therefore all non-root nodes must contain at least 3 entries. With a
** fan-out of at least 3, a tree of depth 40 can address about 3^40
** (roughly 1.2e19, which exceeds 2^63) entries, so the depth of an r-tree
** structure is limited to 40.
*/
#define RTREE_MAX_DEPTH 40


/*
** Size of the RtreeNode cache kept by each cursor. Slot 0 caches the
** RtreeNode for RtreeCursor.sPoint. The other slots cache the RtreeNode
** for the first elements of the priority queue.
*/
#define RTREE_CACHE_SZ  5
| 19376 |
| 19377 /* |
| 19378 ** An rtree cursor object. |
| 19379 */ |
| 19380 struct RtreeCursor { |
| 19381 sqlite3_vtab_cursor base; /* Base class. Must be first */ |
| 19382 u8 atEOF; /* True if at end of search */ |
| 19383 u8 bPoint; /* True if sPoint is valid */ |
| 19384 int iStrategy; /* Copy of idxNum search parameter */ |
| 19385 int nConstraint; /* Number of entries in aConstraint */ |
| 19386 RtreeConstraint *aConstraint; /* Search constraints. */ |
| 19387 int nPointAlloc; /* Number of slots allocated for aPoint[] */ |
| 19388 int nPoint; /* Number of slots used in aPoint[] */ |
| 19389 int mxLevel; /* iLevel value for root of the tree */ |
| 19390 RtreeSearchPoint *aPoint; /* Priority queue for search points */ |
| 19391 RtreeSearchPoint sPoint; /* Cached next search point */ |
| 19392 RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ |
| 19393 u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ |
| 19394 }; |
| 19395 |
| 19396 /* Return the Rtree of a RtreeCursor */ |
| 19397 #define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) |
| 19398 |
/*
** One coordinate, held either as a floating point number or as an
** integer. All coordinates within a single R-Tree are always of the
** same type.
*/
union RtreeCoord {
  RtreeValue f;      /* The value as floating point */
  int i;             /* The value as an integer */
  u32 u;             /* Unsigned view, used for byte-order conversions */
};
| 19408 |
/*
** The argument is an RtreeCoord. Evaluate to the value stored within the
** RtreeCoord converted to an RtreeDValue (double or int64).
**
** Except in SQLITE_RTREE_INT_ONLY builds, the macro expects a local
** variable pRtree that points to the Rtree structure associated with the
** RtreeCoord; pRtree->eCoordType selects which union member is read.
**
** The argument is parenthesized so that an expression such as *pCoord
** can be passed as well as a plain identifier.
*/
#ifdef SQLITE_RTREE_INT_ONLY
# define DCOORD(coord) ((RtreeDValue)(coord).i)
#else
# define DCOORD(coord) (                           \
    (pRtree->eCoordType==RTREE_COORD_REAL32) ?      \
      ((double)(coord).f) :                         \
      ((double)(coord).i)                           \
  )
#endif
| 19424 |
| 19425 /* |
| 19426 ** A search constraint. |
| 19427 */ |
| 19428 struct RtreeConstraint { |
| 19429 int iCoord; /* Index of constrained coordinate */ |
| 19430 int op; /* Constraining operation */ |
| 19431 union { |
| 19432 RtreeDValue rValue; /* Constraint value. */ |
| 19433 int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); |
| 19434 int (*xQueryFunc)(sqlite3_rtree_query_info*); |
| 19435 } u; |
| 19436 sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ |
| 19437 }; |
| 19438 |
/* Legal values for RtreeConstraint.op. Each code is the ASCII value of
** the letter shown in its comment. */
#define RTREE_EQ    0x41  /* A */
#define RTREE_LE    0x42  /* B */
#define RTREE_LT    0x43  /* C */
#define RTREE_GE    0x44  /* D */
#define RTREE_GT    0x45  /* E */
#define RTREE_MATCH 0x46  /* F: Old-style sqlite3_rtree_geometry_callback() */
#define RTREE_QUERY 0x47  /* G: New-style sqlite3_rtree_query_callback() */
| 19447 |
| 19448 |
| 19449 /* |
| 19450 ** An rtree structure node. |
| 19451 */ |
| 19452 struct RtreeNode { |
| 19453 RtreeNode *pParent; /* Parent node */ |
| 19454 i64 iNode; /* The node number */ |
| 19455 int nRef; /* Number of references to this node */ |
| 19456 int isDirty; /* True if the node needs to be written to disk */ |
| 19457 u8 *zData; /* Content of the node, as should be on disk */ |
| 19458 RtreeNode *pNext; /* Next node in this hash collision chain */ |
| 19459 }; |
| 19460 |
| 19461 /* Return the number of cells in a node */ |
| 19462 #define NCELL(pNode) readInt16(&(pNode)->zData[2]) |
| 19463 |
| 19464 /* |
| 19465 ** A single cell from a node, deserialized |
| 19466 */ |
| 19467 struct RtreeCell { |
| 19468 i64 iRowid; /* Node or entry ID */ |
| 19469 RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ |
| 19470 }; |
| 19471 |
| 19472 |
| 19473 /* |
| 19474 ** This object becomes the sqlite3_user_data() for the SQL functions |
| 19475 ** that are created by sqlite3_rtree_geometry_callback() and |
| 19476 ** sqlite3_rtree_query_callback() and which appear on the right of MATCH |
| 19477 ** operators in order to constrain a search. |
| 19478 ** |
| 19479 ** xGeom and xQueryFunc are the callback functions. Exactly one of |
| 19480 ** xGeom and xQueryFunc fields is non-NULL, depending on whether the |
| 19481 ** SQL function was created using sqlite3_rtree_geometry_callback() or |
| 19482 ** sqlite3_rtree_query_callback(). |
| 19483 ** |
| 19484 ** This object is deleted automatically by the destructor mechanism in |
| 19485 ** sqlite3_create_function_v2(). |
| 19486 */ |
| 19487 struct RtreeGeomCallback { |
| 19488 int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); |
| 19489 int (*xQueryFunc)(sqlite3_rtree_query_info*); |
| 19490 void (*xDestructor)(void*); |
| 19491 void *pContext; |
| 19492 }; |
| 19493 |
| 19494 |
| 19495 /* |
| 19496 ** Value for the first field of every RtreeMatchArg object. The MATCH |
| 19497 ** operator tests that the first field of a blob operand matches this |
| 19498 ** value to avoid operating on invalid blobs (which could cause a segfault). |
| 19499 */ |
| 19500 #define RTREE_GEOMETRY_MAGIC 0x891245AB |
| 19501 |
| 19502 /* |
| 19503 ** An instance of this structure (in the form of a BLOB) is returned by |
| 19504 ** the SQL functions that sqlite3_rtree_geometry_callback() and |
| 19505 ** sqlite3_rtree_query_callback() create, and is read as the right-hand |
| 19506 ** operand to the MATCH operator of an R-Tree. |
| 19507 */ |
| 19508 struct RtreeMatchArg { |
| 19509 u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ |
| 19510 RtreeGeomCallback cb; /* Info about the callback functions */ |
| 19511 int nParam; /* Number of parameters to the SQL function */ |
| 19512 sqlite3_value **apSqlParam; /* Original SQL parameter values */ |
| 19513 RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ |
| 19514 }; |
| 19515 |
/* Larger and smaller of two values. Each argument may be evaluated twice,
** so arguments must not have side effects. */
#ifndef MAX
# define MAX(x,y) ((x) < (y) ? (y) : (x))
#endif
#ifndef MIN
# define MIN(x,y) ((x) > (y) ? (y) : (x))
#endif
| 19522 |
/* GCC_VERSION encodes the GCC version as major*1000000 + minor*1000 +
** patchlevel. It is 0 when GCC is not in use or when
** SQLITE_DISABLE_INTRINSIC is defined. */
#ifndef GCC_VERSION
#if defined(__GNUC__) && !defined(SQLITE_DISABLE_INTRINSIC)
# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
#else
# define GCC_VERSION 0
#endif
#endif

/* CLANG_VERSION encodes the clang version in the same way. It is 0 when
** clang is not in use, on _WIN32, or when SQLITE_DISABLE_INTRINSIC is
** defined. */
#ifndef CLANG_VERSION
#if defined(__clang__) && !defined(_WIN32) && !defined(SQLITE_DISABLE_INTRINSIC)
# define CLANG_VERSION \
            (__clang_major__*1000000+__clang_minor__*1000+__clang_patchlevel__)
#else
# define CLANG_VERSION 0
#endif
#endif

/* The amalgamation is expected to define testcase() already. For builds
** outside the amalgamation, define it as a no-op.
*/
#ifndef SQLITE_AMALGAMATION
# define testcase(X)
#endif

/*
** Guess the byte order of the machine from C-preprocessor macros, unless
** SQLITE_BYTEORDER has been defined already. The result is 1234 for
** little-endian, 4321 for big-endian, or 0 when the byte order cannot be
** determined at compile-time.
*/
#ifndef SQLITE_BYTEORDER
#if defined(i386)     || defined(__i386__)   || defined(_M_IX86) ||    \
    defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)  ||    \
    defined(_M_AMD64) || defined(_M_ARM)     || defined(__x86)   ||    \
    defined(__arm__)
# define SQLITE_BYTEORDER    1234
#elif defined(sparc)    || defined(__ppc__)
# define SQLITE_BYTEORDER    4321
#else
# define SQLITE_BYTEORDER    0     /* 0 means "unknown at compile-time" */
#endif
#endif


/* MSVC_VERSION is _MSC_VER, or 0 when MSVC is not in use or when
** SQLITE_DISABLE_INTRINSIC is defined. */
#ifndef MSVC_VERSION
#if defined(_MSC_VER) && !defined(SQLITE_DISABLE_INTRINSIC)
# define MSVC_VERSION _MSC_VER
#else
# define MSVC_VERSION 0
#endif
#endif
| 19580 |
| 19581 /* |
| 19582 ** Functions to deserialize a 16 bit integer, 32 bit coordinate and
| 19583 ** 64 bit integer. readCoord() stores into *pCoord; the others return it.
| 19584 */ |
/* Decode the two bytes at p as a big-endian 16 bit unsigned value. */
static int readInt16(u8 *p){
  int iHigh = p[0];
  int iLow = p[1];
  return (iHigh<<8) + iLow;
}
/*
** Decode the four big-endian bytes at p and store the result in
** pCoord->u.
**
** The bytes are copied out with memcpy() rather than read through a
** (u32*) cast, matching readInt64() and writeCoord(). The alignment
** check converts the pointer to an integer instead of subtracting a null
** pointer, which is undefined behavior.
*/
static void readCoord(u8 *p, RtreeCoord *pCoord){
  u32 x;                         /* The decoded 32 bit value */
  assert( (((size_t)p)&3)==0 );  /* p is always 4-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  memcpy(&x, p, 4);
  x = _byteswap_ulong(x);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  memcpy(&x, p, 4);
  x = __builtin_bswap32(x);
#elif SQLITE_BYTEORDER==4321
  memcpy(&x, p, 4);
#else
  x = (
    (((u32)p[0]) << 24) +
    (((u32)p[1]) << 16) +
    (((u32)p[2]) <<  8) +
    (((u32)p[3]) <<  0)
  );
#endif
  pCoord->u = x;
}
/*
** Decode the eight bytes at p as a big-endian 64 bit integer and return
** it as a signed value.
**
** The portable branch assembles the value from unsigned 64 bit pieces and
** converts once at the end. Left-shifting a signed value so that a bit
** lands in the sign position (any p[0]>=0x80) is undefined behavior.
*/
static i64 readInt64(u8 *p){
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  u64 x;
  memcpy(&x, p, 8);
  return (i64)_byteswap_uint64(x);
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  u64 x;
  memcpy(&x, p, 8);
  return (i64)__builtin_bswap64(x);
#elif SQLITE_BYTEORDER==4321
  i64 x;
  memcpy(&x, p, 8);
  return x;
#else
  return (i64)(
    (((u64)p[0]) << 56) +
    (((u64)p[1]) << 48) +
    (((u64)p[2]) << 40) +
    (((u64)p[3]) << 32) +
    (((u64)p[4]) << 24) +
    (((u64)p[5]) << 16) +
    (((u64)p[6]) <<  8) +
    (((u64)p[7]) <<  0)
  );
#endif
}
| 19631 |
| 19632 /* |
| 19633 ** Functions to serialize a 16 bit integer, 32 bit coordinate and
| 19634 ** 64 bit integer. writeCoord() and writeInt64() return the number of
| 19635 ** bytes written (4 and 8 respectively); writeInt16() returns nothing.
| 19636 */ |
/* Store the low 16 bits of i at p[0..1], most significant byte first. */
static void writeInt16(u8 *p, int i){
  int iHigh = (i>>8) & 0xFF;
  int iLow = i & 0xFF;
  p[0] = (u8)iHigh;
  p[1] = (u8)iLow;
}
/*
** Store pCoord->u at p[0..3] in big-endian byte order. Always returns 4,
** the number of bytes written.
**
** The alignment check converts the pointer to an integer rather than
** subtracting a null pointer, which is undefined behavior.
*/
static int writeCoord(u8 *p, RtreeCoord *pCoord){
  u32 i;
  assert( (((size_t)p)&3)==0 );  /* p is always 4-byte aligned */
  assert( sizeof(RtreeCoord)==4 );
  assert( sizeof(u32)==4 );
#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  i = __builtin_bswap32(pCoord->u);
  memcpy(p, &i, 4);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  i = _byteswap_ulong(pCoord->u);
  memcpy(p, &i, 4);
#elif SQLITE_BYTEORDER==4321
  i = pCoord->u;
  memcpy(p, &i, 4);
#else
  i = pCoord->u;
  p[0] = (i>>24)&0xFF;
  p[1] = (i>>16)&0xFF;
  p[2] = (i>> 8)&0xFF;
  p[3] = (i>> 0)&0xFF;
#endif
  return 4;
}
/*
** Store the 64 bit integer i at p[0..7] in big-endian byte order. Always
** returns 8, the number of bytes written.
*/
static int writeInt64(u8 *p, i64 i){
#if SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
  u64 x = __builtin_bswap64((u64)i);
  memcpy(p, &x, 8);
#elif SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
  u64 x = _byteswap_uint64((u64)i);
  memcpy(p, &x, 8);
#elif SQLITE_BYTEORDER==4321
  memcpy(p, &i, 8);
#else
  u64 x = (u64)i;              /* Unsigned copy of the value */
  int k;                       /* Index of the byte being written */
  for(k=0; k<8; k++){
    p[k] = (u8)((x >> (56 - 8*k)) & 0xFF);
  }
#endif
  return 8;
}
| 19685 |
/*
** Add one to the reference count of node p. A NULL p is ignored.
*/
static void nodeReference(RtreeNode *p){
  if( p==0 ) return;
  p->nRef++;
}
| 19694 |
| 19695 /* |
| 19696 ** Clear the content of node p (set all bytes to 0x00). |
| 19697 */ |
| 19698 static void nodeZero(Rtree *pRtree, RtreeNode *p){ |
| 19699 memset(&p->zData[2], 0, pRtree->iNodeSize-2); |
| 19700 p->isDirty = 1; |
| 19701 } |
| 19702 |
/*
** Given a node number iNode, return the corresponding bucket index to
** use in the Rtree.aHash table. The result is always in the range
** 0..HASHSIZE-1.
**
** The modulus is computed on the unsigned value. With a signed modulus a
** negative iNode would produce a negative index and an out-of-bounds
** access to aHash[]. Non-negative node numbers map to the same bucket as
** before.
*/
static int nodeHash(i64 iNode){
  return (int)(((u64)iNode) % HASHSIZE);
}
| 19710 |
| 19711 /* |
| 19712 ** Search the node hash table for node iNode. If found, return a pointer |
| 19713 ** to it. Otherwise, return 0. |
| 19714 */ |
| 19715 static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ |
| 19716 RtreeNode *p; |
| 19717 for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); |
| 19718 return p; |
| 19719 } |
| 19720 |
| 19721 /* |
| 19722 ** Add node pNode to the node hash table. |
| 19723 */ |
| 19724 static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ |
| 19725 int iHash; |
| 19726 assert( pNode->pNext==0 ); |
| 19727 iHash = nodeHash(pNode->iNode); |
| 19728 pNode->pNext = pRtree->aHash[iHash]; |
| 19729 pRtree->aHash[iHash] = pNode; |
| 19730 } |
| 19731 |
| 19732 /* |
| 19733 ** Remove node pNode from the node hash table. |
| 19734 */ |
| 19735 static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ |
| 19736 RtreeNode **pp; |
| 19737 if( pNode->iNode!=0 ){ |
| 19738 pp = &pRtree->aHash[nodeHash(pNode->iNode)]; |
| 19739 for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } |
| 19740 *pp = pNode->pNext; |
| 19741 pNode->pNext = 0; |
| 19742 } |
| 19743 } |
| 19744 |
| 19745 /* |
| 19746 ** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), |
| 19747 ** indicating that node has not yet been assigned a node number. It is |
| 19748 ** assigned a node number when nodeWrite() is called to write the |
| 19749 ** node contents out to the database. |
| 19750 */ |
| 19751 static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ |
| 19752 RtreeNode *pNode; |
| 19753 pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); |
| 19754 if( pNode ){ |
| 19755 memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); |
| 19756 pNode->zData = (u8 *)&pNode[1]; |
| 19757 pNode->nRef = 1; |
| 19758 pNode->pParent = pParent; |
| 19759 pNode->isDirty = 1; |
| 19760 nodeReference(pParent); |
| 19761 } |
| 19762 return pNode; |
| 19763 } |
| 19764 |
| 19765 /* |
| 19766 ** Clear the Rtree.pNodeBlob object |
| 19767 */ |
| 19768 static void nodeBlobReset(Rtree *pRtree){ |
| 19769 if( pRtree->pNodeBlob && pRtree->inWrTrans==0 && pRtree->nCursor==0 ){ |
| 19770 sqlite3_blob *pBlob = pRtree->pNodeBlob; |
| 19771 pRtree->pNodeBlob = 0; |
| 19772 sqlite3_blob_close(pBlob); |
| 19773 } |
| 19774 } |
| 19775 |
| 19776 /* |
| 19777 ** Obtain a reference to an r-tree node. |
| 19778 */ |
| 19779 static int nodeAcquire( |
| 19780 Rtree *pRtree, /* R-tree structure */ |
| 19781 i64 iNode, /* Node number to load */ |
| 19782 RtreeNode *pParent, /* Either the parent node or NULL */ |
| 19783 RtreeNode **ppNode /* OUT: Acquired node */ |
| 19784 ){ |
| 19785 int rc = SQLITE_OK; |
| 19786 RtreeNode *pNode = 0; |
| 19787 |
| 19788 /* Check if the requested node is already in the hash table. If so, |
| 19789 ** increase its reference count and return it. |
| 19790 */ |
| 19791 if( (pNode = nodeHashLookup(pRtree, iNode)) ){ |
| 19792 assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); |
| 19793 if( pParent && !pNode->pParent ){ |
| 19794 nodeReference(pParent); |
| 19795 pNode->pParent = pParent; |
| 19796 } |
| 19797 pNode->nRef++; |
| 19798 *ppNode = pNode; |
| 19799 return SQLITE_OK; |
| 19800 } |
| 19801 |
| 19802 if( pRtree->pNodeBlob ){ |
| 19803 sqlite3_blob *pBlob = pRtree->pNodeBlob; |
| 19804 pRtree->pNodeBlob = 0; |
| 19805 rc = sqlite3_blob_reopen(pBlob, iNode); |
| 19806 pRtree->pNodeBlob = pBlob; |
| 19807 if( rc ){ |
| 19808 nodeBlobReset(pRtree); |
| 19809 if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM; |
| 19810 } |
| 19811 } |
| 19812 if( pRtree->pNodeBlob==0 ){ |
| 19813 char *zTab = sqlite3_mprintf("%s_node", pRtree->zName); |
| 19814 if( zTab==0 ) return SQLITE_NOMEM; |
| 19815 rc = sqlite3_blob_open(pRtree->db, pRtree->zDb, zTab, "data", iNode, 0, |
| 19816 &pRtree->pNodeBlob); |
| 19817 sqlite3_free(zTab); |
| 19818 } |
| 19819 if( rc ){ |
| 19820 nodeBlobReset(pRtree); |
| 19821 *ppNode = 0; |
| 19822 /* If unable to open an sqlite3_blob on the desired row, that can only |
| 19823 ** be because the shadow tables hold erroneous data. */ |
| 19824 if( rc==SQLITE_ERROR ) rc = SQLITE_CORRUPT_VTAB; |
| 19825 }else if( pRtree->iNodeSize==sqlite3_blob_bytes(pRtree->pNodeBlob) ){ |
| 19826 pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); |
| 19827 if( !pNode ){ |
| 19828 rc = SQLITE_NOMEM; |
| 19829 }else{ |
| 19830 pNode->pParent = pParent; |
| 19831 pNode->zData = (u8 *)&pNode[1]; |
| 19832 pNode->nRef = 1; |
| 19833 pNode->iNode = iNode; |
| 19834 pNode->isDirty = 0; |
| 19835 pNode->pNext = 0; |
| 19836 rc = sqlite3_blob_read(pRtree->pNodeBlob, pNode->zData, |
| 19837 pRtree->iNodeSize, 0); |
| 19838 nodeReference(pParent); |
| 19839 } |
| 19840 } |
| 19841 |
| 19842 /* If the root node was just loaded, set pRtree->iDepth to the height |
| 19843 ** of the r-tree structure. A height of zero means all data is stored on |
| 19844 ** the root node. A height of one means the children of the root node |
| 19845 ** are the leaves, and so on. If the depth as specified on the root node |
| 19846 ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. |
| 19847 */ |
| 19848 if( pNode && iNode==1 ){ |
| 19849 pRtree->iDepth = readInt16(pNode->zData); |
| 19850 if( pRtree->iDepth>RTREE_MAX_DEPTH ){ |
| 19851 rc = SQLITE_CORRUPT_VTAB; |
| 19852 } |
| 19853 } |
| 19854 |
| 19855 /* If no error has occurred so far, check if the "number of entries" |
| 19856 ** field on the node is too large. If so, set the return code to |
| 19857 ** SQLITE_CORRUPT_VTAB. |
| 19858 */ |
| 19859 if( pNode && rc==SQLITE_OK ){ |
| 19860 if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ |
| 19861 rc = SQLITE_CORRUPT_VTAB; |
| 19862 } |
| 19863 } |
| 19864 |
| 19865 if( rc==SQLITE_OK ){ |
| 19866 if( pNode!=0 ){ |
| 19867 nodeHashInsert(pRtree, pNode); |
| 19868 }else{ |
| 19869 rc = SQLITE_CORRUPT_VTAB; |
| 19870 } |
| 19871 *ppNode = pNode; |
| 19872 }else{ |
| 19873 sqlite3_free(pNode); |
| 19874 *ppNode = 0; |
| 19875 } |
| 19876 |
| 19877 return rc; |
| 19878 } |
| 19879 |
| 19880 /* |
| 19881 ** Overwrite cell iCell of node pNode with the contents of pCell. |
| 19882 */ |
| 19883 static void nodeOverwriteCell( |
| 19884 Rtree *pRtree, /* The overall R-Tree */ |
| 19885 RtreeNode *pNode, /* The node into which the cell is to be written */ |
| 19886 RtreeCell *pCell, /* The cell to write */ |
| 19887 int iCell /* Index into pNode into which pCell is written */ |
| 19888 ){ |
| 19889 int ii; |
| 19890 u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; |
| 19891 p += writeInt64(p, pCell->iRowid); |
| 19892 for(ii=0; ii<pRtree->nDim2; ii++){ |
| 19893 p += writeCoord(p, &pCell->aCoord[ii]); |
| 19894 } |
| 19895 pNode->isDirty = 1; |
| 19896 } |
| 19897 |
| 19898 /* |
| 19899 ** Remove the cell with index iCell from node pNode. |
| 19900 */ |
| 19901 static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ |
| 19902 u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; |
| 19903 u8 *pSrc = &pDst[pRtree->nBytesPerCell]; |
| 19904 int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; |
| 19905 memmove(pDst, pSrc, nByte); |
| 19906 writeInt16(&pNode->zData[2], NCELL(pNode)-1); |
| 19907 pNode->isDirty = 1; |
| 19908 } |
| 19909 |
| 19910 /* |
| 19911 ** Insert the contents of cell pCell into node pNode. If the insert |
| 19912 ** is successful, return SQLITE_OK. |
| 19913 ** |
| 19914 ** If there is not enough free space in pNode, return SQLITE_FULL. |
| 19915 */ |
| 19916 static int nodeInsertCell( |
| 19917 Rtree *pRtree, /* The overall R-Tree */ |
| 19918 RtreeNode *pNode, /* Write new cell into this node */ |
| 19919 RtreeCell *pCell /* The cell to be inserted */ |
| 19920 ){ |
| 19921 int nCell; /* Current number of cells in pNode */ |
| 19922 int nMaxCell; /* Maximum number of cells for pNode */ |
| 19923 |
| 19924 nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; |
| 19925 nCell = NCELL(pNode); |
| 19926 |
| 19927 assert( nCell<=nMaxCell ); |
| 19928 if( nCell<nMaxCell ){ |
| 19929 nodeOverwriteCell(pRtree, pNode, pCell, nCell); |
| 19930 writeInt16(&pNode->zData[2], nCell+1); |
| 19931 pNode->isDirty = 1; |
| 19932 } |
| 19933 |
| 19934 return (nCell==nMaxCell); |
| 19935 } |
| 19936 |
| 19937 /* |
| 19938 ** If the node is dirty, write it out to the database. |
| 19939 */ |
| 19940 static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ |
| 19941 int rc = SQLITE_OK; |
| 19942 if( pNode->isDirty ){ |
| 19943 sqlite3_stmt *p = pRtree->pWriteNode; |
| 19944 if( pNode->iNode ){ |
| 19945 sqlite3_bind_int64(p, 1, pNode->iNode); |
| 19946 }else{ |
| 19947 sqlite3_bind_null(p, 1); |
| 19948 } |
| 19949 sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); |
| 19950 sqlite3_step(p); |
| 19951 pNode->isDirty = 0; |
| 19952 rc = sqlite3_reset(p); |
| 19953 if( pNode->iNode==0 && rc==SQLITE_OK ){ |
| 19954 pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); |
| 19955 nodeHashInsert(pRtree, pNode); |
| 19956 } |
| 19957 } |
| 19958 return rc; |
| 19959 } |
| 19960 |
| 19961 /* |
| 19962 ** Release a reference to a node. If the node is dirty and the reference |
| 19963 ** count drops to zero, the node data is written to the database. |
| 19964 */ |
| 19965 static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ |
| 19966 int rc = SQLITE_OK; |
| 19967 if( pNode ){ |
| 19968 assert( pNode->nRef>0 ); |
| 19969 pNode->nRef--; |
| 19970 if( pNode->nRef==0 ){ |
| 19971 if( pNode->iNode==1 ){ |
| 19972 pRtree->iDepth = -1; |
| 19973 } |
| 19974 if( pNode->pParent ){ |
| 19975 rc = nodeRelease(pRtree, pNode->pParent); |
| 19976 } |
| 19977 if( rc==SQLITE_OK ){ |
| 19978 rc = nodeWrite(pRtree, pNode); |
| 19979 } |
| 19980 nodeHashDelete(pRtree, pNode); |
| 19981 sqlite3_free(pNode); |
| 19982 } |
| 19983 } |
| 19984 return rc; |
| 19985 } |
| 19986 |
| 19987 /* |
| 19988 ** Return the 64-bit integer value associated with cell iCell of |
| 19989 ** node pNode. If pNode is a leaf node, this is a rowid. If it is |
| 19990 ** an internal node, then the 64-bit integer is a child page number. |
| 19991 */ |
| 19992 static i64 nodeGetRowid( |
| 19993 Rtree *pRtree, /* The overall R-Tree */ |
| 19994 RtreeNode *pNode, /* The node from which to extract the ID */ |
| 19995 int iCell /* The cell index from which to extract the ID */ |
| 19996 ){ |
| 19997 assert( iCell<NCELL(pNode) ); |
| 19998 return readInt64(&pNode->zData[4 + pRtree->nBytesPerCell*iCell]); |
| 19999 } |
| 20000 |
| 20001 /* |
| 20002 ** Return coordinate iCoord from cell iCell in node pNode. |
| 20003 */ |
| 20004 static void nodeGetCoord( |
| 20005 Rtree *pRtree, /* The overall R-Tree */ |
| 20006 RtreeNode *pNode, /* The node from which to extract a coordinate */ |
| 20007 int iCell, /* The index of the cell within the node */ |
| 20008 int iCoord, /* Which coordinate to extract */ |
| 20009 RtreeCoord *pCoord /* OUT: Space to write result to */ |
| 20010 ){ |
| 20011 readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); |
| 20012 } |
| 20013 |
| 20014 /* |
| 20015 ** Deserialize cell iCell of node pNode. Populate the structure pointed |
| 20016 ** to by pCell with the results. |
| 20017 */ |
| 20018 static void nodeGetCell( |
| 20019 Rtree *pRtree, /* The overall R-Tree */ |
| 20020 RtreeNode *pNode, /* The node containing the cell to be read */ |
| 20021 int iCell, /* Index of the cell within the node */ |
| 20022 RtreeCell *pCell /* OUT: Write the cell contents here */ |
| 20023 ){ |
| 20024 u8 *pData; |
| 20025 RtreeCoord *pCoord; |
| 20026 int ii = 0; |
| 20027 pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); |
| 20028 pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); |
| 20029 pCoord = pCell->aCoord; |
| 20030 do{ |
| 20031 readCoord(pData, &pCoord[ii]); |
| 20032 readCoord(pData+4, &pCoord[ii+1]); |
| 20033 pData += 8; |
| 20034 ii += 2; |
| 20035 }while( ii<pRtree->nDim2 ); |
| 20036 } |
| 20037 |
| 20038 |
| 20039 /* Forward declaration for the function that does the work of |
| 20040 ** the virtual table module xCreate() and xConnect() methods. |
| 20041 */ |
| 20042 static int rtreeInit( |
| 20043 sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int |
| 20044 ); |
| 20045 |
| 20046 /* |
| 20047 ** Rtree virtual table module xCreate method. |
| 20048 */ |
| 20049 static int rtreeCreate( |
| 20050 sqlite3 *db, |
| 20051 void *pAux, |
| 20052 int argc, const char *const*argv, |
| 20053 sqlite3_vtab **ppVtab, |
| 20054 char **pzErr |
| 20055 ){ |
| 20056 return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); |
| 20057 } |
| 20058 |
| 20059 /* |
| 20060 ** Rtree virtual table module xConnect method. |
| 20061 */ |
| 20062 static int rtreeConnect( |
| 20063 sqlite3 *db, |
| 20064 void *pAux, |
| 20065 int argc, const char *const*argv, |
| 20066 sqlite3_vtab **ppVtab, |
| 20067 char **pzErr |
| 20068 ){ |
| 20069 return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); |
| 20070 } |
| 20071 |
| 20072 /* |
| 20073 ** Increment the r-tree reference count. |
| 20074 */ |
| 20075 static void rtreeReference(Rtree *pRtree){ |
| 20076 pRtree->nBusy++; |
| 20077 } |
| 20078 |
| 20079 /* |
| 20080 ** Decrement the r-tree reference count. When the reference count reaches |
| 20081 ** zero the structure is deleted. |
| 20082 */ |
| 20083 static void rtreeRelease(Rtree *pRtree){ |
| 20084 pRtree->nBusy--; |
| 20085 if( pRtree->nBusy==0 ){ |
| 20086 pRtree->inWrTrans = 0; |
| 20087 pRtree->nCursor = 0; |
| 20088 nodeBlobReset(pRtree); |
| 20089 sqlite3_finalize(pRtree->pWriteNode); |
| 20090 sqlite3_finalize(pRtree->pDeleteNode); |
| 20091 sqlite3_finalize(pRtree->pReadRowid); |
| 20092 sqlite3_finalize(pRtree->pWriteRowid); |
| 20093 sqlite3_finalize(pRtree->pDeleteRowid); |
| 20094 sqlite3_finalize(pRtree->pReadParent); |
| 20095 sqlite3_finalize(pRtree->pWriteParent); |
| 20096 sqlite3_finalize(pRtree->pDeleteParent); |
| 20097 sqlite3_free(pRtree); |
| 20098 } |
| 20099 } |
| 20100 |
| 20101 /* |
| 20102 ** Rtree virtual table module xDisconnect method. |
| 20103 */ |
| 20104 static int rtreeDisconnect(sqlite3_vtab *pVtab){ |
| 20105 rtreeRelease((Rtree *)pVtab); |
| 20106 return SQLITE_OK; |
| 20107 } |
| 20108 |
| 20109 /* |
| 20110 ** Rtree virtual table module xDestroy method. |
| 20111 */ |
| 20112 static int rtreeDestroy(sqlite3_vtab *pVtab){ |
| 20113 Rtree *pRtree = (Rtree *)pVtab; |
| 20114 int rc; |
| 20115 char *zCreate = sqlite3_mprintf( |
| 20116 "DROP TABLE '%q'.'%q_node';" |
| 20117 "DROP TABLE '%q'.'%q_rowid';" |
| 20118 "DROP TABLE '%q'.'%q_parent';", |
| 20119 pRtree->zDb, pRtree->zName, |
| 20120 pRtree->zDb, pRtree->zName, |
| 20121 pRtree->zDb, pRtree->zName |
| 20122 ); |
| 20123 if( !zCreate ){ |
| 20124 rc = SQLITE_NOMEM; |
| 20125 }else{ |
| 20126 nodeBlobReset(pRtree); |
| 20127 rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); |
| 20128 sqlite3_free(zCreate); |
| 20129 } |
| 20130 if( rc==SQLITE_OK ){ |
| 20131 rtreeRelease(pRtree); |
| 20132 } |
| 20133 |
| 20134 return rc; |
| 20135 } |
| 20136 |
| 20137 /* |
| 20138 ** Rtree virtual table module xOpen method. |
| 20139 */ |
| 20140 static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ |
| 20141 int rc = SQLITE_NOMEM; |
| 20142 Rtree *pRtree = (Rtree *)pVTab; |
| 20143 RtreeCursor *pCsr; |
| 20144 |
| 20145 pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); |
| 20146 if( pCsr ){ |
| 20147 memset(pCsr, 0, sizeof(RtreeCursor)); |
| 20148 pCsr->base.pVtab = pVTab; |
| 20149 rc = SQLITE_OK; |
| 20150 pRtree->nCursor++; |
| 20151 } |
| 20152 *ppCursor = (sqlite3_vtab_cursor *)pCsr; |
| 20153 |
| 20154 return rc; |
| 20155 } |
| 20156 |
| 20157 |
| 20158 /* |
| 20159 ** Free the RtreeCursor.aConstraint[] array and its contents. |
| 20160 */ |
| 20161 static void freeCursorConstraints(RtreeCursor *pCsr){ |
| 20162 if( pCsr->aConstraint ){ |
| 20163 int i; /* Used to iterate through constraint array */ |
| 20164 for(i=0; i<pCsr->nConstraint; i++){ |
| 20165 sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; |
| 20166 if( pInfo ){ |
| 20167 if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); |
| 20168 sqlite3_free(pInfo); |
| 20169 } |
| 20170 } |
| 20171 sqlite3_free(pCsr->aConstraint); |
| 20172 pCsr->aConstraint = 0; |
| 20173 } |
| 20174 } |
| 20175 |
| 20176 /* |
| 20177 ** Rtree virtual table module xClose method. |
| 20178 */ |
| 20179 static int rtreeClose(sqlite3_vtab_cursor *cur){ |
| 20180 Rtree *pRtree = (Rtree *)(cur->pVtab); |
| 20181 int ii; |
| 20182 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 20183 assert( pRtree->nCursor>0 ); |
| 20184 freeCursorConstraints(pCsr); |
| 20185 sqlite3_free(pCsr->aPoint); |
| 20186 for(ii=0; ii<RTREE_CACHE_SZ; ii++) nodeRelease(pRtree, pCsr->aNode[ii]); |
| 20187 sqlite3_free(pCsr); |
| 20188 pRtree->nCursor--; |
| 20189 nodeBlobReset(pRtree); |
| 20190 return SQLITE_OK; |
| 20191 } |
| 20192 |
| 20193 /* |
| 20194 ** Rtree virtual table module xEof method. |
| 20195 ** |
| 20196 ** Return non-zero if the cursor does not currently point to a valid |
| 20197 ** record (i.e if the scan has finished), or zero otherwise. |
| 20198 */ |
| 20199 static int rtreeEof(sqlite3_vtab_cursor *cur){ |
| 20200 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 20201 return pCsr->atEOF; |
| 20202 } |
| 20203 |
/*
** Convert raw bits from the on-disk RTree record into a coordinate value.
** The on-disk format is big-endian and needs to be converted for little-
** endian platforms.  The on-disk record stores integer coordinates if
** eInt is true and it stores 32-bit floating point records if eInt is
** false.  a[] is the four bytes of the on-disk record to be decoded.
** Store the results in "r".
**
** There are five versions of this macro.  The last one is generic.  The
** other four are various architecture-specific optimizations.
**
** Every macro parameter is parenthesized in the expansion so that an
** expression argument (for example "p+4" for a) is decoded as a unit.
** The expansion declares a local named "c", so "r" must not itself be
** spelled "c".
*/
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
#define RTREE_DECODE_COORD(eInt, a, r) {                            \
    RtreeCoord c;    /* Coordinate decoded */                       \
    c.u = _byteswap_ulong(*(u32*)(a));                              \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234 && (GCC_VERSION>=4003000 || CLANG_VERSION>=3000000)
#define RTREE_DECODE_COORD(eInt, a, r) {                            \
    RtreeCoord c;    /* Coordinate decoded */                       \
    c.u = __builtin_bswap32(*(u32*)(a));                            \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==1234
#define RTREE_DECODE_COORD(eInt, a, r) {                            \
    RtreeCoord c;    /* Coordinate decoded */                       \
    memcpy(&c.u,(a),4);                                             \
    c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)|                       \
          ((c.u&0xff)<<24)|((c.u&0xff00)<<8);                       \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#elif SQLITE_BYTEORDER==4321
#define RTREE_DECODE_COORD(eInt, a, r) {                            \
    RtreeCoord c;    /* Coordinate decoded */                       \
    memcpy(&c.u,(a),4);                                             \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#else
#define RTREE_DECODE_COORD(eInt, a, r) {                            \
    RtreeCoord c;    /* Coordinate decoded */                       \
    c.u = ((u32)(a)[0]<<24) + ((u32)(a)[1]<<16)                     \
         +((u32)(a)[2]<<8) + (a)[3];                                \
    (r) = (eInt) ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
}
#endif
| 20249 |
| 20250 /* |
| 20251 ** Check the RTree node or entry given by pCellData and p against the MATCH |
| 20252 ** constraint pConstraint. |
| 20253 */ |
| 20254 static int rtreeCallbackConstraint( |
| 20255 RtreeConstraint *pConstraint, /* The constraint to test */ |
| 20256 int eInt, /* True if RTree holding integer coordinates */ |
| 20257 u8 *pCellData, /* Raw cell content */ |
| 20258 RtreeSearchPoint *pSearch, /* Container of this cell */ |
| 20259 sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ |
| 20260 int *peWithin /* OUT: visibility of the cell */ |
| 20261 ){ |
| 20262 sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ |
| 20263 int nCoord = pInfo->nCoord; /* No. of coordinates */ |
| 20264 int rc; /* Callback return code */ |
| 20265 RtreeCoord c; /* Translator union */ |
| 20266 sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ |
| 20267 |
| 20268 assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); |
| 20269 assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); |
| 20270 |
| 20271 if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ |
| 20272 pInfo->iRowid = readInt64(pCellData); |
| 20273 } |
| 20274 pCellData += 8; |
| 20275 #ifndef SQLITE_RTREE_INT_ONLY |
| 20276 if( eInt==0 ){ |
| 20277 switch( nCoord ){ |
| 20278 case 10: readCoord(pCellData+36, &c); aCoord[9] = c.f; |
| 20279 readCoord(pCellData+32, &c); aCoord[8] = c.f; |
| 20280 case 8: readCoord(pCellData+28, &c); aCoord[7] = c.f; |
| 20281 readCoord(pCellData+24, &c); aCoord[6] = c.f; |
| 20282 case 6: readCoord(pCellData+20, &c); aCoord[5] = c.f; |
| 20283 readCoord(pCellData+16, &c); aCoord[4] = c.f; |
| 20284 case 4: readCoord(pCellData+12, &c); aCoord[3] = c.f; |
| 20285 readCoord(pCellData+8, &c); aCoord[2] = c.f; |
| 20286 default: readCoord(pCellData+4, &c); aCoord[1] = c.f; |
| 20287 readCoord(pCellData, &c); aCoord[0] = c.f; |
| 20288 } |
| 20289 }else |
| 20290 #endif |
| 20291 { |
| 20292 switch( nCoord ){ |
| 20293 case 10: readCoord(pCellData+36, &c); aCoord[9] = c.i; |
| 20294 readCoord(pCellData+32, &c); aCoord[8] = c.i; |
| 20295 case 8: readCoord(pCellData+28, &c); aCoord[7] = c.i; |
| 20296 readCoord(pCellData+24, &c); aCoord[6] = c.i; |
| 20297 case 6: readCoord(pCellData+20, &c); aCoord[5] = c.i; |
| 20298 readCoord(pCellData+16, &c); aCoord[4] = c.i; |
| 20299 case 4: readCoord(pCellData+12, &c); aCoord[3] = c.i; |
| 20300 readCoord(pCellData+8, &c); aCoord[2] = c.i; |
| 20301 default: readCoord(pCellData+4, &c); aCoord[1] = c.i; |
| 20302 readCoord(pCellData, &c); aCoord[0] = c.i; |
| 20303 } |
| 20304 } |
| 20305 if( pConstraint->op==RTREE_MATCH ){ |
| 20306 int eWithin = 0; |
| 20307 rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, |
| 20308 nCoord, aCoord, &eWithin); |
| 20309 if( eWithin==0 ) *peWithin = NOT_WITHIN; |
| 20310 *prScore = RTREE_ZERO; |
| 20311 }else{ |
| 20312 pInfo->aCoord = aCoord; |
| 20313 pInfo->iLevel = pSearch->iLevel - 1; |
| 20314 pInfo->rScore = pInfo->rParentScore = pSearch->rScore; |
| 20315 pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; |
| 20316 rc = pConstraint->u.xQueryFunc(pInfo); |
| 20317 if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; |
| 20318 if( pInfo->rScore<*prScore || *prScore<RTREE_ZERO ){ |
| 20319 *prScore = pInfo->rScore; |
| 20320 } |
| 20321 } |
| 20322 return rc; |
| 20323 } |
| 20324 |
| 20325 /* |
| 20326 ** Check the internal RTree node given by pCellData against constraint p. |
| 20327 ** If this constraint cannot be satisfied by any child within the node, |
| 20328 ** set *peWithin to NOT_WITHIN. |
| 20329 */ |
| 20330 static void rtreeNonleafConstraint( |
| 20331 RtreeConstraint *p, /* The constraint to test */ |
| 20332 int eInt, /* True if RTree holds integer coordinates */ |
| 20333 u8 *pCellData, /* Raw cell content as appears on disk */ |
| 20334 int *peWithin /* Adjust downward, as appropriate */ |
| 20335 ){ |
| 20336 sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ |
| 20337 |
| 20338 /* p->iCoord might point to either a lower or upper bound coordinate |
| 20339 ** in a coordinate pair. But make pCellData point to the lower bound. |
| 20340 */ |
| 20341 pCellData += 8 + 4*(p->iCoord&0xfe); |
| 20342 |
| 20343 assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE |
| 20344 || p->op==RTREE_GT || p->op==RTREE_EQ ); |
| 20345 assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ |
| 20346 switch( p->op ){ |
| 20347 case RTREE_LE: |
| 20348 case RTREE_LT: |
| 20349 case RTREE_EQ: |
| 20350 RTREE_DECODE_COORD(eInt, pCellData, val); |
| 20351 /* val now holds the lower bound of the coordinate pair */ |
| 20352 if( p->u.rValue>=val ) return; |
| 20353 if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ |
| 20354 /* Fall through for the RTREE_EQ case */ |
| 20355 |
| 20356 default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ |
| 20357 pCellData += 4; |
| 20358 RTREE_DECODE_COORD(eInt, pCellData, val); |
| 20359 /* val now holds the upper bound of the coordinate pair */ |
| 20360 if( p->u.rValue<=val ) return; |
| 20361 } |
| 20362 *peWithin = NOT_WITHIN; |
| 20363 } |
| 20364 |
| 20365 /* |
| 20366 ** Check the leaf RTree cell given by pCellData against constraint p. |
| 20367 ** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. |
| 20368 ** If the constraint is satisfied, leave *peWithin unchanged. |
| 20369 ** |
| 20370 ** The constraint is of the form: xN op $val |
| 20371 ** |
| 20372 ** The op is given by p->op. The xN is p->iCoord-th coordinate in |
| 20373 ** pCellData. $val is given by p->u.rValue. |
| 20374 */ |
| 20375 static void rtreeLeafConstraint( |
| 20376 RtreeConstraint *p, /* The constraint to test */ |
| 20377 int eInt, /* True if RTree holds integer coordinates */ |
| 20378 u8 *pCellData, /* Raw cell content as appears on disk */ |
| 20379 int *peWithin /* Adjust downward, as appropriate */ |
| 20380 ){ |
| 20381 RtreeDValue xN; /* Coordinate value converted to a double */ |
| 20382 |
| 20383 assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE |
| 20384 || p->op==RTREE_GT || p->op==RTREE_EQ ); |
| 20385 pCellData += 8 + p->iCoord*4; |
| 20386 assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ |
| 20387 RTREE_DECODE_COORD(eInt, pCellData, xN); |
| 20388 switch( p->op ){ |
| 20389 case RTREE_LE: if( xN <= p->u.rValue ) return; break; |
| 20390 case RTREE_LT: if( xN < p->u.rValue ) return; break; |
| 20391 case RTREE_GE: if( xN >= p->u.rValue ) return; break; |
| 20392 case RTREE_GT: if( xN > p->u.rValue ) return; break; |
| 20393 default: if( xN == p->u.rValue ) return; break; |
| 20394 } |
| 20395 *peWithin = NOT_WITHIN; |
| 20396 } |
| 20397 |
| 20398 /* |
| 20399 ** One of the cells in node pNode is guaranteed to have a 64-bit |
| 20400 ** integer value equal to iRowid. Return the index of this cell. |
| 20401 */ |
| 20402 static int nodeRowidIndex( |
| 20403 Rtree *pRtree, |
| 20404 RtreeNode *pNode, |
| 20405 i64 iRowid, |
| 20406 int *piIndex |
| 20407 ){ |
| 20408 int ii; |
| 20409 int nCell = NCELL(pNode); |
| 20410 assert( nCell<200 ); |
| 20411 for(ii=0; ii<nCell; ii++){ |
| 20412 if( nodeGetRowid(pRtree, pNode, ii)==iRowid ){ |
| 20413 *piIndex = ii; |
| 20414 return SQLITE_OK; |
| 20415 } |
| 20416 } |
| 20417 return SQLITE_CORRUPT_VTAB; |
| 20418 } |
| 20419 |
| 20420 /* |
| 20421 ** Return the index of the cell containing a pointer to node pNode |
| 20422 ** in its parent. If pNode is the root node, return -1. |
| 20423 */ |
| 20424 static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){ |
| 20425 RtreeNode *pParent = pNode->pParent; |
| 20426 if( pParent ){ |
| 20427 return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); |
| 20428 } |
| 20429 *piIndex = -1; |
| 20430 return SQLITE_OK; |
| 20431 } |
| 20432 |
| 20433 /* |
| 20434 ** Compare two search points. Return negative, zero, or positive if the first |
| 20435 ** is less than, equal to, or greater than the second. |
| 20436 ** |
| 20437 ** The rScore is the primary key. Smaller rScore values come first. |
| 20438 ** If the rScore is a tie, then use iLevel as the tie breaker with smaller |
| 20439 ** iLevel values coming first. In this way, if rScore is the same for all |
| 20440 ** SearchPoints, then iLevel becomes the deciding factor and the result |
| 20441 ** is a depth-first search, which is the desired default behavior. |
| 20442 */ |
| 20443 static int rtreeSearchPointCompare( |
| 20444 const RtreeSearchPoint *pA, |
| 20445 const RtreeSearchPoint *pB |
| 20446 ){ |
| 20447 if( pA->rScore<pB->rScore ) return -1; |
| 20448 if( pA->rScore>pB->rScore ) return +1; |
| 20449 if( pA->iLevel<pB->iLevel ) return -1; |
| 20450 if( pA->iLevel>pB->iLevel ) return +1; |
| 20451 return 0; |
| 20452 } |
| 20453 |
| 20454 /* |
| 20455 ** Interchange two search points in a cursor. |
| 20456 */ |
| 20457 static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ |
| 20458 RtreeSearchPoint t = p->aPoint[i]; |
| 20459 assert( i<j ); |
| 20460 p->aPoint[i] = p->aPoint[j]; |
| 20461 p->aPoint[j] = t; |
| 20462 i++; j++; |
| 20463 if( i<RTREE_CACHE_SZ ){ |
| 20464 if( j>=RTREE_CACHE_SZ ){ |
| 20465 nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); |
| 20466 p->aNode[i] = 0; |
| 20467 }else{ |
| 20468 RtreeNode *pTemp = p->aNode[i]; |
| 20469 p->aNode[i] = p->aNode[j]; |
| 20470 p->aNode[j] = pTemp; |
| 20471 } |
| 20472 } |
| 20473 } |
| 20474 |
| 20475 /* |
| 20476 ** Return the search point with the lowest current score. |
| 20477 */ |
| 20478 static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ |
| 20479 return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; |
| 20480 } |
| 20481 |
| 20482 /* |
| 20483 ** Get the RtreeNode for the search point with the lowest score. |
| 20484 */ |
| 20485 static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ |
| 20486 sqlite3_int64 id; |
| 20487 int ii = 1 - pCur->bPoint; |
| 20488 assert( ii==0 || ii==1 ); |
| 20489 assert( pCur->bPoint || pCur->nPoint ); |
| 20490 if( pCur->aNode[ii]==0 ){ |
| 20491 assert( pRC!=0 ); |
| 20492 id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; |
| 20493 *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); |
| 20494 } |
| 20495 return pCur->aNode[ii]; |
| 20496 } |
| 20497 |
| 20498 /* |
| 20499 ** Push a new element onto the priority queue |
| 20500 */ |
| 20501 static RtreeSearchPoint *rtreeEnqueue( |
| 20502 RtreeCursor *pCur, /* The cursor */ |
| 20503 RtreeDValue rScore, /* Score for the new search point */ |
| 20504 u8 iLevel /* Level for the new search point */ |
| 20505 ){ |
| 20506 int i, j; |
| 20507 RtreeSearchPoint *pNew; |
| 20508 if( pCur->nPoint>=pCur->nPointAlloc ){ |
| 20509 int nNew = pCur->nPointAlloc*2 + 8; |
| 20510 pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); |
| 20511 if( pNew==0 ) return 0; |
| 20512 pCur->aPoint = pNew; |
| 20513 pCur->nPointAlloc = nNew; |
| 20514 } |
| 20515 i = pCur->nPoint++; |
| 20516 pNew = pCur->aPoint + i; |
| 20517 pNew->rScore = rScore; |
| 20518 pNew->iLevel = iLevel; |
| 20519 assert( iLevel<=RTREE_MAX_DEPTH ); |
| 20520 while( i>0 ){ |
| 20521 RtreeSearchPoint *pParent; |
| 20522 j = (i-1)/2; |
| 20523 pParent = pCur->aPoint + j; |
| 20524 if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; |
| 20525 rtreeSearchPointSwap(pCur, j, i); |
| 20526 i = j; |
| 20527 pNew = pParent; |
| 20528 } |
| 20529 return pNew; |
| 20530 } |
| 20531 |
| 20532 /* |
| 20533 ** Allocate a new RtreeSearchPoint and return a pointer to it. Return |
| 20534 ** NULL if malloc fails. |
| 20535 */ |
| 20536 static RtreeSearchPoint *rtreeSearchPointNew( |
| 20537 RtreeCursor *pCur, /* The cursor */ |
| 20538 RtreeDValue rScore, /* Score for the new search point */ |
| 20539 u8 iLevel /* Level for the new search point */ |
| 20540 ){ |
| 20541 RtreeSearchPoint *pNew, *pFirst; |
| 20542 pFirst = rtreeSearchPointFirst(pCur); |
| 20543 pCur->anQueue[iLevel]++; |
| 20544 if( pFirst==0 |
| 20545 || pFirst->rScore>rScore |
| 20546 || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) |
| 20547 ){ |
| 20548 if( pCur->bPoint ){ |
| 20549 int ii; |
| 20550 pNew = rtreeEnqueue(pCur, rScore, iLevel); |
| 20551 if( pNew==0 ) return 0; |
| 20552 ii = (int)(pNew - pCur->aPoint) + 1; |
| 20553 if( ii<RTREE_CACHE_SZ ){ |
| 20554 assert( pCur->aNode[ii]==0 ); |
| 20555 pCur->aNode[ii] = pCur->aNode[0]; |
| 20556 }else{ |
| 20557 nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); |
| 20558 } |
| 20559 pCur->aNode[0] = 0; |
| 20560 *pNew = pCur->sPoint; |
| 20561 } |
| 20562 pCur->sPoint.rScore = rScore; |
| 20563 pCur->sPoint.iLevel = iLevel; |
| 20564 pCur->bPoint = 1; |
| 20565 return &pCur->sPoint; |
| 20566 }else{ |
| 20567 return rtreeEnqueue(pCur, rScore, iLevel); |
| 20568 } |
| 20569 } |
| 20570 |
#if 0
/* Debugging aids that print the RtreeSearchPoint queue.  This code is
** compiled out; change the "#if 0" above to enable it. */

/* Print one search point.  idx<0 denotes the stand-alone sPoint. */
static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){
  if( idx>=0 ){
    printf("%2d", idx);
  }else{
    printf(" s");
  }
  printf(" %d.%05lld.%02d %g %d",
    p->iLevel, p->id, p->iCell, p->rScore, p->eWithin
  );
  idx++;
  if( idx>=RTREE_CACHE_SZ ){
    printf("\n");
  }else{
    printf(" %p\n", pCur->aNode[idx]);
  }
}

/* Print the whole queue of pCur, labelled with zPrefix. */
static void traceQueue(RtreeCursor *pCur, const char *zPrefix){
  int ii = 0;
  printf("=== %9s ", zPrefix);
  if( pCur->bPoint ){
    tracePoint(&pCur->sPoint, -1, pCur);
  }
  while( ii<pCur->nPoint ){
    if( ii>0 || pCur->bPoint ) printf(" ");
    tracePoint(&pCur->aPoint[ii], ii, pCur);
    ii++;
  }
}
# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B)
#else
# define RTREE_QUEUE_TRACE(A,B) /* no-op */
#endif
| 20600 |
| 20601 /* Remove the search point with the lowest current score. |
| 20602 */ |
| 20603 static void rtreeSearchPointPop(RtreeCursor *p){ |
| 20604 int i, j, k, n; |
| 20605 i = 1 - p->bPoint; |
| 20606 assert( i==0 || i==1 ); |
| 20607 if( p->aNode[i] ){ |
| 20608 nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); |
| 20609 p->aNode[i] = 0; |
| 20610 } |
| 20611 if( p->bPoint ){ |
| 20612 p->anQueue[p->sPoint.iLevel]--; |
| 20613 p->bPoint = 0; |
| 20614 }else if( p->nPoint ){ |
| 20615 p->anQueue[p->aPoint[0].iLevel]--; |
| 20616 n = --p->nPoint; |
| 20617 p->aPoint[0] = p->aPoint[n]; |
| 20618 if( n<RTREE_CACHE_SZ-1 ){ |
| 20619 p->aNode[1] = p->aNode[n+1]; |
| 20620 p->aNode[n+1] = 0; |
| 20621 } |
| 20622 i = 0; |
| 20623 while( (j = i*2+1)<n ){ |
| 20624 k = j+1; |
| 20625 if( k<n && rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[j])<0 ){ |
| 20626 if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ |
| 20627 rtreeSearchPointSwap(p, i, k); |
| 20628 i = k; |
| 20629 }else{ |
| 20630 break; |
| 20631 } |
| 20632 }else{ |
| 20633 if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ |
| 20634 rtreeSearchPointSwap(p, i, j); |
| 20635 i = j; |
| 20636 }else{ |
| 20637 break; |
| 20638 } |
| 20639 } |
| 20640 } |
| 20641 } |
| 20642 } |
| 20643 |
| 20644 |
| 20645 /* |
| 20646 ** Continue the search on cursor pCur until the front of the queue |
| 20647 ** contains an entry suitable for returning as a result-set row, |
| 20648 ** or until the RtreeSearchPoint queue is empty, indicating that the |
| 20649 ** query has completed. |
| 20650 */ |
| 20651 static int rtreeStepToLeaf(RtreeCursor *pCur){ |
| 20652 RtreeSearchPoint *p; |
| 20653 Rtree *pRtree = RTREE_OF_CURSOR(pCur); |
| 20654 RtreeNode *pNode; |
| 20655 int eWithin; |
| 20656 int rc = SQLITE_OK; |
| 20657 int nCell; |
| 20658 int nConstraint = pCur->nConstraint; |
| 20659 int ii; |
| 20660 int eInt; |
| 20661 RtreeSearchPoint x; |
| 20662 |
| 20663 eInt = pRtree->eCoordType==RTREE_COORD_INT32; |
| 20664 while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ |
| 20665 pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); |
| 20666 if( rc ) return rc; |
| 20667 nCell = NCELL(pNode); |
| 20668 assert( nCell<200 ); |
| 20669 while( p->iCell<nCell ){ |
| 20670 sqlite3_rtree_dbl rScore = (sqlite3_rtree_dbl)-1; |
| 20671 u8 *pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell); |
| 20672 eWithin = FULLY_WITHIN; |
| 20673 for(ii=0; ii<nConstraint; ii++){ |
| 20674 RtreeConstraint *pConstraint = pCur->aConstraint + ii; |
| 20675 if( pConstraint->op>=RTREE_MATCH ){ |
| 20676 rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, |
| 20677 &rScore, &eWithin); |
| 20678 if( rc ) return rc; |
| 20679 }else if( p->iLevel==1 ){ |
| 20680 rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); |
| 20681 }else{ |
| 20682 rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); |
| 20683 } |
| 20684 if( eWithin==NOT_WITHIN ) break; |
| 20685 } |
| 20686 p->iCell++; |
| 20687 if( eWithin==NOT_WITHIN ) continue; |
| 20688 x.iLevel = p->iLevel - 1; |
| 20689 if( x.iLevel ){ |
| 20690 x.id = readInt64(pCellData); |
| 20691 x.iCell = 0; |
| 20692 }else{ |
| 20693 x.id = p->id; |
| 20694 x.iCell = p->iCell - 1; |
| 20695 } |
| 20696 if( p->iCell>=nCell ){ |
| 20697 RTREE_QUEUE_TRACE(pCur, "POP-S:"); |
| 20698 rtreeSearchPointPop(pCur); |
| 20699 } |
| 20700 if( rScore<RTREE_ZERO ) rScore = RTREE_ZERO; |
| 20701 p = rtreeSearchPointNew(pCur, rScore, x.iLevel); |
| 20702 if( p==0 ) return SQLITE_NOMEM; |
| 20703 p->eWithin = (u8)eWithin; |
| 20704 p->id = x.id; |
| 20705 p->iCell = x.iCell; |
| 20706 RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); |
| 20707 break; |
| 20708 } |
| 20709 if( p->iCell>=nCell ){ |
| 20710 RTREE_QUEUE_TRACE(pCur, "POP-Se:"); |
| 20711 rtreeSearchPointPop(pCur); |
| 20712 } |
| 20713 } |
| 20714 pCur->atEOF = p==0; |
| 20715 return SQLITE_OK; |
| 20716 } |
| 20717 |
| 20718 /* |
| 20719 ** Rtree virtual table module xNext method. |
| 20720 */ |
| 20721 static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ |
| 20722 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20723 int rc = SQLITE_OK; |
| 20724 |
| 20725 /* Move to the next entry that matches the configured constraints. */ |
| 20726 RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); |
| 20727 rtreeSearchPointPop(pCsr); |
| 20728 rc = rtreeStepToLeaf(pCsr); |
| 20729 return rc; |
| 20730 } |
| 20731 |
| 20732 /* |
| 20733 ** Rtree virtual table module xRowid method. |
| 20734 */ |
| 20735 static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ |
| 20736 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20737 RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); |
| 20738 int rc = SQLITE_OK; |
| 20739 RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); |
| 20740 if( rc==SQLITE_OK && p ){ |
| 20741 *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); |
| 20742 } |
| 20743 return rc; |
| 20744 } |
| 20745 |
| 20746 /* |
| 20747 ** Rtree virtual table module xColumn method. |
| 20748 */ |
| 20749 static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ |
| 20750 Rtree *pRtree = (Rtree *)cur->pVtab; |
| 20751 RtreeCursor *pCsr = (RtreeCursor *)cur; |
| 20752 RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); |
| 20753 RtreeCoord c; |
| 20754 int rc = SQLITE_OK; |
| 20755 RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); |
| 20756 |
| 20757 if( rc ) return rc; |
| 20758 if( p==0 ) return SQLITE_OK; |
| 20759 if( i==0 ){ |
| 20760 sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); |
| 20761 }else{ |
| 20762 nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); |
| 20763 #ifndef SQLITE_RTREE_INT_ONLY |
| 20764 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 20765 sqlite3_result_double(ctx, c.f); |
| 20766 }else |
| 20767 #endif |
| 20768 { |
| 20769 assert( pRtree->eCoordType==RTREE_COORD_INT32 ); |
| 20770 sqlite3_result_int(ctx, c.i); |
| 20771 } |
| 20772 } |
| 20773 return SQLITE_OK; |
| 20774 } |
| 20775 |
| 20776 /* |
| 20777 ** Use nodeAcquire() to obtain the leaf node containing the record with |
| 20778 ** rowid iRowid. If successful, set *ppLeaf to point to the node and |
| 20779 ** return SQLITE_OK. If there is no such record in the table, set |
| 20780 ** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf |
| 20781 ** to zero and return an SQLite error code. |
| 20782 */ |
| 20783 static int findLeafNode( |
| 20784 Rtree *pRtree, /* RTree to search */ |
| 20785 i64 iRowid, /* The rowid searching for */ |
| 20786 RtreeNode **ppLeaf, /* Write the node here */ |
| 20787 sqlite3_int64 *piNode /* Write the node-id here */ |
| 20788 ){ |
| 20789 int rc; |
| 20790 *ppLeaf = 0; |
| 20791 sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); |
| 20792 if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ |
| 20793 i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); |
| 20794 if( piNode ) *piNode = iNode; |
| 20795 rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); |
| 20796 sqlite3_reset(pRtree->pReadRowid); |
| 20797 }else{ |
| 20798 rc = sqlite3_reset(pRtree->pReadRowid); |
| 20799 } |
| 20800 return rc; |
| 20801 } |
| 20802 |
| 20803 /* |
| 20804 ** This function is called to configure the RtreeConstraint object passed |
| 20805 ** as the second argument for a MATCH constraint. The value passed as the |
| 20806 ** first argument to this function is the right-hand operand to the MATCH |
| 20807 ** operator. |
| 20808 */ |
| 20809 static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ |
| 20810 RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ |
| 20811 sqlite3_rtree_query_info *pInfo; /* Callback information */ |
| 20812 int nBlob; /* Size of the geometry function blob */ |
| 20813 int nExpected; /* Expected size of the BLOB */ |
| 20814 |
| 20815 /* Check that value is actually a blob. */ |
| 20816 if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; |
| 20817 |
| 20818 /* Check that the blob is roughly the right size. */ |
| 20819 nBlob = sqlite3_value_bytes(pValue); |
| 20820 if( nBlob<(int)sizeof(RtreeMatchArg) ){ |
| 20821 return SQLITE_ERROR; |
| 20822 } |
| 20823 |
| 20824 pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); |
| 20825 if( !pInfo ) return SQLITE_NOMEM; |
| 20826 memset(pInfo, 0, sizeof(*pInfo)); |
| 20827 pBlob = (RtreeMatchArg*)&pInfo[1]; |
| 20828 |
| 20829 memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); |
| 20830 nExpected = (int)(sizeof(RtreeMatchArg) + |
| 20831 pBlob->nParam*sizeof(sqlite3_value*) + |
| 20832 (pBlob->nParam-1)*sizeof(RtreeDValue)); |
| 20833 if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ |
| 20834 sqlite3_free(pInfo); |
| 20835 return SQLITE_ERROR; |
| 20836 } |
| 20837 pInfo->pContext = pBlob->cb.pContext; |
| 20838 pInfo->nParam = pBlob->nParam; |
| 20839 pInfo->aParam = pBlob->aParam; |
| 20840 pInfo->apSqlParam = pBlob->apSqlParam; |
| 20841 |
| 20842 if( pBlob->cb.xGeom ){ |
| 20843 pCons->u.xGeom = pBlob->cb.xGeom; |
| 20844 }else{ |
| 20845 pCons->op = RTREE_QUERY; |
| 20846 pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; |
| 20847 } |
| 20848 pCons->pInfo = pInfo; |
| 20849 return SQLITE_OK; |
| 20850 } |
| 20851 |
| 20852 /* |
| 20853 ** Rtree virtual table module xFilter method. |
| 20854 */ |
| 20855 static int rtreeFilter( |
| 20856 sqlite3_vtab_cursor *pVtabCursor, |
| 20857 int idxNum, const char *idxStr, |
| 20858 int argc, sqlite3_value **argv |
| 20859 ){ |
| 20860 Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; |
| 20861 RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; |
| 20862 RtreeNode *pRoot = 0; |
| 20863 int ii; |
| 20864 int rc = SQLITE_OK; |
| 20865 int iCell = 0; |
| 20866 |
| 20867 rtreeReference(pRtree); |
| 20868 |
| 20869 /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ |
| 20870 freeCursorConstraints(pCsr); |
| 20871 sqlite3_free(pCsr->aPoint); |
| 20872 memset(pCsr, 0, sizeof(RtreeCursor)); |
| 20873 pCsr->base.pVtab = (sqlite3_vtab*)pRtree; |
| 20874 |
| 20875 pCsr->iStrategy = idxNum; |
| 20876 if( idxNum==1 ){ |
| 20877 /* Special case - lookup by rowid. */ |
| 20878 RtreeNode *pLeaf; /* Leaf on which the required cell resides */ |
| 20879 RtreeSearchPoint *p; /* Search point for the leaf */ |
| 20880 i64 iRowid = sqlite3_value_int64(argv[0]); |
| 20881 i64 iNode = 0; |
| 20882 rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); |
| 20883 if( rc==SQLITE_OK && pLeaf!=0 ){ |
| 20884 p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); |
| 20885 assert( p!=0 ); /* Always returns pCsr->sPoint */ |
| 20886 pCsr->aNode[0] = pLeaf; |
| 20887 p->id = iNode; |
| 20888 p->eWithin = PARTLY_WITHIN; |
| 20889 rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); |
| 20890 p->iCell = (u8)iCell; |
| 20891 RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); |
| 20892 }else{ |
| 20893 pCsr->atEOF = 1; |
| 20894 } |
| 20895 }else{ |
| 20896 /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array |
| 20897 ** with the configured constraints. |
| 20898 */ |
| 20899 rc = nodeAcquire(pRtree, 1, 0, &pRoot); |
| 20900 if( rc==SQLITE_OK && argc>0 ){ |
| 20901 pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); |
| 20902 pCsr->nConstraint = argc; |
| 20903 if( !pCsr->aConstraint ){ |
| 20904 rc = SQLITE_NOMEM; |
| 20905 }else{ |
| 20906 memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); |
| 20907 memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); |
| 20908 assert( (idxStr==0 && argc==0) |
| 20909 || (idxStr && (int)strlen(idxStr)==argc*2) ); |
| 20910 for(ii=0; ii<argc; ii++){ |
| 20911 RtreeConstraint *p = &pCsr->aConstraint[ii]; |
| 20912 p->op = idxStr[ii*2]; |
| 20913 p->iCoord = idxStr[ii*2+1]-'0'; |
| 20914 if( p->op>=RTREE_MATCH ){ |
| 20915 /* A MATCH operator. The right-hand-side must be a blob that |
| 20916 ** can be cast into an RtreeMatchArg object. One created using |
| 20917 ** an sqlite3_rtree_geometry_callback() SQL user function. |
| 20918 */ |
| 20919 rc = deserializeGeometry(argv[ii], p); |
| 20920 if( rc!=SQLITE_OK ){ |
| 20921 break; |
| 20922 } |
| 20923 p->pInfo->nCoord = pRtree->nDim2; |
| 20924 p->pInfo->anQueue = pCsr->anQueue; |
| 20925 p->pInfo->mxLevel = pRtree->iDepth + 1; |
| 20926 }else{ |
| 20927 #ifdef SQLITE_RTREE_INT_ONLY |
| 20928 p->u.rValue = sqlite3_value_int64(argv[ii]); |
| 20929 #else |
| 20930 p->u.rValue = sqlite3_value_double(argv[ii]); |
| 20931 #endif |
| 20932 } |
| 20933 } |
| 20934 } |
| 20935 } |
| 20936 if( rc==SQLITE_OK ){ |
| 20937 RtreeSearchPoint *pNew; |
| 20938 pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, (u8)(pRtree->iDepth+1)); |
| 20939 if( pNew==0 ) return SQLITE_NOMEM; |
| 20940 pNew->id = 1; |
| 20941 pNew->iCell = 0; |
| 20942 pNew->eWithin = PARTLY_WITHIN; |
| 20943 assert( pCsr->bPoint==1 ); |
| 20944 pCsr->aNode[0] = pRoot; |
| 20945 pRoot = 0; |
| 20946 RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); |
| 20947 rc = rtreeStepToLeaf(pCsr); |
| 20948 } |
| 20949 } |
| 20950 |
| 20951 nodeRelease(pRtree, pRoot); |
| 20952 rtreeRelease(pRtree); |
| 20953 return rc; |
| 20954 } |
| 20955 |
| 20956 /* |
| 20957 ** Rtree virtual table module xBestIndex method. There are three |
| 20958 ** table scan strategies to choose from (in order from most to |
| 20959 ** least desirable): |
| 20960 ** |
| 20961 ** idxNum idxStr Strategy |
| 20962 ** ------------------------------------------------ |
| 20963 ** 1 Unused Direct lookup by rowid. |
| 20964 ** 2 See below R-tree query or full-table scan. |
| 20965 ** ------------------------------------------------ |
| 20966 ** |
| 20967 ** If strategy 1 is used, then idxStr is not meaningful. If strategy |
| 20968 ** 2 is used, idxStr is formatted to contain 2 bytes for each |
| 20969 ** constraint used. The first two bytes of idxStr correspond to |
| 20970 ** the constraint in sqlite3_index_info.aConstraintUsage[] with |
| 20971 ** (argvIndex==1) etc. |
| 20972 ** |
| 20973 ** The first of each pair of bytes in idxStr identifies the constraint |
| 20974 ** operator as follows: |
| 20975 ** |
| 20976 ** Operator Byte Value |
| 20977 ** ---------------------- |
| 20978 ** = 0x41 ('A') |
| 20979 ** <= 0x42 ('B') |
| 20980 ** < 0x43 ('C') |
| 20981 ** >= 0x44 ('D') |
| 20982 ** > 0x45 ('E') |
| 20983 ** MATCH 0x46 ('F') |
| 20984 ** ---------------------- |
| 20985 ** |
| 20986 ** The second of each pair of bytes identifies the coordinate column |
| 20987 ** to which the constraint applies. The leftmost coordinate column |
| 20988 ** is 'a', the second from the left 'b' etc. |
| 20989 */ |
| 20990 static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ |
| 20991 Rtree *pRtree = (Rtree*)tab; |
| 20992 int rc = SQLITE_OK; |
| 20993 int ii; |
| 20994 int bMatch = 0; /* True if there exists a MATCH constraint */ |
| 20995 i64 nRow; /* Estimated rows returned by this scan */ |
| 20996 |
| 20997 int iIdx = 0; |
| 20998 char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; |
| 20999 memset(zIdxStr, 0, sizeof(zIdxStr)); |
| 21000 |
| 21001 /* Check if there exists a MATCH constraint - even an unusable one. If there |
| 21002 ** is, do not consider the lookup-by-rowid plan as using such a plan would |
| 21003 ** require the VDBE to evaluate the MATCH constraint, which is not currently |
| 21004 ** possible. */ |
| 21005 for(ii=0; ii<pIdxInfo->nConstraint; ii++){ |
| 21006 if( pIdxInfo->aConstraint[ii].op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |
| 21007 bMatch = 1; |
| 21008 } |
| 21009 } |
| 21010 |
| 21011 assert( pIdxInfo->idxStr==0 ); |
| 21012 for(ii=0; ii<pIdxInfo->nConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ |
| 21013 struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; |
| 21014 |
| 21015 if( bMatch==0 && p->usable |
| 21016 && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ |
| 21017 ){ |
| 21018 /* We have an equality constraint on the rowid. Use strategy 1. */ |
| 21019 int jj; |
| 21020 for(jj=0; jj<ii; jj++){ |
| 21021 pIdxInfo->aConstraintUsage[jj].argvIndex = 0; |
| 21022 pIdxInfo->aConstraintUsage[jj].omit = 0; |
| 21023 } |
| 21024 pIdxInfo->idxNum = 1; |
| 21025 pIdxInfo->aConstraintUsage[ii].argvIndex = 1; |
| 21026 pIdxInfo->aConstraintUsage[jj].omit = 1; |
| 21027 |
| 21028 /* This strategy involves a two rowid lookups on an B-Tree structures |
| 21029 ** and then a linear search of an R-Tree node. This should be |
| 21030 ** considered almost as quick as a direct rowid lookup (for which |
| 21031 ** sqlite uses an internal cost of 0.0). It is expected to return |
| 21032 ** a single row. |
| 21033 */ |
| 21034 pIdxInfo->estimatedCost = 30.0; |
| 21035 pIdxInfo->estimatedRows = 1; |
| 21036 return SQLITE_OK; |
| 21037 } |
| 21038 |
| 21039 if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ |
| 21040 u8 op; |
| 21041 switch( p->op ){ |
| 21042 case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; |
| 21043 case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; |
| 21044 case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; |
| 21045 case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; |
| 21046 case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; |
| 21047 default: |
| 21048 assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); |
| 21049 op = RTREE_MATCH; |
| 21050 break; |
| 21051 } |
| 21052 zIdxStr[iIdx++] = op; |
| 21053 zIdxStr[iIdx++] = (char)(p->iColumn - 1 + '0'); |
| 21054 pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); |
| 21055 pIdxInfo->aConstraintUsage[ii].omit = 1; |
| 21056 } |
| 21057 } |
| 21058 |
| 21059 pIdxInfo->idxNum = 2; |
| 21060 pIdxInfo->needToFreeIdxStr = 1; |
| 21061 if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ |
| 21062 return SQLITE_NOMEM; |
| 21063 } |
| 21064 |
| 21065 nRow = pRtree->nRowEst >> (iIdx/2); |
| 21066 pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; |
| 21067 pIdxInfo->estimatedRows = nRow; |
| 21068 |
| 21069 return rc; |
| 21070 } |
| 21071 |
| 21072 /* |
| 21073 ** Return the N-dimensional volumn of the cell stored in *p. |
| 21074 */ |
| 21075 static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ |
| 21076 RtreeDValue area = (RtreeDValue)1; |
| 21077 assert( pRtree->nDim>=1 && pRtree->nDim<=5 ); |
| 21078 #ifndef SQLITE_RTREE_INT_ONLY |
| 21079 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 21080 switch( pRtree->nDim ){ |
| 21081 case 5: area = p->aCoord[9].f - p->aCoord[8].f; |
| 21082 case 4: area *= p->aCoord[7].f - p->aCoord[6].f; |
| 21083 case 3: area *= p->aCoord[5].f - p->aCoord[4].f; |
| 21084 case 2: area *= p->aCoord[3].f - p->aCoord[2].f; |
| 21085 default: area *= p->aCoord[1].f - p->aCoord[0].f; |
| 21086 } |
| 21087 }else |
| 21088 #endif |
| 21089 { |
| 21090 switch( pRtree->nDim ){ |
| 21091 case 5: area = p->aCoord[9].i - p->aCoord[8].i; |
| 21092 case 4: area *= p->aCoord[7].i - p->aCoord[6].i; |
| 21093 case 3: area *= p->aCoord[5].i - p->aCoord[4].i; |
| 21094 case 2: area *= p->aCoord[3].i - p->aCoord[2].i; |
| 21095 default: area *= p->aCoord[1].i - p->aCoord[0].i; |
| 21096 } |
| 21097 } |
| 21098 return area; |
| 21099 } |
| 21100 |
| 21101 /* |
| 21102 ** Return the margin length of cell p. The margin length is the sum |
| 21103 ** of the objects size in each dimension. |
| 21104 */ |
| 21105 static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ |
| 21106 RtreeDValue margin = 0; |
| 21107 int ii = pRtree->nDim2 - 2; |
| 21108 do{ |
| 21109 margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); |
| 21110 ii -= 2; |
| 21111 }while( ii>=0 ); |
| 21112 return margin; |
| 21113 } |
| 21114 |
| 21115 /* |
| 21116 ** Store the union of cells p1 and p2 in p1. |
| 21117 */ |
| 21118 static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ |
| 21119 int ii = 0; |
| 21120 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 21121 do{ |
| 21122 p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); |
| 21123 p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); |
| 21124 ii += 2; |
| 21125 }while( ii<pRtree->nDim2 ); |
| 21126 }else{ |
| 21127 do{ |
| 21128 p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); |
| 21129 p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); |
| 21130 ii += 2; |
| 21131 }while( ii<pRtree->nDim2 ); |
| 21132 } |
| 21133 } |
| 21134 |
| 21135 /* |
| 21136 ** Return true if the area covered by p2 is a subset of the area covered |
| 21137 ** by p1. False otherwise. |
| 21138 */ |
| 21139 static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ |
| 21140 int ii; |
| 21141 int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); |
| 21142 for(ii=0; ii<pRtree->nDim2; ii+=2){ |
| 21143 RtreeCoord *a1 = &p1->aCoord[ii]; |
| 21144 RtreeCoord *a2 = &p2->aCoord[ii]; |
| 21145 if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f)) |
| 21146 || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i)) |
| 21147 ){ |
| 21148 return 0; |
| 21149 } |
| 21150 } |
| 21151 return 1; |
| 21152 } |
| 21153 |
| 21154 /* |
| 21155 ** Return the amount cell p would grow by if it were unioned with pCell. |
| 21156 */ |
| 21157 static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ |
| 21158 RtreeDValue area; |
| 21159 RtreeCell cell; |
| 21160 memcpy(&cell, p, sizeof(RtreeCell)); |
| 21161 area = cellArea(pRtree, &cell); |
| 21162 cellUnion(pRtree, &cell, pCell); |
| 21163 return (cellArea(pRtree, &cell)-area); |
| 21164 } |
| 21165 |
| 21166 static RtreeDValue cellOverlap( |
| 21167 Rtree *pRtree, |
| 21168 RtreeCell *p, |
| 21169 RtreeCell *aCell, |
| 21170 int nCell |
| 21171 ){ |
| 21172 int ii; |
| 21173 RtreeDValue overlap = RTREE_ZERO; |
| 21174 for(ii=0; ii<nCell; ii++){ |
| 21175 int jj; |
| 21176 RtreeDValue o = (RtreeDValue)1; |
| 21177 for(jj=0; jj<pRtree->nDim2; jj+=2){ |
| 21178 RtreeDValue x1, x2; |
| 21179 x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); |
| 21180 x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); |
| 21181 if( x2<x1 ){ |
| 21182 o = (RtreeDValue)0; |
| 21183 break; |
| 21184 }else{ |
| 21185 o = o * (x2-x1); |
| 21186 } |
| 21187 } |
| 21188 overlap += o; |
| 21189 } |
| 21190 return overlap; |
| 21191 } |
| 21192 |
| 21193 |
| 21194 /* |
| 21195 ** This function implements the ChooseLeaf algorithm from Gutman[84]. |
| 21196 ** ChooseSubTree in r*tree terminology. |
| 21197 */ |
| 21198 static int ChooseLeaf( |
| 21199 Rtree *pRtree, /* Rtree table */ |
| 21200 RtreeCell *pCell, /* Cell to insert into rtree */ |
| 21201 int iHeight, /* Height of sub-tree rooted at pCell */ |
| 21202 RtreeNode **ppLeaf /* OUT: Selected leaf page */ |
| 21203 ){ |
| 21204 int rc; |
| 21205 int ii; |
| 21206 RtreeNode *pNode; |
| 21207 rc = nodeAcquire(pRtree, 1, 0, &pNode); |
| 21208 |
| 21209 for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ |
| 21210 int iCell; |
| 21211 sqlite3_int64 iBest = 0; |
| 21212 |
| 21213 RtreeDValue fMinGrowth = RTREE_ZERO; |
| 21214 RtreeDValue fMinArea = RTREE_ZERO; |
| 21215 |
| 21216 int nCell = NCELL(pNode); |
| 21217 RtreeCell cell; |
| 21218 RtreeNode *pChild; |
| 21219 |
| 21220 RtreeCell *aCell = 0; |
| 21221 |
| 21222 /* Select the child node which will be enlarged the least if pCell |
| 21223 ** is inserted into it. Resolve ties by choosing the entry with |
| 21224 ** the smallest area. |
| 21225 */ |
| 21226 for(iCell=0; iCell<nCell; iCell++){ |
| 21227 int bBest = 0; |
| 21228 RtreeDValue growth; |
| 21229 RtreeDValue area; |
| 21230 nodeGetCell(pRtree, pNode, iCell, &cell); |
| 21231 growth = cellGrowth(pRtree, &cell, pCell); |
| 21232 area = cellArea(pRtree, &cell); |
| 21233 if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){ |
| 21234 bBest = 1; |
| 21235 } |
| 21236 if( bBest ){ |
| 21237 fMinGrowth = growth; |
| 21238 fMinArea = area; |
| 21239 iBest = cell.iRowid; |
| 21240 } |
| 21241 } |
| 21242 |
| 21243 sqlite3_free(aCell); |
| 21244 rc = nodeAcquire(pRtree, iBest, pNode, &pChild); |
| 21245 nodeRelease(pRtree, pNode); |
| 21246 pNode = pChild; |
| 21247 } |
| 21248 |
| 21249 *ppLeaf = pNode; |
| 21250 return rc; |
| 21251 } |
| 21252 |
| 21253 /* |
| 21254 ** A cell with the same content as pCell has just been inserted into |
| 21255 ** the node pNode. This function updates the bounding box cells in |
| 21256 ** all ancestor elements. |
| 21257 */ |
| 21258 static int AdjustTree( |
| 21259 Rtree *pRtree, /* Rtree table */ |
| 21260 RtreeNode *pNode, /* Adjust ancestry of this node. */ |
| 21261 RtreeCell *pCell /* This cell was just inserted */ |
| 21262 ){ |
| 21263 RtreeNode *p = pNode; |
| 21264 while( p->pParent ){ |
| 21265 RtreeNode *pParent = p->pParent; |
| 21266 RtreeCell cell; |
| 21267 int iCell; |
| 21268 |
| 21269 if( nodeParentIndex(pRtree, p, &iCell) ){ |
| 21270 return SQLITE_CORRUPT_VTAB; |
| 21271 } |
| 21272 |
| 21273 nodeGetCell(pRtree, pParent, iCell, &cell); |
| 21274 if( !cellContains(pRtree, &cell, pCell) ){ |
| 21275 cellUnion(pRtree, &cell, pCell); |
| 21276 nodeOverwriteCell(pRtree, pParent, &cell, iCell); |
| 21277 } |
| 21278 |
| 21279 p = pParent; |
| 21280 } |
| 21281 return SQLITE_OK; |
| 21282 } |
| 21283 |
| 21284 /* |
| 21285 ** Write mapping (iRowid->iNode) to the <rtree>_rowid table. |
| 21286 */ |
| 21287 static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ |
| 21288 sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); |
| 21289 sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); |
| 21290 sqlite3_step(pRtree->pWriteRowid); |
| 21291 return sqlite3_reset(pRtree->pWriteRowid); |
| 21292 } |
| 21293 |
| 21294 /* |
| 21295 ** Write mapping (iNode->iPar) to the <rtree>_parent table. |
| 21296 */ |
| 21297 static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ |
| 21298 sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); |
| 21299 sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); |
| 21300 sqlite3_step(pRtree->pWriteParent); |
| 21301 return sqlite3_reset(pRtree->pWriteParent); |
| 21302 } |
| 21303 |
| 21304 static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); |
| 21305 |
| 21306 |
| 21307 /* |
| 21308 ** Arguments aIdx, aDistance and aSpare all point to arrays of size |
| 21309 ** nIdx. The aIdx array contains the set of integers from 0 to |
| 21310 ** (nIdx-1) in no particular order. This function sorts the values |
| 21311 ** in aIdx according to the indexed values in aDistance. For |
| 21312 ** example, assuming the inputs: |
| 21313 ** |
| 21314 ** aIdx = { 0, 1, 2, 3 } |
| 21315 ** aDistance = { 5.0, 2.0, 7.0, 6.0 } |
| 21316 ** |
| 21317 ** this function sets the aIdx array to contain: |
| 21318 ** |
| 21319 ** aIdx = { 0, 1, 2, 3 } |
| 21320 ** |
| 21321 ** The aSpare array is used as temporary working space by the |
| 21322 ** sorting algorithm. |
| 21323 */ |
| 21324 static void SortByDistance( |
| 21325 int *aIdx, |
| 21326 int nIdx, |
| 21327 RtreeDValue *aDistance, |
| 21328 int *aSpare |
| 21329 ){ |
| 21330 if( nIdx>1 ){ |
| 21331 int iLeft = 0; |
| 21332 int iRight = 0; |
| 21333 |
| 21334 int nLeft = nIdx/2; |
| 21335 int nRight = nIdx-nLeft; |
| 21336 int *aLeft = aIdx; |
| 21337 int *aRight = &aIdx[nLeft]; |
| 21338 |
| 21339 SortByDistance(aLeft, nLeft, aDistance, aSpare); |
| 21340 SortByDistance(aRight, nRight, aDistance, aSpare); |
| 21341 |
| 21342 memcpy(aSpare, aLeft, sizeof(int)*nLeft); |
| 21343 aLeft = aSpare; |
| 21344 |
| 21345 while( iLeft<nLeft || iRight<nRight ){ |
| 21346 if( iLeft==nLeft ){ |
| 21347 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21348 iRight++; |
| 21349 }else if( iRight==nRight ){ |
| 21350 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21351 iLeft++; |
| 21352 }else{ |
| 21353 RtreeDValue fLeft = aDistance[aLeft[iLeft]]; |
| 21354 RtreeDValue fRight = aDistance[aRight[iRight]]; |
| 21355 if( fLeft<fRight ){ |
| 21356 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21357 iLeft++; |
| 21358 }else{ |
| 21359 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21360 iRight++; |
| 21361 } |
| 21362 } |
| 21363 } |
| 21364 |
| 21365 #if 0 |
| 21366 /* Check that the sort worked */ |
| 21367 { |
| 21368 int jj; |
| 21369 for(jj=1; jj<nIdx; jj++){ |
| 21370 RtreeDValue left = aDistance[aIdx[jj-1]]; |
| 21371 RtreeDValue right = aDistance[aIdx[jj]]; |
| 21372 assert( left<=right ); |
| 21373 } |
| 21374 } |
| 21375 #endif |
| 21376 } |
| 21377 } |
| 21378 |
| 21379 /* |
| 21380 ** Arguments aIdx, aCell and aSpare all point to arrays of size |
| 21381 ** nIdx. The aIdx array contains the set of integers from 0 to |
| 21382 ** (nIdx-1) in no particular order. This function sorts the values |
| 21383 ** in aIdx according to dimension iDim of the cells in aCell. The |
| 21384 ** minimum value of dimension iDim is considered first, the |
| 21385 ** maximum used to break ties. |
| 21386 ** |
| 21387 ** The aSpare array is used as temporary working space by the |
| 21388 ** sorting algorithm. |
| 21389 */ |
| 21390 static void SortByDimension( |
| 21391 Rtree *pRtree, |
| 21392 int *aIdx, |
| 21393 int nIdx, |
| 21394 int iDim, |
| 21395 RtreeCell *aCell, |
| 21396 int *aSpare |
| 21397 ){ |
| 21398 if( nIdx>1 ){ |
| 21399 |
| 21400 int iLeft = 0; |
| 21401 int iRight = 0; |
| 21402 |
| 21403 int nLeft = nIdx/2; |
| 21404 int nRight = nIdx-nLeft; |
| 21405 int *aLeft = aIdx; |
| 21406 int *aRight = &aIdx[nLeft]; |
| 21407 |
| 21408 SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); |
| 21409 SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); |
| 21410 |
| 21411 memcpy(aSpare, aLeft, sizeof(int)*nLeft); |
| 21412 aLeft = aSpare; |
| 21413 while( iLeft<nLeft || iRight<nRight ){ |
| 21414 RtreeDValue xleft1 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2]); |
| 21415 RtreeDValue xleft2 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2+1]); |
| 21416 RtreeDValue xright1 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2]); |
| 21417 RtreeDValue xright2 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2+1]); |
| 21418 if( (iLeft!=nLeft) && ((iRight==nRight) |
| 21419 || (xleft1<xright1) |
| 21420 || (xleft1==xright1 && xleft2<xright2) |
| 21421 )){ |
| 21422 aIdx[iLeft+iRight] = aLeft[iLeft]; |
| 21423 iLeft++; |
| 21424 }else{ |
| 21425 aIdx[iLeft+iRight] = aRight[iRight]; |
| 21426 iRight++; |
| 21427 } |
| 21428 } |
| 21429 |
| 21430 #if 0 |
| 21431 /* Check that the sort worked */ |
| 21432 { |
| 21433 int jj; |
| 21434 for(jj=1; jj<nIdx; jj++){ |
| 21435 RtreeDValue xleft1 = aCell[aIdx[jj-1]].aCoord[iDim*2]; |
| 21436 RtreeDValue xleft2 = aCell[aIdx[jj-1]].aCoord[iDim*2+1]; |
| 21437 RtreeDValue xright1 = aCell[aIdx[jj]].aCoord[iDim*2]; |
| 21438 RtreeDValue xright2 = aCell[aIdx[jj]].aCoord[iDim*2+1]; |
| 21439 assert( xleft1<=xright1 && (xleft1<xright1 || xleft2<=xright2) ); |
| 21440 } |
| 21441 } |
| 21442 #endif |
| 21443 } |
| 21444 } |
| 21445 |
| 21446 /* |
| 21447 ** Implementation of the R*-tree variant of SplitNode from Beckman[1990]. |
| 21448 */ |
| 21449 static int splitNodeStartree( |
| 21450 Rtree *pRtree, |
| 21451 RtreeCell *aCell, |
| 21452 int nCell, |
| 21453 RtreeNode *pLeft, |
| 21454 RtreeNode *pRight, |
| 21455 RtreeCell *pBboxLeft, |
| 21456 RtreeCell *pBboxRight |
| 21457 ){ |
| 21458 int **aaSorted; |
| 21459 int *aSpare; |
| 21460 int ii; |
| 21461 |
| 21462 int iBestDim = 0; |
| 21463 int iBestSplit = 0; |
| 21464 RtreeDValue fBestMargin = RTREE_ZERO; |
| 21465 |
| 21466 int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); |
| 21467 |
| 21468 aaSorted = (int **)sqlite3_malloc(nByte); |
| 21469 if( !aaSorted ){ |
| 21470 return SQLITE_NOMEM; |
| 21471 } |
| 21472 |
| 21473 aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; |
| 21474 memset(aaSorted, 0, nByte); |
| 21475 for(ii=0; ii<pRtree->nDim; ii++){ |
| 21476 int jj; |
| 21477 aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; |
| 21478 for(jj=0; jj<nCell; jj++){ |
| 21479 aaSorted[ii][jj] = jj; |
| 21480 } |
| 21481 SortByDimension(pRtree, aaSorted[ii], nCell, ii, aCell, aSpare); |
| 21482 } |
| 21483 |
| 21484 for(ii=0; ii<pRtree->nDim; ii++){ |
| 21485 RtreeDValue margin = RTREE_ZERO; |
| 21486 RtreeDValue fBestOverlap = RTREE_ZERO; |
| 21487 RtreeDValue fBestArea = RTREE_ZERO; |
| 21488 int iBestLeft = 0; |
| 21489 int nLeft; |
| 21490 |
| 21491 for( |
| 21492 nLeft=RTREE_MINCELLS(pRtree); |
| 21493 nLeft<=(nCell-RTREE_MINCELLS(pRtree)); |
| 21494 nLeft++ |
| 21495 ){ |
| 21496 RtreeCell left; |
| 21497 RtreeCell right; |
| 21498 int kk; |
| 21499 RtreeDValue overlap; |
| 21500 RtreeDValue area; |
| 21501 |
| 21502 memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); |
| 21503 memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); |
| 21504 for(kk=1; kk<(nCell-1); kk++){ |
| 21505 if( kk<nLeft ){ |
| 21506 cellUnion(pRtree, &left, &aCell[aaSorted[ii][kk]]); |
| 21507 }else{ |
| 21508 cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]); |
| 21509 } |
| 21510 } |
| 21511 margin += cellMargin(pRtree, &left); |
| 21512 margin += cellMargin(pRtree, &right); |
| 21513 overlap = cellOverlap(pRtree, &left, &right, 1); |
| 21514 area = cellArea(pRtree, &left) + cellArea(pRtree, &right); |
| 21515 if( (nLeft==RTREE_MINCELLS(pRtree)) |
| 21516 || (overlap<fBestOverlap) |
| 21517 || (overlap==fBestOverlap && area<fBestArea) |
| 21518 ){ |
| 21519 iBestLeft = nLeft; |
| 21520 fBestOverlap = overlap; |
| 21521 fBestArea = area; |
| 21522 } |
| 21523 } |
| 21524 |
| 21525 if( ii==0 || margin<fBestMargin ){ |
| 21526 iBestDim = ii; |
| 21527 fBestMargin = margin; |
| 21528 iBestSplit = iBestLeft; |
| 21529 } |
| 21530 } |
| 21531 |
| 21532 memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell)); |
| 21533 memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell)); |
| 21534 for(ii=0; ii<nCell; ii++){ |
| 21535 RtreeNode *pTarget = (ii<iBestSplit)?pLeft:pRight; |
| 21536 RtreeCell *pBbox = (ii<iBestSplit)?pBboxLeft:pBboxRight; |
| 21537 RtreeCell *pCell = &aCell[aaSorted[iBestDim][ii]]; |
| 21538 nodeInsertCell(pRtree, pTarget, pCell); |
| 21539 cellUnion(pRtree, pBbox, pCell); |
| 21540 } |
| 21541 |
| 21542 sqlite3_free(aaSorted); |
| 21543 return SQLITE_OK; |
| 21544 } |
| 21545 |
| 21546 |
| 21547 static int updateMapping( |
| 21548 Rtree *pRtree, |
| 21549 i64 iRowid, |
| 21550 RtreeNode *pNode, |
| 21551 int iHeight |
| 21552 ){ |
| 21553 int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); |
| 21554 xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); |
| 21555 if( iHeight>0 ){ |
| 21556 RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); |
| 21557 if( pChild ){ |
| 21558 nodeRelease(pRtree, pChild->pParent); |
| 21559 nodeReference(pNode); |
| 21560 pChild->pParent = pNode; |
| 21561 } |
| 21562 } |
| 21563 return xSetMapping(pRtree, iRowid, pNode->iNode); |
| 21564 } |
| 21565 |
| 21566 static int SplitNode( |
| 21567 Rtree *pRtree, |
| 21568 RtreeNode *pNode, |
| 21569 RtreeCell *pCell, |
| 21570 int iHeight |
| 21571 ){ |
| 21572 int i; |
| 21573 int newCellIsRight = 0; |
| 21574 |
| 21575 int rc = SQLITE_OK; |
| 21576 int nCell = NCELL(pNode); |
| 21577 RtreeCell *aCell; |
| 21578 int *aiUsed; |
| 21579 |
| 21580 RtreeNode *pLeft = 0; |
| 21581 RtreeNode *pRight = 0; |
| 21582 |
| 21583 RtreeCell leftbbox; |
| 21584 RtreeCell rightbbox; |
| 21585 |
| 21586 /* Allocate an array and populate it with a copy of pCell and |
| 21587 ** all cells from node pLeft. Then zero the original node. |
| 21588 */ |
| 21589 aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); |
| 21590 if( !aCell ){ |
| 21591 rc = SQLITE_NOMEM; |
| 21592 goto splitnode_out; |
| 21593 } |
| 21594 aiUsed = (int *)&aCell[nCell+1]; |
| 21595 memset(aiUsed, 0, sizeof(int)*(nCell+1)); |
| 21596 for(i=0; i<nCell; i++){ |
| 21597 nodeGetCell(pRtree, pNode, i, &aCell[i]); |
| 21598 } |
| 21599 nodeZero(pRtree, pNode); |
| 21600 memcpy(&aCell[nCell], pCell, sizeof(RtreeCell)); |
| 21601 nCell++; |
| 21602 |
| 21603 if( pNode->iNode==1 ){ |
| 21604 pRight = nodeNew(pRtree, pNode); |
| 21605 pLeft = nodeNew(pRtree, pNode); |
| 21606 pRtree->iDepth++; |
| 21607 pNode->isDirty = 1; |
| 21608 writeInt16(pNode->zData, pRtree->iDepth); |
| 21609 }else{ |
| 21610 pLeft = pNode; |
| 21611 pRight = nodeNew(pRtree, pLeft->pParent); |
| 21612 nodeReference(pLeft); |
| 21613 } |
| 21614 |
| 21615 if( !pLeft || !pRight ){ |
| 21616 rc = SQLITE_NOMEM; |
| 21617 goto splitnode_out; |
| 21618 } |
| 21619 |
| 21620 memset(pLeft->zData, 0, pRtree->iNodeSize); |
| 21621 memset(pRight->zData, 0, pRtree->iNodeSize); |
| 21622 |
| 21623 rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, |
| 21624 &leftbbox, &rightbbox); |
| 21625 if( rc!=SQLITE_OK ){ |
| 21626 goto splitnode_out; |
| 21627 } |
| 21628 |
| 21629 /* Ensure both child nodes have node numbers assigned to them by calling |
| 21630 ** nodeWrite(). Node pRight always needs a node number, as it was created |
| 21631 ** by nodeNew() above. But node pLeft sometimes already has a node number. |
| 21632 ** In this case avoid the all to nodeWrite(). |
| 21633 */ |
| 21634 if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) |
| 21635 || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) |
| 21636 ){ |
| 21637 goto splitnode_out; |
| 21638 } |
| 21639 |
| 21640 rightbbox.iRowid = pRight->iNode; |
| 21641 leftbbox.iRowid = pLeft->iNode; |
| 21642 |
| 21643 if( pNode->iNode==1 ){ |
| 21644 rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); |
| 21645 if( rc!=SQLITE_OK ){ |
| 21646 goto splitnode_out; |
| 21647 } |
| 21648 }else{ |
| 21649 RtreeNode *pParent = pLeft->pParent; |
| 21650 int iCell; |
| 21651 rc = nodeParentIndex(pRtree, pLeft, &iCell); |
| 21652 if( rc==SQLITE_OK ){ |
| 21653 nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); |
| 21654 rc = AdjustTree(pRtree, pParent, &leftbbox); |
| 21655 } |
| 21656 if( rc!=SQLITE_OK ){ |
| 21657 goto splitnode_out; |
| 21658 } |
| 21659 } |
| 21660 if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ |
| 21661 goto splitnode_out; |
| 21662 } |
| 21663 |
| 21664 for(i=0; i<NCELL(pRight); i++){ |
| 21665 i64 iRowid = nodeGetRowid(pRtree, pRight, i); |
| 21666 rc = updateMapping(pRtree, iRowid, pRight, iHeight); |
| 21667 if( iRowid==pCell->iRowid ){ |
| 21668 newCellIsRight = 1; |
| 21669 } |
| 21670 if( rc!=SQLITE_OK ){ |
| 21671 goto splitnode_out; |
| 21672 } |
| 21673 } |
| 21674 if( pNode->iNode==1 ){ |
| 21675 for(i=0; i<NCELL(pLeft); i++){ |
| 21676 i64 iRowid = nodeGetRowid(pRtree, pLeft, i); |
| 21677 rc = updateMapping(pRtree, iRowid, pLeft, iHeight); |
| 21678 if( rc!=SQLITE_OK ){ |
| 21679 goto splitnode_out; |
| 21680 } |
| 21681 } |
| 21682 }else if( newCellIsRight==0 ){ |
| 21683 rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight); |
| 21684 } |
| 21685 |
| 21686 if( rc==SQLITE_OK ){ |
| 21687 rc = nodeRelease(pRtree, pRight); |
| 21688 pRight = 0; |
| 21689 } |
| 21690 if( rc==SQLITE_OK ){ |
| 21691 rc = nodeRelease(pRtree, pLeft); |
| 21692 pLeft = 0; |
| 21693 } |
| 21694 |
| 21695 splitnode_out: |
| 21696 nodeRelease(pRtree, pRight); |
| 21697 nodeRelease(pRtree, pLeft); |
| 21698 sqlite3_free(aCell); |
| 21699 return rc; |
| 21700 } |
| 21701 |
| 21702 /* |
| 21703 ** If node pLeaf is not the root of the r-tree and its pParent pointer is |
| 21704 ** still NULL, load all ancestor nodes of pLeaf into memory and populate |
| 21705 ** the pLeaf->pParent chain all the way up to the root node. |
| 21706 ** |
| 21707 ** This operation is required when a row is deleted (or updated - an update |
| 21708 ** is implemented as a delete followed by an insert). SQLite provides the |
| 21709 ** rowid of the row to delete, which can be used to find the leaf on which |
| 21710 ** the entry resides (argument pLeaf). Once the leaf is located, this |
| 21711 ** function is called to determine its ancestry. |
| 21712 */ |
| 21713 static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ |
| 21714 int rc = SQLITE_OK; |
| 21715 RtreeNode *pChild = pLeaf; |
| 21716 while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ |
| 21717 int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ |
| 21718 sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); |
| 21719 rc = sqlite3_step(pRtree->pReadParent); |
| 21720 if( rc==SQLITE_ROW ){ |
| 21721 RtreeNode *pTest; /* Used to test for reference loops */ |
| 21722 i64 iNode; /* Node number of parent node */ |
| 21723 |
| 21724 /* Before setting pChild->pParent, test that we are not creating a |
| 21725 ** loop of references (as we would if, say, pChild==pParent). We don't |
| 21726 ** want to do this as it leads to a memory leak when trying to delete |
| 21727 ** the referenced counted node structures. |
| 21728 */ |
| 21729 iNode = sqlite3_column_int64(pRtree->pReadParent, 0); |
| 21730 for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); |
| 21731 if( !pTest ){ |
| 21732 rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); |
| 21733 } |
| 21734 } |
| 21735 rc = sqlite3_reset(pRtree->pReadParent); |
| 21736 if( rc==SQLITE_OK ) rc = rc2; |
| 21737 if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; |
| 21738 pChild = pChild->pParent; |
| 21739 } |
| 21740 return rc; |
| 21741 } |
| 21742 |
| 21743 static int deleteCell(Rtree *, RtreeNode *, int, int); |
| 21744 |
| 21745 static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ |
| 21746 int rc; |
| 21747 int rc2; |
| 21748 RtreeNode *pParent = 0; |
| 21749 int iCell; |
| 21750 |
| 21751 assert( pNode->nRef==1 ); |
| 21752 |
| 21753 /* Remove the entry in the parent cell. */ |
| 21754 rc = nodeParentIndex(pRtree, pNode, &iCell); |
| 21755 if( rc==SQLITE_OK ){ |
| 21756 pParent = pNode->pParent; |
| 21757 pNode->pParent = 0; |
| 21758 rc = deleteCell(pRtree, pParent, iCell, iHeight+1); |
| 21759 } |
| 21760 rc2 = nodeRelease(pRtree, pParent); |
| 21761 if( rc==SQLITE_OK ){ |
| 21762 rc = rc2; |
| 21763 } |
| 21764 if( rc!=SQLITE_OK ){ |
| 21765 return rc; |
| 21766 } |
| 21767 |
| 21768 /* Remove the xxx_node entry. */ |
| 21769 sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); |
| 21770 sqlite3_step(pRtree->pDeleteNode); |
| 21771 if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ |
| 21772 return rc; |
| 21773 } |
| 21774 |
| 21775 /* Remove the xxx_parent entry. */ |
| 21776 sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); |
| 21777 sqlite3_step(pRtree->pDeleteParent); |
| 21778 if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ |
| 21779 return rc; |
| 21780 } |
| 21781 |
| 21782 /* Remove the node from the in-memory hash table and link it into |
| 21783 ** the Rtree.pDeleted list. Its contents will be re-inserted later on. |
| 21784 */ |
| 21785 nodeHashDelete(pRtree, pNode); |
| 21786 pNode->iNode = iHeight; |
| 21787 pNode->pNext = pRtree->pDeleted; |
| 21788 pNode->nRef++; |
| 21789 pRtree->pDeleted = pNode; |
| 21790 |
| 21791 return SQLITE_OK; |
| 21792 } |
| 21793 |
| 21794 static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ |
| 21795 RtreeNode *pParent = pNode->pParent; |
| 21796 int rc = SQLITE_OK; |
| 21797 if( pParent ){ |
| 21798 int ii; |
| 21799 int nCell = NCELL(pNode); |
| 21800 RtreeCell box; /* Bounding box for pNode */ |
| 21801 nodeGetCell(pRtree, pNode, 0, &box); |
| 21802 for(ii=1; ii<nCell; ii++){ |
| 21803 RtreeCell cell; |
| 21804 nodeGetCell(pRtree, pNode, ii, &cell); |
| 21805 cellUnion(pRtree, &box, &cell); |
| 21806 } |
| 21807 box.iRowid = pNode->iNode; |
| 21808 rc = nodeParentIndex(pRtree, pNode, &ii); |
| 21809 if( rc==SQLITE_OK ){ |
| 21810 nodeOverwriteCell(pRtree, pParent, &box, ii); |
| 21811 rc = fixBoundingBox(pRtree, pParent); |
| 21812 } |
| 21813 } |
| 21814 return rc; |
| 21815 } |
| 21816 |
| 21817 /* |
| 21818 ** Delete the cell at index iCell of node pNode. After removing the |
| 21819 ** cell, adjust the r-tree data structure if required. |
| 21820 */ |
| 21821 static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ |
| 21822 RtreeNode *pParent; |
| 21823 int rc; |
| 21824 |
| 21825 if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ |
| 21826 return rc; |
| 21827 } |
| 21828 |
| 21829 /* Remove the cell from the node. This call just moves bytes around |
| 21830 ** the in-memory node image, so it cannot fail. |
| 21831 */ |
| 21832 nodeDeleteCell(pRtree, pNode, iCell); |
| 21833 |
| 21834 /* If the node is not the tree root and now has less than the minimum |
| 21835 ** number of cells, remove it from the tree. Otherwise, update the |
| 21836 ** cell in the parent node so that it tightly contains the updated |
| 21837 ** node. |
| 21838 */ |
| 21839 pParent = pNode->pParent; |
| 21840 assert( pParent || pNode->iNode==1 ); |
| 21841 if( pParent ){ |
| 21842 if( NCELL(pNode)<RTREE_MINCELLS(pRtree) ){ |
| 21843 rc = removeNode(pRtree, pNode, iHeight); |
| 21844 }else{ |
| 21845 rc = fixBoundingBox(pRtree, pNode); |
| 21846 } |
| 21847 } |
| 21848 |
| 21849 return rc; |
| 21850 } |
| 21851 |
| 21852 static int Reinsert( |
| 21853 Rtree *pRtree, |
| 21854 RtreeNode *pNode, |
| 21855 RtreeCell *pCell, |
| 21856 int iHeight |
| 21857 ){ |
| 21858 int *aOrder; |
| 21859 int *aSpare; |
| 21860 RtreeCell *aCell; |
| 21861 RtreeDValue *aDistance; |
| 21862 int nCell; |
| 21863 RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS]; |
| 21864 int iDim; |
| 21865 int ii; |
| 21866 int rc = SQLITE_OK; |
| 21867 int n; |
| 21868 |
| 21869 memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS); |
| 21870 |
| 21871 nCell = NCELL(pNode)+1; |
| 21872 n = (nCell+1)&(~1); |
| 21873 |
| 21874 /* Allocate the buffers used by this operation. The allocation is |
| 21875 ** relinquished before this function returns. |
| 21876 */ |
| 21877 aCell = (RtreeCell *)sqlite3_malloc(n * ( |
| 21878 sizeof(RtreeCell) + /* aCell array */ |
| 21879 sizeof(int) + /* aOrder array */ |
| 21880 sizeof(int) + /* aSpare array */ |
| 21881 sizeof(RtreeDValue) /* aDistance array */ |
| 21882 )); |
| 21883 if( !aCell ){ |
| 21884 return SQLITE_NOMEM; |
| 21885 } |
| 21886 aOrder = (int *)&aCell[n]; |
| 21887 aSpare = (int *)&aOrder[n]; |
| 21888 aDistance = (RtreeDValue *)&aSpare[n]; |
| 21889 |
| 21890 for(ii=0; ii<nCell; ii++){ |
| 21891 if( ii==(nCell-1) ){ |
| 21892 memcpy(&aCell[ii], pCell, sizeof(RtreeCell)); |
| 21893 }else{ |
| 21894 nodeGetCell(pRtree, pNode, ii, &aCell[ii]); |
| 21895 } |
| 21896 aOrder[ii] = ii; |
| 21897 for(iDim=0; iDim<pRtree->nDim; iDim++){ |
| 21898 aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); |
| 21899 aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); |
| 21900 } |
| 21901 } |
| 21902 for(iDim=0; iDim<pRtree->nDim; iDim++){ |
| 21903 aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); |
| 21904 } |
| 21905 |
| 21906 for(ii=0; ii<nCell; ii++){ |
| 21907 aDistance[ii] = RTREE_ZERO; |
| 21908 for(iDim=0; iDim<pRtree->nDim; iDim++){ |
| 21909 RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - |
| 21910 DCOORD(aCell[ii].aCoord[iDim*2])); |
| 21911 aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); |
| 21912 } |
| 21913 } |
| 21914 |
| 21915 SortByDistance(aOrder, nCell, aDistance, aSpare); |
| 21916 nodeZero(pRtree, pNode); |
| 21917 |
| 21918 for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ |
| 21919 RtreeCell *p = &aCell[aOrder[ii]]; |
| 21920 nodeInsertCell(pRtree, pNode, p); |
| 21921 if( p->iRowid==pCell->iRowid ){ |
| 21922 if( iHeight==0 ){ |
| 21923 rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); |
| 21924 }else{ |
| 21925 rc = parentWrite(pRtree, p->iRowid, pNode->iNode); |
| 21926 } |
| 21927 } |
| 21928 } |
| 21929 if( rc==SQLITE_OK ){ |
| 21930 rc = fixBoundingBox(pRtree, pNode); |
| 21931 } |
| 21932 for(; rc==SQLITE_OK && ii<nCell; ii++){ |
| 21933 /* Find a node to store this cell in. pNode->iNode currently contains |
| 21934 ** the height of the sub-tree headed by the cell. |
| 21935 */ |
| 21936 RtreeNode *pInsert; |
| 21937 RtreeCell *p = &aCell[aOrder[ii]]; |
| 21938 rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); |
| 21939 if( rc==SQLITE_OK ){ |
| 21940 int rc2; |
| 21941 rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); |
| 21942 rc2 = nodeRelease(pRtree, pInsert); |
| 21943 if( rc==SQLITE_OK ){ |
| 21944 rc = rc2; |
| 21945 } |
| 21946 } |
| 21947 } |
| 21948 |
| 21949 sqlite3_free(aCell); |
| 21950 return rc; |
| 21951 } |
| 21952 |
| 21953 /* |
| 21954 ** Insert cell pCell into node pNode. Node pNode is the head of a |
| 21955 ** subtree iHeight high (leaf nodes have iHeight==0). |
| 21956 */ |
| 21957 static int rtreeInsertCell( |
| 21958 Rtree *pRtree, |
| 21959 RtreeNode *pNode, |
| 21960 RtreeCell *pCell, |
| 21961 int iHeight |
| 21962 ){ |
| 21963 int rc = SQLITE_OK; |
| 21964 if( iHeight>0 ){ |
| 21965 RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); |
| 21966 if( pChild ){ |
| 21967 nodeRelease(pRtree, pChild->pParent); |
| 21968 nodeReference(pNode); |
| 21969 pChild->pParent = pNode; |
| 21970 } |
| 21971 } |
| 21972 if( nodeInsertCell(pRtree, pNode, pCell) ){ |
| 21973 if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ |
| 21974 rc = SplitNode(pRtree, pNode, pCell, iHeight); |
| 21975 }else{ |
| 21976 pRtree->iReinsertHeight = iHeight; |
| 21977 rc = Reinsert(pRtree, pNode, pCell, iHeight); |
| 21978 } |
| 21979 }else{ |
| 21980 rc = AdjustTree(pRtree, pNode, pCell); |
| 21981 if( rc==SQLITE_OK ){ |
| 21982 if( iHeight==0 ){ |
| 21983 rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); |
| 21984 }else{ |
| 21985 rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); |
| 21986 } |
| 21987 } |
| 21988 } |
| 21989 return rc; |
| 21990 } |
| 21991 |
| 21992 static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ |
| 21993 int ii; |
| 21994 int rc = SQLITE_OK; |
| 21995 int nCell = NCELL(pNode); |
| 21996 |
| 21997 for(ii=0; rc==SQLITE_OK && ii<nCell; ii++){ |
| 21998 RtreeNode *pInsert; |
| 21999 RtreeCell cell; |
| 22000 nodeGetCell(pRtree, pNode, ii, &cell); |
| 22001 |
| 22002 /* Find a node to store this cell in. pNode->iNode currently contains |
| 22003 ** the height of the sub-tree headed by the cell. |
| 22004 */ |
| 22005 rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); |
| 22006 if( rc==SQLITE_OK ){ |
| 22007 int rc2; |
| 22008 rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); |
| 22009 rc2 = nodeRelease(pRtree, pInsert); |
| 22010 if( rc==SQLITE_OK ){ |
| 22011 rc = rc2; |
| 22012 } |
| 22013 } |
| 22014 } |
| 22015 return rc; |
| 22016 } |
| 22017 |
| 22018 /* |
| 22019 ** Select a currently unused rowid for a new r-tree record. |
| 22020 */ |
| 22021 static int newRowid(Rtree *pRtree, i64 *piRowid){ |
| 22022 int rc; |
| 22023 sqlite3_bind_null(pRtree->pWriteRowid, 1); |
| 22024 sqlite3_bind_null(pRtree->pWriteRowid, 2); |
| 22025 sqlite3_step(pRtree->pWriteRowid); |
| 22026 rc = sqlite3_reset(pRtree->pWriteRowid); |
| 22027 *piRowid = sqlite3_last_insert_rowid(pRtree->db); |
| 22028 return rc; |
| 22029 } |
| 22030 |
| 22031 /* |
| 22032 ** Remove the entry with rowid=iDelete from the r-tree structure. |
| 22033 */ |
| 22034 static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ |
| 22035 int rc; /* Return code */ |
| 22036 RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ |
| 22037 int iCell; /* Index of iDelete cell in pLeaf */ |
| 22038 RtreeNode *pRoot; /* Root node of rtree structure */ |
| 22039 |
| 22040 |
| 22041 /* Obtain a reference to the root node to initialize Rtree.iDepth */ |
| 22042 rc = nodeAcquire(pRtree, 1, 0, &pRoot); |
| 22043 |
| 22044 /* Obtain a reference to the leaf node that contains the entry |
| 22045 ** about to be deleted. |
| 22046 */ |
| 22047 if( rc==SQLITE_OK ){ |
| 22048 rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); |
| 22049 } |
| 22050 |
| 22051 /* Delete the cell in question from the leaf node. */ |
| 22052 if( rc==SQLITE_OK ){ |
| 22053 int rc2; |
| 22054 rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); |
| 22055 if( rc==SQLITE_OK ){ |
| 22056 rc = deleteCell(pRtree, pLeaf, iCell, 0); |
| 22057 } |
| 22058 rc2 = nodeRelease(pRtree, pLeaf); |
| 22059 if( rc==SQLITE_OK ){ |
| 22060 rc = rc2; |
| 22061 } |
| 22062 } |
| 22063 |
| 22064 /* Delete the corresponding entry in the <rtree>_rowid table. */ |
| 22065 if( rc==SQLITE_OK ){ |
| 22066 sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); |
| 22067 sqlite3_step(pRtree->pDeleteRowid); |
| 22068 rc = sqlite3_reset(pRtree->pDeleteRowid); |
| 22069 } |
| 22070 |
| 22071 /* Check if the root node now has exactly one child. If so, remove |
| 22072 ** it, schedule the contents of the child for reinsertion and |
| 22073 ** reduce the tree height by one. |
| 22074 ** |
| 22075 ** This is equivalent to copying the contents of the child into |
| 22076 ** the root node (the operation that Gutman's paper says to perform |
| 22077 ** in this scenario). |
| 22078 */ |
| 22079 if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ |
| 22080 int rc2; |
| 22081 RtreeNode *pChild; |
| 22082 i64 iChild = nodeGetRowid(pRtree, pRoot, 0); |
| 22083 rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); |
| 22084 if( rc==SQLITE_OK ){ |
| 22085 rc = removeNode(pRtree, pChild, pRtree->iDepth-1); |
| 22086 } |
| 22087 rc2 = nodeRelease(pRtree, pChild); |
| 22088 if( rc==SQLITE_OK ) rc = rc2; |
| 22089 if( rc==SQLITE_OK ){ |
| 22090 pRtree->iDepth--; |
| 22091 writeInt16(pRoot->zData, pRtree->iDepth); |
| 22092 pRoot->isDirty = 1; |
| 22093 } |
| 22094 } |
| 22095 |
| 22096 /* Re-insert the contents of any underfull nodes removed from the tree. */ |
| 22097 for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ |
| 22098 if( rc==SQLITE_OK ){ |
| 22099 rc = reinsertNodeContent(pRtree, pLeaf); |
| 22100 } |
| 22101 pRtree->pDeleted = pLeaf->pNext; |
| 22102 sqlite3_free(pLeaf); |
| 22103 } |
| 22104 |
| 22105 /* Release the reference to the root node. */ |
| 22106 if( rc==SQLITE_OK ){ |
| 22107 rc = nodeRelease(pRtree, pRoot); |
| 22108 }else{ |
| 22109 nodeRelease(pRtree, pRoot); |
| 22110 } |
| 22111 |
| 22112 return rc; |
| 22113 } |
| 22114 |
| 22115 /* |
| 22116 ** Rounding constants for float->double conversion. |
| 22117 */ |
| 22118 #define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ |
| 22119 #define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ |
| 22120 |
| 22121 #if !defined(SQLITE_RTREE_INT_ONLY) |
| 22122 /* |
| 22123 ** Convert an sqlite3_value into an RtreeValue (presumably a float) |
| 22124 ** while taking care to round toward negative or positive, respectively. |
| 22125 */ |
| 22126 static RtreeValue rtreeValueDown(sqlite3_value *v){ |
| 22127 double d = sqlite3_value_double(v); |
| 22128 float f = (float)d; |
| 22129 if( f>d ){ |
| 22130 f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); |
| 22131 } |
| 22132 return f; |
| 22133 } |
| 22134 static RtreeValue rtreeValueUp(sqlite3_value *v){ |
| 22135 double d = sqlite3_value_double(v); |
| 22136 float f = (float)d; |
| 22137 if( f<d ){ |
| 22138 f = (float)(d*(d<0 ? RNDTOWARDS : RNDAWAY)); |
| 22139 } |
| 22140 return f; |
| 22141 } |
| 22142 #endif /* !defined(SQLITE_RTREE_INT_ONLY) */ |
| 22143 |
| 22144 /* |
| 22145 ** A constraint has failed while inserting a row into an rtree table. |
| 22146 ** Assuming no OOM error occurs, this function sets the error message |
| 22147 ** (at pRtree->base.zErrMsg) to an appropriate value and returns |
| 22148 ** SQLITE_CONSTRAINT. |
| 22149 ** |
| 22150 ** Parameter iCol is the index of the leftmost column involved in the |
| 22151 ** constraint failure. If it is 0, then the constraint that failed is |
| 22152 ** the unique constraint on the id column. Otherwise, it is the rtree |
| 22153 ** (c1<=c2) constraint on columns iCol and iCol+1 that has failed. |
| 22154 ** |
| 22155 ** If an OOM occurs, SQLITE_NOMEM is returned instead of SQLITE_CONSTRAINT. |
| 22156 */ |
| 22157 static int rtreeConstraintError(Rtree *pRtree, int iCol){ |
| 22158 sqlite3_stmt *pStmt = 0; |
| 22159 char *zSql; |
| 22160 int rc; |
| 22161 |
| 22162 assert( iCol==0 || iCol%2 ); |
| 22163 zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", pRtree->zDb, pRtree->zName); |
| 22164 if( zSql ){ |
| 22165 rc = sqlite3_prepare_v2(pRtree->db, zSql, -1, &pStmt, 0); |
| 22166 }else{ |
| 22167 rc = SQLITE_NOMEM; |
| 22168 } |
| 22169 sqlite3_free(zSql); |
| 22170 |
| 22171 if( rc==SQLITE_OK ){ |
| 22172 if( iCol==0 ){ |
| 22173 const char *zCol = sqlite3_column_name(pStmt, 0); |
| 22174 pRtree->base.zErrMsg = sqlite3_mprintf( |
| 22175 "UNIQUE constraint failed: %s.%s", pRtree->zName, zCol |
| 22176 ); |
| 22177 }else{ |
| 22178 const char *zCol1 = sqlite3_column_name(pStmt, iCol); |
| 22179 const char *zCol2 = sqlite3_column_name(pStmt, iCol+1); |
| 22180 pRtree->base.zErrMsg = sqlite3_mprintf( |
| 22181 "rtree constraint failed: %s.(%s<=%s)", pRtree->zName, zCol1, zCol2 |
| 22182 ); |
| 22183 } |
| 22184 } |
| 22185 |
| 22186 sqlite3_finalize(pStmt); |
| 22187 return (rc==SQLITE_OK ? SQLITE_CONSTRAINT : rc); |
| 22188 } |
| 22189 |
| 22190 |
| 22191 |
| 22192 /* |
| 22193 ** The xUpdate method for rtree module virtual tables. |
| 22194 */ |
| 22195 static int rtreeUpdate( |
| 22196 sqlite3_vtab *pVtab, |
| 22197 int nData, |
| 22198 sqlite3_value **azData, |
| 22199 sqlite_int64 *pRowid |
| 22200 ){ |
| 22201 Rtree *pRtree = (Rtree *)pVtab; |
| 22202 int rc = SQLITE_OK; |
| 22203 RtreeCell cell; /* New cell to insert if nData>1 */ |
| 22204 int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ |
| 22205 |
| 22206 rtreeReference(pRtree); |
| 22207 assert(nData>=1); |
| 22208 |
| 22209 cell.iRowid = 0; /* Used only to suppress a compiler warning */ |
| 22210 |
| 22211 /* Constraint handling. A write operation on an r-tree table may return |
| 22212 ** SQLITE_CONSTRAINT for two reasons: |
| 22213 ** |
| 22214 ** 1. A duplicate rowid value, or |
| 22215 ** 2. The supplied data violates the "x2>=x1" constraint. |
| 22216 ** |
| 22217 ** In the first case, if the conflict-handling mode is REPLACE, then |
| 22218 ** the conflicting row can be removed before proceeding. In the second |
| 22219 ** case, SQLITE_CONSTRAINT must be returned regardless of the |
| 22220 ** conflict-handling mode specified by the user. |
| 22221 */ |
| 22222 if( nData>1 ){ |
| 22223 int ii; |
| 22224 |
| 22225 /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. |
| 22226 ** |
| 22227 ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared |
| 22228 ** with "column" that are interpreted as table constraints. |
| 22229 ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); |
| 22230 ** This problem was discovered after years of use, so we silently ignore |
| 22231 ** these kinds of misdeclared tables to avoid breaking any legacy. |
| 22232 */ |
| 22233 assert( nData<=(pRtree->nDim2 + 3) ); |
| 22234 |
| 22235 #ifndef SQLITE_RTREE_INT_ONLY |
| 22236 if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ |
| 22237 for(ii=0; ii<nData-4; ii+=2){ |
| 22238 cell.aCoord[ii].f = rtreeValueDown(azData[ii+3]); |
| 22239 cell.aCoord[ii+1].f = rtreeValueUp(azData[ii+4]); |
| 22240 if( cell.aCoord[ii].f>cell.aCoord[ii+1].f ){ |
| 22241 rc = rtreeConstraintError(pRtree, ii+1); |
| 22242 goto constraint; |
| 22243 } |
| 22244 } |
| 22245 }else |
| 22246 #endif |
| 22247 { |
| 22248 for(ii=0; ii<nData-4; ii+=2){ |
| 22249 cell.aCoord[ii].i = sqlite3_value_int(azData[ii+3]); |
| 22250 cell.aCoord[ii+1].i = sqlite3_value_int(azData[ii+4]); |
| 22251 if( cell.aCoord[ii].i>cell.aCoord[ii+1].i ){ |
| 22252 rc = rtreeConstraintError(pRtree, ii+1); |
| 22253 goto constraint; |
| 22254 } |
| 22255 } |
| 22256 } |
| 22257 |
| 22258 /* If a rowid value was supplied, check if it is already present in |
| 22259 ** the table. If so, the constraint has failed. */ |
| 22260 if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){ |
| 22261 cell.iRowid = sqlite3_value_int64(azData[2]); |
| 22262 if( sqlite3_value_type(azData[0])==SQLITE_NULL |
| 22263 || sqlite3_value_int64(azData[0])!=cell.iRowid |
| 22264 ){ |
| 22265 int steprc; |
| 22266 sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); |
| 22267 steprc = sqlite3_step(pRtree->pReadRowid); |
| 22268 rc = sqlite3_reset(pRtree->pReadRowid); |
| 22269 if( SQLITE_ROW==steprc ){ |
| 22270 if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ |
| 22271 rc = rtreeDeleteRowid(pRtree, cell.iRowid); |
| 22272 }else{ |
| 22273 rc = rtreeConstraintError(pRtree, 0); |
| 22274 goto constraint; |
| 22275 } |
| 22276 } |
| 22277 } |
| 22278 bHaveRowid = 1; |
| 22279 } |
| 22280 } |
| 22281 |
| 22282 /* If azData[0] is not an SQL NULL value, it is the rowid of a |
| 22283 ** record to delete from the r-tree table. The following block does |
| 22284 ** just that. |
| 22285 */ |
| 22286 if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){ |
| 22287 rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0])); |
| 22288 } |
| 22289 |
| 22290 /* If the azData[] array contains more than one element, elements |
| 22291 ** (azData[2]..azData[argc-1]) contain a new record to insert into |
| 22292 ** the r-tree structure. |
| 22293 */ |
| 22294 if( rc==SQLITE_OK && nData>1 ){ |
| 22295 /* Insert the new record into the r-tree */ |
| 22296 RtreeNode *pLeaf = 0; |
| 22297 |
| 22298 /* Figure out the rowid of the new row. */ |
| 22299 if( bHaveRowid==0 ){ |
| 22300 rc = newRowid(pRtree, &cell.iRowid); |
| 22301 } |
| 22302 *pRowid = cell.iRowid; |
| 22303 |
| 22304 if( rc==SQLITE_OK ){ |
| 22305 rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); |
| 22306 } |
| 22307 if( rc==SQLITE_OK ){ |
| 22308 int rc2; |
| 22309 pRtree->iReinsertHeight = -1; |
| 22310 rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); |
| 22311 rc2 = nodeRelease(pRtree, pLeaf); |
| 22312 if( rc==SQLITE_OK ){ |
| 22313 rc = rc2; |
| 22314 } |
| 22315 } |
| 22316 } |
| 22317 |
| 22318 constraint: |
| 22319 rtreeRelease(pRtree); |
| 22320 return rc; |
| 22321 } |
| 22322 |
| 22323 /* |
| 22324 ** Called when a transaction starts. |
| 22325 */ |
| 22326 static int rtreeBeginTransaction(sqlite3_vtab *pVtab){ |
| 22327 Rtree *pRtree = (Rtree *)pVtab; |
| 22328 assert( pRtree->inWrTrans==0 ); |
| 22329 pRtree->inWrTrans++; |
| 22330 return SQLITE_OK; |
| 22331 } |
| 22332 |
| 22333 /* |
| 22334 ** Called when a transaction completes (either by COMMIT or ROLLBACK). |
| 22335 ** The sqlite3_blob object should be released at this point. |
| 22336 */ |
| 22337 static int rtreeEndTransaction(sqlite3_vtab *pVtab){ |
| 22338 Rtree *pRtree = (Rtree *)pVtab; |
| 22339 pRtree->inWrTrans = 0; |
| 22340 nodeBlobReset(pRtree); |
| 22341 return SQLITE_OK; |
| 22342 } |
| 22343 |
| 22344 /* |
| 22345 ** The xRename method for rtree module virtual tables. |
| 22346 */ |
| 22347 static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ |
| 22348 Rtree *pRtree = (Rtree *)pVtab; |
| 22349 int rc = SQLITE_NOMEM; |
| 22350 char *zSql = sqlite3_mprintf( |
| 22351 "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" |
| 22352 "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" |
| 22353 "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" |
| 22354 , pRtree->zDb, pRtree->zName, zNewName |
| 22355 , pRtree->zDb, pRtree->zName, zNewName |
| 22356 , pRtree->zDb, pRtree->zName, zNewName |
| 22357 ); |
| 22358 if( zSql ){ |
| 22359 rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); |
| 22360 sqlite3_free(zSql); |
| 22361 } |
| 22362 return rc; |
| 22363 } |
| 22364 |
| 22365 |
| 22366 /* |
| 22367 ** This function populates the pRtree->nRowEst variable with an estimate |
| 22368 ** of the number of rows in the virtual table. If possible, this is based |
| 22369 ** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. |
| 22370 */ |
| 22371 static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ |
| 22372 const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; |
| 22373 char *zSql; |
| 22374 sqlite3_stmt *p; |
| 22375 int rc; |
| 22376 i64 nRow = 0; |
| 22377 |
| 22378 rc = sqlite3_table_column_metadata( |
| 22379 db, pRtree->zDb, "sqlite_stat1",0,0,0,0,0,0 |
| 22380 ); |
| 22381 if( rc!=SQLITE_OK ){ |
| 22382 pRtree->nRowEst = RTREE_DEFAULT_ROWEST; |
| 22383 return rc==SQLITE_ERROR ? SQLITE_OK : rc; |
| 22384 } |
| 22385 zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); |
| 22386 if( zSql==0 ){ |
| 22387 rc = SQLITE_NOMEM; |
| 22388 }else{ |
| 22389 rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); |
| 22390 if( rc==SQLITE_OK ){ |
| 22391 if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); |
| 22392 rc = sqlite3_finalize(p); |
| 22393 }else if( rc!=SQLITE_NOMEM ){ |
| 22394 rc = SQLITE_OK; |
| 22395 } |
| 22396 |
| 22397 if( rc==SQLITE_OK ){ |
| 22398 if( nRow==0 ){ |
| 22399 pRtree->nRowEst = RTREE_DEFAULT_ROWEST; |
| 22400 }else{ |
| 22401 pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); |
| 22402 } |
| 22403 } |
| 22404 sqlite3_free(zSql); |
| 22405 } |
| 22406 |
| 22407 return rc; |
| 22408 } |
| 22409 |
| 22410 static sqlite3_module rtreeModule = { |
| 22411 0, /* iVersion */ |
| 22412 rtreeCreate, /* xCreate - create a table */ |
| 22413 rtreeConnect, /* xConnect - connect to an existing table */ |
| 22414 rtreeBestIndex, /* xBestIndex - Determine search strategy */ |
| 22415 rtreeDisconnect, /* xDisconnect - Disconnect from a table */ |
| 22416 rtreeDestroy, /* xDestroy - Drop a table */ |
| 22417 rtreeOpen, /* xOpen - open a cursor */ |
| 22418 rtreeClose, /* xClose - close a cursor */ |
| 22419 rtreeFilter, /* xFilter - configure scan constraints */ |
| 22420 rtreeNext, /* xNext - advance a cursor */ |
| 22421 rtreeEof, /* xEof */ |
| 22422 rtreeColumn, /* xColumn - read data */ |
| 22423 rtreeRowid, /* xRowid - read data */ |
| 22424 rtreeUpdate, /* xUpdate - write data */ |
| 22425 rtreeBeginTransaction, /* xBegin - begin transaction */ |
| 22426 rtreeEndTransaction, /* xSync - sync transaction */ |
| 22427 rtreeEndTransaction, /* xCommit - commit transaction */ |
| 22428 rtreeEndTransaction, /* xRollback - rollback transaction */ |
| 22429 0, /* xFindFunction - function overloading */ |
| 22430 rtreeRename, /* xRename - rename the table */ |
| 22431 0, /* xSavepoint */ |
| 22432 0, /* xRelease */ |
| 22433 0, /* xRollbackTo */ |
| 22434 }; |
| 22435 |
| 22436 static int rtreeSqlInit( |
| 22437 Rtree *pRtree, |
| 22438 sqlite3 *db, |
| 22439 const char *zDb, |
| 22440 const char *zPrefix, |
| 22441 int isCreate |
| 22442 ){ |
| 22443 int rc = SQLITE_OK; |
| 22444 |
| 22445 #define N_STATEMENT 8 |
| 22446 static const char *azSql[N_STATEMENT] = { |
| 22447 /* Write the xxx_node table */ |
| 22448 "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)", |
| 22449 "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1", |
| 22450 |
| 22451 /* Read and write the xxx_rowid table */ |
| 22452 "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1", |
| 22453 "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)", |
| 22454 "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1", |
| 22455 |
| 22456 /* Read and write the xxx_parent table */ |
| 22457 "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1", |
| 22458 "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)", |
| 22459 "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1" |
| 22460 }; |
| 22461 sqlite3_stmt **appStmt[N_STATEMENT]; |
| 22462 int i; |
| 22463 |
| 22464 pRtree->db = db; |
| 22465 |
| 22466 if( isCreate ){ |
| 22467 char *zCreate = sqlite3_mprintf( |
| 22468 "CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);" |
| 22469 "CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);" |
| 22470 "CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY," |
| 22471 " parentnode INTEGER);" |
| 22472 "INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))", |
| 22473 zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize |
| 22474 ); |
| 22475 if( !zCreate ){ |
| 22476 return SQLITE_NOMEM; |
| 22477 } |
| 22478 rc = sqlite3_exec(db, zCreate, 0, 0, 0); |
| 22479 sqlite3_free(zCreate); |
| 22480 if( rc!=SQLITE_OK ){ |
| 22481 return rc; |
| 22482 } |
| 22483 } |
| 22484 |
| 22485 appStmt[0] = &pRtree->pWriteNode; |
| 22486 appStmt[1] = &pRtree->pDeleteNode; |
| 22487 appStmt[2] = &pRtree->pReadRowid; |
| 22488 appStmt[3] = &pRtree->pWriteRowid; |
| 22489 appStmt[4] = &pRtree->pDeleteRowid; |
| 22490 appStmt[5] = &pRtree->pReadParent; |
| 22491 appStmt[6] = &pRtree->pWriteParent; |
| 22492 appStmt[7] = &pRtree->pDeleteParent; |
| 22493 |
| 22494 rc = rtreeQueryStat1(db, pRtree); |
| 22495 for(i=0; i<N_STATEMENT && rc==SQLITE_OK; i++){ |
| 22496 char *zSql = sqlite3_mprintf(azSql[i], zDb, zPrefix); |
| 22497 if( zSql ){ |
| 22498 rc = sqlite3_prepare_v2(db, zSql, -1, appStmt[i], 0); |
| 22499 }else{ |
| 22500 rc = SQLITE_NOMEM; |
| 22501 } |
| 22502 sqlite3_free(zSql); |
| 22503 } |
| 22504 |
| 22505 return rc; |
| 22506 } |
| 22507 |
| 22508 /* |
| 22509 ** The second argument to this function contains the text of an SQL statement |
| 22510 ** that returns a single integer value. The statement is compiled and executed |
| 22511 ** using database connection db. If successful, the integer value returned |
| 22512 ** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error |
| 22513 ** code is returned and the value of *piVal after returning is not defined. |
| 22514 */ |
| 22515 static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ |
| 22516 int rc = SQLITE_NOMEM; |
| 22517 if( zSql ){ |
| 22518 sqlite3_stmt *pStmt = 0; |
| 22519 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 22520 if( rc==SQLITE_OK ){ |
| 22521 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 22522 *piVal = sqlite3_column_int(pStmt, 0); |
| 22523 } |
| 22524 rc = sqlite3_finalize(pStmt); |
| 22525 } |
| 22526 } |
| 22527 return rc; |
| 22528 } |
| 22529 |
| 22530 /* |
| 22531 ** This function is called from within the xConnect() or xCreate() method to |
| 22532 ** determine the node-size used by the rtree table being created or connected |
| 22533 ** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. |
| 22534 ** Otherwise, an SQLite error code is returned. |
| 22535 ** |
| 22536 ** If this function is being called as part of an xConnect(), then the rtree |
| 22537 ** table already exists. In this case the node-size is determined by inspecting |
| 22538 ** the root node of the tree. |
| 22539 ** |
| 22540 ** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. |
| 22541 ** This ensures that each node is stored on a single database page. If the |
| 22542 ** database page-size is so large that more than RTREE_MAXCELLS entries |
| 22543 ** would fit in a single node, use a smaller node-size. |
| 22544 */ |
| 22545 static int getNodeSize( |
| 22546 sqlite3 *db, /* Database handle */ |
| 22547 Rtree *pRtree, /* Rtree handle */ |
| 22548 int isCreate, /* True for xCreate, false for xConnect */ |
| 22549 char **pzErr /* OUT: Error message, if any */ |
| 22550 ){ |
| 22551 int rc; |
| 22552 char *zSql; |
| 22553 if( isCreate ){ |
| 22554 int iPageSize = 0; |
| 22555 zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); |
| 22556 rc = getIntFromStmt(db, zSql, &iPageSize); |
| 22557 if( rc==SQLITE_OK ){ |
| 22558 pRtree->iNodeSize = iPageSize-64; |
| 22559 if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){ |
| 22560 pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; |
| 22561 } |
| 22562 }else{ |
| 22563 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22564 } |
| 22565 }else{ |
| 22566 zSql = sqlite3_mprintf( |
| 22567 "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", |
| 22568 pRtree->zDb, pRtree->zName |
| 22569 ); |
| 22570 rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); |
| 22571 if( rc!=SQLITE_OK ){ |
| 22572 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22573 } |
| 22574 } |
| 22575 |
| 22576 sqlite3_free(zSql); |
| 22577 return rc; |
| 22578 } |
| 22579 |
| 22580 /* |
| 22581 ** This function is the implementation of both the xConnect and xCreate |
| 22582 ** methods of the r-tree virtual table. |
| 22583 ** |
| 22584 ** argv[0] -> module name |
| 22585 ** argv[1] -> database name |
| 22586 ** argv[2] -> table name |
| 22587 ** argv[...] -> column names... |
| 22588 */ |
| 22589 static int rtreeInit( |
| 22590 sqlite3 *db, /* Database connection */ |
| 22591 void *pAux, /* One of the RTREE_COORD_* constants */ |
| 22592 int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ |
| 22593 sqlite3_vtab **ppVtab, /* OUT: New virtual table */ |
| 22594 char **pzErr, /* OUT: Error message, if any */ |
| 22595 int isCreate /* True for xCreate, false for xConnect */ |
| 22596 ){ |
| 22597 int rc = SQLITE_OK; |
| 22598 Rtree *pRtree; |
| 22599 int nDb; /* Length of string argv[1] */ |
| 22600 int nName; /* Length of string argv[2] */ |
| 22601 int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); |
| 22602 |
| 22603 const char *aErrMsg[] = { |
| 22604 0, /* 0 */ |
| 22605 "Wrong number of columns for an rtree table", /* 1 */ |
| 22606 "Too few columns for an rtree table", /* 2 */ |
| 22607 "Too many columns for an rtree table" /* 3 */ |
| 22608 }; |
| 22609 |
| 22610 int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2; |
| 22611 if( aErrMsg[iErr] ){ |
| 22612 *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); |
| 22613 return SQLITE_ERROR; |
| 22614 } |
| 22615 |
| 22616 sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); |
| 22617 |
| 22618 /* Allocate the sqlite3_vtab structure */ |
| 22619 nDb = (int)strlen(argv[1]); |
| 22620 nName = (int)strlen(argv[2]); |
| 22621 pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2); |
| 22622 if( !pRtree ){ |
| 22623 return SQLITE_NOMEM; |
| 22624 } |
| 22625 memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); |
| 22626 pRtree->nBusy = 1; |
| 22627 pRtree->base.pModule = &rtreeModule; |
| 22628 pRtree->zDb = (char *)&pRtree[1]; |
| 22629 pRtree->zName = &pRtree->zDb[nDb+1]; |
| 22630 pRtree->nDim = (u8)((argc-4)/2); |
| 22631 pRtree->nDim2 = pRtree->nDim*2; |
| 22632 pRtree->nBytesPerCell = 8 + pRtree->nDim2*4; |
| 22633 pRtree->eCoordType = (u8)eCoordType; |
| 22634 memcpy(pRtree->zDb, argv[1], nDb); |
| 22635 memcpy(pRtree->zName, argv[2], nName); |
| 22636 |
| 22637 /* Figure out the node size to use. */ |
| 22638 rc = getNodeSize(db, pRtree, isCreate, pzErr); |
| 22639 |
| 22640 /* Create/Connect to the underlying relational database schema. If |
| 22641 ** that is successful, call sqlite3_declare_vtab() to configure |
| 22642 ** the r-tree table schema. |
| 22643 */ |
| 22644 if( rc==SQLITE_OK ){ |
| 22645 if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){ |
| 22646 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22647 }else{ |
| 22648 char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]); |
| 22649 char *zTmp; |
| 22650 int ii; |
| 22651 for(ii=4; zSql && ii<argc; ii++){ |
| 22652 zTmp = zSql; |
| 22653 zSql = sqlite3_mprintf("%s, %s", zTmp, argv[ii]); |
| 22654 sqlite3_free(zTmp); |
| 22655 } |
| 22656 if( zSql ){ |
| 22657 zTmp = zSql; |
| 22658 zSql = sqlite3_mprintf("%s);", zTmp); |
| 22659 sqlite3_free(zTmp); |
| 22660 } |
| 22661 if( !zSql ){ |
| 22662 rc = SQLITE_NOMEM; |
| 22663 }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){ |
| 22664 *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); |
| 22665 } |
| 22666 sqlite3_free(zSql); |
| 22667 } |
| 22668 } |
| 22669 |
| 22670 if( rc==SQLITE_OK ){ |
| 22671 *ppVtab = (sqlite3_vtab *)pRtree; |
| 22672 }else{ |
| 22673 assert( *ppVtab==0 ); |
| 22674 assert( pRtree->nBusy==1 ); |
| 22675 rtreeRelease(pRtree); |
| 22676 } |
| 22677 return rc; |
| 22678 } |
| 22679 |
| 22680 |
| 22681 /* |
| 22682 ** Implementation of a scalar function that decodes r-tree nodes to |
| 22683 ** human readable strings. This can be used for debugging and analysis. |
| 22684 ** |
| 22685 ** The scalar function takes two arguments: (1) the number of dimensions |
| 22686 ** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing |
| 22687 ** an r-tree node. For a two-dimensional r-tree structure called "rt", to |
| 22688 ** deserialize all nodes, a statement like: |
| 22689 ** |
| 22690 ** SELECT rtreenode(2, data) FROM rt_node; |
| 22691 ** |
| 22692 ** The human readable string takes the form of a Tcl list with one |
| 22693 ** entry for each cell in the r-tree node. Each entry is itself a |
| 22694 ** list, containing the 8-byte rowid/pageno followed by the |
| 22695 ** <num-dimension>*2 coordinates. |
| 22696 */ |
| 22697 static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ |
| 22698 char *zText = 0; |
| 22699 RtreeNode node; |
| 22700 Rtree tree; |
| 22701 int ii; |
| 22702 |
| 22703 UNUSED_PARAMETER(nArg); |
| 22704 memset(&node, 0, sizeof(RtreeNode)); |
| 22705 memset(&tree, 0, sizeof(Rtree)); |
| 22706 tree.nDim = (u8)sqlite3_value_int(apArg[0]); |
| 22707 tree.nDim2 = tree.nDim*2; |
| 22708 tree.nBytesPerCell = 8 + 8 * tree.nDim; |
| 22709 node.zData = (u8 *)sqlite3_value_blob(apArg[1]); |
| 22710 |
| 22711 for(ii=0; ii<NCELL(&node); ii++){ |
| 22712 char zCell[512]; |
| 22713 int nCell = 0; |
| 22714 RtreeCell cell; |
| 22715 int jj; |
| 22716 |
| 22717 nodeGetCell(&tree, &node, ii, &cell); |
| 22718 sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid); |
| 22719 nCell = (int)strlen(zCell); |
| 22720 for(jj=0; jj<tree.nDim2; jj++){ |
| 22721 #ifndef SQLITE_RTREE_INT_ONLY |
| 22722 sqlite3_snprintf(512-nCell,&zCell[nCell], " %g", |
| 22723 (double)cell.aCoord[jj].f); |
| 22724 #else |
| 22725 sqlite3_snprintf(512-nCell,&zCell[nCell], " %d", |
| 22726 cell.aCoord[jj].i); |
| 22727 #endif |
| 22728 nCell = (int)strlen(zCell); |
| 22729 } |
| 22730 |
| 22731 if( zText ){ |
| 22732 char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell); |
| 22733 sqlite3_free(zText); |
| 22734 zText = zTextNew; |
| 22735 }else{ |
| 22736 zText = sqlite3_mprintf("{%s}", zCell); |
| 22737 } |
| 22738 } |
| 22739 |
| 22740 sqlite3_result_text(ctx, zText, -1, sqlite3_free); |
| 22741 } |
| 22742 |
| 22743 /* This routine implements an SQL function that returns the "depth" parameter |
| 22744 ** from the front of a blob that is an r-tree node. For example: |
| 22745 ** |
| 22746 ** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; |
| 22747 ** |
| 22748 ** The depth value is 0 for all nodes other than the root node, and the root |
| 22749 ** node always has nodeno=1, so the example above is the primary use for this |
| 22750 ** routine. This routine is intended for testing and analysis only. |
| 22751 */ |
| 22752 static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ |
| 22753 UNUSED_PARAMETER(nArg); |
| 22754 if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB |
| 22755 || sqlite3_value_bytes(apArg[0])<2 |
| 22756 ){ |
| 22757 sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); |
| 22758 }else{ |
| 22759 u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]); |
| 22760 sqlite3_result_int(ctx, readInt16(zBlob)); |
| 22761 } |
| 22762 } |
| 22763 |
| 22764 /* |
| 22765 ** Register the r-tree module with database handle db. This creates the |
| 22766 ** virtual table module "rtree" and the debugging/analysis scalar |
| 22767 ** function "rtreenode". |
| 22768 */ |
| 22769 SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){ |
| 22770 const int utf8 = SQLITE_UTF8; |
| 22771 int rc; |
| 22772 |
| 22773 rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); |
| 22774 if( rc==SQLITE_OK ){ |
| 22775 rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); |
| 22776 } |
| 22777 if( rc==SQLITE_OK ){ |
| 22778 #ifdef SQLITE_RTREE_INT_ONLY |
| 22779 void *c = (void *)RTREE_COORD_INT32; |
| 22780 #else |
| 22781 void *c = (void *)RTREE_COORD_REAL32; |
| 22782 #endif |
| 22783 rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0); |
| 22784 } |
| 22785 if( rc==SQLITE_OK ){ |
| 22786 void *c = (void *)RTREE_COORD_INT32; |
| 22787 rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); |
| 22788 } |
| 22789 |
| 22790 return rc; |
| 22791 } |
| 22792 |
| 22793 /* |
| 22794 ** This routine deletes the RtreeGeomCallback object that was attached |
| 22795 ** one of the SQL functions create by sqlite3_rtree_geometry_callback() |
| 22796 ** or sqlite3_rtree_query_callback(). In other words, this routine is the |
| 22797 ** destructor for an RtreeGeomCallback objecct. This routine is called when |
| 22798 ** the corresponding SQL function is deleted. |
| 22799 */ |
| 22800 static void rtreeFreeCallback(void *p){ |
| 22801 RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p; |
| 22802 if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext); |
| 22803 sqlite3_free(p); |
| 22804 } |
| 22805 |
| 22806 /* |
| 22807 ** This routine frees the BLOB that is returned by geomCallback(). |
| 22808 */ |
| 22809 static void rtreeMatchArgFree(void *pArg){ |
| 22810 int i; |
| 22811 RtreeMatchArg *p = (RtreeMatchArg*)pArg; |
| 22812 for(i=0; i<p->nParam; i++){ |
| 22813 sqlite3_value_free(p->apSqlParam[i]); |
| 22814 } |
| 22815 sqlite3_free(p); |
| 22816 } |
| 22817 |
| 22818 /* |
| 22819 ** Each call to sqlite3_rtree_geometry_callback() or |
| 22820 ** sqlite3_rtree_query_callback() creates an ordinary SQLite |
| 22821 ** scalar function that is implemented by this routine. |
| 22822 ** |
| 22823 ** All this function does is construct an RtreeMatchArg object that |
| 22824 ** contains the geometry-checking callback routines and a list of |
| 22825 ** parameters to this function, then return that RtreeMatchArg object |
| 22826 ** as a BLOB. |
| 22827 ** |
| 22828 ** The R-Tree MATCH operator will read the returned BLOB, deserialize |
| 22829 ** the RtreeMatchArg object, and use the RtreeMatchArg object to figure |
| 22830 ** out which elements of the R-Tree should be returned by the query. |
| 22831 */ |
| 22832 static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ |
| 22833 RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); |
| 22834 RtreeMatchArg *pBlob; |
| 22835 int nBlob; |
| 22836 int memErr = 0; |
| 22837 |
| 22838 nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue) |
| 22839 + nArg*sizeof(sqlite3_value*); |
| 22840 pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); |
| 22841 if( !pBlob ){ |
| 22842 sqlite3_result_error_nomem(ctx); |
| 22843 }else{ |
| 22844 int i; |
| 22845 pBlob->magic = RTREE_GEOMETRY_MAGIC; |
| 22846 pBlob->cb = pGeomCtx[0]; |
| 22847 pBlob->apSqlParam = (sqlite3_value**)&pBlob->aParam[nArg]; |
| 22848 pBlob->nParam = nArg; |
| 22849 for(i=0; i<nArg; i++){ |
| 22850 pBlob->apSqlParam[i] = sqlite3_value_dup(aArg[i]); |
| 22851 if( pBlob->apSqlParam[i]==0 ) memErr = 1; |
| 22852 #ifdef SQLITE_RTREE_INT_ONLY |
| 22853 pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); |
| 22854 #else |
| 22855 pBlob->aParam[i] = sqlite3_value_double(aArg[i]); |
| 22856 #endif |
| 22857 } |
| 22858 if( memErr ){ |
| 22859 sqlite3_result_error_nomem(ctx); |
| 22860 rtreeMatchArgFree(pBlob); |
| 22861 }else{ |
| 22862 sqlite3_result_blob(ctx, pBlob, nBlob, rtreeMatchArgFree); |
| 22863 } |
| 22864 } |
| 22865 } |
| 22866 |
| 22867 /* |
| 22868 ** Register a new geometry function for use with the r-tree MATCH operator. |
| 22869 */ |
| 22870 SQLITE_API int sqlite3_rtree_geometry_callback( |
| 22871 sqlite3 *db, /* Register SQL function on this connection */ |
| 22872 const char *zGeom, /* Name of the new SQL function */ |
| 22873 int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ |
| 22874 void *pContext /* Extra data associated with the callback */ |
| 22875 ){ |
| 22876 RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ |
| 22877 |
| 22878 /* Allocate and populate the context object. */ |
| 22879 pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); |
| 22880 if( !pGeomCtx ) return SQLITE_NOMEM; |
| 22881 pGeomCtx->xGeom = xGeom; |
| 22882 pGeomCtx->xQueryFunc = 0; |
| 22883 pGeomCtx->xDestructor = 0; |
| 22884 pGeomCtx->pContext = pContext; |
| 22885 return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, |
| 22886 (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback |
| 22887 ); |
| 22888 } |
| 22889 |
| 22890 /* |
| 22891 ** Register a new 2nd-generation geometry function for use with the |
| 22892 ** r-tree MATCH operator. |
| 22893 */ |
| 22894 SQLITE_API int sqlite3_rtree_query_callback( |
| 22895 sqlite3 *db, /* Register SQL function on this connection */ |
| 22896 const char *zQueryFunc, /* Name of new SQL function */ |
| 22897 int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ |
| 22898 void *pContext, /* Extra data passed into the callback */ |
| 22899 void (*xDestructor)(void*) /* Destructor for the extra data */ |
| 22900 ){ |
| 22901 RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ |
| 22902 |
| 22903 /* Allocate and populate the context object. */ |
| 22904 pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); |
| 22905 if( !pGeomCtx ) return SQLITE_NOMEM; |
| 22906 pGeomCtx->xGeom = 0; |
| 22907 pGeomCtx->xQueryFunc = xQueryFunc; |
| 22908 pGeomCtx->xDestructor = xDestructor; |
| 22909 pGeomCtx->pContext = pContext; |
| 22910 return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, |
| 22911 (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback |
| 22912 ); |
| 22913 } |
| 22914 |
| 22915 #if !SQLITE_CORE |
| 22916 #ifdef _WIN32 |
| 22917 __declspec(dllexport) |
| 22918 #endif |
| 22919 SQLITE_API int sqlite3_rtree_init( |
| 22920 sqlite3 *db, |
| 22921 char **pzErrMsg, |
| 22922 const sqlite3_api_routines *pApi |
| 22923 ){ |
| 22924 SQLITE_EXTENSION_INIT2(pApi) |
| 22925 return sqlite3RtreeInit(db); |
| 22926 } |
| 22927 #endif |
| 22928 |
| 22929 #endif |
| 22930 |
| 22931 /************** End of rtree.c ***********************************************/ |
| 22932 /************** Begin file icu.c *********************************************/ |
| 22933 /* |
| 22934 ** 2007 May 6 |
| 22935 ** |
| 22936 ** The author disclaims copyright to this source code. In place of |
| 22937 ** a legal notice, here is a blessing: |
| 22938 ** |
| 22939 ** May you do good and not evil. |
| 22940 ** May you find forgiveness for yourself and forgive others. |
| 22941 ** May you share freely, never taking more than you give. |
| 22942 ** |
| 22943 ************************************************************************* |
| 22944 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ |
| 22945 ** |
| 22946 ** This file implements an integration between the ICU library |
| 22947 ** ("International Components for Unicode", an open-source library |
| 22948 ** for handling unicode data) and SQLite. The integration uses |
| 22949 ** ICU to provide the following to SQLite: |
| 22950 ** |
| 22951 ** * An implementation of the SQL regexp() function (and hence REGEXP |
| 22952 ** operator) using the ICU uregex_XX() APIs. |
| 22953 ** |
| 22954 ** * Implementations of the SQL scalar upper() and lower() functions |
| 22955 ** for case mapping. |
| 22956 ** |
| 22957 ** * Integration of ICU and SQLite collation sequences. |
| 22958 ** |
| 22959 ** * An implementation of the LIKE operator that uses ICU to |
| 22960 ** provide case-independent matching. |
| 22961 */ |
| 22962 |
| 22963 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) |
| 22964 |
| 22965 /* Include ICU headers */ |
| 22966 #include <unicode/utypes.h> |
| 22967 #include <unicode/uregex.h> |
| 22968 #include <unicode/ustring.h> |
| 22969 #include <unicode/ucol.h> |
| 22970 |
| 22971 /* #include <assert.h> */ |
| 22972 |
| 22973 #ifndef SQLITE_CORE |
| 22974 /* #include "sqlite3ext.h" */ |
| 22975 SQLITE_EXTENSION_INIT1 |
| 22976 #else |
| 22977 /* #include "sqlite3.h" */ |
| 22978 #endif |
| 22979 |
/*
** Upper bound, in bytes, on the length of the pattern given to a LIKE or
** GLOB operator.  The value may be overridden at compile time.
*/
#if !defined(SQLITE_MAX_LIKE_PATTERN_LENGTH)
# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
#endif
| 22987 |
/*
** Wrapper around sqlite3_free() that is guaranteed to be a real function
** (sqlite3_free may be a macro), so that its address can be taken.
*/
static void xFree(void *p){ sqlite3_free(p); }
| 22994 |
/*
** Lookup table used to decode the first byte of a multi-byte UTF8
** character.  It is indexed by (lead byte - 0xc0) and yields the initial
** value of the code point accumulator.  Copied here from SQLite source
** code file utf8.c.
*/
static const unsigned char icuUtf8Trans1[] = {
  /* lead bytes 0xc0..0xdf */
  0x00, 0x01, 0x02, 0x03,  0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b,  0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13,  0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b,  0x1c, 0x1d, 0x1e, 0x1f,
  /* lead bytes 0xe0..0xef */
  0x00, 0x01, 0x02, 0x03,  0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b,  0x0c, 0x0d, 0x0e, 0x0f,
  /* lead bytes 0xf0..0xf7 */
  0x00, 0x01, 0x02, 0x03,  0x04, 0x05, 0x06, 0x07,
  /* lead bytes 0xf8..0xff */
  0x00, 0x01, 0x02, 0x03,  0x00, 0x01, 0x00, 0x00,
};
| 23010 |
/*
** Read one UTF-8 encoded character from zIn into c and advance zIn past
** it.  For a lead byte of 0xc0 or more the accumulator starts from
** icuUtf8Trans1[] and absorbs six bits from every following continuation
** byte (10xxxxxx).  Both arguments are evaluated more than once and must
** be plain lvalues.  The expansion is two statements, not one.
*/
#define SQLITE_ICU_READ_UTF8(zIn, c)                                  \
  c = *(zIn++);                                                       \
  if( c>=0xc0 ){                                                      \
    for(c = icuUtf8Trans1[c-0xc0]; (*zIn & 0xc0)==0x80; zIn++){       \
      c = (c<<6) + (0x3f & *zIn);                                     \
    }                                                                 \
  }
| 23019 |
/*
** Advance zIn past one UTF-8 encoded character without decoding it.  The
** pointer must not be at the nul terminator.  The argument is evaluated
** more than once and must be a plain lvalue.  The expansion is two
** statements, not one.
*/
#define SQLITE_ICU_SKIP_UTF8(zIn)                          \
  assert( *zIn );                                          \
  if( *(zIn++)>=0xc0 ){                                    \
    for(; (*zIn & 0xc0)==0x80; zIn++){}                    \
  }
| 23025 |
| 23026 |
| 23027 /* |
| 23028 ** Compare two UTF-8 strings for equality where the first string is |
| 23029 ** a "LIKE" expression. Return true (1) if they are the same and |
| 23030 ** false (0) if they are different. |
| 23031 */ |
| 23032 static int icuLikeCompare( |
| 23033 const uint8_t *zPattern, /* LIKE pattern */ |
| 23034 const uint8_t *zString, /* The UTF-8 string to compare against */ |
| 23035 const UChar32 uEsc /* The escape character */ |
| 23036 ){ |
| 23037 static const int MATCH_ONE = (UChar32)'_'; |
| 23038 static const int MATCH_ALL = (UChar32)'%'; |
| 23039 |
| 23040 int prevEscape = 0; /* True if the previous character was uEsc */ |
| 23041 |
| 23042 while( 1 ){ |
| 23043 |
| 23044 /* Read (and consume) the next character from the input pattern. */ |
| 23045 UChar32 uPattern; |
| 23046 SQLITE_ICU_READ_UTF8(zPattern, uPattern); |
| 23047 if( uPattern==0 ) break; |
| 23048 |
| 23049 /* There are now 4 possibilities: |
| 23050 ** |
| 23051 ** 1. uPattern is an unescaped match-all character "%", |
| 23052 ** 2. uPattern is an unescaped match-one character "_", |
| 23053 ** 3. uPattern is an unescaped escape character, or |
| 23054 ** 4. uPattern is to be handled as an ordinary character |
| 23055 */ |
| 23056 if( !prevEscape && uPattern==MATCH_ALL ){ |
| 23057 /* Case 1. */ |
| 23058 uint8_t c; |
| 23059 |
| 23060 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a |
| 23061 ** MATCH_ALL. For each MATCH_ONE, skip one character in the |
| 23062 ** test string. |
| 23063 */ |
| 23064 while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){ |
| 23065 if( c==MATCH_ONE ){ |
| 23066 if( *zString==0 ) return 0; |
| 23067 SQLITE_ICU_SKIP_UTF8(zString); |
| 23068 } |
| 23069 zPattern++; |
| 23070 } |
| 23071 |
| 23072 if( *zPattern==0 ) return 1; |
| 23073 |
| 23074 while( *zString ){ |
| 23075 if( icuLikeCompare(zPattern, zString, uEsc) ){ |
| 23076 return 1; |
| 23077 } |
| 23078 SQLITE_ICU_SKIP_UTF8(zString); |
| 23079 } |
| 23080 return 0; |
| 23081 |
| 23082 }else if( !prevEscape && uPattern==MATCH_ONE ){ |
| 23083 /* Case 2. */ |
| 23084 if( *zString==0 ) return 0; |
| 23085 SQLITE_ICU_SKIP_UTF8(zString); |
| 23086 |
| 23087 }else if( !prevEscape && uPattern==uEsc){ |
| 23088 /* Case 3. */ |
| 23089 prevEscape = 1; |
| 23090 |
| 23091 }else{ |
| 23092 /* Case 4. */ |
| 23093 UChar32 uString; |
| 23094 SQLITE_ICU_READ_UTF8(zString, uString); |
| 23095 uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); |
| 23096 uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); |
| 23097 if( uString!=uPattern ){ |
| 23098 return 0; |
| 23099 } |
| 23100 prevEscape = 0; |
| 23101 } |
| 23102 } |
| 23103 |
| 23104 return *zString==0; |
| 23105 } |
| 23106 |
| 23107 /* |
| 23108 ** Implementation of the like() SQL function. This function implements |
| 23109 ** the build-in LIKE operator. The first argument to the function is the |
| 23110 ** pattern and the second argument is the string. So, the SQL statements: |
| 23111 ** |
| 23112 ** A LIKE B |
| 23113 ** |
| 23114 ** is implemented as like(B, A). If there is an escape character E, |
| 23115 ** |
| 23116 ** A LIKE B ESCAPE E |
| 23117 ** |
| 23118 ** is mapped to like(B, A, E). |
| 23119 */ |
| 23120 static void icuLikeFunc( |
| 23121 sqlite3_context *context, |
| 23122 int argc, |
| 23123 sqlite3_value **argv |
| 23124 ){ |
| 23125 const unsigned char *zA = sqlite3_value_text(argv[0]); |
| 23126 const unsigned char *zB = sqlite3_value_text(argv[1]); |
| 23127 UChar32 uEsc = 0; |
| 23128 |
| 23129 /* Limit the length of the LIKE or GLOB pattern to avoid problems |
| 23130 ** of deep recursion and N*N behavior in patternCompare(). |
| 23131 */ |
| 23132 if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ |
| 23133 sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); |
| 23134 return; |
| 23135 } |
| 23136 |
| 23137 |
| 23138 if( argc==3 ){ |
| 23139 /* The escape character string must consist of a single UTF-8 character. |
| 23140 ** Otherwise, return an error. |
| 23141 */ |
| 23142 int nE= sqlite3_value_bytes(argv[2]); |
| 23143 const unsigned char *zE = sqlite3_value_text(argv[2]); |
| 23144 int i = 0; |
| 23145 if( zE==0 ) return; |
| 23146 U8_NEXT(zE, i, nE, uEsc); |
| 23147 if( i!=nE){ |
| 23148 sqlite3_result_error(context, |
| 23149 "ESCAPE expression must be a single character", -1); |
| 23150 return; |
| 23151 } |
| 23152 } |
| 23153 |
| 23154 if( zA && zB ){ |
| 23155 sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); |
| 23156 } |
| 23157 } |
| 23158 |
| 23159 /* |
| 23160 ** This function is called when an ICU function called from within |
| 23161 ** the implementation of an SQL scalar function returns an error. |
| 23162 ** |
| 23163 ** The scalar function context passed as the first argument is |
| 23164 ** loaded with an error message based on the following two args. |
| 23165 */ |
| 23166 static void icuFunctionError( |
| 23167 sqlite3_context *pCtx, /* SQLite scalar function context */ |
| 23168 const char *zName, /* Name of ICU function that failed */ |
| 23169 UErrorCode e /* Error code returned by ICU function */ |
| 23170 ){ |
| 23171 char zBuf[128]; |
| 23172 sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); |
| 23173 zBuf[127] = '\0'; |
| 23174 sqlite3_result_error(pCtx, zBuf, -1); |
| 23175 } |
| 23176 |
| 23177 /* |
| 23178 ** Function to delete compiled regexp objects. Registered as |
| 23179 ** a destructor function with sqlite3_set_auxdata(). |
| 23180 */ |
| 23181 static void icuRegexpDelete(void *p){ |
| 23182 URegularExpression *pExpr = (URegularExpression *)p; |
| 23183 uregex_close(pExpr); |
| 23184 } |
| 23185 |
| 23186 /* |
| 23187 ** Implementation of SQLite REGEXP operator. This scalar function takes |
| 23188 ** two arguments. The first is a regular expression pattern to compile |
| 23189 ** the second is a string to match against that pattern. If either |
| 23190 ** argument is an SQL NULL, then NULL Is returned. Otherwise, the result |
| 23191 ** is 1 if the string matches the pattern, or 0 otherwise. |
| 23192 ** |
| 23193 ** SQLite maps the regexp() function to the regexp() operator such |
| 23194 ** that the following two are equivalent: |
| 23195 ** |
| 23196 ** zString REGEXP zPattern |
| 23197 ** regexp(zPattern, zString) |
| 23198 ** |
| 23199 ** Uses the following ICU regexp APIs: |
| 23200 ** |
| 23201 ** uregex_open() |
| 23202 ** uregex_matches() |
| 23203 ** uregex_close() |
| 23204 */ |
| 23205 static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ |
| 23206 UErrorCode status = U_ZERO_ERROR; |
| 23207 URegularExpression *pExpr; |
| 23208 UBool res; |
| 23209 const UChar *zString = sqlite3_value_text16(apArg[1]); |
| 23210 |
| 23211 (void)nArg; /* Unused parameter */ |
| 23212 |
| 23213 /* If the left hand side of the regexp operator is NULL, |
| 23214 ** then the result is also NULL. |
| 23215 */ |
| 23216 if( !zString ){ |
| 23217 return; |
| 23218 } |
| 23219 |
| 23220 pExpr = sqlite3_get_auxdata(p, 0); |
| 23221 if( !pExpr ){ |
| 23222 const UChar *zPattern = sqlite3_value_text16(apArg[0]); |
| 23223 if( !zPattern ){ |
| 23224 return; |
| 23225 } |
| 23226 pExpr = uregex_open(zPattern, -1, 0, 0, &status); |
| 23227 |
| 23228 if( U_SUCCESS(status) ){ |
| 23229 sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); |
| 23230 }else{ |
| 23231 assert(!pExpr); |
| 23232 icuFunctionError(p, "uregex_open", status); |
| 23233 return; |
| 23234 } |
| 23235 } |
| 23236 |
| 23237 /* Configure the text that the regular expression operates on. */ |
| 23238 uregex_setText(pExpr, zString, -1, &status); |
| 23239 if( !U_SUCCESS(status) ){ |
| 23240 icuFunctionError(p, "uregex_setText", status); |
| 23241 return; |
| 23242 } |
| 23243 |
| 23244 /* Attempt the match */ |
| 23245 res = uregex_matches(pExpr, 0, &status); |
| 23246 if( !U_SUCCESS(status) ){ |
| 23247 icuFunctionError(p, "uregex_matches", status); |
| 23248 return; |
| 23249 } |
| 23250 |
| 23251 /* Set the text that the regular expression operates on to a NULL |
| 23252 ** pointer. This is not really necessary, but it is tidier than |
| 23253 ** leaving the regular expression object configured with an invalid |
| 23254 ** pointer after this function returns. |
| 23255 */ |
| 23256 uregex_setText(pExpr, 0, 0, &status); |
| 23257 |
| 23258 /* Return 1 or 0. */ |
| 23259 sqlite3_result_int(p, res ? 1 : 0); |
| 23260 } |
| 23261 |
| 23262 /* |
| 23263 ** Implementations of scalar functions for case mapping - upper() and |
| 23264 ** lower(). Function upper() converts its input to upper-case (ABC). |
| 23265 ** Function lower() converts to lower-case (abc). |
| 23266 ** |
| 23267 ** ICU provides two types of case mapping, "general" case mapping and |
| 23268 ** "language specific". Refer to ICU documentation for the differences |
| 23269 ** between the two. |
| 23270 ** |
| 23271 ** To utilise "general" case mapping, the upper() or lower() scalar |
| 23272 ** functions are invoked with one argument: |
| 23273 ** |
| 23274 ** upper('ABC') -> 'abc' |
| 23275 ** lower('abc') -> 'ABC' |
| 23276 ** |
| 23277 ** To access ICU "language specific" case mapping, upper() or lower() |
| 23278 ** should be invoked with two arguments. The second argument is the name |
| 23279 ** of the locale to use. Passing an empty string ("") or SQL NULL value |
| 23280 ** as the second argument is the same as invoking the 1 argument version |
| 23281 ** of upper() or lower(). |
| 23282 ** |
| 23283 ** lower('I', 'en_us') -> 'i' |
| 23284 ** lower('I', 'tr_tr') -> '\u131' (small dotless i) |
| 23285 ** |
| 23286 ** http://www.icu-project.org/userguide/posix.html#case_mappings |
| 23287 */ |
| 23288 static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ |
| 23289 const UChar *zInput; /* Pointer to input string */ |
| 23290 UChar *zOutput = 0; /* Pointer to output buffer */ |
| 23291 int nInput; /* Size of utf-16 input string in bytes */ |
| 23292 int nOut; /* Size of output buffer in bytes */ |
| 23293 int cnt; |
| 23294 int bToUpper; /* True for toupper(), false for tolower() */ |
| 23295 UErrorCode status; |
| 23296 const char *zLocale = 0; |
| 23297 |
| 23298 assert(nArg==1 || nArg==2); |
| 23299 bToUpper = (sqlite3_user_data(p)!=0); |
| 23300 if( nArg==2 ){ |
| 23301 zLocale = (const char *)sqlite3_value_text(apArg[1]); |
| 23302 } |
| 23303 |
| 23304 zInput = sqlite3_value_text16(apArg[0]); |
| 23305 if( !zInput ){ |
| 23306 return; |
| 23307 } |
| 23308 nOut = nInput = sqlite3_value_bytes16(apArg[0]); |
| 23309 if( nOut==0 ){ |
| 23310 sqlite3_result_text16(p, "", 0, SQLITE_STATIC); |
| 23311 return; |
| 23312 } |
| 23313 |
| 23314 for(cnt=0; cnt<2; cnt++){ |
| 23315 UChar *zNew = sqlite3_realloc(zOutput, nOut); |
| 23316 if( zNew==0 ){ |
| 23317 sqlite3_free(zOutput); |
| 23318 sqlite3_result_error_nomem(p); |
| 23319 return; |
| 23320 } |
| 23321 zOutput = zNew; |
| 23322 status = U_ZERO_ERROR; |
| 23323 if( bToUpper ){ |
| 23324 nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); |
| 23325 }else{ |
| 23326 nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status); |
| 23327 } |
| 23328 |
| 23329 if( U_SUCCESS(status) ){ |
| 23330 sqlite3_result_text16(p, zOutput, nOut, xFree); |
| 23331 }else if( status==U_BUFFER_OVERFLOW_ERROR ){ |
| 23332 assert( cnt==0 ); |
| 23333 continue; |
| 23334 }else{ |
| 23335 icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status); |
| 23336 } |
| 23337 return; |
| 23338 } |
| 23339 assert( 0 ); /* Unreachable */ |
| 23340 } |
| 23341 |
| 23342 /* |
| 23343 ** Collation sequence destructor function. The pCtx argument points to |
| 23344 ** a UCollator structure previously allocated using ucol_open(). |
| 23345 */ |
| 23346 static void icuCollationDel(void *pCtx){ |
| 23347 UCollator *p = (UCollator *)pCtx; |
| 23348 ucol_close(p); |
| 23349 } |
| 23350 |
| 23351 /* |
| 23352 ** Collation sequence comparison function. The pCtx argument points to |
| 23353 ** a UCollator structure previously allocated using ucol_open(). |
| 23354 */ |
| 23355 static int icuCollationColl( |
| 23356 void *pCtx, |
| 23357 int nLeft, |
| 23358 const void *zLeft, |
| 23359 int nRight, |
| 23360 const void *zRight |
| 23361 ){ |
| 23362 UCollationResult res; |
| 23363 UCollator *p = (UCollator *)pCtx; |
| 23364 res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); |
| 23365 switch( res ){ |
| 23366 case UCOL_LESS: return -1; |
| 23367 case UCOL_GREATER: return +1; |
| 23368 case UCOL_EQUAL: return 0; |
| 23369 } |
| 23370 assert(!"Unexpected return value from ucol_strcoll()"); |
| 23371 return 0; |
| 23372 } |
| 23373 |
| 23374 /* |
| 23375 ** Implementation of the scalar function icu_load_collation(). |
| 23376 ** |
| 23377 ** This scalar function is used to add ICU collation based collation |
| 23378 ** types to an SQLite database connection. It is intended to be called |
| 23379 ** as follows: |
| 23380 ** |
| 23381 ** SELECT icu_load_collation(<locale>, <collation-name>); |
| 23382 ** |
| 23383 ** Where <locale> is a string containing an ICU locale identifier (i.e. |
| 23384 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the |
| 23385 ** collation sequence to create. |
| 23386 */ |
| 23387 static void icuLoadCollation( |
| 23388 sqlite3_context *p, |
| 23389 int nArg, |
| 23390 sqlite3_value **apArg |
| 23391 ){ |
| 23392 sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); |
| 23393 UErrorCode status = U_ZERO_ERROR; |
| 23394 const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ |
| 23395 const char *zName; /* SQL Collation sequence name (eg. "japanese") */ |
| 23396 UCollator *pUCollator; /* ICU library collation object */ |
| 23397 int rc; /* Return code from sqlite3_create_collation_x() */ |
| 23398 |
| 23399 assert(nArg==2); |
| 23400 (void)nArg; /* Unused parameter */ |
| 23401 zLocale = (const char *)sqlite3_value_text(apArg[0]); |
| 23402 zName = (const char *)sqlite3_value_text(apArg[1]); |
| 23403 |
| 23404 if( !zLocale || !zName ){ |
| 23405 return; |
| 23406 } |
| 23407 |
| 23408 pUCollator = ucol_open(zLocale, &status); |
| 23409 if( !U_SUCCESS(status) ){ |
| 23410 icuFunctionError(p, "ucol_open", status); |
| 23411 return; |
| 23412 } |
| 23413 assert(p); |
| 23414 |
| 23415 rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, |
| 23416 icuCollationColl, icuCollationDel |
| 23417 ); |
| 23418 if( rc!=SQLITE_OK ){ |
| 23419 ucol_close(pUCollator); |
| 23420 sqlite3_result_error(p, "Error registering collation function", -1); |
| 23421 } |
| 23422 } |
| 23423 |
| 23424 /* |
| 23425 ** Register the ICU extension functions with database db. |
| 23426 */ |
| 23427 SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ |
| 23428 static const struct IcuScalar { |
| 23429 const char *zName; /* Function name */ |
| 23430 unsigned char nArg; /* Number of arguments */ |
| 23431 unsigned short enc; /* Optimal text encoding */ |
| 23432 unsigned char iContext; /* sqlite3_user_data() context */ |
| 23433 void (*xFunc)(sqlite3_context*,int,sqlite3_value**); |
| 23434 } scalars[] = { |
| 23435 {"icu_load_collation", 2, SQLITE_UTF8, 1, icuLoadCollation}, |
| 23436 {"regexp", 2, SQLITE_ANY|SQLITE_DETERMINISTIC, 0, icuRegexpFunc}, |
| 23437 {"lower", 1, SQLITE_UTF16|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, |
| 23438 {"lower", 2, SQLITE_UTF16|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, |
| 23439 {"upper", 1, SQLITE_UTF16|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, |
| 23440 {"upper", 2, SQLITE_UTF16|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, |
| 23441 {"lower", 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, |
| 23442 {"lower", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuCaseFunc16}, |
| 23443 {"upper", 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, |
| 23444 {"upper", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 1, icuCaseFunc16}, |
| 23445 {"like", 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuLikeFunc}, |
| 23446 {"like", 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, icuLikeFunc}, |
| 23447 }; |
| 23448 int rc = SQLITE_OK; |
| 23449 int i; |
| 23450 |
| 23451 |
| 23452 for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ |
| 23453 const struct IcuScalar *p = &scalars[i]; |
| 23454 rc = sqlite3_create_function( |
| 23455 db, p->zName, p->nArg, p->enc, |
| 23456 p->iContext ? (void*)db : (void*)0, |
| 23457 p->xFunc, 0, 0 |
| 23458 ); |
| 23459 } |
| 23460 |
| 23461 return rc; |
| 23462 } |
| 23463 |
| 23464 #if !SQLITE_CORE |
| 23465 #ifdef _WIN32 |
| 23466 __declspec(dllexport) |
| 23467 #endif |
| 23468 SQLITE_API int sqlite3_icu_init( |
| 23469 sqlite3 *db, |
| 23470 char **pzErrMsg, |
| 23471 const sqlite3_api_routines *pApi |
| 23472 ){ |
| 23473 SQLITE_EXTENSION_INIT2(pApi) |
| 23474 return sqlite3IcuInit(db); |
| 23475 } |
| 23476 #endif |
| 23477 |
| 23478 #endif |
| 23479 |
| 23480 /************** End of icu.c *************************************************/ |
| 23481 /************** Begin file fts3_icu.c ****************************************/ |
| 23482 /* |
| 23483 ** 2007 June 22 |
| 23484 ** |
| 23485 ** The author disclaims copyright to this source code. In place of |
| 23486 ** a legal notice, here is a blessing: |
| 23487 ** |
| 23488 ** May you do good and not evil. |
| 23489 ** May you find forgiveness for yourself and forgive others. |
| 23490 ** May you share freely, never taking more than you give. |
| 23491 ** |
| 23492 ************************************************************************* |
| 23493 ** This file implements a tokenizer for fts3 based on the ICU library. |
| 23494 */ |
| 23495 /* #include "fts3Int.h" */ |
| 23496 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 23497 #ifdef SQLITE_ENABLE_ICU |
| 23498 |
| 23499 /* #include <assert.h> */ |
| 23500 /* #include <string.h> */ |
| 23501 /* #include "fts3_tokenizer.h" */ |
| 23502 |
| 23503 #include <unicode/ubrk.h> |
| 23504 /* #include <unicode/ucol.h> */ |
| 23505 /* #include <unicode/ustring.h> */ |
| 23506 #include <unicode/utf16.h> |
| 23507 |
| 23508 typedef struct IcuTokenizer IcuTokenizer; |
| 23509 typedef struct IcuCursor IcuCursor; |
| 23510 |
| 23511 struct IcuTokenizer { |
| 23512 sqlite3_tokenizer base; |
| 23513 char *zLocale; |
| 23514 }; |
| 23515 |
| 23516 struct IcuCursor { |
| 23517 sqlite3_tokenizer_cursor base; |
| 23518 |
| 23519 UBreakIterator *pIter; /* ICU break-iterator object */ |
| 23520 int nChar; /* Number of UChar elements in pInput */ |
| 23521 UChar *aChar; /* Copy of input using utf-16 encoding */ |
| 23522 int *aOffset; /* Offsets of each character in utf-8 input */ |
| 23523 |
| 23524 int nBuffer; |
| 23525 char *zBuffer; |
| 23526 |
| 23527 int iToken; |
| 23528 }; |
| 23529 |
| 23530 /* |
| 23531 ** Create a new tokenizer instance. |
| 23532 */ |
| 23533 static int icuCreate( |
| 23534 int argc, /* Number of entries in argv[] */ |
| 23535 const char * const *argv, /* Tokenizer creation arguments */ |
| 23536 sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ |
| 23537 ){ |
| 23538 IcuTokenizer *p; |
| 23539 int n = 0; |
| 23540 |
| 23541 if( argc>0 ){ |
| 23542 n = strlen(argv[0])+1; |
| 23543 } |
| 23544 p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); |
| 23545 if( !p ){ |
| 23546 return SQLITE_NOMEM; |
| 23547 } |
| 23548 memset(p, 0, sizeof(IcuTokenizer)); |
| 23549 |
| 23550 if( n ){ |
| 23551 p->zLocale = (char *)&p[1]; |
| 23552 memcpy(p->zLocale, argv[0], n); |
| 23553 } |
| 23554 |
| 23555 *ppTokenizer = (sqlite3_tokenizer *)p; |
| 23556 |
| 23557 return SQLITE_OK; |
| 23558 } |
| 23559 |
| 23560 /* |
| 23561 ** Destroy a tokenizer |
| 23562 */ |
| 23563 static int icuDestroy(sqlite3_tokenizer *pTokenizer){ |
| 23564 IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| 23565 sqlite3_free(p); |
| 23566 return SQLITE_OK; |
| 23567 } |
| 23568 |
| 23569 /* |
| 23570 ** Prepare to begin tokenizing a particular string. The input |
| 23571 ** string to be tokenized is pInput[0..nBytes-1]. A cursor |
| 23572 ** used to incrementally tokenize this string is returned in |
| 23573 ** *ppCursor. |
| 23574 */ |
| 23575 static int icuOpen( |
| 23576 sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
| 23577 const char *zInput, /* Input string */ |
| 23578 int nInput, /* Length of zInput in bytes */ |
| 23579 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
| 23580 ){ |
| 23581 IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| 23582 IcuCursor *pCsr; |
| 23583 |
| 23584 const int32_t opt = U_FOLD_CASE_DEFAULT; |
| 23585 UErrorCode status = U_ZERO_ERROR; |
| 23586 int nChar; |
| 23587 |
| 23588 UChar32 c; |
| 23589 int iInput = 0; |
| 23590 int iOut = 0; |
| 23591 |
| 23592 *ppCursor = 0; |
| 23593 |
| 23594 if( zInput==0 ){ |
| 23595 nInput = 0; |
| 23596 zInput = ""; |
| 23597 }else if( nInput<0 ){ |
| 23598 nInput = strlen(zInput); |
| 23599 } |
| 23600 nChar = nInput+1; |
| 23601 pCsr = (IcuCursor *)sqlite3_malloc( |
| 23602 sizeof(IcuCursor) + /* IcuCursor */ |
| 23603 ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ |
| 23604 (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ |
| 23605 ); |
| 23606 if( !pCsr ){ |
| 23607 return SQLITE_NOMEM; |
| 23608 } |
| 23609 memset(pCsr, 0, sizeof(IcuCursor)); |
| 23610 pCsr->aChar = (UChar *)&pCsr[1]; |
| 23611 pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; |
| 23612 |
| 23613 pCsr->aOffset[iOut] = iInput; |
| 23614 U8_NEXT(zInput, iInput, nInput, c); |
| 23615 while( c>0 ){ |
| 23616 int isError = 0; |
| 23617 c = u_foldCase(c, opt); |
| 23618 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); |
| 23619 if( isError ){ |
| 23620 sqlite3_free(pCsr); |
| 23621 return SQLITE_ERROR; |
| 23622 } |
| 23623 pCsr->aOffset[iOut] = iInput; |
| 23624 |
| 23625 if( iInput<nInput ){ |
| 23626 U8_NEXT(zInput, iInput, nInput, c); |
| 23627 }else{ |
| 23628 c = 0; |
| 23629 } |
| 23630 } |
| 23631 |
| 23632 pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); |
| 23633 if( !U_SUCCESS(status) ){ |
| 23634 sqlite3_free(pCsr); |
| 23635 return SQLITE_ERROR; |
| 23636 } |
| 23637 pCsr->nChar = iOut; |
| 23638 |
| 23639 ubrk_first(pCsr->pIter); |
| 23640 *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; |
| 23641 return SQLITE_OK; |
| 23642 } |
| 23643 |
| 23644 /* |
| 23645 ** Close a tokenization cursor previously opened by a call to icuOpen(). |
| 23646 */ |
| 23647 static int icuClose(sqlite3_tokenizer_cursor *pCursor){ |
| 23648 IcuCursor *pCsr = (IcuCursor *)pCursor; |
| 23649 ubrk_close(pCsr->pIter); |
| 23650 sqlite3_free(pCsr->zBuffer); |
| 23651 sqlite3_free(pCsr); |
| 23652 return SQLITE_OK; |
| 23653 } |
| 23654 |
| 23655 /* |
| 23656 ** Extract the next token from a tokenization cursor. |
| 23657 */ |
| 23658 static int icuNext( |
| 23659 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ |
| 23660 const char **ppToken, /* OUT: *ppToken is the token text */ |
| 23661 int *pnBytes, /* OUT: Number of bytes in token */ |
| 23662 int *piStartOffset, /* OUT: Starting offset of token */ |
| 23663 int *piEndOffset, /* OUT: Ending offset of token */ |
| 23664 int *piPosition /* OUT: Position integer of token */ |
| 23665 ){ |
| 23666 IcuCursor *pCsr = (IcuCursor *)pCursor; |
| 23667 |
| 23668 int iStart = 0; |
| 23669 int iEnd = 0; |
| 23670 int nByte = 0; |
| 23671 |
| 23672 while( iStart==iEnd ){ |
| 23673 UChar32 c; |
| 23674 |
| 23675 iStart = ubrk_current(pCsr->pIter); |
| 23676 iEnd = ubrk_next(pCsr->pIter); |
| 23677 if( iEnd==UBRK_DONE ){ |
| 23678 return SQLITE_DONE; |
| 23679 } |
| 23680 |
| 23681 while( iStart<iEnd ){ |
| 23682 int iWhite = iStart; |
| 23683 U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); |
| 23684 if( u_isspace(c) ){ |
| 23685 iStart = iWhite; |
| 23686 }else{ |
| 23687 break; |
| 23688 } |
| 23689 } |
| 23690 assert(iStart<=iEnd); |
| 23691 } |
| 23692 |
| 23693 do { |
| 23694 UErrorCode status = U_ZERO_ERROR; |
| 23695 if( nByte ){ |
| 23696 char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); |
| 23697 if( !zNew ){ |
| 23698 return SQLITE_NOMEM; |
| 23699 } |
| 23700 pCsr->zBuffer = zNew; |
| 23701 pCsr->nBuffer = nByte; |
| 23702 } |
| 23703 |
| 23704 u_strToUTF8( |
| 23705 pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ |
| 23706 &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ |
| 23707 &status /* Output success/failure */ |
| 23708 ); |
| 23709 } while( nByte>pCsr->nBuffer ); |
| 23710 |
| 23711 *ppToken = pCsr->zBuffer; |
| 23712 *pnBytes = nByte; |
| 23713 *piStartOffset = pCsr->aOffset[iStart]; |
| 23714 *piEndOffset = pCsr->aOffset[iEnd]; |
| 23715 *piPosition = pCsr->iToken++; |
| 23716 |
| 23717 return SQLITE_OK; |
| 23718 } |
| 23719 |
| 23720 /* |
| 23721 ** The set of routines that implement the simple tokenizer |
| 23722 */ |
| 23723 static const sqlite3_tokenizer_module icuTokenizerModule = { |
| 23724 0, /* iVersion */ |
| 23725 icuCreate, /* xCreate */ |
| 23726 icuDestroy, /* xCreate */ |
| 23727 icuOpen, /* xOpen */ |
| 23728 icuClose, /* xClose */ |
| 23729 icuNext, /* xNext */ |
| 23730 0, /* xLanguageid */ |
| 23731 }; |
| 23732 |
| 23733 /* |
| 23734 ** Set *ppModule to point at the implementation of the ICU tokenizer. |
| 23735 */ |
| 23736 SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( |
| 23737 sqlite3_tokenizer_module const**ppModule |
| 23738 ){ |
| 23739 *ppModule = &icuTokenizerModule; |
| 23740 } |
| 23741 |
| 23742 #endif /* defined(SQLITE_ENABLE_ICU) */ |
| 23743 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| 23744 |
| 23745 /************** End of fts3_icu.c ********************************************/ |
| 23746 |
| 23747 /* Chain include. */ |
| 23748 #include "sqlite3.08.c" |
OLD | NEW |